
    *`i                     X    d Z ddlmZmZ ddlZddlZddlZ G d dej                  ZdS )z
This file helps integrate xgrammar in HF transformers package by extending
transformers.LogitsProcessor, which is to be fed to `model.generate()`.
    )ListUnionNc                       e Zd ZdZdeej        eej                 f         fdZde	j
        de	j        de	j        fdZdS )	LogitsProcessora  
    LogitsProcessor for processing logits in transformers' generate() method.

    Example usage
    -------------
        .. code:: python

            model_name = "Qwen/Qwen2.5-0.5B-Instruct"
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            config = AutoConfig.from_pretrained(model_name)
            # This can be larger than tokenizer.vocab_size due to paddings
            full_vocab_size = config.vocab_size
            tokenizer_info = xgr.TokenizerInfo.from_huggingface(tokenizer, vocab_size=full_vocab_size)

            grammar_compiler = xgr.GrammarCompiler(tokenizer_info)
            compiled_grammar = grammar_compiler.compile_builtin_json_grammar()
            xgr_logits_processor = xgr.contrib.hf.LogitsProcessor(compiled_grammar)
            model.generate(prompt, logits_processor=[xgr_logits_processor])

        For an end-to-end example, see folder `examples/hf_transformers/`.

    Notes
    -----
        - Note that this LogitsProcessor can only be used once. For each `generate()` call,
            instantiate a new one.
        - Note that this implementation may contain extra overhead.
    compiled_grammarc                     g | _         t          |t                    r|n|g| _        | j        d         j        j        | _        d| _        d| _        d| _	        dS )a  Initialize the LogitsProcessor.

        Parameters
        ----------
        compiled_grammar : xgr.CompiledGrammar | List[xgr.CompiledGrammar]
            One or more grammars compiled according to the given grammar and the model's tokenizer_info.
        r   NF)
matchers
isinstancelistcompiled_grammarstokenizer_info
vocab_sizefull_vocab_sizetoken_bitmask	prefilled
batch_size)selfr   s     g/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/xgrammar/contrib/hf.py__init__zLogitsProcessor.__init__+   sb     35 *+;T B BZIYHZ 	  $5a8GR!    	input_idsscoresreturnc                     t           j                  dk    r|j        d          _        t           j                  dk    r j        n j         j        z   _        t           j                   j        k    s
J d             fdt           j                  D              _        t          j         j         j                   _	        |j        d          j        k    r)t          dd|j        d          d j         dz              j        sd	 _        nft           j                  D ]Q} j        |                                         s0||         d
         } j        |                             |          sJ Rt           j                  D ]G} j        |                                         s& j        |                              j	        |           H|j        j        }|dk    r|                    d          }t          j        | j	                            |j                             |dk    r|                    |          }|S )z
        Accept token sampled in the last iteration, fill in bitmask, and apply bitmask to logits.

        Returns:
            scores: Logits modified with bitmask.
        r      z@The number of compiled grammars must be equal to the batch size.c                 N    g | ]!}t          j        j        |                   "S  )xgrGrammarMatcherr   ).0ir   s     r   
<listcomp>z,LogitsProcessor.__call__.<locals>.<listcomp>N   s9       BC"4#9!#<==  r   z;Expect input_ids.shape[0] to be LogitsProcessor.batch_size.zGot z for the former, and z for the latter.Tcudacpu)lenr	   shaper   r   ranger   allocate_token_bitmaskr   r   RuntimeErrorr   is_terminatedaccept_tokenfill_next_token_bitmaskdevicetypetoapply_token_bitmask_inplace)r   r   r   r!   sampled_tokendevice_types   `     r   __call__zLogitsProcessor.__call__<   so    t}""'oa0DO t-..22 &&+do= " D*++t>>>Q ?>>   GLT_G]G]  DM "%!;DOTMa!b!bD?100Mc+cc$/cccd  
 ~ 	H!DNN4?++ H H}Q'5577 H$-aL$4M=+88GGGGGt'' 	P 	PA=#1133 Pa 889KQOOO m(&  YYu%%F'0B0E0Efm0T0TUUU&  YY{++F r   N)__name__
__module____qualname____doc__r   r   CompiledGrammarr   r   torch
LongTensorFloatTensorr4   r   r   r   r   r      s         8s/BDI\D]/])^    "6%"2 6E<M 6RWRc 6 6 6 6 6 6r   r   )	r8   typingr   r   r:   transformersxgrammarr   r   r   r   r   <module>r@      s    
                 d d d d dl2 d d d d dr   