
    *`i<                        d Z ddlZddlmZmZmZmZmZmZm	Z	 ddl
Z
ddlmZ ddlmZ ddlmZmZ ddlmZmZ dd	lmZmZ dd
lmZ ddddddde	eee         eeef         f         dedee         deeeef                  dee         dedefdZdAdededefdZdBdededefdZde	eef         defdZ ddddde	eef         d ed!ed"ed#edefd$Z!	 dCd%e
j"        d&ed'edee         fd(Z#	 dCd%e
j"        d&ed'edeeef         fd)Z$d*e
j"        de
j"        fd+Z%dDd,e
j"        d&ee         de
j"        fd-Z&	 dDde	eef         d.ee         defd/Z'd0edee         fd1Z(dEd2ee         d3ee         defd4Z)dEd2ee*         d3ee*         defd5Z+dedefd6Z,de	eee         eeef         f         defd7Z-d8e
j"        d9e
j"        d:e
j"        d;d<d=e
j"        ddfd>Z. G d? d@          Z/dS )FzTesting utilities.

The APIs in this module are used for testing and debugging and are prone to
change. Don't use them in production.    N)AnyDictListOptionalTupleTypeUnion)	BaseModel   )_core)CompiledGrammarGrammarCompiler)Grammar_convert_schema_to_str)GrammarMatcherbitmask_dtype)TokenizerInfoT)any_whitespaceindent
separatorsmax_whitespace_cntstrict_modeschemar   r   r   r   r   returnc                h    t          |           }t          j                            ||||||          S )a  Convert JSON schema string to BNF grammar string. For test purposes.

    Parameters
    ----------
    schema : Union[str, Type[BaseModel], Dict[str, Any]]
        The schema string or Pydantic model or JSON schema dict.

    indent : Optional[int], default: None
        The number of spaces for indentation. If None, the output will be in one line.

    separators : Optional[Tuple[str, str]], default: None
        Two separators used in the schema: comma and colon. Examples: (",", ":"), (", ", ": ").
        If None, the default separators will be used: (",", ": ") when the indent is not None,
        and (", ", ": ") otherwise.

    strict_mode : bool, default: True
        Whether to use strict mode. In strict mode, the generated grammar will not allow
        properties and items that is not specified in the schema. This is equivalent to
        setting unevaluatedProperties and unevaluatedItems to false.

        This helps LLM to generate accurate output in the grammar-guided generation with JSON
        schema.

    max_whitespace_cnt : Optional[int], default: None
        The maximum number of whitespace characters allowed between elements, such like keys, values, separators and so on.
        If None, there is no limit on the number of whitespace characters.
        If specified, it will limit the number of whitespace characters to at most max_whitespace_cnt.
        It should be a positive integer.

    Returns
    -------
    bnf_string : str
        The BNF grammar string.
    )r   r   testing_json_schema_to_ebnf)r   r   r   r   r   r   
schema_strs          d/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/xgrammar/testing.pyr   r      s:    V (//J=--NFJEW      regexwith_rule_namec                 B    t           j                            | |          S )as  Convert a regex string to BNF grammar string. For test purposes. The regex grammar
    follows the syntax in JavaScript (ECMA 262). Check
    https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions
    for a tutorial. Currently the following features are not supported:
    1. Backreference (\1)
    2. non-capturing group, naming capture groups and assertions ((?...))
    3. Unicode character class escape (\p{...})
    4. Word boundary (\b)
    5. Unicode property escapes (\p{...})
    6. Quantifier with range {x,y}. Now user can just repeat the element as a workaround.

    This method is primarily intended for testing and debugging purposes.

    Parameters
    ----------
    regex : str
        The regex string to be converted.

    Returns
    -------
    bnf_string : str
        The BNF grammar string converted from the input regex.
    )r   r   _regex_to_ebnf)r!   r"   s     r   r$   r$   D   s    0 =''~>>>r    rootebnf_stringroot_rule_namec                 f    t          j        t          j                            | |                    S )a  Convert a BNF grammar string to a Grammar object without normalization. For test
    purposes. The result grammar cannot be compiled / used in GrammarMatcher.

    Parameters
    ----------
    ebnf_string : str
        The BNF grammar string to be converted.

    Returns
    -------
    grammar : Grammar
        The unnormalized Grammar object converted from the input BNF grammar string.
    )r   _create_from_handler   r   !_ebnf_to_grammar_no_normalization)r&   r'   s     r   r*   r*   _   s.     &77^TT  r    grammarc                     t          g           }t          |d          }|                    |           }t          |fddi|S )a  Create a GrammarMatcher from a grammar. The tokenizer info will be set to an empty
    TokenizerInfo. The result matcher can only accept strings, and cannot accept tokens.

    Parameters
    ----------
    grammar : Union[Grammar, str]
        The grammar to create the matcher from. Can be either a Grammar object or a string
        containing EBNF grammar.

    Returns
    -------
    matcher : GrammarMatcher
        The created grammar matcher.
    Fcache_enabledterminate_without_stop_tokenTr   r   compile_grammarr   )r+   kwargstokenizer_infogrammar_compilercompiled_grammars        r   _get_matcher_from_grammarr6   r   sU     #2&&N&~UKKK'77@@*XXXQWXXXr    F)debug_print
print_timerequire_termination	input_strr7   r8   r9   c          	         t          |           }|rt          j                    }|                    ||          }|r2t          j                    }t	          d| d| d||z
  dz   d           |sdS |sdS |                                S )	a#  Check if a grammar accepts a string. For test purposes.

    Parameters
    ----------
    grammar : Union[Grammar, str]
        The grammar to check. Can be either a Grammar object or a BNF grammar string.
    input_str : str
        The input string to check.
    debug_print : bool, default: False
        Whether to print debug information during matching.
    print_time : bool, default: False
        Whether to print timing information.

    Returns
    -------
    bool
        True if the grammar accepts the string, False otherwise.
    )r7   z
Accepting z
, result: z, time: g     @@z usFT)r6   timemonotonic_nsaccept_stringprintis_terminated)	r+   r:   r7   r8   r9   grammar_matcherstartacceptedends	            r   _is_grammar_accept_stringrE      s    4 088O $!##,,YK,PPH \!!Z9ZZZZ3;RUBUZZZ[[[ u t((***r    bitmask
vocab_sizeindexc                    | j         j        dk    rt          d          | j        t          k    rt          dt           d          t
          j                            |                                 t          | j
                  ||          S )a  Get the ids of the rejected tokens from the bitmask. Mainly for debug purposes.

    Parameters
    ----------
    bitmask : torch.Tensor
        The rejected token bitmask. Should be generated by allocate_token_bitmask and
        filled by fill_next_token_bitmask. Should be on CPU.

    index : int, default: 0
        The batch index of the bitmask. For batch inference, bitmask[index] will be used.
        Otherwise is ignored.

    Returns
    -------
    rejected_token_ids : List[int]
        A list of rejected token ids.
    cpuzbitmask should be on CPU.zbitmask should be of type .)devicetype
ValueErrordtyper   r   r   _get_masked_tokens_from_bitmaskdata_ptrlistshaperF   rG   rH   s      r   rP   rP      s    ( ~e##4555}%%FmFFFGGG=88D//U  r    c                     t           j                            |                                 t	          | j                  ||          S )a  Check if the bitmask is a single token bitmask.

    Parameters
    ----------
    bitmask : torch.Tensor
        The bitmask to check. Should be on CPU.
    vocab_size : int
        The size of the vocabulary.
    index : int, default: 0
        The index of the bitmask.

    Returns
    -------
    is_single_token : bool
        True if the bitmask is a single token bitmask, False otherwise.
    token_id : int
        The id of the token if the bitmask is a single token bitmask, -1 otherwise.
    )r   r   _is_single_token_bitmaskrQ   rR   rS   rT   s      r   rV   rV      s=    * =11D//U  r    	bool_maskc                 2   |                      t          j                  }d| j        d         dz  z
  dz  }|dk    r)t          j        j                            |d|fd          }|                    | j        d         dd          }t          j        d t          d          D             | j
        t          j                                       t          j                  }||z                      d	          }|                     t          j                  S )
a  Get the bitmask from bool mask. If the bool mask does not align with the 32-bit block
    size, it will add extra 1 paddings.

    Parameters
    ----------
    bool_mask : torch.Tensor
        The rejected token bool mask. For each element value, True means the token is allowed,
        while False means the token is rejected.

    Returns
    -------
    bitmask : torch.Tensor
        The rejected token bitmask.
        r   r   )valuec                     g | ]}d |z  S )r    ).0is     r   
<listcomp>z(bool_mask_to_bitmask.<locals>.<listcomp>  s    ###Aa###r    )rL   rO      )dim)totorchint32rS   nn
functionalpadviewtensorrangerL   int64sum)rW   bool_mask_int32pad_sizebool_mask_viewweightsrF   s         r   bool_mask_to_bitmaskrr      s      ll5;//OY_Q'",,2H!||(-11/Ax=XY1ZZ$)))/!*<b"EENl##r###I,<EK  boo  ',,,33G::ek"""r    bit_maskc                    | j         j        dk    rt          d          | j        t          k    rt          d          || j        d         dz  }|| j        d         dz  k    rt          d          t          j        | j        d         |ft          j        	          }t          |          D ]%}| dd|dz  f         d|dz  z  z  dk    |dd|f<   &|S )
a  
    Convert a bitmask tensor to a boolean mask tensor.

    Parameters
    ----------
    bit_mask : torch.Tensor
        The bitmask tensor to convert. Should be on CPU and of type int32.
    vocab_size : Optional[int], default: None
        The size of the vocabulary. If provided, the output mask will be cut to this size.

    Returns
    -------
    bool_mask : torch.Tensor
        The converted boolean mask tensor.
    rJ   zbit_mask should be on CPU.z'bit_mask should be of type torch.int32.Nr   rY   zLvocab_size should be less than or equal to the size represented by bit_mask.r   )rO   )
rL   rM   rN   rO   r   rS   rd   zerosboolrk   )rs   rG   rW   r_   s       r   bitmask_to_bool_maskrw   	  s    $ u$$5666~&&BCCC^A&+
HN1%***Z
 
 	
 X^A.
;5:NNNI: H H#AAAqBwJ/1R=AaG	!!!Q$r    r3   c                     |t          g           }t          |d          }|                    |           }t          |fi |S )ak  Create a GrammarMatcher from a grammar and tokenizer info.

    Parameters
    ----------
    grammar : Union[Grammar, str]
        The grammar to create the matcher from. Can be either a Grammar object or a string
        containing EBNF grammar.
    tokenizer_info : Optional[TokenizerInfo], default: None
        Information about the tokenizer to use with this grammar. If None, an empty
        TokenizerInfo will be created.
    **kwargs
        Additional keyword arguments to pass to the GrammarMatcher constructor.

    Returns
    -------
    matcher : GrammarMatcher
        The created grammar matcher.
    NFr-   r0   )r+   r3   r2   r4   r5   s        r   ,_get_matcher_from_grammar_and_tokenizer_infory   -  sU    * &r**&~UKKK'77@@*55f555r    r5   c                 J    t           j                            | j                  S N)r   r   _get_allow_empty_rule_ids_handle)r5   s    r   r|   r|   I  s    =223C3KLLLr    rB   rD   c                 B    t           j                            | |          S r{   )r   r   _generate_range_regexrB   rD   s     r   r   r   M      =..uc:::r    c                 B    t           j                            | |          S r{   )r   r   _generate_float_regexr   s     r   r   r   Q  r   r    c                 J    t           j                            | j                  S )zcPrint the FSMs of the grammar. Now the fsms are initialized in the grammar compilation
    process.)r   r   _print_grammar_fsmsr}   r+   s    r   r   r   U  s     =,,W_===r    c                 ^    t          |           }t          j                            |          S )z-Convert Qwen XML tool calling schema to EBNF.)r   r   r   _qwen_xml_tool_calling_to_ebnf)r   r   s     r   r   r   [  s%    '//J=77
CCCr    retrieve_next_tokenretrieve_next_siblingdraft_tokensmatcherr   allocate_token_bitmaskc                 V    t           j                            | |||j        |           dS )a  Traverse the tree constructed by the draft model to generate the logits mask.

    Parameters
    ----------
    retrieve_next_token : torch.Tensor
        1D int64 tensor where retrieve_next_token[i] gives the index of the child node
        of node i, or -1 if no child exists.
    retrieve_next_sibling : torch.Tensor
        1D int64 tensor where retrieve_next_sibling[i] gives the index of the sibling node
        of node i, or -1 if no sibling exists.
    draft_tokens : torch.Tensor
        1D int64 tensor of draft token ids at each position in the tree.
    matcher : GrammarMatcher
        The grammar matcher to use for validation.
    allocate_token_bitmask : torch.Tensor
        2D int32 tensor (num_nodes x bitmask_size) to store the generated bitmasks.
    N)r   r   _traverse_draft_treer}   )r   r   r   r   r   s        r   r   r   a  s9    0 
M&&    r    c                       e Zd ZdZededefd            Zededefd            Zededefd            Zededefd            Z	ededefd            Z
ededefd	            Zededd
fd            Zd
S )GrammarFunctorzrA utility class for transforming grammars. These methods are called during grammar parsing.
    For test purposes.r+   r   c                 x    t          j        t          j        j                            | j                            S )z'Normalize the structure of the grammar.)r   r)   r   r   grammar_functorstructure_normalizerr}   r   s    r   r   z#GrammarFunctor.structure_normalizer  1     *M)>>wOO
 
 	
r    c                 x    t          j        t          j        j                            | j                            S )z+Inline some rule references in the grammar.)r   r)   r   r   r   rule_inlinerr}   r   s    r   r   zGrammarFunctor.rule_inliner  s1     *M)66wGG
 
 	
r    c                 x    t          j        t          j        j                            | j                            S )z-Fuse the byte string elements in the grammar.)r   r)   r   r   r   byte_string_fuserr}   r   s    r   r   z GrammarFunctor.byte_string_fuser  1     *M);;GOLL
 
 	
r    c                 x    t          j        t          j        j                            | j                            S )z2Eliminate the not referenced rules in the grammar.)r   r)   r   r   r   dead_code_eliminatorr}   r   s    r   r   z#GrammarFunctor.dead_code_eliminator  r   r    c                 x    t          j        t          j        j                            | j                            S )z4Analyze and add lookahead assertions in the grammar.)r   r)   r   r   r   lookahead_assertion_analyzerr}   r   s    r   r   z+GrammarFunctor.lookahead_assertion_analyzer  s1     *M)FFwWW
 
 	
r    c                 x    t          j        t          j        j                            | j                            S )zOptimize the grammar.)r   r)   r   r   r   grammar_optimizerr}   r   s    r   r   z GrammarFunctor.grammar_optimizer  r   r    Nc                 |    t          j        t          j        j                            | j                             dS )z$Normalize the repetition expression.N)r   r)   r   r   r   repetition_normalizerr}   r   s    r   r   z$GrammarFunctor.repetition_normalizer  s;     	#M)??PP	
 	
 	
 	
 	
r    )__name__
__module____qualname____doc__staticmethodr   r   r   r   r   r   r   r   r]   r    r   r   r     s`         
g 
' 
 
 
 \
 
g 
' 
 
 
 \
 
7 
w 
 
 
 \
 
g 
' 
 
 
 \
 
g 
' 
 
 
 \
 
7 
w 
 
 
 \
 
w 
4 
 
 
 \
 
 
r    r   )T)r%   )r   r{   )NN)0r   r<   typingr   r   r   r   r   r   r	   rd   pydanticr
   baser   compilerr   r   r+   r   r   r   r   r   r3   r   strrv   intr   r$   r*   r6   rE   TensorrP   rV   rr   rw   ry   r|   r   floatr   r   r   r   r   r]   r    r   <module>r      s(  ) )
  @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @              6 6 6 6 6 6 6 6 4 4 4 4 4 4 4 4 2 2 2 2 2 2 2 2 ) ) ) ) ) )   ,0(,. . .#tIS#X67. . SM	.
 sCx). !. . 	. . . .b? ?# ?t ?s ? ? ? ?6 3  Y`    &YuWc\': Y Y Y Y Y2  $++ ++ ++7C< ++++ 	++
 ++ ++ 
++ ++ ++ ++^ :; \'*36	#Y   < :; \'*36
49   4#EL #U\ # # # #:! !5< !Xc] !V[Vb ! ! ! !J MQ6 67C< 62:=2I66 6 6 68M MDI M M M M; ;# ;HSM ;UX ; ; ; ;; ;% ;huo ;Y\ ; ; ; ;> >S > > > >D5d9otCQTH~1U+V D[^ D D D D < , 	
 "L 
   B3
 3
 3
 3
 3
 3
 3
 3
 3
 3
r    