
    
`i@                     n   d dl mZ d dlmZmZmZmZ d dlmZm	Z	 d dl
Z
e G d d                      Z G d de          Z G d	 d
e          Ze G d de                      Ze G d de                      Ze G d de                      Ze G d de                      Ze G d de                      Ze G d de                      Ze G d de                      Z G d d          Zdeeef         ddfdZdedefdZdedefd Zdedefd!Zd$d"Zed#k    r e             dS dS )%    )	dataclass)OptionalListTupleIterator)ABCabstractmethodNc                   d    e Zd ZU dZeed<   eed<   ddedd fdZdefdZddedefd	Z	defd
Z
dS )Positionz2Tracks position in source text for error reportingtextpos   nreturnc                 <    t          | j        | j        |z             S N)r   r   r   selfr   s     k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/llguidance/gbnf_to_lark.pyadvancezPosition.advance   s    	48a<000    c                 d    | j         t          | j                  k     r| j        | j                  ndS N )r   lenr   r   s    r   currentzPosition.current   s*    &*hTY&?&?ty""RGr   c                 :    | j         | j        | j        |z            S r   )r   r   r   s     r   peekzPosition.peek   s    yDHqL011r   c                     | j                             dd| j                  dz   }| j         t          d| j        dz
            | j                 }| j         | j        | j        dz            }d| dt	          |           dt	          |           S )N
r   r      zline z, z ^ )r   countr   maxrepr)r   line_noprefsuffs       r   __str__zPosition.__str__   s    )//$4844q8yQ2..9:yDHrM12=w==$t**==d===r   N)r   )__name__
__module____qualname____doc__str__annotations__intr   r   r   r)    r   r   r   r   	   s         <<
III	HHH1 1 1Z 1 1 1 1H H H H H2 2c 2# 2 2 2 2> > > > > > >r   r   c                   (     e Zd Zdedef fdZ xZS )GbnfToLarkErrorr   messagec                 b    || _         t                                          | d|            d S )Nz at )r   super__init__)r   r   r4   	__class__s      r   r7   zGbnfToLarkError.__init__!   s6    G..../////r   )r*   r+   r,   r   r.   r7   __classcell__)r8   s   @r   r3   r3       sK        0H 0s 0 0 0 0 0 0 0 0 0 0r   r3   c                   n    e Zd Zedefd            ZdefdZdefdZdefdZ	d	dZ
ded          fdZdS )
ASTNoder   c                     d S r   r1   r   s    r   r)   zASTNode.__str__(   s    r   c                     dS )NTr1   r   s    r   	is_atomiczASTNode.is_atomic,   s    tr   c                 X    t          d |                                 D                       S )Nc              3   >   K   | ]}|                                 V  d S r   )is_terminal).0cs     r   	<genexpr>z&ASTNode.is_terminal.<locals>.<genexpr>0   s*      <<q1==??<<<<<<r   )allchildrenr   s    r   rA   zASTNode.is_terminal/   s'    <<DMMOO<<<<<<r   c                 *    |                                  S r   )r)   r   s    r   top_strzASTNode.top_str2   s    ||~~r   c                     | S r   r1   r   s    r   simplifyzASTNode.simplify5   s    r   c                     g S r   r1   r   s    r   rF   zASTNode.children8   s    	r   N)r   r;   )r*   r+   r,   r	   r.   r)   boolr>   rA   rH   rJ   listrF   r1   r   r   r;   r;   '   s            ^4    =T = = = =       $y/      r   r;   c                   &    e Zd ZU eed<   defdZdS )LiteralNodevaluer   c                     d| j          dS )N")rP   r   s    r   r)   zLiteralNode.__str__@   s     4:    r   Nr*   r+   r,   r.   r/   r)   r1   r   r   rO   rO   <   s:         JJJ! ! ! ! ! ! !r   rO   c                   &    e Zd ZU eed<   defdZdS )	RegexNoderxr   c                     d| j          dS )N/)rV   r   s    r   r)   zRegexNode.__str__H   s    47~~~r   NrS   r1   r   r   rU   rU   D   s:         GGG      r   rU   c                   L    e Zd ZU eed<   dZed         ed<   defdZdefdZ	dS )RuleRefNodenameNRuleNodetargetr   c                 ,    | j         dS | j         j        S NF)r]   rule_is_terminalr   s    r   rA   zRuleRefNode.is_terminalQ   s    ;5{++r   c                 6    | j         | j        S | j         j        S r   )r]   r[   r   s    r   r)   zRuleRefNode.__str__V   s    ;9{r   )
r*   r+   r,   r.   r/   r]   r   rL   rA   r)   r1   r   r   rZ   rZ   L   sj         
III#'FHZ ''',T , , , ,
             r   rZ   c                   j    e Zd ZU eed<   eed<   ee         ed<   dee         fdZdefdZ	de
fdZdS )	RepetitionNodenode	min_times	max_timesr   c                     | j         gS r   )rd   r   s    r   rF   zRepetitionNode.childrenb   s    	{r   c                 B    | j                                         | _         | S r   )rd   rJ   r   s    r   rJ   zRepetitionNode.simplifye   s    I&&((	r   c                 T   t          | j                  }| j                                        sd| d}| j        dk    r| j        | dS | j        dk    r| j        | dS | j        dk    r| j        dk    r| dS | j        t          | j                  nd}| d	| j         d
| dS )N()r   *r   +?r   {,})r.   rd   r>   re   rf   )r   innermax_strs      r   r)   zRepetitionNode.__str__i   s    DIy""$$ 	! LLLE>Q4>#9;;;>Q4>#9;;;>Q4>Q#6#6;;;)-)C#dn%%%774>77G7777r   N)r*   r+   r,   r;   r/   r0   r   r   rF   rJ   r.   r)   r1   r   r   rc   rc   \   s         
MMMNNN}$w-    '    8 8 8 8 8 8 8r   rc   c                   b    e Zd ZU ee         ed<   defdZdefdZ	defdZ
dee         fdZdS )SequenceNodenodesr   c                 \    | j         sdS d                    d | j         D                       S )Nz"" c              3   4   K   | ]}t          |          V  d S r   r.   )rB   rd   s     r   rD   z'SequenceNode.__str__.<locals>.<genexpr>~   s(      99dD		999999r   )rv   joinr   s    r   r)   zSequenceNode.__str__{   s4    z 	4xx99dj999999r   c                     dS r_   r1   r   s    r   r>   zSequenceNode.is_atomic       ur   c                     t          t          | j                            D ])}| j        |                                         | j        |<   *t          | j                  dk    r| j        d         S | S Nr   r   )ranger   rv   rJ   r   is     r   rJ   zSequenceNode.simplify   sd    s4:'' 	5 	5A JqM2244DJqMMtz??a:a= r   c                     | j         S r   )rv   r   s    r   rF   zSequenceNode.children   s
    zr   N)r*   r+   r,   r   r;   r/   r.   r)   rL   r>   rJ   rM   rF   r1   r   r   ru   ru   w   s         =: : : : :
4    '    $w-      r   ru   c                   n    e Zd ZU ee         ed<   defdZdefdZde	fdZ
defdZdee         fdZdS )	AlternativeNodealternativesr   c                 J    d                     d | j        D                       S )Nz
     | c              3   4   K   | ]}t          |          V  d S r   rz   rB   alts     r   rD   z*AlternativeNode.top_str.<locals>.<genexpr>   (      FFSCFFFFFFr   r{   r   r   s    r   rH   zAlternativeNode.top_str   s(    FFD4EFFFFFFr   c                 V    dd                     d | j        D                       z   dz   S )Nrj   z | c              3   4   K   | ]}t          |          V  d S r   rz   r   s     r   rD   z*AlternativeNode.__str__.<locals>.<genexpr>   r   r   rk   r   r   s    r   r)   zAlternativeNode.__str__   s0    UZZFFD4EFFFFFFLLr   c                     dS r_   r1   r   s    r   r>   zAlternativeNode.is_atomic   r}   r   c                     t          t          | j                            D ])}| j        |                                         | j        |<   *t          | j                  dk    r| j        d         S | S r   )r   r   r   rJ   r   s     r   rJ   zAlternativeNode.simplify   ss    s4,--.. 	C 	CA#'#4Q#7#@#@#B#BDa  t !!Q&&$Q''r   c                     | j         S r   r   r   s    r   rF   zAlternativeNode.children   s      r   N)r*   r+   r,   r   r;   r/   r.   rH   r)   rL   r>   rJ   rM   rF   r1   r   r   r   r      s         w-G G G G GM M M M M4    '    !$w- ! ! ! ! ! !r   r   c                   d    e Zd ZU eed<   eed<   eed<   dZeed<   dZde	e         fdZ
defd	Zd
S )r\   r[   r   commentFr`   r   r   c                     | j         gS r   r   r   s    r   rF   zRuleNode.children   s    !""r   c                 V    | j          | j         d| j                                         S )Nz: )r   r[   r   rH   r   s    r   r)   zRuleNode.__str__   s.    ,J	JJT->-F-F-H-HJJJr   N)r*   r+   r,   r.   r/   r;   r`   rL   orderr   rF   r)   r1   r   r   r\   r\      s         
IIILLL"d"""E#$w- # # # #K K K K K K Kr   r\   c                      e Zd ZddZdedeeef         fdZdede	eef         fdZ
dede	eef         fdZdede	eef         fd	Zedede	eef         fd
            Zedede	eef         fd            ZdededefdZededefd            Zededefd            Zdede	eef         fdZdedede	eef         fdZdedede	eef         fdZdedede	eef         fdZdedee         defdZdS )GrammarParserr   Nc                     d| _         d S r   )curr_commentr   s    r   r7   zGrammarParser.__init__   s    r   r   c                 D   t          |d          }|                     |d          }g }|                                rX|                     |          \  }}|                    |           |                     |d          }|                                Xd |D             S )Nr   Tallow_newlinesc                     i | ]
}|j         |S r1   )r[   )rB   rules     r   
<dictcomp>z'GrammarParser.parse.<locals>.<dictcomp>   s    222D	4222r   )r   _skip_spacer   _parse_ruleappend)r   r   r   rulesr   s        r   parsezGrammarParser.parse   s    tQs488 "kkmm 	=((--ID#LL""3t"<<C kkmm 	=
 32E2222r   r   c                    dt           dt          fd}|                                dk    r|                    d          d         st	          |d          |                                }|                                }|dv rd|z   |                                fS |d	k    rj|                    d
          dd
         }t          |          dk    s ||          st	          |d|           |                    d
          }d| |fS |dk    r}|                    d          dd         }t          |          dk    s ||          st	          |d|           |                    d          }d|                    d           |fS |dk    r}|                    d          dd         }t          |          dk    s ||          st	          |d|           |                    d          }d|                    d           |fS t	          |d|           |                                dk    rt	          |d          |                                |                                fS )Nsr   c                 4    t          d | D                       S )Nc              3      K   | ]}|d v V  	dS )0123456789abcdefABCDEFNr1   )rB   chs     r   rD   z@GrammarParser._parse_char.<locals>.is_all_hex.<locals>.<genexpr>   s(      BB"r55BBBBBBr   )rE   )r   s    r   
is_all_hexz-GrammarParser._parse_char.<locals>.is_all_hex   s    BBBBBBBBr   \   r   zIncomplete escape sequencez"\[]nrtx   zInvalid \x escape sequence: \xz\xu      zInvalid \u escape sequence: \uz\u0U	      zInvalid \U escape sequence: \Uz\UzInvalid escape sequence \r   zUnexpected end of input)r.   rL   r   r   r3   r   r   lstrip)r   r   r   rC   	hex_values        r   _parse_charzGrammarParser._parse_char   s   	C# 	C$ 	C 	C 	C 	C ;;==D  88A;;q> I%c+GHHH++--CAJax..cHHQKK!,	y>>Q&&jj.C.C&)K	KK   kk!nn(Y((#--cHHQKK!,	y>>Q&&jj.C.C&)K	KK   kk!nn4Y--c2244c99cHHQKK!,	y>>Q&&jj.C.C&)K	KK   kk!nn4Y--c2244c99%c+K+K+KLLL[[]]b  !#'@AAA{{}}ckkmm++r   c                    |                                 dk    rt          |d          d}|                                }	 |                     |          \  }}|dv r	|d|z   z  }n||z  }|dk    rn2t	          |          |fS )N[zExpected '['Tz/[r   ])r   r3   r   r   rU   r   r   rrC   s       r   _parse_char_classzGrammarParser._parse_char_class   s    ;;==C!#~666kkmm	%%c**FAsDyyTAXQCxx	 ||S  r   c                     |                                 dk    rt          |d          |                                }d}	 |                     |          \  }}|dk    rn||z  }%t	          |          |fS )NrR   zExpected '"'r   )r   r3   r   r   rO   r   s       r   _parse_literalzGrammarParser._parse_literal  s    ;;==C!#777kkmm	%%c**FAsCxxFA		 1~~s""r   c                 J   | j         }t                              |                                           r@|                                 } t                              |                                           @| j         |k    rt          | d          | j        || j                  | fS )NzExpected name)r   r   _is_word_charr   r   r3   r   r   starts     r   _parse_namezGrammarParser._parse_name  s    ))#++--88 	 ++--C ))#++--88 	 7e!#777x(#--r   c                 L   | j         }|                                                                 r:|                                 } |                                                                 :| j         |k    rt	          | d          t          | j        || j                            | fS )NzExpected integer)r   r   isdigitr   r3   r0   r   r   s     r   
_parse_intzGrammarParser._parse_int  s    kkmm##%% 	 ++--C kkmm##%% 	 7e!#'9:::38ECGO,--s22r   r   c                    |                                 r5|                                 dv r|                                }n|r1|                                 dv rt                              |          }n|                                 dk    r|                                }d}|                                 rk|                                 dvrU||                                 z  }|                                }|                                 r|                                 dvU| xj        |dz   z  c_        nn|                                 5|S )Nz 	
#z//r!   )r   r   r   _skip_newliner   )r   r   r   cmts       r   r   zGrammarParser._skip_space!  s&   kkmm 	{{}}%%kkmm 
CKKMMV$;$;#11#66#%%kkmmkkmm (V(C(C3;;==(C++--C kkmm (V(C(C !!S4Z/!!! kkmm 	 
r   c                    |                                  dk    rA|                                 } |                                  dk    r|                                 } n,|                                  dk    r|                                 } | S )Nr!   )r   r   )r   s    r   r   zGrammarParser._skip_newline2  sh    ;;==D  ++--C{{}}$$kkmm[[]]d""++--C
r   rC   c                 B    |                                  p| dk    p| dk    S )N-_)isalnum)rC   s    r   r   zGrammarParser._is_word_char<  s"    yy{{2a3h2!s(2r   c                    |                      |          \  }}|                     |d          }|                    d          dk    rt          |d          |                    d          }|                     |d          }|                     |d          \  }}|                     |          }| j        }d| _        t          |||          |fS )	NFr   r   z::=zExpected ::=T	is_nestedr   )	r   r   r   r3   r   _parse_alternativesr   r   r\   )r   r   r[   r   r   s        r   r   zGrammarParser._parse_rule@  s    $$S))	cs59988A;;%!#~666kk!nns488 44SE4JJc  %%lC00#55r   r   c                 >   g }	 |                      ||          \  }}|                    |           |                     ||          }|                                dk    rn,|                                }|                     |d          }t          |          |fS )NTr   |)_parse_sequencer   r   r   r   r   )r   r   r   r   sequences        r   r   z!GrammarParser._parse_alternativesP  s     ')		= 00i@@MHc)))""3y"AAC{{}}##++--C""3t"<<C		= |,,c11r   c                    g }|                                 r?|                                 dvr(|s|                                 dvr|                                 dk    r/|                     |          \  }}|                    |           n@|                                 dk    r.|                     |          \  }}|                    |           n|                                 dk    r0|                     ||          \  }}|                    |           n|                                 dk    r7|                    t          d                     |                                }nc|                     |                                           r;|                     |          \  }}|                    t          |                     nn| 
                    ||          }|                     ||          }| 
                    ||          }|                                 r0|                                 dvr||                                 dvt          |          |fS )	Nz|)r   rR   r   rj   r   .r   )r   r   r   r   _parse_grouprU   r   r   r   rZ   r   _parse_repetitionru   )r   r   r   rv   rd   r[   s         r   r   zGrammarParser._parse_sequenceb  s6     " KKMM	BT)) *!kkmm699{{}}## //44	cT""""#%% 22377	cT""""#%% --cY-GG	cT""""#%%Ys^^,,,kkmm##CKKMM22  ,,S11	c[..////""3y"AAC((e44C""3y"AAC1 KKMM	BT)) *!kkmm6990 E""C''r   c                    |                                 dk    rt          |d          |                                }|                     |d          }|                     |d          \  }}|                                 dk    rt          |d          |                                }||                     ||          fS )Nrj   zExpected '('Tr   rk   zExpected ')')r   r3   r   r   r   )r   r   r   r   s       r   r   zGrammarParser._parse_group  s    ;;==C!#~666kkmmsD)) 44SD4IIc;;==C!#~666kkmmT--c9====r   rv   c                    |s|S |                                 dk    r.t          |d         dd           |d<   |                                S |                                 dk    r.t          |d         dd           |d<   |                                S |                                 dk    r.t          |d         dd          |d<   |                                S |                                 dk    r|                                }|                     |d          }|                     |          \  }}|                     |d          }|                                 d	k    r.t          |d         ||          |d<   |                                S |                                 d
k    r|                     |                                d          }d }|                                                                 r|                     |          \  }}|                     |d          }|                                 d	k    rt          |d          t          |d         ||          |d<   |                                S t          |d          |S )Nrl   r   rm   r   rn   ro   Trq   rp   zExpected '}'zExpected ',' or '}')r   rc   r   r   r   r   r3   )r   r   rv   re   rf   s        r   r   zGrammarParser._parse_repetition  s1    	J;;==C&uRy!T::E"I;;== [[]]c!!&uRy!T::E"I;;== [[]]c!!&uRy!Q77E"I;;== [[]]c!!++--C""3--C!__S11NIs""3--C{{}}##*59iKKb	{{}}$#%%&&s{{}}d;; 	;;==((** :%)__S%9%9NIs&&sD11;;==C'')#~>>>*59iKKb	{{}}$%c+@AAA
r   r   N)r*   r+   r,   r7   r.   dictr\   r   r   r   r   r;   r   r   staticmethodr   r0   r   rL   r   r   r   r   r   r   ru   r   r   r   r   r1   r   r   r   r      s          
3# 
3$sH}"5 
3 
3 
3 
3(,x (,E#x-,@ (, (, (, (,T!X !%8I2J ! ! ! !"#( #uWh5F/G # # # # . .eCM&: . . . \. 3 3U3=%9 3 3 3 \3x  (    " 8     \ 3 3 3 3 3 \36x 6E(H2D,E 6 6 6 6 22(,2	(	)2 2 2 2$ ( ((, (	|X%	& (  (  (  (D> >T >eGXDU>V > > > >#X #d7m # # # # # # #r   r   r   r   c                 H    dt           dt          dd f fd}t                                                     D ]*\  }}||_        |j                                        |_        +dt          dt          t                   ffd                                 D ]X} |          D ]J}t          |t                    r3|j         vrt          d|j         d           |j                 |_        KYd	 vrt          d
           | d	         d           d}|dk    rVd}                                 D ]9}|j        dk    r,|j        s%|j                                        rd|_        |dz  }:|dk    Vt!                                                     D ]}|j                            dd          }t%          j        dd|                                          }|j        r|                                }n|                                }|j        |k    r |||           d S )Nr   r[   r   c                 ^    |v rt          d| d          | j        = || _        | |<   d S )NRule 'z' already exists)	Exceptionr[   )r   r[   r   s     r   renamezresolve.<locals>.rename  sC    5==;T;;;<<<!&Mdr   rd   c              3   d   K   |                                  D ]}|V   |          E d {V  d S r   )rF   )rd   rC   all_childrens     r   r   zresolve.<locals>.all_children  sT       	' 	'AGGG#|A&&&&&&&&	' 	'r   r   z' not foundrootzNo 'root' rule foundr   r   r   Tr   r   z([a-z])([A-Z])z\1_\2)r\   r.   	enumeratevaluesr   r   rJ   r;   r   
isinstancerZ   r[   r   r]   r`   rA   rM   replaceresublowerupper)r   r   r   r   rd   num_fixnew_namer   s   `      @r   resolver     s~   ( # $       %,,..)) 3 310022'7 'x'8 ' ' ' ' ' '
 \\^^ / / LOO 	/ 	/D$,, /9E))#$CTY$C$C$CDDD#DI.		/ U.///
F5='"""G
A++ 	 	A'!!* "N..00 " &*"1 A++ %,,..!! 	  	 6>>#s++6+Xx@@FFHH 	(~~''HH~~''H6XF1h	  	 r   r   c                 F   t                      }|                    |           }t          |           t          |                                          }|                    d            d}d}|D ]/}t          |          }|s	d|v r|dz  }||dz   z  }d|v }|r|dz  }0|S )z<
    Convert a GBNF (llama.cpp) grammar to Lark syntax.
    c                     | j         S r   )r   )r   s    r   <lambda>zgbnf_to_lark.<locals>.<lambda>  s    QW r   )keyz%llguidance {}

Tr!   )r   r   r   rM   r   sortr.   )r   parserr   rlistresprev_nlr   r   s           r   gbnf_to_larkr    s     __FLLEENNN  E	JJ$$J%%%
CG  FF 	41994KCq4x!) 	4KCJr   c                 0    t          j        d|           duS )z6
    Check if the text is already in Lark syntax.
    z!(?m)^\s*(%llguidance\b|start\s*:)N)r   searchr   s    r   is_lark_syntaxr	    s     994@@LLr   c                 B    t          |           r| S t          |           S )z5
    Convert a grammar to Lark syntax if needed.
    )r	  r  r  s    r   any_to_larkr  	  s'     d r   c                      dd l } dd ldt          dd ffd}t          | j                  dk     r$t          d           |                     d           | j        dd          D ]} ||           d S )Nr   fnr   c                    t          |  ddd           t          |           5 }|                                }d d d            n# 1 swxY w Y   t          |          }j                            |           d         dz   }t          |d          5 }|                    |           d d d            n# 1 swxY w Y   t          d           d S )	Nz... r   T)endflushr   z.larkwOK)printopenreadr  pathsplitextwrite)r  fr   larkfn_larkoss        r   process_filezmain.<locals>.process_file  s5   kkkr...."XX 	6688D	 	 	 	 	 	 	 	 	 	 	 	 	 	 	D!!'""2&&q)G3'3 	1GGDMMM	 	 	 	 	 	 	 	 	 	 	 	 	 	 	es#   AAAB66B:=B:r   z*Usage: gbnf_to_lark.py <file1> <file2> ...r   )sysr  r.   r   argvr  exit)r  r  r  r  s      @r   mainr!    s    JJJIII        38}}q:;;;hqrrl  R r   __main__r   ) dataclassesr   typingr   r   r   r   abcr   r	   r   r   r   r3   r;   rO   rU   rZ   rc   ru   r   r\   r   r   r.   r   r  rL   r	  r  r!  r*   r1   r   r   <module>r&     si   " ! ! ! ! ! 2 2 2 2 2 2 2 2 2 2 2 2 # # # # # # # # 				 > > > > > > > >,0 0 0 0 0i 0 0 0    c   * ! ! ! ! !' ! ! !                  '       8 8 8 8 8W 8 8 84     7   , ! ! ! ! !g ! ! !. K K K K Kw K K K@ @ @ @ @ @ @ @F1 4X& 1 4 1  1  1  1 hs s    ,M M M M M Mc c       * zDFFFFF r   