
    .`iDS                        d dl mZ d dlmZmZmZmZ d dlmZ	 d dl
mZmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ ddlmZ erd dlm Z  	 d dl!m"Z" n# e#$ r	 d dl!m$Z" Y nw xY w ee%          Z&d&dZ'd&dZ(	 	 	 d'de)d         de)e*e+ef                  dz  de,de,de-e)d         e)e*e+ef                  dz  f         f
dZ.d(dZ/d d!d"e+e0z  de1fd#Z2 G d$ d%e          Z3dS ))    )Path)TYPE_CHECKINGAnycastoverload)ChatCompletionRequest)FunctionTool)ValidationMode)SpecialTokenPolicySpecialTokens)InstructTokenizerV13)SentencePieceTokenizer)
Tekkenizer)ChatCompletionMessageParam)init_logger   )TokenizerLike)BatchEncodingMistralCommonBackendMistralCommonTokenizerrequestMistralChatCompletionRequestc                 P   t          | j                  D ]\  }}|                    d          dk    rr|                    dd                                          }g }	 	 t	          |          }|                    |           n# t          $ r Y nw xY w7|| j        |         d<   d S )Nrole	assistant
tool_calls )	enumeratemessagesget__iter__nextappendStopIteration)r   imessagetool_calls_validatorvalidated_tool_calls	tool_calls         k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/tokenizers/mistral.pymaybe_serialize_tool_callsr.   *   s    .   011 E E
7;;v+--#*;;|R[[]]#K#K #%  $%9 : :I(//	::::$   E	 1EGQ-E Es   $B
BBc           	      h   t          | j                  D ]\  }}|                    d          dk    r|                    dg           }|D ]^}t          |d                   dk    rCt                              d|d         |d         dd                    |d         dd         |d<   _|| j        |         d<   |                    d          d	v r]d
|v rY|d
         }t          |          dk    r.t                              d||dd                    |dd         }|| j        |         d
<   dS )z6Truncates tool call IDs for Mistral's ID requirements.r   r   r   id	   z!Truncating tool call ID: %s to %siN>   tooltool_resultstool_call_idz!Truncating tool_call_id: %s to %s)r!   r"   r#   lenloggerwarning)r   r(   r)   r   r,   r4   s         r-   truncate_tool_call_idsr8   O   sg    011 C C
7;;v+-- \266J' ; ;	y''!++NN;!$!$,  
 '0obcc&:IdO0:GQ--[[  $<<<((&~6|$$q((NN;$$RSS)  
 $0#4L6B #N33C C    NFr"   r   toolscontinue_final_messageadd_generation_promptreturnc                 8   |r|rt          d          t          t          t          t          f         | d                   }|r|d         dk    rt          d          |r|d         dk    rt          d          | D ]}|                    dd           }|rd |D             D ]6}|                    d	          i |d	<   |                    d
          d|d
<   7t          t          j	        
                                          }t          t          j	        
                                          }	|D ]}
t          |

                                          }|D ]}||vr3|
                    |           t                              d| d           |
d         dk    rjt          |
d         
                                          }|D ]?}||	vr9|
d                             |           t                              d| d           @t          d          | |fS )NzMCannot set both `add_generation_prompt` and `continue_final_message` to True.r   r   zCannot set `add_generation_prompt` to True when the last message is from the assistant. Consider using `continue_final_message` instead.z\Cannot set `continue_final_message` to True when the last message is not from the assistant.	reasoningc                 6    g | ]}|d          dk    |d         S )typefunctionr    ).0r2   s     r-   
<listcomp>zC_prepare_apply_chat_template_tools_and_messages.<locals>.<listcomp>   s2     
 
 
!%$v,*2L2LD2L2L2Lr9   
parametersdescription 'z[' is not supported by mistral-common for tools. It has been poped from the tool definition.rB   rC   zh' is not supported by mistral-common for function tools. It has been poped from the function definition.z,mistral-common only supports function tools.)
ValueErrorr   dictstrr   popr#   setr
   model_fieldskeysr	   listr6   warning_once)r"   r:   r;   r<   last_messager)   _rC   tools_fieldsfunction_fieldsr2   	tool_keystool_keyfunction_keysfunction_keys                  r-   /_prepare_apply_chat_template_tools_and_messagesr[   m   s     
!7 
0
 
 	

 S#X55L
  
f!5!D!D6
 
 	

  
,v"6+"E"E:
 
 	
  + +KKT**
  "U
 
).
 
 
 	- 	-H ||L))1)+&||M**2*,'
 4,113344h388::;; 	U 	UDTYY[[))I% U U<//HHX&&&''FH F F F   <:--$(j)9)>)>)@)@$A$AM(5  '>> ,00>>>"//!7L !7 !7 !7   %%STTT%U( U?r9   r   c                 @    | j         | j        t          d          d S )Nz6chat_template is not supported for Mistral tokenizers.)chat_templatechat_template_kwargsrJ   )r   s    r-   validate_request_paramsr_      s+    (G,H,TQRRR -U,Tr9   	tokenizerr   tc                    t          | t                    sJ t          |                       t          |t                    s|                    d          n|}| j        }	 || j        |         z   S # t          $ rR |                    d          }|| j	        v r| j	        |         cY S t                              d|           | j        cY S w xY w)Nzutf-8z6Failed to convert token %s to id, replacing with <unk>)
isinstancer   rB   bytesencodenum_special_tokens_tekken_token2id_nospecialKeyErrordecode_special_tokens_reverse_vocabr6   r7   unk_id)r`   ra   t_bytesshiftt_strs        r-   _tekken_token_to_idro      s    i,,==d9oo==,'1!U';';BahhwG(E	 y;GDDD      w''I;;;:5AAAADg	
 	
 	
  s   !A1 15C("CCc                   h    e Zd Zedddddeez  dededz  dedz  dd f
d	            Zd8 fdZde	e
         fdZde	e
         de	e         fdZde
fdZede	e         fd            Zede	e
         fd            Zede
fd            Zede
fd            Zede
fd            Zedefd            Zede
fd            Zede
fd            Zedefd            Zde
defdZde
fdZde
fdZ	 	 	 	 d9dee	e         z  d edz  d!ed"ed#e
dz  dd$fd%Zede	e         fd&            Zdeee
f         fd'Zdeee
f         fd(Z	 	 	 d:ded"edz  d#e
dz  d!ede	e
         f
d)Z 	 d;d*e	d+         d,e	eee!f                  dz  de	e
         fd-Z"d<d.e	e
         e
z  d/edefd0Z#	 d<d.e	e	e
                  e	e
         z  d/edefd1Z$e%d2ede
fd3            Z&e%d2e	e         de	e
         fd4            Z&d2ee	e         z  de
e	e
         z  fd5Z&d2e	e         defd6Z'	 d<d.e	e
         d/ede	e         fd7Z( xZ)S )=MistralTokenizerFN)trust_remote_coderevisiondownload_dirpath_or_repo_idrr   rs   rt   r=   c                    	 ddl m} n# t          $ r	 ddl m} Y nw xY w |j        |g|R t
          j        ||dn|d|} | |          S )Nr   r   r   main)mode	cache_dirrs   )(transformers.tokenization_mistral_commonr   ImportErrorr   from_pretrainedr   test)	clsru   rr   rs   rt   argskwargsr   r`   s	            r-   r|   z MistralTokenizer.from_pretrained   s    	UUUUUUU 	 	 	       	 9(8

 
  $"'/VVX
 
 
 
	 s9~~s   	 r`   r   c                     t                                                       | _        |j         _         j        j         _         j        j         _         j        j        j        }|t          j
        k    rt          d          t           j        j        j                  }t          |                    d          d                    _        t#           j        t$                     _        t#           j        t(                     _         j        s+ j        s$t-          dt/           j                              fdt1           j        dz
  dd          D              _        t7          t9           j                                        d                      _         j                                         _         j        dz
   _          !                                 _"        tG           j"                   _$         %                     j"                   _&        tG           j&                   _'        d S )	NzzMistral tokenizer must be in test mode. Make sure to set `mode='ValidationMode.test'` when creating the Mistral tokenizer.vr?   zUnsupported tokenizer: c                 N    i | ]!}                     |gd           d         |"S )Fskip_special_tokensr   )convert_ids_to_tokensrD   r(   selfs     r-   
<dictcomp>z-MistralTokenizer.__init__.<locals>.<dictcomp>	  sE     
 
 
 &&s&FFqI1
 
 
r9   r   c                     | d         S )Nr   r    )xs    r-   <lambda>z+MistralTokenizer.__init__.<locals>.<lambda>  s    qQRt r9   )key)(super__init__transformers_tokenizerr`   mistralinstruct_tokenizerinstruct"_chat_completion_request_validator_moder   r}   rJ   rL   versionvalueintsplitrc   r   	is_tekkenr   is_spm	TypeErrorrB   range
vocab_size_vocab_dictrK   sorteditemsvocab_vocab_max_token_id_get_special_token_ids_special_token_idsrN   _special_token_ids_set_get_special_tokens_special_tokens_special_tokens_set)r   r`   rx   _mistral_version_str	__class__s   `   r-   r   zMistralTokenizer.__init__   s   &/# *70|>D>&&&%    #4>#9#?@@ 4 : :3 ? ? CDD#DNJ?? 1GHH 	N$+ 	NLd4>6J6JLLMMM
 
 
 
4?Q.B77
 
 

  t'7'='='?'?^^ T T TUU n**,,!_q0 #'"="="?"?&)$*A&B&B##778OPP#&t';#<#<   r9   c                 ^      fdt          t           j                            D             S )Nc                 H    g | ]}j                             |          |S r    )r`   
is_specialr   s     r-   rE   z;MistralTokenizer._get_special_token_ids.<locals>.<listcomp>  s/    SSSadn6O6OPQ6R6RSSSSr9   )r   r5   r   r   s   `r-   r   z'MistralTokenizer._get_special_token_ids  s/    SSSS5T[!1!122SSSSr9   all_special_idsc                        fd|D             S )Nc                 ^    g | ])}j                             |gt          j                   *S ))special_token_policy)r`   ri   r   KEEPr   s     r-   rE   z8MistralTokenizer._get_special_tokens.<locals>.<listcomp>  sD     
 
 
 N!!1#<N<S!TT
 
 
r9   r    )r   r   s   ` r-   r   z$MistralTokenizer._get_special_tokens  s.    
 
 
 
$
 
 
 	
r9   c                 F    t          |                     d                    S )NrH   )r5   re   r   s    r-   num_special_tokens_to_addz*MistralTokenizer.num_special_tokens_to_add#  s    4;;r??###r9   c                     | j         S N)r   r   s    r-   all_special_tokensz#MistralTokenizer.all_special_tokens(  s    ##r9   c                     | j         S r   )r   r   s    r-   r   z MistralTokenizer.all_special_ids,  s    &&r9   c                     | j         j        S r   )r`   bos_idr   s    r-   bos_token_idzMistralTokenizer.bos_token_id0      ~$$r9   c                     | j         j        S r   )r`   eos_idr   s    r-   eos_token_idzMistralTokenizer.eos_token_id4  r   r9   c                     | j         j        S r   )r`   pad_idr   s    r-   pad_token_idzMistralTokenizer.pad_token_id8  r   r9   c                     dS )NTr    r   s    r-   is_fastzMistralTokenizer.is_fast<  s    tr9   c                     | j         j        S r   )r   r   r   s    r-   r   zMistralTokenizer.vocab_size@  s    *55r9   c                     | j         S r   )r   r   s    r-   max_token_idzMistralTokenizer.max_token_idD  s    !!r9   c                     | j         j        S r   )r   truncation_sider   s    r-   r   z MistralTokenizer.truncation_sideH  s    *::r9   token_idc                     || j         v S r   )r   )r   r   s     r-   _is_special_token_idz%MistralTokenizer._is_special_token_idL  s    4666r9   c                 :    t          t          |                     S r   )hashr0   r   s    r-   __hash__zMistralTokenizer.__hash__O  s    BtHH~~r9   c                     | j         S r   )r   r   s    r-   __len__zMistralTokenizer.__len__R  s
    r9   Ttext	text_pairadd_special_tokens
truncation
max_lengthr   c                 (   |t          d          |                     |||||          }|d         r^|d         d         | j        k    rG|d                             d           |                    d          x}r|                    d           |S )Nz<`text_pair` is not supported by `MistralTokenizer.__call__`.)r   r   r   r   r   	input_idsr?   attention_mask)rJ   r   r   rM   r#   )r   r   r   r   r   r   encodedr   s           r-   __call__zMistralTokenizer.__call__U  s      N   --1!! . 
 
 ; 	'GK$8$<@Q$Q$QK $$R(((!(-=!>!>>~ '""2&&&r9   c                     | j         S r   )r   r   s    r-   r   zMistralTokenizer.vocabs  s
    {r9   c                     | j         S r   )r   r   s    r-   	get_vocabzMistralTokenizer.get_vocabw  s    r9   c                     i S r   r    r   s    r-   get_added_vocabz MistralTokenizer.get_added_vocabz  s    	r9   c                 `    | j                             ||d          }|dur|
|d |         S |S )NF)boseos)r`   re   )r   r   r   r   r   r   s         r-   re   zMistralTokenizer.encode~  sE     .''2D%'PPU""z'=;J;''Nr9   r"   r   r:   c                 x   |                     dd          }|                    dd          }|                    dd          }|                    dd          }|                    dd          }|                    d          }	t          ||||          \  }}| j                            |||||||	d d		  	        S )
Nr<   Fr;   tokenizeTpaddingr   r   )	conversationr:   r;   r   r   r   r   return_tensorsreturn_dict)rM   r#   r[   r   apply_chat_template)
r   r"   r:   r   r<   r;   r   r   r   r   s
             r-   r   z$MistralTokenizer.apply_chat_template  s     !'

+BE J J!',De!L!L::j$//**Y..ZZe44
ZZ--
Ie35J
 
% *>>!#9!! ? 

 

 
	
r9   idsr   c                 j    t          |t                    r|g}| j                            ||          S Nr   )rc   r   r   ri   r   r   r   s      r-   ri   zMistralTokenizer.decode  sC     c3 	%C*11%8 2 
 
 	
r9   c                 :    | j                             ||          S r   )r   batch_decoder   s      r-   r   zMistralTokenizer.batch_decode  s*     *77%8 8 
 
 	
r9   tokensc                     d S r   r    r   r   s     r-   convert_tokens_to_idsz&MistralTokenizer.convert_tokens_to_ids  s    9<r9   c                     d S r   r    r   s     r-   r   z&MistralTokenizer.convert_tokens_to_ids  s    EHSr9   c                 6    | j                             |          S r   )r   r   r   s     r-   r   z&MistralTokenizer.convert_tokens_to_ids  s    *@@HHHr9   c                 z    t           j        h j        rt           j        t
                    sJ t           j                               fd|D             }t          d |D                       r5 fd|D             } j                            |t          j
                  }nd                    |          }nt           j        t                    sJ t           j                              g }g }d}|D ]m}|v rR|r:|                     j                            |t          j                             g }|                    |           X|                    |           n|r8|                     j                            |t          j                             d                    |          }|S )Nc                 .    g | ]}|v s	|j         v|S r    )r   )rD   ra   r   to_decode_special_tokenss     r-   rE   z=MistralTokenizer.convert_tokens_to_string.<locals>.<listcomp>  s<       111Qd>V5V5V 5V5V5Vr9   c              3   @   K   | ]}t          |t                    V  d S r   )rc   rd   rD   ra   s     r-   	<genexpr>z<MistralTokenizer.convert_tokens_to_string.<locals>.<genexpr>  s,      88A:a''888888r9   c                 :    g | ]}t          j        |          S r    )ro   r`   )rD   ra   r   s     r-   rE   z=MistralTokenizer.convert_tokens_to_string.<locals>.<listcomp>  s&    NNN!*4>1==NNNr9   rH   )r   r   r   rc   r`   r   rB   anyri   r   r   joinr   r&   IGNORE)r   r   r   decodedregular_tokensdecoded_listtokenr   s   `      @r-   convert_tokens_to_stringz)MistralTokenizer.convert_tokens_to_string  s   $1$<#= > ,	,dnj99OO4;O;OOO9      F 8888888 *NNNNvNNN .//5G5LMM''&// dn.DEE  tH H  E )+N&(LG 1 1444% ,$++ N11 .0B0I   
 *, ''...."))%0000 ##N)).:L:STT   ggl++Gr9   c                     |s fd|D             S  j                             t          j                  ht	           j        t                    rV j        j        r                     j        j                    j        j	        r                     j        j	                    fd|D             } fd|D             }t          d |D                       r j        r fd|D             }|S )Nc                 D    g | ]}j                             |          S r    r`   id_to_piecerD   r   r   s     r-   rE   z:MistralTokenizer.convert_ids_to_tokens.<locals>.<listcomp>  s)    MMMXDN..x88MMMr9   c                 F    g | ]}|v s                     |          |S r    )r   )rD   r(   non_skip_special_tokens_idsr   s     r-   rE   z:MistralTokenizer.convert_ids_to_tokens.<locals>.<listcomp>  sC     
 
 
///t7P7PQR7S7S/ ///r9   c                 D    g | ]}j                             |          S r    r  r  s     r-   rE   z:MistralTokenizer.convert_ids_to_tokens.<locals>.<listcomp>  s)    PPP8$.,,X66PPPr9   c              3      K   | ]}d |v V  	dS )u   �Nr    r   s     r-   r   z9MistralTokenizer.convert_ids_to_tokens.<locals>.<genexpr>  s&      **auz******r9   c                     g | ]V}|j         vr%j                            |t          j                  n%j                            |gt          j                  WS r    )r   r`   id_to_byte_piecer   r   ri   r  s     r-   rE   z:MistralTokenizer.convert_ids_to_tokens.<locals>.<listcomp>  so         4#>>> //:L:QRRR^**H:7I7NOO  r9   )r`   get_control_tokenr   r   rc   r   r   BEGIN_THINKadd	END_THINKr  r   )r   r   r   ids_keptr   r  s   `    @r-   r   z&MistralTokenizer.convert_ids_to_tokens  sQ   
 # 	NMMMMMMMM N,,]-EFF'
# dm%9:: 	I}( K+//0IJJJ}& I+//0GHHH
 
 
 
 

 
 
 QPPPxPPP**6***** 	t~ 	    !)	  F r9   )r`   r   r=   N)NTFN)NNTr   )F)*__name__
__module____qualname__classmethodrL   r   boolr|   r   rQ   r   r   r   r   propertyr   r   r   r   r   r   r   r   r   r   r   r   r   r   rK   r   r   re   r   r   ri   r   r   r   r  r   __classcell__)r   s   @r-   rq   rq      s       
 #(##'  t  	
 * Dj 
   [:(= (= (= (= (= (=TTS	 T T T T
49 
c 
 
 
 
$3 $ $ $ $
 $DI $ $ $ X$ 'c ' ' ' X' %c % % % X% %c % % % X% %c % % % X%     X 6C 6 6 6 X6 "c " " " X" ; ; ; ; X;7S 7T 7 7 7 7#         !%#' !% DIo : !	
  $J 
   < tCy    X 4S>        c3h     #'!%#'  4K $J	
 ! 
c   & .2
 
34
 DcN#d*

 
c
 
 
 
:
 
$s)c/ 
 
QT 
 
 
 
 MR
 
S	?T#Y.
EI
	
 
 
 
 <C<C<<< X<HDIH$s)HHH XHIC$s)O Id3i I I I I0tCy 0S 0 0 0 0j %*( (#Y( "( 
c	( ( ( ( ( ( ( (r9   rq   )r   r   )NFF)r   r   )4pathlibr   typingr   r   r   r   (mistral_common.protocol.instruct.requestr   r   +mistral_common.protocol.instruct.tool_callsr	   r
   *mistral_common.protocol.instruct.validatorr   %mistral_common.tokens.tokenizers.baser   r   )mistral_common.tokens.tokenizers.instructr   .mistral_common.tokens.tokenizers.sentencepiecer   'mistral_common.tokens.tokenizers.tekkenr   vllm.entrypoints.chat_utilsr   0vllm.entrypoints.openai.chat_completion.protocolvllm.loggerr   protocolr   transformersr   rz   r   r{   r   r  r6   r.   r8   rQ   rK   rL   r  tupler[   r_   rd   r   ro   rq   r    r9   r-   <module>r/     s         5 5 5 5 5 5 5 5 5 5 5 5      G F F F F F F F E E E E E E        K J J J J J      ? > > > > > B B B B B B R R R R R R # # # # # # # # # # # # 

******
QQQQQQQ 
 
 
	
 	
 	
 	
 	
 	
 	
 	

 
X		"E "E "E "EJC C C C@ *.#("'	K K/0KS#X$&K !K  	K
 4,-tDcN/Cd/JJKK K K K\S S S S
 <  C%K  C        "N N N N N} N N N N Ns   "A) )A76A7