
    .`i                         U d dl Z d dlZd dlmZ d dlmZ d dlmZmZm	Z	 d dl
mZ ddlmZ ee	z  Zeed<   d	ed
efdZ G d de          ZdS )    N)Path)	TypeAlias)AutoTokenizerPreTrainedTokenizerPreTrainedTokenizerFast))get_sentence_transformer_tokenizer_config   )TokenizerLikeHfTokenizer	tokenizerreturnc                     t          j                    } j         j                                         t	                     t                                                    t           d          rFt          j	        t                    5  t           j                  ddd           n# 1 swxY w Y    G  fdd j                  }d j        j         |_        ||_        |S )z
    By default, transformers will recompute multiple tokenizer properties
    each time they are called, leading to a significant slowdown.
    This proxy caches these properties for faster access.
    
vocab_sizeNc                       e Zd Zedee         ffd            Zedee         ffd            Zedef fd            Z	de
eef         ffdZdeffdZfdZdS )	-get_cached_tokenizer.<locals>.CachedTokenizerr   c                     S N )selftokenizer_all_special_idss    f/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/tokenizers/hf.pyall_special_idsz=get_cached_tokenizer.<locals>.CachedTokenizer.all_special_ids(   s	    ,,    c                     S r   r   )r   tokenizer_all_special_tokenss    r   all_special_tokensz@get_cached_tokenizer.<locals>.CachedTokenizer.all_special_tokens,   s	    //r   c                     S r   r   )r   max_token_ids    r   r   z:get_cached_tokenizer.<locals>.CachedTokenizer.max_token_id0   s	    r   c                     S r   r   )r   tokenizer_vocabs    r   	get_vocabz7get_cached_tokenizer.<locals>.CachedTokenizer.get_vocab4   s	    ""r   c                     S r   r   )r   tokenizer_lens    r   __len__z5get_cached_tokenizer.<locals>.CachedTokenizer.__len__7   s	      r   c                     t           ffS r   )get_cached_tokenizer)r   r   s    r   
__reduce__z8get_cached_tokenizer.<locals>.CachedTokenizer.__reduce__:   s    ')55r   N)__name__
__module____qualname__propertylistintr   strr   r   dictr!   r$   r'   )r   r   r   r   r#   r    s   r   CachedTokenizerr   '   s       		-T#Y 	- 	- 	- 	- 	- 
	- 
	0S	 	0 	0 	0 	0 	0 
	0 
	 # 	  	  	  	  	  
	 	#tCH~ 	# 	# 	# 	# 	# 	#	!S 	! 	! 	! 	! 	! 	!	6 	6 	6 	6 	6 	6 	6r   r0   Cached)copyr   r   r!   lenmaxvalueshasattr
contextlibsuppressNotImplementedErrorr   	__class__r(   )r   cached_tokenizerr0   r   r   r   r#   r    s   `  @@@@@r   r&   r&      sx    y++ ) 9#,#? ))++O	NNM--//00L
 y,'' C !455 	C 	C|Y-ABBL	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C6 6 6 6 6 6 6 6 6 6 6 6)- 6 6 6,  G	(;(DFFO!0s   B88B<?B<c                   V    e Zd Zedddddeez  dededz  dedz  def
d	            ZdS )
CachedHfTokenizerFN)trust_remote_coderevisiondownload_dirpath_or_repo_idr>   r?   r@   r   c                   	 t          j        |g|R |||d|}nJ# t          $ r=}|s4dt          |          v sdt          |          v rd}	t	          |	          ||d }~ww xY wt          ||          }
t          |
t                    rN|
                    dd          r8d |j	        
                                D             }|                    |           t          |          S )N)r>   r?   	cache_dirz,does not exist or is not currently imported.z*requires you to execute the tokenizer filezFailed to load the tokenizer. If the tokenizer is a custom tokenizer not yet available in the HuggingFace transformers library, consider setting `trust_remote_code=True` in LLM or using the `--trust-remote-code` flag in the CLI.do_lower_caseFc                 >    i | ]\  }}||                                 S r   )lower).0kvs      r   
<dictcomp>z5CachedHfTokenizer.from_pretrained.<locals>.<dictcomp>r   s3     " " "!%A17799" " "r   )r   from_pretrained
ValueErrorr.   RuntimeErrorr   
isinstancer/   getspecial_tokens_mapitemsadd_special_tokensr&   )clsrA   r>   r?   r@   argskwargsr   eerr_msgencoder_configrP   s               r   rK   z!CachedHfTokenizer.from_pretrainedD   sS   	%5  #4!&   II  	 	 	 % >#a&&HH?3q66IIA  #7++2#	* CX
 
 nd++ 	=0B0BU1
 1
 	=" ")2)E)K)K)M)M" " " (();<<<#I...s    
A#8AA#)	r(   r)   r*   classmethodr.   r   boolr   rK   r   r   r   r=   r=   C   s        
 #(##'2/ 2/ 2/t2/  	2/
 *2/ Dj2/ 
2/ 2/ 2/ [2/ 2/ 2/r   r=   )r7   r2   pathlibr   typingr   transformersr   r   r   vllm.transformers_utils.configr   protocolr
   r   __annotations__r&   r=   r   r   r   <module>ra      s                      T T T T T T T T T T T T T T T T # # # # # #,/FFY F F F/K /K / / / /d4/ 4/ 4/ 4/ 4/ 4/ 4/ 4/ 4/ 4/r   