
     `i                     n*   d Z ddlZddlZddlZddlZddlmZ ddlmZm	Z	m
Z
 ddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZmZmZmZ ddlm Z  ddl!m"Z" ddl#m$Z$m%Z%m&Z&m'Z'm(Z(  e            rddl)m*Z* ndZ* ej+        e,          Z- ee.e/e	e.         e	e.         f         f         g dd e            rdndffd e            rdnd e            rdndffdd e            rdndffdd e            rdndffdd e            rdndffd d e            rd!ndffd"d e            rdndffd#d$ e            rd%nd e            rd&ndffd'd(d e            rdndffd) e            rd*nddffd+d,d- e            rd.nd e            rd/ndffd0d1 e            rd2ndffd3d4d e            rd5ndffd6d7d8d e            rdndffd9d: e            rd;ndffd<d e            rd=ndffd>d e            rd5ndffd?d@ e            rdAndffdBd e            rdndffdCdD e            rdEnd e            rdFndffdGdH e            rdnd e            rdndffdId e            rdndffdJd@ e            rdAndffdKd e            rdndffdLd e            rdndffdMdN e            rdOnd e            rdPndffdQdR e            rdSndffdTd e            rd!ndffdUd e            rd!ndffdVd e            rdndffdWdX e            rdYndffdZd[ e            rd\ndffd] e            rd^nd e            rd_ndffd`dad e            rd5ndffdbdcddd@ e            rdAndffded: e            rd;ndffdfdg e            rdhndffdi e            rdjnd e            rdkndffdl e            rdnd e            rdndffdm e            rdnd e            rdndffdn e            rdnd e            rdndffdo e            rdnd e            rdndffdpdq e            rdnd e            rdndffdrds e            rdtndffdudv e            rdwndffdxdy e            rdzndffd{d: e            rd;ndffd|d e            rdndffd}d e            rdndffd~d e            rdndffd e            rdnddffdd e            rd:nd e            rd;ndffdd e            rd5ndffdd e            rdndffd e            rdnddffddd e            rd;ndffdd e            rdndffddd e            rdndffd e            rdnd e            rdndffd e            rdnd e            rdndffd e            rdnd e            rdndffd e            rdnd e            rdndffd e            rdnd e            rdndffd e            rdnd e            rdndffdd e            rdndffdd e            rd5ndffdd e            rd5ndffdd e            rd5ndffdd e            rd5ndffdd e            rd5ndffd e            rdnddffdd: e            rd;ndffdd: e            rd;ndffdd: e            rd;ndffdd e            rdndffddd e            rd5ndffdd: e            rd;ndffddddddd e            rdndffdd e            rdndffdd e            rd5ndffdd e            rdndffddd@ e            rdAndffdd e            rdndffdd e            rdndffdd e            rdndffdd: e            rd;ndffdd: e            rd;ndffddX e            rdYndffd e            rdnd e            rdndffdd e            rdndffd e            rdnd e            rdndffdd e            rdnd e            rdndffdd e            rd5ndffdd e            rdndffdd e            rdndffdd e            rdndffdd e            rdndffdd e            rdndffdd e            rdndffd e            rdnd e            rdndffd e            rdnd e            rdndffd e            rdnd e            rdndffdd e            rdndffdd e            rdndffdd e            rdndffdd e            rdndffdd e            rdndffd e            rdnd e            rdndffddd e            rdndffd e            rdnddffdd e            rdndffdd e            rdndffd e            rdnddffd e            rdnd e            rdndffd e            rdnd e            rdndffdd@ e            rdAndffdd e            rdndffdd e            rdndffdd e            rd:nd e            rd;ndffd e            rdn e            rdnd e            r e            sdndffd e            rdn e            rdnd e            r e            sdndffd e            rdn e            rdnd e            r e            sdndffd e            rdn e            rdnd e            r e            sdndffdd e            rdndffd e            rdnddffdd e            rdndffdd e            rd ndffdd e            rd5ndffdd e            rd5ndffdd e            rd5ndffdd e            rdndffdd e            rdndffdd@ e            rdAndffd	 e            rd
nd e            rdndffdd e            rdndffdd e            rdndffdd e            rdndffddd e            rd5ndffdd e            rdndffd e            rdnd e            rdndffd e            rdnd e            rdndffd e            rdnd e            rdndffdd e            rdndffdd e            rdndffdd e            rd;ndffdd e            rdndffdd e            rdndffdd e            rdndffdd  e            rd!ndffd"d: e            rd;ndffd#d e            rdndffd$d e            rdndffd%d e            rdndffd&d' e            rd1nd e            rd2ndffd( e            rd1nd e            rd2ndffd)d* e            rdnd e            rdndffd+dR e            rdSndffd,d e            rdndffd-d e            rdndffd.d/d e            rdndffd0d e            rdn e            rd5ndffd1 e            rd2nddffd3d4d e            rdndffd5dX e            rdYndffd6dX e            rdYndffd7dX e            rdYndffd8dX e            rdYndffd9dX e            rdYndffd:dX e            rdYndffd;dX e            rdYndffd<dX e            rdYndffd=dX e            rdYndffd>dX e            rdYndffd?dX e            rdYndffd@dX e            rdYndffdAdBdC e            rdDndffdE e            rdnd e            rdndffdF e            rdGnd e            rdHndffdI e            rdJnd e            rdKndffdLdM e            rdNndffdOd@ e            rdAndffdPd@ e            rdAndffdQdRdS e            rdTndffdUd e            rdndffdV e            rdWnd e            rdXndffdY e            rdWnd e            rdXndffdZ e            rdnd e            rdndffd[ e            rd\nddffd] e            rdnd e            rdndffd^d e            rd5ndffd_ e            rd`nddffdadb e            rdcnddffdddedf e            rdgndffdhd e            rdndffdid: e            rd;ndffdj e            rdnd e            rdndffdk e            rdnd e            rdndffdl e            rdnd e            rdndffdmdndodpd e            rdndffdq e            rdrnd e            rdsndffdt e            rdnd e            rdndffdud e            rdndffdvd e            rdndffdwd e            rdndffdxd e            rdndffdydz e            rdnd e            r e            sd5ndffd{d|d}d~dd e            rdndffdd e            rdndffd e            rdnd e            rdndffdd e            rdnddffd e            rdnd e            rdndffd e            rdnd e            rdndffd e            rdnd e            rdndffdd e            rdndffd e            rdnd e            rdndffd e            rdnd e            rdndffd e            rdnd e            rdndffd e            rdnd e            rdndff          Z0 e"e$e0          Z1d  e$j2                    D             Z3de.de
e4e         df         fdZ5	 	 	 	 	 	 	 	 dde
e.ej6        e.         f         de	e
e.ej6        e.         f                  de7de	e7         de	e8e.e.f                  de	e
e7e.f                  de	e.         de7de.de8e.ef         fdZ9 G d d          Z:ddgZ;dS (  zAuto Tokenizer class.    N)OrderedDict)AnyOptionalUnion)is_mistral_common_available   )PretrainedConfig)get_class_from_dynamic_moduleresolve_trust_remote_code)load_gguf_checkpoint)PreTrainedTokenizer)TOKENIZER_CONFIG_FILE)cached_fileextract_commit_hashis_g2p_en_availableis_sentencepiece_availableis_tokenizers_availablelogging   )EncoderDecoderConfig   )_LazyAutoMapping)CONFIG_MAPPING_NAMES
AutoConfigconfig_class_to_model_typemodel_type_to_module_name!replace_list_option_in_docstrings)PreTrainedTokenizerFastaimv2CLIPTokenizerCLIPTokenizerFastalbertAlbertTokenizerAlbertTokenizerFastalignBertTokenizerBertTokenizerFastarceeLlamaTokenizerLlamaTokenizerFastaria
aya_visionCohereTokenizerFastbark)bart)BartTokenizerBartTokenizerFastbarthezBarthezTokenizerBarthezTokenizerFast)bartpho)BartphoTokenizerNbertzbert-generationBertGenerationTokenizer)zbert-japanese)BertJapaneseTokenizerN)bertweet)BertweetTokenizerNbig_birdBigBirdTokenizerBigBirdTokenizerFastbigbird_pegasusPegasusTokenizerPegasusTokenizerFast)biogpt)BioGptTokenizerNbitnetr   )
blenderbot)BlenderbotTokenizerBlenderbotTokenizerFast)zblenderbot-small)BlenderbotSmallTokenizerNblipzblip-2GPT2TokenizerGPT2TokenizerFastbloomBloomTokenizerFastbltbridgetowerRobertaTokenizerRobertaTokenizerFastbros)byt5)ByT5TokenizerN	camembertCamembertTokenizerCamembertTokenizerFast)canine)CanineTokenizerN	chameleonchinese_clipclapclipclipseg)clvp)ClvpTokenizerN
code_llamaCodeLlamaTokenizerCodeLlamaTokenizerFastcodegenCodeGenTokenizerCodeGenTokenizerFastcoherecohere2colpalicolqwen2Qwen2TokenizerQwen2TokenizerFastconvbertConvBertTokenizerConvBertTokenizerFastcpmCpmTokenizerCpmTokenizerFast)cpmant)CpmAntTokenizerNcsm)ctrl)CTRLTokenizerN)zdata2vec-audioWav2Vec2CTCTokenizerNzdata2vec-textdbrxdebertaDebertaTokenizerDebertaTokenizerFastz
deberta-v2DebertaV2TokenizerDebertaV2TokenizerFastdeepseek_v2deepseek_v3deepseek_vldeepseek_vl_hybrid)dia)DiaTokenizerN	diffllama
distilbertDistilBertTokenizerDistilBertTokenizerFastdprDPRQuestionEncoderTokenizerDPRQuestionEncoderTokenizerFastelectraElectraTokenizerElectraTokenizerFastemu3ernieernie4_5ernie4_5_moeernie_mErnieMTokenizer)esm)EsmTokenizerNexaone4falconfalcon_mambaGPTNeoXTokenizerFastfastspeech2_conformerFastSpeech2ConformerTokenizer)flaubert)FlaubertTokenizerN	flex_olmofnetFNetTokenizerFNetTokenizerFast)fsmt)FSMTTokenizerNfunnelFunnelTokenizerFunnelTokenizerFastgemmaGemmaTokenizerGemmaTokenizerFastgemma2gemma3gemma3_textgemma3ngemma3n_textgitglmglm4glm4_moeglm4v	glm4v_moezgpt-sw3GPTSw3Tokenizergpt2gpt_bigcodegpt_neogpt_neox)gpt_neox_japanese)GPTNeoXJapaneseTokenizerNgpt_ossgptj)zgptsan-japanese)GPTSanJapaneseTokenizerN)graniterJ   N)
granitemoer   )granitemoehybridr   )granitemoesharedr   zgrounding-dinogroupvitheliumherbertHerbertTokenizerHerbertTokenizerFast)hubertrx   ibertideficsidefics2idefics3instructblipinstructblipvideointernvljambajanusjetmoe)jukebox)JukeboxTokenizerNzkosmos-2XLMRobertaTokenizerXLMRobertaTokenizerFastz
kosmos-2.5layoutlmLayoutLMTokenizerLayoutLMTokenizerFast
layoutlmv2LayoutLMv2TokenizerLayoutLMv2TokenizerFast
layoutlmv3LayoutLMv3TokenizerLayoutLMv3TokenizerFast	layoutxlmLayoutXLMTokenizerLayoutXLMTokenizerFastledLEDTokenizerLEDTokenizerFastliltllamallama4llama4_textllava
llava_nextllava_next_videollava_onevision
longformerLongformerTokenizerLongformerTokenizerFastlongt5T5TokenizerT5TokenizerFast)luke)LukeTokenizerNlxmertLxmertTokenizerLxmertTokenizerFastm2m_100M2M100Tokenizermambamamba2marianMarianTokenizermbartMBartTokenizerMBartTokenizerFastmbart50MBart50TokenizerMBart50TokenizerFastmegazmegatron-bert
metaclip_2)zmgp-str)MgpstrTokenizerNminimax	ministralMistralCommonTokenizermistralmistral3mixtralmllamamlukeMLukeTokenizerzmm-grounding-dino
mobilebertMobileBertTokenizerMobileBertTokenizerFast
modernbert	moonshinemoshimpnetMPNetTokenizerMPNetTokenizerFastmptmramt5MT5TokenizerMT5TokenizerFastmusicgenmusicgen_melodymvpMvpTokenizerMvpTokenizerFast)myt5)MyT5TokenizerNnemotronnezhanllbNllbTokenizerNllbTokenizerFastznllb-moenystromformerolmoolmo2olmo3olmoezomdet-turbo	oneformerz
openai-gptOpenAIGPTTokenizerOpenAIGPTTokenizerFastoptowlv2owlvit	paligemma)parakeet)ParakeetCTCTokenizerNpegasus	pegasus_x)	perceiver)PerceiverTokenizerN	persimmonphiphi3phimoe)phobert)PhobertTokenizerN
pix2structpixtralplbartPLBartTokenizer)
prophetnet)ProphetNetTokenizerNqdqbertqwen2qwen2_5_omni
qwen2_5_vlqwen2_audio	qwen2_moeqwen2_vlqwen3	qwen3_moe
qwen3_nextqwen3_omni_moeqwen3_vlqwen3_vl_moe)rag)RagTokenizerNrealmRealmTokenizerRealmTokenizerFastrecurrent_gemmareformerReformerTokenizerReformerTokenizerFastrembertRemBertTokenizerRemBertTokenizerFast	retribertRetriBertTokenizerRetriBertTokenizerFastrobertazroberta-prelayernorm)roc_bert)RoCBertTokenizerNroformerRoFormerTokenizerRoFormerTokenizerFastrwkvseamless_m4tSeamlessM4TTokenizerSeamlessM4TTokenizerFastseamless_m4t_v2shieldgemma2siglipSiglipTokenizersiglip2smollm3speech_to_textSpeech2TextTokenizer)speech_to_text_2)Speech2Text2TokenizerNspeecht5SpeechT5Tokenizer)splinter)SplinterTokenizerSplinterTokenizerFastsqueezebertSqueezeBertTokenizerSqueezeBertTokenizerFaststablelm
starcoder2switch_transformerst5t5gemma)tapas)TapasTokenizerN)tapex)TapexTokenizerN)z
transfo-xl)TransfoXLTokenizerNtvpudopUdopTokenizerUdopTokenizerFastumt5video_llavaviltvipllavavisual_bert)vits)VitsTokenizerNvoxtral)wav2vec2rx   )zwav2vec2-bertrx   )zwav2vec2-conformerrx   )wav2vec2_phoneme)Wav2Vec2PhonemeCTCTokenizerNwhisperWhisperTokenizerWhisperTokenizerFastxclipxglmXGLMTokenizerXGLMTokenizerFast)xlm)XLMTokenizerNzxlm-prophetnetXLMProphetNetTokenizerzxlm-robertazxlm-roberta-xlxlnetXLNetTokenizerXLNetTokenizerFastxlstmxmodyosozambazamba2c                     i | ]\  }}||	S  r  ).0kvs      ~/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/auto/tokenization_auto.py
<dictcomp>r  $  s    @@@41a!Q@@@    
class_namereturnc                 $   | dk    rt           S t                                          D ]s\  }}| |v rjt          |          }|dv r| dk    rt	          j        dd          }nt	          j        d| d          }	 t          ||           c S # t          $ r Y ow xY wtt          j	        
                                D ]"}|D ]}t          |dd           | k    r|c c S #t	          j        d          }t          ||           rt          ||           S d S )	Nr   )r  r  r  r  z.tokenization_mistral_commontransformers.ztransformers.models__name__)r   TOKENIZER_MAPPING_NAMESitemsr   	importlibimport_modulegetattrAttributeErrorTOKENIZER_MAPPING_extra_contentvalueshasattr)r  module_name
tokenizersmodule	tokenizermain_modules         r  tokenizer_class_from_namer  '  sq   ...&&#:#@#@#B#B 
 
Z##3K@@KAAAjTlFlFl"01OQ_``"01B[1B1BDYZZvz22222!    $ (6==?? ! !
# 	! 	!Iy*d33zAA       B	! ).99K{J'' 0{J///4s   8B


BBF pretrained_model_name_or_path	cache_dirforce_downloadresume_downloadproxiestokenrevisionlocal_files_only	subfolderc	                    |	                     dd          }
|
-t          j        dt                     |t	          d          |
}|	                    d          }t          | t          ||||||||ddd|          }|t          	                    d           i S t          ||          }t          |d	
          5 }t          j        |          }ddd           n# 1 swxY w Y   ||d<   |S )a  
    Loads the tokenizer configuration from a pretrained model tokenizer configuration.

    Args:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            This can be either:

            - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
              huggingface.co.
            - a path to a *directory* containing a configuration file saved using the
              [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.

        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model configuration should be cached if the standard
            cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force to (re-)download the configuration files and override the cached versions if they
            exist.
        resume_download:
            Deprecated and ignored. All downloads are now resumed by default when possible.
            Will be removed in v5 of Transformers.
        proxies (`dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
        token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `hf auth login` (stored in `~/.huggingface`).
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
            identifier allowed by git.
        local_files_only (`bool`, *optional*, defaults to `False`):
            If `True`, will only try to load the tokenizer configuration from local files.
        subfolder (`str`, *optional*, defaults to `""`):
            In case the tokenizer config is located inside a subfolder of the model repo on huggingface.co, you can
            specify the folder name here.

    <Tip>

    Passing `token=True` is required when you want to use a private model.

    </Tip>

    Returns:
        `dict`: The configuration of the tokenizer.

    Examples:

    ```python
    # Download configuration from huggingface.co and cache.
    tokenizer_config = get_tokenizer_config("google-bert/bert-base-uncased")
    # This model does not have a tokenizer config so the result will be an empty dict.
    tokenizer_config = get_tokenizer_config("FacebookAI/xlm-roberta-base")

    # Save a pretrained tokenizer locally and you can reload its config
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
    tokenizer.save_pretrained("tokenizer-test")
    tokenizer_config = get_tokenizer_config("tokenizer-test")
    ```use_auth_tokenNrThe `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.V`token` and `use_auth_token` are both specified. Please set only the argument `token`._commit_hashF)r  r  r  r  r  r  r  r   _raise_exceptions_for_gated_repo%_raise_exceptions_for_missing_entries'_raise_exceptions_for_connection_errorsr  z\Could not locate the tokenizer configuration file, will try to use the model config instead.zutf-8)encoding)popwarningswarnFutureWarning
ValueErrorgetr   r   loggerinfor   openjsonload)r  r  r  r  r  r  r  r  r  kwargsr  commit_hashresolved_config_filereaderresults                  r  get_tokenizer_configr  E  s\   R ZZ 0$77N! A	
 	
 	
 uvvv**^,,K&%%'))..305     #rsss	%&:KHHK	"W	5	5	5 #6""# # # # # # # # # # # # # # #(F>Ms   <CC!$C!c                   h    e Zd ZdZd Ze ee          d                         Ze	dd            Z
dS )AutoTokenizera  
    This is a generic tokenizer class that will be instantiated as one of the tokenizer classes of the library when
    created with the [`AutoTokenizer.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    c                      t          d          )Nz}AutoTokenizer is designed to be instantiated using the `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)` method.)OSError)selfs    r  __init__zAutoTokenizer.__init__  s    _
 
 	
r  c           	      6
   |                     dd          }|Ct          j        dt                     |                    d          t          d          ||d<   |                     dd          }d|d<   |                     d	d          }|                     d
d          }|                     dd          }|                    d          }	|d}
t                              |d          }|8t          d| dd                    d t          D                        d          |\  }}|r,|t          |          }
nt          
                    d           |
t          |          }
|
t          d| d           |
j        |g|R i |S t          |fi |}d|v r|d         |d<   |                    d          }d}d|v rGt          |d         t          t          f          r	|d         }n|d                             dd          }|t          |t                     sM|	r7t#          ||	fi |}t%          |d          d         }t'          j        d(i |}nt'          j        |fd|i|}|j        }t-          |d          rd|j        v r|j        d         }|du}t1          |          t2          v p(|duo$t          |          dupt          |dz             du}|rP|r|d         	|d         }n|d         }d|v r|                    d          d         }nd}t7          |||||          }|rM|rKt9          ||fi |}
|                     dd          }|
                                  |
j        |g|R d|i|S |fd}
|r)|                    d          s| d}t          |          }
|
|}t          |          }
|
t          d| d            |
j        |g|R i |S t          |t>                    rdt1          |j                   t1          |j!                  ur5t          
                    d!|j!        j"         d"|j         j"         d#           |j!        }tG          t1          |          j$                  }|Vt2          t1          |                   \  }}|r|s| |j        |g|R i |S | |j        |g|R i |S t          d$          t          d%|j"         d&d                    d' t2          D                        d          ))a]  
        Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.

        The tokenizer class to instantiate is selected based on the `model_type` property of the config object (either
        passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's missing, by
        falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                Can be either:

                    - A string, the *model id* of a predefined tokenizer hosted inside a model repo on huggingface.co.
                    - A path to a *directory* containing vocabulary files required by the tokenizer, for instance saved
                      using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
                    - A path or url to a single saved vocabulary file if and only if the tokenizer only requires a
                      single vocabulary file (like Bert or XLNet), e.g.: `./my_model_directory/vocab.txt`. (Not
                      applicable to all derived classes)
            inputs (additional positional arguments, *optional*):
                Will be passed along to the Tokenizer `__init__()` method.
            config ([`PretrainedConfig`], *optional*)
                The configuration object used to determine the tokenizer class to instantiate.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download the model weights and configuration files and override the
                cached versions if they exist.
            resume_download:
                Deprecated and ignored. All downloads are now resumed by default when possible.
                Will be removed in v5 of Transformers.
            proxies (`dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.
            subfolder (`str`, *optional*):
                In case the relevant files are located inside a subfolder of the model repo on huggingface.co (e.g. for
                facebook/rag-token-base), specify it here.
            use_fast (`bool`, *optional*, defaults to `True`):
                Use a [fast Rust-based tokenizer](https://huggingface.co/docs/tokenizers/index) if it is supported for
                a given model. If a fast tokenizer is not available for a given model, a normal Python-based tokenizer
                is returned instead.
            tokenizer_type (`str`, *optional*):
                Tokenizer type to be loaded.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (additional keyword arguments, *optional*):
                Will be passed to the Tokenizer `__init__()` method. Can be used to set special tokens like
                `bos_token`, `eos_token`, `unk_token`, `sep_token`, `pad_token`, `cls_token`, `mask_token`,
                `additional_special_tokens`. See parameters in the `__init__()` for more details.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer

        >>> # Download vocabulary from huggingface.co and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

        >>> # Download vocabulary from huggingface.co (user-uploaded) and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")

        >>> # If vocabulary files are in a directory (e.g. tokenizer was saved using *save_pretrained('./test/saved_model/')*)
        >>> # tokenizer = AutoTokenizer.from_pretrained("./test/bert_saved_model/")

        >>> # Download vocabulary from huggingface.co and define model-specific arguments
        >>> tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base", add_prefix_space=True)
        ```r  Nr  r  r  configT
_from_autouse_fasttokenizer_typetrust_remote_code	gguf_filezPassed `tokenizer_type` z3 does not exist. `tokenizer_type` should be one of z, c              3      K   | ]}|V  d S Nr  r  cs     r  	<genexpr>z0AutoTokenizer.from_pretrained.<locals>.<genexpr>+  s"       D Dq D D D D D Dr  r  zt`use_fast` is set to `True` but the tokenizer class does not have a fast version.  Falling back to the slow version.zTokenizer class z is not currently imported.r  tokenizer_classauto_mapr  F)return_tensorsFastr   r   z--code_revisionz- does not exist or is not currently imported.z The encoder model config class: z3 is different from the decoder model config class: z. It is not recommended to use the `AutoTokenizer.from_pretrained()` method in this case. Please use the encoder and decoder specific tokenizer classes.zzThis tokenizer cannot be instantiated. Please make sure you have `sentencepiece` installed in order to use this tokenizer.z!Unrecognized configuration class z8 to build an AutoTokenizer.
Model type should be one of c              3   $   K   | ]}|j         V  d S r  )r  r   s     r  r  z0AutoTokenizer.from_pretrained.<locals>.<genexpr>  s$      4[4[AQZ4[4[4[4[4[4[r  r  )%r  r  r  r  r  r  r  joinr  r  warningfrom_pretrainedr  
isinstancetuplelistr	   r   r   r   	for_modelr  r  r  typer  splitr   r
   register_for_auto_classendswithr   decoderencoder	__class__r   r  )clsr  inputsr  r  r  r  r  r  r  r  tokenizer_class_tupletokenizer_class_nametokenizer_fast_class_nametokenizer_configconfig_tokenizer_classtokenizer_auto_map	gguf_pathconfig_dicthas_remote_codehas_local_code	class_refupstream_repo_tokenizer_class_candidate
model_typetokenizer_class_pytokenizer_class_fasts                               r  r  zAutoTokenizer.from_pretrained  s   Z  $4d;;%M E   zz'"". l   -F7OHd++#|::j$//$4d;;"JJ':DAAJJ{++	 %"O$;$?$?PT$U$U!$, H~ H Hyy D D,C D D DDDH H H  
 ?T; "; ,8&?@Y&Z&ZOONN=   &";<P"Q"Q& !e4H!e!e!efff2?23PdSYddd]cddd 00MXXQWXX---%5n%EF>"!1!5!56G!H!H!)))*:6FF ]%5j%A""%5j%A%E%EoW[%\%\" ")f&677   +,I9 _ _X^ _ _I"6yQV"W"W"WX`"aK'1@@K@@FF'75 IZ^d F &,%;"vz** F&//Q/Q%+__%E",D8f):: 
"$. )*@AAM Z,-Cf-LMMUYY	 	  	 2.q1=.q1		.q1	y   ) 5 5a 8 $ 9!#@.Racp! !  	e0 	e;IGdoohnooO

?D11A335552?2-06  J[_e   $/"O W 6 ? ? G G W/E,K,K,K)";<U"V"V&,B)";<U"V"V& o'@ooo   3?23PdSYddd]cddd f233 	$FN##4+?+???2v~7O 2 2%+^%=2 2 2   ^F/V0EFF
!7Hf7V4 4# 	 	5G5O;+;<Ym\bmmmflmmm%1=-=>[o^dooohnooo$:  
 _0@ _ _+/994[4[IZ4[4[4[+[+[_ _ _
 
 	
r  NFc                    ||t          d          |$t          |t                    rt          d          |$t          |t                    rt          d          |=|;t          |t                    r&|j        |k    rt          d|j         d| d          | t
          j        v rt
          |          \  }}||}||}t
                              | ||f|           dS )	a  
        Register a new tokenizer in this mapping.


        Args:
            config_class ([`PretrainedConfig`]):
                The configuration corresponding to the model to register.
            slow_tokenizer_class ([`PretrainedTokenizer`], *optional*):
                The slow tokenizer to register.
            fast_tokenizer_class ([`PretrainedTokenizerFast`], *optional*):
                The fast tokenizer to register.
        NzKYou need to pass either a `slow_tokenizer_class` or a `fast_tokenizer_classz:You passed a fast tokenizer in the `slow_tokenizer_class`.z:You passed a slow tokenizer in the `fast_tokenizer_class`.zThe fast tokenizer class you are passing has a `slow_tokenizer_class` attribute that is not consistent with the slow tokenizer class you passed (fast tokenizer has z and you passed z!. Fix one of those so they match!)exist_ok)r  
issubclassr   r   slow_tokenizer_classr  r  register)config_classr-  fast_tokenizer_classr+  existing_slowexisting_fasts         r  r.  zAutoTokenizer.register  s6     ',@,Hjkkk+
;OQh0i0i+YZZZ+
;OQd0e0e+YZZZ !,$0/1HII 1$9=QQQ!'<! !Nb! ! !   ,;;;+<\+J(M=#+'4$#+'4$""<2FH\1]hp"qqqqqr  )NNF)r  
__module____qualname____doc__r  classmethodr   r  r  staticmethodr.  r  r  r  r  r    s         
 
 
 &&'>??`
 `
 @? [`
D )r )r )r \)r )r )rr  r  r  )NFNNNNFr  )<r5  r  r  osr  collectionsr   typingr   r   r   transformers.utils.import_utilsr   configuration_utilsr	   dynamic_module_utilsr
   r   modeling_gguf_pytorch_utilsr   tokenization_utilsr   tokenization_utils_baser   utilsr   r   r   r   r   r   encoder_decoderr   auto_factoryr   configuration_autor   r   r   r   r   tokenization_utils_fastr   
get_loggerr  r  strr  r  r  r  CONFIG_TO_TYPEr  r  PathLikebooldictr  r  __all__r  r  r  <module>rM     s43          				  # # # # # # ' ' ' ' ' ' ' ' ' ' G G G G G G 3 3 3 3 3 3 \ \ \ \ \ \ \ \ ? ? ? ? ? ? 5 5 5 5 5 5 < < < < < <                3 2 2 2 2 2 * * * * * *               #BBBBBBB" 
	H	%	% P+c5#1M+N&NOc'>'>'@'@J##d	
c %?%?%A%AK!!t)@)@)B)BL%%	
c 
?;R;R;T;T$^$7$7Z^_`c  
#=T=T=V=V%`%9%9\`ab!c" 
"<S<S<U<U$_$8$8[_`a#c$ 
7N7N7P7PZ33VZ[\%c& 
/:Q:Q:S:S#]#6#6Y]^_'c( 	9)c, &@&@&B&BL""*A*A*C*CM&&	
+c8 	09c: 
/:Q:Q:S:S#]#6#6Y]^_;c< 
:T:T:V:V`66\`bfgh=c> 	;?c@ 	2AcD &@&@&B&BL""*A*A*C*CM&&	
CcP 
/KbKbKdKd1n1G1GjnopQcR 	.ScT 
D7N7N7P7PZ33VZ[\UcV 	KWcX 	AYcZ 
/:Q:Q:S:S#]#6#6Y]^_[c\ 
O<S<S<U<U%_%8%8[_`a]c^ 
41H1H1J1JT--PTUV_c` 
4K4K4M4MW00SWXYacb 
+G^G^G`G`-j-C-Cfjklccd 
/:Q:Q:S:S#]#6#6Y]^_ecf 	*gcj (B(B(D(DN$$$,C,C,E,EO((4	
icv 	.wcz $>$>$@$@J  d(?(?(A(AK$$t	
ycF 
/BYBYB[B[+e+>+>aefgGcJ "*A*A*C*CM&&	
IcX '>'>'@'@J##d	
Wcf '>'>'@'@J##d	
ecr 	*scv (B(B(D(DN$$$,C,C,E,EO((4	
ucB 
'CZCZC\C\)f)?)?bfghCcD 
D3J3J3L3LV//RVWXEcF 
T4K4K4M4MW00SWXYGcH 
%?V?V?X?X'b';';^bcdIcJ 
&@W@W@Y@Y(c(<(<_cdeKcL 
)F]F]F_F_+i+B+BeijkMcP "<"<">">HD&=&=&?&?I""T	
Oc\ 	.]c^ 
4K4K4M4MW00SWXY_c` 	*acb 	;ccd 
-I`I`IbIb/l/E/Ehlmnecf 
/:Q:Q:S:S#]#6#6Y]^_gch 
'CZCZC\C\)f)?)?bfghicl (B(B(D(DN$$$,C,C,E,EO((4	
kcz $>$>$@$@J  d(?(?(A(AK$$t	
ycH $>$>$@$@J  d(?(?(A(AK$$t	
GcV $>$>$@$@J  d(?(?(A(AK$$t	
Ucd !$>$>$@$@J  d(?(?(A(AK$$t	
ccp 	(qct $>$>$@$@J  d(?(?(A(AK$$t	
sc@ 
-LcLcLeLe/o/H/HkopqAcD -5L5L5N5NX11TX	
CcP 
'CZCZC\C\)f)?)?bfghQcR 
/:Q:Q:S:S#]#6#6Y]^_ScT 
?;R;R;T;T$^$7$7Z^_`UcV 
d4K4K4M4MW00SWXYWcX 
$8O8O8Q8Q [ 4 4W[\]YcZ 
*D*D*F*FP&&DRVWX[c\ 	(]c` #:#:#<#<F$'>'>'@'@J##d	
_cl 
D7N7N7P7PZ33VZ[\mcn 
$:Q:Q:S:S ] 6 6Y]^_ocr $0C0C0E0EO,,4QUV	
qcx 	2ycz 
t4K4K4M4MW00SWXY{c| 
/:Q:Q:S:S#]#6#6Y]^_}c~ 	*c@ 
%@W@W@Y@Y'c'<'<_cdeAcD $>$>$@$@J  d(?(?(A(AK$$t	
CcR $>$>$@$@J  d(?(?(A(AK$$t	
Qc` $>$>$@$@J  d(?(?(A(AK$$t	
_cn $>$>$@$@J  d(?(?(A(AK$$t	
mc| $>$>$@$@J  d(?(?(A(AK$$t	
{cJ $>$>$@$@J  d(?(?(A(AK$$t	
IcV 
9P9P9R9R"\"5"5X\]^WcX 
4K4K4M4MW00SWXYYcZ 
$5L5L5N5NX11TXYZ[c\ 
d9P9P9R9R\55X\]^]c^ 
46M6M6O6OY22UYZ[_c` 
t:Q:Q:S:S]66Y]^_acb 
*D*D*F*FP&&DRVWXccd 
/:Q:Q:S:S#]#6#6Y]^_ecf 
AXAXAZAZ*d*=*=`defgch 
_=T=T=V=V&`&9&9\`abicj 
d6M6M6O6OY22UYZ[kcl 	Bmcn 
T8O8O8Q8Q[44W[\]ocp 
/:Q:Q:S:S#]#6#6Y]^_qcr 	?sct 	-ucv 	0wcx 	6ycz 	6{c| 
OD[D[D]D]-g-@-@cghi}c~ 
o>U>U>W>W'a':':]abcc@ 
D7N7N7P7PZ33VZ[\AcB 
'CZCZC\C\)f)?)?bfghCcD 	3EcF 
%AXAXAZAZ'd'='=`defGcH 
T3J3J3L3LV//RVWXIcJ 
&@W@W@Y@Y(c(<(<_cdeKcL 
&@W@W@Y@Y(c(<(<_cdeMcN 
/BYBYB[B[+e+>+>aefgOcP 
G^G^G`G`0j0C0CfjklQcR 
&@W@W@Y@Y(c(<(<_cdeScV $>$>$@$@J  d(?(?(A(AK$$t	
Ucb 
41H1H1J1JT--PTUVccf $>$>$@$@J  d(?(?(A(AK$$t	
ecr 	0scv )C)C)E)EO%%4-D-D-F-FP))D	
ucB	 
;R;R;T;T^77Z^_`C	cD	 
)F]F]F_F_+i+B+BeijkE	cF	 
-LcLcLeLe/o/H/HkopqG	cH	 
-LcLcLeLe/o/H/HkopqI	cJ	 
+I`I`IbIb-l-E-EhlmnK	cL	 
7N7N7P7P!Z!3!3VZ[\M	cN	 
'F]F]F_F_)i)B)BeijkO	cR	 $>$>$@$@J  d(?(?(A(AK$$t	
Q	c`	 $>$>$@$@J  d(?(?(A(AK$$t	
_	cn	 $>$>$@$@J  d(?(?(A(AK$$t	
m	cz	 
#=T=T=V=V%`%9%9\`ab{	c|	 
(BYBYB[B[*e*>*>aefg}	c~	 
.H_H_HaHa0k0D0Dgklm	c@
 
-G^G^G`G`/j/C/CfjklA
cB
 
-LcLcLeLe/o/H/HkopqC
cF
 !;!;!=!=G4%<%<%>%>H!!D	
E
cR
 	*S
cT
 
%@W@W@Y@Y'c'<'<_cdeU
cV
 
*D*D*F*FP&&DRVWXW
cX
 
43J3J3L3LV//RVWXY
cZ
 
D4K4K4M4MW00SWXY[
c\
 
)C)C)E)EO%%4QUVW]
c`
 $>$>$@$@J  d(?(?(A(AK$$t	
_
cn
 &@&@&B&BL""*A*A*C*CM&&	
m
cz
 
$@W@W@Y@Y&c&<&<_cde{
c|
 
?CZCZC\C\,f,?,?bfgh}
c@ %-D-D-F-FP))D	

cL 	/McP #=#=#?#?IT'>'>'@'@J##d	
Oc^  /.00R((*D*D*F*FP&&D(?(?(A(AqJeJeJgJgq$$mq		
]cp  /.00R((*D*D*F*FP&&D(?(?(A(AqJeJeJgJgq$$mq		
ocB  /.00R((*D*D*F*FP&&D(?(?(A(AqJeJeJgJgq$$mq		
AcT  /.00R((*D*D*F*FP&&D(?(?(A(AqJeJeJgJgq$$mq		
Scd 
$>U>U>W>W&a&:&:]abcecf 
'A'A'C'CM##tTUgch 
G^G^G`G`0j0C0Cfjklicj 
-LcLcLeLe/o/H/Hkopqkcl 
;R;R;T;T^77Z^_`mcn 
t:Q:Q:S:S]66Y]^_ocp 
46M6M6O6OY22UYZ[qcr 
#=T=T=V=V%`%9%9\`absct 
1H1H1J1JT--PTUVucv 
#?V?V?X?X%b%;%;^bcdwcz "<"<">">HD&=&=&?&?I""T	
ycF 
m:Q:Q:S:S%]%6%6Y]^_GcH 
]AXAXAZAZ,d,=,=`defIcJ 
7N7N7P7P!Z!3!3VZ[\KcL 	*McN 
d9P9P9R9R\55X\]^OcP 
?;R;R;T;T$^$7$7Z^_`QcT #=#=#?#?IT'>'>'@'@J##d	
Scb #=#=#?#?IT'>'>'@'@J##d	
acp %?%?%A%AK!!t)@)@)B)BL%%	
oc| 
$2I2I2K2KU..QUVW}c~ 
43J3J3L3LV//RVWXc@ 
40G0G0I0IS,,tTUAcB 
43J3J3L3LV//RVWXCcF 5L5L5N5NX11TXY	
EcL 
?V?V?X?X(b(;(;^bcdMcP !?V?V?X?X#b#;#;^bc	
OcV 
9P9P9R9R"\"5"5X\]^WcX 
?;R;R;T;T$^$7$7Z^_`YcZ 
O<S<S<U<U%_%8%8[_`a[c\ 
'AXAXAZAZ)d)=)=`def]c^ 	5_cb &@&@&B&BL""*A*A*C*CM&&	
acp &@&@&B&BL""*A*A*C*CM&&	
oc|	
}cL $>$>$@$@J  d(?(?(A(AK$$t	
KcX 
#?V?V?X?X%b%;%;^bcdYcZ 
"<S<S<U<U$_$8$8[_`a[c\ 
$>U>U>W>W&a&:&:]abc]c^ 	0_c` 
<S<S<U<U'_'8'8[_`aacd ..00X((3J3J3L3LV//RV		
cct 
)C)C)E)EO%%4QUVWucv 	6wcx 
_=T=T=V=V&`&9&9\`abyc|  (?(?(A(AK$$t	
{cH 
*D[D[D]D],g,@,@cghiIcJ 
(BYBYB[B[*e*>*>aefgKcL 
)CZCZC\C\+f+?+?bfghMcP  (?(?(A(AK$$t	
Oc\ 
&@W@W@Y@Y(c(<(<_cde]c`  (?(?(A(AK$$t	
_cn  (?(?(A(AK$$t	
mc|  (?(?(A(AK$$t	
{cH 
,F]F]F_F_.i.B.BeijkIcJ 
&@W@W@Y@Y(c(<(<_cdeKcL 
*D[D[D]D],g,@,@cghiMcN 	(OcP 
#=T=T=V=V%`%9%9\`abQcT $>$>$@$@J  d(?(?(A(AK$$t	
Scb 'A'A'C'CM##+B+B+D+DN''$	
acp &@&@&B&BL""*A*A*C*CM&&	
oc| 
+I`I`IbIb-l-E-Ehlmn}c~ 
'CZCZC\C\)f)?)?bfghcB #;R;R;T;T!^!7!7Z^_	
AcH 	1IcJ 
)F]F]F_F_+i+B+BeijkKcL 
$2I2I2K2KU..QUVWMcP *D*D*F*FP&&D.E.E.G.GQ**T	
Oc^ *D*D*F*FP&&D.E.E.G.GQ**T	
]cl $>$>$@$@J  d(?(?(A(AK$$t	
kcx 
)C)C)E)EO%%4QUVWyc| $>$>$@$@J  d(?(?(A(AK$$t	
{cH 
T8O8O8Q8Q[44W[\]IcJ 
6P6P6R6R\22X\^bcdKcL 	>McN 
-G-G-I-IS))tUYZ[OcP 	EQcT #CZCZC\C\%f%?%?bfg	
ScZ 
d6M6M6O6OY22UYZ[[c\ 
@W@W@Y@Y)c)<)<_cde]c` "!;!;!=!=G4%<%<%>%>H!!D	
_cn !;!;!=!=G4%<%<%>%>H!!D	
mc| $>$>$@$@J  d(?(?(A(AK$$t	
{cH 	,IcJ 	,KcL 	5McN 
9P9P9R9R"\"5"5X\]^OcR #=#=#?#?IT'>'>'@'@J##d	
Qc` !;!;!=!=G4%<%<%>%>H!!D	
_cl 
)CZCZC\C\+f+?+?bfghmcn 
/:Q:Q:S:S#]#6#6Y]^_ocp 
&@W@W@Y@Y(c(<(<_cdeqcr 
AXAXAZAZ*d*=*=`defsct 	*ucx ,G,G,I,IS((t-D-D-F-FvOjOjOlOlv))rv	
wcD 	5EcF 	:GcH 	?IcJ 	DKcL 
'CZCZC\C\)f)?)?bfghMcN 
?;R;R;T;T$^$7$7Z^_`OcR #=#=#?#?IT'>'>'@'@J##d	
Qc^ 	(_c` 
8R8R8T8T^44Z^`defacd )C)C)E)EO%%4-D-D-F-FP))D	
ccr )C)C)E)EO%%4-D-D-F-FP))D	
qc@ $>$>$@$@J  d(?(?(A(AK$$t	
cL 
43J3J3L3LV//RVWXMcP )C)C)E)EO%%4-D-D-F-FP))D	
Oc^ %?%?%A%AK!!t)@)@)B)BL%%	
]cl $>$>$@$@J  d(?(?(A(AK$$t	
kcz $>$>$@$@J  d(?(?(A(AK$$t	
yce e N %$%9;RSS @@#=#7#=#?#?@@@# %S	42H    @ 9= &*(,(,""l l#(bk#.>)>#?lc2;s#3345l l d^	l
 d38n%l E$)$%l sml l l 
#s(^l l l l^\r \r \r \r \r \r \r \r~ 
0r  