
     `i                         d dl Z d dlmZmZ d dlZd dlmZ ddlm	Z	 ddl
mZmZ ddlmZ  e            rd d	lmZ  ed
           G d de	j        j                              ZdgZdS )    N)OptionalUnion)pad_model_inputs   )keras)is_keras_nlp_availablerequires   )GPT2Tokenizer)BytePairTokenizer)	keras_nlp)backendsc            
            e Zd ZdZ	 	 ddeeef         dee         dee         dee         f fdZ	e
defd	            Ze
d
eeej        f         fd            Ze
d             Zd Zddee         fdZ xZS )TFGPT2Tokenizera7  
    This is an in-graph tokenizer for GPT2. It should be initialized similarly to other tokenizers, using the
    `from_pretrained()` method. It can also be initialized with the `from_tokenizer()` method, which imports settings
    from an existing standard tokenizer object.

    In-graph tokenizers, unlike other Hugging Face tokenizers, are actually Keras layers and are designed to be run
    when the model is called, rather than during preprocessing. As a result, they have somewhat more limited options
    than standard tokenizer classes. They are most useful when you want to create an end-to-end model that goes
    straight from `tf.string` inputs to outputs.

    Args:
        vocab (dict[str, int]): Vocabulary dict for Byte Pair Tokenizer
        merges (list[str]): Merges list for Byte Pair Tokenizer
    Nvocabmerges
max_lengthpad_token_idc                     t                                                       || _        || _        || _        || _        t          |||          | _        d S )N)sequence_length)super__init__r   r   r   r   r   tf_tokenizer)selfr   r   r   r   	__class__s        /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/gpt2/tokenization_gpt2_tf.pyr   zTFGPT2Tokenizer.__init__!   sU     	($
-eVZXXX    	tokenizerc                 f    d |j         D             }|                                } | ||g|R i |S )ag  Creates TFGPT2Tokenizer from GPT2Tokenizer

        Args:
            tokenizer (GPT2Tokenizer)

        Examples:

        ```python
        from transformers import AutoTokenizer, TFGPT2Tokenizer

        tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
        tf_tokenizer = TFGPT2Tokenizer.from_tokenizer(tokenizer)
        ```
        c                 8    g | ]}d                      |          S ) )join).0ms     r   
<listcomp>z2TFGPT2Tokenizer.from_tokenizer.<locals>.<listcomp>@   s"    ;;;!#((1++;;;r   )	bpe_ranks	get_vocab)clsr   argskwargsr   r   s         r   from_tokenizerzTFGPT2Tokenizer.from_tokenizer0   sM      <;y':;;;##%%s5&242226222r   pretrained_model_name_or_pathc                 N    t          j        |g|R i |} | j        |g|R i |S )a_  Creates TFGPT2Tokenizer from pretrained GPT2Tokenizer

        Args:
            pretrained_model_name_or_path (Union[str, os.PathLike]): Path to pretrained model

        Examples:

        ```python
        from transformers import TFGPT2Tokenizer

        tf_tokenizer = TFGPT2Tokenizer.from_pretrained("openai-community/gpt2")
        ```
        )r   from_pretrainedr+   )r(   r,   init_inputsr*   r   s        r   r.   zTFGPT2Tokenizer.from_pretrainedD   sJ     "12OhR]hhhaghh	!s!)DkDDDVDDDr   c                      | di |S )zCreates TFGPT2Tokenizer from configurations

        Args:
            config (Dict): Dictionary with keys such as stated in `get_config`.
         r1   )r(   configs     r   from_configzTFGPT2Tokenizer.from_configV   s     s}}V}}r   c                 8    | j         | j        | j        | j        dS )Nr   r   r   r   r5   )r   s    r   
get_configzTFGPT2Tokenizer.get_config_   s&    Zk/ -	
 
 	
r   c                     |                      |          }t          j        |          }| j        '||n| j        }|t          ||| j                  \  }}||dS )N)max_seq_length	pad_value)attention_mask	input_ids)r   tf	ones_liker   r   r   )r   xr   r;   r:   s        r   callzTFGPT2Tokenizer.callg   sv    %%a((	i00('1'=4?J%,<jDDU- - -)	> #1yIIIr   )NN)N)__name__
__module____qualname____doc__dictstrintlistr   r   classmethodr   r+   r   osPathLiker.   r3   r6   r?   __classcell__)r   s   @r   r   r      sP        & %)&*Y YCH~Y S	Y SM	Y
 smY Y Y Y Y Y 3} 3 3 3 [3& EE#r{BR<S E E E [E"   [
 
 
J J(3- J J J J J J J Jr   r   )rI   typingr   r   
tensorflowr<   tensorflow_textr   modeling_tf_utilsr   utils.import_utilsr   r	   tokenization_gpt2r   keras_nlp.tokenizersr   layersLayerr   __all__r1   r   r   <module>rV      s   				 " " " " " " " "     , , , , , , & & & & & & B B B B B B B B , , , , , ,  7666666 
>"""cJ cJ cJ cJ cJel( cJ cJ #"cJL 
r   