
     `iIQ                     :   d dl mZmZmZ d dlZd dlZd dlmZ ddlm	Z	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZmZ ddlmZmZmZmZm Z m!Z!m"Z" ddl#m$Z$ erddl%m&Z& ddiZ'dZ( ej)        e*          Z+ G d de          Z, G d de$e          Z- G d dej.                  Z/ G d de          Z0 G d de"          Z1 G d de!          Z2 G d  d!e           Z3 G d" d#e          Z4 G d$ d%e          Z5 G d& d'e          Z6g d(Z7dS ))    )TYPE_CHECKINGAnyOptionalN)nn   )CacheDynamicCache)PretrainedConfig)create_causal_mask)BaseModelOutputWithPast)PreTrainedModel)Unpack)
AddedTokenPreTrainedTokenizer)TransformersKwargslogging   )LlamaForCausalLMLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLP
LlamaModelLlamaPreTrainedModelLlamaRotaryEmbedding)LlamaTokenizer)	TextInput
vocab_fileztokenizer.modelu   ▁c                        e Zd ZdZdZdgZddddddddZdgdgfd	d
gd	gfd	gd	gfdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fd	Z xZ	S ) GemmaConfiga  
    This is the configuration class to store the configuration of a [`GemmaModel`]. It is used to instantiate an Gemma
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the Gemma-7B.
    e.g. [google/gemma-7b](https://huggingface.co/google/gemma-7b)
    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.
    Args:
        vocab_size (`int`, *optional*, defaults to 256000):
            Vocabulary size of the Gemma model. Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling [`GemmaModel`]
        hidden_size (`int`, *optional*, defaults to 3072):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 24576):
            Dimension of the MLP representations.
        num_hidden_layers (`int`, *optional*, defaults to 28):
            Number of hidden layers in the Transformer decoder.
        num_attention_heads (`int`, *optional*, defaults to 16):
            Number of attention heads for each attention layer in the Transformer decoder.
        num_key_value_heads (`int`, *optional*, defaults to 16):
            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
            `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
            by meanpooling all the original heads within that group. For more details, check out [this
            paper](https://huggingface.co/papers/2305.13245). If it is not specified, will default to
            `num_attention_heads`.
        head_dim (`int`, *optional*, defaults to 256):
            The attention head dimension.
        hidden_act (`str` or `function`, *optional*, defaults to `"gelu_pytorch_tanh"`):
            The legacy activation function. It is overwritten by the `hidden_activation`.
        hidden_activation (`str` or `function`, *optional*):
            The non-linear activation function (function or string) in the decoder. Will default to `"gelu_pytorch_tanh"`
            if not specified. `"gelu_pytorch_tanh"` uses an approximation of the `"gelu"` activation function.
        max_position_embeddings (`int`, *optional*, defaults to 8192):
            The maximum sequence length that this model might ever be used with.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        rms_norm_eps (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the rms normalization layers.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`.
        pad_token_id (`int`, *optional*, defaults to 0):
            Padding token id.
        eos_token_id (`int`, *optional*, defaults to 1):
            End of stream token id.
        bos_token_id (`int`, *optional*, defaults to 2):
            Beginning of stream token id.
        tie_word_embeddings (`bool`, *optional*, defaults to `True`):
            Whether to tie weight embeddings
        rope_theta (`float`, *optional*, defaults to 10000.0):
            The base period of the RoPE embeddings.
        attention_bias (`bool`, defaults to `False`, *optional*, defaults to `False`):
            Whether to use a bias in the query, key, value and output projection layers during self-attention.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
    ```python
    >>> from transformers import GemmaModel, GemmaConfig
    >>> # Initializing a Gemma gemma-7b style configuration
    >>> configuration = GemmaConfig()
    >>> # Initializing a model from the gemma-7b style configuration
    >>> model = GemmaModel(configuration)
    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```gemmapast_key_valuescolwiserowwise)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnorm      `           gelu_pytorch_tanhN    {Gz?ư>Tr      r        @F        c                 $   || _         |
| _        || _        || _        || _        || _        || _        || _        || _        |	| _	        || _
        || _        || _        || _        || _        || _         t!                      j        d||||d| d S )N)pad_token_idbos_token_ideos_token_idtie_word_embeddings )
vocab_sizemax_position_embeddingshidden_sizeintermediate_sizenum_hidden_layersnum_attention_headshead_dimnum_key_value_heads
hidden_acthidden_activationinitializer_rangerms_norm_eps	use_cache
rope_thetaattention_biasattention_dropoutsuper__init__)selfr>   r@   rA   rB   rC   rE   rD   rF   rG   r?   rH   rI   rJ   r9   r;   r:   r<   rK   rL   rM   kwargs	__class__s                         {/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/gemma/modular_gemma.pyrO   zGemmaConfig.__init__   s    0 %'>$&!2!2#6  #6 $!2!2("$,!2 	
%%% 3		
 	

 	
 	
 	
 	
 	
    )r+   r,   r-   r.   r/   r/   r0   r1   Nr2   r3   r4   Tr   r5   r   Tr6   Fr7   )
__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planrO   __classcell__rR   s   @rS   r   r   5   s        A AF J#4"5%.%.%.%."+ )"+  &(9:#%568IJ!"_$56  & $ +/
 /
 /
 /
 /
 /
 /
 /
 /
 /
rT   r   c            	           e Zd ZdZ	 	 	 	 	 	 	 	 	 	 dd	eeeef                  fd
Zd Z	d Z
dddee         fdZd Z	 	 ddee         dededefdZd ZdS )GemmaTokenizera
  
    Construct a Gemma tokenizer. Based on byte-level Byte-Pair-Encoding. The default padding token is unset as there is
    no padding token in the original model.

    Args:
        vocab_file (`str`):
            Path to the vocabulary file.
        unk_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<unk>"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead.
        bos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<bos>"`):
            The beginning of sequence token that was used during pretraining. Can be used a sequence classifier token.
        eos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<eos>"`):
            The end of sequence token.
        pad_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<pad>"`):
            A special token used to make arrays of tokens the same size for batching purpose. Will then be ignored by
            attention mechanisms or loss computation.
        sp_model_kwargs (`dict[str, Any]`, `Optional`, *optional*):
            Will be passed to the `SentencePieceProcessor.__init__()` method. The [Python wrapper for
            SentencePiece](https://github.com/google/sentencepiece/tree/master/python) can be used, among other things,
            to set:

            - `enable_sampling`: Enable subword regularization.
            - `nbest_size`: Sampling parameters for unigram. Invalid for BPE-Dropout.

              - `nbest_size = {0,1}`: No sampling is performed.
              - `nbest_size > 1`: samples from the nbest_size results.
              - `nbest_size < 0`: assuming that nbest_size is infinite and samples from the all hypothesis (lattice)
                using forward-filtering-and-backward-sampling algorithm.

            - `alpha`: Smoothing parameter for unigram sampling, and dropout probability of merge operations for
              BPE-dropout.

        add_bos_token (`bool`, *optional*, defaults to `True`):
            Whether or not to add an `bos_token` at the start of sequences.
        add_eos_token (`bool`, *optional*, defaults to `False`):
            Whether or not to add an `eos_token` at the end of sequences.
        clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
            Whether or not to cleanup spaces after decoding, cleanup consists in removing potential artifacts like
            extra spaces.
        use_default_system_prompt (`bool`, *optional*, defaults to `False`):
            Whether or not the default system prompt for Gemma should be used.
        spaces_between_special_tokens (`bool`, *optional*, defaults to `False`):
            Whether or not to add spaces between special tokens.
    <unk><bos><eos><pad>NTFsp_model_kwargsc                 @   |i n|| _         t          |t                    rt          |dd          n|}t          |t                    rt          |dd          n|}t          |t                    rt          |dd          n|}t          |t                    rt          |dd          n|}|| _        || _        || _        |
| _        t          j	        di | j         | _
        | j
                            |           t          j        | f||||||||	|
|d
| d S )NFT)
normalizedspecial)
	bos_token	eos_token	unk_token	pad_tokenadd_bos_tokenadd_eos_tokenre   clean_up_tokenization_spacesuse_default_system_promptspaces_between_special_tokensr=   )re   
isinstancestrr   r   rm   rn   rp   spmSentencePieceProcessorsp_modelLoadr   rO   )rP   r   rk   ri   rj   rl   re   rm   rn   ro   rp   rq   rQ   s                rS   rO   zGemmaTokenizer.__init__   se    &5%<rr/MWXacfMgMgvJyUDIIIImv	MWXacfMgMgvJyUDIIIImv	MWXacfMgMgvJyUDIIIImv	MWXacfMgMgvJyUDIIIImv	$**)B&2JJT5IJJ:&&&$	
''+)E&?*G	
 	
 	
 	
 	
 	
 	
rT   c                      t          d          NzNot needed for GemmaAttributeErrorrP   s    rS   get_spm_processorz GemmaTokenizer.get_spm_processor      3444rT   c                      t          d          ry   rz   r|   s    rS   unk_token_lengthzGemmaTokenizer.unk_token_length  r~   rT   textr   returnc                 (    t          j        | |fi |S )ze
        Args:
            text: TextInput
        Simply calls PreTrainedTokenizer's method
        )r   tokenizerP   r   rQ   s      rS   r   zGemmaTokenizer.tokenize  s     #+D$AA&AAArT   c                 D    | j                             |t                    S )z
        Args:
            text: TextInput
        Returns a tokenized string. The Gemma tokenizer never adds a prefix space.
        )out_type)rv   encoders   r   s      rS   	_tokenizezGemmaTokenizer._tokenize$  s     }##D3#777rT   	token_idsskip_special_tokensrq   c                    g }g }|D ]}|r
|| j         v r|| j        v rW|r-|                    | j                            |                     |                    | j        |         j                   g }n|                    |           |r-|                    | j                            |                     |rd                    |          }nd                    |          }|                    t          d          S )N  )	all_special_ids_added_tokens_decoderappendrv   decodecontentjoinreplaceSPIECE_UNDERLINE)rP   r   r   rq   rQ   	sub_textscurrent_sub_textidss           rS   _decodezGemmaTokenizer._decode,  s"    	 		- 		-C" sd.B'B'Bd000# M$$T]%9%9:J%K%KLLL  !;C!@!HIII#%   '',,,, 	ET]112BCCDDD( 	+++II	**I  !13777rT   c                     g }d}|D ]C}|| j         v r#|| j                            |          |z   z  }g }.|                    |           D|| j                            |          z  }|S )z:Converts a sequence of tokens (string) in a single string.r   )_added_tokens_encoderrv   r   r   )rP   tokenscurrent_sub_tokens
out_stringtokens        rS   convert_tokens_to_stringz'GemmaTokenizer.convert_tokens_to_stringI  s    
 	1 	1E222dm223EFFNN
%'"""))%0000dm**+=>>>
rT   )
ra   rb   rc   rd   NTFFFF)FF)rU   rV   rW   rX   r   dictrs   r   rO   r}   r   listr   r   intboolr   r   r=   rT   rS   r`   r`      s#       , ,b 48%*"'&+)
 )
 "$sCx.1)
 )
 )
 )
V5 5 55 5 5B[ BtCy B B B B8 8 8 %*.3	8 898 "8 (,	8 
8 8 8 8:    rT   r`   c                   <     e Zd Zddedef fdZd Zd Zd Z xZ	S )	GemmaRMSNormr4   dimepsc                     t                                                       || _        t          j        t          j        |                    | _        d S )N)rN   rO   r   r   	Parametertorchzerosweight)rP   r   r   rR   s      rS   rO   zGemmaRMSNorm.__init__Y  s?    l5;s#3#344rT   c                     |t          j        |                    d                              dd          | j        z             z  S )Nr   T)keepdim)r   rsqrtpowmeanr   )rP   xs     rS   _normzGemmaRMSNorm._norm^  s8    5;quuQxx}}R}>>IJJJJrT   c                     |                      |                                          }|d| j                                        z   z  }|                    |          S )Ng      ?)r   floatr   type_as)rP   r   outputs      rS   forwardzGemmaRMSNorm.forwarda  sL    AGGII&& 3!2!2!4!445~~a   rT   c                 H    t          | j        j                   d| j         S )Nz, eps=)tupler   shaper   r|   s    rS   
extra_reprzGemmaRMSNorm.extra_reprh  s%    )**<<$(<<<rT   )r4   )
rU   rV   rW   r   r   rO   r   r   r   r]   r^   s   @rS   r   r   X  s        5 5C 5e 5 5 5 5 5 5
K K K! ! != = = = = = =rT   r   c                        e Zd Z fdZ xZS )GemmaMLPc                 .   t                                          |           t          j        | j        | j        d          | _        t          j        | j        | j        d          | _        t          j        | j        | j        d          | _        d S )NF)bias)	rN   rO   r   Linearr@   rA   	gate_projup_proj	down_proj)rP   configrR   s     rS   rO   zGemmaMLP.__init__m  s|       4#3T5KRWXXXy!143IPUVVV4#94;KRWXXXrT   )rU   rV   rW   rO   r]   r^   s   @rS   r   r   l  sA        Y Y Y Y Y Y Y Y YrT   r   c                       e Zd ZdS )GemmaRotaryEmbeddingNrU   rV   rW   r=   rT   rS   r   r   t          DrT   r   c                       e Zd Zd ZdS )GemmaPreTrainedModelc                     t          j        | |           d|j        j        v r |j        j                                         d S d S )NRMSNorm)r   _init_weightsrR   rU   r   datazero_)rP   modules     rS   r   z"GemmaPreTrainedModel._init_weightsy  sM    %dF333 (111M$$&&&&& 21rT   N)rU   rV   rW   r   r=   rT   rS   r   r   x  s#        ' ' ' ' 'rT   r   c                       e Zd Z	 	 	 	 	 	 	 ddeej                 deej                 deej                 dee         deej                 dee	         deej                 d	e
e         d
efdZdS )
GemmaModelNr$   r'   position_idsr!   r%   rJ   cache_positionrQ   r   c                    |d u |d uz  rt          d          ||                     |          }|r|t          | j                  }|B||                                nd}	t          j        |	|	|j        d         z   |j                  }||	                    d          }t          | j        |||||          }
|}|                     ||          }t          j        | j        j        dz  |j                  }||z  }| j        d | j        j                 D ]} ||f|
|||||d	|}|                     |          }t%          ||r|nd 
          S )Nz:You must specify exactly one of input_ids or inputs_embeds)r   r   r5   )device)r   input_embedsr'   r   r!   r   g      ?)dtype)r'   r   r!   rJ   r   position_embeddings)last_hidden_stater!   )
ValueErrorr(   r	   r   get_seq_lengthr   aranger   r   	unsqueezer   
rotary_embtensorr@   r   r)   rB   r*   r   )rP   r$   r'   r   r!   r%   rJ   r   rQ   past_seen_tokenscausal_maskr&   r   
normalizerdecoder_layers                  rS   r   zGemmaModel.forward  s    -t";< 	[YZZZ  --i88M 	?0*$+>>>O!CRC^==???de"\ "2]5H5K"KTaTh  N )33A66L(;&))+%
 
 
 & #oom\JJ
 \$+"93">mFYZZZ
%
2![)H4;+H)HI 
	 
	M)M	*) /#-$7	 	 	 	MM 		-00&+/8BOOd
 
 
 	
rT   )NNNNNNN)rU   rV   rW   r   r   
LongTensorTensorr   FloatTensorr   r   r   r   r   r=   rT   rS   r   r     s         151537+/59$(59A
 A
E,-A
 !.A
 u/0	A

 "%A
   12A
 D>A
 !!12A
 +,A
 
!A
 A
 A
 A
 A
 A
rT   r   c                        e Zd Z fdZ xZS )GemmaForCausalLMc                  6     t                      j        di | S )a|  
        Example:

        ```python
        >>> from transformers import AutoTokenizer, GemmaForCausalLM

        >>> model = GemmaForCausalLM.from_pretrained("google/gemma-7b")
        >>> tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b")

        >>> prompt = "What is your favorite condiment?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "What is your favorite condiment?"
        ```r=   )rN   r   )super_kwargsrR   s    rS   r   zGemmaForCausalLM.forward  s!    $ uww.....rT   )rU   rV   rW   r   r]   r^   s   @rS   r   r     s8        / / / / / / / / /rT   r   c                       e Zd ZdS )GemmaForSequenceClassificationNr   r=   rT   rS   r   r     r   rT   r   c                       e Zd ZdS )GemmaForTokenClassificationNr   r=   rT   rS   r   r     r   rT   r   )r   r`   r   r   r   r   r   )8typingr   r   r   sentencepiecert   r   r   cache_utilsr   r	   configuration_utilsr
   masking_utilsr   modeling_outputsr   modeling_utilsr   processing_utilsr   tokenization_utilsr   r   utilsr   r   llama.modeling_llamar   r   r   r   r   r   r   llama.tokenization_llamar   tokenization_utils_baser   VOCAB_FILES_NAMESr   
get_loggerrU   loggerr   r`   Moduler   r   r   r   r   r   r   r   __all__r=   rT   rS   <module>r     s    0 / / / / / / / / /            . . . . . . . . 3 3 3 3 3 3 / / / / / / 7 7 7 7 7 7 - - - - - - & & & & & & A A A A A A A A 0 0 0 0 0 0 0 0                  6 5 5 5 5 5  5444444!#45   
	H	%	%D
 D
 D
 D
 D
" D
 D
 D
NY Y Y Y Y^%8 Y Y Yx= = = = =29 = = =(Y Y Y Y Yx Y Y Y	 	 	 	 	/ 	 	 	' ' ' ' '/ ' ' 'B
 B
 B
 B
 B
 B
 B
 B
J/ / / / /' / / /,	 	 	 	 	%C 	 	 		 	 	 	 	"= 	 	 	  rT   