
     `iPH                         d Z ddlmZ ddlmZ  ej        e          Z G d de          Z G d de          Z	 G d d	e          Z
 G d
 de          Z G d de          Zg dZdS )zBlt model configuration   )PretrainedConfig)loggingc                   H     e Zd ZdZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fd	Z xZS )BltLocalEncoderConfigzB
    Configuration class for the Blt Local Encoder component.
    blt_local_encoder  F            N   h㈵>         `      Asilu   {Gz?c                    || _         || _        || _        || _        || _        || _        |p|| _        ||z  | _        |pt          d|z  dz            | _	        || _
        |	| _        |
| _        || _        || _        || _        || _        || _        |                    dd             t'                      j        di |ddi d S N   r   tie_word_embeddingsF 
vocab_sizecross_attn_all_layerscross_attn_khidden_size_globalhidden_sizenum_attention_headsnum_key_value_headshead_dimintintermediate_sizenum_hidden_layersrms_norm_epsdropoutmax_position_embeddings
rope_thetarope_scaling
hidden_actinitializer_rangepopsuper__init__selfr   r   r   r   r   r    r!   r%   r&   r'   r(   r)   r*   r+   r$   r,   kwargs	__class__s                     }/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/blt/configuration_blt.pyr/   zBltLocalEncoderConfig.__init__       ( %%:"("4&#6 #6#M:M #'::!2!Nc!k/A:M6N6N!2('>$$($!2 	

($///==6==u======    )r   Fr	   r
   r   r   Nr   r   r   r   r   Nr   r   r   __name__
__module____qualname____doc__
model_typer/   __classcell__r3   s   @r4   r   r      s          %J #  %#(> (> (> (> (> (> (> (> (> (>r6   r   c                   H     e Zd ZdZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fd	Z xZS )BltLocalDecoderConfigzB
    Configuration class for the Blt Local Decoder component.
    blt_local_decoderr   Tr	   r
   r   r   N	   r   r   r   r   r   r   r   c                    || _         || _        || _        || _        || _        || _        |p|| _        ||z  | _        |pt          d|z  dz            | _	        || _
        |	| _        |
| _        || _        || _        || _        || _        || _        |                    dd             t'                      j        di |ddi d S r   r   r0   s                     r4   r/   zBltLocalDecoderConfig.__init__Q   r5   r6   )r   Tr	   r
   r   r   NrB   r   r   r   r   Nr   r   r   r7   r>   s   @r4   r@   r@   J   s          %J "  %#(> (> (> (> (> (> (> (> (> (>r6   r@   c                   @     e Zd ZdZdZ	 	 	 	 	 	 	 	 	 	 	 	 d fd	Z xZS )BltGlobalTransformerConfigzG
    Configuration class for the Blt Global Transformer component.
    blt_global_transformerr
   r   N   r   r      r   r      r   c                 X   || _         || _        |p|| _        ||z  | _        |pt	          d|z  dz            | _        || _        || _        || _        || _	        || _
        |	| _        |
| _        || _        |                    dd             t                      j        di |ddi d S r   )r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   )r1   r   r    r!   r%   r&   r'   r(   r)   r*   r+   r$   r,   r2   r3   s                 r4   r/   z#BltGlobalTransformerConfig.__init__   s      '#6 #6#M:M #'::!2!Nc!k/A:M6N6N!2('>$$($!2 	

($///==6==u======r6   )r
   r   NrG   r   r   rH   r   Nr   rI   r   r7   r>   s   @r4   rE   rE   |   st          *J   $ >  >  >  >  >  >  >  >  >  >r6   rE   c                   @     e Zd ZdZdZ	 	 	 	 	 	 	 	 	 	 	 	 d fd	Z xZS )BltPatcherConfiga	  
    Configuration class for the Blt Patcher/Entropy model component.

    Args:
            vocab_size (`int`, *optional*, defaults to 260):
                Vocabulary size of the Blt patcher model. Defines the number of different tokens that can be represented by the
                `inputs_ids` passed when calling the patcher model.
            hidden_size (`int`, *optional*, defaults to 768):
                Dimension of the hidden representations.
            num_hidden_layers (`int`, *optional*, defaults to 14):
                Number of hidden layers in the Transformer decoder.
            num_attention_heads (`int`, *optional*, defaults to 12):
                Number of attention heads for each attention layer in the Transformer decoder.
            num_key_value_heads (`int`, *optional*):
                This is the number of key_value heads that should be used to implement Grouped Query Attention. If
                `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
                `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
                converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
                by meanpooling all the original heads within that group. For more details, check out [this
                paper](https://huggingface.co/papers/2305.13245). If it is not specified, will default to
                `num_attention_heads`.
            max_position_embeddings (`int`, *optional*, defaults to 8192):
                The maximum sequence length that this model might ever be used with.
            rms_norm_eps (`float`, *optional*, defaults to 1e-05):
                The epsilon used by the rms normalization layers.
            dropout (`float`, *optional*, defaults to 0.0):
                The dropout ratio for the attention probabilities.
            rope_theta (`float`, *optional*, defaults to 10000.0):
                The base period of the RoPE embeddings.
            intermediate_size (`int`, *optional*, defaults to 2048):
                Dimension of the MLP representations.
            rope_scaling (`dict`, *optional*):
                Dictionary containing the RoPE scaling configuration.
            initializer_range (`float`, *optional*, defaults to 0.02):
                The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
    blt_patcherr            N    r   r        @r
   r   c                 t   || _         || _        || _        || _        ||z  | _        ||n|| _        || _        || _        || _        |	| _	        d| _
        |
pt          d| j        z  dz            | _        || _        || _        |                    dd             t!                      j        di |ddi d S )Nr   r   r   r   Fr   )r   r   r%   r    r"   r!   r(   r&   r'   r)   r+   r#   r$   r*   r,   r-   r.   r/   )r1   r   r   r%   r    r!   r(   r&   r'   r)   r$   r*   r,   r2   r3   s                 r4   r/   zBltPatcherConfig.__init__   s      %&!2#6 #':::M:Y#6#6_r '>$($ !2!Sc!d>N:NQR:R6S6S(!2 	

($///==6==u======r6   )r   rN   rO   rP   NrQ   r   r   rR   r
   Nr   r7   r>   s   @r4   rL   rL      su        # #J J   $!> !> !> !> !> !> !> !> !> !>r6   rL   c                   d     e Zd ZdZdZdgZeeee	dZ
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fd	Z xZS )	BltConfigas  
    This is the configuration class to store the configuration of a [`BltModel`]. It is used to instantiate a
    Blt model according to the specified arguments, defining the model architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
            vocab_size (`int`, *optional*, defaults to 260):
                Vocabulary size of the Blt model. Defines the number of different tokens that can be represented by the
                `inputs_ids` passed when calling [`BltModel`].
            max_position_embeddings (`int`, *optional*, defaults to 4096):
                The maximum sequence length that this model might ever be used with.
            patch_in_forward (`bool`, *optional*, defaults to `True`):
                Whether to perform patching during the forward pass.
            patch_size (`int`, *optional*, defaults to 4):
                Size of the patches used in the patching mechanism.
            patching_mode (`str`, *optional*, defaults to `"entropy"`):
                The mode used for patching, such as entropy-based patching.
            patching_threshold (`float`, *optional*, defaults to 1.34):
                Threshold value used for determining when to apply patches.
            patching_batch_size (`int`, *optional*, defaults to 1):
                Batch size used during the patching process.
            max_patch_length (`int`, *optional*):
                Maximum length of patches that can be generated.
            cross_attn_k (`int`, *optional*, defaults to 2):
                Number of cross-attention heads used in the model.
            encoder_hash_byte_group_size (`list`, *optional*):
                List of byte group sizes used in the encoder hash function.
            encoder_hash_byte_group_vocab (`int`, *optional*, defaults to 500002):
                Vocabulary size for the encoder hash byte groups.
            encoder_hash_byte_group_nb_functions (`int`, *optional*, defaults to 1):
                Number of hash functions used in the encoder byte grouping.
            patcher_config (`BltPatcherConfig`, *optional*):
                Configuration for the patcher component of the model.
            encoder_config (`BltLocalEncoderConfig`, *optional*):
                Configuration for the local encoder component of the model.
            decoder_config (`BltLocalDecoderConfig`, *optional*):
                Configuration for the local decoder component of the model.
            global_config (`BltGlobalTransformerConfig`, *optional*):
                Configuration for the global transformer component of the model.
            tie_word_embeddings (`bool`, *optional*, defaults to `False`):
                Whether to tie weight embeddings.
            initializer_range (`float`, *optional*, defaults to 0.02):
                The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
            rope_theta (`float`, *optional*, defaults to 500000.0):
                The base period of the RoPE embeddings.
            rope_scaling (`dict`, *optional*):
                Dictionary containing the RoPE scaling configuration.

    ```python
    >>> from transformers import BltModel, BltConfig

    >>> # Initializing a Blt configuration
    >>> configuration = BltConfig()

    >>> # Initializing a model from the configuration
    >>> model = BltModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```

    Checkpoint: [facebook/blt](https://huggingface.co/facebook/blt)
    bltpast_key_values)patcher_configencoder_configdecoder_configglobal_configr   rH   T   entropy   ]?r   Nr	   " Fr   r   c                    || _         || _        || _        || _        || _        || _        || _        || _        || _        || _	        || _
        |                    dd          | _        |                    dd          | _        |                    d          | _        |                    dd          | _        |	| _        |
pg d| _        || _        || _        |0t)          |	          | _        t,                              d
           nYt1          |t2                    r(|                    d|           t)          di || _        nt1          |t(                    r|| _        |0t7          |	          | _        t,                              d           nYt1          |t2                    r(|                    d|           t7          di || _        nt1          |t6                    r|| _        |0t;          |	          | _        t,                              d           nYt1          |t2                    r(|                    d|           t;          di || _        nt1          |t:                    r|| _        |0t?          |	          | _         t,                              d           nYt1          |t2                    r(|                    d|           t?          di || _         nt1          |t>                    r|| _         | j        j!        | j        z  }|| j         j!        k    r|nd | j         _"        |#                    dd             tI                      j%        dd|i| d S )Npatching_devicecudarealtime_patchingTpatching_threshold_addmonotonicityF)r   r\            r   )r,   z8patcher_config is None, using default Blt patcher configr,   z8encoder_config is None, using default Blt encoder configz8decoder_config is None, using default Blt decoder configz6global_config is None, using default Blt global configr   r   )&r   r(   r,   r)   r*   patch_in_forward
patch_sizepatching_modepatching_thresholdpatching_batch_sizemax_patch_lengthgetra   rc   rd   re   r   encoder_hash_byte_group_sizeencoder_hash_byte_group_vocab$encoder_hash_byte_group_nb_functionsrL   rX   loggerinfo
isinstancedict
setdefaultr   rY   r@   rZ   rE   r[   r   encoder_cross_output_sizer-   r.   r/   )r1   r   r(   ri   rj   rk   rl   rm   rn   r   rp   rq   rr   rX   rY   rZ   r[   r   r,   r)   r*   r2   rx   r3   s                          r4   r/   zBltConfig.__init__>  s   2 %'>$!2$( !1$*"4#6  0%zz*;VDD!',?!F!F&,jj1I&J&J#"JJ~u== ) -I,^L^L^L^)-J*4X1 !"2EV"W"W"WDKKRSSSS-- 	1%%&9;LMMM"2"D"D^"D"DD(899 	1"0D!"7J["\"\"\DKKRSSSS-- 	1%%&9;LMMM"7"I"I."I"ID(=>> 	1"0D!"7J["\"\"\DKKRSSSS-- 	1%%&9;LMMM"7"I"I."I"ID(=>> 	1"0D !;N_!`!`!`DKKPQQQQt,, 	/$$%8:KLLL!;!L!Lm!L!LD'ABB 	/!.D %)$7$CdFW$W!)BdFXFd)d)d%%jn 	4
 	

($///KK-@KFKKKKKr6   )r   rH   Tr\   r]   r^   r   Nr	   Nr_   r   NNNNFr   r   N)r8   r9   r:   r;   r<   keys_to_ignore_at_inferencerL   r   r@   rE   sub_configsr/   r=   r>   s   @r4   rU   rU      s        @ @D J#4"5*//3	 K  $,%)&,-.!+`L `L `L `L `L `L `L `L `L `Lr6   rU   )rU   rL   r   r@   rE   N)r;   configuration_utilsr   utilsr   
get_loggerr8   rs   r   r@   rE   rL   rU   __all__r   r6   r4   <module>r      sN     3 3 3 3 3 3       
	H	%	%/> /> /> /> />, /> /> />d/> /> /> /> />, /> /> />d'> '> '> '> '>!1 '> '> '>TI> I> I> I> I>' I> I> I>XlL lL lL lL lL  lL lL lL^  r6   