
     `i/2                     :   d Z ddlmZ ddlmZ ddlmZmZ  e            rddlm	Z	m
Z
mZmZmZmZmZmZ dZnBddlmZ ed	         Z	ed
         Z
ed         Zed         Zed         Zed         ZdededefdZdZ ej        e          Z G d de          ZdgZdS )zxLSTM configuration.    )Optional   )PretrainedConfig)is_xlstm_availablelogging)BackendModeTypeChunkwiseKernelType	DtypeTypeSequenceKernelTypeStepKernelTypeWeightModeTyperound_up_to_next_multiple_ofxLSTMLargeConfigT)Literal)traintrain_with_padding	inference)chunkwise--native_autogradzparallel--native_autograd)float32bfloat16float16native_sequence__nativenative)singlefusedxmultiple_ofreturnc                 8    t          | |z   dz
  |z  |z            S )z0Rounds up x to the next multiple of multiple_of.   )int)r   r   s     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/xlstm/configuration_xlstm.pyr   r   2   s#    Q_q([8KGHHH    Fc            A       l    e Zd ZdZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dBdededee         dee         d ee         d!ed"ed#ed$ed%ed&ed'ed(ed)e	d*e
d+ed,ed-ed.ed/ed0ed1ed2ed3ed4ed5ed6ed7ed8ed9ed:ed;ef@ fd<Zed=             Zed>             Zed?             Zed@             ZdA Z xZS )CxLSTMConfiga  
    This is the configuration class to store the configuration of a [`xLSTM`]. It is used to instantiate a xLSTM
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the xLSTM-7b [NX-AI/xLSTM-7b](https://huggingface.co/NX-AI/xLSTM-7b) model.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.


    Args:
        vocab_size (int, optional, *optional*, defaults to 50304):
            Vocabulary size of the xLSTM model. Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling [`xLSTMModel`]. Defaults to the GPT2-NeoX tokenizer size.
        hidden_size (int, optional, *optional*, defaults to 4096):
            Dimensionality of the embeddings or hidden states.
        embedding_dim (int, optional, *optional*, defaults to 4096):
            Dimensionality of the embeddings or hidden states, use hidde_size if None.
        num_hidden_layers (int, optional, *optional*, defaults to 32):
            Number of blocks of the xLSTM model.
        num_blocks (int, optional, *optional*, defaults to 32):
            Number of blocks of the xLSTM model, use num_hidden_layers if None.
        num_heads (int, optional, *optional*, defaults to 8):
            Number of heads for the xLSTM Layer/Cell.
        use_bias (bool, optional, *optional*, defaults to `False`):
            Whether to use biases in the xLSTM model.
        norm_reduction_force_float32 (bool, optional, *optional*, defaults to `True`):
            Whether to force the float32 norm reduction op to be done in fp32 precision.
        tie_word_embeddings (bool, optional, *optional*, defaults to `False`):
            Whether to tie word embeddings to the lm head weights.
        add_out_norm (bool, optional, *optional*, defaults to `True`):
            Whether to add an output norm after the blocks before the LMHead.
        norm_eps (float, optional, *optional*, defaults to 1e-06):
            Norm eps for RMSNorm and Layer Norm.
        qk_dim_factor (float, optional, *optional*, defaults to 0.5):
            Scale factor for the query and key dimension.
        v_dim_factor (float, optional, *optional*, defaults to 1.0):
            Scale factor for the value dimension.
        chunkwise_kernel (ChunkwiseKernelType, optional, *optional*, defaults to `"chunkwise--native_autograd"`):
            Kernel type for chunkwise processing mode.
        sequence_kernel (SequenceKernelType, optional, *optional*, defaults to `"native_sequence__native"`):
            Kernel type for sequence processing mode.
        step_kernel (StepKernelType, optional, *optional*, defaults to `"native"`):
            Kernel type for step processing mode.
        mode (BackendModeType, optional, *optional*, defaults to `"inference"`):
            Operation mode (inference is needed for generation).
        chunk_size (int, optional, *optional*, defaults to 64):
            Internal chunk size.
        return_last_states (bool, optional, *optional*, defaults to `True`):
            If to return the last states / cache internally. Needed as True for generation.
        autocast_kernel_dtype (DtypeType, optional, *optional*, defaults to `"bfloat16"`):
            Kernel dtype for the states.
        eps (float, optional, *optional*, defaults to 1e-06):
            Epsilon for the mLSTM cell post norm.
        inference_state_dtype (DtypeType, optional, *optional*, defaults to `"float32"`):
            Kernel dtype for states in inference.
        ffn_proj_factor (float, optional, *optional*, defaults to 2.667):
            Size factor of the post-up projection gated Feed Forward network.
        ffn_round_up_to_multiple_of (int, optional, *optional*, defaults to 64):
            Size factor round value of the post-up projection gated Feed Forward network.
        gate_soft_cap (float, optional, *optional*, defaults to 15.0):
            Gate soft cap scale.
        output_logit_soft_cap (float, optional, *optional*, defaults to 30.0):
            Output logit soft cap scale.
        weight_mode (`Literal`, *optional*, defaults to `"single"`):
            Whether parallel linear layers are separated or fused (single).
        use_cache (bool, optional, *optional*, defaults to `True`):
            Whether to use the cache (xLSTMCache).
        pad_token_id (int, optional, *optional*, defaults to 1):
            Pad token id needed for generation.
        bos_token_id (int, optional, *optional*, defaults to 0):
            BOS token id needed for generation.
        eos_token_id (int, optional, *optional*, defaults to 2):
            EOS token id needed for generation.
        max_inference_chunksize (int, optional, *optional*, defaults to 16384):
            Limit the chunk size for inference to save memory.

    Example:

    ```python
    >>> from transformers import xLSTMConfig, xLSTMModel

    >>> # Initializing a xLSTM configuration
    >>> configuration = xLSTMConfig()

    >>> # Initializing a model (with random weights) from the configuration
    >>> model = xLSTMModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```xlstm     N       FTư>      ?      ?r   r   r   r   @   r   r   tV@      .@      >@r   r    r       @  
vocab_sizehidden_sizeembedding_dimnum_hidden_layers
num_blocks	num_headsuse_biasnorm_reduction_force_float32tie_word_embeddingsadd_out_normnorm_epsqk_dim_factorv_dim_factorchunkwise_kernelsequence_kernelstep_kernelmode
chunk_sizereturn_last_statesautocast_kernel_dtypeepsinference_state_dtypeffn_proj_factorffn_round_up_to_multiple_ofgate_soft_capoutput_logit_soft_capweight_mode	use_cachepad_token_idbos_token_ideos_token_idmax_inference_chunksizec!                 $   || _         ||n|| _        ||n|| _        ||n|| _        ||n|| _        || _        || _        |	| _        |
| _        || _	        || _
        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        | | _         tA                      j!        d||||	d|! d S )N)rQ   rR   rP   r<    )"r4   r5   r6   r7   r8   r9   r:   r<   r=   r>   r;   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   super__init__)#selfr4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   kwargs	__class__s#                                     r"   rW   zxLSTMConfig.__init__   sj   X %*5*A;;}.;.G]][6G6S!2!2Yc(2(>**DU" #6 ( ,H)*( 0.&	$"4%:"%:".+F(*%:"&"((('>$ 	
%%% 3		
 	

 	
 	
 	
 	
 	
r#   c                 >    t          | j        | j        z  d          S Nr.   )r   )r   r5   r?   rX   s    r"   qk_dimzxLSTMConfig.qk_dim   s*    +t11
 
 
 	
r#   c                 >    t          | j        | j        z  d          S r\   )r   r5   r@   r]   s    r"   v_dimzxLSTMConfig.v_dim   s*    +t00
 
 
 	
r#   c                      | j         | j        z  S N)r^   r9   r]   s    r"   qk_head_dimzxLSTMConfig.qk_head_dim  s    {dn,,r#   c                      | j         | j        z  S rb   )r`   r9   r]   s    r"   
v_head_dimzxLSTMConfig.v_head_dim  s    zT^++r#   c                    t           rt          di d| j        d| j        d| j        d| j        d| j        d| j        d| j        d| j	        d	| j
        d
| j        d| j        d| j        d| j        d| j        d| j        d| j        d| j        d| j        d| j        d| j        d| j        d| j        d| j        d| j        S | S )Nr4   r6   r8   r9   r:   r=   r>   r;   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rU   )external_xlstmr   r4   r5   r7   r9   r:   r=   r>   r;   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   r]   s    r"   to_xlstm_block_configz!xLSTMConfig.to_xlstm_block_config
  s     	#   ??"..  11 ..	
  "..  .2-N-N #00 ".. "&!6!6 !% 4 4 !,,  YY!"  ??#$ $(#:#:%& '+&@&@'( HH)* '+&@&@+. !% 4 4/0 -1,L,L14 #0056 '+&@&@78 !,,9 > Kr#   ) r'   r(   Nr)   Nr*   FTFTr+   r,   r-   r   r   r   r   r.   Tr   r+   r   r/   r.   r0   r1   r   Tr    r   r2   r3   )__name__
__module____qualname____doc__
model_typer!   r   boolfloatr	   r   r   r   r
   r   rW   propertyr^   r`   rc   re   rh   __classcell__)rZ   s   @r"   r%   r%   <   s       Y Yv J  '++-$(-1$)!"!0L.G&. +#'+5+4!&+-#'+&.',SX
 X
X
 X
  }	X

 $C=X
 SMX
 X
 X
 '+X
 "X
 X
 X
 X
 X
" .#X
$ ,%X
& $'X
* +X
, -X
0 !1X
2  )3X
4 5X
6  )7X
: ;X
< &)=X
@ AX
B  %CX
F $GX
J KX
L MX
N OX
P QX
R "%SX
 X
 X
 X
 X
 X
t 
 
 X
 
 
 X
 - - X- , , X,! ! ! ! ! ! !r#   r%   N)rl   typingr   configuration_utilsr   utilsr   r   xlstm.xlstm_large.modelr   r	   r
   r   r   r   r   r   rg   r   r!   
get_loggerri   loggerr%   __all__rU   r#   r"   <module>ry      s            3 3 3 3 3 3 0 0 0 0 0 0 0 0  	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 NNHIO!	% 89I !:;X&N./NI I# I# I I I I N 
	H	%	%o o o o o" o o od /r#   