
    Pi                         d dl mZ d dlZd dlmZ 	 ddddddd	d
ddddddddZ	 	 	 	 ddeeej        f         dededededeeej        f         fdZ		 	 	 	 ddeeej        f         dedededef
dZ
dS )    )DictN)get_mapped_keyztok_embeddings.weightzlayers.{}.attn.q_proj.weightzlayers.{}.attn.k_proj.weightzlayers.{}.attn.v_proj.weightz!layers.{}.attn.output_proj.weightzlayers.{}.mlp.w1.weightzlayers.{}.mlp.w3.weightzlayers.{}.mlp.w2.weightzlayers.{}.sa_norm.scalezlayers.{}.sa_scale.scalezlayers.{}.mlp_norm.scalezlayers.{}.mlp_scale.scaleznorm.rms_norm.scalezoutput.weight)zmodel.embed_tokens.weightz'model.layers.{}.self_attn.q_proj.weightz'model.layers.{}.self_attn.k_proj.weightz'model.layers.{}.self_attn.v_proj.weightz'model.layers.{}.self_attn.o_proj.weightz-model.layers.{}.self_attn.rotary_emb.inv_freqz$model.layers.{}.mlp.gate_proj.weightz"model.layers.{}.mlp.up_proj.weightz$model.layers.{}.mlp.down_proj.weightz&model.layers.{}.input_layernorm.weightz/model.layers.{}.post_attention_layernorm.weightz1model.layers.{}.post_feedforward_layernorm.weightz0model.layers.{}.pre_feedforward_layernorm.weightzmodel.norm.weightzlm_head.weight       
state_dict	num_headsnum_kv_headsdimhead_dimreturnc                     i }|z  fd}|                                  D ]D\  }}d|vr;t          |t                    }	d|v r |||          }nd|v r |||          }|||	<   E|S )a(  
    Convert a state dict from HF's format to torchtune's format. State dicts
    from multiple checkpoint files should be consolidated into a single state dict
    before calling this function.

    Eg of HF-format state dict can be found in the ``meta-llama/Llama-2-7b-hf``
    repo in HF (https://huggingface.co/meta-llama/Llama-2-7b-hf).

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in HF's format.
        num_heads (int): Number of heads in the model.
        num_kv_heads (int): Number of heads in the key/value projection layers.
        dim (int): Dimension of the model.
        head_dim (int): Dimension of the head. If not provided, it will be calculated
            as dim // num_heads.

    Returns:
        Dict[str, torch.Tensor]: State dict in torchtune's format.
    Nc                     |                      |ddz                                dd                              |z            S N      view	transposereshapetn_headsr
   r   s     |/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/models/gemma2/_convert_weights.py_permutez#gemma2_hf_to_tune.<locals>._permuteF   sB    FF7Ax1}c22Yq!__Wh(3//	
    zrotary_emb.inv_freqq_projk_proj)itemsr   _GEMMA2_FROM_HF)
r   r   r	   r
   r   converted_state_dictr   keyvaluenew_keys
      ``     r   gemma2_hf_to_tuner$   (   s    4 )#
 
 
 
 
 
 !&&(( 2 2
U ++$S/::G3 	22S 55,1 )r   c                    i }d t                                           D             }|z  fd}|                                 D ];\  }}	t          ||          }
d|v r ||	|          }	nd|v r ||	|          }	|	||
<   <|S )an  
    Convert a state dict from torchtune's format to HF's format. This function
    doesn't handle any sharding or splitting of state dicts. It follows the
    state_dict IN -> state_dict OUT pattern.

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in torchtune's format.
        num_heads (int): Number of heads in the model.
        num_kv_heads (int): Number of heads in the key/value projection layers.
        dim (int): Dimension of the model.
        head_dim (int): Dimension of model attention heads. Default None.

    Returns:
        Dict[str, torch.Tensor]: State dict in HF's format.
    c                     i | ]\  }}||	S  r'   ).0kvs      r   
<dictcomp>z%gemma2_tune_to_hf.<locals>.<dictcomp>p   s    FFFdaQFFFr   Nc                     |                      |dz  d                              dd                              |z            S r   r   r   s     r   r   z#gemma2_tune_to_hf.<locals>._permuteu   sB    FF7HM1c22Yq!__Wh(3//	
r   r   r   )r   r   r   )r   r   r	   r
   r   r    inverted_mapping_dictr   r!   r"   r#   s      ``      r   gemma2_tune_to_hfr.   Y   s    , FFo.C.C.E.EFFF)#
 
 
 
 
 
 !&&(( . .
U &;<<s??HUI..EE__HUL11E(-W%%r   )r   r   r   N)typingr   torch torchtune.models.convert_weightsr   r   strTensorintr$   r.   r'   r   r   <module>r5      sW          ; ; ; ; ; ; "9/M/M/M/R59,E*C,E.G7Q9S8S.% * .  . S%,&'. .  .  
	. 
 .  
#u|
.  .  .  . f +  + S%,&'+ +  +  
	+ 
 +  +  +  +  +  + r   