
    Pi                     8   d dl mZ d dlZd dlmZ i dddddd	d
ddddddddddddddddddddddd d!d"Zd!Z	 	 	 	 	 d/d&eeej        f         d'e	d(e	d)e	d*e	d+e
d,eeej        f         fd-Z	 	 	 	 	 d/d&eeej        f         d'e	d(e	d)e	d*e	d+e
fd.ZdS )0    )DictN)get_mapped_keyzmodel.embed_tokens.weightztok_embeddings.weightz'model.layers.{}.self_attn.q_proj.weightzlayers.{}.attn.q_proj.weightz%model.layers.{}.self_attn.q_proj.biaszlayers.{}.attn.q_proj.biasz'model.layers.{}.self_attn.k_proj.weightzlayers.{}.attn.k_proj.weightz%model.layers.{}.self_attn.k_proj.biaszlayers.{}.attn.k_proj.biasz'model.layers.{}.self_attn.v_proj.weightzlayers.{}.attn.v_proj.weightz%model.layers.{}.self_attn.v_proj.biaszlayers.{}.attn.v_proj.biasz'model.layers.{}.self_attn.o_proj.weightz!layers.{}.attn.output_proj.weightz-model.layers.{}.self_attn.rotary_emb.inv_freqz$model.layers.{}.mlp.gate_proj.weightzlayers.{}.mlp.w1.weightz"model.layers.{}.mlp.up_proj.weightzlayers.{}.mlp.w3.weightz$model.layers.{}.mlp.down_proj.weightzlayers.{}.mlp.w2.weightz&model.layers.{}.input_layernorm.weightzlayers.{}.sa_norm.scalez/model.layers.{}.post_attention_layernorm.weightzlayers.{}.mlp_norm.scalezmodel.norm.weightz
norm.scalezlm_head.weightzoutput.weight       F
state_dict	num_headsnum_kv_headsdimhead_dimtie_word_embeddingsreturnc                     i }|||z  }|                                  D ]0\  }}|r
t          |v rd|v rt          |t                    }	|||	<   1|S )a  
    Convert a state dict from HF's format to TorchTune's format, which contains the weights
    of a Qwen2 model.
    State dicts from multiple checkpoint files should be consolidated into a single state dict
    before calling this function.
    The logic is identical to :func:`~torchtune.models.convert_weights.hf_to_tune`, but may not load
    output projection weights.

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in HF's format.
        num_heads (int): Number of heads in the model.
        num_kv_heads (int): Number of heads in the key/value projection layers.
        dim (int): Dimension of the model.
        head_dim (int): Dimension of the head. If not provided, it will be calculated
            as dim // num_heads.
        tie_word_embeddings (bool): Whether the model's input and output word embeddings should be tied.

    Returns:
        Dict[str, torch.Tensor]: State dict in torchtune's format.
    Nzrotary_emb.inv_freq)itemsQWEN2_TIED_KEYr   _FROM_HF)
r   r   r	   r
   r   r   converted_state_dictkeyvaluenew_keys
             {/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/models/qwen2/_convert_weights.pyqwen2_hf_to_tuner   %   s    8 )# &&(( 	. 	.
U	$2c$9$9 C'' h//(-W%%    c                     i }d t                                           D             }|||z  }|                                 D ]\  }}	t          ||          }
|	||
<   |S )a  
    Convert a state dict from torchtune's format to HF's format. This function
    doesn't handle any sharding or splitting of state dicts. It follows the
    state_dict IN -> state_dict OUT pattern.

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in torchtune's format.
        num_heads (int): Number of heads in the model.
        num_kv_heads (int): Number of heads in the key/value projection layers.
        dim (int): Dimension of the model.
        head_dim (int): Dimension of the head. If not provided, it will be calculated
            as dim // num_heads.
        tie_word_embeddings (bool): Whether the model's input and output word embeddings should be tied.

    Returns:
        Dict[str, torch.Tensor]: State dict in HF's format.
    c                     i | ]\  }}||	S  r   ).0kvs      r   
<dictcomp>z$qwen2_tune_to_hf.<locals>.<dictcomp>l   s    ???daQ???r   )r   r   r   )r   r   r	   r
   r   r   r   inverted_mapping_dictr   r   r   s              r   qwen2_tune_to_hfr!   R   s{    2 ??hnn.>.>???)# &&(( . .
U &;<<(-W%%r   )r   r   r   NF)typingr   torch torchtune.models.convert_weightsr   r   r   strTensorintboolr   r!   r   r   r   <module>r)      s          ; ; ; ; ; ;!8-/M ,-I ./M	
 ,-I ./M ,-I ./R 4T +,E )*C +,E -.G 67Q   o!( "
  %*  * S%,&'* *  *  
	* 
 *  *  
#u|
*  *  *  * ^  %#  # S%,&'# #  #  
	# 
 #  #  #  #  #  #  # r   