
    PiL                         d dl mZmZ d dlZd dlmZ dddddd	d
ddd	Zdeeej        f         dee	         dee	         dee	         deeej        f         f
dZ
deeej        f         deeej        f         fdZdS )    )DictOptionalN)get_mapped_keyztok_embeddings.weightzlayers.{}.attn.q_proj.weightz!layers.{}.attn.output_proj.weightzlayers.{}.mlp.w1.weightzlayers.{}.mlp.w2.weightzlayers.{}.sa_norm.scalezlayers.{}.mlp_norm.scalez
norm.scalezoutput.weight)	zmodel.embed_tokens.weightz)model.layers.{}.self_attn.qkv_proj.weightz'model.layers.{}.self_attn.o_proj.weightz'model.layers.{}.mlp.gate_up_proj.weightz$model.layers.{}.mlp.down_proj.weightz&model.layers.{}.input_layernorm.weightz/model.layers.{}.post_attention_layernorm.weightzmodel.norm.weightzlm_head.weight
state_dict	num_headsnum_kv_headsdimreturnc                 8   i }|&||t          d          |}||z  |z  }||z  |z  }nd\  }}}|                                 D ]\  }}	t          |t                    }
d|v rt|t	          j        |	|||gd          \  }}}n|	                    dd          \  }}}|||
<   |||
                    dd	          <   |||
                    dd
          <   d|v r9|	                    dd          \  }}|||
<   |||
                    dd          <   |	||
<   |S )z
    Convertor from HF state dict to torchtune state dict. This handles:
    - Splitting the fused q,k and v matrix
    - Splitting the fused gate and up projection matrix
    NzKPhi models with GQA require dim, num_heads and num_kv_heads to be specified)NNNqkvr   r	      q_projk_projv_projgate   w1w3)
ValueErroritemsr   
_PHI3_MINItorchsplitchunkreplace)r   r   r   r	   converted_state_dictq_dimk_dimv_dimkeyvaluenew_keyqkvr   r   s                   z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/models/phi3/_convert_weights.pyphi3_hf_to_tuner(      s    
 4]   $	1$	1.ue &&(( 2 2
U j11C<< +eeUE-BJJJ1aa KKqK))	,- )HI 8!D!DEHI 8!D!DEEs]][[[**FB,. )@B t!<!<==,1 ))    c                    i }d t                                           D             }|                                 D ]\  }}d|v sd|v sd|v rt          ||          }d|v rY|}| |                    dd                   }| |                    dd                   }t	          j        |||gd          }	|	||<   d|v r<|}
| |                    dd                   }t	          j        |
|gd          }|||<   |||<   |S )	z
    Convertor from torchtune state dict to HF state dict. This handles:
    - Fusing q,k and v matrix
    - Fusing gate and up projection matrix
    c                     i | ]\  }}||	S  r,   ).0r%   r&   s      r'   
<dictcomp>z#phi3_tune_to_hf.<locals>.<dictcomp>P   s    AAAdaQAAAr)   r   r   r   r   r   r   r   )r   r   r   r   r   cat)r   r   inverted_mapping_dictr!   r"   r#   r$   r%   r&   r   	gate_projup_projgate_up_projs                r'   phi3_tune_to_hfr4   I   s>    AAj.>.>.@.@AAA &&(( 2 2
Us??h#oo &;<<s??A3;;x::;A3;;x::;A)Q1I1---C,/ ))S[[I T4!8!89G 9i%9qAAAL,8 )) -2 ))r)   )typingr   r   r    torchtune.models.convert_weightsr   r   strTensorintr(   r4   r,   r)   r'   <module>r:      s   " ! ! ! ! ! ! !  ; ; ; ; ; ; "91O/R/H,E.G7Q%%
 

+ S%,&'+ }+  3-+  
#	+ 
 
#u|
+  +  +  + \  S%,%6 7   DelAR<S                  r)   