
    Pi+                     X   d dl Z d dlmZmZmZ d dlZddddddd	d
dddddZddddd	ddddd
ddddZdedeeef         defdZ	deeej
        f         deeej
        f         fdZdeeej
        f         deeej
        f         fdZ	 	 	 	 d2deeej
        f         dededededeeej
        f         fdZ	 	 	 	 d2deeej
        f         dedededef
dZd d!d"d#Zd$d%d&d'd(d)d*d+d,Zg d-Z	 d3d.eeef         d/ee         fd0Z	 	 	 	 d2deeej
        f         dedededee         f
d1ZdS )4    N)AnyDictOptionaltok_embeddings.weightz
norm.scaleoutput.weightzlayers.{}.attn.k_proj.weightzlayers.{}.attn.q_proj.weightzlayers.{}.attn.v_proj.weightz!layers.{}.attn.output_proj.weightzlayers.{}.sa_norm.scalezlayers.{}.mlp_norm.scalezlayers.{}.mlp.w1.weightzlayers.{}.mlp.w2.weightzlayers.{}.mlp.w3.weight)r   znorm.weightr   zlayers.{}.attention.wk.weightzlayers.{}.attention.wq.weightzlayers.{}.attention.wv.weightzlayers.{}.attention.wo.weightzlayers.{}.attention_norm.weightzlayers.{}.ffn_norm.weightz layers.{}.feed_forward.w1.weightz layers.{}.feed_forward.w2.weightz layers.{}.feed_forward.w3.weight)zmodel.embed_tokens.weightz'model.layers.{}.self_attn.q_proj.weightz'model.layers.{}.self_attn.k_proj.weightz'model.layers.{}.self_attn.v_proj.weightz'model.layers.{}.self_attn.o_proj.weightz-model.layers.{}.self_attn.rotary_emb.inv_freqz$model.layers.{}.mlp.gate_proj.weightz"model.layers.{}.mlp.up_proj.weightz$model.layers.{}.mlp.down_proj.weightz&model.layers.{}.input_layernorm.weightz/model.layers.{}.post_attention_layernorm.weightzmodel.norm.weightzlm_head.weightkeymapping_dictreturnc                 v   	 t          d |                     d          D                       r\t          j        dd|           }t          j        d|                               d          }||         }|                    |          }n||          }n&# t          $ r}t          d|  d          |d }~ww xY w|S )	Nc              3   >   K   | ]}|                                 V  d S N)isdigit).0ks     t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/models/convert_weights.py	<genexpr>z!get_mapped_key.<locals>.<genexpr>2   s*      33qqyy{{333333    .z(\.\d+)z.{}z\d+r   z8Error converting the state dict. Found unexpected key: "zG". Please make sure you're loading a checkpoint with the right format. )	anysplitresubsearchgroupformatKeyError	Exception)r   r	   abstract_key	layer_numnew_keyes         r   get_mapped_keyr"   /   s    33CIIcNN33333 	(6*eS99L	&#..44Q77I"<0GnnY//GG"3'G   Ss S S S
 
 	 Ns   BB 
B6B11B6
state_dictc                 z    i }|                                  D ]#\  }}|dvrt          |t                    }|||<   $|S )a  
    Convert a state dict from Meta's format to torchtune's format. State dicts
    from multiple checkpoint files should be consolidated into a single state dict
    before calling this function.

    An example of a Meta-format state dict can be found in the ``meta-llama/Llama-2-7b``
    repo on the HF Hub (https://huggingface.co/meta-llama/Llama-2-7b).

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in Meta's format.

    Returns:
        Dict[str, torch.Tensor]: State dict in torchtune's format.
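
    Example (illustrative; the checkpoint path is a hypothetical stand-in):
        >>> sd = torch.load("/tmp/consolidated.00.pth", map_location="cpu")
        >>> sd = meta_to_tune(sd)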
    """
    converted_state_dict = {}
    for key, value in state_dict.items():
        if key not in ["rope.freqs"]:  # Skip loading the position embeddings
            new_key = get_mapped_key(key, _FROM_META)
            converted_state_dict[new_key] = value

    return converted_state_dict


def tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
    """
    Convert a state dict from torchtune's format to Meta's format. This function
    doesn't handle any sharding or splitting of state dicts. It follows the
    state_dict IN -> state_dict OUT pattern.

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in torchtune's format.

    Returns:
        Dict[str, torch.Tensor]: State dict in Meta's format.
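
    Example (illustrative; assumes ``model`` is a torchtune Llama2 model instance):
        >>> meta_sd = tune_to_meta(model.state_dict())
        >>> torch.save(meta_sd, "/tmp/consolidated.00.pth")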
    """
    converted_state_dict = {}
    inverted_mapping_dict = {v: k for k, v in _FROM_META.items()}

    for key, value in state_dict.items():
        new_key = get_mapped_key(key, inverted_mapping_dict)
        converted_state_dict[new_key] = value

    return converted_state_dict


def hf_to_tune(
    state_dict: Dict[str, torch.Tensor],
    num_heads: int = 32,
    num_kv_heads: int = 32,
    dim: int = 4096,
    head_dim: Optional[int] = None,
) -> Dict[str, torch.Tensor]:
    """
    Convert a state dict from HF's format to torchtune's format. State dicts
    from multiple checkpoint files should be consolidated into a single state dict
    before calling this function.

    An example of an HF-format state dict can be found in the ``meta-llama/Llama-2-7b-hf``
    repo on the HF Hub (https://huggingface.co/meta-llama/Llama-2-7b-hf).

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in HF's format.
        num_heads (int): Number of heads in the model.
        num_kv_heads (int): Number of heads in the key/value projection layers.
        dim (int): Dimension of the model.
        head_dim (Optional[int]): Dimension of each attention head. If not provided,
            it will be calculated as dim // num_heads.

    Returns:
        Dict[str, torch.Tensor]: State dict in torchtune's format.
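
    Example (illustrative; sizes are Llama-2-7B's and the path is a hypothetical stand-in):
        >>> hf_sd = torch.load("/tmp/pytorch_model.bin", map_location="cpu")
        >>> sd = hf_to_tune(hf_sd, num_heads=32, num_kv_heads=32, dim=4096)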
    """
    converted_state_dict = {}
    if head_dim is None:
        head_dim = dim // num_heads

    def _permute(t, n_heads):
        # HF checkpoints keep the two rotary halves of each q/k head grouped
        # together; torchtune expects them interleaved, so reorder the rows.
        return (
            t.view(n_heads, 2, head_dim // 2, dim)
            .transpose(1, 2)
            .reshape((head_dim * n_heads), dim)
        )

    for key, value in state_dict.items():
        if "rotary_emb.inv_freq" not in key:  # Skip loading the position embeddings
            new_key = get_mapped_key(key, _FROM_HF)
            if "q_proj" in key:
                value = _permute(value, num_heads)
            elif "k_proj" in key:
                value = _permute(value, num_kv_heads)
            converted_state_dict[new_key] = value
    return converted_state_dict


def tune_to_hf(
    state_dict: Dict[str, torch.Tensor],
    num_heads: int = 32,
    num_kv_heads: int = 32,
    dim: int = 4096,
    head_dim: Optional[int] = None,
):
    """
    Convert a state dict from torchtune's format to HF's format. This function
    doesn't handle any sharding or splitting of state dicts. It follows the
    state_dict IN -> state_dict OUT pattern.

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in torchtune's format.
        num_heads (int): Number of heads in the model.
        num_kv_heads (int): Number of heads in the key/value projection layers.
        dim (int): Dimension of the model.
        head_dim (Optional[int]): Dimension of each attention head. If not provided,
            it will be calculated as dim // num_heads.

    Returns:
        Dict[str, torch.Tensor]: State dict in HF's format.
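
    Example (illustrative; assumes ``model`` is a torchtune Llama-2-7B model):
        >>> hf_sd = tune_to_hf(
        ...     model.state_dict(), num_heads=32, num_kv_heads=32, dim=4096
        ... )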
    """
    converted_state_dict = {}
    inverted_mapping_dict = {v: k for k, v in _FROM_HF.items()}

    if head_dim is None:
        head_dim = dim // num_heads

    def _permute(t, n_heads):
        # Inverse of the permutation applied in hf_to_tune: regroup the
        # interleaved rotary dimensions into the two halves HF expects.
        return (
            t.view(n_heads, head_dim // 2, 2, dim)
            .transpose(1, 2)
            .reshape((head_dim * n_heads), dim)
        )

    for key, value in state_dict.items():
        new_key = get_mapped_key(key, inverted_mapping_dict)
        if "q_proj" in key:
            value = _permute(value, num_heads)
        elif "k_proj" in key:
            value = _permute(value, num_kv_heads)
        converted_state_dict[new_key] = value

    return converted_state_dict


_TO_PEFT_KEYS = {
    "lora_a": "lora_A",
    "lora_b": "lora_B",
    "magnitude": "lora_magnitude_vector",
}

_TO_PEFT_TARGET_MODULES = {
    "q_proj": "q_proj",
    "k_proj": "k_proj",
    "v_proj": "v_proj",
    "output_proj": "o_proj",
    "w1": "gate_proj",
    "w2": "down_proj",
    "w3": "up_proj",
    "output": "lm_head",
}

_PEFT_CONFIG_EXPECTED_KEYS = ["target_modules", "r", "lora_alpha"]


def tune_to_peft_adapter_config(
    adapter_config: Dict[str, Any],
    base_model_name_or_path: Optional[str] = None,
):
    if not all([x in adapter_config.keys() for x in _PEFT_CONFIG_EXPECTED_KEYS]):
        raise ValueError(
            f"PEFT adapter config requires {_PEFT_CONFIG_EXPECTED_KEYS}, found {adapter_config.keys()}"
        )

    for k in adapter_config["target_modules"]:
        if k not in _TO_PEFT_TARGET_MODULES:
            raise ValueError(f"Unknown target module {k}")
    adapter_config["target_modules"] = list(
        map(_TO_PEFT_TARGET_MODULES.get, adapter_config["target_modules"])
    )

    if base_model_name_or_path:
        adapter_config["base_model_name_or_path"] = base_model_name_or_path

    return adapter_config


def tune_to_peft_adapter_weights(
    state_dict: Dict[str, torch.Tensor],
    num_heads: int = 32,
    num_kv_heads: int = 32,
    dim: int = 4096,
    head_dim: Optional[int] = None,
):
    converted_state_dict = {}
    full_mapping = {}
    # Rather than maintaining a separate mapping for LoRA adapter weights, derive
    # it from the base-model mapping in _FROM_HF, once per PEFT key type.
    for peft_key, peft_val in _TO_PEFT_KEYS.items():
        for hf_key, hf_val in _FROM_HF.items():
            if hf_val is None:
                continue
            if peft_key == "magnitude":
                # e.g. attn.q_proj.magnitude -> attn.q_proj.lora_magnitude_vector
                adapter_key = hf_val.replace(".weight", f".{peft_key}")
                adapter_val = hf_key.replace(".weight", f".{peft_val}")
            else:
                # e.g. attn.q_proj.lora_a.weight -> attn.q_proj.lora_A.weight
                adapter_key = hf_val.replace(".weight", f".{peft_key}.weight")
                adapter_val = hf_key.replace(".weight", f".{peft_val}.weight")
            full_mapping.update({adapter_key: adapter_val})

    if head_dim is None:
        head_dim = dim // num_heads

    def _permute_lora_matrix(t, n_heads):
        rank = t.shape[-1]
        return (
            t.view(n_heads, head_dim // 2, 2, rank)
            .transpose(1, 2)
            .reshape((head_dim * n_heads), rank)
        )

    # Only LoRA B matrices project back up to the full head dimension, so they
    # are the only adapter weights that need the rotary permutation.
    for key, value in state_dict.items():
        new_key = get_mapped_key(key, full_mapping)
        if "q_proj" in new_key and "lora_B" in new_key:
            value = _permute_lora_matrix(value, num_heads)
        elif "k_proj" in new_key and "lora_B" in new_key:
            value = _permute_lora_matrix(value, num_kv_heads)
        converted_state_dict["base_model.model." + new_key] = value
    return converted_state_dict
			 & & & & & & & & & &  5$%C%C%C%H'@!;(A(A(A 
" "9/M/M/M/R59,E*C,E.G7Q%% " 4S> c    ( T#u|"34  c5<>O9P        0 T#u|"34  c5<>O9P        0 .  . S%,&'. .  .  
	. 
 .  
#u|
.  .  .  . f +  + S%,&'+ +  +  
	+ 
 +  +  +  + ` (  


	 	  CBB 
 .2 cN%c]   2 ".  . S%,&'. .  .  
	. 
 sm.  .  .  .  .  . r   