
    PiK                         d dl mZ d dlZd dlmZ dddddd	d
ddddddZ	 	 	 	 ddeeej        f         dededededeeej        f         fdZ		 	 	 ddeeej        f         dedededeeej        f         f
dZ
dS )    )DictN)get_mapped_keyztok_embeddings.weightzlayers.{}.attn.q_proj.weightzlayers.{}.attn.k_proj.weightzlayers.{}.attn.v_proj.weightz!layers.{}.attn.output_proj.weightzlayers.{}.mlp.w1.weightzlayers.{}.mlp.w3.weightzlayers.{}.mlp.w2.weightzlayers.{}.sa_norm.scalezlayers.{}.mlp_norm.scalez
norm.scalezoutput.weight)zmodel.embed_tokens.weightz'model.layers.{}.self_attn.q_proj.weightz'model.layers.{}.self_attn.k_proj.weightz'model.layers.{}.self_attn.v_proj.weightz'model.layers.{}.self_attn.o_proj.weightz$model.layers.{}.mlp.gate_proj.weightz"model.layers.{}.mlp.up_proj.weightz$model.layers.{}.mlp.down_proj.weightz&model.layers.{}.input_layernorm.weightz/model.layers.{}.post_attention_layernorm.weightzmodel.norm.weightzscore.weight       
state_dict	num_headsnum_kv_headsdimhead_dimreturnc                     i }|z  fd}|                                  D ]K\  }}|dk    rd|vrt          |t                    }	d|v r |||          }nd|v r |||          }|||	<   L|S )a  
    Convert a state dict from HF's format to torchtune's format, which contains the weights
    of a reward model (i.e. a classifier with a single class).
    State dicts from multiple checkpoint files should be consolidated into a single state dict
    before calling this function.
    The logic is identical to :func:`~torchtune.models.convert_weights.hf_to_tune`, but with a different mapping.

    Eg of HF-format state dict can be found in the ``Ray2333/reward-model-Mistral-7B-instruct-Unified-Feedback``
    repo in HF.

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in HF's format.
        num_heads (int): Number of heads in the model.
        num_kv_heads (int): Number of heads in the key/value projection layers.
        dim (int): Dimension of the model.
        head_dim (int): Dimension of the head. If not provided, it will be calculated
            as dim // num_heads.

    Returns:
        Dict[str, torch.Tensor]: State dict in torchtune's format.
    Nc                     |                      |ddz                                dd                              |z            S N      view	transposereshapetn_headsr
   r   s     y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/rlhf/utils/_convert_weights.py_permutez#reward_hf_to_tune.<locals>._permute=   sB    FF7Ax1}c22Yq!__Wh(3//	
    z
score.biaszrotary_emb.inv_freqq_projk_proj)itemsr   _REWARD)
r   r   r	   r
   r   converted_state_dictr   keyvaluenew_keys
      ``     r   reward_hf_to_tuner$      s    8 )#
 
 
 
 
 
 !&&(( . .
U , ++$S'22Gs??HUI..EE__HUL11E(-W%%r   c                 
  
 i }d t                                           D             }|z  

fd}|                                 D ];\  }}t          ||          }	d|v r |||          }nd|v r |||          }|||	<   <|S )ap  
    Convert a state dict from torchtune's format to Hugging Face's format for a reward model.

    This function takes a state dictionary in torchtune's format, which contains the weights of a reward model
    (i.e. a classifier with a single class), and converts it into a format that can be loaded into a Hugging Face model.
    The logic is identical to :func:`~torchtune.models.convert_weights.tune_to_hf`, but with a different mapping.

    Args:
        state_dict (Dict[str, torch.Tensor]): State dict in torchtune's format.
        num_heads (int, optional): Number of heads in the model. Defaults to 32.
        num_kv_heads (int, optional): Number of heads in the key/value projection layers. Defaults to 32.
        dim (int, optional): Dimension of the model. Defaults to 4096.

    Returns:
        Dict[str, torch.Tensor]: State dict in Hugging Face's format.

    c                     i | ]\  }}||	S  r'   ).0kvs      r   
<dictcomp>z%reward_tune_to_hf.<locals>.<dictcomp>l   s    >>>daQ>>>r   c                     |                      |dz  d                              dd                              |z            S r   r   r   s     r   r   z#reward_tune_to_hf.<locals>._permuteo   sB    FF7HM1c22Yq!__Wh(3//	
r   r   r   )r   r   r   )r   r   r	   r
   r    inverted_mapping_dictr   r!   r"   r#   r   s      `      @r   reward_tune_to_hfr.   T   s    . >>gmmoo>>>iH
 
 
 
 
 
 !&&(( . .
U &;<<s??HUI..EE__HUL11E(-W%%r   )r   r   r   N)r   r   r   )typingr   torch torchtune.models.convert_weightsr   r   strTensorintr$   r.   r'   r   r   <module>r5      sR          ; ; ; ; ; ; "9/M/M/M/R,E*C,E.G7Q%# $ 4  4 S%,&'4 4  4  
	4 
 4  
#u|
4  4  4  4 r 	*  * S%,&'* *  *  
	* 
 
#u|
*  *  *  *  *  * r   