
    Pi*                        d dl mZmZ d dlmZmZ d dlmZmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZ 	 defd	Zdefd
ZdefdZ	 	 	 	 d"dededee         dee         dee         defdZ	 	 	 	 	 	 	 d#dee         dedededededededefdZ	 	 	 	 	 	 d$dee         dededededededefd Z	 	 	 	 	 	 d$dee         dededededededefd!ZdS )%    )ListOptional)_get_prompt_template_TemplateType)
lora_qwen2qwen2)QWEN2_SPECIAL_TOKENSQwen2Tokenizer)TransformerDecoder)LORA_ATTN_MODULES)parse_hf_tokenizer_jsonreturnc                  4    t          dddddddddd	

  
        S )z
    Builder for creating a Qwen2 model initialized w/ the default 7B parameter values
    from https://huggingface.co/Qwen/Qwen2-7B-Instruct

    Returns:
        TransformerDecoder: Instantiation of Qwen2 7B model
     R           J             ư>    .A)

vocab_size
num_layers	num_headsnum_kv_heads	embed_dimintermediate_dimmax_seq_lenattn_dropoutnorm_eps	rope_baser        z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/models/qwen2/_model_builders.pyqwen2_7br'      s9        r%   c                  6    t          ddddddddd	d
d          S )a  
    Builder for creating a Qwen2 model initialized w/ the default 0.5B parameter values
    from https://huggingface.co/Qwen/Qwen2-0.5B-Instruct

    Returns:
        TransformerDecoder: Instantiation of Qwen2 0.5B model

    Note:
        Qwen2 0.5B and Qwen2 1.5B model builders will enable `tie_word_embeddings` by default
        and returns an instance of `TransformerDecoder`.
    Q               r   r   r   r   Tr   r   r   r   r   r   r   r    r!   r"   tie_word_embeddingsr#   r$   r%   r&   
qwen2_0_5br1   -   s<         r%   c                  6    t          ddddddddd	d
d          S )a  
    Builder for creating a Qwen2 model initialized w/ the default 1.5B parameter values
    from https://huggingface.co/Qwen/Qwen2-1.5B-Instruct

    Returns:
        TransformerDecoder: Instantiation of Qwen2 1.5B model

    Note:
        Qwen2 0.5B and Qwen2 1.5B model builders will enable `tie_word_embeddings` by default
        and returns an instance of `TransformerDecoder`.
    r)   r      r,       #  r   r   r   r   Tr/   r#   r$   r%   r&   
qwen2_1_5br6   H   s<         r%   Npathmerges_filespecial_tokens_pathr   prompt_templatec           	      |    |t          |          nt          }|t          |          nd}t          d| ||||d|S )a  
    Tokenizer for Qwen2.

    Args:
        path (str): path to the vocab.json file.
        merges_file (str): path to the merges.txt file.
        special_tokens_path (Optional[str]): Path to ``tokenizer.json`` from Hugging Face
            model files that contains all registered special tokens, or a local json file
            structured similarly. Default is None to use the canonical Qwen2 special tokens.
        max_seq_len (Optional[int]): A max sequence length to truncate tokens to.
            Default: None
        prompt_template (Optional[_TemplateType]): optional specified prompt template.
            If a string, it is assumed to be the dotpath of a :class:`~torchtune.data.PromptTemplateInterface`
            class. If a dictionary, it is assumed to be a custom prompt template mapping role to the
            prepend/append tags. Default is None.

    Returns:
        Qwen2Tokenizer: Instantiation of the Qwen2 tokenizer
    N)r7   r8   special_tokensr   r:   r$   )r   r	   r   r
   )r7   r8   r9   r   r:   kwargsr<   templates           r&   qwen2_tokenizerr?   c   sy    : * 	  3444!  2A1L_---RV   %     r%   F      r   lora_attn_modulesapply_lora_to_mlpapply_lora_to_output	lora_rank
lora_alphalora_dropoutuse_doraquantize_basec                     t          di d| d|d|ddddddd	d
ddddddddddddd|d|d|d|d|S )a  
    Builder for creating a Qwen2 7B model with LoRA enabled.

    The Qwen2 defaults are the same as in :func:`~torchtune.models.qwen2.qwen2_7b`,
    while LoRA default params are based on
    https://github.com/tloen/alpaca-lora/blob/8bb8579e403dc78e37fe81ffbb253c413007323f/finetune.py#L41-L43.

    Args:
        lora_attn_modules (List[LORA_ATTN_MODULES]): list of which linear layers
            LoRA should be applied to in each self-attention block. Options are
            ``{"q_proj", "k_proj", "v_proj", "output_proj"}``.
        apply_lora_to_mlp (bool): whether to apply LoRA to the MLP in each transformer layer.
            Default: False
        apply_lora_to_output (bool): whether to apply LoRA to the model's final output projection.
            Default: False
        lora_rank (int): rank of each low-rank approximation
        lora_alpha (float): scaling factor for the low-rank approximation
        lora_dropout (float): dropout probability for the low-rank approximation. Default: 0.0
        quantize_base (bool): Whether to quantize base model weights

    Returns:
        TransformerDecoder: Instantiation of Qwen2 7B model with LoRA applied
    rB   rC   rD   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r   r!   r   r"   r   rE   rF   rG   rH   rI   r$   r   )rB   rC   rD   rE   rF   rG   rH   rI   s           r&   lora_qwen2_7brL      s    B    ++++ 21 6	
 2 " Q $  E S  ) ) :  "\!" #$ $m% r%   c                     t          d i d| d|ddddddd	d
ddddddddddddddddd|d|d|d|d|S )!a  
    Builder for creating a Qwen2 0.5B model with LoRA enabled.

    The Qwen2 defaults are the same as in :func:`~torchtune.models.qwen2.qwen2_0_5b`,
    while LoRA default params are based on
    https://github.com/tloen/alpaca-lora/blob/8bb8579e403dc78e37fe81ffbb253c413007323f/finetune.py#L41-L43.

    Args:
        lora_attn_modules (List[LORA_ATTN_MODULES]): list of which linear layers
            LoRA should be applied to in each self-attention block. Options are
            ``{"q_proj", "k_proj", "v_proj", "output_proj"}``.
        apply_lora_to_mlp (bool): whether to apply LoRA to the MLP in each transformer layer.
            Default: False
        lora_rank (int): rank of each low-rank approximation
        lora_alpha (float): scaling factor for the low-rank approximation
        lora_dropout (float): dropout probability for the low-rank approximation. Default: 0.0
        quantize_base (bool): Whether to quantize base model weights

    Returns:
        TransformerDecoder: Instantiation of Qwen2 0.5B model with LoRA applied

    Note:
        Qwen2 0.5B and Qwen2 1.5B model builders will enable `tie_word_embeddings` by default
        and returns an instance of `TransformerDecoder`.
    rB   rC   rD   Fr   r)   r   r*   r   r+   r   r,   r   r-   r   r.   r   r   r    r   r!   r   r"   r   r0   TrE   rF   rG   rH   rI   r$   rK   rB   rC   rE   rF   rG   rH   rI   s          r&   lora_qwen2_0_5brO      s    D    ++++ #U 6	
 2 " Q #  E S  ) !D )  :!" "\#$ %& $m' r%   c                     t          d i d| d|ddddddd	d
ddddddddddddddddd|d|d|d|d|S )!a  
    Builder for creating a Qwen2 1.5B model with LoRA enabled.

    The Qwen2 defaults are the same as in :func:`~torchtune.models.qwen2.qwen2_1_5b`,
    while LoRA default params are based on
    https://github.com/tloen/alpaca-lora/blob/8bb8579e403dc78e37fe81ffbb253c413007323f/finetune.py#L41-L43.

    Args:
        lora_attn_modules (List[LORA_ATTN_MODULES]): list of which linear layers
            LoRA should be applied to in each self-attention block. Options are
            ``{"q_proj", "k_proj", "v_proj", "output_proj"}``.
        apply_lora_to_mlp (bool): whether to apply LoRA to the MLP in each transformer layer.
            Default: False
        lora_rank (int): rank of each low-rank approximation
        lora_alpha (float): scaling factor for the low-rank approximation
        lora_dropout (float): dropout probability for the low-rank approximation. Default: 0.0
        quantize_base (bool): Whether to quantize base model weights

    Returns:
        TransformerDecoder: Instantiation of Qwen2 1.5B model with LoRA applied

    Note:
        Qwen2 0.5B and Qwen2 1.5B model builders will enable `tie_word_embeddings` by default
        and returns an instance of `TransformerDecoder`.
    rB   rC   rD   Fr   r)   r   r   r   r3   r   r,   r   r4   r   r5   r   r   r    r   r!   r   r"   r   r0   TrE   rF   rG   rH   rI   r$   rK   rN   s          r&   lora_qwen2_1_5brQ      s    D    ++++ #U 6	
 2 " Q $  E S  ) !D )  :!" "\#$ %& $m' r%   )NNNN)FFr@   rA   r   FF)Fr@   rA   r   FF)typingr   r    torchtune.data._prompt_templatesr   r   *torchtune.models.qwen2._component_buildersr   r   !torchtune.models.qwen2._tokenizerr	   r
   torchtune.modulesr   torchtune.modules.peftr   'torchtune.modules.transforms.tokenizersr   r'   r1   r6   strintr?   boolfloatrL   rO   rQ   r$   r%   r&   <module>r]      s   " ! ! ! ! ! ! ! P P P P P P P P H H H H H H H H R R R R R R R R 0 0 0 0 0 0 4 4 4 4 4 4 K K K K K K$    ,&    6&    : )-!%/3* *
** "#* #	*
 m,* * * * *^ $!&4 4-.44 4 	4
 4 4 4 4 4 4 4 4r $6 6-.66 6 	6
 6 6 6 6 6 6 6v $6 6-.66 6 	6
 6 6 6 6 6 6 6 6 6r%   