
    Pi!                        d dl mZ d dlmZmZ d dlmZmZ d dlm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ 	 d	efd
Zd	efdZ	 	 	 d$dedee         dee         dee         d	ef
dZ	 	 	 	 	 	 	 d%dee         dededededededed	efdZ	 	 	 	 	 	 	 d%dee         dededededededed	efdZ eed !          Zd"e_         eed !          Zd#e_        dS )&    )partial)ListOptional)_get_prompt_template_TemplateType)llama3lora_llama3)Llama3Tokenizer)TransformerDecoder)LORA_ATTN_MODULES)parse_hf_tokenizer_jsonreturnc                  4    t          dddddddddd	

  
        S )z
    Builder for creating a Llama3 model initialized w/ the default 8b parameter values.

    Returns:
        TransformerDecoder: Instantiation of Llama3 8B model
                     8          h㈵>  

vocab_size
num_layers	num_headsnum_kv_heads	embed_dimmax_seq_lenintermediate_dimattn_dropoutnorm_eps	rope_baser        {/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/models/llama3/_model_builders.py	llama3_8br(      9        r&   c                  4    t          dddddddddd	

  
        S )z
    Builder for creating a Llama3 model initialized w/ the default 70B parameter values.

    Returns:
        TransformerDecoder: Instantiation of Llama3 70 model
    r   P   @   r   r    p  r   r   r   r   r$   r%   r&   r'   
llama3_70br.   /   r)   r&   Npathspecial_tokens_pathr   prompt_templatec                 t    |t          |          nd}|t          |          nd}t          | |||          S )a  
    Tokenizer for Llama3.

    Args:
        path (str): path to the tokenizer
        special_tokens_path (Optional[str]): Path to ``tokenizer.json`` from Hugging Face
            model files that contains all registered special tokens, or a local json file
            structured similarly. Default is None to use the canonical Llama3 special tokens.
        max_seq_len (Optional[int]): maximum sequence length for tokenizing a single list of messages,
            after which the input will be truncated. Default is None.
        prompt_template (Optional[_TemplateType]): optional specified prompt template.
            If a string, it is assumed to be the dotpath of a :class:`~torchtune.data.PromptTemplateInterface`
            class. If a dictionary, it is assumed to be a custom prompt template mapping role to the
            prepend/append tags.

    Returns:
        Llama3Tokenizer: Instantiation of the Llama3 tokenizer
    N)r/   special_tokensr   r1   )r   r   r
   )r/   r0   r   r1   r3   templates         r'   llama3_tokenizerr5   D   sg    4 * 	  3444  2A1L_---RV  % 	   r&   Fr      r   lora_attn_modulesapply_lora_to_mlpapply_lora_to_output	lora_rank
lora_alphalora_dropoutquantize_baseuse_dorac                     t          di d| d|d|ddddddd	d
ddddddddddddd|d|d|d|d|S )a\  
    Builder for creating a Llama3 8B model with LoRA enabled.

    The Llama3 defaults are the same as in :func:`~torchtune.models.llama3.llama3_8b`,
    while LoRA default params are based on
    https://github.com/tloen/alpaca-lora/blob/8bb8579e403dc78e37fe81ffbb253c413007323f/finetune.py#L41-L43.

    Args:
        lora_attn_modules (List[LORA_ATTN_MODULES]): list of which linear layers
            LoRA should be applied to in each self-attention block. Options are
            ``{"q_proj", "k_proj", "v_proj", "output_proj"}``.
        apply_lora_to_mlp (bool): whether to apply LoRA to the MLP in each transformer layer.
            Default: False
        apply_lora_to_output (bool): whether to apply LoRA to the model's final output projection.
            Default: False
        lora_rank (int): rank of each low-rank approximation
        lora_alpha (float): scaling factor for the low-rank approximation
        lora_dropout (float): dropout probability for the low-rank approximation. Default: 0.0
        quantize_base (bool): Whether to quantize base model weights
        use_dora (bool): Decompose the LoRA weight into magnitude and direction, as
            introduced in "DoRA: Weight-Decomposed Low-Rank Adaptation" (https://arxiv.org/abs/2402.09353).

    Returns:
        TransformerDecoder: Instantiation of Llama3 8B model with LoRA applied
    r7   r8   r9   r   r   r   r   r   r   r   r   r   r   r   r    r   r!   r   r"   r   r#   r   r:   r;   r<   r=   r>   r%   r	   r7   r8   r9   r:   r;   r<   r=   r>   s           r'   lora_llama3_8brB   l       F    ++++ 21 7	
 2 " Q $ D  S  ' ) :  "\!" $m#$ % r&   c                     t          di d| d|d|dddddd	d
dddddddddddddd|d|d|d|d|S )a_  
    Builder for creating a Llama3 70B model with LoRA enabled.

    The Llama3 defaults are the same as in :func:`~torchtune.models.llama3.llama3_70b`,
    while LoRA default params are based on
    https://github.com/tloen/alpaca-lora/blob/8bb8579e403dc78e37fe81ffbb253c413007323f/finetune.py#L41-L43.

    Args:
        lora_attn_modules (List[LORA_ATTN_MODULES]): list of which linear layers
            LoRA should be applied to in each self-attention block. Options are
            ``{"q_proj", "k_proj", "v_proj", "output_proj"}``.
        apply_lora_to_mlp (bool): whether to apply LoRA to the MLP in each transformer layer.
            Default: False
        apply_lora_to_output (bool): whether to apply LoRA to the model's final output projection.
            Default: False
        lora_rank (int): rank of each low-rank approximation
        lora_alpha (float): scaling factor for the low-rank approximation
        lora_dropout (float): dropout probability for the low-rank approximation. Default: 0.0
        quantize_base (bool): Whether to quantize base model weights
        use_dora (bool): Decompose the LoRA weight into magnitude and direction, as
            introduced in "DoRA: Weight-Decomposed Low-Rank Adaptation" (https://arxiv.org/abs/2402.09353).

    Returns:
        TransformerDecoder: Instantiation of Llama3 70B model with LoRA applied
    r7   r8   r9   r   r   r   r+   r   r,   r   r   r   r   r   r    r-   r!   r   r"   r   r#   r   r:   r;   r<   r=   r>   r%   r@   rA   s           r'   lora_llama3_70brE      rC   r&   T)r=   z
Builder for creating a Llama3 8B model with QLoRA enabled. Base model weights in linear layers
that LoRA is applied to are quantized per the QLoRA paper: https://arxiv.org/abs/2305.14314.
Please see `lora_llama3_8b` for full API arguments.
z
Builder for creating a Llama3 70B model with QLoRA enabled. Base model weights in linear layers
that LoRA is applied to are quantized per the QLoRA paper: https://arxiv.org/abs/2305.14314.
Please see `lora_llama3_70b` for full API arguments.
)NNN)FFr   r6   r   FF)	functoolsr   typingr   r    torchtune.data._prompt_templatesr   r   +torchtune.models.llama3._component_buildersr   r	   "torchtune.models.llama3._tokenizerr
   torchtune.modulesr   torchtune.modules.peftr   'torchtune.modules.transforms.tokenizersr   r(   r.   strintr5   boolfloatrB   rE   qlora_llama3_8b__doc__qlora_llama3_70br%   r&   r'   <module>rU      s         ! ! ! ! ! ! ! ! P P P P P P P P K K K K K K K K > > > > > > 0 0 0 0 0 0 4 4 4 4 4 4 K K K K K K%    *&    . *.!%/3	% %
%!#% #% m,	%
 % % % %T $!&6 6-.66 6 	6
 6 6 6 6 6 6 6 6v $!&6 6-.66 6 	6
 6 6 6 6 6 6 6 6r '.===  7?$???     r&   