
    Pi                        d dl mZmZ d dlmZmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ 	 d	efd
Zd"dedee         dee         d	e	fdZ	 	 	 	 	 	 d#dee         dedededededed	efdZ eed          Zde_        d	efdZ	 	 	 	 	 	 d#dee         dedededededed	efd Z eed          Zd!e_        dS )$    )ListOptional)gemma
lora_gemma)TransformerDecoder)GemmaTokenizer)LORA_ATTN_MODULES)_TemplateType)_get_prompt_template)partialreturnc                  4    t          ddddddddd	d

  
        S )z
    Builder for creating a Gemma 2B model initialized w/ the default 2b parameter values
    from: https://blog.google/technology/developers/gemma-open-models/

    Returns:
        TransformerDecoder: Instantiation of Gemma 2B model
                      @              ư>

vocab_size
num_layers	num_headshead_dimnum_kv_heads	embed_dimintermediate_dimmax_seq_lenattn_dropoutnorm_epsr        z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/models/gemma/_model_builders.pygemma_2br(      s9        r&   Npathr!   prompt_templatec                 H    t          | ||t          |          nd          S )a  
    Tokenizer for Gemma.

    Args:
        path (str): path to the tokenizer
        max_seq_len (Optional[int]): maximum sequence length for tokenizing a single list of messages,
            after which the input will be truncated. Default is None.
        prompt_template (Optional[_TemplateType]): optional specified prompt template.
            If a string, it is assumed to be the dotpath of a :class:`~torchtune.data.PromptTemplateInterface`
            class. If a dictionary, it is assumed to be a custom prompt template mapping role to the
            prepend/append tags.
        

    Returns:
        GemmaTokenizer: Instantiation of the Gemma tokenizer
    Nr)   r!   r*   )r   r   r,   s      r'   gemma_tokenizerr-   .   sG    " t  xG  xSNbcrNsNsNs  Y]  ^  ^  ^  ^r&   Fr      r   lora_attn_modulesapply_lora_to_mlp	lora_rank
lora_alphalora_dropoutuse_doraquantize_basec                 |    t          di d| d|ddddddd	d
ddddddddddddd|d|d|d|d|S )a  
    Builder for creating a Gemma 2B model with LoRA enabled.

    The Gemma defaults are the same as in :func:`~torchtune.models.gemma.gemma_2b`,
    while LoRA default params are based on
    https://github.com/tloen/alpaca-lora/blob/8bb8579e403dc78e37fe81ffbb253c413007323f/finetune.py#L41-L43.

    Args:
        lora_attn_modules (List[LORA_ATTN_MODULES]): list of which linear layers
            LoRA should be applied to in each self-attention block. Options are
            ``{"q_proj", "k_proj", "v_proj", "output_proj"}``.
        apply_lora_to_mlp (bool): whether to apply LoRA to the MLP in each transformer layer.
            Default: False
        lora_rank (int): rank of each low-rank approximation
        lora_alpha (float): scaling factor for the low-rank approximation
        lora_dropout (float): dropout probability for the low-rank approximation. Default: 0.0
        use_dora (bool): Decompose the LoRA weight into magnitude and direction, as
            introduced in "DoRA: Weight-Decomposed Low-Rank Adaptation" (https://arxiv.org/abs/2402.09353).
        quantize_base (bool): Whether to quantize base model weights

    Returns:
        TransformerDecoder: Instantiation of Gemma 2B model with LoRA applied
    r/   r0   r   r   r   r   r   r   r   r   r   r   r   r   r    r   r!   r   r"   r   r#   r   r1   r2   r3   r4   r5   r%   r   r/   r0   r1   r2   r3   r4   r5   s          r'   lora_gemma_2br9   B   s    @    ++++ 7 2	
 !  Q $  D S  ) : "\  !" $m# r&   T)r5   z
Builder for creating a Gemma model with QLoRA enabled. Base model weights in linear layers
that LoRA is applied to are quantized per the QLoRA paper: https://arxiv.org/abs/2305.14314.
Please see `lora_gemma_2b` for full API arguments.
c                  4    t          dddddddddd	

  
        S )z
    Builder for creating a Gemma 7B model initialized w/ the default 7b parameter values
    from: https://blog.google/technology/developers/gemma-open-models/

    Returns:
        TransformerDecoder: Instantiation of Gemma 7B model
    r      r.   r       `  r   r   r   r   r$   r%   r&   r'   gemma_7br>      s9        r&   c                 |    t          di d| d|ddddddd	d
ddddddddddddd|d|d|d|d|S )a  
    Builder for creating a Gemma 7B model with LoRA enabled.

    The Gemma defaults are the same as in :func:`~torchtune.models.gemma.gemma_7b`,
    while LoRA default params are based on
    https://github.com/tloen/alpaca-lora/blob/8bb8579e403dc78e37fe81ffbb253c413007323f/finetune.py#L41-L43.

    Args:
        lora_attn_modules (List[LORA_ATTN_MODULES]): list of which linear layers
            LoRA should be applied to in each self-attention block. Options are
            ``{"q_proj", "k_proj", "v_proj", "output_proj"}``.
        apply_lora_to_mlp (bool): whether to apply LoRA to the MLP in each transformer layer.
            Default: False
        lora_rank (int): rank of each low-rank approximation
        lora_alpha (float): scaling factor for the low-rank approximation
        lora_dropout (float): dropout probability for the low-rank approximation. Default: 0.0
        use_dora (bool): Decompose the LoRA weight into magnitude and direction, as
            introduced in "DoRA: Weight-Decomposed Low-Rank Adaptation" (https://arxiv.org/abs/2402.09353).
        quantize_base (bool): Whether to quantize base model weights

    Returns:
        TransformerDecoder: Instantiation of Gemma 7B model with LoRA applied
    r/   r0   r   r   r   r;   r   r.   r   r   r   r   r<   r    r=   r!   r   r"   r   r#   r   r1   r2   r3   r4   r5   r%   r7   r8   s          r'   lora_gemma_7br@      s    @    ++++ 7 2	
 "  R $  D S  ) : "\  !" $m# r&   z
Builder for creating a Gemma model with QLoRA enabled. Base model weights in linear layers
that LoRA is applied to are quantized per the QLoRA paper: https://arxiv.org/abs/2305.14314.
Please see `lora_gemma_7b` for full API arguments.
)NN)Fr   r.   r   FF)typingr   r   *torchtune.models.gemma._component_buildersr   r   torchtune.modulesr   !torchtune.models.gemma._tokenizerr   torchtune.modules.peftr	    torchtune.data._prompt_templatesr
   r   	functoolsr   r(   strintr-   boolfloatr9   qlora_gemma_2b__doc__r>   r@   qlora_gemma_7br%   r&   r'   <module>rO      st   " ! ! ! ! ! ! ! H H H H H H H H 0 0 0 0 0 0 < < < < < < 4 4 4 4 4 4 : : : : : : A A A A A A      $    ,^ ^# ^HSM ^S[\iSj ^  wE ^ ^ ^ ^, $2 2-.22 2 	2
 2 2 2 2 2 2 2h d;;; $    0 $2 2-.22 2 	2
 2 2 2 2 2 2 2h d;;;   r&   