
    Pic                     :   d dl mZmZ d dlmZmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZ 	 d
e	fdZd"dedee         dee         dee         d
ef
dZ	 	 	 	 	 	 	 d#dee         dededededededed
e	fdZ eed           Zd!e_        dS )$    )ListOptional)phi3	lora_phi3)Phi3MiniTokenizer)TransformerDecoder)LORA_ATTN_MODULES)partial)parse_hf_tokenizer_json)_TemplateType)_get_prompt_templatereturnc                  2    t          ddddddddd	  	        S )	a`  
    Builder for creating the Phi3 Mini 4K Instruct Model.
    Ref: https://huggingface.co/microsoft/Phi-3-mini-4k-instruct

    Note:
        This model does not currently support 128K context length nor optimizations
        such as sliding window attention.

    Returns:
        TransformerDecoder: Instantiation of Phi3 Mini 4K Instruct Model
    @}                        h㈵>)	
vocab_size
num_layers	num_headsnum_kv_heads	embed_dimintermediate_dimmax_seq_lenattn_dropoutnorm_eps)r        y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/models/phi3/_model_builders.py	phi3_minir#      s6     
 
 
 
r!   Npathspecial_tokens_pathr   prompt_templatec                 t    |t          |          nd}|t          |          nd}t          | |||          S )a  Phi-3 Mini tokenizer.
    Ref: https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/tokenizer_config.json

    Args:
        path (str): Path to the SPM tokenizer model.
        special_tokens_path (Optional[str]): Path to ``tokenizer.json`` from Hugging Face
            model files that contains all registered special tokens, or a local json file 
            structured similarly. Default is None to use the canonical Phi3 special tokens.
        max_seq_len (Optional[int]): maximum sequence length for tokenizing a single list of messages,
            after which the input will be truncated. Default is None.
        prompt_template (Optional[_TemplateType]): optional specified prompt template.
            If a string, it is assumed to be the dotpath of a :class:`~torchtune.data.PromptTemplateInterface`
            class. If a dictionary, it is assumed to be a custom prompt template mapping role to the
            prepend/append tags.

    Note:
        This tokenizer includes typical LM EOS and BOS tokens like
        <s>, </s>, and <unk>. However, to support chat completion,
        it is also augmented with special tokens like <endoftext>
        and <assistant>.

    Returns:
        Phi3MiniSentencePieceBaseTokenizer: Instantiation of the SPM tokenizer.
    N)r$   special_tokensr   r&   )r   r   r   )r$   r%   r   r&   r(   templates         r"   phi3_mini_tokenizerr*   ,   sR    2 FYEd,-@AAAjnN8G8S#O444Y]H$~S^pxyyyyr!   F      r   lora_attn_modulesapply_lora_to_mlpapply_lora_to_output	lora_rank
lora_alphalora_dropoutuse_doraquantize_basec                 |    t          di d| d|d|ddddddd	dd
dddddddddd|d|d|d|d|S )a_  
    Builder for creating a Phi3 Mini (3.8b) model with LoRA enabled.

    The Phi3 defaults are the same as in :func:`~torchtune.models.phi3.phi3_mini`,
    while LoRA default params are based on
    https://github.com/tloen/alpaca-lora/blob/8bb8579e403dc78e37fe81ffbb253c413007323f/finetune.py#L41-L43.

    Args:
        lora_attn_modules (List[LORA_ATTN_MODULES]): list of which linear layers
            LoRA should be applied to in each self-attention block. Options are
            ``{"q_proj", "k_proj", "v_proj", "output_proj"}``.
        apply_lora_to_mlp (bool): whether to apply LoRA to the MLP in each transformer layer.
            Default: False
        apply_lora_to_output (bool): whether to apply LoRA to the model's final output projection.
            Default: False
        lora_rank (int): rank of each low-rank approximation
        lora_alpha (float): scaling factor for the low-rank approximation
        lora_dropout (float): dropout probability for the low-rank approximation. Default: 0.0
        use_dora (bool): Decompose the LoRA weight into magnitude and direction, as
            introduced in "DoRA: Weight-Decomposed Low-Rank Adaptation" (https://arxiv.org/abs/2402.09353).
        quantize_base (bool): Whether to quantize base model weights

    Returns:
        TransformerDecoder: Instantiation of Phi3 Mini model with LoRA applied
    r-   r.   r/   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r0   r1   r2   r3   r4   r    )r   )r-   r.   r/   r0   r1   r2   r3   r4   s           r"   lora_phi3_minir6   J   s    F    ++++ 21 6	
 2 " R $  D S  ) : "\  !" $m# r!   T)r4   z
Builder for creating a Phi3 mini model with QLoRA enabled. Base model weights in linear layers
that LoRA is applied to are quantized per the QLoRA paper: https://arxiv.org/abs/2305.14314.
Please see `lora_phi3_mini` for full API arguments.
)NNN)FFr+   r,   r   FF)typingr   r   )torchtune.models.phi3._component_buildersr   r    torchtune.models.phi3._tokenizerr   torchtune.modulesr   torchtune.modules.peftr	   	functoolsr
   'torchtune.modules.transforms.tokenizersr    torchtune.data._prompt_templatesr   r   r#   strintr*   boolfloatr6   qlora_phi3_mini__doc__r    r!   r"   <module>rE      s   ! ! ! ! ! ! ! ! E E E E E E E E > > > > > > 0 0 0 0 0 0 4 4 4 4 4 4       K K K K K K : : : : : : A A A A A A%    0z zc z z[cdg[h z  CK  LY  CZ z  fw z z z z@ $!&5 5-.55 5 	5
 5 5 5 5 5 5 5 5p '.===   r!   