
    Pi8.                         d dl Z d dlmZmZmZ d dlZd dlmc mZ	 d dlmZ d dl
mZmZ d dlmZ d dlmZ  G d dej        e          Z G d	 d
e          Zdej        ddfdZdej        ddfdZdS )    N)ListOptionalUnion)nn)
linear_nf4to_nf4)_register_nf4_dispatch_ops)AdapterModulec                        e Zd ZdZ	 	 	 ddededededed	ed
ef fdZdddee	e
ej        ef                  defdZd Zdee
         fdZdej        dej        fdZ xZS )
LoRALineara  LoRA linear layer as introduced in `LoRA: Low-Rank Adaptation of Large Language Models <https://arxiv.org/abs/2106.09685>`_.

    LoRA perturbs a given layer via a low-rank approximation where only
    the rank decomposition matrices are trainable. In a linear layer instead of
    :math:`x \mapsto W_0x` a LoRALinear layer is defined as
    :math:`x \mapsto W_0x + (\alpha / r)BAx`, where :math:`r` is the rank of
    the matrices :math:`A` and :math:`B` and :math:`\alpha` is a scaling factor.
    As in the original implementation, we support dropout before multiplication
    by the low-rank matrices.

    Args:
        in_dim (int): input dimension
        out_dim (int): output dimension
        rank (int): rank of the low-rank approximation
        alpha (float): scaling factor for the low-rank approximation
        dropout (float): dropout probability. Default: 0.0
        use_bias (bool): whether to include bias in the original linear layer.
            Default: False
        quantize_base (bool): Whether to quantize base linear weight or not.
            Default: False
        **quantization_kwargs: Keyword arguments to pass to `to_nf4` when quantizing the base linear weight.
            Examples of valid arguments are `block_size` and `scaler_block_size`, which control the granularity of
            weight quantization and scaler quantization respectively. This is only used if `quantize_base` is True.
            Default None

    Raises:
        ValueError: If ``quantize_base`` is False, but quantization kwargs are provided.
            Fin_dimout_dimrankalphadropoutuse_biasquantize_basec                 ~   t                                                       || _        || _        || _        || _        || _        || _        | j        s=t          d |	                                D                       rt          d|           t          j        ||| j                  }	| j        s|	j        nt          |	j        fi |}
| j        r|	j        nd }d| _        |                     dt          j        |
                     |                     d|t          j        |          nd            |dk    rt          j        |          nt          j                    | _        t          j        ||d          | _        t          j        ||d          | _        d| _        |                                  d S )	Nc                     g | ]}|S  r   ).0vs     o/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/modules/peft/lora.py
<listcomp>z'LoRALinear.__init__.<locals>.<listcomp>D   s    +T+T+T!A+T+T+T    zO``quantize_base`` is False, but received the following quantization arguments: )in_featuresout_featuresbiasFweightr   r   )p)super__init__r   r   r   r   r   _quantize_baseanyvalues
ValueErrorr   Linearr    r   r   disabledregister_parameter	ParameterDropoutIdentityr   lora_alora_bmergedinitialize_parameters)selfr   r   r   r   r   r   r   quantization_kwargslinearr    r   	__class__s               r   r#   zLoRALinear.__init__1   s    		
 +" 	s+T+T7J7Q7Q7S7S+T+T+T'U'U 	wbuww  
 vG$-XXX &>FMM==)<== 	
 #m5v{{
 ",v*>*>???$*:BL&&&	
 	
 	
 18#rzG,,,,2;==iFERRRiDwUSSS""$$$$$r   T)recursedevicer6   c                v    | j                             ||           | j                            ||           d S )N)r7   r6   )r.   to_emptyr/   )r2   r7   r6   s      r   r9   zLoRALinear.to_empty`   s@     	FG<<<FG<<<<<r   c                 V    t          | j                   t          | j                   d S )N)_lora_a_init_paramsr.   _lora_b_init_paramsr/   )r2   s    r   r1   z LoRALinear.initialize_parametersf   s*     	DK(((DK(((((r   returnc                     ddg}|S )z
        Return a list of strings corresponding to the names of the ``nn.Parameter`` s in
        the model coming from the adapter.

        For LoRA this means lora_a.weight and lora_b.weight.
        zlora_a.weightzlora_b.weightr   )r2   adapter_paramss     r   r?   zLoRALinear.adapter_paramsl   s     *?;r   xc                 V   | j         r(t          || j                  }| j        r
|| j        z   }n t          j        || j        | j                  }| j        r|S |                     | 	                    |                    }| j
        | j        z  |                     |          z  }||z   S )
        Args:
            x (torch.Tensor): input tensor with shape ``(..., in_dim)``

        Returns:
            torch.Tensor: output tensor with shape ``(..., out_dim)``

        )inputr    )r$   r   r    r   r   Fr4   r)   r.   r   r   r   r/   )r2   r@   outlora_outs       r   forwardzLoRALinear.forwardx   s      	61T[999C} &DIo(1dk4955C= 	J;;t||A//J*dkk(.C.CCX~r   )r   FF)__name__
__module____qualname____doc__intfloatboolr#   r   r   strtorchr7   r9   r1   r   r?   TensorrG   __classcell__r5   s   @r   r   r      s9        F #-% -%-% -% 	-%
 -% -% -% -% -% -% -% -% -%` SW= = =!%U\3(>"?@=KO= = = =) ) )
S	 
 
 
 
 %,        r   r   c                        e Zd ZdZ	 	 	 ddededededed	ed
         ded
         f fdZdej	        dej	        fdZ
e	 	 dded	ed
         ded
         dd fd            Z xZS )QATLoRALineara  
    LoRA linear layer with quantization-aware training (QAT) applied to the
    activations and/or weights before the low rank adapters.

    QAT leverages fake quantization to simulate the quantization numerics during
    training without actually casting the data to lower precision. This class
    combines LoRA with QAT to improve the final quantized accuracy during inference
    while reducing the memory required during training.

    Args:
        in_dim (int): input dimension
        out_dim (int): output dimension
        rank (int): rank of the low-rank approximation
        alpha (float): scaling factor for the low-rank approximation
        dropout (float): dropout probability. Default: 0.0
        activation_qat_config (Optional[FakeQuantizeConfig]): config for specifying
            how input activations will be fake quantized, defaults to None
        weight_qat_config (Optional[FakeQuantizeConfig]): config for specifying
            how weights will be fake quantized, defaults to None

    Raises:
        ValueError: If `in_dim` is not divisible by weight `group_size`

    Example usage::

        activation_qat_config = FakeQuantizeConfig(
            dtype=torch.int8,
            granularity="per_token",
            is_symmetric=False,
        )
        weight_qat_config = FakeQuantizeConfig(
            dtype=torch.int4,
            group_size=8,
            is_symmetric=True,
        )
        qat_lora_linear = QATLoRALinear(
            in_dim=512,
            out_dim=1024,
            rank=8,
            alpha=16,
            dropout=0.0,
            activation_qat_config=activation_qat_config,
            weight_qat_config=weight_qat_config,
        )
        qat_lora_linear(torch.randn(512))
    r   Nr   r   r   r   r   activation_qat_configFakeQuantizeConfigweight_qat_configc           	         t                                          |||||dd           	 ddlm} ddlm}	 n"# t          $ r}
t          d          |
d }
~
ww xY w|#t          ||          sJ  |	|          | _	        nt          j                    | _	        |Lt          ||          sJ |j        }|||z  dk    rt          d|d|d	           |	|          | _        d S t          j                    | _        d S )
NF)r   r   r   )rW   )FakeQuantizerz2QATLoRALinear is only compatible with torchao 0.7+zin_dim (z#) must be divisible by group_size ())r"   r#   torchao.quantization.qat.apirW   'torchao.quantization.qat.fake_quantizerrZ   ImportErrorr'   
isinstanceactivation_fake_quantizerr   r-   
group_sizeweight_fake_quantizer)r2   r   r   r   r   r   rV   rX   rW   rZ   errra   r5   s               r   r#   zQATLoRALinear.__init__   st    	 	 	
 	
 	
	GGGGGGMMMMMMM 	 	 	D 	 !,35GHHHHH-:];P-Q-QD**-/[]]D* (/1CDDDDD*5J%&:*=*B*B jvvzzz+   *77H)I)ID&&&)+D&&&s   8 
AAAr@   r=   c                 @   |                      |          }|                     | j                  }t          j        ||          }| j        r|S |                     |                     |                    }| j        | j	        z  | 
                    |          z  }||z   S )rB   )r`   rb   r    rD   r4   r)   r.   r   r   r   r/   )r2   r@   _xwrE   rF   s         r   rG   zQATLoRALinear.forward   s     ++A..&&t{33hr1oo= 	J;;t||A//J*dkk(.C.CCX~r   lora_linearc           	      T   |j         t          d           |j        rt          d           t          |j        t
          j                  r|j        j        }nd} | |j        |j	        |j
        |j        |||          }|j        j        t          j        d          k    r|j        |_        |j        j        j        t          j        d          k    r|j        j        |j        _        |j        j        j        t          j        d          k    r|j        j        |j        _        |S )zv
        Create a `QATLoRALinear` from an existing `LoRALinear`,
        preserving the weights and adapters.
        Nz'Bias is not supported in QAT + LoRA yetz/quantize_base is not compatible with QAT + LoRAr   meta)r   r'   r$   r_   r   r   r,   r!   r   r   r   r   r    r7   rP   r.   r/   )clsrg   rV   rX   r   
new_linears         r   from_lora_linearzQATLoRALinear.from_lora_linear  s    '@AAA% 	JHIIIk)2:66 	!)+GGGS!
 

 $V(<(<<< + 2J$+u|F/C/CCC'2'9'@J$$+u|F/C/CCC'2'9'@J$r   )r   NN)NN)rH   rI   rJ   rK   rL   rM   r   r#   rP   rQ   rG   classmethodr   rl   rR   rS   s   @r   rU   rU      s4       - -j  AE<@17 1717 17 	17
 17 17  ((<=17 $$8917 17 17 17 17 17f %,    $  AE<@& &&
  ((<=& $$89& 
& & & [& & & & &r   rU   r@   r=   c                 v    t           j                            | j        t	          j        d                     dS )z6
    Initialize LoRA A weight to Kaiming uniform.
       )aN)r   initkaiming_uniform_r    mathsqrtr@   s    r   r;   r;   -  s/     GQX166666r   c                 N    t           j                            | j                   dS )z,
    Initialize LoRA B weight to zeros.
    N)r   rq   zeros_r    ru   s    r   r<   r<   4  s      GNN18r   )rs   typingr   r   r   rP   torch.nn.functionalr   
functionalrD   torchao.dtypes.nf4tensorr   r   torchtune.modules.low_precisionr	   torchtune.modules.peftr
   Moduler   rU   r(   r;   r<   r   r   r   <module>r      s\    ( ( ( ( ( ( ( ( ( (                 7 7 7 7 7 7 7 7 F F F F F F 0 0 0 0 0 0x x x x xM x x xv\ \ \ \ \J \ \ \~729 7 7 7 7 729       r   