
    Pi                         d dl Z d dlmZmZmZ d dlZd dlmc mZ	 d dlmZ d dl
mZmZ d dlmZ d dlmZ  G d dej        e          Zd	ej        d
dfdZd	ej        d
dfdZdS )    N)ListOptionalUnion)nn)
linear_nf4to_nf4)_register_nf4_dispatch_ops)AdapterModulec                        e Zd ZdZ	 	 	 ddededededed	ed
ef fdZdddee	e
ej        ef                  defdZd Z ej                    d             Zd Zdee
         fdZdej        dej        fdZ xZS )
DoRALinearaQ  DoRA linear layer as introduced in
    `DoRA: Weight-Decomposed Low-Rank Adaptation of Large Language Models <https://arxiv.org/abs/2402.09353>`_.

    DoRA (Weight-Decomposed Low-Rank Adaptation) fine-tunes a layer by decomposing the pre-trained weights
    into two components: magnitude and direction. The magnitude component is a learnable scalar vector
    that scales each output channel, while the direction component, modified via LoRA, adjusts the orientation
    of weights. By scaling the LoRA update component :math:`BAx` with the `magnitude` vector, DoRA allows the model
    to apply distinct scaling adjustments across different output dimensions.

    Args:
        in_dim (int): input dimension
        out_dim (int): output dimension
        rank (int): rank of the low-rank approximation
        alpha (float): scaling factor for the low-rank approximation
        dropout (float): dropout probability. Default: 0.0
        use_bias (bool): whether to include bias in the original linear layer.
            Default: False
        quantize_base (bool): Whether to quantize base linear weight or not.
            Default: False
        **quantization_kwargs: Keyword arguments to pass to `to_nf4` when quantizing the base linear weight.
            Examples of valid arguments are `block_size` and `scaler_block_size`, which control the granularity of
            weight quantization and scaler quantization respectively. This is only used if `quantize_base` is True.
            Default None

    Raises:
        ValueError: If ``quantize_base`` is False, but quantization kwargs are provided.

            Fin_dimout_dimrankalphadropoutuse_biasquantize_basec                    t                                                       || _        || _        ||z  | _        || _        || _        | j        s=t          d |                                D                       rt          d|           t          j        ||| j                  }	| j        s|	j        nt          |	j        fi |}
| j        r|	j        nd }d| _        |                     dt          j        |
                     |                     d|t          j        |          nd            |dk    rt          j        |          nt          j                    | _        t          j        ||d          | _        t          j        ||d          | _        t          j        t/          j        |                    | _        |                                  d S )	Nc                     g | ]}|S  r   ).0vs     o/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/modules/peft/dora.py
<listcomp>z'DoRALinear.__init__.<locals>.<listcomp>D   s    +T+T+T!A+T+T+T    zO``quantize_base`` is False, but received the following quantization arguments: )in_featuresout_featuresbiasFweightr   r   )p)super__init__r   r   scalingr   _quantize_baseanyvalues
ValueErrorr   Linearr    r   r   disabledregister_parameter	ParameterDropoutIdentityr   lora_alora_btorchempty	magnitudeinitialize_parameters)selfr   r   r   r   r   r   r   quantization_kwargslinearr    r   	__class__s               r   r#   zDoRALinear.__init__2   s    	t| +" 	s+T+T7J7Q7Q7S7S+T+T+T'U'U 	wbuww  
 vG$-XXX &>FMM==)<== 	
 #m5v{{
 ",v*>*>???$*:BL&&&	
 	
 	
 18#rzG,,,,2;==iFERRRiDwUSSSek'&:&:;;""$$$$$r   T)recursedevicer9   c                2   | j                             ||           | j                            ||           t          j        t          j        | j        |          | j        j                  }t
          j	        
                    | j        |           | S )N)r:   r9   )r:   )requires_grad)r/   to_emptyr0   r   r,   r1   
empty_liker3   r<   utilsswap_tensors)r5   r:   r9   r3   s       r   r=   zDoRALinear.to_empty`   s     	FG<<<FG<<<LT^F;;;.6
 
 
	 	  ;;;r   c                 V    t          | j                   t          | j                   d S )N)_lora_a_init_paramsr/   _lora_b_init_paramsr0   )r5   s    r   r4   z DoRALinear.initialize_parametersm   s*     	DK(((DK(((((r   c                 x   t          | j        j        | j        j        j        | j        j        j        g          rt          d          | j                            | j        j        j                  }| j        j        | j        j        z  }| j        	                    | 
                    ||                     dS )a=  
        DoRA initializes the magnitude vector such that its outputs are initially
        identical to standard LoRA's outputs.

        This must be called after loading/initializing base model and LoRA params.

        Raises:
            RuntimeError: If base or LoRA parameters are still on meta device.
        zUCannot initialize DoRA magnitude if base or LoRA parameters are still on meta device.N)r&   r    is_metar/   r0   RuntimeErrortodtyper3   copy__get_weight_norm)r5   base_weightlora_weights      r   initialize_dora_magnitudez$DoRALinear.initialize_dora_magnitudes   s     #"*"*
 
 		 g   knnT[%7%=>>k(4;+==T22;LLMMMMMr   c                     || j         |z  z   }t          j                            |d                              |j                  }|S )N   )dim)r$   r1   linalgnormrG   rH   )r5   r    rL   weight_norms       r   rJ   zDoRALinear._get_weight_norm   sC    $,44l''A'6699&,GGr   returnc                     g d}|S )z
        Return a list of strings corresponding to the names of the ``nn.Parameter`` s in
        the model coming from the adapter.

        For DoRA this means lora_a.weight, lora_b.weight, and magnitude.
        )zlora_a.weightzlora_b.weightr3   r   )r5   adapter_paramss     r   rV   zDoRALinear.adapter_params   s     IHHr   xc                 2   | j         r(t          || j                  }| j        r
|| j        z   }n t          j        || j        | j                  }| j        r|S |                     |          }| 	                    | 
                    |                    }t          j        | j
        j        j        d         | j
        j        j        |j                  }| 	                    | 
                    |                    j        }| j        }| j                            |j                  }|                     ||                                          }|                                }||z                      dd          }	|	dz
  |z  |	|z  | j        z  z   }
|
|z   S )z
        Args:
            x (torch.Tensor): input tensor with shape ``(..., in_dim)``

        Returns:
            Tensor: output tensor with shape ``(..., out_dim)``
        )inputr    rO   )r:   rH   )r%   r   r    r   r   Fr7   r*   r   r0   r/   r1   eyeshaper:   rH   Tr3   rG   rJ   detachviewr$   )r5   rW   base_outlora_outx_eyerL   r3   r    rS   mag_norm_scaledora_outs              r   forwardzDoRALinear.forward   s     	;!$+>>>H} 0#di/x4;	::H= 	OLLOO;;t{{1~~.. 	K$Q'0B0IQRQX
 
 
 kk$++e"4"4557N	((++FK4F4F4H4HII!((**#k1772>> Q%04<?@ (""r   )r   FF)__name__
__module____qualname____doc__intfloatboolr#   r   r   strr1   r:   r=   r4   no_gradrM   rJ   r   rV   Tensorrf   __classcell__)r8   s   @r   r   r      sn        F #,% ,%,% ,% 	,%
 ,% ,% ,% ,% ,% ,% ,% ,% ,%^ SW  !%U\3(>"?@KO   ) ) ) U]__N N _N0  
S	    $# $#%, $# $# $# $# $# $# $# $#r   r   rW   rT   c                 v    t           j                            | j        t	          j        d                     dS )z6
    Initialize LoRA A weight to Kaiming uniform.
       )aN)r   initkaiming_uniform_r    mathsqrtrW   s    r   rB   rB      s/     GQX166666r   c                 N    t           j                            | j                   dS )z,
    Initialize LoRA B weight to zeros.
    N)r   ru   zeros_r    ry   s    r   rC   rC      s      GNN18r   )rw   typingr   r   r   r1   torch.nn.functionalr   
functionalr[   torchao.dtypes.nf4tensorr   r   torchtune.modules.low_precisionr	   torchtune.modules.peftr
   Moduler   r)   rB   rC   r   r   r   <module>r      s)    ( ( ( ( ( ( ( ( ( (                 7 7 7 7 7 7 7 7 F F F F F F 0 0 0 0 0 0k# k# k# k# k#M k# k# k#\729 7 7 7 7 729       r   