
    Pi                     \    d dl mZ d dlZd dlmZ d dlmZmZ  G d dej                  Z	dS )    )OptionalN)
linear_nf4to_nf4c                        e Zd ZdZ	 	 	 ddedededeej                 deej	                 f
 fd	Z
d
ej        dej        fdZ xZS )FrozenNF4LinearaN  
    A linear layer similar to ``torch.nn.Linear`` but uses a quantized
    NF4Tensor as its weight. This class also freezes its ``weight`` parameter
    and is meant to be used as the base Linear layer for modeling
    use cases such as QLoRA where base model parameters are frozen.

    Args:
        in_dim (int): input dimension
        out_dim (int): output dimension
        bias (bool): whether to include bias in the linear layer. Default: False
        device (Optional[torch.device]): device to use for the underlying weight. If ``None``, uses the default
            device given by `torch.get_default_device()`.
        dtype (Optional[torch.dtype]): dtype to use for the underlying weight. If ``None``, uses the default
        **quantization_kwargs: Keyword arguments to pass to `to_nf4` when quantizing the base linear weight.
            Examples of valid arguments are `block_size` and `scaler_block_size`, which control the granularity of
            weight quantization and scaler quantization respectively. This is only used if `quantize_base` is True.
            Default None
    FNin_dimout_dimbiasdevicedtypec                 v   t                                          |||||           | j                            d           | j        | j                            d           t          | j        fi |}t          j                            | j        t          j	        
                    |d                     d S )N)r
   r   r   F)requires_grad)super__init__weightrequires_grad_r
   r   torchutilsswap_tensorsnn	Parameter)	selfr   r	   r
   r   r   quantization_kwargs
nf4_weight	__class__s	           ~/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/modules/low_precision/nf4_linear.pyr   zFrozenNF4Linear.__init__#   s     	tF%PPP""5)))9 I$$U+++DK??+>??
 	  K++Je+LL	
 	
 	
 	
 	
    inputreturnc                 T    t          || j                  }| j        
|| j        z   }|S )ak  
        Runs linear operation with input tensor as given by `input`. Computation happens in higher
        precision, though only the nf4 weight is saved for backward for gradient computation to ensure
        additional memory is not used.
        Args:
            input (torch.Tensor): input tensor

        Returns:
            Tensor: output tensor
        )r   r   )r   r   r
   )r   r   outs      r   forwardzFrozenNF4Linear.forward7   s0     uT[9999 	/C
r   )FNN)__name__
__module____qualname____doc__intboolr   r   r   r   r   Tensorr"   __classcell__)r   s   @r   r   r      s         . )-'+
 

 
 	

 &
 $
 
 
 
 
 
(U\ el        r   r   )
typingr   r   torch.nnr   torchao.dtypes.nf4tensorr   r   Linearr    r   r   <module>r0      s                 7 7 7 7 7 7 7 76 6 6 6 6bi 6 6 6 6 6r   