
    *`i*                     x   d dl Z d dlmZ d dlmZmZ d dlZd dlmZ d dl	m
Z
mZmZmZmZ d dlmZmZmZ g dZ G d d	e          Z	 d
edee         defdZd
edefdZ e            Z e edej        ej        ddde
j        e
j                            Z e edej        ej        dddde
j        e
j        	  	         edej        ej        dej        dde
j        e
j        	  	                  Z  e edej        ej!        dddej"        ej"                            Z# e edej        ej!        dddej"        ej"                   edej        ej!        dddej"        ej"                            Z$ e edej%        ej&        dd           edej%        ej'        ddd                    Z( e edej%        ej&        dd                    Z) e edej%        ej!        ddd                    Z* e edej%        ej!        ddd                    Z+ e edej%        dej!        dd           edej%        ej'        ddd                    Z, e edej        ej-        dd           edej        ej-        dd                    Z. e edej        ej&        dd           edej        ej'        ddd                    Z/ e edej        ej0        ddddg            edej        ej!        dddd!                    Z1ee)e*e+e(e(e,e.e/e1ee e#e$d"Z2dS )#    N)deepcopy)ListOptional)CompressionFormat)FP8_E4M3_DATADynamicTypeQuantizationArgsQuantizationStrategyQuantizationType)	BaseModel
ConfigDictmodel_validator)QuantizationSchemepreset_name_to_schemeis_preset_schemec                       e Zd ZU dZee         ed<   dZee	         ed<   dZ
ee	         ed<   dZee	         ed<   dZee         ed<    ed	          dd            Z ed          ZdS )r   a  
    Set of QuantizationArgs defining how the weights, inputs and outputs of target list
    of modules should be quantized

    :param targets: list of modules to apply the QuantizationArgs to, can be layer
    names, layer types or a regular expression, typically ["Linear"]
    :param weights: quantization config for layer weights
    :param input_activations: quantization config for layer inputs
    :param output_activations: quantization config for layer outputs
    :param format: CompressionFormat for the layer
    targetsNweightsinput_activationsoutput_activationsformatafter)modemodelreturnc                    | j         }| j        }| j        }| j        }||j        t
          j        t
          j        t
          j        t
          j	        t
          j
        fvrE|j        t
          j        k    r|j        du rt          d          t          d|j         d          |j        t          d          ||j        t          d          |t          j        j        k    rt          d          |ri|rg|j        t
          j        k    rR|j        t
          j        k    r=|j        |j        k    r-t'          j        d|j         d	|j         d
t*          d           | S )NTzDStatic and local group-wise activation quantization is not supportedzUsing z6 strategy is not supported for activation quantizationz*Cannot apply actorder to input activationsz+Cannot apply actorder to output activationszBmixed-precision cannot be set as a format for a QuantizationSchemezXUsing GROUP strategy for both weights and input_activations with different group sizes (z vs zu) may complicate fused kernel implementations. Consider using TENSOR_GROUP strategy for both or matching group sizes.   )
stacklevel)r   r   r   r   strategyr
   TOKENTENSORGROUPTENSOR_GROUP	ATTN_HEADdynamicNotImplementedErroractorder
ValueErrorr   mixed_precisionvalue
group_sizewarningswarnUserWarning)r   inputsoutputsr   r   s        /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/compressed_tensors/quantization/quant_scheme.pyvalidate_model_afterz'QuantizationScheme.validate_model_after8   s   (*-$*$+$*$1$.'   O';'AAA$..-8  
 *.V_ . . .  
 * !MNNN+ !NOOO&6<<<T  
 		  $8$>>>#7#==="f&777M/6/A $  
         forbid)extra)r   r   r   r   )__name__
__module____qualname____doc__r   str__annotations__r   r   r	   r   r   r   r   r2   r   model_config r3   r1   r   r   %   s         
 
 #Y*.GX&'...48x 0188859!12999 FHSM   _'"""9 9 9 #"9v :H---LLLr3   r   namer   r   c           	          |                                  } | t          vr9t          d|  dt          t                                                               t          t          |                    }t          dd|i|S )a  
    :param name: preset quantization settings name. must exist in upper case in
        PRESET_SCHEMES
    :param targets: list of quantization targets to be passed to the Scheme
    :return: new QuantizationScheme for a given name with the given targets
    zUnknown preset scheme name z, available names: r   r=   )upperPRESET_SCHEMESKeyErrorlistkeysr   r   )r>   r   scheme_argss      r1   r   r   |   s     ::<<D>!!>$ > > $^%8%8%:%: ; ;> >
 
 	

 >$/00K  
  r3   c                 8    |                                  t          v S )zn
    :param name: preset quantization settings name
    :return: True if the name is a preset scheme name
    )r@   rA   )r>   s    r1   r   r      s    
 ::<<>))r3      TF   )num_bitstyper   	symmetricr%   r+   scale_dtypezp_dtype)r   static_minmax)	rI   rJ   r   rK   r%   r+   observerrL   rM   )r   r       )rI   rJ   r   r%   rK   r+   rL   rM      )rI   rJ   r   rK   r%   )rI   rJ   r   rK   r%   rO      )rI   rJ   r   r+   rK   r%   )rI   rJ   r+   r   rK   r%   )rI   rJ   r   rK   r%   block_structure)rI   rJ   r   rK   r%   rO   r+   )UNQUANTIZEDW8A16W4A16
W4A16_ASYMW8A8INT8W4A8FP8FP8_DYNAMIC	FP8_BLOCKNVFP4A16NVFP4MXFP4A16MXFP4)3r,   copyr   typingr   r   torchcompressed_tensors.configr   *compressed_tensors.quantization.quant_argsr   r   r	   r
   r   pydanticr   r   r   __all__r   r:   r   boolr   dictrT   FLOATr#   dtyper^   LOCALr_   r"   uint8r`   ra   INTCHANNELr    	INT8_W8A8rU   rV   rW   	INT8_W4A8r!   r[   r\   BLOCKr]   rA   r=   r3   r1   <module>rt      sc          ! ! ! ! ! ! ! !  7 7 7 7 7 7              < ; ; ; ; ; ; ; ; ;  O. O. O. O. O. O. O. O.d
 d3i <N    ,*3 *4 * * * * dff4#%2!'$	 	 	   	#%2 !'$
 
 
 '&#%2! !'$
 
 
	 	 	2 4#%+K	 	 	   	#%+K	 	 	 '&#%+K	 	 		 	 	2 D!%-   '&!%+    	& 	!%-  	 	 	 	!%+  		 		 		 T!%+  	 	 	
 D!%+   '&!%+    	( d#%,   '&#%,    $ d#%-   '&#%+    * D#%+c
   '&#%+    	, % r3   