
     `i|                         d dl mZmZ ddlmZ ddlmZ erddlmZ ddl	m
Z
mZmZmZ ddlmZ  e            rd d	lZ ej        e          Z G d
 de          Zd	S )    )TYPE_CHECKINGOptional   )HfQuantizer)get_module_from_name   )PreTrainedModel)is_fp_quant_availableis_qutlass_availableis_torch_availablelogging)QuantizationConfigMixinNc                        e Zd ZdZdZdZdZdgZdef fdZ	d Z
ddZdddddeddfdZ	 	 ddZddZdee         ded
ee         fdZedded         fd            ZddZddded
efdZ xZS ) FPQuantHfQuantizerz
    Quantizer for the FP-Quant method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
    FTfp_quantquantization_configc                 J     t                      j        |fi | || _        d S N)super__init__r   )selfr   kwargs	__class__s      ~/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/quantizers/quantizer_fp_quant.pyr   zFPQuantHfQuantizer.__init__+   s1    ,77777#6       c                 2   t           j                                        st          d          t	                      s| j        j        st          d          | j        j        rt          	                    d           t                      st          d          || j        j        st          d          t          |t                    rGd|                                v sd|                                v r| j        j        st          d          d S d S d S )	NzPFPQuant quantization is only supported on GPU. Please use a different quantizer.a  Using `fp_quant` with real quantization requires a **Blackwell GPU** and qutlass: `git clone https://github.com/IST-DASLab/qutlass.git && cd qutlass && pip install --no-build-isolation .`. You can use `FPQuantConfig(pseudoquantization=True, ...)` to use Triton-based pseudo-quantization. It doesn't provide any speedups but emulates the quantization behavior of the real quantization.zUsing pseudo-quantization for FP-Quant. This doesn't provide any speedups but emulates the quantization behavior of the real quantization.zGUsing `fp_quant` quantization requires fp_quant: `pip install fp_quant`zyYou are attempting to load a FPQuant model without setting device_map. Please set device_map comprised of 'cuda' devices.cpudiskzYou are attempting to load a FPQuant model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudais_availableNotImplementedErrorr   r   pseudoquantizationImportErrorloggerwarningr
   
ValueError
isinstancedictvalues)r   
device_mapr   s      r   validate_environmentz'FPQuantHfQuantizer.validate_environment/   sW   z&&(( 	%b   $%% 	d.F.Y 	 S   #6 	NN ]   %&& 	ighhhd&>&QF  
 z4((	*++----:;L;L;N;N1N1N,? 2O d  	 	1N1N1N1Nr   dtypetorch.dtypereturnc                     |'t                               d           t          j        }n#|t          j        k    rt	          d| d          |S )NzJ`dtype` is None. Setting `dtype=torch.bfloat16` for qutlass compatibility.zInvalid `dtype` z=. fp_quant quantization only supports `dtype=torch.bfloat16`.)r%   infor   bfloat16r'   )r   r-   s     r   update_dtypezFPQuantHfQuantizer.update_dtypeQ   sO    =KKdeeeNEEen$$ttttuuur   modelr	   param_valueztorch.Tensor
param_nametarget_deviceztorch.devicec                 D   t          ||          \  }}|                    d          rIt          j                            |                    |          d          |_        d |_        d |_        d S |                    d          rNt          j                            |                    |                    |_        d |_        d |_        d |_	        d S t          j                            |                    |                    |_        |
                                 d S )Nz.qweightF)requires_gradz	.dqweight)r   endswithr   nn	Parametertoqweightweightdqweightscalespre_forward)r   r4   r5   r6   r7   r   module_s           r   create_quantized_paramz)FPQuantHfQuantizer.create_quantized_paramZ   s    )
;;	 z** 	"X//}--# 0  FN !FM"FOF{++ 	#h001N1NOOFO FM!FN FMF **;>>-+H+HIIr   c                 v    ddl m} ddlm}  || || j                             | j        |j        _        d S )Nr   )replace_with_fp_quant_linearr   )adapt_fp_quant_config)fp_quant_linear_config)r   rG   integrations.fp_quantrH   r   config)r   r4   r   rG   rH   s        r   $_process_model_before_weight_loadingz7FPQuantHfQuantizer._process_model_before_weight_loading   sk    
 	:99999AAAAAA$$#8#89Q#R#R	
 	
 	
 	
 ,0+C(((r   c                     |S r    )r   r4   r   s      r   #_process_model_after_weight_loadingz6FPQuantHfQuantizer._process_model_after_weight_loading   s    r   missing_keysprefixc                     ddl m fd|                                D             dt          dt          ffdfd|D             S )Nr   FPQuantLinearc                 :    h | ]\  }}t          |          |S rN   )r(   ).0namerC   rT   s      r   	<setcomp>z9FPQuantHfQuantizer.update_missing_keys.<locals>.<setcomp>   s.    nnn<4JW]_lLmLmn$nnnr   keyr/   c                                            d          s                      d          rdS  d  t           fdD                       S )Nz.weightz.biasF.c              3   (   K   | ]}|v p|v V  d S r   rN   )rV   rW   full_keyrY   s     r   	<genexpr>zQFPQuantHfQuantizer.update_missing_keys.<locals>.should_exclude.<locals>.<genexpr>   s1      RR4ts{6dh&6RRRRRRr   )r:   any)rY   r]   fp_quant_namesrQ   s   `@r   should_excludez>FPQuantHfQuantizer.update_missing_keys.<locals>.should_exclude   sj    ||I&& #,,w*?*? u ((3((HRRRRR>RRRRRRr   c                 *    g | ]} |          |S rN   rN   )rV   rY   ra   s     r   
<listcomp>z:FPQuantHfQuantizer.update_missing_keys.<locals>.<listcomp>   s(    GGG>>#3F3FGGGGr   )r   rT   named_modulesstrbool)r   r4   rP   rQ   rT   r`   ra   s      `@@@r   update_missing_keysz&FPQuantHfQuantizer.update_missing_keys   s    ******nnnn53F3F3H3Hnnn	S 	S 	S 	S 	S 	S 	S 	S 	S HGGG|GGGGr   Nc                 V    | j         j        }|st                              d           |S )NzYou are attempting to train a model with FPQuant quantization. This is only supported when `store_master_weights=True`. Please set `store_master_weights=True` to train the model.)r   store_master_weightsr%   r&   )r   r4   	trainables      r   is_trainablezFPQuantHfQuantizer.is_trainable   s9    ,A	 	NN E   r   c                     dS )NTrN   )r   safe_serializations     r   is_serializablez"FPQuantHfQuantizer.is_serializable   s    tr   c                 d    ddl m} t          ||          \  }}t          ||          r|dv rdS dS )Nr   rS   )r?   r>   r@   TF)r   rT   r   r(   )r   r4   r6   r   rT   rC   tensor_names          r   param_needs_quantizationz+FPQuantHfQuantizer.param_needs_quantization   sO    ******25*EEfm,, 	@a1a1a45r   )r-   r.   r/   r.   )r4   r	   r   )__name__
__module____qualname____doc__requires_calibration requires_parameters_quantizationis_qat_trainablerequired_packagesr   r   r,   r3   re   rE   rL   rO   listrg   propertyr   rk   rn   rf   rq   __classcell__)r   s   @r   r   r   !   s         !'+$#7,C 7 7 7 7 7 7     D   $ $ $$ 	$
 &$ $ $ $LD D D D D   HtCy H# HRVWZR[ H H H H  (+<"=    X   .? S _c        r   r   )typingr   r   baser   quantizers_utilsr   modeling_utilsr	   utilsr
   r   r   r   utils.quantization_configr   r   
get_loggerrr   r%   r   rN   r   r   <module>r      s   + * * * * * * *       2 2 2 2 2 2  1000000 \ \ \ \ \ \ \ \ \ \ \ \ ? ? ? ? ? ?  LLL		H	%	%R R R R R R R R R Rr   