
     `iz                         d dl mZmZ ddlmZ erddlmZ ddlmZm	Z	m
Z
mZ ddlmZ  e
            rd dlZ ej        e          Z G d	 d
e          ZdS )    )TYPE_CHECKINGOptional   )HfQuantizer   )PreTrainedModel)is_accelerate_availableis_eetq_availableis_torch_availablelogging)get_module_from_nameNc                        e Zd ZdZdZdZddgZ fdZd ZddZ	ddde
d
efdZddddde
ddfdZddZ	 ddddeee
                  fdZddZed
efd            Z xZS )EetqHfQuantizera  
    8-bit quantization from EETQ quantization method:
        before loading: converts transformer layers into W8A16Linear during loading: load 16bit weight and pass to the
        layer object after: quantizes individual weights in Linear8bitLt into 8bit at first .cuda() call
    TFeetq
acceleratec                 J     t                      j        |fi | || _        d S N)super__init__quantization_config)selfr   kwargs	__class__s      z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/quantizers/quantizer_eetq.pyr   zEetqHfQuantizer.__init__-   s1    ,77777#6       c                    t                      st          d          	 dd l}n4# t          $ r'}dt          |          v rt          d          | d }~ww xY wt	                      st          d          |                    dd          s|                    dd          rt          d	          t          j        	                                st          d
          |                    d          }|t                              d           d S |Pt          |t                    r=d|                                v sd|                                v rt          d          d S d S d S )NzUsing `eetq` 8-bit quantization requires eetq.Please install the latest version of eetq from : https://github.com/NetEase-FuXi/EETQr   shard_checkpointzYou are using a version of EETQ that is incompatible with the current transformers version. Either downgrade transformers to <= v4.46.3 or, if available, upgrade EETQ to > v1.0.0.zNLoading an EETQ quantized model requires accelerate (`pip install accelerate`)from_tfF	from_flaxzConverting into 8-bit weights from tf/flax weights is currently not supported, please make sure the weights are in PyTorch format.z/No GPU found. A GPU is needed for quantization.
device_mapzYou have loaded an EETQ model on CPU and have a CUDA device available, make sure to set your model on a GPU device in order to run your model.cpudiskzYou are attempting to load an EETQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)r
   ImportErrorr   strr	   get
ValueErrortorchcudais_availableRuntimeErrorloggerwarning_once
isinstancedictvalues)r   argsr   r   excr    s         r   validate_environmentz$EetqHfQuantizer.validate_environment1   s    "" 	h  
	KKKK 
	 
	 
	!SXX-- "n  
 
	 '(( 	pnooo::i'' 	6::k5+I+I 	;  
 z&&(( 	RPQQQZZ--
I     #*d++ *:K:K:M:M1M1MQW[e[l[l[n[nQnQn h   $# QnQns   $ 
A"AAdtypetorch.dtypereturnc                     |(t           j        }t                              d|           n*|t           j        k    rt                              d           |S )NzOverriding dtype=%s with `dtype=torch.float16` due to requirements of `eetq` to enable model loading in 8-bit. Pass your own dtype to specify the dtype of the remaining non-linear layers or pass dtype=torch.float16 to remove this warning.zLWe suggest you to set `dtype=torch.float16` for better efficiency with EETQ.)r'   float16r+   info)r   r3   s     r   update_dtypezEetqHfQuantizer.update_dtype_   sZ    =MEKK?     em##KKfgggr   modelr   
param_namec                 z    ddl m} t          ||          \  }}t          ||          r| j        s|dk    rdS dS dS )Nr   )
EetqLinearbiasFT)r   r=   r   r-   pre_quantized)r   r:   r;   r   r=   moduletensor_names          r   param_needs_quantizationz(EetqHfQuantizer.param_needs_quantizationm   s^    ######25*EEfj)) 	! [F%:%:utur   param_valueztorch.Tensortarget_deviceztorch.devicec                    ddl m}m} t          ||          \  }}	 ||          \  }
}t	          ||          rM| j        s|	dk    r+|	dk    r$|j        t          j        k    rt          d          n|	dk    rt          d          |

                    |          |j        |	<   |                    d|
                    |                     d S )	Nr   )r=   quantize_and_preprocess_weightsr>   weightz6Expect quantized weights but got an unquantized weightweight_scalez;Expect unquantized weights but got a quantized weight_scaleweight_scales)r   r=   rF   r   r-   r?   r3   r'   int8r&   to_buffersregister)r   r:   rC   r;   rD   r   r=   rF   r@   rA   	new_valuerH   s               r   create_quantized_paramz&EetqHfQuantizer.create_quantized_paramy   s     	EDDDDDDD25*EE"A"A+"N"N	< fj)) 	d! d[F%:%:(**{/@EJ/N/N$%]^^^.00$%bccc'0||M'B'B$)G)GHHHHHr   c                     |S r    )r   r:   r   s      r   #_process_model_after_weight_loadingz3EetqHfQuantizer._process_model_after_weight_loading   s    r   Nkeep_in_fp32_modulesc                     ddl m} |                     || j        j        |          | _         ||| j        | j        | j                  }| j        |j        _        d S )Nr   )replace_with_eetq_linear)modules_to_not_convertr   r?   )integrationsrU   get_modules_to_not_convertr   rV   r?   config)r   r:   rS   r   rU   s        r   $_process_model_before_weight_loadingz4EetqHfQuantizer._process_model_before_weight_loading   s     	<;;;;;&*&E&E4+BDX'
 '
# )(#'#> $ 8,	
 
 
 ,0+C(((r   c                     dS NTrQ   )r   safe_serializations     r   is_serializablezEetqHfQuantizer.is_serializable   s    tr   c                     dS r\   rQ   )r   s    r   is_trainablezEetqHfQuantizer.is_trainable   s    tr   )r3   r4   r5   r4   )r:   r   r   )__name__
__module____qualname____doc__ requires_parameters_quantizationrequires_calibrationrequired_packagesr   r2   r9   r$   boolrB   rO   rR   r   listrZ   r^   propertyr`   __classcell__)r   s   @r   r   r   !   s{         (,$ .7 7 7 7 7, , ,\   
.? 
S 
_c 
 
 
 
I I $I 	I
 &I I I I2    59D D D 'tCy1D D D D*    d    X    r   r   )typingr   r   baser   modeling_utilsr   utilsr	   r
   r   r   quantizers_utilsr   r'   
get_loggerra   r+   r   rQ   r   r   <module>rr      s    + * * * * * * *        1000000 [ [ [ [ [ [ [ [ [ [ [ [ 2 2 2 2 2 2  LLL 
	H	%	%N N N N Nk N N N N Nr   