
     `i                         d dl mZmZ ddlmZ erddlmZ ddlmZm	Z	m
Z
mZ ddlmZ  e	            rd dlZ ej        e          Z G d	 d
e          ZdS )    )TYPE_CHECKINGOptional   )HfQuantizer   )PreTrainedModel)is_accelerate_availableis_torch_availableis_vptq_availablelogging)QuantizationConfigMixinNc                        e Zd ZdZdZdgZdef fdZd Zdd
Z		 dddde
ee                  fdZddZed	efd            ZddZ xZS )VptqHfQuantizerzS
    Quantizer of the VPTQ method. Enables the loading of prequantized models.
    Tvptqquantization_configc                 J     t                      j        |fi | || _        d S N)super__init__r   )selfr   kwargs	__class__s      z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/quantizers/quantizer_vptq.pyr   zVptqHfQuantizer.__init__(   s1    ,77777#6       c                 z    t                      st          d          t                      st          d          d S )NzGUsing `vptq` quantization requires Accelerate: `pip install accelerate`zEUsing `vptq` quantization requires VPTQ>=0.0.4: `pip install -U vptq`)r	   ImportErrorr   )r   argsr   s      r   validate_environmentz$VptqHfQuantizer.validate_environment,   sJ    &(( 	ighhh "" 	gefff	g 	gr   dtypetorch.dtypereturnc                 D   |t           j                                        r't           j        }t                              d           nXdd l}t          |dd           } |d          du rt          d          t           j	        }t                              d           |S )	NzCUDA available. Assuming VPTQ inference on GPU and loading the model in `torch.float16`. To overwrite it, set `dtype` manually.r   device_availabilityc                     dS NF )devices    r   <lambda>z.VptqHfQuantizer.update_dtype.<locals>.<lambda>=   s    Z_ r   cpuTzKNo GPU found. Please wait for the next release of VPTQ to use CPU inferencezVNo GPU found. Assuming VPTQ inference on CPU and loading the model in `torch.float32`.)
torchcudais_availablefloat16loggerinfor   getattrRuntimeErrorfloat32)r   r   r   r#   s       r   update_dtypezVptqHfQuantizer.update_dtype3   s    =z&&(( v V    &-d4IK_K_&`&`#&&u--55&'tuuutuuur   Nmodelr   keep_in_fp32_modulesc                     ddl m} |                     || j        j        |          | _         ||| j        | j                   | j        |j        _        dS )z
        we don't have param like modules_to_not_convert to indicate which layers should not be quantized
        because `quantization_config` include the layers that should be quantized
        r   )replace_with_vptq_linear)r   modules_to_not_convertN)integrationsr7   get_modules_to_not_convertr   r8   config)r   r4   r5   r   r7   s        r   $_process_model_before_weight_loadingz4VptqHfQuantizer._process_model_before_weight_loadingD   s|     	<;;;;;&*&E&E4+BDX'
 '
# 	!  $ 8#'#>	
 	
 	
 	

 ,0+C(((r   c                     |S r   r&   )r   r4   r   s      r   #_process_model_after_weight_loadingz3VptqHfQuantizer._process_model_after_weight_loading[   s    r   c                     dS r%   r&   )r   s    r   is_trainablezVptqHfQuantizer.is_trainable^   s    ur   c                     dS )NTr&   )r   safe_serializations     r   is_serializablezVptqHfQuantizer.is_serializableb   s    tr   )r   r    r!   r    r   )r4   r   )__name__
__module____qualname____doc__requires_calibrationrequired_packagesr   r   r   r3   r   liststrr<   r>   propertyboolr@   rC   __classcell__)r   s   @r   r   r       s          7,C 7 7 7 7 7 7g g g   ( 59D D D 'tCy1D D D D.    d    X       r   r   )typingr   r   baser   modeling_utilsr   utilsr	   r
   r   r   utils.quantization_configr   r*   
get_loggerrD   r.   r   r&   r   r   <module>rU      s    + * * * * * * *        1000000 [ [ [ [ [ [ [ [ [ [ [ [ ? ? ? ? ? ?  LLL		H	%	%C C C C Ck C C C C Cr   