
     `i                         d dl Z d dlmZ d dlmZ ddlmZ erddlmZ ddl	m
Z
mZmZmZmZ dd	lmZmZ  e            rd dlZ ej        e          Z G d
 de          ZdS )    N)TYPE_CHECKING)version   )HfQuantizer   )PreTrainedModel)is_auto_gptq_availableis_gptqmodel_availableis_optimum_availableis_torch_availablelogging)
GPTQConfigQuantizationConfigMixinc                        e Zd ZdZdZg dZdZdef fdZd Z	ddZ
d ZddZddZed
efd            ZddZ xZS )GptqHfQuantizerz
    Quantizer of the GPTQ method - for GPTQ the quantizer support calibration of the model through
    `auto_gptq` or `gptqmodel` package. Quantization is done under the hood for users if they load a non-prequantized model.
    F)optimum	auto_gptq	gptqmodelNquantization_configc                      t                      j        |fi | t                      st          d          ddlm} |                    | j                                                  | _	        d S )NGLoading a GPTQ quantized model requires optimum (`pip install optimum`)r   )GPTQQuantizer)
super__init__r   ImportErroroptimum.gptqr   	from_dictr   to_dict_optimumoptimum_quantizer)selfr   kwargsr   	__class__s       z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/quantizers/quantizer_gptq.pyr   zGptqHfQuantizer.__init__-   s~    ,77777#%% 	ighhh......!.!8!89Q9a9a9c9c!d!d    c                 H   t                      st          d          t                      r(t                      rt                              d           t                      oFt          j        t          j	                            d                    t          j        d          k    pt                      }|s-t          j                                        st          d          t                      st                      st          d          t                      rVt          j        t          j	                            d                    t          j        d          k     rt          d          t                      rt          j        t          j	                            d	                    t          j        d
          k     sGt          j        t          j	                            d                    t          j        d          k     rt          d          d S d S )Nr   z4Detected gptqmodel and auto-gptq, will use gptqmodelz	auto-gptqz0.4.2z2GPU is required to quantize or run quantize model.z|Loading a GPTQ quantized model requires gptqmodel (`pip install gptqmodel`) or auto-gptq (`pip install auto-gptq`) library. r   zYou need a version of auto_gptq >= 0.4.2 to use GPTQ: `pip install --upgrade auto-gptq` or use gptqmodel by `pip install gptqmodel>=1.4.3`.r   z1.4.3r   1.23.99zJThe gptqmodel version should be >= 1.4.3, optimum version should >= 1.24.0)r   r   r	   r
   loggerwarningr   parse	importlibmetadatatorchcudais_availableRuntimeError)r    argsr!   gptq_supports_cpus       r#   validate_environmentz$GptqHfQuantizer.validate_environment6   s   #%% 	ighhh!## 	S(>(@(@ 	SNNQRRR #$$ `i088EEFFW^I_I__& $%% 	 ! 	l)@)@)B)B 	lSTTT(** 	l.D.F.F 	l O   $%% 
	l'-	8J8R8RS^8_8_*`*`cjcpd
 d
 +
 +
  ^   $%% 	lM),44[AABBW]SZE[E[[[}Y/77	BBCCgmT]F^F^^^jkkk		l 	l^^r$   dtypetorch.dtypereturnc                     |'t           j        }t                              d           n*|t           j        k    rt                              d           |S )NzLLoading the model in `torch.float16`. To overwrite it, set `dtype` manually.zLWe suggest you to set `dtype=torch.float16` for better efficiency with GPTQ.)r,   float16r'   info)r    r3   s     r#   update_dtypezGptqHfQuantizer.update_dtypeR   sI    =MEKKfggggem##KKfgggr$   c                     |dt          j        d          i}t                      s"|ddt          j        d          ifv r|ddik     |S )N cpur   )r,   devicer
   )r    
device_maps     r#   update_device_mapz!GptqHfQuantizer.update_device_mapZ   s^    el5112J%'' 	"J52u|TYGZGZB[:\,\,\2q'!!r$   modelr   c                 B   |j         j        dk    rt          d          | j        rxt	          j        t          j                            d                    t	          j        d          k    r| j        	                    |          }d S  | j        j	        |fi |}d S d S )N	input_idsz%We can only quantize pure text model.r   r&   )
r"   main_input_namer/   pre_quantizedr   r)   r*   r+   r   convert_modelr    r@   r!   s      r#   $_process_model_before_weight_loadingz4GptqHfQuantizer._process_model_before_weight_loadingb   s    ?*k99FGGG 	N}Y/77	BBCCw}U^G_G___.<<UCC<.<UMMfMM	N 	Nr$   c                 :   | j         r| j                            |          }d S | j        j        |j        | j        _        | j                            || j        j                   t          j        | j        	                                          |j
        _        d S N)rD   r   post_init_modelr   	tokenizername_or_pathquantize_modelr   r   to_dictconfigrF   s      r#   #_process_model_after_weight_loadingz3GptqHfQuantizer._process_model_after_weight_loadingm   s     	f*::5AAEEE'195:5G(2"11%9Q9[\\\/9/CDDZDbDbDdDd/e/eEL,,,r$   c                     dS NT )r    s    r#   is_trainablezGptqHfQuantizer.is_trainablew   s    tr$   c                     dS rR   rS   )r    safe_serializations     r#   is_serializablezGptqHfQuantizer.is_serializable{   s    tr$   )r3   r4   r5   r4   )r@   r   rI   )__name__
__module____qualname____doc__requires_calibrationrequired_packagesr   r   r   r2   r9   r?   rG   rP   propertyboolrT   rW   __classcell__)r"   s   @r#   r   r   #   s        
 !===e,C e e e e e el l l8     	N 	N 	N 	Nf f f f d    X       r$   r   )r*   typingr   	packagingr   baser   modeling_utilsr   utilsr	   r
   r   r   r   utils.quantization_configr   r   r,   
get_loggerrX   r'   r   rS   r$   r#   <module>rh      s                                1000000 u u u u u u u u u u u u u u K K K K K K K K  LLL		H	%	%Y Y Y Y Yk Y Y Y Y Yr$   