
     `i                         d dl Z d dlmZmZmZ d dlmZ ddlmZ ddl	m
Z
 erddlmZ dd	lmZmZmZmZ dd
lmZ  e            rd dlZ ej        e          Z G d de          ZdS )    N)TYPE_CHECKINGOptionalUnion)version   )HfQuantizer)get_module_from_name   )PreTrainedModel)is_accelerate_availableis_optimum_quanto_availableis_torch_availablelogging)QuantoConfigc                   X    e Zd ZdZddgZdZdZdef fdZd Z	d	 Z
d
 Zd%dZdee         dedee         fdZdddedefdZdeeeeef         f         deeeeef         f         fdZdddddeddfdZd&dZ	 d'ddd eee                  fd!Zd" Zedefd#            Zd'd$Z xZS )(QuantoHfQuantizerz*
    Quantizer for the quanto library
    quanto
accelerateTFquantization_configc                 d     t                      j        |fi | |                                  d S N)super__init__	post_init)selfr   kwargs	__class__s      |/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/quantizers/quantizer_quanto.pyr   zQuantoHfQuantizer.__init__2   s9    ,77777    c                 N    | j         j        | j        st          d          dS dS )z 
        Safety checker
        NzWe don't support quantizing the activations with transformers library.Use quanto library for more complex use cases such as activations quantization, calibration and quantization aware training.)r   activationspre_quantized
ValueErrorr   s    r   r   zQuantoHfQuantizer.post_init6   s>     #/;DDV;O   <;;;r   c                 z    t                      st          d          t                      st          d          d S )NzhLoading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)z`Loading an optimum-quanto quantized model requires accelerate library (`pip install accelerate`))r   ImportErrorr   )r   argsr   s      r   validate_environmentz&QuantoHfQuantizer.validate_environment@   sT    *,, 	z   '(( 	r  	 	r   c                 F    |ddi}t                               d           |S )N cpuzThe device_map was not initialized. Setting device_map to {'':'cpu'}. If you want to use the model for inference, please set device_map ='auto')loggerinfo)r   
device_maps     r   update_device_mapz#QuantoHfQuantizer.update_device_mapJ   s5    eJKK\  
 r   dtypetorch.dtypereturnc                 V    |&t                               d           t          j        }|S )NzPYou did not specify `dtype` in `from_pretrained`. Setting it to `torch.float32`.)r,   r-   torchfloat32)r   r0   s     r   update_dtypezQuantoHfQuantizer.update_dtypeT   s&    =KKjkkkMEr   missing_keysprefixc                 B   t                      rddlm} g |                                D ]f\  }}t	          ||          rQ|D ]N}||v s	|| d| v r?|                    d          s*|                    d          s                    |           Ogfd|D             S )Nr   QModuleMixin.z.weightz.biasc                     g | ]}|v|	S  r>   ).0knot_missing_keyss     r   
<listcomp>z9QuantoHfQuantizer.update_missing_keys.<locals>.<listcomp>h   s$    EEEa14D+D+D+D+D+Dr   )r   optimum.quantor;   named_modules
isinstanceendswithappend)	r   modelr7   r8   r;   namemodulemissingrA   s	           @r   update_missing_keysz%QuantoHfQuantizer.update_missing_keysZ   s    &(( 	4333333!//11 	9 	9LD&&,// 9+ 9 9GDv4I4I4I4I,I,I ' 0 0 ; ; -J ' 0 0 9 9 -J )//888EEEE<EEEEr   rH   r   
param_namec                     t                      rddlm} t          ||          \  }}t	          ||          rd|v r|j         S dS )Nr   r:   weightF)r   rC   r;   r	   rE   frozen)r   rH   rM   r   r;   rJ   tensor_names          r   param_needs_quantizationz*QuantoHfQuantizer.param_needs_quantizationj   sb    &(( 	433333325*EEfl++ 	K0G0G}$$5r   
max_memoryc                 B    d |                                 D             }|S )Nc                      i | ]\  }}||d z  S )g?r>   )r?   keyvals      r   
<dictcomp>z7QuantoHfQuantizer.adjust_max_memory.<locals>.<dictcomp>w   s"    III(#sc3:IIIr   )items)r   rS   s     r   adjust_max_memoryz#QuantoHfQuantizer.adjust_max_memoryv   s'    IIj6F6F6H6HIII
r   param_valueztorch.Tensortarget_deviceztorch.devicec                     ddl m}  ||||                    |                     t          ||          \  }}|                                 d|j        _        d S )Nr
   )_load_parameter_into_modelF)modeling_utilsr^   tor	   freezerO   requires_grad)	r   rH   r[   rM   r\   r   r^   rJ   _s	            r   create_quantized_paramz(QuantoHfQuantizer.create_quantized_paramz   sh     	@?????""5*knn]6S6STTT(
;;	&+###r   target_dtypec                 "   t          j        t          j                             d                    t          j        d          k    r:ddlm} t          j        |j        |j	        |j
        d}|| j        j                 }|S t          d          )Nr   z0.27.0r   )CustomDtype)int8float8int4int2zYou are using `device_map='auto'` on an optimum-quanto quantized model. To automatically compute the appropriate device map, you should upgrade your `accelerate` library,`pip install --upgrade accelerate` or install it from source.)r   parse	importlibmetadataaccelerate.utilsrg   r4   rh   FP8INT4INT2r   weightsr#   )r   re   rg   mappings       r   adjust_target_dtypez%QuantoHfQuantizer.adjust_target_dtype   s    =+33LAABBW]S[E\E\\\444444 
%/#(#(	 G #4#;#CDLP  r   Nkeep_in_fp32_modulesc                     ddl m} |                     || j        j        |          | _         ||| j        | j                  \  }}| j        |j        _        d S )Nr
   )replace_with_quanto_layers)modules_to_not_convertr   )integrationsrx   get_modules_to_not_convertr   ry   config)r   rH   rv   r   rx   rc   s         r   $_process_model_before_weight_loadingz6QuantoHfQuantizer._process_model_before_weight_loading   s|     	>=====&*&E&E4+BDX'
 '
# .-$*E[_[s
 
 
q ,0+C(((r   c                     |S r   r>   )r   rH   r   s      r   #_process_model_after_weight_loadingz5QuantoHfQuantizer._process_model_after_weight_loading   s    r   c                     dS )NTr>   r$   s    r   is_trainablezQuantoHfQuantizer.is_trainable   s    tr   c                     dS )NFr>   )r   safe_serializations     r   is_serializablez!QuantoHfQuantizer.is_serializable   s    ur   )r0   r1   r2   r1   )re   r1   r2   r1   r   )__name__
__module____qualname____doc__required_packages requires_parameters_quantizationrequires_calibrationr   r   r   r(   r/   r6   liststrrL   boolrR   dictr   intrZ   rd   ru   r   r}   r   propertyr   r   __classcell__)r   s   @r   r   r   )   s2         "<0'+$ L               FtCy F# FRVWZR[ F F F F 
.? 
S 
_c 
 
 
 
DeCHo1E,F 4PSUZ[^`c[cUdPdKe    , , $, 	,
 &, , , ,   ( UYD D&D>FtCy>QD D D D   d    X       r   r   )rm   typingr   r   r   	packagingr   baser   quantizers_utilsr	   r_   r   utilsr   r   r   r   utils.quantization_configr   r4   
get_loggerr   r,   r   r>   r   r   <module>r      s?       1 1 1 1 1 1 1 1 1 1             2 2 2 2 2 2  1000000            5 4 4 4 4 4  LLL		H	%	%I I I I I I I I I Ir   