
     `i                         d dl mZmZmZ d dlmZ ddlmZ  e            rddlZ ej	        e
          Z G d de          ZdS )	   )is_compressed_tensors_availableis_torch_availablelogging)CompressedTensorsConfig   )HfQuantizer    Nc                        e Zd ZdZdZdgZdef fdZd Zdd
Z	d Z
d Zd Zed             Zd	efdZdd	efdZ xZS )CompressedTensorsHfQuantizerz
    Quantizer for the compressed_tensors package.  Loads and restores models to
    quantized state with compressed_tensors
    Tcompressed_tensorsquantization_configc                     t                      j        |fi | t                      st          d          |                                 ddlm} |                    |          | _        |j	        | _	        || _
        d S )NuUsing `compressed_tensors` quantized models requires the compressed-tensors library: `pip install compressed-tensors`r	   )ModelCompressor)super__init__r   ImportError	post_initcompressed_tensors.compressorsr   from_compression_config
compressorrun_compressedr   )selfr   kwargsr   	__class__s       /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/quantizers/quantizer_compressed_tensors.pyr   z%CompressedTensorsHfQuantizer.__init__$   s    ,77777.00 	3   	%%'''BBBBBB)AABUVV1@#6       c                 z    t                      st          d          t                      st          d          d S )Nr   z;torch is required for using compressed-tensors quantization)r   r   r   )r   argsr   s      r   validate_environmentz1CompressedTensorsHfQuantizer.validate_environment7   sS    .00 	3   "## 	][\\\	] 	]r   dtypetorch.dtypereturnc                     |'t                               d           t          j        }n*|t          j        k    rt                               d           |S )NzELoading model using torch.float16 for compressed-tensors quantizationzZWe suggest you to set `dtype=torch.float16` for better efficiency with compressed_tensors.)loggerinfotorchfloat16)r   r!   s     r   update_dtypez)CompressedTensorsHfQuantizer.update_dtypeA   sH    =KK_```MEEem##KKtuuur   c                     ddl m} | j        j        } |||| j                   | j        j        s| j        j        r| j                            |           d S d S )Nr	   )apply_quantization_configmodel)compressed_tensors.quantizationr+   r   r   r   is_quantization_compressedis_sparsification_compressedcompress_model)r   r-   r   r+   ct_quantization_configs        r   $_process_model_before_weight_loadingzACompressedTensorsHfQuantizer._process_model_before_weight_loadingI   s    MMMMMM!%!D 	"!%)?ATUUU$?	8'D	8 O***77777		8 	8r   c                 ~    | j         j        r| j        r| j         j        r| j                            |           dS dS )z3Decompress loaded model if necessary - need for qatr,   N)r   r/   r   r0   r   decompress_model)r   r-   r   s      r   #_process_model_after_weight_loadingz@CompressedTensorsHfQuantizer._process_model_after_weight_loadingV   sV     $?	:HLH[	:%B	: O,,5,99999	: 	:r   c                     dddddd}|                                 E|                                 j        ,|                                 j                            |           |S )Nlocal_colwiselocal_rowwise)z0layers.*.feed_forward.experts.*.gate_proj.weightz6layers.*.feed_forward.experts.*.gate_proj.weight_scalez.layers.*.feed_forward.experts.*.up_proj.weightz4layers.*.feed_forward.experts.*.up_proj.weight_scalez0layers.*.feed_forward.experts.*.down_proj.weight)get_text_configbase_model_tp_planupdate)r   configadditional_plans      r   update_tp_planz+CompressedTensorsHfQuantizer.update_tp_plan^   sm    @OFU>MDS@O
 
 !!##/F4J4J4L4L4_4k""$$7>>OOOr   c                     dS )NT r   s    r   is_trainablez)CompressedTensorsHfQuantizer.is_trainablek       tr   c                 ,    | j          p| j        j         S )z7Loaded Models can carry out quantization aware training)r   r   r/   rB   s    r   is_qat_trainablez-CompressedTensorsHfQuantizer.is_qat_trainableo   s     &&ad.F.a*aar   Nc                     dS )z>Models quantized using compressed tensors can be saved to diskTrA   )r   safe_serializations     r   is_serializablez,CompressedTensorsHfQuantizer.is_serializablet   rD   r   )r!   r"   r#   r"   )N)__name__
__module____qualname____doc__requires_calibrationrequired_packagesr   r   r    r)   r3   r6   r?   propertyrC   boolrF   rI   __classcell__)r   s   @r   r   r      s        
  -.7,C 7 7 7 7 7 7&] ] ]   8 8 8: : :     Xb$ b b b b
 $        r   r   )utilsr   r   r   utils.quantization_configr   baser   r'   
get_loggerrJ   r%   r   rA   r   r   <module>rW      s     Q P P P P P P P P P ? ? ? ? ? ?        LLL		H	%	%[ [ [ [ [; [ [ [ [ [r   