
    *`i-                        d dl mZ d dlmZ d dlmZmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZmZ d dlmZmZ d dlmZmZ d dlmZmZmZ d d	lmZ g d
Z G d dee          Zej         ej!        ej"        ej#        gZ$dZ%dZ& G d de          Z'dS )    )defaultdict)Enum)	AnnotatedAnyDictListOptionalSetUnion)CompressionFormat)DynamicTypeQuantizationArgs)QuantizationSchemepreset_name_to_scheme)is_module_quantizedmodule_type)	BaseModel
ConfigDictField)Module)QuantizationStatusQuantizationConfigLIFECYCLE_ORDERDEFAULT_QUANTIZATION_METHODDEFAULT_QUANTIZATION_FORMATc                   b    e Zd ZdZdZdZdZdZede	d          fd            Z
d Zd	 Zd
 Zd ZdS )r   a@  
    Enum storing the different states a quantized layer can be in

    Initialized: scale, zero points and observers have been attached to the layer but
    are set to dummy values (not yet calibrated)
    Calibration: scale and zero points have been calibrated through OBCQ or similar
    algorithm, observers are still attached
    Frozen: scale and zero points are finalized, observers have been deleted, weights
    are still in their original precision
    Compressed: weights have been converted to their target type or compressed to
    their closed approximation
    initializedcalibrationfrozen
compressedreturnc                     dS )zG
        :return: list of correct quantization lifecycle order
        N )clss    /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/compressed_tensors/quantization/quant_config.pylifecycle_orderz"QuantizationStatus.lifecycle_order9   s	    
 	    c                     |dS t          || j                  st          t                              |           t                              |          k    S NT
isinstance	__class__NotImplementedErrorr   indexselfothers     r%   __ge__zQuantizationStatus.__ge__@   sM    =4%00 	&%%$$T**o.C.CE.J.JJJr'   c                     |dS t          || j                  st          t                              |           t                              |          k    S r)   r*   r/   s     r%   __gt__zQuantizationStatus.__gt__G   sM    =4%00 	&%%$$T**_-B-B5-I-IIIr'   c                     |dS t          || j                  st          t                              |           t                              |          k     S NFr*   r/   s     r%   __lt__zQuantizationStatus.__lt__N   sM    =5%00 	&%%$$T**_-B-B5-I-IIIr'   c                     |dS t          || j                  st          t                              |           t                              |          k    S r6   r*   r/   s     r%   __le__zQuantizationStatus.__le__U   sM    =5%00 	&%%$$T**o.C.CE.J.JJJr'   N)__name__
__module____qualname____doc__INITIALIZEDCALIBRATIONFROZEN
COMPRESSEDclassmethodr   r&   r2   r4   r7   r9   r#   r'   r%   r   r   &   s           KKFJ%9 :    [K K KJ J JJ J JK K K K Kr'   r   zcompressed-tensors	fakequantc            	          e Zd ZU dZeeeeee         f         f         e	d<   e
Zee	d<   dZee         e	d<   eZee	d<   ej        Zee	d<   dZee         e	d<    ee	          Zeee                  e	d
<   dZee ed          f         e	d<   d Zd Ze	 ddedeeeef                  ded          fd            Z d Z! e"d
          Z#dS )r   a=  
    Full configuration specifying how a model is quantized. Each quantized layer is
    mapped to a QuantizationScheme in config_groups.

    :param config_groups: dict of QuantizationSchemes specifying the quantization
    settings for each quantized layer. A group could also be a reference to
    a predefined scheme name, mapped to a list of its target layers/classes
    :param quant_method: a constant used to differentiate compressed-tensors
    quantization from other quantization configs
    :param format: specifies how the quantized model is stored on disk
    :quantization_status: specifies the current status of all quantized layers. It is
        assumed all layers are in the same state.
    :param kv_cache_scheme: optional QuantizationArgs, that specify the
        quantization of the kv cache. If None, kv cache is not quantized.
        When applying kv cache quantization to transformer AutoModelForCausalLM,
        the kv_cache_scheme gets converted into a QuantizationScheme that:
            - targets the `q_proj` and `k_proj` modules of the model. The outputs
              of those modules are the keys and values that might be cached
            - quantizes the outputs of the aformentioned layers, so that
              keys and values are compressed before storing them in the cache
        There is an explicit assumption that the model contains modules with
        `k_proj` and `v_proj` in their names. If this is not the case
        and kv_cache_scheme != None, the quantization of kv cache will fail
    :global_compression_ratio: optional informational config to report the model
        compression ratio acheived by the quantization config
    :ignore: optional list of layers to ignore from config_groups. Layers in this list
        are not quantized even if they match up with a target in config_groups
    config_groupsquant_methodNkv_cache_schemeformatquantization_statusglobal_compression_ratio)default_factoryignoreT)excluderun_compressedc                     | j                                         D ]4\  }}t          |t                    rt	          ||          | j         |<   5dS )zh
        updates any quantization schemes defined as presets to be fully loaded
        schemes
        )nametargetsN)rE   itemsr+   r   r   )r0   _QuantizationConfig__context
group_nametargets_or_schemes       r%   model_post_initz"QuantizationConfig.model_post_init   sn    
 .2-?-E-E-G-G 	 	)J)+-?@@ -B). . .Dz**	 	r'   c                 *    |                                  S N)
model_dump)r0   s    r%   to_dictzQuantizationConfig.to_dict   s       r'   modelr!   c                 j   ddl m} ddlm} t	                      }d}t                      }t          t                    }d}|                                 D ]\  }	}
t          |
          }t          |
          o ||
           pt          |
|          }t          |
          o
 ||
          }|rIt          |
d|          }|                    |           |
j        |vr|                    |
j                   |rt          |
d|          }|
j        j        }|s$||vrg ||<   ||                             |	           t!          |          dk    r|dS g }|                                D ]\  }}||v r||z  }i }t%          |          D ]\  }}dt'          |          z   }|||<   |4|t(          j        k    rt,          j        j        }nSt,          j        j        }nAt5          |t                    r,t!          |          dk    rt,          j        j        n|d         }t9          |||d||          S )	a  
        Converts a model into its associated QuantizationConfig based on the
        QuantizationScheme attached to each quantized module

        :param model: model to calculate quantization scheme of
        :return: filled out QuantizationScheme for the input model
        r   )	IMPL_ATTR)is_attention_moduleNrI   group_   )rE   rI   rG   rJ   rH   rL   )compressed_tensors.modelingr]   4compressed_tensors.quantization.lifecycle.initializer^   listsetr   named_modulesr   r   hasattrgetattraddquantization_schemeappendinput_activationslenrR   	enumeratestrr   rA   r   int_quantizedvaluedenser+   mixed_precisionr   )r[   rH   r]   r^   quantization_schemesmodel_statusquantization_type_namesrL   rG   rP   	submodule
layer_typehas_config_grouphas_kv_cacheconsolidated_ignoreignore_namesrE   idxschemerT   s                       r%   from_pretrainedz"QuantizationConfig.from_pretrained   s    	:99999	
 	
 	
 	
 	
 	
 :>  -0EE (34'8'8 7;$2244 	0 	0OD)))44J  39==  ''	222Sgi6S6S  /y99 >Q>Q? ?L   O&y2GVV'++J77708LLL(//	0MNNN R&y2GVV"+"?"Q# 0V++)+F:&z"))$/// $%%**/F4 !(. 	4 	4$J444#|3#
 $%9:: 	/ 	/KC!CHH,J(.M*%% >1<<<*8>*06%% 	 v;;?? "177AY  "' ,+%)&
 
 
 	
r'   c                     | j         dS | j                                        D ]?\  }}|j        |j        j        dt
          j        fv r dS |j        |j        j        s dS @dS )NTF)rG   rE   rR   rk   dynamicr   LOCALoutput_activations)r0   _r}   s      r%   requires_calibration_dataz,QuantizationConfig.requires_calibration_data  s    +4+1133 	  	 IAv'3+3{?P7QQQ44(408  44ur'   )extrarX   )$r:   r;   r<   r=   r   rn   r   r   r   __annotations__r   rF   rG   r	   r   r   rH   r   r>   rI   rJ   floatr   rc   rL   rN   r   r   rV   rZ   staticmethodr   r~   r   r   model_configr#   r'   r%   r   r   h   s         : U#5tCy#@AABBBB3L#33326OX./666-FC---.@.L+LLL04huo444"'%"="="=FHT#Y=== ;?NIc55#6#6#667>>>  ! ! ! <@f
 f
f
'c4i(89f
	&	'f
 f
 f
 \f
P   :H---LLLr'   r   N)(collectionsr   enumr   typingr   r   r   r   r	   r
   r   compressed_tensors.configr   *compressed_tensors.quantization.quant_argsr   r   ,compressed_tensors.quantization.quant_schemer   r   %compressed_tensors.quantization.utilsr   r   pydanticr   r   r   torch.nnr   __all__rn   r   r>   r?   r@   rA   r   r   r   r   r#   r'   r%   <module>r      s   $ # # # # #       C C C C C C C C C C C C C C C C C C 7 7 7 7 7 7 T T T T T T T T        S R R R R R R R 1 1 1 1 1 1 1 1 1 1        4K 4K 4K 4K 4Kd 4K 4K 4Kp ""!	 3 ) r. r. r. r. r. r. r. r. r. r.r'   