
    *`i&                        d dl mZ d dlmZ d dlmZmZmZ d dlmZ d dlm	Z	 d dl
Z
d dlmZ d dlmZmZ d dlmZmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZ d dlmZmZmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) ddgZ*d dl+m,Z, d dl"m-Z- 	 	 d,de)dee.         dee/         fdZ0	 d-de)de	edf         de/fdZ1de
j2        j)        ded efd!Z3d"e.d#e.d$e)d%efd&Z4d'ee.ef         d(ee.         d)e.d*efd+Z5dS ).    )OrderedDict)deepcopy)DictListOptional)UnionN)CompressionFormat)initialize_hooked_attentioninitialize_hooked_kv_cache)"initialize_module_for_quantizationis_attention_module)QuantizationArgs)QuantizationConfigQuantizationStatus)QuantizationScheme)replace_module)is_narrow_matchmatch_named_modulesmatch_targets)update_parameter_data)get_safetensors_folder)logger)	safe_open)Module'load_pretrained_quantization_parametersapply_quantization_config)is_module_quantized)*get_quantization_parameter_to_path_mappingFmodelmodel_name_or_pathload_weight_qparamsc                 `   t          |          }t          |          }|                                 D ]z\  }}t          |          s|j        j        d}t          ||||           |j        j        d}t          ||||           |r!|j        j        rd}t          ||||           {dS )az  
    Loads the quantization parameters (scale and zero point) from model_name_or_path to
    a model that has already been initialized with a quantization config.

    NOTE: Will always load inputs/output parameters. Will conditioanlly load weight
    parameters, if load_weight_qparams is set to True.

    :param model: model to load pretrained quantization parameters to
    :param model_name_or_path: Hugging Face stub or local folder containing a quantized
        model, which is used to load quantization parameters
    :param load_weight_qparams: whether or not the weight quantization parameters
        should be loaded
    Ninput)	base_namemodule_namemodulemappingoutputweight)	r   r   named_modulesr   quantization_schemeinput_activations_load_quant_args_from_mappingoutput_activationsweights)r   r    r!   
model_pathr'   name	submoduler$   s           /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/apply.pyr   r   =   s   $ ((:;;J8DDG ..00  i"9-- 	(:FI)#  	    (;G I)#  	     	9#@#H 	 I)#  	   -     configrun_compressedc                 6   ddl m} t          |          }|t                      S |j        t
          j        k    }|j        t          | |j        |j                   t                      }|j
                                        D ]}|j        D ]}|||<   t          | ||j        d          D ]\  }}	t          ||	|          }
t!          ||
|          }||	_        |rht%          |	t&          j        j                  rI|j        t.          j        j        k    r/|                    |	||j                  }t7          | ||           nFt9          |	          r&t;          | |j        |          rt=          | |	           t?          |	|           |j        |	_        dS )an  
    Initializes the model for quantization in-place based on the given config.
    Optionally coverts quantizable modules to compressed_linear modules

    :param model: model to apply quantization config to
    :param config: quantization config
    :param run_compressed: Whether the model will be run in compressed mode or
        decompressed fully on load
    r   )CompressedLinearNT)warn_on_fail)r+   quantization_formatforce_zero_point) +compressed_tensors.linear.compressed_linearr8   r   dictquantization_statusr   
COMPRESSEDkv_cache_scheme_apply_kv_cache_schemer   config_groupsvaluestargetsr   ignorer   _scheme_from_targetsr+   
isinstancetorchnnLinearformatr	   densevaluefrom_linearr   r   r   r
   r   )r   r5   r6   r8   r<   target_to_schemeschemetargetr1   r2   matched_targetscompressed_linears               r3   r   r   p   s    MLLLLLfF~vv 15G5RR )6)6+E	
 	
 	
 #}}&--// . .n 	. 	.F'-V$$	. /T   $C $Ci
 (i9IJJ%&6NN(.	%
 	9eho66	 !2!8!>>> !1 < <$*$*M != ! !
 5$(9:::: #9-- >/v~t3 3 > ,E9===.!1   
 )/(B	%%I$C $Cr4   rA   statusc                 
   |j         st          j        d          t          dg|          }|                                 D ]@}t          |          r/||_        t          | |           t          |d           ||_	        Ad S )Nz6vLLM does not support asymmetric kv cache quantizationz.*self_attn$)rE   r,   Fr;   )
	symmetricr   warningr   modulesr   r+   r   r   r?   )r   rA   rU   rQ   r2   s        r3   rB   rB      s    
 $ WnUVVV
   )  F ]]__ 3 3	y)) 	3,2I)&ui888.y5QQQQ,2I)3 3r4   r$   r%   r&   r'   c                    |  d}|  d}|  d}|                     | d| d          }|                     | d| d          }|                     | d| d          }	|	Ut          |	dd          5 }
|
                    | d|           }ddd           n# 1 swxY w Y   t          |||           |t          |dd          5 }
|
                    | d|           }ddd           n# 1 swxY w Y   t          |||           |t	          j        |d	          }nDt          |dd          5 }
|
                    | d|           }ddd           n# 1 swxY w Y   t          |||           dS dS )
a  
    Loads scale and zero point from a state_dict into the specified module

    :param base_name: quantization target, one of: weights, input_activations or
    output_activations
    :param module_name: pytorch module name to look up in state_dict
    :module: pytorch module associated with module_name
    :mapping: mapping to search fetch paths on disk for a given parameter
    _scale_zero_point_g_idx.Nptcpu)	frameworkdevice)rb   )getr   
get_tensorr   rI   
zeros_like)r$   r%   r&   r'   
scale_namezp_name
g_idx_namestate_dict_scale_pathstate_dict_zp_pathstate_dict_g_idx_pathfstate_dict_g_idxstate_dict_scalestate_dict_zps                 r3   r-   r-      s    %%%J'''G%%%J#KK;(E(E(E(EtLL %?%?g%?%?FF#KK;(E(E(E(EtLL(,UKKK 	Kq ||{,I,IZ,I,IJJ	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	f&6
CCC(,UKKK 	Kq ||{,I,IZ,I,IJJ	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	f&6
CCC%!,-=eLLLMM-eLLL IPQ !-G-Gg-G-G H HI I I I I I I I I I I I I I I 	fmW===== )(s6   5BB #B C33C7:C7:E!!E%(E%rP   rE   r1   returnc                     | |d                  S )Nr    )rP   rE   r1   s      r3   rG   rG      s     GAJ''r4   )NF)F)6collectionsr   copyr   typingr   r   r   OrderedDictTyper   rI   compressed_tensors.configr	   compressed_tensors.modelingr
   r   4compressed_tensors.quantization.lifecycle.initializer   r   *compressed_tensors.quantization.quant_argsr   ,compressed_tensors.quantization.quant_configr   r   ,compressed_tensors.quantization.quant_schemer    compressed_tensors.utils.helpersr   compressed_tensors.utils.matchr   r   r    compressed_tensors.utils.offloadr   )compressed_tensors.utils.safetensors_loadr   logurur   safetensorsr   torch.nnr   __all__-compressed_tensors.quantization.utils.helpersr   r   strboolr   r   rJ   rB   r-   rG   rr   r4   r3   <module>r      s]   $ # # # # #       ' ' ' ' ' ' ' ' ' ' 1 1 1 1 1 1        7 7 7 7 7 7               H G G G G G        L K K K K K ; ; ; ; ; ;         
 C B B B B B L L L L L L       ! ! ! ! ! !       .
 N M M M M M      )-*/0 00 0 "$0 0 0 0h TYGC GCGC !3T!9:GCLPGC GC GC GCT38?3%3 3 3 3 3.)>)>!$)>.4)>?C)> )> )> )>X(%c+=&=>(#Y( ( 	( ( ( ( ( (r4   