
    *`iw/                     r   d dl Z d dlmZmZmZ d dlZd dlmZmZm	Z	m
Z
 d dlmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZ d dlmZm Z  g d	Z! e j"        e#          Z$	 	 ddedee         de%fdZ&defdZ'	 ddede(dedeee)df                  dej*        de%fdZ+dedede%fdZ,defdZ-dS )    N)OptionalTupleUnion)	IMPL_ATTRKV_CACHE_ATTRQuantizedAttentionImplQuantizedKVCache)ActivationOrderingDynamicTypeQuantizationArgsQuantizationMetadataQuantizationSchemeQuantizationStatusQuantizationStrategy)wrap_module_forward_quantized)strategy_cdiv)disable_hf_hookget_execution_deviceget_head_dimget_num_attn_headsget_num_kv_headsregister_offload_parameter)Module	Parameter)"initialize_module_for_quantizationis_attention_moduleinitialize_qparamsinitialize_attn_qparamsTmoduleschemeforce_zero_pointc                    |pt          | dd          }|dS t          j        |            t          |           rt	          | ||           nt          | t          j        j                  s*t          
                    dt          |                       t          | d          r$| j        }t          |t          j                  sJ n<t          
                    dt          |            dt          |                       dS |j        ,t!          | d|j        |j        dd         |j        |	           |j        $t!          | d|j        |j        |j        |	           |j        ,t!          | d
|j        |j        dd         |j        |	           t+          |           5  t-          | |           ddd           n# 1 swxY w Y   || _        t0          j        | _        dS )aa  
    Attaches appropriate scales, zero points, and observers to a layer
    given its target quantization scheme.

    Previously initialized scales and zero points will be removed from
    module if they no longer apply to the scheme

    :param module: module to set for calibration
    :param scheme: scheme to use for quantization. if None is provided,
        will attempt to use scheme stored in the module under `quantization_scheme`,
        if not provided, the layer will be skipped
    :param force_zero_point: whether to force initialization of a zero point for
        symmetric quantization
    quantization_schemeNz&Attempting to quantize module of type weightzmodule type zR targeted for quantization but has no attribute weight, skipping quantization for inputobserved_shapeobserved_dtyper!   output)getattrr   clear_all_qparamsr   r   
isinstancetorchnnLinear_LOGGERwarningtypehasattrr$   Tensorinput_activationsr   shapedtypeweightsoutput_activationsr   r   r#   r   INITIALIZEDquantization_status)r   r    r!   r$   s       /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/initialize.pyr   r   =   sk   & Cwv'<dCCF~*62226"" 6:0@AAAA &%(/22 	UOOST&\\SSTTT 68$$ 
	]Ffel333333 OOUtF|| U UFJ6llU U   F#/(%|BCC0%|!1    >%%|%|!1    $0)%|CRC0%|!1    V$$ 	: 	: *&&999	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	:
 "(F!3!?Fs   'GGGc                     d| j         j                                        v o/t          | d          pt          | d          pt          | d          S )N	attentionk_projv_projqkv_proj)	__class____name__lowerr4   )r   s    r=   r   r      sW    &*399;;; !! 	'68$$	'6:&&    	base_namequantization_argsr(   r)   c                    |j         }|j        }|j        }t          |           }	|du rdS |t          j        k    rDt          t          j        dt          j	        |	          d          }
t          | | d|
           |t          j        k    rdS |t          j        k    rd}n|t          j        k    rt          d	          |t          j        k    r.t#          |          d
k     rt          d          |d         df}n|t          j        t          j        fv r|j        J t#          |          dk     rt          d          |j        }t)          |d         ||          }g |dd         |R }|t*          j        k    rLt          t          j        |d         fd|	t          j                  d          }t          | | d|           n|t          j        k    rq|j        J t#          |          d
k     rt          d          |j        }t)          |d         |d         |          }t)          |d         |d         |          }||f}nK|t          j        k    r.t#          |          dk     rt          d          |d         ddf}nJ d|             |}|t          j        t          j        t          j	        t          j        fvrt          j        }t          t          j        |||	          d          }t          | | d|           |s|j        sAt          t          j        ||	|j                   d          }t          | | d|           dS dS )a  
    Initialize quantization parameters for a given basename according to the passed
    quantization args. The shape and dtype of the observed weight/activation must also
    be provided.

    Scales will always be initialized. Global scales are initialized depending on args.
    Zero points will be initialized if not symmetric or if `force_zero_point` is True.

    :param module: module to register qparams to
    :param base_name: base name of qparams, for example "input", "weight", "k", "v"
    :param quantization_args: arguments for quantization
    :param observed_shape: last (right-most) known dimensions of the observed weight/act
    :param observed_dtype: dtype of the observed weight/actt
    :param force_zero_point: force the zero_point parameter to be initialized
    TN   )r8   deviceF)requires_grad_global_scale)rJ   z(Cannot perform static token quantization   z5Channel quant requires at least 2 observed dimensionsz2Group quant requires at least 1 observed dimensionr&   )rK   r8   _g_idxz3Block quant requires at least 2 observed dimensions   z7Attention quant requires at least 3 observed dimensionszUnknown strategy _scale_zero_point)!strategydynamicactorderr   r   TENSOR_GROUPr   r.   emptyfloat32r   r   LOCALTENSORTOKEN
ValueErrorCHANNELlenGROUP
group_sizer   r
   fullintBLOCKblock_structure	ATTN_HEADfloat16bfloat16float64	symmetriczeroszp_dtype)r   rG   rH   r(   r)   r!   rU   rV   rW   rK   init_global_scaleexpected_shaperb   
num_groups
init_g_idxrf   num_rowsnum_colsscale_dtype
init_scaleinit_zero_points                        r=   r   r      s   . !)H'G )H!&))F $ '444%Kv>>>
 
 
 	#y///1B	
 	
 	

 +### '...	)/	/	/CDDD	)1	1	1~""TUUU(,a0	*02F2ST	T	T +777~""QRRR&1
">"#5z8LL
;>#2#.;
;; )///"
N2.0"V59UUU#  J 'v)/C/C/CZPPP	)/	/	/ 0<<<~""RSSS+; !3_R5H(SS !3_R5H(SS"H-	)3	3	3~""VWWW(,a3 	54(4444u !K	   m N+fEEE  J v)';';';ZHHH W0: W#Kv5F5O    	
 
 
 	#6i+D+D+DoVVVVVW WrF   c                 4   t          | t          d          }t          | t          d          }|"| t          dt           dt           d          t	          |           |j        }t          |          }t          |          }t          |          }|d|f}	|d|f}
t          | 
                                          j        }|t          | d|j        |	||           |6t          | d|j        |
||           t          | d|j        |
||           dS dS )	z(Initlaize k_scale, v_scale for self_attnNz0Attention module has quantization scheme but no z or zc attributes. Please ensure that these attributes are initialized using `apply_quantization_config`.qr'   kv)r+   r   r   r^   _validate_attention_schemeconfigr   r   r   next
parametersr8   r   r6   )r   r    r!   implkv_cacher|   num_attn_headsnum_kv_headshead_dimq_observed_shapekv_observed_shaper)   s               r=   r   r     s   
 .5VY-M-MD+26=$+O+OH|(Ly L LL L L
 
 	
 v&&& _F'//N#F++LF##H 'h7%tX6&++--..4N$+)-	
 	
 	
 	
 $,)-	
 	
 	
 	
 	$,)-	
 	
 	
 	
 	
 	
 rF   c                     | j         t          d          | j        t          d          | j        t          d          d S )NzmCannot apply weight quantization to attention. Instead, target the (q|k|v)_proj submodule layers of attentionzHCannot apply attention quantization without specifying input activationsz-Cannot apply output quantization to attention)r9   r^   r6   r:   )r    s    r=   r{   r{   N  sd    ~!M
 
 	

 'V
 
 	
  ,HIII -,rF   )NT)T).loggingtypingr   r   r   r.   compressed_tensors.modelingr   r   r   r	   compressed_tensors.quantizationr
   r   r   r   r   r   r   1compressed_tensors.quantization.lifecycle.forwardr   %compressed_tensors.quantization.utilsr   compressed_tensors.utilsr   r   r   r   r   r   torch.nnr   r   __all__	getLoggerrD   r1   boolr   r   strrd   r8   r   r   r{    rF   r=   <module>r      s     ) ) ) ) ) ) ) ) ) )                                   @ ? ? ? ? ?                ' & & & & & & &   '
H
%
%
 ,0!R@ R@R@'(R@ R@ R@ R@ R@j     "xW xWxWxW (xW %T	*+	xW
 KxW xW xW xW xWv6
6
.6
BF6
 6
 6
 6
rJ'9 J J J J J JrF   