
    *`ih?                        U d dl Z d dlZd dlmZmZmZ d dlZd dlmZm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZ d dlmZmZmZ d d	lmZ g d
ZdgZ e j         e!          Z"e j#        e$d<   	 d7dedededee         deeef         f
dZ%	 d7dededej&        j        dee         fdZ'dede(defdZ)dede*fdZ+dede*fdZ,dede(fdZ- ed          dedeee(ef         ddf         fd            Z. ed          	 	 	 d8ded"e*d#e*d$e*deee(ef         ddf         f
d%            Z/dej        de0fd&Z1dej        d'd(de*fd)Z2 e            d*ede*fd+            Z3e	eej4        fd,ej        d-ej        d.ee
         d/ee
         d0eej5                 f
d1Z6	 d9de0d2e0d3ee         d4e*de0f
d5Z7d0ej5        de8fd6Z9dS ):    N)	GeneratorOptionalTuple)FP4_E2M1_DATAFP8_E4M3_DATA	FloatArgsQuantizationArgsQuantizationStrategyQuantizationTyperound_to_quantized_type_dtype)QuantizationScheme)generate_mxfp4_scalesmaybe_convert_from_mxfp4_expshould_generatre_mxfp4_scales)
deprecated)logger)FloatTensor	IntTensorTensor)Module)is_module_quantizedis_model_quantizedmodule_typeget_torch_bit_depthcan_quantizeKV_CACHE_TARGETSis_kv_cache_quant_schemeiter_named_leaf_modulesiter_named_quantizable_modulescompute_dynamic_scales_and_zpcalculate_rangecalculate_qparamsgenerate_gparamstrategy_cdivzre:.*self_attn$_LOGGERmin_valsmax_valsquantization_argsglobal_scalereturnc                    t          j        | t          j        |                     } t          j        |t          j        |                    }| j        }t          ||          \  }}||z
  }|j        rt          j        t          j        |           t          j        |                    }t          |          rt          |          }	n|t          |          dz  z  }	t          j        |	j        || j                  }
nb|j        dk    r$|j        t           j        k    rt%          d          || z
  t          |          z  }	|| |	z  z
  }
t          j        |
||          }
|||	z  }	|j        t+          |	|j                  }	t-          ||	          }	t/          |j        |j        n|	j                  }t          j        |	d	k    t          j        ||	j        |
          |	          }	t+          |
|j        d          }
|	j        d	k    r*|	                    d          }	|
                    d          }
|	|
fS )a  
    :param min_vals: tensor of min value(s) to calculate scale(s) and zero point(s)
        from
    :param max_vals: tensor of max value(s) to calculate scale(s) and zero point(s)
        from
    :param quantization_args: settings to quantization
    :param global_scale: additional global scale to scale the locally generated scale
        currently only applied/supported for Fp4

    :return: tuple of the calculated scale(s) and zero point(s). For FP4, the calculated
        scale is of dtype FP8
    )args)x   )devicedtype   z0Asymmetric Quantization is not supported for FP4Nr0   r   )r0   r/   F)r0   cast_to_original_dtype   )torchmin
zeros_likemaxr/   r!   	symmetricabsr   r   floatzerosshaper0   num_bitstyper   FLOATNotImplementedErrorclampscale_dtyper   r   _get_dtype_epswheretensorzp_dtypendimreshape)r&   r'   r(   r)   r/   bit_minbit_max	bit_rangemax_val_posscaleszero_pointsepss               /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/compressed_tensors/quantization/utils/helpers.pyr"   r"   A   sm   ( y5#3H#=#=>>Hy5#3H#=#=>>H_F&'8&AAGW'!I " Ai	( 3 3UYx5H5HII(.?@@@ 	:*[999FF E)$4$4q$89Fk&,vX^TTT &!++!&*:*@@@%B   X%y)9)99F!23k+w@@ & $0.+7
 
 

 **;VDDF (4  ++\  C
 [!SV<<< F 0,5e  K {a""!))!,,;    valuer,   modulec                   
 d}|j         t          j        k    r2ddh
t          
fdt	          | j                  D                       }n|j         t          j        k    rd}n|j         t          j        t          j        fv rId}d}t          j
        | j        d         |j        z            |j        f}|                     d|          } n?t          j        t          j        t          j        t          j        f}t          d|           |st          j        |           \  }}	n.t          j        | ||	          }t          j        | ||	          }	t'          ||	||
          S )a  
    Returns the computed scales and zero points for dynamic activation
    quantization.

    :param value: tensor to calculate quantization parameters for
    :param args: quantization args
    :param reduce_dims: optional tuple of dimensions to reduce along,
        returned scale and zero point will be shaped (1,) along the
        reduced dimensions
    :return: tuple of scale and zero point derived from the observed tensor
    Tr   r4   c              3   $   K   | ]
}|v|V  d S N ).0idxdims     rQ   	<genexpr>z0compute_dynamic_scales_and_zp.<locals>.<genexpr>   s'      OOC3COOrR   NFz+Dynamic quantization is only supported for )r[   keepdims)r)   )strategyr
   TOKENtuplerangerH   TENSORTENSOR_GROUPGROUPmathceilr=   
group_size	unflatten
ValueErrorr5   aminmaxaminamaxr"   )rS   r,   rT   r)   	keep_dimsreduce_dimsreshaped_dimssupported_strategiesmin_valmax_valr[   s             @rQ   r    r       sx   $ I},222!fOOOO5+<+<OOOOO	.5	5	5	)" 
 

 	 Iek"o788O
 M22 !& ' - &	 
 9#%
 
 	

  I =//*UiHHH*UiHHHWgt,OOOOrR   r/   c                 \   | j         t          j        k    rAd| j        z  }t	          j        |dz  dz
  |          }t	          j        | dz  |          }n| j         t          j        k    r| j        dk    rAt	          j        t          j        |          }t	          j        t          j	        |          }nr| j        dk    rAt	          j        t          j        |          }t	          j        t          j	        |          }n&t          d          t          d| j                    ||fS )a  
    Calculated the effective quantization range for the given Quantization Args

    :param quantization_args: quantization args to get range of
    :param device: device to store the range to
    :return: tuple endpoints for the given quantization range
    r.   r4   )r/      r1   z1Range calculation only supported for 4 and 8 bitszInvalid quantization type )r?   r   INTr>   r5   rF   r@   r   r8   r6   r   rA   rj   )r(   r/   rL   q_maxq_mins        rQ   r!   r!      s'    !1!555(11	Y]Q.v>>>iZ!^F;;;		#3#9	9	9%**L!26BBBEL!26BBBEE'1,,L!26BBBEL!26BBBEE%C   N6G6LNNOOO%<rR   c                 ~    t          | d          sdS | j        j        dS | j        j        dS | j        j        dS dS )z
    Check if a module is quantized, based on the existence of a non-empty quantization
    scheme

    :param module: pytorch module to check
    :return: True if module is quantized, False otherwise
    quantization_schemeFNT)hasattrrz   weightsinput_activationsoutput_activationsrT   s    rQ   r   r      sT     6011 u!)5t!3?t!4@t5rR   modelc                 X    t          d |                                 D                       S )z
    Check if any modules in a model are quantized, based on the existence of a non-empty
    quantization scheme in at least one module

    :param model: pytorch model
    :return: True if model is quantized, False otherwise
    c              3   4   K   | ]}t          |          V  d S rW   )r   )rY   	submodules     rQ   r\   z%is_model_quantized.<locals>.<genexpr>  s+      OO)"9--OOOOOOrR   )anymodules)r   s    rQ   r   r     s)     OOu}}OOOOOOrR   c                 *    t          |           j        S )z
    Gets a string representation of a module type

    :module: pytorch module to get type of
    :return: module type as a string
    )r?   __name__r   s    rQ   r   r     s     <<  rR   zThis function will be removed in a future release. Please use `model.named_modules()` and filter by compressed_tensors.InternalModule if neceessary)messagec              #     K   |                                  D ]\  }}t          |                                          }t          |          dk    rd|v r||fV  Dt          |          dk    r+t	          t          |                                           \  }}d}t          t          |                    D ]}||         }d|vrd}|s||fV  dS )z
    Yields modules that do not have any submodules except observers. The observers
    themselves are not yielded
    :param model: model to get leaf modules of
    :returns: generator tuple of (name, leaf_submodule)
    r   observerFTN)named_moduleslistchildrenlenzipnamed_childrenrb   )r   namer   r   r   has_non_observer_childreni
child_names           rQ   r   r     s      !..00 & &i	**,,--x==A*"4"4	/!!!!8}}q  +.Y5M5M5O5O0P0P+Q((-%3x==)) 5 5+A.
Z//04-, &Io%%%!& &rR   TFinclude_childreninclude_attninclude_mlpc              #     K   |                                  D ]\  }}|rt          |                                          }t          |          dk    rd|vr||fV  nut          |          dk    r+t	          t          |                                           \  }}d}t          t          |                    D ]}	||	         }
d|
vrd}|s||fV  |r|                    d          r||fV  |r|                    d          r||fV  dS )aU  
    Yield name and submodule of
    - leaf modules, set by include_children
    - attention modyles, set by include_attn
    :param model: model to get leaf modules of
    :param include_children: flag to get the leaf modules
    :param inlcude_attn: flag to get the attention modules
    :returns: generator tuple of (name, submodule)
    r   r   FT	self_attnmlpN)r   r   r   r   r   r   rb   endswith)r   r   r   r   r   r   r   r   r   r   r   s              rQ   r   r   :  sd     ( !..00 & &i 	*I..0011H8}}!!j&<&<Io%%%%x==1$$/2D9Q9Q9S9S4T4T/U,NH,1)s8}}-- 9 9A!/!2J!334810 *	/))) 	&}}[)) &Io%%% 	&}}U## &Io%%%/& &rR   c                     	 t          j        | j                  j        }n.# t          $ r! t          j        | j                  j        }Y nw xY w|S )z
    Determine the number of bits used to represent the dtype of a tensor

    :param value: tensor to check bit depth of
    :return: bit depth of each element in the value tensor
    )r5   finfor0   bits	TypeErroriinfo)rS   	bit_depths     rQ   r   r   h  sX    2K,,1		 2 2 2K,,1			2 s   ! (AA
quant_argsr	   c                     t          |           }|j        }||j        k     r!t                              d| d| d           ||j        k    S )aI  
    Checks if value can be quantized by quant_args.

    :param value: tensor to check for quantization
    :param quant_args: QuantizationArgs to use for quantization
    :return: False if value is already quantized to quant_args or value is incompatible
    with quant_args, True if value can be quantized with quant_args
    z%Can't quantize tensor with bit depth z to zH.The QuantizationArgs provided are not compatible with the input tensor.)r   r>   r%   warn)rS   r   r   requested_depths       rQ   r   r   w  su     $E**I )O:&&&VI V V? V V V	
 	
 	

 z***rR   schemec                 2    | j         D ]}|t          v r dS dS )a
  
    Check whether the QuantizationScheme targets the kv cache.
    It does if all the following criteria are met:
    - the scheme targets either exactly match the KV_CACHE_TARGETS
        or the match KV_CACHE_TARGETS regex pattern
    - the scheme quantizes output_activations (we want to quantize the
        outputs from the KV_CACHE_TARGETS, as their correspond to the
        keys and values that are to be saved in the cache)

    :param scheme: The QuantizationScheme to investigate
    :return: boolean flag
    TF)targetsr   )r   targets     rQ   r   r     s4     .  %%%44 & 5rR   updated_min_valupdated_max_val
scale_data
quant_datar0   c                    t          j        | t          j        |                     }t          j        |t          j        |                    }t          j        t          j        |          t          j        |                    }|j        |j        z  |z  }|                    |                              dg          S )ah  
    Generate a global scale for an entire tensor (input_tensor).
    Goal of the scale is to ensure that the quantization (local) scale
    falls into the approproiate dtype range.

    E.g. for NVFP4, group (local) scales are in dtype FP8. The global_scale
    attempts to use the entire FP8 dtype range while mapping a per-group max
    to the FP4 max.
    r4   )r5   r6   r7   r8   r:   torI   )	r   r   r   r   r0   r&   r'   rM   r)   s	            rQ   r#   r#     s      y%*:?*K*KLLHy%*:?*K*KLLH)EIh//81D1DEEK>JN2[@L??5!!))1#...rR   divisorr_   strictc                     t          j        | |z            }||z  | k    rF| d|  d| d| }|rt          |          t          j        d                              |           |S )NzJ quantization strategy requires strict division of weight/activation size z and group/block size z[. consider reducing the group/block size or ignoring modules with weights not divisible by T)log_once)rf   rg   rj   r   bindwarning)rS   r   r_   r   dividendr   s         rQ   r$   r$     s     y))H'U"" 2 2&+2 2CJ2 2 )02 2 	  	8W%%% K&&&..w777OrR   c                     | t           j        k    rdS | t          j        k    rdS t          j        t          j        g |                     rt          j        |           j        S dS )Ng      ?g      ?r2   r4   )r   r0   r   r5   is_floating_pointrF   r   rP   r2   s    rQ   rD   rD     sb    ###u	-%	%	%t		 b!>!>!>	?	? {5!!%%qrR   rW   )TFF)F):loggingrf   typingr   r   r   r5   *compressed_tensors.quantization.quant_argsr   r   r   r	   r
   r   r   ,compressed_tensors.quantization.quant_schemer   1compressed_tensors.quantization.utils.mxfp4_utilsr   r   r   compressed_tensors.utilsr   logurur   r   r   r   torch.nnr   __all__r   	getLoggerr   r%   Logger__annotations__r"   nnr    strr!   boolr   r   r   r   r   intr   r   r   float32r0   r#   r$   r;   rD   rX   rR   rQ   <module>r      s     - - - - - - - - - -                   L K K K K K         
 0 / / / / /       0 0 0 0 0 0 0 0 0 0        & && +'+H55 5 5 5 &*	S SSS (S 6"	S
 ;	!"S S S St &*	8P 8P8P
8P HO8P 6"	8P 8P 8P 8Pv'7      : 4    .Pf P P P P P! !3 ! ! ! ! 6  
&6 &ic6k8JDRV8V.W & & & 
&4 6   "	&& &&&&&& && 	&&
 uS&[!4-.&& && && 
&&Ru|     + +2D + + + + +( %7 D    . '4&3#(=/ /\/\/ #/ #	/
 EK / / / /6 	  +, 	
 	   .%+ %      rR   