
    *`i=                     0   d dl Z d dlmZ d dlmZmZmZmZmZ d dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZmZmZmZmZmZ g dZ G d	 d
          Z G d de          Z G d de          Z G d de          Z G d dee          Z G d dee          Z G d dee          Z G d deee          Z  G d ded          Z!	 d&de	j"        de	j#        dee$         d e	j"        fd!Z%	 d&de	j"        d"e!d#e	j"        d$e	j"        dee$         d e	j"        fd%Z&dS )'    N)Enum)AnyDictListOptionalUnion)	Aliasable)
deprecated)
TorchDtype)	BaseModel
ConfigDictFieldfield_serializerfield_validatormodel_validator)FP8_E4M3_DATAFP4_E2M1_DATABFLOAT16_DATA	FloatArgsQuantizationTypeQuantizationStrategyQuantizationArgsround_to_quantized_type_argsround_to_quantized_type_dtypeActivationOrderingDynamicTypec                       e Zd ZU eed<   eed<   dZee         ed<   dZee         ed<   dZ	ee         ed<   dZ
eej
                 ed<   dS )r   exponentmantissaNbitsmaxmindtype)__name__
__module____qualname__int__annotations__r    r   r!   floatr"   r#   torch     ~/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/compressed_tensors/quantization/quant_args.pyr   r   0   sw         MMMMMMD(3-C%C%#'E8EK '''''r,   r   c                   R    e Zd ZdZdZdZdZdZee	j
        d                         ZdS )r                  @g      c                 N   t          j        |           }t          j        |           } d| | dk    | dk    z  <   d| | dk    | dk     z  <   d| | dk    | dk    z  <   d| | dk    | dk     z  <   d	| | dk    | d
k    z  <   d| | d
k    | dk     z  <   d| | dk    | dk    z  <   d| | dk    <   | |z  S )Ng        g      ?g      ?g      ?g      ?g      ?g      ?g      ?g       @g      @g      @g      @g      @g      @r2   )r*   signabs)xr4   s     r-   cast_to_fp4zFP4_E2M1_DATA.cast_to_fp4@   s     z!}}IaLL&)18T	
"#%(1t8D
!"'*19d
#$%(1t8D
!"&)19c
"##&1s7q3w
 %(18S
!"!c'
4xr,   N)r$   r%   r&   r   r   r    r!   r"   staticmethodr*   compiler7   r+   r,   r-   r   r   9   sR        HHD
C
C
]  ] \  r,   r   c                       e Zd ZdZdZdZ ej        ej                  j	        Z	 ej        ej                  j
        Z
ej        ZdS )r   r1         N)r$   r%   r&   r   r   r    r*   finfofloat8_e4m3fnr!   r"   r#   r+   r,   r-   r   r   P   sS        HHD
%+e)
*
*
.C
%+e)
*
*
.CEEEr,   r   c                       e Zd ZdZdZdS )r   r<      N)r$   r%   r&   r   r   r+   r,   r-   r   r   Y   s        HHHHr,   r   c                       e Zd ZdZdZdZdS )r   z0
    Enum storing quantization type options
    r'   r)   N)r$   r%   r&   __doc__INTFLOATr+   r,   r-   r   r   ^   s$          CEEEr,   r   c                   .    e Zd ZdZdZdZdZdZdZdZ	dZ
d	S )
r   z4
    Enum storing quantization strategy options
    tensorchannelgroupblocktokentensor_group	attn_headN)r$   r%   r&   rB   TENSORCHANNELGROUPBLOCKTOKENTENSOR_GROUP	ATTN_HEADr+   r,   r-   r   r   g   s=          FGEEE!LIIIr,   r   c                       e Zd ZdZdZdS )r   aY  
    Enum storing potential dynamic types.

    1. If dynamic is True, all quantization parameters are generated on the fly.
    2. If dynamic is False, all quantization parameters generated are static.
    3. If "local" is provided, only local quantization parameters are dynamic.

    Note: "local" is only currently supported for NVFP4.

    localN)r$   r%   r&   rB   LOCALr+   r,   r-   r   r   u   s        	 	 EEEr,   r   c                   N    e Zd ZdZdZdZdZdZede	e
e
f         fd            ZdS )	r   a  
    Enum storing strategies for activation ordering

    Group: reorder groups and weight

    Weight: only reorder weight, not groups. Slightly lower accuracy but also lower
    latency when compared to group actorder

    Dynamic: alias for Group

    Static: alias for Weight

    rH   weightdynamicstaticreturnc                      dddS )NrH   rX   )rY   rZ   r+   r+   r,   r-   get_aliaseszActivationOrdering.get_aliases   s     
 
 	
r,   N)r$   r%   r&   rB   rO   WEIGHTDYNAMICSTATICr8   r   strr]   r+   r,   r-   r   r      sa          EFGF
c3h 
 
 
 \
 
 
r,   r   c                      e Zd ZU dZdZeed<   ej        Z	eed<   dZ
eed<   dZee         ed<   dZee         ed	<   dZeee                  ed
<   dZeeef         ed<   dZeeedf         ed<   dZee         ed<   dZee         ed<    edd          Zee         ed<    eed          Zeee f         ed<    e!d          de"j#        fd            Z$ e%dd          defd            Z& e%dd          deedf         fd            Z' e%d
d          deee                  fd            Z( e%d	d          deedf         fd            Z) e%dd          dee         fd            Z* e%dd          deeef         fd             Z+ e,d!          d)d#            Z-de"j#        fd$Z. e/d%          defd&            Z0 e1d'(          Z2dS )*r   aE  
    User facing arguments used to define a quantization config for weights or
    activations

    :param num_bits: quantization bit depth
    :param type: dtype to quantized to, either int or float
    :param symmetric: whether or not quantization scale is symmetric about zero-point
    :param strategy: string id determining the scope of scale/zero-point to apply
    :param group_size: group length to use for the group strategy
    :param block_structure: 2d block structure to use for the block strategy; must be
        a list of two ints [rows, cols] like [128, 128].
    :param dynamic: set True to perform dynamic quantization - values will not be
        calibrated during calibration phase, instead during inference new quantization
        ranges will be observed with every sample. Defaults to False for static
        quantization. Note that enabling dynamic quantization will change the default
        observer to a memoryless one
    :param actorder: whether to apply group quantization in decreasing order of
        activation. Defaults to None for arbitrary ordering
    r<   num_bitstypeT	symmetricN
group_sizestrategyblock_structureFrY   actorderscale_dtypezp_dtypezDetermines the method of computing quantization parameters (scales and zero-points). Defaults to min-max when not using dynamic quantization)defaultdescriptionobserverzoptional dict of kwargs to be passed directly to torch quantization Observers constructor excluding quantization range or symmetry)default_factoryrm   observer_kwargsr#   c                 2    | j         rd S t          |          S N)re   ra   )selfr#   s     r-   serialize_dtypez QuantizationArgs.serialize_dtype   s    > 	45zzr,   before)moder[   c                 r    t          |t                    r!t          |                                          S |S rr   )
isinstancera   r   lowerclsvalues     r-   validate_typezQuantizationArgs.validate_type   s/    eS!! 	3#EKKMM222r,   c                 @    ||S |dk     rt          d| d          |S )NInvalid group size K. Use group_size > 0 for strategy='group' and group_size = -1 for 'channel')
ValueErrorrz   s     r-   validate_groupzQuantizationArgs.validate_group   sH    =L2::Ee E E E  
 r,   c                    ||S t          |t                    rA	 d |                    d          D             S # t          $ r t	          d| d          w xY wt          |t
          t          f          rNt          |          dk    st          d |D                       st	          d| d          t          |          S t	          d| d          )Nc                 ,    g | ]}t          |          S r+   )r'   ).0r6   s     r-   
<listcomp>z=QuantizationArgs.validate_block_structure.<locals>.<listcomp>   s    9991A999r,   r6   zInvalid block_structure 'z''. Must be a list of ints [rows, cols].r/   c              3   @   K   | ]}t          |t                    V  d S rr   )rx   r'   )r   vs     r-   	<genexpr>z<QuantizationArgs.validate_block_structure.<locals>.<genexpr>   s,      )L)L*Q*<*<)L)L)L)L)L)Lr,   )	rx   ra   split	Exceptionr   listtuplelenallrz   s     r-   validate_block_structurez)QuantizationArgs.validate_block_structure   s   =LeS!! 	99C(8(89999    $ $ $ $  
 edE]++ 	5zzQc)L)Le)L)L)L&L&L $ $ $ $   ;;VVVV
 
 	
s	   : Ac                 r    t          |t                    r!t          |                                          S |S rr   )rx   ra   r   ry   rz   s     r-   validate_strategyz"QuantizationArgs.validate_strategy   s/    eS!! 	7'666r,   c                     t          |t                    r|rt          j        nd S t          |t                    r!t          |                                          S |S rr   )rx   boolr   rO   ra   ry   rz   s     r-   validate_actorderz"QuantizationArgs.validate_actorder  sU    eT"" 	?/4>%++$>eS!! 	5%ekkmm444r,   c                 r    t          |t                    r!t          |                                          S |S rr   )rx   ra   r   ry   rz   s     r-   validate_dynamicz!QuantizationArgs.validate_dynamic  s/    eS!! 	.u{{}}---r,   aftermodelc                    | j         }| j        }| j        }| j        }| j        }| j        }| j        }| j        }|H|t          j        }n9|dk    rt          j	        }n&|dk    rt          j
        }nt          d| d          |t          j        k    r|st          d          |t          j	        t          j        fv r||dk    rt          d| d          |/|dk    r)|t          j	        t          j        fvrt          d          |t          j        k    }|d u}	|r|	st          d	|            |	r|st          d
|            ||t          j	        k    rt          d          |rt          j        t          j        t          j        t          j	        f}
||
vrt          d|
 d          |t          j        k    r|t          j        k    rt          d          |!|du r|dk    rt#          j        d           d }n|t          j        k    rd}n|d}|A| j        dk    r"| j        t*          j        k    rt.          j        }n|                                 }|| _         || _        || _        | S )Nr   r   r   r   zCCannot perform static token quantization, please use `dynamic=True`z	strategy z2 requires group_size to be set to a positive valuez1group_size requires strategy to be set to 'group'z(Block strategy requires block structure
z(Block structure requires block strategy
zJMust use group quantization strategy in order to apply activation orderingzOne of z  must be used for dynamic quant.z1local is only supported for strategy tensor_groupT
memorylessz7No observer is used for dynamic quant., setting to Noneminmaxr1   )rg   rf   rh   ri   rY   rn   rk   r   rM   rO   rN   r   rQ   rR   rP   r   rV   warningswarnrc   rd   r   rD   r   r#   pytorch_dtype)r   rg   rf   rh   ri   rY   rn   rk   has_block_strategyhas_block_structuresupported_strategiess              r-   validate_model_afterz%QuantizationArgs.validate_model_after  s9    >%
/>->-> !/6a/5r!!/7 I* I I I   +111'1U  
 ,24H4UVVV!Z1__ . . . .  
 "Q(.0D0QRS S PQQQ &)=)CC-T9 	R&9 	RPPPQQQ 	R'9 	RPPPQQQ H0D0J$J$J&    !	 $*$+$1$*	$  333 T2TTT  
 ;,,, 4 AAA !TUUU#d?? L00 U    $Hk///'HH~""uz5E5K'K'K(. ..00 "!!r,   c                 D   | j         t          j        k    r&| j        dk    rt          j        S t          d          | j         t          j        k    r:| j        dk    rt          j	        S | j        dk    rt          j
        S t          j        S t          d| j                    )Nr<   z"Only num_bits in (8) are supported   Invalid quantization type )rd   r   rD   rc   r   r#   NotImplementedErrorrC   r*   int8int16int32r   rs   s    r-   r   zQuantizationArgs.pytorch_dtype  s    9(...}!!$**)*NOOOY*...}!!z!"$${"{"E$)EEFFFr,   zQuantizationArgs.observerc                     | j         S rr   )rn   r   s    r-   get_observerzQuantizationArgs.get_observer  s
    }r,   forbid)extra)r   r   r[   r   )3r$   r%   r&   rB   rc   r'   r(   r   rC   rd   re   r   rf   r   rg   r   rh   r   rY   r   r   ri   r   rj   r   rk   r   rn   ra   dictrp   r   r   r   r*   r#   rt   r   r}   r   r   r   r   r   r   r   r   r
   r   r   model_configr+   r,   r-   r   r      s         ( Hc-1D
111It $J$$$/3Hh+,333+/OXd3i(///(-GU;$%---6:He&d23:::(,K*%,,,%)Hhz")))#eT  Hhsm    ',eM' ' 'OT#s(^    j!!U[    "!
 _V(+++%5    ,+ _\111
eCI&6 
 
 
 21
 _&X666
c0C 
 
 
 76
. _Zh////CT/I)J    0/ _Zh///2D)E    0/ _YX...k4.?(@    /.
 _'"""k k k #"kZGu{ G G G G  Z+,,c    -, :H---LLLr,   r   T)use_enum_valuesrF   r#   cast_to_original_dtyper[   c                    | j         }t          j        t          j        g |                    rHt          j        |          }t          j        | |j        |j                                      |          }nYt          j	        |          }t          j
        t          j        | |j        |j                                                |          }|r|                    |          S |S )a\  
    Rounds an input tensor to the nearest quantized representation given a dtype.
    The original dtype is kept post-rounding.

    :param tensor: tensor to round
    :param dtype: dtype to use for rounding
    :param cast_to_original_dtype: whether or not we cast the rounded tensor to
        the original dtype
    :return: rounded tensor
    )r#   )r#   r*   is_floating_pointrF   r=   clampr"   r!   toiinforound)rF   r#   r   original_dtyper=   roundedr   s          r-   r   r     s     \Nu|Be<<<== SE""+fei;;>>uEEE""+ek&%)UYGGHHKKERR *zz.)))Nr,   argsr"   r!   c                    | j         }t          j        | ||          } |j        t          j        k    r`|j        dk    r |                     t          j                   }nv|j        dk    rt          
                    |           }nPt          d          |j        t          j        k    rt          j        |           }nt          d|j                   |r|                    |          S |S )a  
    Rounds an input tensor to the nearest quantized representation given
    qunatization args. The original dtype is kept post-rounding.

    :param tensor: tensor to round
    :param args: quantization args to use for rounding
    :param min: min value to use for clamping
    :param max: max value to use for clamping
    :param cast_to_original_dtype: whether or not we cast the rounded tensor to
        the original dtype
    :return: rounded tensor
    r<   r1   z%Only num_bits in (4, 8) are supportedr   )r#   r*   r   rd   r   rD   rc   r   r   r   r7   r   rC   r   r   )rF   r   r"   r!   r   r   r   s          r-   r   r     s    ( \N[c**Fy$***=Aii 344GG]a#//77GG%&MNNN	&*	*	*+f%%AdiAABBB *zz.)))Nr,   )T)'r   enumr   typingr   r   r   r   r   r*   compressed_tensors.utilsr	    compressed_tensors.utils.helpersr
   compressed_tensors.utils.typer   pydanticr   r   r   r   r   r   __all__r   r   r   r   ra   r   r   r   r   r   Tensorr#   r   r   r   r+   r,   r-   <module>r      s8          3 3 3 3 3 3 3 3 3 3 3 3 3 3  . . . . . . 7 7 7 7 7 7 4 4 4 4 4 4                 ( ( ( ( ( ( ( (    I   .         I          I   
    sD       3       #t   
 
 
 
 
C 
 
 
2y. y. y. y. y.y$ y. y. y. y.~ .2 L; %TN \	   B .2$ $L$
$ 
$ 
	$
 %TN$ \$ $ $ $ $ $r,   