
    PiL                        d dl Z d dlmZ d dlmZmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZmZ d dlmZmZmZmZmZmZ d dlmZmZmZmZmZ d d	lmZ d d
lm Z  ddl!m"Z"  G d de j#                  Z$e G d de$                      Z%e G d de$                      Z&e G d de$                      Z' G d de'          Z(de
deee$         ee$         f         fdZ)dS )    N)	dataclass)AnyOptionalTupleUnion)AOBaseConfig)
e4m3_dtype)FP8Granularity_normalize_granularity)GranularityPerAxisPerGroupPerRow	PerTensorPerToken)_SUB_BYTE_INT_BOUNDS_SUB_BYTE_UINT_BOUNDSMappingTypeTorchAODTypeZeroPointDomain)Int4PackingFormat)_is_float8_type   _log_deprecation_warningc                       e Zd ZdZdS )FakeQuantizeConfigBasez?
    Base class for representing fake quantization config.
    N)__name__
__module____qualname____doc__     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/quantization/qat/fake_quantize_config.pyr   r   (   s          	Dr#   r   c                       e Zd ZU dZeZej        ed<    e            Z	e
ed<   dZee         ed<   dZee         ed<   d ZdS )Float8FakeQuantizeConfiga  
    Config for float8 fake quantization, targeting :class:`~torchao.quantization.Float8Tensor`.

    Args:
       dtype (torch.dtype): the dtype for float8 Tensor
       granularity (FP8Granularity): the granularity for the Tensor, currently either PerRow() or PerTensor()
       hp_value_lb (Optional[float]): the lower bound for high precision floating point value for calculating scale
       hp_value_ub (Optional[float]): the upper bound for high precision floating point value for calculating scale
    dtypegranularityNhp_value_lbhp_value_ubc                     t          | j                  st          | j         d          t          | j        t
                    rt          d          t          | j                  t          t          fvrt          d| j                   dS )zG
        Verify dtype and granularity are the ones we support.
        z is not a float8 dtypez[Please specify the granularity object instead of the class, e.g. PerRow() instead of PerRowz.Expected PerRow or PerTensor granularity, got N)r   r'   
ValueError
isinstancer(   typer   r   selfs    r$   __post_init__z&Float8FakeQuantizeConfig.__post_init__A   s     tz** 	D
BBBCCCd&-- 	m    !!&))<<<SAQSS   =<r#   )r   r   r    r!   r	   r'   torch__annotations__r   r(   r
   r)   r   floatr*   r1   r"   r#   r$   r&   r&   0   s           $E5;###"(&((K***#'K%'''#'K%'''    r#   r&   c                   @    e Zd ZU dZdZeed<   eZe	j
        ed<   d ZdS )Int4WeightFakeQuantizeConfiga  
    Config for pint4 weight fake quantization that targets the numerics in the following preshuffled kernel:
        torch.ops.fbgemm.f8i4bf16_shuffled
        torch.ops.fbgemm.bf16i4bf16_shuffled
        torch.ops.fbgemm.bf16i4bf16_rowwise

    Currently this only supports float8 input activations. It is expected to be used in conjunction with
    :class:`~torchao.quantization.Float8DynamicActivationInt4WeightConfig`. In the future, we may extend
    this to support bfloat16 as well.
       
group_sizeactivation_dtypec                 j    | j         t          t          j        fvrt	          dt           d          d S )NzOnly z+ or torch.bfloat16 activation are supported)r9   r	   r2   bfloat16r,   r/   s    r$   r1   z*Int4WeightFakeQuantizeConfig.__post_init__a   s@     U^(DDDO
OOO   EDr#   N)r   r   r    r!   r8   intr3   r	   r9   r2   r'   r1   r"   r#   r$   r6   r6   Q   sT         	 	 J$.ek...    r#   r6   c                   F    e Zd ZU dZeej        ef         ed<   e	ed<   e
ed<   ej        ed<   ej        ed<   eed<   dZeed	<   d
Zeed<   dZee         ed<   ddej        ej        ej        dd
dfddddeej        ef         dee	edf         dee
         dej        dej        ded	ededee         dee         dee         fdZd Zdee	edf         dee         de	fdZdee
         dee         de
fdZedefd            Zedefd            Zdedef fdZ xZ S )IntxFakeQuantizeConfiga	  
    Config for how to fake quantize weights or activations,
    targeting integer dtypes up to torch.int8.

    Args:
        dtype: dtype to simulate during fake quantization, e.g. torch.int8.
            For PyTorch versions older than 2.6, you may use `TorchAODType` to represent
            torch.int1 to torch.int7 instead, e.g. TorchAODType.INT4.
        granularity: granularity of scales and zero points, e.g. PerGroup(32).
            We also support the following strings:
               1) 'per_token': equivalent to PerToken()
               2) 'per_channel': equivalent to PerAxis(0)
               3) 'per_group': equivalent to PerGroup(group_size), must be combined
                   with separate `group_size` kwarg, Alternatively, just set the
                   `group_size` kwarg and leave this field empty.
        mapping_type: whether to use symmetric (default) or asymmetric quantization
            Alternatively, set `is_symmetric` (bool) and leave this field empty.
        scale_precision: scale dtype (default torch.fp32)
        zero_point_precision: zero point dtype (default torch.int32)
        zero_point_domain: whether zero point is in integer (default) or float domain
        is_dynamic: whether to use dynamic (default) or static scale and zero points
        range_learning (prototype): whether to learn scale and zero points during training
            (default false), not compatible with `is_dynamic`.

    Keyword args:
        group_size: size of each group in per group fake quantization,
            can be set instead of `granularity`
        is_symmetric: whether to use symmetric or asymmetric quantization,
            can be set instead of `mapping_type`

    Example usage::

        # Per token asymmetric quantization
        IntxFakeQuantizeConfig(torch.int8, "per_token", is_symmetric=False)
        IntxFakeQuantizeConfig(torch.int8, PerToken(), MappingType.ASYMMETRIC)

        # Per channel symmetric quantization
        IntxFakeQuantizeConfig(torch.int4, "per_channel")
        IntxFakeQuantizeConfig(torch.int4, "per_channel", is_symmetric=True)
        IntxFakeQuantizeConfig(torch.int4, PerAxis(0), MappingType.SYMMETRIC)

        # Per group symmetric quantization
        IntxFakeQuantizeConfig(torch.int4, group_size=32)
        IntxFakeQuantizeConfig(torch.int4, group_size=32, is_symmetric=True)
        IntxFakeQuantizeConfig(torch.int4, "per_group", group_size=32, is_symmetric=True)
        IntxFakeQuantizeConfig(torch.int4, PerGroup(32), MappingType.SYMMETRIC)
    r'   r(   mapping_typescale_precisionzero_point_precisionzero_point_domainT
is_dynamicFrange_learningNeps)r8   is_symmetricr8   rF   c
                r   |t          d          || _        |                     ||
          | _        |                     ||          | _        || _        || _        || _        || _	        || _
        |	| _        t          j        t          j        g}|                    t!          t#          j                                         |                    t!          t'          j                                         ||vrt          d|d|          |r|rt          d          |                                  d S )Nz/Please use ZeroPointDomain.NONE instead of NonezUnsupported dtype 'z', choose from z4`is_dynamic` is not compatible with `range_learning`)r,   r'   _get_granularityr(   _get_mapping_typer?   r@   rA   rB   rC   rD   rE   r2   int8uint8extendlistr   keysr   r1   )r0   r'   r(   r?   r@   rA   rB   rC   rD   rE   r8   rF   
all_dtypess                r$   __init__zIntxFakeQuantizeConfig.__init__   sA    $NOOO
00jII 22<NN.$8!!2$, j%+.
$38::;;<<<$49;;<<===
""*<AEE::N  
  	U. 	USTTTr#   c                     dS )zc
        For deprecation only, can remove after https://github.com/pytorch/ao/issues/2630.
        Nr"   r/   s    r$   r1   z$IntxFakeQuantizeConfig.__post_init__   s	     	r#   returnc                    ||dk    r|t          d|z            t          |t                    ret          |t          t          t
          f          st          d|z            t          |t                    r|j        dk    rt          d          |S |dk    rt                      S |dk    rt	          d	          S |dk    r |t          d
          t          |          S t          |t                    rt          d|dg d          |"t          d|dt          |                    |t          d          t          |          S )ay  
        Parse the `Granularity` represented in the args.

        Granularity can be specified in one of three ways:
            1) `Granularity` object: one of PerToken(), PerAxis(), and PerGroup(group_size)
            2) str: one of 'per_token', 'per_channel', and 'per_group'
            3) None: `group_size` must be set instead, represents per group granularity
        N	per_groupz,`group_size` conflicts with granularity '%s'z!Granularity '%s' is not supportedr   z0Only axis=0 is supported for PerAxis granularity	per_tokenper_channel)axisz7Granularity was 'per_group' but no `group_size` was setzUnexpected granularity: 'z', must be one of )rU   rV   rT   zGranularity 'z' has unexpected type z9At least one of `granularity` or `group_size` must be set)	r,   r-   r   r   r   r   rW   strr.   )r0   r(   r8   s      r$   rH   z'IntxFakeQuantizeConfig._get_granularity   s    "{**'>L  
 k;// 	kHgx+HII T !D{!RSSS+w// UK4D4I4I !STTT +%%::M))???"K''! M   J'''S)) 	*;; I I I IK   "*;;[ 1 1 13   K   
###r#   c                     ||t          d          ||t          j        S |.|t          j        t          j        fvrt          d|z            |S |J |rt          j        S t          j        S )z
        Parse the `MappingType` represented in the args.

        Mapping type can be specified in one of two ways:
            1): `MappingType` object: one of SYMMETRIC or ASYMMETRIC
            2): is_symmetric bool
        Nz1Cannot set both `mapping_type` and `is_symmetric`z!MappingType '%s' is not supported)r,   r   	SYMMETRIC
ASYMMETRIC)r0   r?   rF   s      r$   rI   z(IntxFakeQuantizeConfig._get_mapping_type  s     #(@PQQQ L$8(( #K$9;;Q#RRR !D|!STTT ''' 	*(())r#   c                 |    t          | j        t                    r| j        j        S t	          d| j        z            )zm
        If this is per group granularity, return the group size.
        Otherwise, throw an error.
        z,`group_size` is undefined for %s granularity)r-   r(   r   r8   r,   r/   s    r$   r8   z!IntxFakeQuantizeConfig.group_size0  sB     d&11 	#..>AQQ  r#   c                 ,    | j         t          j        k    S )zT
        Return True if mapping type is symmetric, else False (asymmetric).
        )r?   r   rZ   r/   s    r$   rF   z#IntxFakeQuantizeConfig.is_symmetric=  s    
  K$999r#   namevaluec                 B   |dk    r1t                                          dt          |                     dS |dk    r>|rt          j        nt          j        }t                                          d|           dS t                                          ||           dS )zB
        Support setting `group_size` and `is_symmetric`.
        r8   r(   rF   r?   N)super__setattr__r   r   rZ   r[   )r0   r^   r_   r?   	__class__s       r$   rb   z"IntxFakeQuantizeConfig.__setattr__D  s     <GGx?????^##49U;00{?ULGG=====GGe,,,,,r#   )!r   r   r    r!   r   r2   r'   r   r3   r   r   r   rC   boolrD   rE   r   r4   float32int32INTrX   r<   rP   r1   rH   rI   propertyr8   rF   r   rb   __classcell__)rc   s   @r$   r>   r>   h   s        . .` l*++++[   +%%%&&&&J ND   C%
 6:.2',},1K-<-@$#( %)'+( ( (U[,./( ;T12( {+	(
 ( $k( +( ( ( e_( SM( tn( ( ( (T  :$;T12:$ SM:$ 
	:$ :$ :$ :$x*{+* tn* 
	* * * *@ 
C 
 
 
 X
 :d : : : X:
- 
-C 
- 
- 
- 
- 
- 
- 
- 
- 
- 
-r#   r>   c                       e Zd ZdZd ZdS )FakeQuantizeConfigzd
    (Deprecated) Please use :class:`~torchao.quantization.qat.IntxFakeQuantizeConfig` instead.
    c                 $    t          |            d S )Nr   r/   s    r$   r1   z FakeQuantizeConfig.__post_init__W  s     &&&&&r#   N)r   r   r    r!   r1   r"   r#   r$   rk   rk   R  s-         ' ' ' ' 'r#   rk   base_configrR   c                    ddl m} ddlm} ddlm}m}m}m}m	}m
} t          | |          ret          t          j        d| j        t           j        k              }	t          t          j        | j        | j        t           j        k              }
nt          | |          rd}	| j        d	k    rPt,          j        t,          j        g}| j        |vrt5          d
|           t7          dt          j                  }
n[| j        dk    rgddlm} | j        t@          j!        k    r!|tE          | j#                           d         }n| j        }t          t          j$        | j        d|          }
nt5          dtE          |                      t          | |          r|| j        d	k    r t5          dtE          |            d          tK          | j&                  \  }}tO          | j(        || j)        | j*                  }	tO          | j+        |          }
n>t          | |          r:tO          tX          t[                                }	t7          dtX                    }
nt          | |          r- || j.        dd          }	 || j.        d| j/                  }
nt          | |          r| j        d	k    s
J d            | j0        dk    s
J d            | j+        t          j1        k    s
J d            | j        t           j2        k    s
J d            | j3        t           j        k    s
J d            | j4        
J d            t          t          j        dd| j4                  }	t          | j+        | j5        | j3        | j4                   }
nt          | |          r| j        d	k    s
J d            | j0        dk    s
J d            | j        t           j        k    s
J d!            | j+        t          j1        k    s
J d            | j6        
J d"            d}	t          | j+        | j&        | j        | j6                   }
nt5          d#| z            |	|
fS )$a&  
    Given a base post-training quantization (PTQ) config, infer the corresponding
    `FakeQuantizeConfigBase`s for both the activations and the weights.
    This is called during the prepare phase of QAT.

    Return a 2-tuple of (activation_config, weight_config) for fake quantization.
    r   )'NVFP4DynamicActivationNVFP4WeightConfig)NVFP4FakeQuantizeConfig))Float8DynamicActivationFloat8WeightConfig'Float8DynamicActivationInt4WeightConfigInt4WeightOnlyConfig%Int8DynamicActivationInt4WeightConfig%Int8DynamicActivationIntxWeightConfigIntxWeightOnlyConfigrU   )r'   r(   rF   )r'   r8   rF   N   zPacking format must be one of r7   )r8   r9   r   )LAYOUT_TO_ZERO_POINT_DOMAINF)r'   r8   rF   rB   zUnknown version on base config zOnly version 2 of z is supported)r'   r(   r)   r*   )r'   r(   )use_per_tensor_scaleuse_swizzled_scalesuse_triton_kernelTzOnly version 2+ is supportedunpacked_to_int8z"Only unpacked_to_int8 is supportedzOnly int2+ is supportedz/Only asymmetric activation mapping is supportedz*Only symmetric weight mapping is supportedz.Specifying weight_scale_dtype is not supported)rF   r@   )r'   r(   r?   r@   z#Only symmetric mapping is supportedz'Specifying scale_dtype is not supportedzUnexpected base config: %s)7torchao.prototype.mx_formatsro   torchao.prototype.qatrp   torchao.quantizationrq   rr   rs   rt   ru   rv   r-   r>   r2   rJ   act_mapping_typer   rZ   int4r8   r?   versionr   PLAINPRESHUFFLEDint4_packing_formatr,   r6   r;   torchao.quantization.quant_apirx   rB   r   NONEr.   layoutuint4r   r(   r&   r9   activation_value_lbactivation_value_ubweight_dtyper	   r   use_dynamic_per_tensor_scaler{   intx_packing_formatint1r[   weight_mapping_typeweight_scale_dtypeweight_granularityscale_dtype)rm   ro   rp   rq   rr   rs   rt   ru   rv   
act_configweight_configsupported_packing_formatsrx   	zp_domainact_granularityr   s                   r$   _infer_fake_quantize_configsr   [  s                             +DEE AE+*#$59NN
 
 


 /*"-$1[5JJ
 
 

 
K!5	6	6 vE
!##!'!-)% .6OOO P5NPP   9!&  MM  A%%      ,0DDD7[=O8P8PQRST		'9	2k&1""+	  MM RtK?P?PRRSSS	K!J	K	K UE!##R${2C2CRRRSSS0F#1
 1
-, ..'#7#7	
 
 

 1**
 
 
 
K!H	I	I EE-
 
 

 5'
 
 
 
K!H	I	I <E,,!,!I %#
 
 


 0/!,!I $);
 
 

 
K!F	G	G 1E"a''')G'''.2DDDD0 EDD '5:5557P555+{/EEEE= FEE .+2GGGG8 HGG -55< 655 ,J':	
 
 

 /*#6$8':	
 
 
 
K!5	6	6 E"a''')G'''.2DDDD0 EDD ';+@@@@1 A@@ '5:5557P555&..5 /.. 
.*#/$1'3	
 
 
 5CDDD&&r#   )*abcdataclassesr   typingr   r   r   r   r2   torchao.core.configr   torchao.float8.configr	   torchao.float8.inferencer
   r    torchao.quantization.granularityr   r   r   r   r   r   %torchao.quantization.quant_primitivesr   r   r   r   r   (torchao.quantization.quantize_.workflowsr   torchao.utilsr   utilsr   ABCr   r&   r6   r>   rk   r   r"   r#   r$   <module>r      s   


 ! ! ! ! ! ! . . . . . . . . . . . .  , , , , , , , , , , , ,                                    G F F F F F ) ) ) ) ) ) + + + + + +	 	 	 	 	SW 	 	 	     5   @     #9   , e- e- e- e- e-3 e- e- e-R' ' ' ' '/ ' ' ']']'
8*+X6L-MMN]' ]' ]' ]' ]' ]'r#   