
    Pix                    !   U d dl Z d dlmZmZ d dlmZmZmZmZm	Z	m
Z
 d dlZd dlmZmZmZ d dlmZmZ g dZ G d de          Z G d	 d
e          Z G d de          Zej                            eeg           ej        ej        ej        ej        hZ	 ej        dej        dej         dej!        diZ"ee
ej#        ef         e	e$e$f         f         e%d<   ej&        dej'        dej(        dej)        dej*        dej+        dej,        dej        dej        dej         dej!        diZ-ee
ej#        ef         e	e$e$f         f         e%d<   i Z.ee
ej#        ef         e	e$e$f         f         e%d<   ej&        dej'        dej(        d ej)        d!ej*        d"ej+        d#ej,        d$iZ/ee
ej#        ef         e	e$e$f         f         e%d%<   ej0        d&ej1        d'ej2        d(ej3        d)ej4        d*ej5        d+ej6        d,iZ.e-7                    ej0        dej1        dej2        dej3        dej4        dej5        dej6        di           e/7                    ej8        dej9        dej:        d ej;        d!ej<        d"ej=        d#ej>        d$i           e-7                    ej8        dej9        dej:        dej;        dej<        dej=        dej>        di           e"7                    e.           e"7                    e/           e-?                                e"?                                k    sJ d-Z@d.  eAd          D             ZBejC        D                    d/d0          ZE eeE          ZF G d1 d2ejG        jH                  ZI G d3 d4ejG        jH                  ZJd5 ZKd6 ZL ejM                    	 	 dd7ejN        d8e	e$d9f         d:ejN        d;eejN                 d<ej#        d=ee
e$eOf                  d>ee
e$eOf                  d?ejN        fd@            ZPeF	 	 dd7ejN        d8ee$         d:ejN        d;eejN                 d<ej#        d=ee
e$eOeQf                  d>ee
e$eOeQf                  d?ejN        fdA            ZRd7ejN        d8ee$         d:ejN        d;eejN                 d=e
e$eOf         d>e
e$eOf         d?ejN        fdBZS	 	 dd7ejN        d8ee$         d:ejN        d;eejN                 d<ej#        d=ee
e$eOeQf                  d>ee
e$eOeQf                  d?ejN        fdCZT	 	 dd7ejN        d8e	e$d9f         d:ejN        d;eejN                 d=ee
e$eOf                  d>ee
e$eOf                  d?ejN        fdDZU	 	 dd7ejN        d8ee$         d:ejN        d;eejN                 d<ej#        d=ee
e$eOeQf                  d>ee
e$eOeQf                  d?ejN        fdEZV	 	 dd7ejN        d8e	e$d9f         d:ejN        d;eejN                 d=ee
e$eOf                  d>ee
e$eOf                  d?ejN        fdFZW	 	 dejX        dGd7ejN        d8e	e$d9f         d:ejN        d;eejN                 dHej#        d=ee
e$eOf                  d>ee
e$eOf                  d<ej#        d?ejN        fdIZYeFddejX        fd7ejN        d8ee$         d:ejN        d;eejN                 dHej#        d=ee
e$eOeQf                  d>ee
e$eOeQf                  d<ej#        d?ejN        fdJ            ZZejX        fd7ejN        d8ee$         d:ejN        d;eejN                 d=e
e$eOf         d>e
e$eOf         d<ej#        d?ejN        fdKZ[ejX        fd7ejN        d8ee$         d:ejN        d;eejN                 d=e
e$eOf         d>e
e$eOf         d<ej#        d?ejN        fdLZ\	 	 dejX        dGd7ejN        d8e	e$d9f         d:ejN        d;eejN                 dHej#        d=ee
e$eOf                  d>ee
e$eOf                  d<ej#        d?ejN        fdMZ]ejX        fd7ejN        d8ee$         d:ejN        d;eejN                 d=e
e$eOf         d>e
e$eOf         d<ej#        d?ejN        fdNZ^	 	 dejX        dGd7ejN        d8e	e$d9f         d:ejN        d;eejN                 dHej#        d=ee
e$eOf                  d>ee
e$eOf                  d<ej#        d?ejN        fdOZ_ddej`        fd7ejN        d8e	e$d9f         d:ejN        d;eejN                 dPej#        d=ee
e$eOf                  d>ee
e$eOf                  dQed?ejN        fdRZaddej`        fd7ejN        d8e	e$d9f         d:ejN        d;eejN                 dPej#        d=ee
e$eOf                  d>ee
e$eOf                  dQed?e	ejN        ejN        f         fdSZbddej`        fd7ejN        d8e	e$d9f         d:ejN        d;eejN                 dPej#        d=ee
e$eOf                  d>ee
e$eOf                  dQed?e	ejN        ejN        f         fdTZc ejM                    ddddej!        dUfd7ejN        dVed8e	e$         dWej#        d=ee
e$eOf                  d>ee
e$eOf                  dXeeO         dYeej#                 dZeej#                 d[eQd?e	ejN        ejN        f         fd\            Zd ejM                    	 	 	 	 	 dd7ejN        dVed8e	e$         dWej#        d=ee
e$eOf                  d>ee
e$eOf                  dXeeO         dYeej#                 dZeej#                 d?e	ejN        ejN        f         fd]            Ze	 	 	 	 	 dd7ejN        dVed8e	e$         dWej#        d=ee
e$eOeQf                  d>ee
e$eOeQf                  dXeeO         dYeej#                 dZeej#                 d?e	ejN        ejN        f         fd^Zfdddddd_ej`        fd`ejN        daejN        dVed8e	e$d9f         dWej#        d=ee$         d>ee$         dXeeO         dYeej#                 dZeej#                 dbeQdQed?e	ejN        ejN        f         fdcZgeF	 	 	 	 	 	 dd7eejN                 dVehd8ee$         dWej#        d=ee
e$eOeQf                  d>ee
e$eOeQf                  dXeeO         dYeej#                 dZeej#                 d[eQd?e	ejN        ejN        f         fdd            ZideejN        dfe$dge$d?e	ejN        ejN        ejN        f         fdhZjd7eejN                 d8ee$         dWej#        d?e	ejN        ejN        ejN        ejN        f         fdiZkd7ejN        d8ee$         dWej#        djejN        dkejN        dlejN        dmejN        d?ejN        fdnZl	 dd7ejN        d8ee$         dWej#        djejN        dkejN        dlejN        dmejN        d<eej#                 d?ejN        fdoZm	 ddeejN        dpejN        dqejN        dfe$dge$d<eej#                 fdrZndsejN        dteOdueOd?ejN        fdvZo ejp                    d dddUdwdxdydzd_d{fd|ejN        d:ejN        d}ejN        d~eqde$de
ej#        df         de
ehdf         deQderd?esfd            Ztde$de$d?eQfdZudejN        d:ejN        d}ejN        de$de
ee	ejv        f         d?e	fdZwddd_dejx        ddUdUetf	d|ejN        deOdge$deQde$dej#        dehdeQdeQded?esfdZy ejM                    dzdUdddejN        d8ee$         de$de$de$deQdeOd?e	ejN        ejN        f         fd            Zzddddzejx        fd|ejN        de$de$dge$de$dej#        d?esfdZ{d|ejN        de$de$d?ejN        fdZ|d|ejN        d:ejN        de$de$d?ejN        f
dZ}ejX        fd|ejN        d:ejN        de$de$d<ej#        d?ejN        fdZ~eFej        ejX        ddfd|ejN        d8ee$         dej#        dYej#        deeO         deeO         d?ejN        fd            Zd:ejN        dejv        d?ejN        fdZej        fd|ejN        d:ejN        dej#        d?ejN        fdZejX        fd|ejN        d:ejN        d<ej#        d?ejN        fdZ eeEdU          ej        fd|ejN        d:ejN        dej#        d?ejN        fd            Z eeEd          ej        fd|ejN        d:ejN        dej#        d?ejN        fd            Z eeEdU          ejX        fd|ejN        d:ejN        d<ej#        d?ejN        fd            Z eeEd          ejX        fd|ejN        d:ejN        d<ej#        d?ejN        fd            ZdS )    N)Enumauto)CallableDictListOptionalTupleUnion)_f32_to_floatx_unpacked_floatx_unpacked_to_f32_n_ones)_register_custom_op_register_meta_op)choose_qparams_affine"choose_qparams_affine_with_min_maxquantize_affinedequantize_affineMappingTypeZeroPointDomainTorchAODType_choose_qparams_affine_tinygemm)_choose_qparams_affine_dont_preserve_zero_choose_qparams_affine_floatx'_choose_qparams_and_quantize_affine_hqq+_choose_qparams_and_quantize_scale_only_hqq,_choose_qparams_and_quantize_scale_only_sinq'_choose_qparams_and_quantize_affine_qqq_choose_scale_float8_choose_qparams_gguf_quantize_affine_no_zero_point_quantize_affine_tinygemm_quantize_affine_floatx_quantize_affine_float8_quantize_gguf _dequantize_affine_no_zero_point_dequantize_affine_tinygemm_dequantize_affine_floatx_dequantize_affine_qqq_dequantize_affine_float8_dequantize_gguf_fake_quantize_affine_fake_quantize_affine_cachemaskc                   N    e Zd ZdZ e            Z e            Z e            ZdS )r   a  How floating point number is mapped to integer number

    symmetric mapping means floating point range is symmetrically mapped to integer range
    let's say we have floating point range (-3.5, 10.2) and integer range (-8, 7) (int4)
    we'll use (-10.2, 10.2) as the range for floating point and map that to (-8, 7)
    e.g. scale = (10.2 - (-10.2)) / (7 - (-8))

    SYMMETRIC_NO_CLIPPING_ERR is a variant of symmetric mapping, where the scale is the max of smin
    and smax, where smin = min_val_neg / quant_min, and smax = max_val_pos / quant_max. By calculating
    smin and smax individually, there can be less round error on negative values, and no out-of-range
    of all floating point values.

    asymmetric mapping means we just directly map the floating point range to integer range,
    for the above example, we will map (-3.5, 10.2) to (-8, 7) and calculate quantization parameter
    based on this mapping
    e.g. scale = (10.2 - (-3.5)) / (7 - (-8))
    N)__name__
__module____qualname____doc__r   	SYMMETRICSYMMETRIC_NO_CLIPPING_ERR
ASYMMETRIC     y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/quantization/quant_primitives.pyr   r   8   s<         $ I $JJJr6   r   c                   N    e Zd ZdZ e            Z e            Z e            ZdS )r   aH  Enum that indicate whether zero_point is in integer domain or floating point domain

    integer domain: quantized_val = (float_val / scale) (integer) + zero_point (integer)
    float domain: quantized_val = (float_val - (zero_point (float) - scale * mid_point)) / scale
    none domain: quantized_val = (float_val / scale)
    N)r.   r/   r0   r1   r   INTFLOATNONEr5   r6   r7   r   r   P   s;          $&&CDFFE466DDDr6   r   c                       e Zd ZdZ e            Z e            Z e            Z e            Z e            Z	 e            Z
 e            ZdS )r   zG
    Placeholder for dtypes that do not exist in PyTorch core yet.
    N)r.   r/   r0   r1   r   INT1INT2INT3INT4INT5INT6INT7r5   r6   r7   r   r   ]   sg          466D466D466D466D466D466D466DDDr6   r   )r      )   )i i  )i   i_DTYPE_TO_QVALUE_BOUNDS                               _DTYPE_TO_BIT_WIDTH_SUB_BYTE_UINT_BOUNDS)r   )rH   )rJ   )rN   )i   )i   )i?   _SUB_BYTE_INT_BOUNDSr   rH   )r   rJ   )r   rN   )r   rX   )r   rY   )r   rZ   )r   rF      c                 ,    g | ]}t          |          S r5   )r   ).0is     r7   
<listcomp>ra      s    ,,,awqzz,,,r6   torchaoFRAGMENTc                   z    e Zd ZdZedej        dej        fd            Zedej        dej        fd            ZdS )_RoundzF
    Implementation of generic round operation with backward STE.
    xreturnc                 *    t          j        |          S N)torchround)ctxrf   s     r7   forwardz_Round.forward   s    {1~~r6   gyc                     |S ri   r5   rl   rn   s     r7   backwardz_Round.backward   s    	r6   N)	r.   r/   r0   r1   staticmethodrj   Tensorrm   rq   r5   r6   r7   re   re      s               \ %, 5<    \  r6   re   c                       e Zd ZdZedej        dej        dej        fd            Zedej        dej        fd            Z	dS )	_RoundToFloat8zH
    Implementation of `tensor.to(float8_dtype)` with backward STE.
    rf   float8_dtyperg   c                 ,    |                     |          S ri   )to)rl   rf   rv   s      r7   rm   z_RoundToFloat8.forward   s    ttL!!!r6   rn   c                 
    |d fS ri   r5   rp   s     r7   rq   z_RoundToFloat8.backward   s    4xr6   N)
r.   r/   r0   r1   rr   rj   rs   dtyperm   rq   r5   r6   r7   ru   ru      s          " "EK "EL " " " \" %, 5<    \  r6   ru   c                 @   | t           v r3t          j        |           j        t          j        |           j        }}n+| t
          vrt          d|            t
          |          \  }}||}||}||k    sJ d| d|             ||k    sJ d| d|             ||fS )a  Get quant_min and quant_max args based on dtype and also verify bounds.

    Args:
        dtype: Target quantization dtype (e.g., torch.uint8, torch.int8, or FP8 types)
        quant_min: Minimum quantized value, or None to use dtype default
        quant_max: Maximum quantized value, or None to use dtype default

    Returns:
        Tuple[int/float, int/float]: Validated (quant_min, quant_max) values

    Raises:
        ValueError: If dtype is unsupported
        AssertionError: If quant_min/quant_max are out of bounds for dtype
    zUnsupported dtype: Nz9quant_min out of bound for dtype, quant_min_lower_bound: z quant_min: z9quant_max out of bound for dtype, quant_max_upper_bound: z quant_max: )	FP8_TYPESrj   finfominmaxrG   
ValueError)rz   	quant_min	quant_maxquant_min_lower_boundquant_max_upper_bounds        r7   _get_and_check_qmin_qmaxr      s    	K"K"  5 
-	-	-6u667777Nu7U44)	)	----	Q"7	Q 	QEN	Q 	Q .--
 ----	Q"7	Q 	QEN	Q 	Q .-- ir6   c                 ~   t          |           t          |          k    sJ g }g }d}t          t          |                     D ]}| |         ||         k    r| |         dk    r||         | |         z  dk    s"J d| d||          d| d| |                      |                    ||         | |         z             |                    | |                    |                    |dz              |dz  }|                    ||                    | |         dk    r|                    |           |dz  }||fS )a  Given block_size and input size find the parameters for reduction:

    Output:
        shape_for_reduction: the shape we use to `view` input to prepare it for reduction
        reduction_dims: the dims we'll do reduction over

    Example::
        Input:
          block_size: (3, 3, 2, 10)
          input_size: (3, 3, 10, 10)

        Output:
          shape_for_reduction: (3, 3, 5, 2, 10)
          reduction_dim: [0, 1, 3, 4]
    r   rH   zExpecting input size at z dimension: z" to be divisible by block_size at rI   )lenrangeappend)
block_size
input_sizeshape_for_reductionreduction_dimscur_dimr`   s         r7   _get_reduction_paramsr     s     z??c*oo----NG3z??##  a=JqM))jma.?.?a=:a=0A555 J1  J  J*Q-  J  Jkl  J  J  {E  FG  {H  J  J 655  &&z!}
1'EFFF&&z!}555!!'A+...qLGG  &&z!}555 !}!!%%g...qLGG..r6   inputr   .scale
zero_pointoutput_dtyper   r   rg   c           	      ,    t          | ||||||          S )ag  
    Args:
      input (torch.Tensor): original float32, float16 or bfloat16 Tensor
      block_size: (Tuple[int, ...]): granularity of quantization, this means the size of the tensor elements that's sharing the same qparam
           e.g. when size is the same as the input tensor dimension, we are using per tensor quantization
      scale (float): quantization parameter for affine quantization
      zero_point (int): quantization parameter for affine quantization
      output_dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor
      quant_min (Optional[int]): minimum quantized value for output Tensor, if not specified, it will be derived from dtype
      quant_max (Optional[int]): maximum quantized value for output Tensor, if not specified, it will be derived from dtype

    Note:
      How can block_size represent different granularities?
      let's say we have a Tensor of size: (3, 3, 10, 10), here is the table showing how block_size represents different
      granularities:

       granularity type       |     block_size
         per_tensor           |    (3, 3, 10, 10)
         per_axis (axis=0)    |    (1, 3, 10, 10)
         per_axis (axis=1)    |    (3, 1, 10, 10)
     per_group (groupsize=2)  |    (3, 3, 10, 2)
     per_group (groupsize=2) for axis = 3 | (3, 3, 2, 10)


    Output:
      quantized tensor with requested dtype
    )_quantize_affiner   r   r   r   r   r   r   s          r7   r   r   A  s,    J   r6   c                     t          |||          \  }}|t          v rt          j        }t	          | |||||                              |          S )a_  Quantize tensor using affine quantization with integer zero point domain.

    Op definition that has compatible signatures with custom op library.

    Args:
        input: Input tensor to quantize (float32, float16, or bfloat16)
        block_size: Granularity of quantization - size of tensor elements sharing same qparam
        scale: Quantization scale parameter
        zero_point: Quantization zero point parameter (optional)
        output_dtype: Target quantized dtype (e.g., torch.uint8, torch.int8)
        quant_min: Minimum quantized value, derived from dtype if None
        quant_max: Maximum quantized value, derived from dtype if None

    Returns:
        Quantized tensor with requested dtype

    Note:
        zero_point_domain is pre-defined as INT, meaning:
        quantized_val = (float_val / scale) (integer) + zero_point (integer)
    )r   rS   rj   uint8_quantize_affine_no_dtype_castrx   r   s          r7   r   r   q  sb    < 4L)YWWIy ,,,{)  	br6   c                    | j         t          j        t          j        t          j        fv sJ d| j                      t          |          |                                 k    s"J d|                                  d|             t          ||                                           \  }}| j	        }| 
                    |          } |}	|D ]}
d|	|
<   |
                    |	          }|.|                                dk    r|
                    |	          }nd}t          j        t                              | d|z  z            |z   ||          }|
                    |          }|S )a  Quantize tensor using affine quantization without dtype casting.

    Performs quantization with integer zero point domain without casting to target dtype.

    Args:
        input: Input tensor to quantize (float32, float16, or bfloat16)
        block_size: Granularity of quantization - size of tensor elements sharing same qparam
        scale: Quantization scale parameter
        zero_point: Quantization zero point parameter (optional)
        quant_min: Minimum quantized value
        quant_max: Maximum quantized value

    Returns:
        Quantized tensor without dtype casting

    The op does the following:
    1. Figure out the dimension for reduction based on block_size, also reshape the input to align with
       the shape after reduction
    2. Quantize the input based on the quantization parameters scale and zero_point with zero_point_domain = INT
    3. Reshape the quantized result to original shape
    Unsupported input dtype: Got input dim:, block_size: rH   Nr         ?rz   rj   float32float16bfloat16r   dimr   sizeshapeviewnumelclampre   applyr   r   r   r   r   r   r   r   original_shapeshape_after_reductionr`   quants               r7   r   r     s   > ;    15;00	  
 z??eiikk)))@@@J@@ *)) +@EJJLL+ +' [NJJ*++E/ % %#$a  JJ,--E*"2"2"4"4q"8"8__%:;;

 
KUcEk*++j8)Y E JJ~&&ELr6   c                     t          |||          \  }}|t          v rt          j        }t	          | |||||                              |          S )a  Quantize tensor using affine quantization with float zero point domain for tinygemm.

    Specialized quantization for tinygemm int4mm kernel where zero point is in floating point domain.

    Args:
        input: Input tensor to quantize (float32, float16, or bfloat16)
        block_size: Granularity of quantization - size of tensor elements sharing same qparam
        scale: Quantization scale parameter
        zero_point: Quantization zero point parameter (optional)
        output_dtype: Target quantized dtype (e.g., torch.uint8, torch.int8)
        quant_min: Minimum quantized value, derived from dtype if None
        quant_max: Maximum quantized value, derived from dtype if None

    Returns:
        Quantized tensor with requested dtype

    The op does the following:
    1. Figure out the dimension for reduction based on block_size, also reshape the input to align with
       the shape after reduction
    2. Quantize the input based on the quantization parameters scale and zero_point with zero_point_domain = FLOAT
    3. Reshape the quantized result to original shape

    Note:
        zero_point_domain is pre-defined as FLOAT, meaning:
        quantized_val = (float_val - (zero_point (float) - scale * mid_point)) / scale
    )r   rS   rj   r   '_quantize_affine_tinygemm_no_dtype_castrx   r   s          r7   r!   r!     sc    F 4L)YWWIy ,,,{2  	br6   c                    | j         t          j        t          j        t          j        fv sJ d| j                      t          |          |                                 k    s"J d|                                  d|             t          ||                                           \  }}| j	        }| 
                    |          } |}	|D ]}
d|	|
<   |
                    |	          }|.|                                dk    r|
                    |	          }nd}||z   dz   dz  }|||z  z
  }t          j        t                              | |z
  |z            ||          }|
                    |          }|S )a  Quantize tensor using affine quantization with float zero point domain without dtype casting.

    Specialized quantization for tinygemm int4mm kernel where zero point is in floating point domain.

    Args:
        input: Input tensor to quantize (float32, float16, or bfloat16)
        block_size: Granularity of quantization - size of tensor elements sharing same qparam
        scale: Quantization scale parameter
        zero_point: Quantization zero point parameter (optional)
        quant_min: Minimum quantized value
        quant_max: Maximum quantized value

    Returns:
        Quantized tensor without dtype casting

    The op does the following:
    1. Figure out the dimension for reduction based on block_size, also reshape the input to align with
       the shape after reduction
    2. Quantize the input based on the quantization parameters scale and zero_point with zero_point_domain = FLOAT
    3. Reshape the quantized result to original shape
    r   r   r   rH   Nr   rI   r   )r   r   r   r   r   r   r   r   r   r   r`   	mid_pointmin_valr   s                 r7   r   r     s   > ;    15;00	  
 z??eiikk)))@@@J@@ *)) +@EJJLL+ +' [NJJ*++E/ % %#$a  JJ,--E*"2"2"4"4q"8"8__%:;;

 
Y&*a/I59,,GKego%>??IVVEJJ~&&ELr6   c                     t          |||          \  }}|t          v rt          j        }t	          | |||||                              |          S )a+  Quantize tensor using affine quantization without zero point.

    Specialized quantization for cases where zero point is not needed (e.g., floatx quantization).

    Args:
        input: Input tensor to quantize (float32, float16, or bfloat16)
        block_size: Granularity of quantization - size of tensor elements sharing same qparam
        scale: Quantization scale parameter
        zero_point: Quantization zero point parameter (ignored, should be None)
        output_dtype: Target quantized dtype (e.g., torch.uint8, torch.int8)
        quant_min: Minimum quantized value, derived from dtype if None
        quant_max: Maximum quantized value, derived from dtype if None

    Returns:
        Quantized tensor with requested dtype

    The op does the following:
    1. Figure out the dimension for reduction based on block_size, also reshape the input to align with
       the shape after reduction
    2. Quantize the input based on the quantization parameters scale with zero_point_domain = NONE
    3. Reshape the quantized result to original shape

    Note:
        zero_point_domain is pre-defined as NONE, meaning:
        quantized_val = (float_val / scale) | This is primarily used for floatx quantization
        where we do not want to round values to nearest integer and instead scale and cast.
    )r   rS   rj   r   ,_quantize_affine_no_zero_point_no_dtype_castrx   r   s          r7   r    r    P  sc    H 4L)YWWIy ,,,{7  	br6   c                    | j         t          j        t          j        t          j        fv sJ d| j                      t          |          |                                 k    s"J d|                                  d|             t          ||                                           \  }}| j	        }| 
                    |          } |}	|D ]}
d|	|
<   |
                    |	          }|.|                                dk    r|
                    |	          }nd}t          j        t                              | d|z  z            ||          }|
                    |          }|S )a  Quantize tensor using affine quantization without zero point and without dtype casting.

    Specialized quantization for cases where zero point is not needed without casting to target dtype.

    Args:
        input: Input tensor to quantize (float32, float16, or bfloat16)
        block_size: Granularity of quantization - size of tensor elements sharing same qparam
        scale: Quantization scale parameter
        zero_point: Quantization zero point parameter (ignored, should be None)
        quant_min: Minimum quantized value
        quant_max: Maximum quantized value

    Returns:
        Quantized tensor without dtype casting

    The op does the following:
    1. Figure out the dimension for reduction based on block_size, also reshape the input to align with
       the shape after reduction
    2. Quantize the input based on the quantization parameters scale with zero_point_domain = NONE
    3. Reshape the quantized result to original shape
    r   r   r   rH   Nr   r   r   r   s               r7   r   r     sx   > ;    15;00	  
 z??eiikk)))@@@J@@ *)) +@EJJLL+ +' [NJJ*++E/ % %#$a  JJ,--E*"2"2"4"4q"8"8__%:;;

 
KUcEk%:;;Y	RREJJ~&&ELr6   r   input_dtypec          
      0    t          | |||||||          S )a  
    Args:
      input (torch.Tensor): quantized tensor, should match the dtype `dtype` argument
      block_size: (List[int]): granularity of quantization, this means the size of the tensor elements that's sharing the same qparam
                               e.g. when size is the same as the input tensor dimension, we are using per tensor quantization
      scale (Tensor): quantization parameter for affine quantization
      zero_point (Tensor): quantization parameter for affine quantization
      input_dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor
      quant_min (Optional[int]): minimum quantized value for input Tensor
      quant_max (Optional[int]): maximum quantized value for input Tensor
      output_dtype (torch.dtype): dtype for output Tensor, default is fp32

      Default value for zero_point is in integer domain, zero point is added to the quantized integer value during quantization

    Output:
      dequantized Tensor, with requested dtype or fp32
    r   )_dequantize_affiner   r   r   r   r   r   r   r   s           r7   r   r     s3    8 !	 	 	 	r6   c           	      
   |t           vr | j        |k    sJ d| d| j                     |t          j        t          j        t          j        fv sJ d|             t          |||          \  }}t          | ||||||          S )a  Dequantize tensor using affine dequantization with integer zero point domain.

    Op definition that has compatible signatures with custom op library.

    Args:
        input: Quantized tensor to dequantize
        block_size: Granularity of quantization - size of tensor elements sharing same qparam
        scale: Quantization scale parameter
        zero_point: Quantization zero point parameter (optional)
        input_dtype: Expected dtype of input tensor (e.g., torch.uint8, torch.int8)
        quant_min: Minimum quantized value for input tensor
        quant_max: Maximum quantized value for input tensor
        output_dtype: Target output dtype (default: torch.float32)

    Returns:
        Dequantized tensor with requested output dtype
    
Expected: , got: Unsupported output dtype: )rS   rz   rj   r   r   r   r   !_dequantize_affine_no_dtype_checkr   s           r7   r   r     s    : ///{k))):::U[:: *))     3L22	  
 4KIVVIy,  r6   c                 >   t          |          |                                 k    s"J d|                                  d|             t          ||                                           \  }}| j        }	|                     |          } |}
|D ]}d|
|<   |                    |
          }||                    |
          }|                     |d          }|||                    |          z
  }||z  }|                    |	                              |          S )a  Dequantize tensor using affine dequantization without dtype checking.

    Converts quantized tensors to their high precision floating point representation.

    Args:
        input: Quantized tensor to dequantize
        block_size: Granularity of quantization - size of tensor elements sharing same qparam
        scale: Quantization scale parameter
        zero_point: Quantization zero point parameter (optional)
        quant_min: Minimum quantized value for input tensor
        quant_max: Maximum quantized value for input tensor
        output_dtype: Target output dtype (default: torch.float32)

    Returns:
        Dequantized tensor with requested output dtype

    The op does the following:
    1. Figure out the dimension for reduction based on block_size, also reshape the input to align with
       the shape after reduction
    2. Dequantize the input based on the quantization parameters scale and zero_point
    3. Reshape the quantized result to original shape and change dtype to the output_dtype
    r   r   rH   NT)copyr   r   r   r   r   r   rx   r   r   r   r   r   r   r   r   r   r   r   r`   dequants                r7   r   r     s/   > z??eiikk)))@@@J@@ *)) +@EJJLL+ +' [NJJ*++E/ % %#$a  JJ,--E__%:;;
 hh|$h//GJMM,777oG<<''**<888r6   c                    t          |          |                                 k    s"J d|                                  d|             t          ||                                           \  }}| j        }	|                     |          } |}
|D ]}d|
|<   |                    |
          }|
J d            |                     |          }||z  }|                    |	                              |          S )a9  Dequantize tensor using affine dequantization without zero point and without dtype checking.

    Converts quantized tensors to their high precision floating point representation without zero point.

    Args:
        input: Quantized tensor to dequantize
        block_size: Granularity of quantization - size of tensor elements sharing same qparam
        scale: Quantization scale parameter
        zero_point: Quantization zero point parameter (ignored, should be None)
        quant_min: Minimum quantized value for input tensor
        quant_max: Maximum quantized value for input tensor
        output_dtype: Target output dtype (default: torch.float32)

    Returns:
        Dequantized tensor with requested output dtype

    The op does the following:
    1. Figure out the dimension for reduction based on block_size, also reshape the input to align with
       the shape after reduction
    2. Dequantize the input based on the quantization parameters scale (no zero point)
    3. Reshape the quantized result to original shape and change dtype to the output_dtype
    r   r   rH   Nz>zero_point should be None for _dequantize_affine_no_zero_pointr   r   s                r7   /_dequantize_affine_no_zero_point_no_dtype_checkr   T  s   > z??eiikk)))@@@J@@ *)) +@EJJLL+ +' [NJJ*++E/ % %#$a  JJ,--EH  hh|$$GoG<<''**<888r6   c          	      
   |t           vr | j        |k    sJ d| d| j                     |t          j        t          j        t          j        fv sJ d|             t          |||          \  }}t          | ||||||          S )a  
    Args:
      input (torch.Tensor): quantized tensor, should match the dtype `dtype` argument
      block_size: (List[int]): granularity of quantization, this means the size of the tensor elements that's sharing the same qparam
                               e.g. when size is the same as the input tensor dimension, we are using per tensor quantization
      scale (Tensor): quantization parameter for affine quantization
      zero_point (Tensor): quantization parameter for affine quantization, no zero point is used for this op
      input_dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor
      quant_min (Optional[int]): minimum quantized value for input Tensor
      quant_max (Optional[int]): maximum quantized value for input Tensor
      output_dtype (torch.dtype): dtype for output Tensor, default is fp32

      Default value for zero_point is in integer domain, zero point is added to the quantized integer value during quantization

    Output:
      dequantized Tensor, with requested dtype or fp32
    r   r   r   )rS   rz   rj   r   r   r   r   r   r   s           r7   r%   r%     s    : ///{k))):::U[:: *))     3L22	  
 4KIVVIy:  r6   c                 4   t          |          |                                 k    s"J d|                                  d|             t          ||                                           \  }}| j        }	|                     |          } |}
|D ]}d|
|<   |                    |
          }||                    |
          }||z   dz   dz  }| |z
  }|                    |          }||z  }|||z  }|                    |	                              |          S )a  This function converts AQT tensors to their high precision floating point representation

    The op does the following:
    1. figure out the dimension for reduction based on block_size, also reshape the input to align with
       the shape after reduction
    2. dequantize the input based on the quantization parameters scale and zero_point and args like zero_point_domain
    3. reshape the quantized result to origianl shape and change dtype to the output_dtype
    r   r   rH   NrI   r   )r   r   r   r   r   r   r   r   r   r   r   r`   r   r   s                 r7   *_dequantize_affine_tinygemm_no_dtype_checkr     s>   " z??eiikk)))@@@J@@ *)) +@EJJLL+ +' [NJJ*++E/ % %#$a  JJ,--E__%:;;
 Y&*a/IiGjj&&GuG:<<''**<888r6   c          	      
   |t           vr | j        |k    sJ d| d| j                     |t          j        t          j        t          j        fv sJ d|             t          |||          \  }}t          | ||||||          S )a  
    Args:
      input (torch.Tensor): quantized tensor, should match the dtype `dtype` argument
      block_size: (List[int]): granularity of quantization, this means the size of the tensor elements that's sharing the same qparam
                               e.g. when size is the same as the input tensor dimension, we are using per tensor quantization
      scale (Tensor): quantization parameter for affine quantization
      zero_point (Tensor): quantization parameter for affine quantization
      input_dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor
      quant_min (Optional[int]): minimum quantized value for input Tensor
      quant_max (Optional[int]): maximum quantized value for input Tensor
      output_dtype (torch.dtype): dtype for output Tensor, default is fp32

      Default value for zero_point is in floating point domain, zero point is subtracted from the floating point (unquantized)

    Output:
      dequantized Tensor, with requested dtype or fp32
    r   r   r   )rS   rz   rj   r   r   r   r   r   r   s           r7   r&   r&     s    : ///{k))):::U[:: *))     3L22	  
 4KIVVIy5  r6   quant_dtypezero_point_domainc           
          |t          d          |t          j        u r|t          d          t          | |||||||          \  }}	|	S )a  
    General fake quantize op for quantization-aware training (QAT).
    This is equivalent to calling `quantize_affine` + `dequantize_affine`
    but without the dtype casts.

    Args:
      input (torch.Tensor): original float32, float16 or bfloat16 Tensor
      block_size: (Tuple[int, ...]): granularity of quantization, this means the size of the tensor elements that's sharing the same qparam
           e.g. when size is the same as the input tensor dimension, we are using per tensor quantization
      scale (float): quantization parameter for affine quantization
      zero_point (int): quantization parameter for affine quantization
      quant_dtype (torch.dtype): desired quantized dtype for determining and validating quant_min and quant_max values.
      quant_min (Optional[int]): minimum quantized value for output Tensor, if not specified, it will be derived from dtype
      quant_max (Optional[int]): maximum quantized value for output Tensor, if not specified, it will be derived from dtype
      zero_point_domain (ZeroPointDomain): the domain that zero_point is in, should be either integer or float
        if zero_point is in integer domain, zero point is added to the quantized integer value during
        quantization
        if zero_point is in floating point domain, zero point is subtracted from the floating point (unquantized)
        value during quantization
        default is ZeroPointDomain.INT
    N/Please use ZeroPointDomain.NONE instead of None8zero_point should be None when zero_point_domain is NONE)r   r   r;   _do_fake_quantize_affine)
r   r   r   r   r   r   r   r   _fqs
             r7   r+   r+     sm    >  JKKK	o2	2	2z7MSTTT&	 	GQ Ir6   c           
          |t          d          ||t          d          t          | |||||||          \  }}	t          j        ||k    ||k              }
|	|
fS )ah  
    General fake quantize op for quantization-aware training (QAT).
    This is equivalent to calling `quantize_affine` + `dequantize_affine`
    but without the dtype casts.

    Note: Compared to :func:`~torchao.quantization.quant_primitives._fake_quantize_affine`,
    this consumes more memory and returns an additional outlier mask for
    intermediate quantized values.

    Args:
      Same as :func:`~torchao.quantization.quant_primitives._fake_quantize_affine`.

    Returns:
      A 2-tuple of (
          final fake quantized values,
          outlier mask for intermediate quantized values
      )

    Nr   r   )r   r   rj   logical_and)r   r   r   r   r   r   r   r   qdqmasks              r7   r,   r,   J  s    :  JKKK		"z'=STTT&	 	GQ a9nY@@D:r6   c           	      b   | j         }t          |||          \  }}|t          j        k    rt          }	t
          }
nP|t          j        k    rt          }	t          }
n1|t          j	        k    rt          }	t          }
nt          d|            |	| |||||          } |
|||||||          }||fS )a*  Helper function for fake quantization that returns both intermediate and final values.

    Performs quantization followed by dequantization without dtype casting, returning both
    the intermediate quantized values and the final dequantized values.

    Args:
        input: Input tensor to fake quantize (float32, float16, or bfloat16)
        block_size: Granularity of quantization - size of tensor elements sharing same qparam
        scale: Quantization scale parameter
        zero_point: Quantization zero point parameter (optional)
        quant_dtype: Target quantized dtype for determining quant_min/quant_max
        quant_min: Minimum quantized value, derived from dtype if None
        quant_max: Maximum quantized value, derived from dtype if None
        zero_point_domain: Domain of zero point (INT, FLOAT, or NONE)

    Returns:
        Tuple of (intermediate quantized values, final dequantized values)

    Helper function for `_fake_quantize_affine` that returns both the
    intermediate quantized values and the final dequantized values.
    z Unrecognized zero point domain: r   )rz   r   r   r9   r   r   r:   r   r   r;   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   s                r7   r   r   y  s    > +K3KIVVIyO///9>	o3	3	3BG	o2	2	2GLO<MOOPPP	 	A 
		 
 
 
B r7Nr6   Fmapping_typetarget_dtypeepsscale_dtypezero_point_dtypekeepdimc
                 <    t          | |j        ||||||||	
  
        S )a  
    Args:
        input (torch.Tensor): fp32, bf16, fp16 input Tensor
        mapping_type (MappingType): determines how the qparams are calculated, symmetric or asymmetric
        block_size: (Tuple[int]): granularity of quantization, this means the size of the tensor elements that's sharing the same qparam
          e.g. when size is the same as the input tensor dimension, we are using per tensor quantization
        target_dtype (torch.dtype): dtype for target quantized Tensor
        quant_min (Optional[int]): minimum quantized value for target quantized Tensor
        quant_max (Optioanl[int]): maximum quantized value for target quantized Tensor
        eps (Optional[float]): minimum scale, if not provided, default to eps of input.dtype
        scale_dtype (torch.dtype): dtype for scale Tensor
        zero_point_dtype (torch.dtype): dtype for zero_point Tensor, defaults to torch.int32
        Now removed params:
            zero_point_domain (ZeroPointDomain): the domain that zero_point is in, defaults to Integer or None
            preserve_zero (bool): whether to preserve zero in the quantized Tensor, defaults to True

    Output:
        Tuple of scales and zero_points Tensor with requested dtype
    )_choose_qparams_affinename)
r   r   r   r   r   r   r   r   r   r   s
             r7   r   r     s8    @ "  r6   c	                    t          |||          \  }}|t          j        u sJ d|             || j        }|t	          j        | j                  j        }t          |          |                                 k    s"J d|                                  d|             t          || 
                                          \  }	}
|                     |	          } t	          j        | |
d          }t	          j        | |
d          }|}|}||z
  t          ||z
            z  }t	          j        ||          }||z   dz   d	z  }|||z  z   }|| j        }|                    |
          }|                    || j                  |fS )a  
    Specialized version of choose_qparams_affine

    This is used for tinygemm int4mm kernel where zero point is in floating point domain
    and zero does not have to be exactly representable.

    Args:
        input (torch.Tensor): fp32, bf16, fp16 input Tensor
        mapping_type (MappingType): determines how the qparams are calculated, symmetric or asymmetric
        block_size: (Tuple[int]): granularity of quantization, this means the size of the tensor elements that's sharing the same qparam
        target_dtype (torch.dtype): dtype for target quantized Tensor
        quant_min (Optional[int]): minimum quantized value for target quantized Tensor
        quant_max (Optioanl[int]): maximum quantized value for target quantized Tensor
        eps (Optional[float]): minimum scale, if not provided, default to eps of input.dtype
        scale_dtype (torch.dtype): dtype for scale Tensor
        zero_point_dtype (torch.dtype): dtype for zero_point Tensor

    Output:
        Tuple of scales and zero_points Tensor with requested dtype
    Unsupported mapping type: Nr   r   Fr   r   r~   rH   rI   rz   rz   device)r   r   r4   rz   rj   r}   r   r   r   r   r   r   aminamaxfloatr   rx   r   )r   r   r   r   r   r   r   r   r   r   r   r   max_valmin_val_negmax_val_posr   r   r   s                     r7   r   r     s   @ 4L)YWWIy;11113\33 211 k
{k%+&&*z??eiikk)))@@@J@@ *)) +@EJJLL+ +' JJ*++EjNEBBBGjNEBBBG KK;&%	I0E*F*FFEK3'''E Y&*a/Iuy00J ;%566J88+el8;;ZGGr6   c	                 d   t          |||          \  }}|t          j        k    sJ d|             || j        }|t	          j        | j                  j        }t          |          |                                 k    s"J d|                                  d|             t          || 
                                          \  }	}
|                     |	          } t	          j        | |
d          }t	          j        | |
d          }|}|}||z
  t          ||z
            z  }t	          j        ||          }|t                               ||z            z
  }t	          j        |||          }|t          j        }|                    || j                  |                    |	          fS )
a  Specialized version of choose_qparams_affine with zero_point_domain=ZeroPointDomain.INT and preserve_zero=False.

    Args:
        input (torch.Tensor): fp32, bf16, fp16 input Tensor
        mapping_type (MappingType): determines how the qparams are calculated, asymmetric only
        block_size: (Tuple[int]): granularity of quantization, this means the size of the tensor elements that's sharing the same qparam
        target_dtype (torch.dtype): dtype for target quantized Tensor
        quant_min (Optional[int]): minimum quantized value for target quantized Tensor
        quant_max (Optioanl[int]): maximum quantized value for target quantized Tensor
        eps (Optional[float]): minimum scale, if not provided, default to eps of input.dtype
        scale_dtype (torch.dtype): dtype for scale Tensor
        zero_point_dtype (torch.dtype): dtype for zero_point Tensor
        Now removed params default values:
            zero_point_domain (ZeroPointDomain): the domain that zero_point is in, defaults to Integer
            preserve_zero (bool): whether to preserve zero in the quantized Tensor, defaults to False

    Output:
        Tuple of scales and zero_points Tensor with requested dtype
    r   Nr   r   Fr   r   r   r   )r   r   r4   rz   rj   r}   r   r   r   r   r   r   r   r   r   r   re   r   int32rx   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   s                    r7   r   r   0  s   < 4L)YWWIy;11113\33 211 k
{k%+&&*z??eiikk)))@@@J@@ *)) +@EJJLL+ +' JJ*++EjNEBBBGjNEBBBG KK;&%	I0E*F*FFEK3'''EV\\+*=>>>JZI>>J ;88+el8;;Z]] >K > >  r6   Tr   r   preserve_zeroc                    |t          d          t          |||          \  }}|t          j        t          j        t          j        fv sJ d|             | |
J d            | j        |j        k    s
J d            || j        }|t          j        | j                  j	        }| j
        }|
rOt          j        | t          j        |                     }t          j        |t          j        |                    }n| }|}|t          j        k    s|t          j        k    r|t          j        k    r/t          j        | |          }|t          ||z
            dz  z  }nR|t          j        k    sJ |t          |          z  }|t          |          z  }||k    }t          j        |||          }|
st          d          |t           j        k    rt          d          |t           j        k    rd}n+t          j        |t)          ||z   d	z   dz                      }t          j        ||
          }n|t          j        k    sJ ||z
  t          j        t          ||z
            ||          z  }t          j        ||
          }|t           j        k    rd}n|t           j        k    rE|t0                              ||z            z
  }t          j        |||          }|	t          j        }	n-|t           j        k    s
J d            ||z   d	z   dz  }|||z  z   }||                    |	          }|                    || j
                  |fS )al  A variant of :func:`~torchao.quantization.quant_primitives.choose_qparams_affine`
    operator that pass in min_val and max_val directly instead of deriving these from a single input.
    This is used for observers in static quantization where min_val and max_val may be obtained through
    tracking all the data in calibration data set.

    Args:
      Mostly same as :func:`~torchao.quantization.quant_primitives.choose_qparams_affine`. with one
      difference: instead of passing in `input` Tensor and use that to calculate min_val/max_val
      and then scale/zero_point, we pass in min_val/max_val directly
    Nr   r   z@Need to provide `min_val` and `max_val`, got: {min_val, max_val}z]Expecting `min_val` and `max_val` to have the same dtype, got: {min_val.dtype, max_val.dtype}rI   zBpreserve_zero == False is not supported for symmetric quantizationzbzero_point_domain should be ZeroPointDomain.INT or ZeroPointDomain.NONE for symmetric quantizationrH   r   r   zGzero_point must be in FLOAT/INT/None domain for asymmetric quantizationr   )r   r   r   r2   r3   r4   rz   rj   r}   r   r   r~   
zeros_liker   r   wherer   r:   r;   	full_likeintr   tensorr9   re   r   r   rx   )r   r   r   r   r   r   r   r   r   r   r   r   scale_devicer   r   r   sminsmaxr   r   r   s                        r7   r   r   t  s   0  JKKK3L)YWWIy-    3L22	   7#6#6J $7#66 =GM)))g *)) m
{k'-((,>L i)9')B)BCCi)9')B)BCC 	---;@@@ ;000)[L+>>K5Y)>#?#?!#CDEE;#HHHHH y!1!11Dy!1!11D$;DKdD11E 	T    555 t    444JJY5JQ5NRS4S0T0TUUJEs+++{55555{*el)i'((L/
 /
 /
 
 Es+++ 444JJ/"555"V\\+2E%F%FFJZIFFJ'#(; $(====Y >== #Y.2a7I %uy'88J]])9]::
88+gn8==zIIr6   c
                 |   t          |||          \  }}|t          j        j        t          j        j        t          j        j        fv sJ d|             || j        }|t          j        | j                  j	        }t          |          |                                 k    s"J d|                                  d|             t          ||                                           \  }
}|                     |
          } t          j        | ||	          }t          j        | ||	          }t          j        |t          j        |                    }t          j        |t          j        |                    }|t          j        j        k    s|t          j        j        k    r|t          j        j        k    r/t          j        | |          }|t)          ||z
            dz  z  }nW|t          j        j        k    sJ |t)          |          z  }|t)          |          z  }||k    }t          j        |||          }t          j        |t/          ||z   dz   dz                      }t          j        ||          }n|t          j        j        k    sJ ||z
  t)          ||z
            z  }t          j        ||          }|t2                              ||z            z
  }t          j        |||          }|t          j        }|                    || j        	          |                    |
          fS )ao  op definition that has compatible signatures with custom op library

    The op does the following:
    1. figure out the dimension for reduction based on block_size
    2. find min_val/max_val based on the dimension for reduction
    3. calculate quantization parameters based on min_val/max_val based on args like `preserve_zero`
       and `zero_point_domain`
    r   Nr   r   r   rI   rH   r   r   r   )r   r   r2   r   r3   r4   rz   rj   r}   r   r   r   r   r   r   r   r   r~   r   r   r   r   r   r   r   re   r   r   rx   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   s                        r7   r   r     s)   * 4L)YWWIy"-2#    3L22	   k
{k%+&&*z??eiikk)))@@@J@@ *)) +@EJJLL+ +' JJ*++EjNGDDDGjNGDDDG)GU%5g%>%>??K)GU%5g%>%>??K 	-222;@EEE ;0555)[L+>>K5Y)>#?#?!#CDEE;#H#MMMMM y!1!11Dy!1!11D$;DKdD11E_UCY1F1Ja0O,P,PQQ
Es+++{5:::::{*eI	4I.J.JJEs+++kE.A!B!BB
[Y	BB
#${88+el8;;Z]] >K > >  r6   wnum_bits
group_sizec                 v   |dk    sJ d|             | j         \  |ddfv sJ d|             | j        }|dk    r}|k     r|                     d|f          } d|z  dz
  }|dz   dz  }t          j        t          j        |           dd	          }|d|z  z  }t                              | |z                                            }||z  }t          j	        |d
|          }||z
  
                                |z  }fd}	 |	|          } |	|          }t          j        t          j        |          dd	          }
|
dz  }
||
z                                  	                    dd                              t          j                  }|
                                |
z  }|
                    dd                              t          j                  }
|                    d                                          |
z                      t          j
                  }nd|dz
  z  dz
  }t          j        t          j        |           dd	          }
|
|z  }
t                              | |
z                                            }t          j	        || |          }|
                                |
z  }t          j        g t          j
        |          }|
dd|z
  z  z  }
|
                    d                                                              t          j                  }
|||
|fS )NrK   Unsupported num_bits = rT      Unsupported groupsize = rI   rH   T)r   r   c                 Z    |                      f                                          } | S ri   reshape
contiguousr   size_ksize_ns    r7   	reshape_wz:_choose_qparams_and_quantize_affine_qqq.<locals>.reshape_w^  *    		66*++6688AHr6   g     _@rE   rF   r   r   rO   )r   r   r  rj   r   absre   r   r   r   halfrk   rx   int8r   r  r   )r   r   r   orig_device	max_q_val
half_q_vals_groupq_ww_refr
  	s_channelt_int8r  r	  s               @@r7   r   r   ?  sA   
 q===>H>>===WNFF"c6****,Sz,S,S***(KR
FIIr:&''xK!O	!m)
 *UYq\\2t<<<1y=  ll1w;''++--zk#q),,z!''))G3	 	 	 	 	 	 inn	%   Juy//TBBB	U	)#**,,224==@@LL	)%%b!,,//ek/BB	 ??62..99;;iGKK* L 
 
 (Q,'!+	 Juy||R>>>	Y	 ll1y=))--//k#	z955

Y&,rKHHHQ1x<((	%%fb11<<>>AA%+NN	E))r6   c                    | j         }t          ||                                           \  }}|                     |          } t	          j        | |d          }t	          j        | |d          }d}d}	|t          ||	z
            dz  z  }
||z
  t          ||	z
            z  }
|}t          |d         z  dk    sJ dt          |d         z  f}t          ||
                                          \  }}|
                    |          }
|                    |          }|	                                }|D ]}d||<   t	          j        t	          j
        |
          |d          }t	          j        t	          j
        |          |d          }d}d}|t          ||z
            z  }|t          ||z
            z  }|                    |          }|                    |          }t	          j        |
|z  ||          }t	          j        ||z  ||          }|                    |          |                    |          |                    |          |                    |          fS )	a  
    There are two sets of qparams: quantized_block_scale, quantized_block_min and super_block_scale_scale and super_block_min_scale
    the relationship is the following:
    block_scale = quantized_block_scale * super_block_sclae
    block_min = quantized_block_min * super_block_min
    quantized_val = (float_val - block_min) / block_scale + quant_min
    first we calculate block_scale and block_min
    then we calculate super_block_scale_scale and super_block_min_scale
    after that we can calculate quantized_block_scale and quantized_min_scale
    the returned values are: super_block_scale_scale, super_block_min_scale, quantized_block_scale
    and quantized_min_scale
    Fr   rX   r   rI   rT   rH   rZ   )rz   r   r   r   rj   r   r   r   
_GGUF_QK_Kr   r  r   rx   )r   r   r   rz   r   r   r   r   r   r   block_scale	block_minsuper_block_sizer   r`   block_scale_absmaxblock_min_absmaxqparam_quant_maxqparam_quant_minsuper_block_scale_scalesuper_block_min_scalesuper_block_scale_scale_viewsuper_block_min_scale_viewquantized_block_scalequantized_block_mins                            r7   r   r     s   " KE +@EJJLL+ +' JJ*++EjNEBBBGjNEBBBGIIU9y#899A=>KW$i).C(D(DDKI 
2&!++++:B78*?+**,,+ +' ""#677K233I/4466 % %#$a  	+NE   z	).%    05++4 4  -u++0 0  $;#?#?@U#V#V !6!;!;<Q!R!R "K224DFV   +..0@BR  	 ""5))  ''  ''u%%	 r6   r   r!  r$  r%  c                 2   |t           j        k    sJ t          ||                                           \  }}|                                }	|D ]}
d|	|
<   | j        }|                     |          } |                    |	          }|                    |	          }dt          |d         z  df}t          ||                                          \  }}|                                }|D ]}
d||
<   |                    |          }|                    |          }|                    |          }|                    |          }||z  }||z  }|                    |	          }|                    |	          }| |z
  |z  }|                    |          }|S NrH   rT   )rj   uint4r   r   r   r   r   r  )r   r   r   r   r!  r$  r%  input_shape_for_reductionr   "block_qparam_shape_after_reductionr`   r   r  %super_block_input_shape_for_reduction(super_block_qparam_shape_after_reductionr  r  int_datas                     r7   r$   r$     s    5;&&&& 1FEJJLL1 1-~ *C)G)G)I)I& 2 201*1--[NJJ011E166*  .223UVV :B7;<Q/4466= =9)> 	.2244 -  8 867033166-  .22-  6::0  2660  *,AAK%(;;I ""#EFFKABBI	![0H}}^,,HOr6   c                 <   t          ||                                           \  }}	|                                }
|	D ]}d|
|<   | j        }|                     |          } |                    |
          }|                    |
          }dt
          |d         z  df}t          ||                                          \  }}	|                                }|	D ]}d||<   |                    |          }|                    |          }|                    |          }|                    |          }||z  }||z  }|                    |
          }|                    |
          }| |z  |z   }|                    |          }||                    |          }|S r'  )r   r   r   r   r   r  rx   )r   r   r   r   r!  r$  r%  r   r)  r   r*  r`   r   r  r+  r,  r  r  r   s                      r7   r*   r*     s    1FEJJLL1 1-~ *C)G)G)I)I& 2 201*1--[NJJ011E166*  .223UVV :B7;<Q/4466= =9)> 	.2244 -  8 867033166-  .22-  6::0  2660  *,AAK%(;;I ""#EFFKABBIk!I-Gll>**G**\**Nr6   r  r  c                 6  
 |dk    sJ d|             | j         \  
|dd
fv sJ d|             |dk    r
}|
k     r~|                     d|f          } d|z  dz
  }|dz   dz  }||                                z  }| |z
                                  |                    dd          z  }
fd}	 |	|          }n"|dd	|z
  z  z  }|                                 |z  }| |                    t          j                  }n|                    |          }|S )
NrK   r   rT   r  r  rI   rH   c                 Z    |                      f                                          } | S ri   r  r  s    r7   r
  z)_dequantize_affine_qqq.<locals>.reshape_wn  r  r6   rO   )r   r  r  rx   rj   r   )r   r  r  r   r   r   r  r  w_dqr
  r  r	  s             @@r7   r(   r(   U  se    q===>H>>===WNFF"c6****,Sz,S,S***R
FIIr:&''xK!O	!m)
INN,,,J$$&&Q)?)??	 	 	 	 	 	 y q8|!45	vvxx)#wwu}%%ww|$$Kr6   rf   betalp_normc           
         |dk    rQt          j        |           t           j        j                            t          j        |           d|z  z
            z  S t          j        |           t           j        j                            t          j        |           d|z  t          j        t          j        |           |dz
            z  z
            z  S )NrH   r   )rj   signnn
functionalrelur  pow)rf   r2  r3  s      r7   _shrink_lp_opr:    s    !||z!}}ux277	!sTz8QRRRRz!}}ux277IaLLC$J%)EIaLL'A+*N*NNN 
  
 
 	
r6   gffffff?g      $@g)\(?   )r3  r2  kappaiters
early_stopr   zeromin_maxaxisrz   r   verbose
opt_paramsc	                    |d         |d         |d         |d         |d         f\  }	}
}}}|| j         nt          j         |          }|#|j        dk    rt          j        nt          j        }|                     ||          }|                    ||          }|                    ||          }d}t          |          D ]}t          j        ||z  |z                                 |d	         |d
                   }||z
  |z  }t          ||z
  |
|	          }t          j
        |||z
  |z  z
  |d          }|
|z  }
t          t          j        ||z
            
                                          }|r3t          dt          |d
z             z   dt          |          z              |r||k     r|} n|                    | j                   }|                    | j                   }~~~~t          j                                         t          j        | |z  |z                                 |d	         |d
                   }|||fS )Nr3  r2  r<  r=  r>  cudar   g     @r   rH   TrA  r   zIter z
 | Error: )r   rj   typer   r   rx   r   rk   r   r:  meanr   r  printstrrE  empty_cache)r   r   r?  r@  rA  rz   r   rB  rC  r3  r2  r<  r=  r>  W_f
best_errorr`   W_qW_rW_ecurrent_errors                        r7    optimize_weights_proximal_legacyrR    sR   & 	9677< /+GT5%  &~V]]EL4H4HF}"(+"7"7em
))%)
/
/CHH5H00E77v7..DJ5\\  k#+,--33GAJ
KKTzU"C#ItW55z#se 33$MMMeic	227799:: 	K'CAJJ&s=7I7I(IJJJ 	z))*

		 HHV]##E776=!!DS#s	J
+funt+
,
,
2
271:wqz
J
JCtr6   val1val2c                 X    t          |t          j        | |z            z            | k    S ri   )r   mathceil)rS  rT  s     r7   _is_divisiblerX    s(    tdit,,,--55r6   rN  nbitsr   c                     d}d|z  dz
  }||z   dz   dz  }||                                 z
  |                                 z                      |j                  }|}	|                     |          }
|
|	|fS )Nr   rI   rH   )r   rx   rz   r   )rN  r   r?  rY  r   r   r   r   zero_aoscale_aoW_q_aos              r7   "_convert_to_affinequantized_formatr^    sz     I51IY&*a/IDJJLL(EKKMM9==djIIGHXXe__F8W$$r6   @   rE  optimizecompute_dtype
raw_outputoptimize_weightsc
           	         |dv s
J d            |Tt          |                                 |          s2J dt          | j                  z   dz   t          |          z               |                     |t
          j                  }
|
j        }|4|dk    r|
                    d|g          n|
                    |dg          }
|
                    |d	
          d         }|
	                    |d	
          d         }t          d|z  dz
            }d}||g}|||z
  z                      d          }| |z  }|dv rt                              |          }|r |	|
||||||          \  }}}nj|                    |          }|                    |          }t                              |
|z  |z                                 |d         |d                   }d|z  }|du rt          |||||          \  }}}n|                    |          }|dk    r9|                    |d         d          }|                    |d         d          }n8|                    d|d                   }|                    d|d                   }|                    t
          j        |          }|                    ||          }|                    ||          }~
~~t
          j                                         ||||fS )a  Choose quantization parameters and quantize tensor using HQQ (Half-Quadratic Quantization).

    Performs quantization using HQQ method with optional weight optimization via proximal solver.

    Args:
        tensor: Input tensor to quantize (float32, float16, or bfloat16)
        nbits: Number of bits for quantization (default: 4)
        group_size: Size of quantization groups (default: 64)
        optimize: Whether to optimize weights using proximal solver (default: True)
        axis: Axis along which to perform quantization (0 or 1, default: 1)
        compute_dtype: Target compute dtype (default: torch.float16)
        device: Target device for computation (default: "cuda")
        verbose: Whether to print optimization error information (default: False)
        raw_output: If True, return params in HQQ library format (default: False)
        optimize_weights: Weight optimization function (default: optimize_weights_proximal_legacy)

    Returns:
        Tuple of (quantized_weights, scale, zero_point, original_shape)

    Note:
        Uses proximal solver to minimize ||W - dequantize(quantize(W))||_p^p for weight optimization.
    r\   zaxis should be either 0 or 1NzEgroup_size should be divisble by the total tensor dimensions. shape: , group_size: r   rz   rH   rT   TrF  r   rI   g     @)r   )rK   )r   r   r?  r@  rA  r   rB  r   Fr   )rX  r   rJ  r   rx   rj   r   r  r~   r   rk   r   re   r   r^  r   rE  rK  )r   rY  r   r`  rA  ra  r   rB  rb  rc  Wr   _min_maxmax_vmin_vr@  r   r?  rN  s                       r7   r   r     s.   D 6>>>9>>>V\\^^Z88 	
 	
S&,  *oo	
 	
8 			u}	55AGE ,0AIIAIIr:&'''AIIzSUFV<W<W 55dD5))!,D55dD5))!,D!U(Q,EEenG dTk"))c)22E55=D ||||D!!  K++
 
 
UDD ww}%%''ll1u9t+,,2271:wqzJJ %KE U=eU
 
UDD kk%  199MM%(B//E<<a"--DDMM"eBi00E<<E"I..D
&&u{6&
2
2CHH=H88E77v766D 	
4	JtU""r6   gh㈵>)r=  
stochasticearly_stop_tol	hp_tensorqminqmaxr=  rl  rm  c                F   | j         dk    s
J d            t          |t          t          f          rt	          |          dk    s
J d            |d         dk    r|d         dk    s
J d            ||k     s
J d            t
          j        }t          j        |          j        }| j	        \  }	}
t          |d                   }|
|z  dk    sJ d|
 d	|             d
t
          j        dt
          j        fd}d
t
          j        dt
          j        fd}|r|n|}|                     |                                          }|
|z  }|                    |	||          }t          t!          |          t!          |                    pd}|                                                    d          |z                      |          }|                                }t)          t          d|                    D ]} |||                    d          z                                ||          }||z                      dt
          j                  }||z                      dt
          j                  }t          j        |dk    ||z  |          }|                    |                                          }||z
                                  |                    |          z                                  }||k     r n|} |||                    d          z                                ||          }|                    |	|
                                                              t
          j                  }| j        }|                    |          }||fS )a  
    Half-Quadratic Quantization (scale-only, symmetric) for 2D weights with row-wise blocks.
    - hp_tensor: [out, in] (bf16/fp16/fp32 accepted; promoted to fp32 internally)
    - block_size: must be [1, group_size]; groups along the last dim
    - qmin, qmax: integer range (e.g., -8, 7 for signed 4-bit)
    Returns:
      qdata: int32, same shape as hp_tensor
      scale: hp_tensor.dtype, shape [out, in // group_size] (one scale per row-wise block)
    rI   zhp_tensor must be 2D [out, in]z)block_size must be a 2-element list/tupler   rH   z7block_size must be [1, group_size] with group_size >= 1zqmin must be < qmaxzin_features=z! must be divisible by group_size=rf   rg   c                 *    |                                  S ri   )rk   rf   s    r7   	round_detz>_choose_qparams_and_quantize_scale_only_hqq.<locals>.round_det}  s    wwyyr6   c                 T    t          j        | t          j        |           z             S ri   )rj   floor	rand_likers  s    r7   round_stochz@_choose_qparams_and_quantize_scale_only_hqq.<locals>.round_stoch  s!    {1uq111222r6   r   rT   )r   rz   )ndim
isinstancelisttupler   rj   r   r}   r   r   r   rs   rx   r  r   r   r  r   	clamp_mincloner   	unsqueezer   sumr   r   rz   )rn  r   ro  rp  r=  rl  rm  ra  compute_epsnkr   rt  rx  _rrg  n_groupsWgqabsr   
prev_scaler   Qgnumdenrelqdata	out_dtypes                               r7   r   r   U  s   * >Q @j4-00 S__5I5I5I3 6J5II a=A*Q-1"4"4"4A #5"44 $;;;-;;; MM+m,,0K?DAqZ]##Jz>QGqGG:GG U\ el    3u| 3 3 3 3 3 #	1	B 	]##..00AJH	
8Z	(	(B s4yy#d))$$)DVVXX]]q]!!D(33K@@EJ 3q%==!!   RU__R((())//d;; Bwmmm77Bwmmm77C!GS3Y
;;
 

#%% 	 
"''))J,@,@,M,MMRRTTE

 
B$$$	%	%	+	+D$	7	7B GGAqMM$$&&))%+66EIHHYE%<r6   rW   niterc                    |7t          |                                 |          sJ d| j         d|             |                     |          }|j        }|                    d|          }t          |                    d                                                                          |                    d                                                                                    }t          |d	          }|	                                }	t          j        |j        d         |j        |
          }
t          j        |j        d         |j        |
          }t          |          D ]}|	                    d          |z  }t          j        |d	          }|	|                    d          z  }	|
|z  }
|	                    d          |z  }t          j        |d	          }|	|                    d          z  }	||z  }|	                                                    dd          t%          |          z                      d	          }t(                              |	|z                                ||          }|                    |                                                              t          j                  }|                    d          |z                      |d         d                              |          }|d         |z  }|
                    |          d|d                                      |          }|||fS )a  
    SINQ: Sinkhorn-Normalized Quantization (https://www.arxiv.org/abs/2509.22944)

    Iteratively normalizes row and column standard deviations to minimize
    matrix imbalance before quantization with dual scales.

    Args:
        tensor: Input weight tensor
        group_size: Quantization group size (default: 64)
        niter: Number of Sinkhorn iterations (default: 20)
        compute_dtype: Target compute dtype (default: torch.float16)

    Returns:
        Tuple of (qdata, scale_row, scale_col)
    Nz/group_size must divide tensor elements. shape: re  r   rT   r   ry  rH   g:0yE>rf  r   Tr   )rX  r   r   rx   r  r~   stditemr   r  rj   onesr   r   r   r  r  r   r   r~  re   r   r   r  r  repeat)r   ro  rp  r   r  ra  rg  r   q_minW_hatscale_col_sinkhornscale_row_sinkhornr   q_colq_rowscale_sQr  	scale_row
num_groups	scale_cols                        r7   r   r     s   . V\\^^Z88 	
 	
fflffZdff	
 	
8 				&&AGE 	
		"j!!A !  ""''))155Q5<<+;+;+=+=+B+B+D+DEEEtEGGIIEAGAJqx}UUUAGAJqx}UUU5\\ 8 8		a	  5(Et,,,***/%7 		a	  5(Et,,,***/%7 yy{{At44uT{{BMMdSSGUW_%%++D$77A FF5MM$$&&))%*55E 
b		.	.44U1XrBBEEmTT  qZ'J"))*55jajADD]SSI)Y&&r6   ebitsmbitsc                 R   t           |dz
           }dt           |         |z
  z  t           |dz            d|z  z  z  }| j        }|                                 } |                                                     d                              d          |z  }|                    |          S )a3  Choose quantization parameters for floatx quantization.

    Calculates scale parameter for quantizing to custom floating point format.

    Args:
        tensor: Input tensor to quantize (float32, float16, or bfloat16)
        ebits: Number of exponent bits in target floatx format
        mbits: Number of mantissa bits in target floatx format

    Returns:
        Scale tensor for floatx quantization

    Note:
        Uses global lookup table as workaround for torch.compile() compatibility
        since _n_ones() is not compatible due to << operator.
    rH   rI   g-q=r   )_ONES_TABLErz   r   r  r   r   rx   )r   r  r  exp_bias
max_normalrz   r   s          r7   r   r     s    2 519%H{5)H45EAI!U(+J LE\\^^FJJLLa  &&5&11J>E88E??r6   c                 ~    |                                  } t          | |                    dd          z  ||          }|S )a  Quantizes the float32 high precision floating point tensor to low precision floating point number and
    converts the result to unpacked floating point format with the format of 00SEEEMM (for fp6_e3m2) where S means sign bit, e means exponent bit and m means mantissa bit
    rT   rH   )r   r   r   )r   r   r  r  tensor_floatxs        r7   r"   r"   !	  s;     \\^^F+FUZZA5F5F,FuUUMr6   c                     t          | ||          } | |                                                    dd          z  } |                     |          } | S )NrT   rH   r   )r   r   r   rx   )r   r   r  r  r   s        r7   r'   r'   ,	  sO     %VUE::Fekkmm((Q///FYY\Y**FMr6   rv   hp_value_lbhp_value_ubc                 H   t          j        |          j        }t                    dk    rG|                                                                 }||t          j        |||          }||z  }nt          | j                  \  }	}
|                     |	          }|                                	                    |
d          }||t          j        |||          }||z  }fdt          | j                  D             }|                    |          }|t           j        urV|t           j        u s
J d            t          j        t                              t          j        |                              }|                    t           j                  S )	a  
    Calculates float8 scaling factor for the given high precision tensor.

    Args:
        tensor (torch.Tensor): Input tensor to be quantized.
        float8_dtype (torch.dtype): Data type of the quantized tensor (e.g., torch.float8_e4m3fn, torch.float8_e5m2).
        scale_dtype (torch.dtype): Data type of the scaling factor (e.g., torch.float32).
        block_size (Optional[Tuple[int, ...]]): Block size for block-wise quantization. If None, tensorwise quantization is used.
        hp_value_lb (Optional[float]): the lower bound for high precision floating point value for calculating scale
        hp_value_ub (Optional[float]): the upper bound for high precision floating point value for calculating scale
    r   Nr~   r   Tr   c                 ,    g | ]\  }}||         z  S r5   r5   )r_   r`   r   r   s      r7   ra   z(_choose_scale_float8.<locals>.<listcomp>_	  s3     
 
 
,9AzJ*Q-'
 
 
r6   z!Only float8_e8m0fnuz is supportedr   )rj   r}   r   r   r  r   r   r   r   r   	enumerater  r   float8_e8m0fnuexp2re   r   log2rx   )r   r   rv   r   r  r  r   max_absr   r   r   tensor_reshapedoutput_shapes    `           r7   r   r   9	  s   ( L))-I
:!**,,""$$"k&=k'{LLLG)#.C/
 /
+^ !++&9::!%%'',,,NN"k&=k'{LLLG)#
 
 
 
=Fv|=T=T
 
 
 l++%-''e22224W222
6<<
5(9(9::;;88%-8(((r6   target_shapec                      j         k    r S                                  dk    r S t          d t           j                   D                       r S t	           j                   t	                    k    r4t          dt	           j                    dt	                               t           fdt          t	                              D                       }t          t           j         |                    D ]1\  }\  }}}|||z  k    rt          d| d| d| d	||z   d
	          2 }t          |          D ]"\  }}|dk    r|	                    ||          }#|S )a  
    Expand a scale tensor to match the target tensor shape for block-wise quantization.
    If this is rowwise quantization, however, just return the scale as is.

    Args:
        scale (torch.Tensor): Scale tensor with shape corresponding to block structure
        target_shape (torch.Size): Target tensor shape to expand to

    Returns:
        torch.Tensor: Scale tensor expanded to match target_shape
    rH   c              3   4   K   | ]\  }}||k    p|d k    V  dS )rH   Nr5   )r_   abs      r7   	<genexpr>z6_maybe_expand_scale_to_tensor_shape.<locals>.<genexpr>	  s3      
G
G116Q!V
G
G
G
G
G
Gr6   zScale tensor has z dimensions but target has c              3   D   K   | ]}|         j         |         z  V  d S ri   )r   )r_   r`   r   r  s     r7   r  z6_maybe_expand_scale_to_tensor_shape.<locals>.<genexpr>	  sC        ./Q5;q>)     r6   z
Dimension z: target size z' is not evenly divisible by scale size z (block size would be )ry  )
r   r   allzipr   r   r}  r   r  repeat_interleave)r   r  block_sizesr`   
target_dim	scale_dimr   expanded_scales   ``      r7   #_maybe_expand_scale_to_tensor_shaper  k	  s    {l""{{}} 
G
GEK(F(F
G
G
GGG  5;3|,,,,`EK 0 0``SQ]M^M^``
 
 	
      38\9J9J3K3K    K
 3<L%+{333 3  ..J	: Z///\Q \ \j \ \!*\ \BLyBX\ \ \   0 N";// Q Q:>>+==ja=PPNr6   c                    |                      t          j                  }t          || j                  }||z  }t          j        |          j        }|                    | |          }t          	                    ||          S )p
    Quantizes the high precision floating point tensor to a float8 tensor, using the given scaling factor.
    r  )
rx   rj   r   r  r   r}   r   r   ru   r   )r   r   rv   tensor_fp32scale_expandedtensor_scaled	max_valuetensor_clampeds           r7   r#   r#   	  sv     ))EM**K 9MMN.0ML))-I"((iZY(GGN===r6   c                     |                      t          j                  }t          || j                  }||z  }|                     |          S )A
    Dequantizes the float8 tensor to high precision tensor.
    )rx   rj   r   r  r   )r   r   r   
fp8_tensorr  rn  s         r7   r)   r)   	  sF     5=))J 9MMN^+I<<%%%r6   c                 &    t          | ||          S )r  r   r   rv   )r#   r  s      r7   &_quantize_affine_float8_non_decomposedr  	  s$     #!   r6   %quantize_affine_float8_non_decomposedc                 .    t          j        | |          S Nr   rj   
empty_liker  s      r7   _quantize_affine_float8_metar  	       F,7777r6   c                 &    t          | ||          S )r  r   r   r   )r)   r  s      r7   (_dequantize_affine_float8_non_decomposedr  	  s$     %!   r6   'dequantize_affine_float8_non_decomposedc                 .    t          j        | |          S r  r  r  s      r7   _dequantize_affine_float8_metar  	  r  r6   )NN)NNNNN)NNNNNFri   )rV  enumr   r   typingr   r   r   r   r	   r
   rj   !torchao.prototype.custom_fp_utilsr   r   r   torchao.utilsr   r   __all__r   r   r   serializationadd_safe_globalsfloat8_e4m3fnfloat8_e5m2float8_e4m3fnuzfloat8_e5m2fnuzr|   r   r  int16r   rG   rz   r   __annotations__r=   r>   r?   r@   rA   rB   rC   rR   rS   r[   uint1uint2uint3r(  uint5uint6uint7updateint1int2int3int4int5int6int7keysr  r   r  libraryLibrary	quant_libregister_custom_opautogradFunctionre   ru   r   r   no_gradrs   r   r   boolr   r   r!   r   r    r   r   r   r   r   r   r%   r   r&   r9   r+   r,   r   r   r   r   r   rJ  r   r   r   r$   r*   r(   r:  inference_moder|  dictr}  rR  rX  Sizer^  r   r   r   r   r   r"   r'   r   r  r#   r)   r  r  r  r  r5   r6   r7   <module>r     s            ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?          
       
  B    $   0
 
 
 
 
d 
 
 
    4       $ $k?%C D D D 
					
 
K	J	K&	K&	T eEK$=>c3hOP    qqqqqqq	K	J	K	KP T%\ 9:E#s(OKL    RT tE%+|";<eCHoMN S S S*******Q d5l!:;U38_LM    
K	K	K	K	K	K	K    QQQQQQQ
 
 
   
'
'
'
'
'
'
'
 
 
   
A
A
A
A
A
A
A
 
 
   4 5 5 5   3 4 4 4!!%<%A%A%C%CCCCC
,,5588,,,M!!)Z88	((33     U^$       U^,   &  &  & R&/ &/ &/R  .2-1, ,<,c3h, <, &	,
 +, c5j)*, c5j)*, \, , , ,^  4837) )<)S	) <) &	)
 +) c5$./0) c5$./0) \) ) ) )X=<=S	= <= &	=
 S%Z = S%Z = \= = = =L 4837/ /</S	/ </ &	/
 +/ c5$./0/ c5$./0/ \/ / / /n .2-1= =<=c3h= <= &	=
 c5j)*= c5j)*= \= = = =L 48370 0<0S	0 <0 &	0
 +0 c5$./00 c5$./00 \0 0 0 0p .2-1; ;<;c3h; <; &	;
 c5j)*; c5j)*; \; ; ; ;H .2-1% !&% % %<%c3h% <% &	%
 % c5j)*% c5j)*% +% \% % % %P  4837 %. .<.S	. <. &	.
 . c5$./0. c5$./0. +. \. . . .p !&69 69<69S	69 <69 &	69
 S%Z 69 S%Z 69 +69 \69 69 69 69@ !&29 29<29S	29 <29 &	29
 S%Z 29 S%Z 29 +29 \29 29 29 29v .2-1/ !&/ / /</c3h/ </ &	/
 / c5j)*/ c5j)*/ +/ \/ / / /r !&*9 *9<*9S	*9 <*9 &	*9
 S%Z *9 S%Z *9 +*9 \*9 *9 *9 *9f .2-1/ !&/ / /</c3h/ </ &	/
 / c5j)*/ c5j)*/ +/ \/ / / /p .2-1)8)<- -<-c3h- <- &	-
 - c5j)*- c5j)*- '- \- - - -l .2-1)8)<, ,<,c3h, <, &	,
 , c5j)*, c5j)*, ', 5<%&, , , ,j .2-1)8)<= =<=c3h= <= &	=
 = c5j)*= c5j)*= '= 5<%&= = = =@  .2-1)-.3k* *<** c
* +	*
 c5j)** c5j)** 
%* %+&* u{+* * 5<%&* * * *\  .2-1)-.2CH CH<CHCH c
CH +	CH
 c5j)*CH c5j)*CH 
%CH %+&CH u{+CH 5<%&CH CH CH CHX 4837)-.2@ @<@@ c
@ +	@
 c5$./0@ c5$./0@ 
%@ %+&@ u{+@ 5<%&@ @ @ @T  $#)-.2)8)<tJ tJ\tJ\tJ tJ c3h	tJ
 +tJ }tJ }tJ 
%tJ %+&tJ u{+tJ tJ 'tJ 5<%&tJ tJ tJ tJn  4837)-.2P PEL!PP S	P +	P
 c5$./0P c5$./0P 
%P %+&P u{+P P 5<%&P P P PfC*|C*C* C* 5<u|34	C* C* C* C*LPEL!PS	P +P 5<u|U\AB	P P P Pf;<;S	; +; #\	;
 !<; !<; ; \; ; ; ;L +/< <<<S	< +< #\	<
 !<< !<< < 5;'< \< < < <J +/( (|(\( |( 	(
 ( 5;'( ( ( (\
U\ 
 
 
5< 
 
 
 
  &*# 9 9L9<9 ,9 	9
 9 d"#9 #t)9 9 9  !9 9 9 9z6 63 64 6 6 6 6
%	%<% ,% 	%
 uej()% % % % %& !&!Aq# q#Lq#q# q# 	q#
 q# ;q# q# q# q# q# q# q# q# q#h   Y Y Y|YS	Y Y 	Y Y Y Y 5<%&Y Y Y Y|   !&H' H'LH'
H' H' 	H'
 H' ;H' H' H' H' H'V!L!!$!-0!
\! ! ! !HL!&69BE
\     !&
 
L
<
 
 	

 +
 \
 
 
 
  !& 3$}#'#'.) .)L.)S	.) +.) 	.)
 %.) %.) \.) .) .) .)b5<5',z5
\5 5 5 5v !& 3> >L><> +> \	> > > >, !&& &L&<& +& \	& & & &" Y&& !& 3 L< + \	   '& 9EFF !& 38 8L8<8 +8 \	8 8 8 GF8 Y&& !& L< + \	   '& 9GHH !&8 8L8<8 +8 \	8 8 8 IH8 8 8r6   