
    PiW                        U d dl mZmZmZmZ d dlZd dlmZ d dlm	Z	 d dl
mZmZmZmZmZmZmZmZmZmZmZ d dlmZmZ ddlmZmZmZmZmZmZm Z  g d	Z!d
 Z"da#ee$         e%d<   d Z&d Z'i Z(eee$         eee$         eee$         e)f         f         f         e%d<    G d de          Z* G d d          Z+d3dZ,dej-        dee)         fdZ.d Z/d Z0ej1        fdZ2d Z3ej1        fdZ4ej1        fdZ5ddej6        ej7        ddfd Z8ej6        fd!Z9d" Z:ddej7        fd#Z;ddej7        fd$Z<ddej6        ej7        dfd%Z=	 	 d4d&Z>ddej1        ej?        dfd'Z@ddej1        ej?        fd(ZAej1        ej1        dfd)ej-        d*ejB        d+ejB        d,eeC         dej-        f
d-ZDd. ZEd/ee)d0f         d1edee)d0f         fd2ZFdS )5    )DictListOptionalTupleN)TorchDispatchMode)int_scaled_matmul)MappingTypeZeroPointDomain)_choose_qparams_affine_dont_preserve_zero_choose_qparams_affine_tinygemm _dequantize_affine_no_zero_point_dequantize_affine_tinygemm_quantize_affine_no_zero_point_quantize_affine_tinygemmchoose_qparams_affinedequantize_affinequantize_affine)check_cpu_versioncheck_xpu_version   )GranularityPerAxisPerBlockPerGroupPerRow	PerTensorPerToken)compute_error%_quantize_activation_per_token_absmax$_quant_int8_dynamic_per_token_linear dynamically_quantize_per_channeldequantize_per_tensordequantize_per_channelget_groupwise_affine_qparamspack_tinygemm_scales_and_zeros unpack_tinygemm_scales_and_zeros-groupwise_affine_quantize_tensor_from_qparams/groupwise_affine_dequantize_tensor_from_qparams groupwise_affine_quantize_tensor"groupwise_affine_dequantize_tensorper_token_dynamic_quantget_group_qparams_symmetric"recommended_inductor_config_setterc                     t           j                            |           }t           j                            | |z
            }dt          j        ||z            z  S )N   )torchlinalgnormlog10)xyPsPns       n/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/quantization/utils.pyr   r   ?   sI    			1		B			1q5	!	!BBG$$$$    _cur_fqnc                       fd}|S )Nc                     a d S N)r:   )moduleinputfqns     r8   forward_hookz'_get_logging_hook.<locals>.forward_hookK   s    r9    )r@   rA   s   ` r8   _get_logging_hookrC   J   s$         r9   c                 ~    |                                  D ]'\  }}|                    t          |                     (d S r=   )named_modulesregister_forward_pre_hookrC   )modelnamemods      r8   _apply_logging_hookrJ   R   sK    ((** ? ?	c%%&7&=&=>>>>? ?r9   _fqn_to_op_to_shape_to_countc                       e Zd ZddZdS )LoggingTensorModerB   Nc                 2   |i } ||i |}|j          d|j         }d}|D ]C}t          |t          j                  r'|t          t          |j                            dz   z  }D|dk    r
|d d         }t          t          vri t          t          <   |t          t                   vri t          t                   |<   |t          t                   |         vrdt          t                   |         |<   t          t                   |         |xx         dz  cc<   |S )N. z, r   r   )

__module____name__
isinstancer0   Tensorstrlistshaper:   rK   )	selffunctypesargskwargsrsop_name	shape_strargs	            r8   __torch_dispatch__z$LoggingTensorMode.__torch_dispatch__^   s#   >FT4"6""/;;DM;;	 	9 	9C#u|,, 9Sci11D88	??!#2#I77757(26x@@@>@(27;8B7KKKIJ(27;IF$X.w7	BBBaGBBB	r9   )rB   N)rS   rR   __qualname__rb   rB   r9   r8   rM   rM   ]   s(             r9   rM   c                   ,    e Zd Zd Zd Zd Zd Zd ZdS )_MultiInputc                 .    t          |          | _        d S r=   )rW   values)rY   inputss     r8   __init__z_MultiInput.__init__w   s    6llr9   c                 :    | j                             |           | S r=   )rg   append)rY   r?   s     r8   	add_inputz_MultiInput.add_inputz   s    5!!!r9   c                 6    t          | j        |                   S r=   )re   rg   )rY   slices     r8   __getitem__z_MultiInput.__getitem__~   s    4;u-...r9   c                 2    d | j         D             | _         d S )Nc                 n    g | ]2}t          |t          j                  r|                                n|3S rB   )rT   r0   rU   cuda.0vals     r8   
<listcomp>z$_MultiInput.cuda.<locals>.<listcomp>   sB     
 
 
EH*S%,77@CHHJJJS
 
 
r9   rg   rY   s    r8   rr   z_MultiInput.cuda   s(    
 
LPK
 
 
r9   c                 2    d | j         D             | _         d S )Nc                 n    g | ]2}t          |t          j                  r|                                n|3S rB   )rT   r0   rU   xpurs   s     r8   rv   z#_MultiInput.xpu.<locals>.<listcomp>   sB     
 
 
DGC66?CGGIIIC
 
 
r9   rw   rx   s    r8   r{   z_MultiInput.xpu   s(    
 
KO;
 
 
r9   N)rS   rR   rc   ri   rl   ro   rr   r{   rB   r9   r8   re   re   v   s_        # # #  / / /
 
 


 
 
 
 
r9   re   c           	          |)| j         |k    rt          d| d| d| j          d          |C|                                 |k    r-t          d| d| d|                                  d          d S d S )NzExpected Tensor argument z to have dtype z
, but got z	 instead.z to have size )dtype
ValueErrorsize)
tensor_argarg_namer}   r   s       r8   _guard_dtype_sizer      s    Z-66mmmmmR\Rbmmm
 
 	
 JOO--55lllllPZP_P_PaPalll
 
 	
 55r9   r4   returnc                     g }t          t          | j                  dz
            D ]}|                    d           |                    | j        d                    |S )Nr   )rangelenrX   rk   )r4   
block_size_s      r8   _get_per_token_block_sizer      sb    J3qw<<!#$$  !agbk"""r9   c           
      `   t           j        }t          | j                  }t	          t          |          dz
            D ]}d||<   t          j        }d}d}d}| j        t          j	        k    rt          j
        nd }t          | |||||||          \  }	}
t          | ||	|
|||          }||	fS )Nr   gh㈵>i   )scale_dtype)r	   	SYMMETRICrW   rX   r   r   r0   int8r}   float16float32r   r   )tmapping_typer   ir}   eps	quant_min	quant_maxr   scale
zero_point	quantizeds               r8   r   r      s    (LagJ3z??Q&''  
1JE
C II#$7em#;#;%--K-		 	 	E:  	:uj%I I er9   c                 ^    t          |           \  }}t          |||||          }|||z   }|S )ze
    like F.linear, but with int8 dynamic quantization of activation,
    and a quantized weight
    )r   _quant_int8_per_token_matmul)r4   w_vals_int8_tw_scalesbias	out_dtypex_vals_int8x_scalesmm_outs           r8   r    r       sG     B!DDK)X}h	 F $Mr9   c                    | j         t          j        k    sJ d| j          d            |j         t          j        k    sJ d|j          d            |j         t          j        t          j        fv sJ d|j                      |                     d| j        d                   }t          |||                    dd                    } ||z  j        g | j        dd         |j        d         R  }|                    |          }|S )a  
    Quantized matmul of int8 operands that accumulates to int32 and returns
    output_dtype. For now, this is written for approximate numerical
    Assumes that activation and weight quantization are symmetric,
    i.e. act_zp and w_zp is 0.
    Assumes that weight quantization is per-channel.

    see
    https://github.com/google/gemmlowp/blob/master/doc/quantization.md
    for an overview of quantized matmul compute

    in scalar form, assuming output_dtype is fp32 and zw == 0:

      Y_i_j_fp32 = sx * sw dot(X_i, W_j)
    zx dtype z not yet supportedzw dtype z?x_scales needs to be a torch.float32 or torch.bfloat16 but got r   r   N)	r}   r0   r   floatbfloat16reshaperX   r   to)r   r   r   r   output_dtypetmpy_dot_scaledr5   s           r8   r   r      sB   . 
***8;$888 +** %*,,,:=&::: -,, >    	[(.ZZ	  $ 

b+"3B"7
8
8C$S-9I9I"a9P9PQQL)	 ) 			3B3		!-!3B!7	 	 	A
 	
\AHr9   c           
      B   |                                  dk    s
J d            t          j        t          j                  j        }d| j        d         f}t          j        }t          j        }t          | |||||||          \  }}	t          | |||	|||          }
|
||	fS )z
    assumes symmetric quantization
    assumes axis == 0
    assumes dense memory format
    TODO(future): relax ^ as needed
       only support 2d Tensorsr   )target_dtyper   r   r   zero_point_dtype)dimr0   finfor   r   rX   int64r	   r   r   r   )r4   r   r   r   r   r   r   r   r   r   quants              r8   r!   r!     s     5577a<<<2<<<
+em
$
$
(CQWQZJ{(L-	!)	 	 	E: 	:uj,	9 E %##r9   c                     | j         }| j        }|                                dk    sJ d|                                             t          | |||||          }|S )Nr   zscale size: r   )rX   r}   numelr   )int_reprr   r   r   r   input_dtypedequantizeds          r8   r"   r"   3  sk    J.K;;==A=ekkmm==#*eZ9  K r9   c                     |                                  dk    s
J d            |                                 } | j        d         df}| j        }t	          | |||||          }|                                }|S )Nr   r   r   r   r   )r   r   rX   r}   r   )r   scaleszero_pointsr   r   r   r   s          r8   r#   r#   >  s    <<>>Q 9
 zz||H.#Q'J.K#*fk;Y  K --//Kr9         Fc                 *   || j         d         k    r| j         d         }|dk    sJ | j         d         |z  dk    sJ |                                 dk    sJ |dk    sJ d|             t          j        }t          j        }d|f}	d}
d|z  dz
  }|d}|}|t          j        k    r|nt          j        }|t          j        k    r|st          | ||	||
||||	  	        \  }}nI|t          j        k    r|st          | ||	||
||||	  	        \  }}nt          | ||	||
||||	  	        \  }}|                    |	                              | j         d         d          |                    |	                              | j         d         d          fS )
Nr   r   r   r      z-only n_bit smaller than 8 is supported, got: gư>)r   r   r}   )rX   r   r	   
ASYMMETRICr0   int32r
   INTFLOATr   r   r   r   r   )wn_bit	groupsizer}   zero_point_domainpreserve_zeror   r   r   r   r   r   r   r   r   r   s                   r8   r$   r$   O  s    172;GBK	q====72;"a''''5577a<<<<A:::NuNN:::)L;LYJI51I
{K"o&999u{  O111-1;#-

 

 

zz 
o1	1	1-	1E#-

 

 

zz 2#-

 

 

z 88%8  ((R88*-- ;H ; ;gagaj" r9   c                 ^   t          | d||                                           t          |d|           |                                 }t          j        |                     d          |                    d          g|                              dd                                          S )Nr   )r}   r   zerosr   r   rQ   )r   r   r   r0   cat	unsqueeze	transpose
contiguous)r   r   r}   r   s       r8   r%   r%     s    fhe%**,,GGGGeWE2222
**,,C	  $$## 	
 	
 
2r			
r9   c                 |    | j         d         dk    sJ t          j        |                     dd          dd          S )Nr   r   r   rQ   r   )rX   r0   splitr   )scales_and_zeross    r8   r&   r&     s@    !"%****;'11"b991bAAAr9   c           	      ~   |dk    sJ || j         d         k    r|j         d         dk    r| j         d         }| j         d         |z  dk    sJ |                                 dk    sJ d|f}t          j        }d}d|z  dz
  }	|t          j        k    rt          }
nG|t          j        k    rt          }
n/t          t          j	        k    rt          }
nt          d|            |
| ||||||	          }| j         d         dk    rt          |j                  sSt          |j                  s?|d d d d df         dz  |d d dd df         z                      t          j                  }t          |j                  r?|d d dd df         dz  |d d d d df         z                      t          j                  }|S )Nr   r   r   r   z Unrecognized zero point domain: r   )rX   r   r0   r   r
   r   r   r   r   NONEr   r~   r   devicer   r   uint8)r   r   r   r   r   r   r   r   r   r   _quantize_affineint_datas               r8   r'   r'     s    q====172;6<#3q#8#8GBK	72;"a''''5577a<<<<YJ;LI51IO///*	o3	3	34	O0	0	09O<MOOPPP	 H 	wr{Q"8?33 	U"8?33	U !SSqS)Q."""add(1CCGGTTHX_-- 	U QTT*a/(222sss72CCGGTTHOr9   c           
      z   |dk    sJ |                                  dk    sJ | j        t          j        k    s| j        d         dk    rt          | j                  s|                     t          j                  }|dz	  }|dz  }t          j	        | j        d         | j        d         dz  ft          j        | j                  }	t          | j                  s||	d d d d df<   ||	d d dd df<   n||	d d d d df<   ||	d d dd df<   n| }	||	j        d         k    r|j        d         dk    r|	j        d         }|	j        d         |z  dk    sJ d|f}
t          j        }d}d|z  dz
  }|t          j        k    rt          }n|t          j        k    rt          }nt           } ||	|
||||||j                  S )	Nr   r   r   r      r   )r}   r   r   )r   r}   r0   r   rX   r   r   r   r   r   r   r
   r   r   r   r   r   )w_int4x8r   r   r   r   r   data	high_bitslow_bitsw_int32r   r   r   r   _dequantize_affines                  r8   r(   r(     s    q====<<>>Q%+%%);a)?)?(/** *@ {{5;''AI	$;+^Aq 1A 56+?
 
 

 "(/22 	*(GBBB!G (GBBB1H'GBBB!G )GBBB1H 7=$$$b)9Q)>)>M"%	=y(A----YJ+KI51IO///.	o3	3	38=\	 	 	 	r9   c                     t          | |||||          \  }}t          | |||||          }t          |||          }	||	fS )N)r   r   )r   )r$   r'   r%   )
r   r   r   r}   r   r   r   r   r   r   s
             r8   r)   r)     sr     1	+#  MFE =	65%>O  H 6feUKK%%%r9   c                 L    t          |          \  }}t          | ||||          S r=   )r&   r(   )r   r   r   r   r   r   s         r8   r*   r*   '  s3     55EFFMFE:&%	  r9   c                 b   || j         d         k    r| j         d         }|dk    sJ | j         d         |z  dk    sJ |                                 dk    sJ |dk    sJ d|             d|f}|t          j        | j                  j        }i }d|d<   t          dd          D ]}d|dz
  z   d|dz
  z  dz
  f||<   ||         \  }	}
t          | ||t          j        |	|
|||		  	        \  }}|	                    | j         d         d          |	                    | j         d         d          fS )
Nr   r   r   r   r   zunsupported n_bit: )r   r   	   )r   r   r   r   r   r   )
rX   r   r0   r   r}   r   r   r   r   r   )r   r   r   	precisionr   r   r   rangesr   r   r   r   r   s                r8   r,   r,   4  sc    172;GBK	q====72;"a''''5577a<<<<A:::4U44:::YJ
{k!'""&FF1I1a[[ 8 8QU|_aAElQ&67q		!%=Iy-	Z"
 
 
E: ==R((**<*<QWQZ*L*LLLr9   c           	          t          | ||||          \  }}d}d|dz
  z  dz
  }d|dz
  z   }ddlm}	  |	| ||||t          j        |          }
|
||fS )Nr   r   r   r   )8_quantized_decomposed_quantize_per_channel_group_wrapper)r,   torchao._executorch_opsr   r0   r   )r   r   
group_sizer   r   r   r   max_intmin_intr   w_int8s              r8   group_quantize_tensor_symmetricr   [  s     0	5*i MFE EEAI"Geai !G      FE	65'7EJ
 F 65  r9   r?   r   r   r   c                     t           j        }t          |           }d}d}t          j        }| j        }	t          | ||||||||	  	        \  }
}t          | ||
||||          }t          |||
|||||	          }|S )Nir   )r   r   r   r   )	r	   r   r   r0   r   r}   r   r   r   )r?   r   r   r   r   r   r   r   quant_dtyper   r   r   qdqs                 r8   r+   r+   u  s     )L*511JII*K;L/)
 
 
FK 		 	A 
	!	
 	
 	
B Ir9   c                     dt           j        j        _        dt           j        j        _        dt           j        j        _        dt           j        j        _        dt           j        j        j        _        t          j	        d           dS )a  
    Set inductor config to use the following optimizations which have been showed to improve performance for quantized models:
        coordinate_descent_tuning = True
        coordinate_descent_check_all_directions = True
        force_fuse_int_mm_with_mul = True
        fx_graph_cache = True
        triton.unique_kernel_names = True
        torch.set_float32_matmul_precision("high")
    ThighN)
r0   	_inductorconfigcoordinate_descent_tuning'coordinate_descent_check_all_directionsforce_fuse_int_mm_with_mulfx_graph_cachetritonunique_kernel_namesset_float32_matmul_precisionrB   r9   r8   r-   r-     s_     8<EO4EIEOB8<EO5,0EO)8<EO!5	&v.....r9   input_shape.granularityc                    t          |t                    r| S t          |t                    r(t          |           }d||j        <   t          |          S t          |t                    r|j        }t          |          t          |           k     rtt          |          }t          |          t          |           k     r6|	                    dd           t          |          t          |           k     6t          |          }t          |          t          |           k    sJ d| d|              t          t          |                    D ]'}| |         ||         z  dk    sJ d|  d|             (|S t          |t                    rdt          |           dz
  z  | d         fz   S t          |t                    r7dgt          |           z  }| |j                 ||j        <   t          |          S t          |t                    rM| d         |j        z  dk    sJ d	| d          d
|j                     dt          |           dz
  z  |j        fz   S t!          d|           )zGet the block size based on the input shape and granularity type.
    Args:
        input_shape: The input tensor shape possibly more than 2 dimensions
        granularity: The granularity type of the quantization
    r   r   zBlock size z8 must have the same number of dimensions as input shape zNot all shapes in input shape z are divisible by block size )r   r   zLast dimension of input z  is not divisible by group size zUnsupported Granularity: )rT   r   r   rW   axistupler   r   r   insertr   r   r   r   r   r   r~   )r  r  r   block_size_listr   s        r8   get_block_sizer    s    +y)) #I	K	)	) !I+&&
'(
;#$Z   	K	*	* I +
 z??S----":..Oo&&[)9)999&&q!,,, o&&[)9)999//J:#k"2"2222k*kk^ikk 322 s:'' 	 	Aq>JqM1Q666ggg[egg 7666 	K	*	* 
Is;''!+,B/AAA	K	(	( IS3{+++
&1+/&B
;?#Z   	K	*	* I2!771<<<p{2ppXcXnpp =<< s;''!+,0F/HHH
>>>
?
??r9   )NN)r   r   )Gtypingr   r   r   r   r0   torch.utils._python_dispatchr   torchao.kernelr   %torchao.quantization.quant_primitivesr	   r
   r   r   r   r   r   r   r   r   r   torchao.utilsr   r   r  r   r   r   r   r   r   r   __all__r   r:   rV   __annotations__rC   rJ   rK   intrM   re   r   rU   r   r   r    r   r   r!   r"   r#   r   r   r$   r%   r&   r'   r(   r)   r*   r   r,   r   r}   r   r+   r-   r  rB   r9   r8   <module>r     s   / . . . . . . . . . . . .  : : : : : :                                     
                   *% % % (3-     ? ? ?  dSM4tHSM3,>'??@@   
    )   2
 
 
 
 
 
 
 
,
 
 
 
 $s)      B  2 9 9 9 9x$ $ $@ BG     EJM    & 
.%+C C C CL 9>    "B B B @U) ) ) )` %+6 6 6 6v 
.%+& & & &4 		 	 	 	 m&$M $M $M $MR m&! ! ! !8  %}$)M	+ +<++ k+ 
%	+
 \+ + + +\/ / /$,@sCx,@/:,@
38_,@ ,@ ,@ ,@ ,@ ,@r9   