
    PiRS                     8   d dl Z d dlmZmZmZmZmZ d dlZd dlm	Z	 d dl
m	c mZ d dlmZ d dlmZ ddlmZmZ ddlmZ ddlmZmZmZ ej        j        Zg d	Zd'd
Zej        ej        fdej        dej        dej        de de dej!        dej!        fdZ" G d dej	        j#                  Z$dej        ej        e$dfdej	        j#        de dee          de%dee         dej!        dej!        deej	        j#                 de%fdZ&	 d(dZ' G d de          Z(d  Z) G d! d"ej	        j#                  Z*	 d)dej	        j#        de de%dej!        dej!        deej	        j#                 de%fd#Z+dej	        j#        de de%dej!        dej!        f
d$Z, G d% d&e          Z-dS )*    N)AnyCallableDictOptionalType)	is_device)find_multiple   )MappingTypedequantize_affine)	Quantizer)group_quantize_tensor_symmetric groupwise_affine_quantize_tensorper_token_dynamic_quant)WeightOnlyInt4LinearInt4WeightOnlyQuantizerInt8DynActInt4WeightQuantizerc                 <    | |z  dk    }|| |dz  z  dk    }|o|S |S )Nr       )k	groupsizeinner_k_tilesk_divisible_by_groupsize%k_divisible_by_16_times_inner_k_tiless        }/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/quantization/linear_quant_modules.py_check_linear_int4_kr   )   s>     9}1 01]R5G0HA0M-'Q,QQ##    xweight_int4packscales_and_zerosout_featuresr   	precisionscales_precisionc                 z   |                                  }|                     d|d                   } t          | j        j        d          rgt
          j        j                            | 	                    |          |||	                    |                    	                    | j
                  }nft
          j        j                            | 	                    |          |||	                    |                    	                    | j
                  }|d d         |fz   }	|                    |	          }|S )Ncpudtype)sizereshaper   devicetypetorchopsaten_weight_int4pack_mm_for_cputor)   _weight_int4pack_mm)
r   r    r!   r"   r   r#   r$   origin_x_sizec	new_shapes
             r   linear_forward_int4r7   1   s    FFHHM			"mB'((A&& IN66DDOO 011	
 

 "17"

 	
 IN..DDOO 011	
 

 "17"

 	
 crc"l_4I			)AHr   c                        e Zd ZU ddgZeed<   eed<   ej        ed<   dddddej        ej        fdededed	ed
ej	        dej	        ddf fdZ
dej        dej        fdZ xZS )r   in_featuresr"   weightFN      r   r   r#   r$   returnc
           
      D   t                                                       t          |||           | _        | j        r|| _        t          |d          }|| _        || _        |r
J d            || _        || _	        || _
        || _        |	| _        |t          d          |dz  dk    s
J d            ||dz  z  dk    s
J d            t          |j        d	          r;|                     d
t#          j        ||dz  ft"          j        |                     nE|                     d
t#          j        |dz  ||dz  z  d|dz  ft"          j        |                     || _        |                     dt#          j        ||z  |df| j        |                     d S )N   zrequire bias=False-Please specify 'precision' instead of 'dtype'r<   r   zrequire out_features % 8 == 0r   z-require in_features % (innerKTiles * 16) == 0r'   r:      )r)   r,       r!   )super__init__r   paddingorigin_in_featuresr	   r9   r"   r,   r   r   r#   r$   
ValueErrorr   r-   register_bufferr.   zerosuint8int32r)   )selfr9   r"   biasr,   r)   r   r   r#   r$   	__class__s             r   rD   zWeightOnlyInt4Linear.__init__U   s    	/YVVV< 	;&1D#'T::K&(-----x"*" 0LMMMa1$$$&E$$$mb01Q666; 766 V[%(( 	  $#q(  +!  
 
 
 
   $)#(:;%*	  +!	 	 	   
K	)<;+  	
 	
 	
 	
 	
r   inputc           	          | j         r%t          j        |d| j        | j        z
  f          }t          || j        | j        | j        | j	        | j
        | j                  S )Nr   pad)rE   FrR   r9   rF   r7   r:   r!   r"   r   r#   r$   rL   rO   s     r   forwardzWeightOnlyInt4Linear.forward   sg    < 	VE%a)9D<S)S%TUUUE"K!NN!
 
 	
r   )__name__
__module____qualname____constants__int__annotations__r.   Tensorbfloat16r)   rD   rU   __classcell__rN   s   @r   r   r   O   s        "N3ML !&(-E
 E
E
 E
 E
 E
 ;E
  +E
 
E
 E
 E
 E
 E
 E
N
U\ 
el 
 
 
 
 
 
 
 
r   r   Fmoduler   padding_allowedskip_layer_funclinear_classcopy_weightsc	                    |                                  D ]\  }	}
t          |
t          j                  r|
j        | ||
j                  st          |
j        ||          s|rh ||
j        |
j        d|
j        j	        ||||          }|r.|
j        j	        t          j	        d          k    r|
j        |_        t          | |	|           t          |
||||||||	  	         d S )NF)rM   r,   r   r   r#   r$   meta)named_children
isinstancennLinearrM   r:   r   r9   r"   r,   r.   setattr_replace_linear_int4)r`   r   r   ra   rb   r#   r$   rc   rd   namechild
new_linears               r   rl   rl      s0    ,,.. ' 'e ubi((%	
" (0M0M( %U%6	=QQ2"2 *\%& <.'"/'%5	 	 	
   5EL$75<;O;O$O$O(-J%j111  
 
 
 
;' 'r   c                 :    t          | ||||t                     d S )N)rc   )rl   r   )r`   r   r   ra   rb   s        r   replace_linear_int4rq      s7     )     r   c                   j    e Zd Zddd ej        d          ej        fdededee         dej        d	ej	        d
df fdZ
 ej                    dej        j        d
eeej        f         fd            Zdej        j        d
ej        j        fdZdej        j        deded
ej        j        fdZ xZS )r      Tr<   cudar   ra   r   r,   r#   r=   Nc                     t                                                       |dv sJ |dv sJ || _        || _        || _        || _        || _        d S )N)rA      r<   )rB   @   r;   rs   )rC   rD   r   r   ra   r,   r#   )rL   r   ra   r   r,   r#   rN   s         r   rD   z Int4WeightOnlyQuantizer.__init__   sg     		)))).....*'%4$*&/r   modelc           	      x   |                                 }|                                D ]\  }}t          |t          j        j                  r|j        |j        }|j        }t          j
        d| d| d|            || j        z  dk    sJ d| d| j         d            |j        j        }t          || j        | j                  sm| j        rIdd lmc m} t          j        d| d	           t)          |d
          }	 |j        |d|	|z
  f          }nt          j        d| ddz              t-          |d| j        | j                  \  }
}t1          |
j        j        d          rCt          j        j                            |
                    | j                  | j                  }nBt          j        j                            |
                    | j                  | j                  }|                    | j                  || d<   |                    | j                  || d<   |S )Nlinear: , in=, out=r   require in_features: % self.groupsize: == 0	warning: - is padded to satisfy in_features % 1024 == 0r?   rQ   P is skipped, int4 requires that in_features is 32, 64, or is divisible by 1024, =and that groupsize and inner_k_tiles*16 evenly divide into itrv   r'   .weightz.scales_and_zeros) 
state_dictnamed_modulesrh   r.   ri   rj   rM   r"   r9   logginginfor   r:   datar   r   ra   torch.nn.functional
functionalwarningr	   rR   r   r#   r   r,   r-   r/   r0   #_convert_weight_to_int4pack_for_cpur2   _convert_weight_to_int4pack)rL   rx   cur_state_dictfqnmodr"   r9   r:   rS   padded_in_featuresw_int4x8r!   r    s                r   _create_quantized_state_dictz4Int4WeightOnlyQuantizer._create_quantized_state_dict   s    ))++++-- 3	 3	HC#ux// 2CH4D"/!oSSS+SS\SSTTT"T^3q888_;__$.___ 988 +1C  ! + !777777777ZZZZ   .;;-M-M*!&",>,L(M" " "  }}}}]^   !/ONN	0 0,+ X_1599 		JJ$KK44d6H  $O ',in&P&P DK00$2D' 'O 3B2D2DT[2Q2Q#/<L<O<OK= =#8889 r   c           	      d    t          || j        | j        | j        d | j        | j                   |S )N)rb   r#   r$   )rl   r   r   ra   r#   rL   rx   s     r   _convert_for_runtimez,Int4WeightOnlyQuantizer._convert_for_runtime;  sA    N  n!^	
 	
 	
 	
 r   argskwargsc                     |                      |          }|                     |          }|                    |d           |S NF)strictr   r   load_state_dictrL   rx   r   r   r   s        r   quantizez Int4WeightOnlyQuantizer.quantizeG  H     66u==
))%00j777r   )rV   rW   rX   r.   r,   r]   rZ   boolr   r)   rD   no_gradri   Moduler   strr\   r   r   r   r   r^   r_   s   @r   r   r      sR         $'(+u|F33!&0 00 0  }	0
 0 ;0 
0 0 0 0 0 0& U]__8X_8	c5<	 8 8 8 _8t
%(/ 
eho 
 
 
 
X_-0<?	       r   r   c           
      X   t          | t          j        t          j        t          j        t          j                  j                  } d}d|dz
  z   }	d|dz
  z  dz
  }
d|f}t          ||||t          j        |	|
|          }t          j        j        	                    | ||          }|S )N)scale_dtypezero_point_dtypeepsrv   rA   r
   )output_dtype)
r   r.   float32finfor   r   int8ri   r   linear)r   weight_int8rM   scalesrI   r"   r   output_precisionn_bit	quant_min	quant_max
block_sizew_dqr5   s                 r   linear_forward_8da4wr   Q  s     	 	MK&&*		 	 	A E	"#Ieai 1$IYJ
%	 	 	D 	""1dD11A
 Hr   c                        e Zd ZU ddgZeed<   eed<   ej        ed<   ej        ed<   	 ddddej        ej        fdededed	ej	        d
ej	        ddf fdZ
dej        dej        fdZ xZS )Int8DynActInt4WeightLinearr9   r"   r:   rM   TNrs   r   r#   r$   r=   c	                 h   t                                                       ||z  dk    sJ d| d| d            || _        || _        || _        || _        |t          d          |                     dt          j	        ||ft          j
                             |                     dt          j	        |||z  f|                     |                     d	t          j	        |||z  f|                     |r,|                     d
t          j	        ||                     d S d | _        d S )Nr   r}   z % groupsize:r   r@   r:   r(   r   rI   rM   )rC   rD   r9   r"   r   r#   rG   rH   r.   rI   r   rM   )
rL   r9   r"   rM   r,   r)   r   r#   r$   rN   s
            r   rD   z#Int8DynActInt4WeightLinear.__init__  s    	 Y&!+++M;MMYMMM ,++ '(" #LMMM 	K{35:FFF	
 	
 	
 	K{i78&  	
 	
 	
 	K{i78&  	
 	
 	
  	  \)S)S)STTTTTDIIIr   rO   c           
          |                     | j                  }t          || j        | j        | j        | j        | j        | j        | j                  S N)	r2   r#   r   r:   rM   r   rI   r"   r   rT   s     r   rU   z"Int8DynActInt4WeightLinear.forward  sP    (( $KIKJNN	
 	
 		
r   )rV   rW   rX   rY   rZ   r[   r.   r\   r   r)   rD   rU   r^   r_   s   @r   r   r     s        "N3ML
, !&(-8 88 8 8 ;8  +8 
8 8 8 8 8 8t
U\ 
el 
 
 
 
 
 
 
 
r   r   c                     ddl m} dt          j        j        dt
          dt          ffd}dt          j        j        dt          j        j        ffd}	 || |	|           d S )Nr   ))_replace_with_custom_fn_if_matches_filterrn   cur_fqnr=   c                 f    t          | t          j                  ot          | j                  pS r   )rh   ri   rj   r   r9   )rn   r   r   ra   s     r   	filter_fnz(_replace_linear_8da4w.<locals>.filter_fn  s1    %++ 
 !2I>>Q/	
r   c           	           | j         | j        | j        d u| j        j                  }r:| j        j        t          j        d          k    r| j        |_        | j        |_        |S )N)rM   r,   r   r#   r$   rf   )r9   r"   rM   r:   r,   r.   )rn   ro   rd   r   rc   r#   r$   s     r   replacement_fnz-_replace_linear_8da4w.<locals>.replacement_fn  s|    !\4'<&-
 
 

  	)EL/5<3G3GGG %J#jJOr   )torchao.quantization.quant_apir   r.   ri   r   r   r   )
r`   r   ra   r#   r$   rc   rd   r   r   r   s
    ``````   r   _replace_linear_8da4wr     s     YXXXXX
 
3 
4 
 
 
 
 
 
 

eho %(/          $ .-fniPPPPPr   c                 8    t          | ||||t                     d S r   )r   r   )r`   r   ra   r#   r$   s        r   replace_linear_8da4wr     s2     "    r   c                       e Zd Zddej        ej         ej        d          ej        fdede	dej
        dej
        dej        d	ed
df fdZ ej                    dej        j        d
eeej        f         fd            Zdej        j        d
ej        j        fdZdej        j        deded
ej        j        fdZ xZS )r   rs   Fr'   r   ra   r#   r$   r,   mapping_typer=   Nc                     t                                                       || _        || _        || _        || _        || _        || _        d S r   )rC   rD   r   ra   r#   r$   r,   r   )rL   r   ra   r#   r$   r,   r   rN   s          r   rD   z&Int8DynActInt4WeightQuantizer.__init__  sN     	'%4&/-=$*)5r   rx   c           	         |                                 }|                                D ]\  }}t          |t          j        j                  r||j        }|j        }t          j	        d| d| d|            || j
        z  dk    sJ d| d| j
         d            |j        j        }t          || j
                  sm| j        rIdd lmc m} t          j        d| d	           t%          |d
          }	 |j        |d|	|z
  f          }nt          j        d| ddz              t)          |                    | j                  d| j
        | j        | j                  \  }
}}|
                    | j                  || d<   |                    | j                  || d<   |                    | j                  || d<   |S )Nrz   r{   r|   r   r}   r~   r   r   r   r?   rQ   r   r   rv   )r   r   z.scalesz.zeros)r   r   rh   r.   ri   rj   r"   r9   r   r   r   r:   r   r   ra   r   r   r   r	   rR   r   r2   r#   r$   r   r,   )rL   rx   r   r   r   r"   r9   r:   rS   r   r   r   rI   s                r   r   z:Int8DynActInt4WeightQuantizer._create_quantized_state_dict*  s<    ))++++-- *	G *	GHC#ux// )G"/!oSSS+SS\SSTTT"T^3q888_;__$.___ 988 +KHH !+ !777777777ZZZZ   .;;-M-M*!&",>,L(M" " "  }}}}]^   !
 4IIdn--N)!%!2  	 3>..2M2M#/28))DK2H2H#/16$+1F1F#~~~.r   c                 T    t          || j        | j        | j        | j                   |S r   )r   r   ra   r#   r   s     r   r   z2Int8DynActInt4WeightQuantizer._convert_for_runtime]  s3    N NN	
 	
 	
 r   r   r   c                     |                      |          }|                     |          }|                    |d           |S r   r   r   s        r   r   z&Int8DynActInt4WeightQuantizer.quantizeh  r   r   )rV   rW   rX   r.   r   r,   r   	SYMMETRICrZ   r   r)   rD   r   ri   r   r   r   r\   r   r   r   r   r^   r_   s   @r   r   r     sb         %!&(-+u|E22$/$96 66 6 ;	6
  +6 6 "6 
6 6 6 6 6 6" U]__0X_0	c5<	 0 0 0 _0d	%(/ 	eho 	 	 	 	X_-0<?	       r   r   )r
   Nr   )F).r   typingr   r   r   r   r   r.   torch.nnri   r   r   rS   torchao.dtypes.utilsr   torchao.utilsr	   quant_primitivesr   r   unifiedr   utilsr   r   r   r/   r0   __all__r   r]   r\   rZ   r)   r7   r   r   r   rl   rq   r   r   r   r   r   r   r   r   r   <module>r      s    6 6 6 6 6 6 6 6 6 6 6 6 6 6                 * * * * * * ' ' ' ' ' '                       y~  $ $ $ $ #^$)N |\ l 	
  { k   <X
 X
 X
 X
 X
58? X
 X
 X
@ +/"^$)N*>2 2HO22 C=2 	2
 h'2 {2 k2 ux'2 2 2 2 2l HL
 
 
 
b b b b bi b b bJ2 2 2jX
 X
 X
 X
 X
 X
 X
 X
D #Q #QHO#Q#Q #Q {	#Q
 k#Q ux'#Q #Q #Q #Q #QLHO  {	
 k   "W W W W WI W W W W Wr   