
    Pi                        d dl Z d dlmZmZmZmZmZmZmZm	Z	 d dl
Z
d dlmZ d dlmZmZ d dlmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZ i Zg d	Z G d
 de
j                   Z! G d de
j        j"                  Z# G d de          Z$ G d de$          Z% G d d          Z&d)dZ'	 d*dee         deeeedf                  ee(         f         fdZ) e
j*        d          dfdZ+d+dZ,d Z- e
j*        d          fd Z.	 d,d"ej"        d#eej"        gej"        f         d$eej"        e/ge0f         d%e/dej"        f
d&Z1d"ej"        dej"        fd'Z2d"ej"        dej"        fd(Z3dS )-    N)AnyCallableDictListOptionalSequenceTupleUnion)tree_flattentree_unflatten)LayoutTensorCoreTiledLayoutto_affine_quantized_intx_static)ZeroPointDomain)	Quantizer)compute_error)get_groupwise_affine_qparams/groupwise_affine_dequantize_tensor_from_qparams-groupwise_affine_quantize_tensor_from_qparams)Int4WeightOnlyGPTQQuantizerMultiTensorInputRecorderMultiTensorGPTQQuantizerStateDictManagerc                      e Zd ZU dZdZdZdZdZdZdZ	dZ
eed<   dZeed<   dZeed<   dZeed	<   ed
eej        eej                 f         dedd fd            Zd
eej        eej                 f         deddfdZdefdZd
ej        fdZd
eej        eej                 f         dd fdZd-dZd.dZedddd ej         d          fdej         fd            Z!e	 	 	 d/de"de#e$df         de#edf         de%e&eef                  de'defd             Z(ed!e)e#edf                  de#e)e         e'f         fd"            Z*ed#             Z+ed$             Z,edi dfde"de#e$df         de#edf         de&eef         de'defd%            Z-de#e)e         e%e         f         fd&Z.ed'e&eef         d(e%e         d)ej/        d*e#edf         dd f
d+            Z0ede"de'fd,            Z1dS )0r   N
group_size{Gz?percdamp   	blocksize   in_place_thresholdinputkwargsreturnc                     t          |t          t          f          r|d         }|                    d|j                  |d<   |                    d|j                  }t          j        j	        | |fi |S )Nr   dtypeshape)

isinstancelisttuplegetr(   popr)   torchTensor_make_wrapper_subclass)clsr$   r%   r)   s       r/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/quantization/GPTQ/GPTQ.py__new__zMultiTensor.__new__=   sn     edE]++ 	!HE **Wek::w

7EK00|23HHHHH    c                     g | _         t                                          | _        d| _        |                     |           d| _        d| _        d S )Nr   F)valuesr   get_instancestate_dict_managercountadd_tensorsdebug	gptq_done)selfr$   r%   s      r3   __init__zMultiTensor.__init__G   sL     +-"2"?"?"A"A
 
r5   c                 L    | j         j         d| j         d| j        d          dS )Nz(shape=z
, example=r   ))	__class____name__r)   r7   r>   s    r3   __repr__zMultiTensor.__repr__Q   s1    ~&VVtzVVT[QR^VVV	
r5   c                 ,    |                      |          S N)r;   )r>   r$   s     r3   appendzMultiTensor.appendV   s    &&&r5   c                 0   t          |t          t          f          r|D ]}|                     |           n^t          |t          j                  sJ dt          |                       | xj        dz  c_        | j        	                    |           | S )NzMMultiTensor can only use add_tensors for Tensors or lists of tensors but got    )
r*   r,   r+   r;   r/   r0   typer:   r7   rH   )r>   r$   inps      r3   r;   zMultiTensor.add_tensorsY   s     eeT]++ 	& & &  %%%%& eU\22  m`dej`k`kmm 2 JJ!OJJKu%%%r5   Tc                 B   | j         |k     r|rMt          || j         z
            D ]4}|                     | j        d                                                    5nD|                     | j                                      | j        d         g|| j         z
  z            S | S Nr   )r:   ranger;   r7   clonerB   )r>   lengthpad_in_place_s       r3   pad_to_lengthzMultiTensor.pad_to_lengthg   s    : v
233 > >A $$T[_%:%:%<%<====> ~~dk22>>[_%$*)<=   r5   rJ   c                 f    t          || j                  }| j        d |         | _        || _        d S rG   )minr:   r7   )r>   r:   s     r3   unpadzMultiTensor.unpadx   s0    E4:&&k&5&)


r5   cudadevicec                     || _         || _        || _        || _        || _        || _        ||nd | _        || _        |	| _        |
| _	        || _
        d S )Nc                     | S rG    )xs    r3   <lambda>z9MultiTensor.configure_quantization_mode.<locals>.<lambda>   s    RS r5   )get_qparams_funcquantize_funcdequantize_funccombine_qparams_list_funcmake_qtensorskip_layer_funcact_fake_quant_funcr   r   r!   rY   )r2   r_   r`   ra   rb   rc   rd   re   r   r   r!   rY   s               r3   configure_quantization_modez'MultiTensor.configure_quantization_mode}   sn      0)-(A%'-#6#B 	 $!


r5   r\   Ffunctypes.args	skip_gptqc           
      x   | o|                      |          }t          | d          r't          | j        t          j                  r| j        }nd}|t
          vrdd dt
          |<   t
          |         dxx         dz  cc<   t
          |         d         t
          |         d         }n#t
          |         d         | j        k    s|rd}nd	}|i n|}t          ||f          \  }	}
|st          |	|
          }	t          |	|          \  }}t          j
                                        5  |sJ|                     |||
||          }t
          |         d         st          |	|           |cd d d            S t          ||
|          }|d         }t          |t                    r|j        d         }|                    |j                  }|                     ||                                |          \  }}}|                     ||          }t*                                          }|                    |d                   }|                    ||           t3          |           |                     |||d         |                                g|dd          R |d	          }|d         j        st          |	|           |cd d d            S |d         j        r|d         j        d                             |          }|d         &|d         j        d                             |          n|d         }|j        d                                         }|                     ||||d         j        d         |f|d	          j        d                                         }|                     ||                              |j                  }t3          dt?          ||                     t3          dt?          ||                    |                               t3          dt?          ||                     |                                 }t          j!        j"        #                    |||                                          }t3          dt?          ||                     t3          dt?          ||                     | $                    ||j                  }| %                    ||          }|                     ||                              |j                  }|                     |||||f|d	          j        d                                         } t3          dt?          ||                      t          |	|           |cd d d            S 	 d d d            d S # 1 swxY w Y   d S )NrY   cpur   )r:   is_in_placer:   rJ   rm   FTrY      )rj   )orig_countsz!SQNR for QDQ (this should be inf)zSQNR for weight (can be low)z)SQNR for output with GPTQ (hopefully 35+)z?SQNR for output from qtensor vs output from DQ (should be high)z.SQNR for DQ vs DQ from qtensor (should be inf)z8SQNR for output without GPTQ (should be less than above))&is_linear_layerhasattrr*   rY   r/   GPTQ_FUNC_LISTr#   r   _tensors_to_device_flat_to_grouped_and_pad_CDisableTorchFunctionSubclass_evaluate_function	_do_unpad_calculate_hessianr   r7   tofaster_quantdetachrc   r   r8   get_name_for_paramupdate_paramprint__torch_function__rl   r<   ra   r(   SQNR
dequantizenn
functionallinearr_   r`   )!r2   rg   rh   ri   r%   rj   quantize_linearrY   rm   	flat_argsspecgrouped_argsrp   outHWQDQall_qparamsqtensorr9   original_param_nameactbiasnew_outold_outDQ_afterDQ_from_qtensorqtensor_outqparams2Q2DQ2	old_q_outs!                                    r3   r   zMultiTensor.__torch_function__   s   @ (-EC,?,?,E,E3!! 	jU\&J&J 	ZFFF ~%%-.t#D#DN4 tW%%%*%%%$.:(.}=KKD!'*c.DDDDKKK~6 'f~66	4  	E*9VDDDI
 %=Y$T$T!kX2244 _	 _	" ,,,k6 
 &d+M: 6i555_	 _	 _	 _	 _	 _	 _	 _	 #<v>>A QA![))  HQKQXA!$!1!1!QXXZZ!H!HAr; &&q+66G "2!>!>!@!@"4"G"GQ"P"P++,?III%&&& ((ed1grvvxx;$qrr(;;Vt )  C 7= )====M_	 _	 _	 _	 _	 _	 _	 _	N Aw} 81gnQ'**6227;Aw7JtAw~a(++F333PTUVPW*Q-++--**d1gnQ/6"& +   A SUU  ..q+>>AA!'JJ7b(9K9K   2DBEE&MM4J4J   ?'**  
 #*"4"4"6"6#h188gtLLPPRRUg..   D_--  
 //17;;&&q(33))"h77::17CC**ec3%5v +   A SUU  N),,   )====_	 _	 _	 _	 _	 _	 _	 _	N8O_	 _	 _	 _	 _	 _	 _	 _	 _	 _	 _	 _	 _	 _	 _	 _	 _	 _	s!   +A V/8EV/K V//V36V3groupedc                      t          t          |           } fd|D             }t          d |D                       }||fS )Nc                     g | ]G}t          |d          t          j                  r |                                          n|d          HS r   )r*   r/   r0   rl   ).0tupr2   s     r3   
<listcomp>z/MultiTensor.grouped_to_flat.<locals>.<listcomp>A  sV     
 
 
 )Q>>JCCHHLLNNNCF
 
 
r5   c              3      K   | ]?t          d          t          j                  "t          fdD                       V  @dS )r   c              3   0   K   | ]}|d          k    V  dS )r   Nr\   )r   r]   r   s     r3   	<genexpr>z8MultiTensor.grouped_to_flat.<locals>.<genexpr>.<genexpr>F  s+      ))SV))))))r5   N)r*   r/   r0   all)r   r   s    @r3   r   z.MultiTensor.grouped_to_flat.<locals>.<genexpr>E  sj        
  
c!fel33 
))))S))))) 
  
  
  
  
  
r5   )r+   zipr   )r2   r   	flat_tups	flattenednon_tensors_equals   `    r3   grouped_to_flatzMultiTensor.grouped_to_flat<  sz     g''	
 
 
 
 
 
 
	    
  
  
  
  
 
 

 +++r5   c                    g }|D ]!}t          ||          }t          ||          \  }	}
 ||	i |
}|                    t          |t          j                  r|                                n|           |rt          ||t          |         d                   }|r7t          |         d         $dt          |         d<   t          d| d           t          |         d         | j
        k    r6t          t          |         d         t                    sdt          |         d<   #d |D             }t          |d	                   d
         }|                     |          \  }}|sJ d| ddz   dz               t          ||          }|S )Nrm   )forceTz#>>GPTQ process identified function z as in-place, continuing...<<r:   Fc                 8    g | ]}t          |          d          S r   )r   r   r]   s     r3   r   z2MultiTensor._evaluate_function.<locals>.<listcomp>o  s#    ???!<??1-???r5   r   rJ   z ERR: found a function in model: z which zScaused an error in GPTQ MultiTensor, the function dispatch only works for functionszSwith Tensor outputs or that have the same non-Tensor output value across all inputs)rt   r   rH   r*   r/   r0   rl   _maybe_copy_new_valuesrs   r   r#   boolr   r   )r2   rg   r   r   rm   rY   outputsrL   
device_inpcur_args
cur_kwargsr   detected_mutationgrouped_outputsout_specflat_outputsr   	final_outs                     r3   rx   zMultiTensor._evaluate_functionL  s    	@ 	@C+C88J $2*d#C#C Hj$/J//CNN
3(E(EN3779993OOO
  @$:>$+?+N% % %! % @)=m)L)T:>N4(7adaaa   
 $D)+, ,4>"4(75 5,
 ;@N4(7??w???
++A.*-*=*=o*N*N''  	
 	
<t<<<cdcd	
 	
 
 #<::	r5   c                 	   d}| j         
J |            | j        
J |            | j        
J |            | j        
J |            | j        }| j        }| j        }|j        }|                                	                                }|j
        d         |j
        d         }
}	|j        }|dk    r|
}nt          j        ||z            |z  }t          j        |          dk    }d|||f<   d|dd|f<   t          j        |          }|t          j        t          j        |                    z  }t          j        |
|          }|||fxx         |z  cc<   t          j                            |          }t          j        |          }t          j                            |d          }|}d}g }t-          d|
|          D ]^}t/          ||z   |
          }|dd||f                                         }t          j        |          }t          j        |          }|||||f         }t-          |||          D ]}t/          ||z   |
          }||z  dk    r7|                      |dd||f         |          }|                    |           t-          ||          D ]}||z
  }|dd|f         }|||f         }|                     |                    d          |                                          }|                     |                    d          |                                          }||dd|f<   ||z
  |z  } |dd|dfxx         |                     |j                                      d                              |||df                             d                    z  cc<   | |dd|f<   !||dd||f<   |dd|dfxx         |                    |j                                      ||||df                   z  cc<   `d	|j        v rt          j                                          n)d
|j        v rt          j!                                          n	 |g k    r|                    |           |                     |          }|                     ||          }!|!|                    |          |fS )aj  
        GPTQ quantization implementation.

        Args:
            H: Hessian matrix approximation
            W: Weight matrix to quantize
            device: accelerator device

        Returns:
            Tuple containing:
            - Q: Quantized weights
            - DQ: Dequantized weights
            - all_qparams: Quantization parameters
        zItried to do faster quant but configure quantization mode was never calledNr   rJ   r   rn   T)upperxpurX   )"r_   r`   ra   rb   r   r!   r   r(   r}   floatr)   rY   mathceilr/   diag
zeros_likemeanarangelinalgcholeskycholesky_inverserO   rV   rP   rH   	unsqueezeflattenr{   matmulrK   r   synchronizerX   )"r2   r   r   rY   msgr   r!   r   
orig_dtyperS   columnsdeadr   dampr   Hinvcur_qparamsr   block_start	block_endW1DQ1Err1Hinv1group_start	group_endindexiwdqdqerr1r   s"                                     r3   r|   zMultiTensor.faster_quant{  s7   " X 	 #///// ,,c,,,".....,88#888<M	^
W
HHJJWQZ7 JJ	)j"899JFIz!}}!$*!!!T'
a  %*UZ]]333|GF333	$*L!!!$$"1%%L!!!4!00 w	
 
 (	 (	K K)3W==I111k)++,2244B"2&&C#B''DY.I0EEFE$Y
    & &  j 8'BB	+q00"%"6"6!!![223Z# #K  &&{333";	:: & &E+A111a4AadA))!++a..+FFNNPPA,,Q[[^^[IIQQSSB "C1IFa<Dqqq!""uIII,,"1ae 6 6q 9 9::III
 "&DAJJ!&$ ,/Bqqq+i''(aaam
 3 3 : :[*IJJ67! !  FKI!!####v{""J""$$$$"{+++33K@@b+.."%%
##[00r5   c                     d S rG   r\   )r2   rg   rh   ri   r%   rj   s         r3   __torch_dispatch__zMultiTensor.__torch_dispatch__  s	     	r5   c                     dgd fS Nr7   r\   rD   s    r3   __tensor_flatten__zMultiTensor.__tensor_flatten__  s    z4r5   tensor_data_dicttensor_attributes
outer_sizeouter_stridec                 $     | |d                   S r   r\   )r2   r   r   r   r   s        r3   __tensor_unflatten__z MultiTensor.__tensor_unflatten__  s     s#H-...r5   c                 6    |t           j        j        j        k    S rG   )r/   r   r   r   )r2   rg   s     r3   rq   zMultiTensor.is_linear_layer  s    ux*111r5   TrJ   )r\   NF)2rC   
__module____qualname__r_   r`   ra   rb   rc   rd   re   r   int__annotations__r   r   r!   r#   staticmethodr
   r/   r0   r   r   r4   r?   strrE   rH   r;   rT   rW   classmethodrY   rf   r   r	   rK   r   r   r   r   r   r   rx   r|   r   r   Sizer   rq   r\   r5   r3   r   r   .   sW        MO $LOJHeIs	     I%,(>>?IKNI	I I I \I5<%,)??@LO	   
# 
 
 
 

'EL ' ' ' '5<%,)??@	      "   
  !+u|F33     [8 
 !#+/_ __ T3Y_ CHo	_
 c3h(_ _ 
_ _ _ [_B ,d5c?&; ,d3iQUo@V , , , [, , , [,\ k1 k1 [k1Z 
 !#!#  T3Y CHo	
 S#X  
   [ E$s)Xc]*B$C         /sCx./ $C=/ J	/
 CHo/ 
/ / / [/ 28 2 2 2 2 [2 2 2r5   r   c                        e Zd Zdef fd	Zdededd fdZdeedf         fdZdeeedf         e	e
ef         f         fd	Z xZS )
r   Fc                     t                                                       g | _        d | _        | | _        || _        d| _        d S Nr   )superr?   r   r   validatetarget_classr:   )r>   disable_input_validationr   rB   s      r3   r?   z!MultiTensorInputRecorder.__init__  sD    	44(


r5   ri   r%   r&   c                 p     fd}|i n|}t          ||f          \  }} j        | _         fd|D              _         S  j        r |||            xj        dz  c_        t          |          D ]?\  }}t          |t          j                  r  j        |         	                    |           @ S )Nc           	          j          |j         k    sJ dj          d|             t          |           D ]\  }}j        |         }t          |t          j                  s||k    sJ d| d| d| d            Gt          |j                  sJ dt          |           d|             |j        |j        k    sJ d|j         d|j         d| d	            |j	        |j	        k    sJ d
|j	         d|j         d| d            d S )Nz:got two different input structures when recording inputs, z is not the same as z)got different values for nontensor input z for flattened input element zD, different inputs to input recorder must have same nontensor valuesz,expected input of type torch.Tensor but got zexpected input of dtype z	 but got z@ different inputs to input recorder must have same tensor dtypeszexpected input of shape z? different inputs to input recorder must have same tensor shape)
r   	enumerater   r*   r/   r0   r   rK   r(   r)   )r   r   r:   r]   yr>   s        r3   validate_inputz8MultiTensorInputRecorder.forward.<locals>.validate_input  s   y ty(((vQUQZvvptvv )(( &i00  qN5)!!U\22 666 GA  G  G[\  G  G  |A  G  G  G "666 &a):;;  ttAwwttmrtt ; 7ag--- s17  s  sQW  s  skp  s  s  s .-- 7ag--- r17  r  rQW  r  rkp  r  r  r .--- r5   c                 r    g | ]3}t          |t          j                  r                    |          n|4S r\   )r*   r/   r0   r   )r   r]   r>   s     r3   r   z4MultiTensorInputRecorder.forward.<locals>.<listcomp>+  sM        )31el(C(CJ!!!$$$  r5   rJ   )
r   r   r   r   r:   r  r*   r/   r0   rH   )r>   ri   r%   r  r   r   r:   r]   s   `       r3   forwardz MultiTensorInputRecorder.forward  s    	 	 	 	 	. ~6&f~66	49DI   "  DN K= 	,N9d+++

a

!),, 	0 	0HE1!U\** 0u%,,Q///r5   .c                 n    |                                  \  }}t          |          dk    s
J d            |S )Nr   zxkwargs is not empty but get_recorded_inputs called on MultiTensorInputRecorder, use get_recorded_args_and_kwargs instead)get_recorded_args_and_kwargslenr>   ri   r%   s      r3   get_recorded_inputsz,MultiTensorInputRecorder.get_recorded_inputs:  sC    88::f6{{a G   r5   c                 f    | j         
J d            t          | j        | j                   \  }}||fS )Nz no inputs have been recorded yet)r   r   r   r  s      r3   r	  z5MultiTensorInputRecorder.get_recorded_args_and_kwargsA  s:    y$$&H$$$%dndi@@fV|r5   )rC   r   r   r   r?   r   r  r	   r  r   r   r	  __classcell__rB   s   @r3   r   r     s        05K      )S )C )4N ) ) ) )VU38_    eE#s(OT#s(^4S.T        r5   r   c                   h     e Zd Z fdZd Zd Z ej                    	 	 	 d	defd            Z	 xZ
S )
r   c                     t                                                       t                                          | _        d | _        d | _        d | _        d | _        d | _	        d | _
        d | _        d | _        d S rG   )r   r?   r   r8   r9   r_   r`   ra   rb   rc   rd   re   rY   )r>   rB   s    r3   r?   zGPTQQuantizer.__init__H  sp    "2"?"?"A"A $!#)-& ##' r5   c                     | j         
J d            | j        
J d            | j        
J d            | j        
J d            | j        
J d            | j        
J d            d S )Nzget_qparams_func must be setzquantize_func must be setzdequantize_func must be setz%combine_qparams_list_func must be setzmake_qtensor must be setzskip_layer_func must be set)r_   r`   ra   rb   rc   rd   rD   s    r3   _check_functionszGPTQQuantizer._check_functionsT  s    $002P000!--/J---#//1N///-993 :99  ,,.H,,,#//1N/////r5   c                     |                                 D ]*\  }}t          |t                    r|j        d         ||<   +|S r   )itemsr*   r   r7   )r>   
state_dictkeyvalues       r3   covert_multi_tensors_to_tensorsz-GPTQQuantizer.covert_multi_tensors_to_tensors^  sI    $**,, 	2 	2JC%-- 2"',q/
3r5   @   r    r   r&   c                 "   |i }t                               | j        | j        | j        | j        | j        | j        |||| j        
  
         | j	        
                    |           t          j                    5  t          |t          d            d d d            n# 1 swxY w Y   | j	                            |           t          j                    5   ||i | d d d            n# 1 swxY w Y   | j	                                        }|S )N)
r_   r`   ra   rb   rc   rd   r   r   r!   rY   c                     dS NTr\   r]   r  s     r3   r^   z<GPTQQuantizer._create_quantized_state_dict.<locals>.<lambda>  s    t r5   modelreplacement_fn	filter_fn)r   rf   r_   r`   ra   rb   rc   rd   rY   r9   set_state_dictr/   no_grad)_replace_with_custom_fn_if_matches_filter-_replace_buffers_and_params_with_multitensorsupdate_id_to_nameget_state_dict)r>   r   ri   r%   r   r!   r   r  s           r3   _create_quantized_state_dictz*GPTQQuantizer._create_quantized_state_dictd  s    >F//!2, 0&*&D* 0!; 	0 	
 	
 	
 	..u555]__ 	 	5L++   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	11%888 ]__ 	# 	#E4"6"""	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	#,;;==
s$   9BB"%B"	C++C/2C/)r  r    r   )rC   r   r   r?   r  r  r/   r$  r   r)  r  r  s   @r3   r   r   G  s        
 
 
 
 
O O O   U]__ ' ' 
' ' ' _' ' ' ' 'r5   r   c            	            e Zd Zddddd ej        d           ed          fdej        d	ee         f fd
Zdej	        j
        deedf         deeef         dej	        j
        fdZ xZS )r   r  r    r      TrX   )inner_k_tilesrY   layoutc                    	 t                                                        _        | _        | _        | _         _        | _         j         _        d  _        | _	        d	d j        j
        v r#t          j         _        t          j         _        nt          j         _        	 fd _        	 fd _        	 fd _        d  _        fd _         fd}| _                                          d S )	N   r   c                 6    t          | |j                  S )N)r(   zero_point_domain)r   r1  )r   	precisionr   n_bitr>   s     r3   r^   z6Int4WeightOnlyGPTQQuantizer.__init__.<locals>.<lambda>  s)    5Q"46
 6
 6
 r5   c                 P    t          | |d         |d         j                  S Nr   rJ   )r1  )r   r1  )r   qparamsr   r3  r>   s     r3   r^   z6Int4WeightOnlyGPTQQuantizer.__init__.<locals>.<lambda>  s4    L

"&"8      r5   c                 P    t          | |d         |d         j                  S r5  )r   r1  )r   r6  r   r3  r>   s     r3   r^   z6Int4WeightOnlyGPTQQuantizer.__init__.<locals>.<lambda>  s4    N

"&"8      r5   c                 (    d t          |  D             S )Nc                 :    g | ]}t          j        |d           S )rJ   )dim)r/   catr   s     r3   r   zJInt4WeightOnlyGPTQQuantizer.__init__.<locals>.<lambda>.<locals>.<listcomp>  s4     ?
 ?
 ?
$%EIaQ?
 ?
 ?
r5   )r   )qparams_lists    r3   r^   z6Int4WeightOnlyGPTQQuantizer.__init__.<locals>.<lambda>  s%     ?
 ?
),l);?
 ?
 ?
 r5   c                 @    t          | j        d                   p S rN   )_check_linear_int4_kr)   )linear_weightr   padding_alloweds    r3   r^   z6Int4WeightOnlyGPTQQuantizer.__init__.<locals>.<lambda>  s$     !4R!8*EEX6
 r5   c                                         | |          }|d         }|d         }j        t          j        k    r|                    j                  }d
f}t          j        }d}d}t          |||||||j        j	        	  	        }	|	S )Nr   rJ      )scale
zero_point
block_sizetarget_dtype	quant_min	quant_maxr1  _layout)
ra   r1  r   INTr{   zeros_precisionr/   int32r   r-  )r   r6  weightrC  rD  rE  rF  rG  rH  quantized_tensorr   r>   s             r3   rc   z:Int4WeightOnlyGPTQQuantizer.__init__.<locals>.make_qtensor  s     ))!W55FAJE J%)<<<']]4+?@@
 ZJ ;LII  ?%%)##"&"8
  
  
  $#r5   )r   r?   r   r!   r   r,  r@  rY   re   r-  rK   r   rJ  r1  r/   int8rK  FLOATr_   r`   ra   rb   rd   rc   r  )r>   r   r!   r   r,  r@  rY   r-  rc   r3  rB   s   ``   `   @r3   r?   z$Int4WeightOnlyGPTQQuantizer.__init__  sw    	$" *.k#' DK$$$%4%8D"#(:D  %4%:D"!
 !
 !
 !
 !
 !
      	      	*
 *
& 
  
  
  
  
	$ 	$ 	$ 	$ 	$ 	$> )r5   r   ri   .r%   r&   c                     |                      |||| j        | j        | j                  }t	          |t
          d           }d |D             }|D ]}||= |                    |dd           |S )Nc                     dS r  r\   r  s     r3   r^   z6Int4WeightOnlyGPTQQuantizer.quantize.<locals>.<lambda>  s    4 r5   r  c                     g | ]}d |v |	S )kv_cacher\   )r   ks     r3   r   z8Int4WeightOnlyGPTQQuantizer.quantize.<locals>.<listcomp>  s    ;;;:??!???r5   TF)assignstrict)r)  r   r!   r   r%  ,_remove_multitensors_from_buffers_and_paramsload_state_dict)r>   r   ri   r%   r  removerU  s          r3   quantizez$Int4WeightOnlyGPTQQuantizer.quantize  s     66ONM
 

 :G''
 
 

 <;Z;;; 	 	A1jeDDDr5   )rC   r   r   r/   rY   r   r   r   r?   r   Moduler	   r   r   r   r[  r  r  s   @r3   r   r     s         +u|F33#8#8q#I#I#I`  `  `   `  `  `  `  `  ` DX_-238_HLSRUX	       r5   r   c                   L    e Zd ZdZed             Zd Zd Zd Zd Z	d Z
d ZdS )	r   Nc                  b    t           j        t                      t           _        t           j        S rG   )r   	_instancer\   r5   r3   r8   zStateDictManager.get_instance  s#    %-)9););&))r5   c                 "    i | _         i | _        d S rG   )r  
id_to_namerD   s    r3   r?   zStateDictManager.__init__  s    r5   c                 ~    |                                 | _         d |                                D             | _        d S )Nc                 4    i | ]\  }}t          |          |S r\   idr   rU  vs      r3   
<dictcomp>z3StateDictManager.set_state_dict.<locals>.<dictcomp>  $    III12a55!IIIr5   )r  named_parametersra  r>   r   s     r3   r#  zStateDictManager.set_state_dict  s:    **,,II0F0F0H0HIIIr5   c                 L    d |                                 D             | _        d S )Nc                 4    i | ]\  }}t          |          |S r\   rd  rf  s      r3   rh  z6StateDictManager.update_id_to_name.<locals>.<dictcomp>!  ri  r5   )rj  ra  rk  s     r3   r'  z"StateDictManager.update_id_to_name   s'    II0F0F0H0HIIIr5   c                 R    | j                             t          |          d           S rG   )ra  r-   re  )r>   params     r3   r~   z#StateDictManager.get_name_for_param#  s     ""2e99d333r5   c                     || j         v r8t          |t                    r|j        d         | j         |<   d S || j         |<   d S t	          d| d          )Nr   z
Parameter z not found in state_dict)r  r*   r   r7   KeyError)r>   name	new_values      r3   r   zStateDictManager.update_param&  sl    4?"")[11 2(1(8)%%% )2%%%FFFFGGGr5   c                     | j         S rG   )r  rD   s    r3   r(  zStateDictManager.get_state_dict1  s
    r5   )rC   r   r   r_  r   r8   r?   r#  r'  r~   r   r(  r\   r5   r3   r   r     s        I* * \*
  J J JJ J J4 4 4	H 	H 	H    r5   r   rJ   c                 <    | |z  dk    }|| |dz  z  dk    }|o|S |S )a  
    Check if the dimensions are compatible with int4 quantization.

    Args:
        k: The dimension size to check
        group_size: The group size for quantization
        inner_k_tiles: The inner k tiles size

    Returns:
        bool: Whether the dimensions are compatible
    r   N   r\   )rU  r   r,  k_divisible_by_group_size%k_divisible_by_16_times_inner_k_tiless        r3   r>  r>  :  s@     !"J! 3 01]R5G0HA0M-(R-RR$$r5   Tflatr&   .c                     d | D             }t          |          t          t          fd| D                        }||fS )z
    Convert flattened arguments to grouped arguments with padding.

    Args:
        flat: Flattened arguments
        pad_in_place: Whether to pad in place

    Returns:
        Tuple containing grouped arguments and original counts
    c                 J    g | ] }t          |t                    r|j        nd !S r   )r*   r   r:   r   s     r3   r   z,_flat_to_grouped_and_pad.<locals>.<listcomp>[  s-    NNNAjK88?177aNNNr5   c                 ~    g | ]9}t          |t                    r|                               j        n|gz  :S ))rR   )r*   r   rT   r7   )r   r]   multi_tensor_sizerR   s     r3   r   z,_flat_to_grouped_and_pad.<locals>.<listcomp>_  s^         a--- 1MMTTS,,  r5   )maxr+   r   )ry  rR   rp   r   r}  s    `  @r3   ru   ru   M  sy     ONNNNKK((     	  	
	 	G Kr5   rX   Fc                    g }| D ]}t          |t                    rT|j        dk    s|rG|                    |                    |j        d                             |                               k|                    t          |t          j                  r*t          |t                    s|                    |          n|           |S )a  
    Move tensors to accelerator for faster processing.

    Args:
        args: Arguments that may contain tensors
        device: accelerator device
        move_all: Whether to move all tensors or just single count tensors

    Returns:
        List with tensors moved to CUDA
    rJ   r   )	r*   r   r:   rH   rB   r7   r{   r/   r0   )ri   rY   move_allnew_argsr]   s        r3   rt   rt   j  s     H  a%% 	17a<<8<OOAKKv(>(>??@@@@OOa..7A![7Q7QV   
 Or5   c                     d}t          | |          D ]h\  }}t          |t          j                  rI|s0||                    |j                  k                                    r|                    |           d}i|S )a[  
    Copy values from new inputs to original inputs if they've changed.
    Used for handling in-place operations.

    Args:
        orig_inp: Original inputs
        new_inp: New inputs (potentially modified)
        force: Whether to force copying regardless of differences

    Returns:
        bool: Whether any differences were detected
    FT)r   r*   r/   r0   r{   rY   anycopy_)orig_inpnew_inpr   detected_differencer]   new_xs         r3   r   r     s      '** + +5a&& 	+ +ehhqx0005577 +&*#r5   c                     t          | |          D ]:\  }}t          |t                    r |j        |k    r|                    |           ;dS )z
    Unpad MultiTensors to their original counts.

    Args:
        args: Arguments that may contain MultiTensors
        orig_counts: Original counts of MultiTensors
    N)r   r*   r   r:   rW   )ri   rp   argr:   s       r3   ry   ry     s\     $,,  
Uc;'' 	CI,=,=IIe r5   c                    d}d}| D ]}fd|D             }t          ||          \  }}|d                                         }	|	j        }
t          |
          dk    rdn|
d         }|	                    d|
d                   }	||||z   z  z  }||z  }d|z  dz  |	                                z  }	||	                    |	                                          z  }|S )z
    Calculate the Hessian matrix for GPTQ.

    Args:
        grouped_args: Grouped arguments
        spec: Original structure specification
        device: accelerator device

    Returns:
        torch.Tensor: Hessian matrix
    r   c                 r    g | ]3}t          |t          j                  r|                              n|4S r\   )r*   r/   r0   r{   )r   r]   rY   s     r3   r   z&_calculate_hessian.<locals>.<listcomp>  s8    VVVQjEL&A&AHadd6lllqVVVr5   ro   rJ   r   g      ?)r   r   r)   r
  reshapetr   )r   r   rY   r   total_batchesrL   r   r   rS   r]   r)   ns     `         r3   rz   rz     s     	
AM  VVVVRUVVV
 %Z66! QKUqAAeAhIIb%)$$ 	
]ma/00-U+qssuu4	QXXaccee__Hr5    r   r!  r"  cur_fqnc           	           || |dd                   r ||           } |                                  D ]1\  }}t          |||| | d          }||urt          | ||           2| S )a{  
    Replace modules in the model if they match a filter.

    Args:
        model: The model to modify
        replacement_fn: Function to apply to matching modules
        filter_fn: Function to determine if a module should be replaced
        cur_fqn: Current fully qualified name (for tracking position in model hierarchy)

    Returns:
        nn.Module: Modified model
    Nr   .)named_childrenr%  setattr)r   r!  r"  r  rr  child	new_childs          r3   r%  r%    s    $ y%% &u%%++-- , ,e=>9.A$.A.A.A
 
	 E!!E4+++Lr5   c           
      "   |                      d          D ]$\  }}t          | |t          |g                     %|                     d          D ]<\  }}t          | |t	          j        t          |g          |j                             =| S )z
    Replace model buffers and parameters with MultiTensors.

    Args:
        model: The model to modify

    Returns:
        nn.Module: Modified model
    Frecurse)named_buffersr  r   rj  r   	Parameterrequires_gradr   rr  bufro  s       r3   r&  r&    s     (((77 1 1	ct[#//0000--e-<< V VetR\+ug*>*>@STTUUUULr5   c           
         |                      d          D ]6\  }}t          |t                    rt          | ||j        d                    7|                     d          D ]Y\  }}t          |t                    r?t          | |t          j        |j        d         |j        d         j                             Z| S )z
    Convert MultiTensors in model buffers and parameters back to regular tensors.

    Args:
        model: The model to modify

    Returns:
        nn.Module: Modified model
    Fr  r   )	r  r*   r   r  r7   rj  r   r  r  r  s       r3   rX  rX    s     (((77 0 0	cc;'' 	0E4A///--e-<<  ee[)) 	U\!_el1o.KLL   Lr5   )rJ   Nr   )F)r  )4r   typingr   r   r   r   r   r   r	   r
   r/   torch.nnr   torch.utils._pytreer   r   torchao.dtypesr   r   r   %torchao.quantization.quant_primitivesr   torchao.quantization.unifiedr   torchao.quantization.utilsr   r   r   r   r   rs   __all__r0   r   r\  r   r   r   r   r>  r   ru   rY   rt   r   ry   rz   r   r   r%  r&  rX  r\   r5   r3   <module>r     s    N N N N N N N N N N N N N N N N N N N N        < < < < < < < <         
      3 2 2 2 2 2 < < < < < <            U2 U2 U2 U2 U2%, U2 U2 U2p> > > > >ux > > >BE E E E EI E E EP} } } } }- } } }@# # # # # # # #V% % % %( #'   
s) 
4c3h $s)+,       : %1EL$8$85    2   ,
 
 
 3?%,v2F2F " " " "R 	 9bi[")34 C($./ 	
 Y   > ry    &	 bi      r5   