
    *`i%D                     L   d dl mZ d dlmZ d dlmZ d dlZd dlmZm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZmZ d d	lmZ g d
Z ej                    	 	 	 d&dej        dej        dej        de	deej                 deej                 deej                 dej        fd            Z ej                    	 	 	 	 	 d'dej        dej        deej                 dee	         deej                 deej                 deej                 dej        fd            Z ej                    	 	 d(dej        dej        dej        de	deej                 deej                 dej        fd            Z ej                    	 	 	 	 	 d)dej        dej        dej        de	deej                 deej                 dededeej                 dej        fd            ZdedefdZdedej        dedd dej        f
d!Z  ej                    	 	 d(dej        dej        dej        d"ej        d#ej        de	deej                 deej                 dej        fd$            Z! ej                    	 	 	 d&dej        dej        dej        deej                 deej                 dej        fd%            Z"dS )*    )wraps)ceil)OptionalN)DynamicTypeQuantizationArgsQuantizationStrategyround_to_quantized_type_args)QuantizationStatus)QuantizationScheme)calculate_rangecompute_dynamic_scales_and_zp)Module)quantize
dequantizefake_quantizewrap_module_forward_quantizedforward_quantizexscale
zero_pointargsdtypeg_idxglobal_scalereturnc                 2    t          | ||||dd||	  	        S )a  
    Quantize the input tensor x using the QuantizationStrategy specified in args.
    Quantization can be done per tensor, channel, token or group. For group
    quantization, the group_size must be divisible by the column size. The input scale
    and zero_points are reshaped to support vectorization (Assumes 1 is the
    channel dimension)

    :param x: Input tensor
    :param scale: scale tensor
    :param zero_point: zero point tensor
    :param args: quantization args dictating how to quantize x
    :param dtype: optional dtype to cast the quantized output to
    :param g_idx: optional mapping from column index to group index
    :param global_scale: optional constant to scale the quantization scale during QDQ
    :return: fake quantized tensor
    TF)	r   r   r   r   r   do_quantizedo_dequantizer   r   _process_quantization)r   r   r   r   r   r   r   s          /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/forward.pyr   r   ,   s6    6 !
!
 
 
 
    x_qc                    |J|j         dk    s|j         dk    rt          t          j                  }n|j         dk    r|j        d         dk    rt          t          j                  }n|j        d         dk    s|j        d         | j        d         k    rDt          | j        d         |j        d         z            }t          t          j        |          }np| j        d         | j        d         }	}||j        d         z  }
|	|j        d         z  }t          t          j        |
|g	          }nt          d
|j          d          ||j
        }t          | |||dd|||	  	        S )a?  
    Dequantize a quantized input tensor x_q based on the strategy specified in args. If
    args is not provided, the strategy will be inferred.

    :param x: quantized input tensor
    :param scale: scale tensor
    :param zero_point: zero point tensor
    :param args: quantization args used to quantize x_q
    :param dtype: optional dtype to cast the dequantized output to
    :param g_idx: optional mapping from column index to group index
    :param global_scale: optional constant to scale the quantization scale during QDQ
    :return: dequantized float tensor
    Nr      )strategy   )r&   
group_size)r&   block_structurez8Could not infer a quantization strategy from scale with z* dimmensions. Expected 0 or 2 dimmensions.FT)	r   r   r   r   r   r   r   r   r   )ndimr   r   TENSORshapeCHANNELintGROUPBLOCK
ValueErrorr   r    )r#   r   r   r   r   r   r   r(   rowscolsblock_heightblock_widths               r!   r   r   T   s   . |:??ejAoo#-A-HIIIDDZ1__{1~""'1E1MNNN +a.A%%5;q>SYq\+I+I 1A!>??
'17J   !Yr]CIbMd#u{1~5"ek!n4'17%1;$?  
 <5: < < <  
 } 
!
 
 
 
r"   c           
      0    t          | |||dd||          S )a  
    Fake quantize the input tensor x by quantizing then dequantizing with
    the QuantizationStrategy specified in args. Quantization can be done per tensor,
    channel, token or group. For group quantization, the group_size must be divisible
    by the column size. The input scale  and zero_points are reshaped to support
    vectorization (Assumes 1 is the channel dimension)

    :param x: Input tensor
    :param scale: scale tensor
    :param zero_point: zero point tensor
    :param args: quantization args dictating how to quantize x
    :param g_idx: optional mapping from column index to group index
    :param global_scale: optional constant to scale the quantization scale during QDQ
    :return: fake quantized tensor
    T)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   s         r!   r   r      s3    0 !
!	 	 	 	r"   Tr   r   c	           
      v   t          || j                  \  }	}
|j        }|j        t          j        k    rL| j        }| j        d         | j        d         }}|j        \  }}||z  dk    rt          d| d| d          ||z  dk    rt          d| d| d          ||z  }||z  }| 	                    ||||          
                    d	d
          }|                    d                              d          }|(|                    d                              d          nd }|rt          ||||	|
|||          }|rt          ||||          }|
                    d	d
          	                    |          }n|j        t          j        t          j        fv re||n| j        }t#          j        |                               |          }|j        d         }|j        d
k     r9|                    d	          }||                    d	          nd }|j        d
k     9||k    r||z  dk    rt          d| d|           |d u pd|v }|rBt+          t-          ||z                      }t#          j        |f|t"          j                  }n]t#          j        |d          \  }}|t#          j        |                   }t#          j        |          }|                     d|          } t-          | j        d         |z            |f}|                     d|          } |rAt          | |                    d          ||                    d          nd |||	|
|          }|rC|r|n| }t          ||                    d          ||                    d          nd |          }|                    d          }|                    |          }|s*t#          j        |          } |                    d|           }n2|rt          | |||	|
|||          }|rt          |r|n| |||          }|S )Nr)   r*   r   zTensor height z" is not divisible by block_height z-. Block quantization requires exact division.zTensor width z! is not divisible by block_width r%   r'   )r   r   r   q_minq_maxr   r   r   )r#   r   r   r   z=tensor column shape must be divisble by the given group_size z	 but got )r   T)return_counts)r   r   r   r   r   r;   r<   r   )	start_dim)r   r   r   )r   devicer(   r&   r   r2   r.   r+   r3   reshape	transpose	unsqueeze	_quantize_dequantizer1   TENSOR_GROUPr   torch
zeros_liketor,   r0   r   fulluniqueargsortindex_select	unflattenflatten)!r   r   r   r   r   r   r   r   r   r;   r<   r(   original_shaper4   r5   r6   r7   num_rows_blocksnum_cols_blocksx_blockssbzboutputoutput_dtypecolumnsis_column_order
num_groupsgroup_sizesgroup_indicespermreshaped_dimsinputinv_perms!                                    r!   r    r       sR    #422LE5J },222WR[!'"+d$($8!k ,!##@ @ @ @ @ @   +""? ? ?{ ? ? ?   ,.+-99	
 

 )Aq// 	 __R  **2..7A7MZ!!"%%//333SW 	 )	 	 	H  	")	  H ##Aq))11.AA	") 
 

 !& 1uuqw!!$$''55,r" j1nnOOA&&E4>4J--a000PTJ j1nn
 j  #q(( N/9N NDKN N    4-62; 		)T'J"67788J*j]JeiPPPKK */e4)P)P)P&M;%emM&B&BCK=''Dr4((A z)**
 KKM** 
	oob))7A7M://333SW)	 	 	F  	)0FFqE oob))7A7M://333SW)	  F "--<(( 	7}T**H((X66F  
	%)	 	 	F  	 %,1%)	  F Mr"   moduleschemec                      t           j        d          r j        j        n j        j        t	                     fd            }|                      j                  }t           d|           d S )N__func__c                    t          dd          s!                     j                  |i |S |d         }j        t          j        k    }	j        t          |d	j                  }	j        F|sD| j	        j
                                        }t          | j	        d	j                  | j	        _
                             j                  |g|dd          R i |}	j        |s|| j	        _
        	j        :j        t          j        k    r	j        j        s|S t          |d	j                  }|S )Nquantization_enabledTr   r^   weightr%   rU   )getattr__get__	__class__quantization_statusr
   
COMPRESSEDinput_activationsr   weightsrf   datacloneoutput_activationsCALIBRATIONdynamic)
selfr   kwargsinput_
compressedunquantized_weightrU   forward_func_origr`   ra   s
          r!   wrapped_forwardz6wrap_module_forward_quantized.<locals>.wrapped_forwardl  s   v5t<< 	X G$,,VV5EFFWPVWWWa/3E3PP
#/%ffgv?WXXF>%j%!%!1!7!7!9!9/Xv~   DK
 E"**663CDD
!""X
 
 
!'
 

 >%j%1DK$0 *.@.LLL19 M %&*C F r"   forward)hasattrrz   rc   funcr   rh   ri   setattr)r`   ra   ry   bound_wrapped_forwardrx   s   ``  @r!   r   r   d  s     v~z** 0"N3"N/
) ) ) ) ) ) )X ,33FF<LMMFI455555r"   value	base_namer   c                    | j         t          j        k    r|dk    r|S |                                dk    r|S t	          | dd           }t	          | | dd           }|j        dt          j        fv rt          ||| |          \  }}n't	          | | d          }t	          | | dd           }t          ||||||	          S )
Nrf   r   weight_g_idx_global_scaleT)r   r   r`   r   _scale_zero_pointr9   )
rj   r
   rk   numelrg   rr   r   LOCALr   r   )r`   r   r   r   r   r   r   r   s           r!   r   r     s    	"&8&CCC!!{{}} FND11E6i#>#>#>EEL|k/0009d6
 
 
zz
 9 4 4 455V	%>%>%>EE

!   r"   r;   r<   c                     |||z  }| |z  }|||                     | j                  z  }t          ||||          }	||	                     |          }	|	S )N)tensorr   minmax)rH   r   r	   )
r   r   r   r;   r<   r   r   r   scaledquantized_values
             r!   rC   rC     s{     $YF*--((( 3De  O ),,U33r"   c                     |||z  }|                      |j                  }|||                     |j                  z
  }||z  }||                     |          }|S )N)rH   r   )r#   r   r   r   r   dequant_values         r!   rD   rD     sn     $FF5;''M%
ek(B(BB!E)M%((//r"   )NNN)NNNNN)NN)NNTTN)#	functoolsr   mathr   typingr   rF   *compressed_tensors.quantization.quant_argsr   r   r   r	   ,compressed_tensors.quantization.quant_configr
   ,compressed_tensors.quantization.quant_schemer   %compressed_tensors.quantization.utilsr   r   torch.nnr   __all__no_gradTensorr   r   r   r   boolr    r   strr   rC   rD    r"   r!   <module>r      sL                                 L K K K K K K K K K K K                 $($(+/$ $|$<$ $ 	$
 EK $ EL!$ 5<($ \$ $ $ $N  *.'+#'$(+/D D	D<D &D #
$	D
 EK D EL!D 5<(D \D D D DN  %)+/   | <    	 
 EL!  5<(  \       F  %)#'+/` `|`<` ` 	`
 EL!` EK ` ` ` 5<(` \` ` ` `F76& 76:L 76 76 76 76t%% <%47%?Q%
\% % % %P  $(+/ |<  <	
 <  EK  5<( \   @   $#'+/ 	<  EK 	
 5<( \     r"   