
    .`i;C                         d dl mZ d dlmZmZmZ d dlZd dlZd dl	m
Z
 d dlmZmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ erd d
lmZ  e
e          Z G d de          ZdS )    )Fraction)TYPE_CHECKINGAnyOptionalN)init_logger)
LinearBaseUnquantizedLinearMethod)QuantizationConfigQuantizationMethods)ParallelLMHead)current_platform)scalar_types)WeightsMapperc                       e Zd ZdZh dZdhZddhZh dZ	 	 	 	 	 	 d)d
edede	de
de
ee
         z  dz  dee
ef         dz  de
de
ddf fdZde
fdZedefd            Zedeej                 fd            Zedefd            Zedee
         fd            Zedee
ef         dd fd            Zde
fdZd
ede	fdZd*d Zd+d!e
de
fd"Zd+d!e
de
fd#Zd!e
fd$Zd%ej        j         d!e
fd&Z!ede"d'         fd(            Z# xZ$S ),	INCConfigziConfig class for Intel Neural Compressor (INC).
    Repo: https://github.com/intel/neural-compressor
    >               intauto_round:auto_gptqzauto_round:auto_awq>   
awq:marlingptq:marlinawqautogptqipexmarlinTNr   weight_bits
group_sizesympacking_formatblock_name_to_quantizeextra_config	data_typebackendreturnc	                 V   t                                                       || j        vrt          d| d| j         d          || j        vrt          d| d| j         d          || j        vrt          d| d| j         d          || j        vrt          d| d| j         d          || _        || _        || _	        || _
        t          |t                    r|                    d	          n|| _        || _        || _        || _        t%          d
|          | _        d S )NzUnsupported weight_bits: z, currently only support .zUnsupported data_type: z, currently only support  zUnsupported packing_format: zUnsupported backend: z,  currently only support ,    )super__init__SUPPORTED_BITS
ValueErrorSUPPORTED_DTYPESSUPPORTED_FORMATSSUPPORTED_BACKENDSr   r    r!   r"   
isinstancestrsplitr#   r$   r%   r&   r   pack_factor)
selfr   r    r!   r"   r#   r$   r%   r&   	__class__s
            /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/layers/quantization/inc.pyr-   zINCConfig.__init__,   s    	d111AK A A*.*=A A A   D111E) E E,0,AE E E   !777D~ D D*.*@D D D   $111E E E*.*AE E E  
 '$, 0#66("((---' 	#
 )"#B44    c                 8    d| j          d| j         d| j         dS )NzINCConfig(weight_bits=z, group_size=z, sym=))r   r    r!   )r7   s    r9   __repr__zINCConfig.__repr__[   s>    =T%5 = =/= =15= = =	
r:   c                     dS )Ninc clss    r9   get_namezINCConfig.get_namea   s    ur:   c                 2    t           j        t           j        gS N)torchhalfbfloat16rA   s    r9   get_supported_act_dtypesz"INCConfig.get_supported_act_dtypese   s    
EN++r:   c                     dS )N<   r@   rA   s    r9   get_min_capabilityzINCConfig.get_min_capabilityi   s    rr:   c                     dgS )Nzquantization_config.jsonr@   rA   s    r9   get_config_filenameszINCConfig.get_config_filenamesm   s    *++r:   configc                     | |                      |dg          |                      |dg          |                      |dg          |                     |dgd          |                     |ddgd           |                     |dgd           |                     |d	gd
          |                     |ddgd                    S )Nbitsr    r!   r"   r   r#   to_quant_block_namesr$   r%   r   r&   vllm_backendr   )r   r    r!   r"   r#   r$   r%   r&   )get_from_keysget_from_keys_or)rB   rO   s     r9   from_configzINCConfig.from_configq   s    s))&6(;;((,@@!!&5'22//)*,B  $'#7#713IJD$ $ --f~6FMM**6K=%HH(()^1LfUU
 
 
 	
r:   
layer_namec                 0   	 ddt           dt          f fd j        r j        v r           S t          |t                     	 j        r t          fd j        D                       	 j        rrd|j        j        	                                v rR	fd j        D             }|r;t          t          |                    dk    r|d	         S t          d
 d           j        r j                                        D ]\  }v rv                              dk    r]fd|D             }	fd|D             }t          t          |                    dk    r
|d	         c S t          d d|            	          S )NTname	quantizedc                 l   j         s |rj        nd|rj        nd|rj        ndfS | j         v rij         |          }|                    d|rj        nd          |                    d|rj        nd          |                    d|rj        nd          fS t          d          j                                         D ]\  }}t          |t                    rt          fd|D                       s6	 t          j        t          j        |          |           ^|                    d|rj        nd          |                    d|rj        nd          |                    d|rj        nd          fc S # t          j        $ r Y w xY w|rj        nd|rj        nd|rj        ndfS )	N   TrQ   r    r!   z*+?^$()[]{}|\\c              3       K   | ]}|v V  	d S rE   r@   ).0cREGEX_SPECIAL_CHARSs     r9   	<genexpr>zAINCConfig.get_layer_config.<locals>.get_config.<locals>.<genexpr>   s:       ; ;12A,,; ; ; ; ; ;r:   )r$   r   r    r!   getsetitemsr3   r4   anyresearchcompileerror)rY   rZ   cfgpatternra   r7   s       @r9   
get_configz.INCConfig.get_layer_config.<locals>.get_config   s1   $ (19D$$r'08DOOb )3DHHt  t((('-GGF	$ID$4$4rJJGGLY*N$//BOOGGEy#B488dCC  #&&7"8"8 $ 1 7 7 9 9  !'3// s ; ; ; ;6=; ; ; 8 8  	yG!4!4d;;GGGF	,QD,<,<rRRGGLY2V$//TVWWGGEy+J488dKK     H x   H
 %.5  2#,4"%/4 s   ;BFFFc              3   B   K   | ]}                     |          V  d S rE   
startswith)r_   rY   rW   s     r9   rb   z-INCConfig.get_layer_config.<locals>.<genexpr>   sB        04
%%d++     r:   fusedmoec                 R    g | ]#}|                                |          $S r@   ro   )r_   rY   rm   rW   rZ   s     r9   
<listcomp>z.INCConfig.get_layer_config.<locals>.<listcomp>   sG       ??:..
4++  r:      r   zFused MoE layer 'z5' requires consistent quant config for all sub-layersc                 <    g | ]}                     |          S r@   )replace)r_   sub_key
fusion_keyrW   s     r9   rs   z.INCConfig.get_layer_config.<locals>.<listcomp>   s6     ! ! !DK
**:w??! ! !r:   c                 (    g | ]} |          S r@   r@   )r_   rY   rm   rZ   s     r9   rs   z.INCConfig.get_layer_config.<locals>.<listcomp>   s%    "U"U"U4::dI#>#>"U"U"Ur:   zFused module 'z'' requires consistent quant config for )T)r4   boolr$   r3   r   r#   rf   r8   __name__lowerlenrd   r/   packed_modules_mappingre   count)
r7   layerrW   moe_configssub_keys	sub_namessub_configsrx   rm   rZ   s
   ` `    @@@r9   get_layer_configzINCConfig.get_layer_config   s   '	 '	S '	T '	 '	 '	 '	 '	 '	T  	*t/@!@!@:j))) #5.999	& 	    8<8S    I
  	u/G/M/M/O/O!O!O      -  K
  s;''((A--&q>) B
 B B B    	(,(C(I(I(K(K  $
H++
0@0@0L0LPQ0Q0Q! ! ! ! !OW! ! !I #V"U"U"U"U9"U"U"UK3{++,,11*1~---$C C C7@C C   z*i000r:   c                     |dk     S )Nr\   r@   )r7   r   s     r9   check_quantizedzINCConfig.check_quantized   s    Rr:   hf_to_vllm_mapperr   c                     | j         |                    | j                   | _         | j        !|                    | j                  | _        d S d S rE   )r#   
apply_listr$   
apply_dict)r7   r   s     r9   apply_vllm_mapperzINCConfig.apply_vllm_mapper   s[    &2*;*F*F++ +D' ( 1 < <T=N O OD )(r:   prefixc                 l   ddl m} ddlm}m} |                     ||          \  }}}	|                     |          s,t          |t          t          f          rt                      S d S t                              d||j        j        |||	           |dk    sd|v rPt          j        t          j        d}
||
v o ||
|         ||	           }t          ||          r|o |||          }nd}|rdd	lm}m}m}  ||||	 di g 
          }nddlm}m}  ||||	           }t          ||          rK|r |||j                  S ddlm} d|||	 dd}|                    |                              ||          S t          |t          t          f          r|r ||          S  ||          S d S )Nr   FusedMoEcheck_marlin_supportedcheck_moe_marlin_supports_layer0[%s] Type: %s, Bits: %s, Group Size: %s, Sym: %sr   r   )r   r   F)AWQMarlinConfigAWQMarlinLinearMethodAWQMarlinMoEMethod)r   r    
zero_pointlm_head_quantizedfull_configmodules_to_not_convert)	AWQConfigAWQLinearMethod)r   r    r   MoeWNA16Configr   )quant_methodrQ   r    r   lm_head)$vllm.model_executor.layers.fused_moer   :vllm.model_executor.layers.quantization.utils.marlin_utilsr   r   r   r   r3   r   r   r	   loggerdebugr8   r{   r   uint4uint82vllm.model_executor.layers.quantization.awq_marlinr   r   r   +vllm.model_executor.layers.quantization.awqr   r   
moe_config1vllm.model_executor.layers.quantization.moe_wna16r   rV   get_quant_method)r7   r   r   r&   r   r   r   r   r    r!   AWQ_TYPE_MAP
use_marlinr   r   r   quant_args_marlinr   r   
quant_argsr   rO   s                        r9   apply_awq_quant_layerzINCConfig.apply_awq_quant_layer   s   AAAAAA	
 	
 	
 	
 	
 	
 	
 	

 (,'<'<UF'K'K$Z##K00 	%*n!=>> .000t>O$	
 	
 	
 fG 3 3%% L &5 ;Q;Q[):3w< <J %** ' ,K,K:- -

 J 	          !0'%"7"'')! ! !       
 #'%"7  J eX&& 	V O))*;U=MNNNXXXXXX !&#("%g  F "--f55FFufUUUej.9:: 	3 3,,->???&z222tr:   c           	      v   ddl m} ddlm}m} |                     ||          \  }}}	|                     |          s,t          |t          t          f          rt                      S d S t                              d||j        j        |||	           |dk    sd|v rUt          j        t          j        d}
||	f|
v o ||
||	f         ||	           }t          ||          r|o |||          }nd	}|rdd
lm}m}m}  ||||	d	d	i i           }nddlm}m}  |||d	d	i           }t          ||          rJ|r |||j                  S ddlm} d|||	d	d}|                    |                              ||          S t          |t          t          f          r|r ||          S  ||          S d S )Nr   r   r   r   r   r   ))r   T)r   T)has_zpF)GPTQMarlinConfigGPTQMarlinLinearMethodGPTQMarlinMoEMethod)r   r    is_symr   desc_actdynamicr   )
GPTQConfigGPTQLinearMethod)r   r    r   r   r   r   r   )r   rQ   r    r!   r   )r   r   r   r   r   r   r   r3   r   r   r	   r   r   r8   r{   r   uint4b8	uint8b1283vllm.model_executor.layers.quantization.gptq_marlinr   r   r   ,vllm.model_executor.layers.quantization.gptqr   r   r   r   r   rV   r   )r7   r   r   r&   r   r   r   r   r    r!   GPTQ_TYPE_MAPr   r   r   r   r   r   r   r   r   rO   s                        r9   apply_gptq_quant_layerz INCConfig.apply_gptq_quant_layer9  s   AAAAAA	
 	
 	
 	
 	
 	
 	
 	

 (,'<'<UF'K'K$Z##K00 	%*n!=>> .000t>O$	
 	
 	
 fG 3 3'/'1 M &s+}< AWAW{C01:#gB B BJ %** ' ,K,K:- -
 J 	          !1 0'%"'! ! !       
 $'%"'  J eX&& 	 **+<e>NOOO     
 %+'",$  &11&99JJ6   ej.9:: 	4 4--.?@@@''
333tr:   c                    |                      ||          \  }}}|                     |          s,t          |t          t          f          rt                      S d S ddlm}m}m	} t          |t          t          f          r[d| j
        v r |d||          }	 ||	          S d| j
        v r |d||          }	 ||	          S t          d| j
                   d S )Nr   )IPEXAWQLinearMethod
IPEXConfigIPEXGPTQLinearMethodr   )methodr   r    r   z7ipex backend only supports awq and gptq format,but got )r   r   r3   r   r   r	   2vllm.model_executor.layers.quantization.ipex_quantr   r   r   r"   r/   )
r7   r   r   r   r    r!   r   r   r   rO   s
             r9   apply_ipex_quant_layerz INCConfig.apply_ipex_quant_layer  sT   '+'<'<UF'K'K$Z##K00 	%*n!=>> .000t	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 ej.9:: 	+++# kj   +*62224...#!{z   ,+F333 E/3/BE E  
 4r:   r   c                    |rU| j         rN| j         D ]F}||k    s	|d| k    r5| j         |                             dd          dk    rt                      c S Gt          j                    st          j                    s| j        dk    r|                     ||          S d| j        v s	d| j        v r| 	                    ||          S d| j        v s	d| j        v r| 
                    ||          S d S )Nzmodel.rQ   r\   r   r   r   )r$   rc   r	   r   is_cpuis_xpur&   r   r"   r   r   )r7   r   r   rW   s       r9   r   zINCConfig.get_quant_method  s*    	5d' 	5"/ 5 5
&((J:K6:K:K,K,K'
377CCrII244444#%%	>&((	> |v%%..uf===T(((Fdl,B,B..uf===D'''5DL+@+@--eV<<< ,A+@r:   r   c                 f    |                     dd          dk    }|r|                                 S dS )z*Override the `auto-round` method to `inc`.r   Nz
auto-round)rc   rC   )rB   hf_quant_cfg
user_quantis_auto_round_formats       r9   override_quantization_methodz&INCConfig.override_quantization_method  s:    
  ,//EEU 	"<<>>!tr:   )Tr   NNr   r   )r   r   )r   )%r{   
__module____qualname____doc__r.   r0   r1   r2   r   rz   r4   listdictr   r-   r=   classmethodr   rC   rF   dtyperI   rL   rN   rV   r   r   r   r   r   r   nnModuler   r   r   __classcell__)r8   s   @r9   r   r      s&         "\\Nw/1FG   49=.2-5 -5-5 -5 	-5
 -5 !$d3i$ 6-5 38nt+-5 -5 -5 
-5 -5 -5 -5 -5 -5^
# 
 
 
 
 ,    [ ,ek): , , , [, 3    [ ,T#Y , , , [, 
c3h 
K 
 
 
 [
 T1# T1 T1 T1 T1l 3  4        P P P PT T3 T T T T Tl[ [C [# [ [ [ [zC    @=eho =s = = = =$ 	'	(   [    r:   r   )	fractionsr   typingr   r   r   regexrg   rF   vllm.loggerr   !vllm.model_executor.layers.linearr   r	   'vllm.model_executor.layers.quantizationr
   r   3vllm.model_executor.layers.vocab_parallel_embeddingr   vllm.platformsr   vllm.scalar_typer    vllm.model_executor.models.utilsr   r{   r   r   r@   r:   r9   <module>r      s?         / / / / / / / / / /      # # # # # # Q Q Q Q Q Q Q Q        O N N N N N + + + + + + ) ) ) ) ) ) ?>>>>>>	X		v v v v v" v v v v vr:   