
    .`iQ              
          d dl mZmZ d dlZd dlmZ d dlmZmZ d dl	m
Z
mZ d dlmZmZmZmZ d dlmZmZ d dlmZ d d	lmZ  G d
 de          Zdedee         fdZd Z G d de          Zdej        dej        dej        dej        ddf
dZdej        dej        dej        dej        ddf
dZ 	  ededge ej!                   ej"        j#        j$        Z$n# e%$ rZ&e&dZ&[&ww xY w G d de          Z'dS )    )AnyUnionN)version)FusedMoEConfigFusedMoEQuantConfig)FusedMoEFusedMoEMethodBase)
LinearBaseLinearMethodBaseUnquantizedLinearMethodset_weight_attrs)QuantizationConfigQuantizationMethods)current_platform)direct_register_custom_opc                       e Zd ZdZ	 	 	 	 	 	 	 	 	 	 d d	ed
edededededededee         dz  deddf fdZdefdZ	e
defd            Ze
deej                 fd            Ze
defd            Zedee         fd            Ze
deeef         dd fd            Zdej        j        deded         dz  fdZ xZS )!BitsAndBytesConfigzaConfig class for BitsAndBytes Quantization.

    Reference: https://arxiv.org/abs/2305.14314
    FTfloat32uint8fp4N      @load_in_8bitload_in_4bitbnb_4bit_compute_dtypebnb_4bit_quant_storagebnb_4bit_quant_typebnb_4bit_use_double_quant llm_int8_enable_fp32_cpu_offloadllm_int8_has_fp16_weightllm_int8_skip_modulesllm_int8_thresholdreturnc                    t                                                       || _        || _        || _        || _        || _        || _        || _        || _	        |	pg | _
        |
| _        | j        dvrt          d| j                   d S )N)r   z$Unsupported bnb_4bit_quant_storage: )super__init__r   r   r   r   r   r   r   r   r    r!   
ValueError)selfr   r   r   r   r   r   r   r   r    r!   	__class__s              /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/layers/quantization/bitsandbytes.pyr%   zBitsAndBytesConfig.__init__%   s     	((&<#&<##6 )B&0P-(@%%:%@b""4&i77Tt7RTT   87    c                 h    d| j          d| j         d| j         d| j         d| j         d| j         dS )Nz BitsAndBytesConfig(load_in_8bit=z, load_in_4bit=z, bnb_4bit_compute_dtype=z, bnb_4bit_quant_storage=z, bnb_4bit_quant_type=z, llm_int8_skip_modules=))r   r   r   r   r   r    r'   s    r)   __repr__zBitsAndBytesConfig.__repr__C   s    Ct/@ C C -C C&*&AC C '+&AC C $(#;	C C
 &*%?C C C	
r*   c                     dS )Nbitsandbytes r-   s    r)   get_namezBitsAndBytesConfig.get_nameM   s    ~r*   c                 H    t           j        t           j        t           j        gS N)torchr   float16bfloat16r-   s    r)   get_supported_act_dtypesz+BitsAndBytesConfig.get_supported_act_dtypesQ   s    u}en==r*   c                     dS )NF   r1   )clss    r)   get_min_capabilityz%BitsAndBytesConfig.get_min_capabilityU   s    rr*   c                      g S r4   r1   r1   r*   r)   get_config_filenamesz'BitsAndBytesConfig.get_config_filenamesY   s    	r*   configc                 f    d fd	} ||dgd          } ||dgd          } ||dgd          } ||d	gd
          } ||dgd          } ||dgd          } ||dgd          }	 ||dgd          }
 ||dgg           } ||dgd          }  |||||||	|
||
  
        S )Nc                 b    	                      | |          }||n|S # t          $ r |cY S w xY wr4   )get_from_keysr&   )r?   keysdefault_valuevaluer;   s       r)   get_safe_valuez6BitsAndBytesConfig.from_config.<locals>.get_safe_value_   sS    %))&$77 % 1uu}D % % %$$$$%s    ..r   F)rD   r   Tr   r   r   r   r   r   r   r   r   r    r!   r   )
r   r   r   r   r   r   r   r   r    r!   r4   r1   )r;   r?   rF   r   r   r   r   r   r   r   r   r    r!   s   `            r)   from_configzBitsAndBytesConfig.from_config]   s   	% 	% 	% 	% 	% 	% &~f~.>eTTT%~f~.>dSSS!/-.i"
 "
 "
 "0-.g"
 "
 "
 -n*+5
 
 
 %3N01%
 %
 %
! ,:>78,
 ,
 ,
( $2>/0$
 $
 $
  !/,-R!
 !
 !
 ,^)*#
 
 
 s%%#9#9 3&?-M%="71
 
 
 	
r*   layerprefix)r   BitsAndBytesMoEMethodc                     t          |t                    r2t          || j                  rt	                      S t          |           S t          |t                    rt          | |j                  S d S r4   )	
isinstancer
   is_layer_skipped_bnbr    r   BitsAndBytesLinearMethodr   rJ   
moe_config)r'   rH   rI   s      r)   get_quant_methodz#BitsAndBytesConfig.get_quant_method   sq     eZ(( 	A#FD,FGG 1.000+D111x(( 	A(u/?@@@tr*   )
FTr   r   r   FFFNr   )__name__
__module____qualname____doc__boolstrlistfloatr%   r.   classmethodr   r2   r5   dtyper8   intr<   staticmethodr>   dictr   rG   nnModuler   rP   __classcell__r(   s   @r)   r   r      s         #!&/&-#(*/16).26$'   !$	
 !$ ! $( +/ #'  $Cy4/ " 
     <
# 
 
 
 
 -    [ >$u{*; > > > [> 3    [ $s)    \ .
c3h .
4H .
 .
 .
 [.
`	X_	.1		:	;d	B	 	 	 	 	 	 	 	r*   r   rI   r    c                     |                      d          t          fd|D                       }t          fdt          t	                              D                       }t          |          }t	          ||z            dk    }|p|S )N.c              3       K   | ]}|v V  	d S r4   r1   ).0module_name
componentss     r)   	<genexpr>z'is_layer_skipped_bnb.<locals>.<genexpr>   s9        &1z!     r*   c              3   X   K   | ]$}d                      d|dz                      V  %dS )rc   N   )join)re   irg   s     r)   rh   z'is_layer_skipped_bnb.<locals>.<genexpr>   s<      WW1*Wq1uW"566WWWWWWr*   r   )splitanysetrangelen)rI   r    substr_checkset_componentsset_llm_int8_skip_modulesprefix_checkrg   s         @r)   rM   rM      s    c""J     5J    L
 WWWWc*oo@V@VWWWWWN #$9 : :0>ABBaGL'<'r*   c                    | j         r=t          j        |           j        t          j        t          j                  j        z  S t          j        |           j        t          j        t          j                  j        z  S r4   )is_floating_pointr5   finfobitsiinfor   rZ   s    r)   calculate_quant_ratior|      s\     H{5!!&%+ek*B*B*GGG{5!!&%+ek*B*B*GGGr*   c                   d   e Zd ZdZdefdZdej        j        de	de
e	         de	de	d	ej        fd
Z	 ddej        j        dej        dej        dz  dej        fdZ	 ddej        j        dej        dej        dz  dej        fdZ	 ddej        j        dej        dej        dz  dej        fdZdS )rN   zjLinear method for BitsAndBytes.

    Args:
       quant_config: The BitsAndBytes quantization config.
    quant_configc                     	 dd l }t          j        |j                  t          j        d          k     rt	          d          n"# t          $ r}t	          d          |d }~ww xY w|| _        d S Nr   z0.46.1zCbitsandbytes version is wrong. Please install bitsandbytes>=0.46.1.ziPlease install bitsandbytes>=0.46.1 via `pip install bitsandbytes>=0.46.1` to use bitsandbytes quantizer.)r0   r   parse__version__ImportErrorr~   )r'   r~   r0   errs       r)   r%   z!BitsAndBytesLinearMethod.__init__   s    	}\566x9P9PPP!4   Q
  	 	 	*  		 )s   AA 
A$AA$rH   input_size_per_partitionoutput_partition_sizes
input_sizeoutput_sizeparams_dtypec                      ddl m  fd}fd}	 j        j        r |            }
n
 |	            }
|                    d|
           t          |
|           d S )Nr   )
Int8Paramsc            	           t          j        t                    t           j                  j        j        d          } t          | dddddd           | S )Nr{   F)datahas_fp16_weightsrequires_gradr   rj   T)	input_dim
output_dimpack_factoruse_bitsandbytes_8bit
generation)r5   emptysumint8r~   r   r   )qweightr   r   r   r'   s    r)   create_qweight_for_8bitzHBitsAndBytesLinearMethod.create_weights.<locals>.create_qweight_for_8bit   s     j[.//,*  
 "&!2!K#  G !""##$-1"# 	 	 	 Nr*   c                  *   t                    } t                    z  }|| z  dk    rt          d          t          j                            t          j        || z  dt          j                  d          }t          |dd| dd           |S )	Nr   z>The input size is not aligned with the quantized weight shape.rj   r{   Fr   T)r   r   r   use_bitsandbytes_4bit)	r|   r   r&   r5   r^   	Parameterr   r   r   )quant_ratio
total_sizer   r   r   r   s      r)   create_qweight_for_4bitzHBitsAndBytesLinearMethod.create_weights.<locals>.create_qweight_for_4bit   s    /==K1C8N4O4OOJK'1,, T   h((J+5qLLL# )  G !""##.-1	    Nr*   weight)bitsandbytes.nnr   r~   r   register_parameterr   )r'   rH   r   r   r   r   r   extra_weight_attrsr   r   r   r   s   ` ``  `    @r)   create_weightsz'BitsAndBytesLinearMethod.create_weights   s     	/.....	 	 	 	 	 	 	 	,	 	 	 	 	 	 	0 ) 	0--//GG--//G 	  7333"455555r*   Nxbiasr"   c                 v    | j         j        r|                     |||          S |                     |||          S r4   )r~   r   _apply_8bit_weight_apply_4bit_weight)r'   rH   r   r   s       r)   applyzBitsAndBytesLinearMethod.apply  sB     ) 	;**5!T:::**5!T:::r*   c           	         ddl m}m} |j        }|j        }d}|j        dk    r+|                    d|                    d                    }d}|                    t          j
                  }	|j        }
|
j        }|
j        }|
j        }|
j        }|j        d         }t!          d |                                D                       }t          j        ||t          j        |j                  }d}t+          t-          |                    D ]}||         j        d         }|dk    s|d	k    r |            ||<   |
||         ||d	z                     ||         _        ||                             |j                  ||         _        | j        j        ||         _        | j        j        ||         _        d||         _        ||         j        d
k    r||         j        sd||         _        |	                     d          } |||
||         ||d	z                     ||                   |d d |||z   f<   ||z  }|dk    rO| j        j        sC||         j        6||         j!        )||         `||         j!        |
||         ||d	z            <   |                    |          }|r* |j"        g |d d         |                    d          R  }|||z  }|
xj        d	z  c_        |S )Nr   )MatmulLtStatematmulF   Tc                 4    g | ]}|d          j         d         S rj   r   shapere   quant_states     r)   
<listcomp>z?BitsAndBytesLinearMethod._apply_8bit_weight.<locals>.<listcomp>2  $    MMM[^!!$MMMr*   rZ   devicerj   g        )state)#r0   r   r   rZ   r   ndimreshapesizetor5   r7   r   bnb_shard_offsetsbnb_quant_statematmul_stater   r   itemsr   r6   r   rp   rq   CBSCBr~   r!   	thresholdr   r   is_traininguse_pool	unsqueezeCxBview)r'   rH   r   r   r   r   original_typeoriginal_shapereshape_after_matmulbf_xr   offsetsquant_statesmatmul_statesr   	out_dim_0	out_dim_1outcurrent_indexrl   r   new_xs                         r)   r   z+BitsAndBytesLinearMethod._apply_8bit_weight  s=    	76666666$6A::		"affRjj))A#' ttEN##,+.,'
GAJ	MM8J8J8L8LMMM
 
	 k)YemAHUUUs<(()) $	L $	LA&q//2K Q*//#0=??a &-gaj71q5>.I&Ja #'3A'9'9!('C'Ca $-1->-Qa * &*%6%O "/4a ,!!$.44)!,= 5 15M!$-NN1%%EBH&wwqzGAEN:;=QRCSC C CC==;#>>>? [(M Q)B !!$'3!!$(4!!$'7DQ7G7K
WQU^34ff]## 	?#(>N3B3/>">>>C4KCa
r*   c                 l   |j         }|j        }d}|j        dk    r+|                    d|                    d                    }d}|                    t          j                  }|j        }|j	        }	|j
        }
|j        d         }t          d |	                                D                       }t          j        ||t          j        |j                  }t          |||
|           |                    |          }|r* |j        g |d d         |                    d          R  }|||z  }|S )NFr   r   Tr   c                 4    g | ]}|d          j         d         S r   r   r   s     r)   r   z?BitsAndBytesLinearMethod._apply_4bit_weight.<locals>.<listcomp>}  r   r*   r   )rZ   r   r   r   r   r   r5   r7   r   r   r   r   r   r   r   apply_bnb_4bitr   )r'   rH   r   r   r   r   r   r   r   r   r   r   r   r   s                 r)   r   z+BitsAndBytesLinearMethod._apply_4bit_weighti  s5    $6A::		"affRjj))A#' ttEN##,.+GAJ	MM8J8J8L8LMMM
 
	 k)YenQXVVVtWgs333ff]## 	?#(>N3B3/>">>>C4KC
r*   r4   )rQ   rR   rS   rT   r   r%   r5   r^   r_   r[   rW   rZ   r   Tensorr   r   r   r1   r*   r)   rN   rN      s        )%7 ) ) ) )$A6xA6 #&A6 !%S		A6
 A6 A6 kA6 A6 A6 A6N %)		; 	;x	; <	; lT!		;
 
	; 	; 	; 	; %)	N NxN <N lT!	N
 
N N N Nh %)	   x  <  lT!	 
 
           r*   rN   r   r   r   r   r"   c           	      "   ddl m} |j        }d}t          t	          |                    D ]b}||         j        d         } || |||         ||dz                                                     ||                   |d d |||z   f<   ||z  }cd S )Nr   )matmul_4bitrj   )r0   r   r   rp   rq   r   t)	r   r   r   r   r   r   r   rl   r   s	            r)   _apply_bnb_4bitr     s     )((((()LM3|$$%% 	% 	%"1o+A.
 ?Jkvgaj71q5>124466Q?
 ?
AAA}}{:::; 	$	% 	%r*   c                     d S r4   r1   )r   r   r   r   s       r)   _apply_bnb_4bit_faker     s	     Fr*   r   )op_nameop_funcmutates_args	fake_impldispatch_keyc                       e Zd ZdZdedef fdZdej        j	        de
de
de
d	ej        f
d
Zdej        j	        dedz  fdZdedej        dej        dej        dej        eej        ej        f         z  f
dZdej        j	        de
de
de
d	ej        f
dZdej        j	        de
de
de
d	ej        f
dZdej        j	        deej        ej        f         fdZdej        j	        deej        ej        f         fdZ xZS )rJ   zgMoE method for BitsAndBytes.

    Args:
       quant_config: The BitsAndBytes quantization config.
    r~   moec                 $   t                                          |           	 dd l}t          j        |j                  t          j        d          k     rt          d          n"# t          $ r}t          d          |d }~ww xY w|| _        d S r   )r$   r%   r0   r   r   r   r   r~   )r'   r~   r   r0   r   r(   s        r)   r%   zBitsAndBytesMoEMethod.__init__  s    
 		}\566x9P9PPP!4   Q
  	 	 	*  		 )s   AA' '
B1BBrH   num_expertshidden_sizeintermediate_size_per_partitionr   c                 V    | j         j        r| j        }n| j        } ||||||fi | d S r4   )r~   r   _create_weights_8bit_create_weights_4bit)r'   rH   r   r   r   r   r   call_funs           r)   r   z$BitsAndBytesMoEMethod.create_weights  s`     ) 	10HH0H+	
 	
 !	
 	
 	
 	
 	
r*   r"   Nc                     d S r4   r1   r'   rH   s     r)   get_fused_moe_quant_configz0BitsAndBytesMoEMethod.get_fused_moe_quant_config  s	     tr*   r   topk_weightstopk_idsc                     ddl m} | j        j        r|                     |          \  }}n|                     |          \  }} ||||||d|j        |j        |j        |j	        | j
                  S )Nr   )fused_expertsT)hidden_statesw1w2r   r   inplace
activationapply_router_weight_on_inputglobal_num_experts
expert_mapr~   )$vllm.model_executor.layers.fused_moer   r~   r   _apply_8bit_dequant_apply_4bit_dequntr   r   r   r   moe_quant_config)r'   rH   r   r   r   r   w13r   s           r)   r   zBitsAndBytesMoEMethod.apply  s     	GFFFFF ) 	5..u55GC--e44GC}%').)K$7'.
 
 
 	
r*   c           
      Z   t          |          }|dz  |z  |z  }t          j                            t          j        ||dt          j                  d          }	|                    d|	           t          |	|           t          |	||d|z  ||dz  |f|dd           ||z  |z  }
t          j                            t          j        ||
dt          j                  d          }t          |||||||f|dd           |                    d	|           t          ||           d S )
Nr   rj   r{   Fr   
w13_weightT)r   r   r   experts_shaper   r   	w2_weight)r|   r5   r^   r   r   r   r   r   )r'   rH   r   r   r   r   r   r   w13_total_sizew13_qweightw2_total_size
w2_qweights               r)   r   z*BitsAndBytesMoEMethod._create_weights_4bit  s    ,L99 !O== h((Kk	     ) 
 
 	  {;;;&8999*("AA3a7"
  +)- 	
 	
 	
  %'FF;VX''Kk	     ( 
 

 	*<)3"
  +)- 	
 	
 	
 	  j999%788888r*   c                     t           r4   NotImplementedError)r'   rH   r   r   r   r   r   s          r)   r   z*BitsAndBytesMoEMethod._create_weights_8bitS  s
     "!r*   c                 N   ddl m}  ||j                            dd          |j        j                  } ||j                            dd          |j        j                  }|                    |j        j                  }|                    |j        j                  }||fS )Nr   )dequantize_4bitr   rj   )bitsandbytes.functionalr  r  r   r   r  r  )r'   rH   r  r  r   s        r)   r  z(BitsAndBytesMoEMethod._apply_4bit_dequnt^  s     	<;;;;;o$$R++,
 
 _O##B**O+
 
 kk%*899ZZ566Bwr*   c                     t           r4   r  r   s     r)   r  z)BitsAndBytesMoEMethod._apply_8bit_dequanto  s
     "!r*   )rQ   rR   rS   rT   r   r   r%   r5   r^   r_   r[   rZ   r   r   r   r   r   tupler   r   r   r  r  r`   ra   s   @r)   rJ   rJ     s9        )() ) ) ) ) ) ),
x
 
 	

 *-
 k
 
 
 
,X_	t	#   


 <
 l	

 ,
 
elEL89	9
 
 
 
8C9xC9 C9 	C9
 *-C9 kC9 C9 C9 C9J	"x	" 	" 		"
 *-	" k	" 	" 	" 	"X_	u|U\)	*   ""X_"	u|U\)	*" " " " " " " "r*   rJ   )(typingr   r   r5   	packagingr   +vllm.model_executor.layers.fused_moe.configr   r   *vllm.model_executor.layers.fused_moe.layerr   r	   !vllm.model_executor.layers.linearr
   r   r   r   'vllm.model_executor.layers.quantizationr   r   vllm.platformsr   vllm.utils.torch_utilsr   r   rV   rW   rM   r|   rN   r   r   r   r   opsvllmr   AttributeErrorerrorrJ   r1   r*   r)   <module>r!     s                                                  , + + + + + < < < < < <x x x x x+ x x xv( (T#Y ( ( ( ("H H HW W W W W/ W W Wt%|%L% \% 
	%
 
% % % %.|L \ 
	
 
    W&%2    Y^2NN   
Kx" x" x" x" x". x" x" x" x" x"s   5'C C)"C$$C)