
    -`i;                        U d dl mZmZ d dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZmZmZmZmZmZmZmZ d dlmZ d dl m!Z! ej"        j#        j$        j%        Z&ej"        j#        j'        j%        Z(ej"        j#        j)        j%        Z*ej"        j+        j,        j%        Z-eej"        j#        j.        j%        eej"        j#        j/        j%        eej"        j#        j0        j%        iZ1e2ee	f         e3d<    e!j4                    r/ e5ej"        j#        d          rej"        j#        j6        j%        e1e<    e!j4                    r2ej"        j#        j7        j%        e1e<   ej"        j#        j7        j%        e1e<   ej"        j#        j8        j%        Z9 G d de          Z: G d de:          Z; G d de:          Z< G d de:          Z= G d de:          Z> G d de:          Z?dS )    )ABCabstractmethod)AnyN)auto_functionalized)
OpOverload)rocm_aiter_ops)get_current_vllm_config)
SiluAndMul)RMSNorm)QuantFP8)	
GroupShapeQuantKey_normalize_quant_group_shapekFp8Dynamic64SymkFp8Dynamic128SymkFp8DynamicTensorSymkFp8DynamicTokenSymkFp8StaticTensorSymkNvfp4Dynamic)RotaryEmbedding)current_platform	QUANT_OPSscaled_fp4_quantc                       e Zd ZdeddfdZedededefd            Zedededefd            Zdededefd	Z	dedede
j        fd
Zdedede
j        fdZdedede
j        fdZdee
j                 fdZdS )MatcherCustomOpenabledreturnNc                     t                      }|j        r|j        j        nd | _        |j        r|j        j        nd | _        || _        |r| j        n| j        | _	        d S N)
r	   model_configdtypemodel_dtypedevice_configdevicer   forward_customforward_nativeforward)selfr   configs      r/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/compilation/matcher_utils.py__init__zMatcherCustomOp.__init__3   se    (**8>8KU6.44QU5;5ISf*11t.5Nt**4;N    argskwargsc                     d S r    r(   r-   r.   s      r*   r%   zMatcherCustomOp.forward_custom;       r,   c                     d S r   r0   r1   s      r*   r&   zMatcherCustomOp.forward_native?   r2   r,   c                      | j         |i |S r   )r'   r1   s      r*   __call__zMatcherCustomOp.__call__C   s    t|T,V,,,r,   c                 >    t          j        || j        | j        d|S Nr!   r$   )torchemptyr"   r$   r1   s      r*   r:   zMatcherCustomOp.emptyF   s$    {D(8WWPVWWWr,   c                 H    t          j        |t           j        | j        d|S r7   )r9   r:   int64r$   r1   s      r*   empty_int64zMatcherCustomOp.empty_int64I   s"    {DDKRR6RRRr,   c                 H    t          j        |t           j        | j        d|S r7   )r9   r:   float32r$   r1   s      r*   	empty_f32zMatcherCustomOp.empty_f32L   s"    {DdkTTVTTTr,   c                     t           )z!Utility for inputs to the pattern)NotImplementedError)r(   s    r*   inputszMatcherCustomOp.inputsO   s    !!r,   )__name__
__module____qualname__boolr+   r   r   r%   r&   r5   r9   Tensorr:   r=   r@   listrC   r0   r,   r*   r   r   2   s       O O O O O O C 3 3    ^ C 3 3    ^-c -S -S - - - -X3 X# X%, X X X XS S S S S S SUs Uc Uel U U U U"U\* " " " " " "r,   r   c                   R    e Zd Z	 	 ddededededededz  d	df fd
Zd	eej                 fdZ	dej        dej        dej        dz  dej        d	e
ej        ej        dz  f         f
dZdej        dej        dej        dz  dej        d	e
ej        ej        dz  f         f
dZ xZS )MatcherRotaryEmbeddingFNis_neox	head_size	num_headsnum_kv_headsuse_flashinferr   r   c                 B   |t          j                    }t                                          |           || _        || _        || _        || _        | j        | j        z  | _        | j        | j        z  | _	        || _
        |rt          | _        d S t          | _        d S r   )r   r   superr+   rL   rM   rN   rO   q_sizekv_size
rotary_dimFLASHINFER_ROTARY_OP	rotary_op	ROTARY_OP)r(   rL   rM   rN   rO   rP   r   	__class__s          r*   r+   zMatcherRotaryEmbedding.__init__U   s     ?%-//G!!!""(nt~5(4>9# 	'1DNNN&DNNNr,   c                     |                      d          }|                     d| j                  }|                     d| j                  }|                     d| j                  }||||gS )N   i   )r=   r:   rS   rT   rU   )r(   	positionsquerykeycos_sin_caches        r*   rC   zMatcherRotaryEmbedding.inputsn   sc    $$Q''	

1dk**jjDL))

4995#}55r,   r\   r]   r^   r_   c           	          t          | j        |||| j        || j                  }|d         }t	          |          dk    r|d         nd }||fS )N)r\   r]   r^   rM   r_   rL         )r   rW   rM   rL   len)r(   r\   r]   r^   r_   result	query_outkey_outs           r*   r%   z%MatcherRotaryEmbedding.forward_customu   sd     %Nn'L
 
 
 1I	"6{{Q&))D'!!r,   c           	      X    t          j        |||| j        | j        || j                  }|S r   )r   forward_staticrM   rU   rL   )r(   r\   r]   r^   r_   rd   s         r*   r&   z%MatcherRotaryEmbedding.forward_native   s;     *  	 r,   )FN)rD   rE   rF   rG   intr+   rI   r9   rH   rC   tupler%   r&   __classcell__rY   s   @r*   rK   rK   T   s|         %#' '' ' 	'
 ' ' ' 
' ' ' ' ' '26U\* 6 6 6 6"<" |" \D 	"
 |" 
u|U\D00	1" " " "(< | \D 	
 | 
u|U\D00	1       r,   rK   c            	            e Zd Z	 	 ddededz  deddf fdZdeej                 fdZ	d	ej        d
ej        dej        fdZ
d	ej        d
ej        dej        fdZd	ej        d
ej        dej        fdZ xZS )MatcherRMSNormNFepsilonr   match_rocm_aiterr   c                     |t          j                    }t                                          |           || _        t
          | _        || _        |rt          j	                    | _        d S d S r   )
r   r   rR   r+   ro   RMS_OP_rmsnorm_oprp   r   get_rmsnorm_opr(   ro   r   rp   rY   s       r*   r+   zMatcherRMSNorm.__init__   so     ?o''G!!!! 0 	?-<>>D	? 	?r,   c                     | j         r|                     dd          n|                     dd          }|                     d          }||gS Nr[      r   r:   r@   r(   inputweights      r*   rC   zMatcherRMSNorm.inputs   sH    %)\L

1b!!!t~~a7L7LBvr,   r{   r|   c                 <    |                      ||| j                  S )N)xr|   variance_epsilonrs   ro   rz   s      r*   forward_rocm_aiterz!MatcherRMSNorm.forward_rocm_aiter   s,    
 !\   
 
 	
r,   c                     | j         r|                     ||          S t          j        |          }t	          | j        |||| j                  \  }}|S )N)rd   r{   r|   ro   )rp   r   r9   
empty_liker   rs   ro   )r(   r{   r|   rd   _s        r*   r%   zMatcherRMSNorm.forward_custom   sh    
   	:**5&999!%(('L
 
 
	6 r,   c                 l    t          j        || j        |                    d          | j        |          S Nr   rh   ro   sizer"   rz   s      r*   r&   zMatcherRMSNorm.forward_native   s3    
 %4<B1A6
 
 	
r,   NF)rD   rE   rF   floatrG   r+   rI   r9   rH   rC   r   r%   r&   rk   rl   s   @r*   rn   rn      s8         $!&	? ?? ? 	?
 
? ? ? ? ? ?"U\*    
	
|	
 	
 
		
 	
 	
 	
|  
	   &
|
 
 
	
 
 
 
 
 
 
 
r,   rn   c            
       l    e Zd Z	 	 ddededz  deddf fdZdeej                 fdZ	d	ej        d
ej        dej        de
ej        ej        f         fdZd	ej        d
ej        dej        de
ej        ej        f         fdZd	ej        d
ej        dej        de
ej        ej        f         fdZ xZS )MatcherFusedAddRMSNormNFro   r   rp   r   c                     |t          j                    }t                                          |           || _        || _        t          | _        |rt          j	                    | _        d S d S r   )
r   r   rR   r+   ro   rp   
RMS_ADD_OPrs   r   get_rmsnorm_fused_add_opru   s       r*   r+   zMatcherFusedAddRMSNorm.__init__   sr     ?o''G!!! 0% 	I-FHHD	I 	Ir,   c                     | j         r|                     dd          n|                     dd          }|                     d          }|                     dd          }|||gS rw   ry   r(   r{   r|   residuals       r*   rC   zMatcherFusedAddRMSNorm.inputs   s^    %)\L

1b!!!t~~a7L7LB::a$$vx((r,   r{   r|   r   c                 >    |                      |||| j                  S )N)r~   r   r|   r   r   r   s       r*   r   z)MatcherFusedAddRMSNorm.forward_rocm_aiter   s,     hv   
 
 	
r,   c                     | j         r|                     |||          S t          | j        |||| j                  \  }}}||fS )N)r{   r   r|   ro   )rp   r   r   rs   ro   )r(   r{   r|   r   r   rd   s         r*   r%   z%MatcherFusedAddRMSNorm.forward_custom   sd       	D**5&(CCC1L
 
 
68 xr,   c                 r    t          j        || j        |                    d          | j        ||          }|S r   r   )r(   r{   r|   r   rd   s        r*   r&   z%MatcherFusedAddRMSNorm.forward_native  s9     5<4J4<B1A685
 5
 r,   r   )rD   rE   rF   r   rG   r+   rI   r9   rH   rC   rj   r   r%   r&   rk   rl   s   @r*   r   r      s         $!&	I II I 	I
 
I I I I I I$)U\* ) ) ) )
|
 
 ,	

 
u|U\)	*
 
 
 
 |    ,	 
 
u|U\)	*       &	|	 	 ,		
 
u|U\)	*	 	 	 	 	 	 	 	r,   r   c                       e Zd Z	 	 	 	 ddededz  dedededdf fd	Z	 dd
ej        dej        dz  deej        ej        f         fdZ		 dd
ej        dej        dz  deej        ej        f         fdZ
	 dd
ej        dej        dz  deej        ej        f         fdZdd
ej        dedej        fdZdeej                 fdZ xZS )MatcherQuantFP8NF	quant_keyr   has_col_major_scalesis_e8m0rp   r   c                 N   |t          j                    }t                                          |           || _        || _        || _        || _        |r|j        j	        
                                r
J d            |j        j	                                        rt          j                    | _        n|j        j	        j        dk    s
J d            t!          j                    rt          j                    | _        nxt&          j        j        j        j        | _        nW|t0          v sJ d|             t0          |         | _        |j        t!          j                    k    s
J d            |j        J t          |j        j        |j        j	        ||d          | _        d S )Nz?ROCm aiter fusion pass does not support per tensor quantization   zTROCm aiter fusion pass currently supports quantization operation with group_size 128z unsupported quantization scheme zOnly QuantFP8 supported byF)column_major_scales	use_ue8m0compile_native)r   r   rR   r+   r   r   r   rp   scalegroup_shapeis_per_tensoris_per_tokenr   get_per_token_quant_opQUANT_OPcolr   is_fp8_fnuzget_group_quant_opr9   opsvllm triton_per_token_group_quant_fp8defaultr   r!   	fp8_dtypescale2static	quant_fp8)r(   r   r   r   r   rp   rY   s         r*   r+   zMatcherQuantFP8.__init__   s    ?&((G!!!"$8! 0 	, 2@@BB  Q B *7799  . E G G 26#===A >== $/11 $2$E$G$GDMM 	GO MM
 	)))>9>> *)) &i0DM?&6&@&B&BBBB, CBB #+++!O"O' 4 
 
 
r,   r{   r   c                     | j         j        j        }|t          j        k    r"|                     || j         j        |          S |                     ||j                  S )N)r~   quant_dtyper   )r   r   r   r   	PER_TOKENr   r!   r   )r(   r{   r   quant_key_group_shapes       r*   r   z"MatcherQuantFP8.forward_rocm_aiterV  sc    
 !% 4 @ J$888== N0 !    ==(=(ABBBr,   c                    | j         r|                     ||          S t          j        |j        |j        | j        j                  }| j        j        j	        
                                r|J |                     || j                  }t          j        | j        j                  }|j        }|j        }t!          | j        |||| j        j        j	        d         d||| j        	  	        \  }}}||fS | j        j        j        r#|J t!          | j        |||          \  }}||fS |J |                     |          }t!          | j        |||d           \  }}}||fS )Nr$   r!   )
transposedra   g|=)r{   output_qoutput_s
group_sizeepsfp8_minfp8_maxscale_ue8m0)rd   r{   r   )rd   r{   r   scale_ub)rp   r   r9   r:   shaper$   r   r!   r   r   is_per_group
make_scaler   finfominmaxr   r   r   r   )r(   r{   r   rd   r   r   r   r   s           r*   r%   zMatcherQuantFP8.forward_custome  s   
   	9**5%888KDN4H
 
 
 >+88:: 	!===OOEd6OOPPEK 455EiGiG2>/;A> L
  
  
 Avu 5= >& 	!$$$+fE  IAv 5= ===OOE**E2fEQU     Avu 5= r,   c                 .    |                      ||          S r   )r   )r(   r{   r   s      r*   r&   zMatcherQuantFP8.forward_native  s    
 ~~eU+++r,   r   c                    t          || j        j        j                  }|j        d         |d         z  |j        d         |d         z  f}|rVt          t          |                    }t          j        ||j	        t          j
                                      dd          S t          j        ||j	        t          j
                  S )Nr   ra   r   r   )r   r   r   r   r   rj   reversedr9   r:   r$   r?   permute)r(   r{   r   normalized_group_shapescale_shapes        r*   r   zMatcherQuantFP8.make_scale  s    !=4>'3"
 "
 KN4Q77KN4Q77
  	 5 566K;EL  gb"oo {;u|5=QQQQr,   c                     |                      dd          }| j        j        j        r||                     dd          gS |gS )Nr[   rx   ra   )r:   r   r   r   r@   r(   r{   s     r*   rC   zMatcherQuantFP8.inputs  sE    

1b!!>& 	14>>!Q//00wr,   )NFFFr   )F)rD   rE   rF   r   rG   r+   r9   rH   rj   r   r%   r&   r   rI   rC   rk   rl   s   @r*   r   r     s         $%*!&4
 4
4
 4
 #	4

 4
 4
 
4
 4
 4
 4
 4
 4
r &*C C|C |d"C 
u|U\)	*	C C C C$ &*-! -!|-! |d"-! 
u|U\)	*	-! -! -! -!d &*, ,|, |d", 
u|U\)	*	, , , ,R R R$ R5< R R R R U\*        r,   r   c                        e Zd Zd	dedz  ddf fdZdeej                 fdZdej        dej        fdZ	dej        dej        fdZ
 xZS )
MatcherSiluAndMulNr   r   c                 t    |t          j                    }t                                          |           d S r   )r
   r   rR   r+   )r(   r   rY   s     r*   r+   zMatcherSiluAndMul.__init__  s4    ? (**G!!!!!r,   c                 4    |                      dd          }|gS )Nr[      )r:   r   s     r*   rC   zMatcherSiluAndMul.inputs  s    

1a  wr,   r~   c                     |j         d         dz  }|j         d d         |fz   }t          j        ||j        |j                  }t          t          ||          }|d         S )Nr   rb   r8   )rd   r{   ra   )r   r9   r:   r!   r$   r   SILU_MUL_OP)r(   r~   doutput_shapeoutrd   s         r*   r%   z MatcherSiluAndMul.forward_custom  sb     GBK1wss|qd*k,agahGGG$[AFFFayr,   c                 *    t          j        |          S r   )r
   r&   )r(   r~   s     r*   r&   z MatcherSiluAndMul.forward_native  s     (+++r,   r   )rD   rE   rF   rG   r+   rI   r9   rH   rC   r%   r&   rk   rl   s   @r*   r   r     s        " "t "t " " " " " "
U\*    < 
   ,<, 
, , , , , , , ,r,   r   )@abcr   r   typingr   r9   torch._higher_order_opsr   
torch._opsr   vllm._aiter_opsr   vllm.configr	   %vllm.model_executor.layers.activationr
   $vllm.model_executor.layers.layernormr   7vllm.model_executor.layers.quantization.input_quant_fp8r   9vllm.model_executor.layers.quantization.utils.quant_utilsr   r   r   r   r   r   r   r   r   +vllm.model_executor.layers.rotary_embeddingr   vllm.platformsr   r   _Crms_normr   rr   fused_add_rms_normr   rotary_embeddingrX   r   flashinfer_rotary_embeddingrV   static_scaled_fp8_quantdynamic_scaled_fp8_quant"dynamic_per_token_scaled_fp8_quantr   dict__annotations__is_cudahasattrr   per_token_group_fp8_quantsilu_and_mulr   r   rK   rn   r   r   r   r0   r,   r*   <module>r      s   $ # # # # # # # #        7 7 7 7 7 7 ! ! ! ! ! ! * * * * * * / / / / / / < < < < < < 8 8 8 8 8 8 L L L L L L
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 H G G G G G + + + + + +			&Y\,4
IL)1	y~AI  =E%),?GHP)	4*$%     E''%),8J"K"K E$y|<DIm Q#(9<#I#QI "'),"H"PIil'/" " " " "c " " "DG G G G G_ G G GT<
 <
 <
 <
 <
_ <
 <
 <
~? ? ? ? ?_ ? ? ?DQ Q Q Q Qo Q Q Qh, , , , , , , , , ,r,   