
    -`iN                        U d dl mZmZ d dlZd dlmc mZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZmZmZmZmZmZmZmZmZmZ d d
lmZ ddl m!Z! ddl"m#Z#m$Z$m%Z% ddl&m'Z'm(Z(  ee)          Z* ej+                    Z,ej-        Z.dededej/        fdZ0dededej/        fdZ1dededej/        fdZ2dededej/        fdZ3ej4        j5        j6        j7        Z8ej4        j5        j9        j7        Z:eej4        j5        j;        j7        eej4        j5        j<        j7        eej4        j5        j=        j7        iZ>e?eef         e@d<    ejA                    r/ eBej4        j5        d          rej4        j5        jC        j7        e>e<    ejA                    r2ej4        j5        jD        j7        e>e<   ej4        j5        jD        j7        e>e<    G d de          ZE eEed          ej4        j5        jF        j7         eEed          ej4        j5        jG        j7         eEed          ej4        j5        jH        j7         eEed          ej4        j5        jH        j7         eEed          ej4        j5        jI        j7         eEed          ej4        j5        jI        j7         eEed          ej4        j5        jI        j7         eEed          ej4        j5        jI        j7        iZJe?eEef         e@d<    G d d          ZK G d d eK          ZL G d! d"eK          ZM G d# d$eK          ZN G d% d&eK          ZO G d' d(eK          ZP G d) d*eK          ZQ G d+ d,e(          ZRdS )-    )Any
NamedTupleN)fx)auto_functionalized)PatternMatcherPass)
OpOverload)
VllmConfigget_current_vllm_config)init_logger)

GroupShapeQuantKey	ScaleDesckFp8Dynamic64SymkFp8Dynamic128SymkFp8DynamicTensorSymkFp8DynamicTokenSymkFp8StaticTensorSymkNvfp4DynamickStaticTensorScale)current_platform   )enable_fake_mode)MatcherFusedAddRMSNormMatcherQuantFP8MatcherRMSNorm)VllmInductorPassVllmPatternMatcherPassargskwargsreturnc                  B    t          j        | i |t           j        ddS Ncuda)dtypedevice)torchemptybfloat16r   r   s     k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/compilation/fusion.py
empty_bf16r+   )   s&    ;LLenVLLLLL    c                  B    t          j        | i |t           j        ddS r"   )r&   r'   float32r)   s     r*   
empty_fp32r/   -   s&    ;KKemFKKKKKr,   c                  B    t          j        | i |t           j        ddS r"   )r&   r'   int32r)   s     r*   	empty_i32r2   1   &    ;IIek&IIIIIr,   c                  B    t          j        | i |t           j        ddS r"   )r&   r'   int64r)   s     r*   	empty_i64r6   5   r3   r,   	QUANT_OPSscaled_fp4_quantc                   4    e Zd ZU dZeed<   eed<   defdZdS )FusedRMSQuantKeyz
    Named tuple for identifying the type of RMSNorm + quant fusion.
    quant: type of quantization
    fused_add: does the op also perform the residual add
    quant	fused_addr    c                 0    d| j          d| j        rdnd dS )NzFusedQuantKey(z, with outz
 residual))r;   r<   selfs    r*   __str__zFusedRMSQuantKey.__str__R   s7    ;TZ ; ;^.rr; ; ;	
r,   N)	__name__
__module____qualname____doc__r   __annotations__boolstrrB    r,   r*   r:   r:   H   sO           OOOOOO
 
 
 
 
 
 
r,   r:   FT	FUSED_OPSc                   0    e Zd Z	 	 d	dededededdf
dZdS )
RMSNormQuantPatternFepsilonkeyhas_col_major_scalesis_e8m0r    Nc                 b   || _         |j        j        | _        t	                      }|j        r|j        j        nd | _        |t          v sJ d|             t          |         | _        |j	        st          |          nt          |          | _        t          |j        ||          | _        d S )Nz'unsupported fused rmsnorm+quant op for rP   rQ   )rN   r;   r$   quant_dtyper
   model_configmodel_dtyperK   FUSED_OPr<   r   r   rmsnorm_matcherr   quant_matcher)rA   rN   rO   rP   rQ   configs         r*   __init__zRMSNormQuantPattern.__init__v   s     9?(**8>8KU6.44QUi!P3!P!P!# =1N7###'00 	
 -I,@'
 
 
r,   )FF)rC   rD   rE   floatr:   rH   r[   rJ   r,   r*   rM   rM   u   se        
 &+
 

 
 #	

 
 

 
 
 
 
 
r,   rM   c            	       N     e Zd Z	 d
dedej        deddf fdZdeddfd	Z	 xZ
S )RMSNormStaticQuantPatternTrN   rT   	symmetricr    Nc                     t          dt          |t          |                    }t                                          ||           d S NFr$   scaler_   r<   r;   r:   r   r   superr[   )rA   rN   rT   r_   	fused_key	__class__s        r*   r[   z"RMSNormStaticQuantPattern.__init__   sX     %!);y  
 
 
	 	),,,,,r,   pm_passc                     dt           j        dt           j        dt           j        dt           j        f fd}dt           j        dt           j        dt           j        dt           j        f fd}g  j                                         j                                        d         } ||  t          j        |||t
          j        |           d S )Ninputweightrc   r    c                 h                         | |          }                    ||          d         S )Nr   rX   rY   )rk   rl   rc   
result_rmsrA   s       r*   patternz3RMSNormStaticQuantPattern.register.<locals>.pattern   s5     --eV<<J%%j%88;;r,   c                     |                      j                  } t          j        | j        | j        j                  }t          j        || ||j	                  }|d         S )Nr$   )r%   r$   )resultrk   rl   rc   rN   r   )
torV   r&   r'   shaper%   rT   r   rW   rN   )rk   rl   rc   rs   atrA   s        r*   replacementz7RMSNormStaticQuantPattern.register.<locals>.replacement   sv    
 HH4#3H44E[EL8H  F %  B a5Lr,   r   )r&   TensorrX   inputsrY   pmregister_replacementfwd_onlyrA   ri   rp   rw   ry   s   `    r*   registerz"RMSNormStaticQuantPattern.register   s    	<<	<).	<>Cl	<\	< 	< 	< 	< 	< 	<	<	).	>Cl	\	 	 	 	 	 	,
!((**
 %%''*

 	
fbk7SSSSSr,   TrC   rD   rE   r\   r&   r$   rH   r[   r   r~   __classcell__rh   s   @r*   r^   r^      s        JN	- 	-	-+0;	-CG	-		- 	- 	- 	- 	- 	-%T 2 %Tt %T %T %T %T %T %T %T %Tr,   r^   c            	       N     e Zd Z	 d
dedej        deddf fdZdeddfd	Z	 xZ
S )!FusedAddRMSNormStaticQuantPatternTrN   rT   r_   r    Nc                     t          dt          |t          |                    }t                                          ||           d S )NTrb   rd   re   )rA   rN   rT   r_   rO   rh   s        r*   r[   z*FusedAddRMSNormStaticQuantPattern.__init__   sX     !);y  
 
 
 	#&&&&&r,   ri   c                 $    dt           j        dt           j        dt           j        dt           j        dt          t           j        t           j        f         f
 fd}dt           j        dt           j        dt           j        dt           j        dt          t           j        t           j        f         f
 fd}g  j                                         j                                        d         }t          j        |||t          j        |           d S )	Nrk   rl   residualrc   r    c                 r                         | ||          \  }}                    ||          \  }}||fS Nrn   )rk   rl   r   rc   ro   rs   _rA   s          r*   rp   z;FusedAddRMSNormStaticQuantPattern.register.<locals>.pattern   sE     $(#7#7vx#P#P J**:u==IFA8##r,   c           	          |                      j                  } t          j        | j                  }t          j        || |||j                  }|d         |d         fS )Nrr   )rs   rk   r   rl   rc   rN   r      )rt   rV   r&   
empty_likerT   r   rW   rN   )rk   rl   r   rc   rs   rv   rA   s         r*   rw   z?FusedAddRMSNormStaticQuantPattern.register.<locals>.replacement   st     HH4#3H44E%e43CDDDF$!  B a5"Q%<r,   r   )	r&   rx   tuplerX   ry   rY   rz   r{   r|   r}   s   `    r*   r~   z*FusedAddRMSNormStaticQuantPattern.register   s5   		$<		$L		$ l		$ <			$
 5<-.		$ 		$ 		$ 		$ 		$ 		$	 <	 L	  l	  <		 
 5<-.	  	  	  	  	  	 0
!((**
 %%''*
 	K	
 	
 	
 	
 	
r,   r   r   r   s   @r*   r   r      s        JN	' 	'	'+0;	'CG	'		' 	' 	' 	' 	' 	'0
 2 0
t 0
 0
 0
 0
 0
 0
 0
 0
r,   r   c                   ^     e Zd Z	 	 	 ddedej        dedededed	d
f fdZde	d	d
fdZ
 xZS ) FusedAddRMSNormGroupQuantPatternTFrN   rT   group_shaper_   rP   rQ   r    Nc                     t          t          j        d|          }t          dt	          |||                    }|| _        || _        || _        t                      	                    ||||           d S )NFTrb   rd   rS   )
r   r&   r.   r:   r   r   rP   rQ   rf   r[   
rA   rN   rT   r   r_   rP   rQ   rc   rO   rh   s
            r*   r[   z)FusedAddRMSNormGroupQuantPattern.__init__  s     %-<<EYOOO
 
 
 '$8!S/CW 	 	
 	
 	
 	
 	
r,   ri   c                     dt           j        dt           j        dt           j        dt          t           j        t           j        t           j        f         f fd}dt           j        dt           j        dt           j        dt          t           j        t           j        t           j        f         f fd}t          j        || j                                        t          j        |           d S )Nrk   rl   r   r    c                 r                         | ||          \  }}                    |          \  }}|||fS r   rn   rk   rl   r   ro   rs   rc   rA   s         r*   rp   z:FusedAddRMSNormGroupQuantPattern.register.<locals>.pattern  sE     $(#7#7vx#P#P J ..z::MFE8U**r,   c                 D   |                      j                  } t          j        | j                  }j                            | j                  }t          j	        || ||j
        d |j        d         j        
  
        }|d         |d         |d         fS )Nrr   r   	rs   rk   rl   rc   rN   scale_ubr   
group_sizeis_scale_transposed   r   rt   rV   r&   r   rT   rY   
make_scalerP   r   rW   rN   r   rk   rl   r   rs   rc   rv   rA   s         r*   rw   z>FusedAddRMSNormGroupQuantPattern.register.<locals>.replacement   s    
 HH4#3H44E%e43CDDDF&11%9RSSE$!+A.$($=  B a5"Q%A&&r,   r&   rx   r   rz   r{   rX   ry   r|   rA   ri   rp   rw   s   `   r*   r~   z)FusedAddRMSNormGroupQuantPattern.register  s    	+<	+).	+AF	+5<u|;<	+ 	+ 	+ 	+ 	+ 	+	'<	').	'AF	'5<u|;<	' 	' 	' 	' 	' 	'2 	 ''))K	
 	
 	
 	
 	
r,   TFFrC   rD   rE   r\   r&   r$   r   rH   r[   r   r~   r   r   s   @r*   r   r     s         %*
 

 [
  	

 
 #
 
 

 
 
 
 
 
*'
 2 '
t '
 '
 '
 '
 '
 '
 '
 '
r,   r   c                   ^     e Zd Z	 	 	 ddedej        dedededed	d
f fdZde	d	d
fdZ
 xZS )RMSNormGroupQuantPatternTFrN   rT   r   r_   rP   rQ   r    Nc                     t          t          j        d|          }t          dt	          |||                    }|| _        t                                          ||||           d S )NFrb   rd   rS   )r   r&   r.   r:   r   r   rf   r[   r   s
            r*   r[   z!RMSNormGroupQuantPattern.__init__C  s     %-<<EYOOO
 
 
 'S/CW 	 	
 	
 	
 	
 	
r,   ri   c                 ~    dt           j        dt           j        dt          t           j        t           j        f         f fd}dt           j        dt           j        dt          t           j        t           j        f         f fd}t          j        || j                                        t          j        |           d S )Nrk   rl   r    c                 h                         | |          }                    |          \  }}||fS r   rn   )rk   rl   ro   rs   rc   rA   s        r*   rp   z2RMSNormGroupQuantPattern.register.<locals>.patternW  s;     --eV<<J ..z::MFE5= r,   c                 L   |                      j                  } t          j        | j                  }j                            | j        j                  }t          j	        || ||j
        d d j        d         j        j        
  
        }|d         |d         fS )Nrr   )
transposedr   r   r   r   rk   rl   rs   rc   rv   rA   s        r*   rw   z6RMSNormGroupQuantPattern.register.<locals>.replacement^  s    
 HH4#3H44E%e43CDDDF&11$"4"I 2  E %+A.$($6$K  B a5"Q%<r,   r   r   s   `   r*   r~   z!RMSNormGroupQuantPattern.registerV  s    	!<	!).	!5<-.	! 	! 	! 	! 	! 	!	 <	 ).	 5<-.	  	  	  	  	  	 6 	 ''))K	
 	
 	
 	
 	
r,   r   r   r   s   @r*   r   r   B  s         %*
 

 [
  	

 
 #
 
 

 
 
 
 
 
&)
 2 )
t )
 )
 )
 )
 )
 )
 )
 )
r,   r   c                   ^     e Zd Zej        dfdedej        dededdf
 fdZ	d	e
ddfd
Z xZS )RMSNormDynamicQuantPatternTrN   rT   r   r_   r    Nc                     t          t          j        d|          }t          dt	          |||                    }t                                          ||           d S ra   r   r&   r.   r:   r   rf   r[   rA   rN   rT   r   r_   rc   rO   rh   s          r*   r[   z#RMSNormDynamicQuantPattern.__init__  sc     %-<<EYOOO
 
 
 	#&&&&&r,   ri   c                 ~    dt           j        dt           j        dt          t           j        t           j        f         f fd}dt           j        dt           j        dt          t           j        t           j        f         f fd}t          j        || j                                        t          j        |           d S )Nrk   rl   r    c                 Z                         | |          }                    |          S r   rn   )rk   rl   ro   rA   s      r*   rp   z4RMSNormDynamicQuantPattern.register.<locals>.pattern  s.     --eV<<J%%j111r,   c           
         |                      j                  } t          j        | j                  }j                            |           }t          j        || ||j	        d d           }|d         |d         fS )Nrr   rs   rk   rl   rc   rN   r   r   r   r   
rt   rV   r&   r   rT   rY   r   r   rW   rN   r   s        r*   rw   z8RMSNormDynamicQuantPattern.register.<locals>.replacement  s    
 HH4#3H44E%e43CDDDF&11%88E$	 	 	B a5"Q%<r,   r   r   s   `   r*   r~   z#RMSNormDynamicQuantPattern.register  s    	2<	2).	25<-.	2 	2 	2 	2 	2 	2	 <	 ).	 5<-.	  	  	  	  	  	 . 	 ''))K	
 	
 	
 	
 	
r,   rC   rD   rE   r   	PER_TOKENr\   r&   r$   rH   r[   r   r~   r   r   s   @r*   r   r     s        
 #-"6' '' ['  	'
 ' 
' ' ' ' ' '%
 2 %
t %
 %
 %
 %
 %
 %
 %
 %
r,   r   c                   ^     e Zd Zej        dfdedej        dededdf
 fdZ	d	e
ddfd
Z xZS )"FusedAddRMSNormDynamicQuantPatternTrN   rT   r   r_   r    Nc                     t          t          j        d|          }t          dt	          |||                    }t                                          ||           d S )NFTrb   rd   r   r   s          r*   r[   z+FusedAddRMSNormDynamicQuantPattern.__init__  sc     %-<<EYOOO
 
 
 	#&&&&&r,   ri   c                     dt           j        dt           j        dt           j        dt          t           j        t           j        t           j        f         f fd}dt           j        dt           j        dt           j        dt          t           j        t           j        t           j        f         f fd}t          j        || j                                        t          j        |           d S )Nrk   rl   r   r    c                 r                         | ||          \  }}                    |          \  }}|||fS r   rn   r   s         r*   rp   z<FusedAddRMSNormDynamicQuantPattern.register.<locals>.pattern  sE     $(#7#7vx#P#P J ..z::MFE8U**r,   c           
         |                      j                  } t          j        | j                  }j                            |           }t          j        || ||j	        d |          }|d         |d         |d         fS )Nrr   r   r   r   r   r   r   s         r*   rw   z@FusedAddRMSNormDynamicQuantPattern.register.<locals>.replacement  s    
 HH4#3H44E%e43CDDDF&11%88E$!	 	 	B a5"Q%A&&r,   r   r   s   `   r*   r~   z+FusedAddRMSNormDynamicQuantPattern.register  s    	+<	+).	+AF	+5<u|;<	+ 	+ 	+ 	+ 	+ 	+	'<	').	'AF	'5<u|;<	' 	' 	' 	' 	' 	'. 	 ''))K	
 	
 	
 	
 	
r,   r   r   s   @r*   r   r     s        
 #-"6' '' ['  	'
 ' 
' ' ' ' ' '&
 2 &
t &
 &
 &
 &
 &
 &
 &
 &
r,   r   c                   |     e Zd ZdZededdf fd            Zej        de	j
        ddfd            ZdefdZ xZS )	RMSNormQuantFusionPassz~
    This pass fuses rms_norm & quant custom ops into a fused rms_norm_quant op.
    It also supports fused_add_rms_norm.
    rZ   r    Nc                 d   t                                          |           t          d          | _        dD ]Y}t	          |t
                                        | j                   t          |t
                                        | j                   t          |t
                                        | j                   t          |t
                                        | j                   t          j                    rt          dd          t          dd          fD ]n}dD ]i}dD ]d}t          |t
          |||                              | j                   t          |t
          |||                              | j                   ejo[|                     || j                   d S )	Nrmsnorm_quant_fusion_pass)	pass_name)gh㈵>gư>r      @   )TF)r   rP   rQ   )rf   r[   r   patternsr   	FP8_DTYPEr~   r^   r   r   r   is_cudar   r   r   dump_patterns)rA   rZ   rN   r   rP   rQ   rh   s         r*   r[   zRMSNormQuantFusionPass.__init__  s      ,>1-
 -
 -
 $ &	6 &	6G-gyAAJJ  
 &gy99BB4=QQQ /w	BBKK  
 'w	::CCDMRRR  ')) 6$.q#$6$6
1b8I8I#J 6 6K0= 6 6,'4 6 6G< ' ),75I(/   'ht}555 5 ' ),75I(/   'ht}5555#66( 	64=11111r,   graphc                     | j                             |          | _        t                              d| j                   d S )NzReplaced %s patterns)r   applymatched_countloggerdebug)rA   r   s     r*   __call__zRMSNormQuantFusionPass.__call__+  s8    !]0077+T-?@@@@@r,   c           
          |                      | t          t          t          t          t
          t          t                    S r   )hash_sourcer   rM   r^   r   r   r   r   r@   s    r*   uuidzRMSNormQuantFusionPass.uuid0  s2    $%&-.,	
 	
 		
r,   )rC   rD   rE   rF   r   r	   r[   r   time_and_logr   Graphr   rI   r   r   r   s   @r*   r   r     s         
 12z 12d 12 12 12 12 12 12f "Abh A4 A A A #"A

c 

 

 

 

 

 

 

 

r,   r   )Stypingr   r   r&   torch._inductor.pattern_matcher	_inductorpattern_matcherrz   r   *torch._higher_order_ops.auto_functionalizer   r   
torch._opsr   vllm.configr	   r
   vllm.loggerr   9vllm.model_executor.layers.quantization.utils.quant_utilsr   r   r   r   r   r   r   r   r   r   vllm.platformsr   inductor_passr   matcher_utilsr   r   r   vllm_inductor_passr   r   rC   r   	fp8_dtyper   uint8	FP4_DTYPErx   r+   r/   r2   r6   ops_Crms_normdefaultRMS_OPfused_add_rms_norm
RMS_ADD_OPstatic_scaled_fp8_quantdynamic_scaled_fp8_quant"dynamic_per_token_scaled_fp8_quantr7   dictrG   r   hasattrr8   per_token_group_fp8_quantr:   rms_norm_static_fp8_quant#fused_add_rms_norm_static_fp8_quant rms_norm_dynamic_per_token_quantrms_norm_per_block_quantrK   rM   r^   r   r   r   r   r   r   rJ   r,   r*   <module>r      s\   # " " " " " " " "  , , , , , , , , ,       J J J J J J > > > > > > ! ! ! ! ! ! ; ; ; ; ; ; ; ; # # # # # #                        , + + + + + + + + + + +         
 I H H H H H H H	X		&&((	K	Mc MS MU\ M M M MLc LS LU\ L L L LJS JC JEL J J J JJS JC JEL J J J J 
		&Y\,4
 =E%),?GHP)	4*$%   
  E''%),8J"K"K E$y|<DIm Q#(9<#I#QI "'),"H"PI
 
 
 
 
z 
 
 
$ U y|-5T y|7?U y|4<T y|4<5 y|,44 y|,4% y|,4$ y|,411	4 *,-   8
 
 
 
 
 
 
 
41T 1T 1T 1T 1T 3 1T 1T 1Th<
 <
 <
 <
 <
(; <
 <
 <
~=
 =
 =
 =
 =
': =
 =
 =
@=
 =
 =
 =
 =
2 =
 =
 =
@4
 4
 4
 4
 4
!4 4
 4
 4
n5
 5
 5
 5
 5
)< 5
 5
 5
pI
 I
 I
 I
 I
3 I
 I
 I
 I
 I
r,   