
    -`i34                        d dl Z d dlmc mZ d dl mZ d dlmZ d dlmZ d dl	Z
d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZ ddlmZ ddlmZ ddlmZmZm Z m!Z! ddl"m#Z#m$Z$  ee%          Z& ej'                    Z( G d d          Z) G d de)          Z* G d de)          Z+ G d de)          Z, G d de)          Z- G d de$          Z. G d de          Z/ G d de$          Z0dS )     N)fx)PatternMatcherPass)
OpOverload)rocm_aiter_ops)ActivationQuantPattern)
VllmConfig)init_logger)
GroupShapeQuantKey	ScaleDesc)current_platform   )FusedRMSQuantKey)enable_fake_mode)MatcherFusedAddRMSNormMatcherQuantFP8MatcherRMSNormMatcherSiluAndMul)VllmInductorPassVllmPatternMatcherPassc                   &    e Zd Z	 ddededefdZdS )AiterRMSNormQuantPatternTepsilonkeymatch_aiter_quantc                     || _         |j        j        | _        |j        st          |d          nt          |d          | _        t          |j        |          | _	        d S )NT)match_rocm_aiter)
r   quantdtypequant_dtype	fused_addr   r   rmsnorm_matcherr   quant_matcher)selfr   r   r   s       v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/compilation/rocm_aiter_fusion.py__init__z!AiterRMSNormQuantPattern.__init__'   su     9? =HN7T::::'$GGG 	
 -I.
 
 
    N)T)__name__
__module____qualname__floatr   boolr&    r'   r%   r   r   &   sH        OS
 

#3
HL
 
 
 
 
 
r'   r   c                        e Zd ZdZ ej                    Zdej        dfde	de
j        dedededd	f fd
Zdedd	fdZ xZS )AiterRMSNormDynamicQuantPatternz-AITER RMSNorm + Dynamic Quantization pattern.Tr   r    r   group_shape	symmetricreturnNc                     t          t          j        d|          }t          dt	          |||                    }t                                          |||           d S NFr   scaler1   r!   r   r   torchfloat32r   r   superr&   	r$   r   r    r   r0   r1   r6   r   	__class__s	           r%   r&   z(AiterRMSNormDynamicQuantPattern.__init__=   f     %-<<EYOOO
 
 

 	#'899999r'   pm_passc                 ~    dt           j        dt           j        dt          t           j        t           j        f         f fd}dt           j        dt           j        dt          t           j        t           j        f         f fd}t          j        || j                                        t          j        |           d S )Ninputweightr2   c                 h                         | |          }                    |          \  }}||fS Nr"   r#   rA   rB   
result_rmsresultr6   r$   s        r%   patternz9AiterRMSNormDynamicQuantPattern.register.<locals>.patternN   ;     --eV<<J ..z::MFE5= r'   c                 j                         | |j        j                  }|d         |d         fS )N)xrB   r   r    r   r   FUSED_OPr   r    )rA   rB   rH   r$   s      r%   replacementz=AiterRMSNormDynamicQuantPattern.register.<locals>.replacementV   sC     ]] ,	 #  F !9fQi''r'   r9   Tensortuplepmregister_replacementr"   inputsfwd_onlyr$   r?   rI   rO   s   `   r%   registerz(AiterRMSNormDynamicQuantPattern.registerM   s    	!<	!L	! 5<-.	! 	! 	! 	! 	! 	!	(<	(L	( 5<-.	( 	( 	( 	( 	( 	( 	 ''))K	
 	
 	
 	
 	
r'   )r(   r)   r*   __doc__r   "get_rmsnorm_fused_dynamic_quant_oprN   r
   	PER_TOKENr+   r9   r   r,   r&   r   rX   __classcell__r=   s   @r%   r/   r/   8   s        77@~@BBH #'","6: :: [:  	:
  : : 
: : : : : : 
 2 
t 
 
 
 
 
 
 
 
r'   r/   c                        e Zd ZdZ ej                    Zdej        dfde	de
j        dedededd	f fd
Zdedd	fdZ xZS )'AiterFusedAddRMSNormDynamicQuantPatternz7AITER RMSNorm Fused Add + Dynamic Quantization pattern.Tr   r    r   r0   r1   r2   Nc                     t          t          j        d|          }t          dt	          |||                    }t                                          |||           d S NFTr5   r7   r8   r<   s	           r%   r&   z0AiterFusedAddRMSNormDynamicQuantPattern.__init__q   f     %-<<EYOOO
 
 

 	#'899999r'   r?   c                     dt           j        dt           j        dt           j        dt          t           j        t           j        t           j        f         f fd}dt           j        dt           j        dt           j        dt          t           j        t           j        t           j        f         f fd}t          j        || j                                        t          j        |           d S )NrA   rB   residualr2   c                 r                         | ||          \  }}                    |          \  }}|||fS rD   rE   rA   rB   rd   rG   residual_outrH   r6   r$   s          r%   rI   zAAiterFusedAddRMSNormDynamicQuantPattern.register.<locals>.pattern   E    
 (,';';E68'T'T$J ..z::MFE<..r'   c                 z                         | ||j        j                  }|d         |d         |d         fS )N)rL   rd   rB   r   r    r   r      rM   )rA   rB   rd   rH   r$   s       r%   rO   zEAiterFusedAddRMSNormDynamicQuantPattern.register.<locals>.replacement   sL     ]]! , #  F !9fQi22r'   rP   rW   s   `   r%   rX   z0AiterFusedAddRMSNormDynamicQuantPattern.register   s    	/<	/L	/ l	/ 5<u|;<		/ 	/ 	/ 	/ 	/ 	/	3<	3).	3AF	35<u|;<	3 	3 	3 	3 	3 	3 	 ''))K	
 	
 	
 	
 	
r'   )r(   r)   r*   rY   r   &get_rmsnorm_fused_add_dynamic_quant_oprN   r
   r[   r+   r9   r   r,   r&   r   rX   r\   r]   s   @r%   r_   r_   l   s        AAD~DFFH #'","6: :: [:  	:
  : : 
: : : : : : 
 2 
t 
 
 
 
 
 
 
 
r'   r_   c                   z     e Zd ZdZ ej                    Z	 	 ddedej	        de
dededd	f fd
Zdedd	fdZ xZS )AiterRMSFp8GroupQuantPatternzw
    This pattern fuses aiter rms_norm & group fp8 quant custom
    ops into an aiter rms_norm_group_fp8_quant op.
    Tr   r    r0   r   r1   r2   Nc                     t          t          j        d|          }t          dt	          |||                    }t                                          |||           d S r4   r8   	r$   r   r    r0   r   r1   r6   r   r=   s	           r%   r&   z%AiterRMSFp8GroupQuantPattern.__init__   r>   r'   r?   c                 ~    dt           j        dt           j        dt          t           j        t           j        f         f fd}dt           j        dt           j        dt          t           j        t           j        f         f fd}t          j        || j                                        t          j        |           d S )NrA   rB   r2   c                 h                         | |          }                    |          \  }}||fS rD   rE   rF   s        r%   rI   z6AiterRMSFp8GroupQuantPattern.register.<locals>.pattern   rJ   r'   c                 `                         | |j        d          }|d         |d         fS )N   )rL   rB   variance_epsilon
group_sizer   r   rN   r   )rA   rB   atr$   s      r%   rO   z:AiterRMSFp8GroupQuantPattern.register.<locals>.replacement   s?     !%	   B a5"Q%<r'   rP   rW   s   `   r%   rX   z%AiterRMSFp8GroupQuantPattern.register   s    	!<	!L	! 5<-.	! 	! 	! 	! 	! 	!	 <	 L	  5<-.	  	  	  	  	  	  	[$"6"="="?"?g	
 	
 	
 	
 	
r'   TT)r(   r)   r*   rY   r    get_rmsnorm_group_fused_quant_oprN   r+   r9   r   r
   r,   r&   r   rX   r\   r]   s   @r%   rm   rm      s         
 ?~>@@H #': :: [:  	:
  : : 
: : : : : : 
 2 
t 
 
 
 
 
 
 
 
r'   rm   c                   z     e Zd ZdZ ej                    Z	 	 ddedej	        de
dededd	f fd
Zdedd	fdZ xZS )$AiterFusedAddRMSFp8GroupQuantPatternz
    This pattern fuses aiter rms_norm_with_add & group fp8 quant custom ops
    into a aiter rms_norm_with_add_group_fp8_quant op.
    Tr   r    r0   r   r1   r2   Nc                     t          t          j        d|          }t          dt	          |||                    }t                                          |||           d S ra   r8   ro   s	           r%   r&   z-AiterFusedAddRMSFp8GroupQuantPattern.__init__   rb   r'   r?   c                     dt           j        dt           j        dt           j        dt          t           j        t           j        t           j        f         f fd}dt           j        dt           j        dt           j        dt          t           j        t           j        t           j        f         f fd}t          j        || j                                        t          j        |           d S )NrA   rB   rd   r2   c                 r                         | ||          \  }}                    |          \  }}|||fS rD   rE   rf   s          r%   rI   z>AiterFusedAddRMSFp8GroupQuantPattern.register.<locals>.pattern   rh   r'   c                 p                         | ||j        d          }|d         |d         |d         fS )Nrs   )rL   rd   rB   rt   ru   r   r   rj   rv   )rA   rB   rd   rw   r$   s       r%   rO   zBAiterFusedAddRMSFp8GroupQuantPattern.register.<locals>.replacement   sI    
 !!%   B a5"Q%A&&r'   rP   rW   s   `   r%   rX   z-AiterFusedAddRMSFp8GroupQuantPattern.register   s    	/<	/L	/ l	/ 5<u|;<		/ 	/ 	/ 	/ 	/ 	/	'<	'L	' l	' 5<u|;<		' 	' 	' 	' 	' 	'  	[$"6"="="?"?g	
 	
 	
 	
 	
r'   rx   )r(   r)   r*   rY   r   $get_rmsnorm_group_add_fused_quant_oprN   r+   r9   r   r
   r,   r&   r   rX   r\   r]   s   @r%   r{   r{      s         
 C~BDDH #': :: [:  	:
  : : 
: : : : : : 
 2 
t 
 
 
 
 
 
 
 
r'   r{   c                   |     e Zd ZdZededdf fd            Zej        de	j
        ddfd            ZdefdZ xZS )	RocmAiterRMSNormFusionPassz
    This pass fuses aiter rms_norm & vllm/aiter quant custom ops
    into a fused rms_norm_quant op.
    It also supports fused_add_rms_norm.
    configr2   Nc           	      j   t                                          |           t          d          | _        dD ]}t	          |t
          t          dd                                        | j                   t          |t
          t          dd                                        | j                   dD ]`}t          |t
          |                              | j                   t          |t
          |                              | j                   a|                     || j                   d S )N%rocm_aiter_rms_norm_quant_fusion_pass	pass_name)gh㈵>gư>r   rs   )TF)r   )r;   r&   r   patternsrm   	FP8_DTYPEr
   rX   r{   r/   r_   dump_patterns)r$   r   r   r   r=   s       r%   r&   z#RocmAiterRMSNormFusionPass.__init__  s;      ,>=-
 -
 -
 $ 	* 	*G(Jq#$6$6 ht}%%% 1Jq#$6$6 ht}%%%%2 * *! 0Y:K  (4=))) 8Y:K  (4=))))* 	64=11111r'   graphc                     | j                             |          | _        t                              d| j                   d S NzReplaced %s patternsr   applymatched_countloggerdebugr$   r   s     r%   __call__z#RocmAiterRMSNormFusionPass.__call__8  8    !]0077+T-?@@@@@r'   c                 R    t           t          t          t          g} | j        | g|R  S rD   )r/   r_   rm   r{   hash_sourcer$   fusion_patternss     r%   uuidzRocmAiterRMSNormFusionPass.uuid=  s2    +3(0	
  t77777r'   )r(   r)   r*   rY   r   r   r&   r   time_and_logr   Graphr   strr   r\   r]   s   @r%   r   r     s          !2z !2d !2 !2 !2 !2 !2 !2F "Abh A4 A A A #"A8c 8 8 8 8 8 8 8 8r'   r   c                   r    e Zd ZdZ ej                    ZdeddfdZde	e
j                 fdZdeddfdZdS )	 AiterSiluMulFp8GroupQuantPatternz
    This pattern fuses aiter silu_and_mul & group fp8 quant custom
    ops into an aiter silu_and_mul_group_fp8_quant op.
    quant_opr2   Nc                 :    t                      | _        || _        d S rD   )r   silu_and_mul_matcherr   )r$   r   s     r%   r&   z)AiterSiluMulFp8GroupQuantPattern.__init__O  s    $5$7$7! r'   c                 B    | j                                         d         gS )Nr   )r   rU   )r$   s    r%   
get_inputsz+AiterSiluMulFp8GroupQuantPattern.get_inputsS  s#    %,,..q1
 	
r'   r?   c                 D    dt           j        dt          t           j        t           j        f         f fd}dt           j        dt          t           j        t           j        f         f fd}t          j        ||                                 t          j        |           d S )NrA   r2   c                 z                         |           }                    |d          }|d         |d         fS )Nrs   r   r   )r   r   )rA   at1at2r$   s      r%   rI   z:AiterSiluMulFp8GroupQuantPattern.register.<locals>.patternY  s=     ++E22C--S))Cq63q6>!r'   c                 R                         | d          }|d         |d         fS )Nrs   )rL   ru   r   r   )FUSED_SILU_MUL_QUANT_OP)rA   rw   r$   s     r%   rO   z>AiterSiluMulFp8GroupQuantPattern.register.<locals>.replacement`  s/     --#-FFBa5"Q%<r'   )r9   rQ   rR   rS   rT   r   rV   rW   s   `   r%   rX   z)AiterSiluMulFp8GroupQuantPattern.registerX  s    	"<	"5<-.	" 	" 	" 	" 	" 	"	 <	 5<-.	  	  	  	  	  	  	[$//"3"3R['	
 	
 	
 	
 	
r'   )r(   r)   r*   rY   r   $get_act_mul_fused_fp8_group_quant_opr   r   r&   listr9   rQ   r   r   rX   r-   r'   r%   r   r   G  s         
 RnQSS! ! ! ! ! !
D. 
 
 
 


 2 
t 
 
 
 
 
 
r'   r   c                        e Zd ZdZ ej                    Zej        j	        j
        j        ZeegZededdf fd            Zej        dej        j        ddfd            ZdefdZ xZS )	'RocmAiterSiluMulFp8GroupQuantFusionPassah  
    This pass fuses a pre-defined set of custom ops into fused ops.
    It uses the torch pattern matcher to find the patterns and replace them.

    Because patterns can only be registered once, the pass is a singleton.
    This will be addressed in a future version of PyTorch:
    https://github.com/pytorch/pytorch/pull/139321#issuecomment-2452354980
    r   r2   Nc                    t                                          |           t          d          | _        | j        D ])}t          |                              | j                   *|                     || j                   d S )N/rocm_aiter_silu_mul_fp8_group_quant_fusion_passr   )r;   r&   r   r   	QUANT_OPSr   rX   r   )r$   r   r   r=   s      r%   r&   z0RocmAiterSiluMulFp8GroupQuantFusionPass.__init__z  s       ,>G-
 -
 -
  	O 	OH,X66??NNNN64=11111r'   r   c                     | j                             |          | _        t                              d| j                   d S r   r   r   s     r%   r   z0RocmAiterSiluMulFp8GroupQuantFusionPass.__call__  r   r'   c                 B    t           t          g}t          j        | g|R  S rD   )r   r   r   r   r   s     r%   r   z,RocmAiterSiluMulFp8GroupQuantFusionPass.uuid  s*    ",
  +DC?CCCCr'   )r(   r)   r*   rY   r   get_group_quant_opAITER_GROUP_FP8_QUANT_OPr9   opsvllm triton_per_token_group_quant_fp8defaultTRITON_GROUP_FP8_QUANT_OPr   r   r   r&   r   r   r   r   r   r   r   r\   r]   s   @r%   r   r   k  s           A~@BB %	 O W)+DEI
2z 
2d 
2 
2 
2 
2 
2 
2 "Aehn A A A A #"ADc D D D D D D D Dr'   r   )1r9   torch._inductor.pattern_matcher	_inductorpattern_matcherrS   r   r   
torch._opsr   7vllm.model_executor.layers.quantization.utils.fp8_utilsr   vllm._aiter_opsr   (vllm.compilation.activation_quant_fusionr   vllm.configr   vllm.loggerr	   9vllm.model_executor.layers.quantization.utils.quant_utilsr
   r   r   vllm.platformsr   fusionr   inductor_passr   matcher_utilsr   r   r   r   vllm_inductor_passr   r   r(   r   	fp8_dtyper   r   r/   r_   rm   r{   r   r   r   r-   r'   r%   <module>r      s    , , , , , , , , ,       > > > > > > ! ! ! ! ! ! > > > > * * * * * * K K K K K K " " " " " " # # # # # #         
 , + + + + +      , + + + + +            I H H H H H H H	X		&&((	
 
 
 
 
 
 
 
$1
 1
 1
 1
 1
&> 1
 1
 1
h3
 3
 3
 3
 3
.F 3
 3
 3
l0
 0
 0
 0
 0
#; 0
 0
 0
f5
 5
 5
 5
 5
+C 5
 5
 5
p78 78 78 78 78!7 78 78 78t!
 !
 !
 !
 !
'= !
 !
 !
H&D &D &D &D &D.D &D &D &D &D &Dr'   