
    -`i                     |   U d dl mZmZ d dlmZ d dlZd dlmZ d dlm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZ ddlmZmZmZmZ ddlmZ ddlm Z m!Z! ddl"m#Z#m$Z$  ee%          Z& ej'                    Z(ej)        Z*ej+        j,        j-        j.        Z/eej+        j,        j0        j.        iZ1e2eef         e3d<    ej4                    o e5ej+        j,        d          Z6e6rej+        j,        j7        j.        e1e<    G d de          Z8 G d de8          Z9 G d de8          Z: G d de$          Z;dS )    )ABCabstractmethod)AnyN)auto_functionalized)PatternMatcherPassfwd_onlyregister_replacement)
OpOverload)
VllmConfig)init_logger)QuantKeykFp8StaticTensorSymkNvfp4Dynamic)current_platform   )	QUANT_OPS
empty_bf16
empty_fp32	empty_i32)enable_fake_mode)MatcherQuantFP8MatcherSiluAndMul)VllmInductorPassVllmPatternMatcherPass	FUSED_OPSsilu_and_mul_nvfp4_quantc                   `    e Zd ZdZdeddfdZdededej        fdZ	e
d	eddfd
            ZdS )ActivationQuantPatternzW
    The base class for Activation+Quant fusions.
    Should not be used directly.
    	quant_keyreturnNc                 .   || _         |j        | _        | j         t          v sJ d| j                      t          | j                  | _        | j         t
          v sJ d| j                      t
          | j                  | _        t                      | _        d S )Nz unsupported quantization scheme zunsupported fusion scheme )	r   dtypequant_dtyper   QUANT_OPr   FUSED_OPr   silu_and_mul_matcher)selfr   s     |/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/compilation/activation_quant_fusion.py__init__zActivationQuantPattern.__init__5   s     #$?~***?t~?? +** "$.1~***999 +** "$.1$5$7$7!!!    argskwargsc                 <    | j         dd|}t          j        |i |S )Ncuda)r"   device)r#   torchempty)r'   r+   r,   s      r(   empty_quantz"ActivationQuantPattern.empty_quantH   s,    +vHHH{D+F+++r*   pm_passc                     t           N)NotImplementedError)r'   r3   s     r(   registerzActivationQuantPattern.registerL   s    !!r*   )__name__
__module____qualname____doc__r   r)   r   r0   Tensorr2   r   r   r7    r*   r(   r   r   /   s         
88 
8 8 8 8&, , , , , , , " 2 "t " " " ^" " "r*   r   c                   V     e Zd ZdZd fdZdeej                 fdZde	ddfdZ
 xZS )	SiluMulFp8StaticQuantPatternz3
    Fusion for SiluMul+Fp8StaticQuant Pattern
    r    Nc                     t                                          t                     t          t                    | _        d S r5   )superr)   r   r   quant_matcherr'   	__class__s    r(   r)   z%SiluMulFp8StaticQuantPattern.__init__V   s3    ,---,-@AAr*   c                 z    | j                                         d         }g | j                                        |S )Nr   )rB   inputsr&   )r'   scales     r(   
get_inputsz'SiluMulFp8StaticQuantPattern.get_inputsZ   sD    "))++A.
&--//

 	
r*   r3   c                     dt           j        dt           j        dt           j        f fd}dt           j        dt           j        dt           j        f fd}                                 } ||  t          |||t          |           d S )NinputrG   r    c                 j                         |           }                    ||          }|d         S )Nr   )r&   rB   )rJ   rG   result_silu_mulresult_quantr'   s       r(   patternz6SiluMulFp8StaticQuantPattern.register.<locals>.patternb   s7     #77>>O--ouEEL?"r*   c                     | j         d         dz  }| j         d d         |fz   }t          j        || j        j                  }t          j        || |          }|d         S )N   )r/   r"   )resultrJ   rG   r   )shaper0   r1   r/   r#   r   r%   )rJ   rG   doutput_shaperR   atr'   s         r(   replacementz:SiluMulFp8StaticQuantPattern.register.<locals>.replacementj   sz     B1$A ;ss+qd2L[U\9I  F %fE  B a5Lr*   )r0   r<   rH   r	   r   )r'   r3   rN   rW   inpss   `    r(   r7   z%SiluMulFp8StaticQuantPattern.registera   s    	#<	#<	# \	# 	# 	# 	# 	# 	#	<	<	 \	 	 	 	 	 	   Wk47KKKKKr*   r    Nr8   r9   r:   r;   r)   listr0   r<   rH   r   r7   __classcell__rD   s   @r(   r?   r?   Q   s         B B B B B B
D. 
 
 
 
L 2 Lt L L L L L L L Lr*   r?   c                   V     e Zd ZdZd fdZdeej                 fdZde	ddfdZ
 xZS )	SiluMulNvfp4QuantPatternz/
    Fusion for SiluMul+Nvfp4Quant Pattern
    r    Nc                 T    t                                          t                     d S r5   )rA   r)   r   rC   s    r(   r)   z!SiluMulNvfp4QuantPattern.__init__   s!    '''''r*   c                     |                      dd          }t          dd          }t          dd          }t          dd          }||||gS )N             @   r   )r2   r   r   r   )r'   rR   output_scaleinput_rG   s        r(   rH   z#SiluMulNvfp4QuantPattern.get_inputs   sS    !!!R(( a((Ar""1a  fe44r*   r3   c                     dt           j        dt           j        dt           j        dt           j        dt          t           j        t           j        f         f
 fd}dt           j        dt           j        dt           j        dt           j        dt          t           j        t           j        f         f
 fd}t          ||                                 t
          |           d S )NrR   rg   rJ   rG   r    c                                          |          }t          j        | |||d          }|d         |d         fS )NT)outputrJ   rg   input_scaleis_sf_swizzled_layoutr   rQ   )r&   r   r$   )rR   rg   rJ   rG   rL   rV   r'   s         r(   rN   z2SiluMulNvfp4QuantPattern.register.<locals>.pattern   sT     #77>>O$%)!&*  B a5"Q%<r*   c                 V    t          j        | |||          }|d         |d         fS )N)rR   result_block_scalerJ   input_global_scaler   rQ   )r   r%   )rR   rg   rJ   rG   rV   r'   s        r(   rW   z6SiluMulNvfp4QuantPattern.register.<locals>.replacement   s>     %#/#(  B a5"Q%<r*   )r0   r<   tupler	   rH   r   )r'   r3   rN   rW   s   `   r(   r7   z!SiluMulNvfp4QuantPattern.register   s    	 L	 ,	  <	  <		 
 5<-.	  	  	  	  	  	 "	 L	 ,	  <	  <		 
 5<-.	  	  	  	  	  	  	Wk4??3D3DhPWXXXXXr*   rY   rZ   r]   s   @r(   r_   r_   ~   s         ( ( ( ( ( (5D. 5 5 5 5!Y 2 !Yt !Y !Y !Y !Y !Y !Y !Y !Yr*   r_   c                        e Zd ZdZededdf fd            Zej        de	j
        j        ddfd            ZdefdZ xZS )	ActivationQuantFusionPassah  
    This pass fuses a pre-defined set of custom ops into fused ops.
    It uses the torch pattern matcher to find the patterns and replace them.

    Because patterns can only be registered once, the pass is a singleton.
    This will be addressed in a future version of PyTorch:
    https://github.com/pytorch/pytorch/pull/139321#issuecomment-2452354980
    configr    Nc                 X   t                                          |           t          d          | _        t	                      }|                    | j                   t          r(t                      }|                    | j                   |                     || j                   d S )Nactivation_quant_fusion_pass)	pass_name)	rA   r)   r   patternsr?   r7   "silu_and_mul_nvfp4_quant_supportedr_   dump_patterns)r'   rt   pattern_silu_mul_fp8pattern_silu_mul_nvfp4rD   s       r(   r)   z"ActivationQuantFusionPass.__init__   s       ,>4-
 -
 -
  <==%%dm444- 	;%=%?%?""++DM:::64=11111r*   graphc                     | j                             |          | _        t                              d| j                   d S )NzReplaced %s patterns)rx   applymatched_countloggerdebug)r'   r}   s     r(   __call__z"ActivationQuantFusionPass.__call__   s8    !]0077+T-?@@@@@r*   c                 N    t          j        | t          t          t                    S r5   )r   hash_sourcer   r?   r_   )r'   s    r(   uuidzActivationQuantFusionPass.uuid   s#    +"($	
 
 	
r*   )r8   r9   r:   r;   r   r   r)   r   time_and_logr0   fxGraphr   strr   r\   r]   s   @r(   rs   rs      s          2z 2d 2 2 2 2 2 2  "Aehn A A A A #"A
c 
 
 
 
 
 
 
 
r*   rs   )<abcr   r   typingr   r0   *torch._higher_order_ops.auto_functionalizer   torch._inductor.pattern_matcherr   r   r	   
torch._opsr
   vllm.configr   vllm.loggerr   9vllm.model_executor.layers.quantization.utils.quant_utilsr   r   r   vllm.platformsr   fusionr   r   r   r   inductor_passr   matcher_utilsr   r   vllm_inductor_passr   r   r8   r   	fp8_dtype	FP8_DTYPEuint8	FP4_DTYPEops_Csilu_and_muldefaultSILU_MUL_OPsilu_and_mul_quantr   dict__annotations__is_cudahasattrry   r   r   r?   r_   rs   r=   r*   r(   <module>r      s   $ # # # # # # # #        J J J J J J         
 " ! ! ! ! ! " " " " " " # # # # # #         
 , + + + + + @ @ @ @ @ @ @ @ @ @ @ @ + + + + + + = = = = = = = = H H H H H H H H	X		&&((	K	il'/ 8@)	4*$%    &>%5%=%?%? &GG	IL,E E " & M$y|DLIm" " " " "S " " "D*L *L *L *L *L#9 *L *L *LZ0Y 0Y 0Y 0Y 0Y5 0Y 0Y 0Yf&
 &
 &
 &
 &
 6 &
 &
 &
 &
 &
r*   