
    -`i"                     ^   d dl mZ d dlmZ d dlZd dlmc mZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ ddlmZmZmZ ddlmZ ddlmZmZ ddlm Z m!Z!  ee"          Z#ej$        j%        j&        j'        Z( ed          Z) G d d          Z* G d de!          Z+dS )    )Callable)	ParamSpecN)fx)auto_functionalized)PatternMatcherPass)	Attention)
VllmConfigget_layers_from_vllm_config)init_logger)RotaryEmbedding   )
empty_bf16
empty_fp32	empty_i64)enable_fake_mode)MatcherRMSNormMatcherRotaryEmbedding)VllmInductorPassVllmPatternMatcherPassPc                      e Zd ZdZ	 ddedededededed	d
fdZd	ee	j
                 fdZedeeej        f         deej        gd
f         d	eeej        f         fd            Zede	j        j        d	d
fd            Zded	d
fdZd
S )QkNormRopePatterna;  
    Match the unfused sequence in attention blocks and replace with the fused op.

    Unfused (conceptually):
      q, k, v = split(qkv, [qsz, kvsz, kvsz], -1)
      qh = reshape(q, [-1, num_heads, head_dim])
      kh = reshape(k, [-1, num_kv_heads, head_dim])
      qn = rms_norm(qh, q_weight, eps)
      kn = rms_norm(kh, k_weight, eps)
      qf = reshape(qn, [-1, num_heads * head_dim])
      kf = reshape(kn, [-1, num_kv_heads * head_dim])
      qf, kf = rotary_embedding(positions, qf, kf, head_dim, cos_sin_cache, is_neox)
      return qf, kf, v

    Fused replacement:
      fused_qk_norm_rope(qkv, num_heads, num_kv_heads, num_kv_heads, head_dim,
                         eps, q_weight, k_weight, cos_sin_cache, is_neox,
                         positions.view(-1))
      return split(qkv, [qsz, kvsz, kvsz], -1)
    Fhead_dim	num_headsnum_kv_headsepsis_neoxrope_flashinferreturnNc                 ,   || _         || _        || _        | j         | j        z  | _        | j        | j        z  | _        || _        t          |          | _        || _        || _	        t          || j        | j         | j        | j	                  | _        d S )N)r   	head_sizer   r   use_flashinfer)r   r   r   q_sizekv_sizer   r   rmsnorm_matcherr   r   r   rope_matcher)selfr   r   r   r   r   r   s          x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/compilation/qk_norm_rope_fusion.py__init__zQkNormRopePattern.__init__4   s     #( nt}4(4=8-c22.2mn*/
 
 
    c                 *   d}t          || j        d| j        z  z             }t          |          }t          d| j                  }t          d| j                  }| j        rt          d| j                  }nt          d| j                  }|||||gS )N      r   i   )r   r#   r$   r   r   r   r   )r'   Tqkv	positionsq_weightk_weightcos_sin_caches          r(   
get_inputszQkNormRopePattern.get_inputsN   s    DK!dl*::;;aLL	a//a// 	<&tT];;MM&tT];;M
 	
r*   trace_fnprocess_fx_fnsc                 `     dt           j        dt           j        dt          j        f fd}|S )Nargskwargsr   c                  8     | i |}D ]} ||           |S N )r8   r9   gm
process_fxr6   r5   s       r(   wrappedz0QkNormRopePattern.wrap_trace_fn.<locals>.wrappedf   s:    4*6**B,  

2Ir*   )r   r8   r9   r   GraphModule)r5   r6   r?   s   `` r(   wrap_trace_fnzQkNormRopePattern.wrap_trace_fna   sI    
	16 	QX 	". 	 	 	 	 	 	 	 r*   r=   c                 (    ddl m}  ||            d S )Nr   )view_to_reshape)#torch._inductor.fx_passes.post_gradrC   )r=   rC   s     r(   fx_view_to_reshapez$QkNormRopePattern.fx_view_to_reshapeo   s*    GGGGGGr*   pm_passc                 v    dt           j        dt           j        dt           j        dt           j        dt           j        dt          t           j        t           j        t           j        f         f fd}dt           j        dt           j        dt           j        dt           j        dt           j        dt          t           j        t           j        t           j        f         f fd}t          j        ||                                 t                              t          j        t          j	                  |           d S )	Nr/   r0   r1   r2   r3   r   c                 6   |                      j        j        j        gd          \  }}} |j        g |j        d d         |j        d         j        z  j        R  }                    ||          }	|	                    |j                  }
 |j        g |j        d d         |j        d         j        z  j        R  }                    ||          }|                    |j                  }                    ||
||          \  }}|||fS )Ndim)splitr#   r$   viewshaper   r%   r&   )r/   r0   r1   r2   r3   qkv	q_by_headq_normed_by_headq_flat	k_by_headk_normed_by_headk_flatq_ropek_roper'   s                   r(   patternz+QkNormRopePattern.register.<locals>.patternv   sI    iidlDL IriRRGAq!  " wr{dm;=A]  I  $33IxHH%**1733F  " wr{dm;=A]  I  $33IxHH%**1733F "..y&&-XXNFF61$$r*   c                    t          t          | j        j        j        j        j        |||j        |                    d                    }|d         }|                    j	        j
        j
        gd          S )NrI   )r/   num_heads_qnum_heads_knum_heads_vr   r   r1   r2   r3   r   position_idsr   rJ   )r   FUSED_QK_ROPE_OPr   r   r   r   r   rM   rL   r#   r$   )r/   r0   r1   r2   r3   result
result_qkvr'   s          r(   replacementz/QkNormRopePattern.register.<locals>.replacement   s     )  N - -H!!+&^^B//  F  J ##T[$,$MSU#VVVr*   )
torchTensortuplepmregister_replacementr4   r   rA   fwd_onlyrE   )r'   rF   rZ   rc   s   `   r(   registerzQkNormRopePattern.registeru   sJ   	%	%|	% l	% l		%
 !<	% 5<u|;<	% 	% 	% 	% 	% 	%8	W	W|	W l	W l		W
 !<	W 5<u|;<	W 	W 	W 	W 	W 	W: 	OO++!4  		
 		
 		
 		
 		
r*   )F)__name__
__module____qualname____doc__intfloatboolr)   listrd   re   r4   staticmethodr   r   r   r@   rA   rE   r   rj   r<   r*   r(   r   r      sa        8 !&
 

 
 	

 
 
 
 

 
 
 
4
D. 
 
 
 
& 1bn,-!2>"2D"89 
!R^#	$   \ ux3     \
C
 2 C
t C
 C
 C
 C
 C
 C
r*   r   c                   |     e Zd ZdZededdf fd            Zej        de	j
        ddfd            ZdefdZ xZS )	QKNormRoPEFusionPasszJFuse Q/K RMSNorm + RoPE into fused_qk_norm_rope when the custom op exists.configr   Nc                 H   t                                          |           t          d          | _        |j        j        }|t          j        t          j        fvrt          
                    d|           d S t          |t                    }t          |          dk    rt          
                    d           d S t          t          |                                                    }dD ]}dD ]}t#          j                    rBdD ]>}t'          |j        |j        |j        |||	                              | j                   ?Wt'          |j        |j        |j        ||
                              | j                   |                     || j                   d S )Nqk_norm_rope_fusion_pass)	pass_namez5QK Norm+RoPE fusion not enabled: unsupported dtype %sr   zEQK Norm+RoPE fusion enabled, but no Attention layers were discovered.)gh㈵>gư>)TF)FT)r   r   r   r   r   r   )r   r   r   r   r   )superr)   r   patternsmodel_configdtyperd   bfloat16float16loggerwarning_oncer
   r   lennextitervaluesr   enabledr   r!   r   r   rj   dump_patterns)	r'   rv   r}   attn_layerslayerepsilonneoxr   	__class__s	           r(   r)   zQKNormRoPEFusionPass.__init__   s      ,>0-
 -
 -
 #)777G   F -HI-
 -
 {q  W   FT+,,..//00# 	. 	.G% . ."*,, .+8 2 2)%*_&+o).); '$(,;   #(4=11112 &!&"'/%*%7# $   ht}----%.( 	64=11111r*   graphc                     | j                             |          | _        t                              d| j                   d S )NzFused QK Norm+RoPE on %s sites)r{   applymatched_countr   debug)r'   r   s     r(   __call__zQKNormRoPEFusionPass.__call__   s8    !]00775t7IJJJJJr*   c                 6    t          j        | t                    S r;   )r   hash_sourcer   )r'   s    r(   uuidzQKNormRoPEFusionPass.uuid   s    +D2CDDDr*   )rk   rl   rm   rn   r   r	   r)   r   time_and_logr   Graphr   strr   __classcell__)r   s   @r(   ru   ru      s        TT-2z -2d -2 -2 -2 -2 -2 -2^ "Kbh K4 K K K #"KEc E E E E E E E Er*   ru   ),collections.abcr   typingr   rd   torch._inductor.pattern_matcher	_inductorpattern_matcherrg   r   *torch._higher_order_ops.auto_functionalizer   r   vllm.attention.layerr   vllm.configr	   r
   vllm.loggerr   +vllm.model_executor.layers.rotary_embeddingr   fusionr   r   r   inductor_passr   matcher_utilsr   r   vllm_inductor_passr   r   rk   r   ops_Cfused_qk_norm_ropedefaultr`   r   r   ru   r<   r*   r(   <module>r      s   % $ $ $ $ $        , , , , , , , , ,       J J J J J J > > > > > > * * * * * * ? ? ? ? ? ? ? ? # # # # # # G G G G G G 5 5 5 5 5 5 5 5 5 5 + + + + + + A A A A A A A A H H H H H H H H	X		9<2: IcNNZ
 Z
 Z
 Z
 Z
 Z
 Z
 Z
z9E 9E 9E 9E 9E1 9E 9E 9E 9E 9Er*   