
    -`id+                         d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlm	Z	 ddl
mZ ddlmZ  ee          Z G d	 d
e          ZdS )    N)Iterable)auto_functionalized)init_logger)current_platform   )is_func)VllmInductorPassc                   J   e Zd ZdZej        dej        j        ddfd            Z	dej        j
        eej        j
                 z  ddfdZ	 ddej        j        dej        j
        d	eeej        j
        ez  f         d
eej        j
        ez  df         dz  ddf
dZdej        j
        d	eeej        j
        ez  f         ddfdZdej        j
        deeej        j
        f         fdZ	 ddej        j        dej        j
        d
eej        j
        ez  df         dz  ddfdZdS )FixFunctionalizationPassa"  
    This pass defunctionalizes certain nodes to avoid redundant tensor copies.
    After this pass, DCE (dead-code elimination) should never be run,
    as de-functionalized nodes may appear as dead code.

    To add new nodes to defunctionalize, add to the if-elif chain in __call__.
    graphreturnNc                 
   t          j                    rt                              d           d S g | _        d}|j        D ]}t          |t                    s|j        }|j	        d         }|t          j        j        j        j        k    r|d         }|d         }|                     |          }t          |t           j                  ret          |t           j                  rJ|j	        d         |j	        d         k    r-t          |j	        d         t          j        j        j        j                  rt)          d |                                D                       r|j	        d         j	        d         }	|                                D ]t}
|
j        D ]U}t          |t          j        j        j        j                  r*|                    |	           |                     |           V|                     |
           u|                     ||           |                     |           nddd}|                     |||           n|t          j        j        j        j        k    rddd}|                     |||           nG|t          j        j        j        j        k    rd	dd}|                     |||           n
|t          j        j        j        j        k    rd	d
dd}|                     |||           n|t          j        j        j        j        t          j        j        j         j        fv rdd	i}|                     |||           nwtC          t          j        j"        d          r@|t          j        j"        j#        j        k    r!dddddd}|                     |||           n|t          j        j        j$        j        k    rdd	i}|                     |||d           n|t          j        j        j%        j        k    rdd	i}|                     |||d           ntC          t          j        j        d          r>|t          j        j        j&        j        k    rd	dd}|                     |||d           nA|t          j        j        j'        j        k    r ddi}d}|                     ||||           n|dz  }| (                    |d           tS          | j                  }| j        D ]}|*                    |           t                              d||           | j        +                                 d S )NzBXPU platform does not support fix functionalizationpass currently.r   querykeyc              3   |   K   | ]7}|j         D ]-}t          |t          j        j        j        j                  V  .8d S N)usersr   torchopsatenslice_scatterdefault).0getitem_nodeusers      z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/compilation/fix_functionalization.py	<genexpr>z4FixFunctionalizationPass.__call__.<locals>.<genexpr>9   sd        ($0$6  !  ein&B&JKK          )r      inputresidualresultscale)r   r      r   &flashinfer_trtllm_fused_allreduce_normallreduce_innorm_out	quant_out	scale_out)r   r   r$         )r"   r    args)r"   r    r#   silu_and_mul_nvfp4_quantresult_block_scale)r"   r/   r    input_global_scaleqkv)r1   num_heads_qnum_heads_knum_heads_vhead_dimepsq_weightk_weightcos_sin_cacheis_neoxposition_ids)mutated_argsr-   before_cleanupz,De-functionalized %s nodes, removed %s nodes),r   is_xpuloggerdebugnodes_to_removenodesr   r   kwargsr-   r   r   _Crotary_embeddingr   getitem_usersoperatorgetitemr   split_with_sizesallvaluesr   r   replace_all_uses_with_removeinsert_defunctionalizeddefunctionalizefused_add_rms_norm#fused_add_rms_norm_static_fp8_quant rms_norm_dynamic_per_token_quantrms_normrms_norm_static_fp8_quanthasattrvllmr%   silu_and_mulsilu_and_mul_quantr.   fused_qk_norm_rope
dump_graphlen
erase_nodeclear)selfr   countnoderC   	at_targetr   r   getitem_nodesmm_noder   user_of_getitemr<   r-   count_removeds                  r   __call__z!FixFunctionalizationPass.__call__   s    "$$ 	LLT   F46K @	 @	D4!455 [F	!IEIL9AAAwUm $ 2 24 8 8 E8#344$DX%566$D 
1!44
1uy~/N/VWW 5  ,9,@,@,B,B     5 $jm03G - 4 4 6 6 + +/3z > >O& /1M1U    > !0 E Eg N N N $_ = = =T****00===LL&&&& (/5#9#9L((lCCCC eil=EEE#*z::$$UD,????eilNVVV#+
;;$$UD,????eilKSSS#+JGG$$UD,????	%-	6>   !"8}$$UD,????	(PQQ<9>HPQ Q &!!""    $$UD,???? eil7??? !8}$$44G %     eil=EEE !8}$$44P %     	&@AA"!F!NNN#+0DEE$$ 	 % 
 
 
 
 eil=EEE !5z $$UD|RV$WWWWQJEE/000 D011( 	# 	#DT"""":E=	
 	
 	
 	""$$$$$r   node_or_nodesc                     t          |t          j        j                  r| j                            |           dS | j                            |           dS )zM
        Stage a node (or nodes) for removal at the end of the pass.
        N)
isinstancer   fxNoderA   appendextend)r^   rg   s     r   rM   z FixFunctionalizationPass._remove   sT     mUX]33 	7 ''66666 ''66666r   r`   r<   r-   .c                     |                      ||           |                     |||           |                     |           dS )z
        De-functionalize a node by replacing it with a call to the original.
        It also replaces the getitem users with the mutated arguments.
        See replace_users_with_mutated_args and insert_defunctionalized.
        r,   N)replace_users_with_mutated_argsrN   rM   )r^   r   r`   r<   r-   s        r   rO   z(FixFunctionalizationPass.defunctionalize   sM     	,,T<@@@$$UDt$<<<Tr   c                    |                      |                                          D ][\  }}||         }t          |t                    r|j        |         n|}|                    |           |                     |           \dS )a7  
        Replace all getitem users of the auto-functionalized node with the
        mutated arguments.
        :param node: The auto-functionalized node
        :param mutated_args: The mutated arguments, indexed by getitem index.
        If the value of an arg is a string, `node.kwargs[arg]` is used.
        N)rF   itemsri   strrC   rL   rM   )r^   r`   r<   idxr   args         r   ro   z8FixFunctionalizationPass.replace_users_with_mutated_args   s     ++D117799 	 	ICs#C&0c&:&:C$+c""C&&s+++LL		 	r   c                 v    i }|j         D ].}t          |t          j                  r|j        d         }|||<   /|S )z
        Returns the operator.getitem users of the auto-functionalized node,
        indexed by the index they are getting.
        r   )r   r   rG   rH   r-   )r^   r`   r   r   rs   s        r   rF   z&FixFunctionalizationPass.getitem_users   sI    
 J 	" 	"DtX-.. "il!c
r   c                 f   t          t                    sJ d d            |                              5  j        d         }||                    |j                   n2t          fd|D                       }|                    ||           ddd           dS # 1 swxY w Y   dS )a>  
        Insert a new defunctionalized node into the graph before node.
        If one of the kwargs is 'out', provide args directly,
        as node.kwargs cannot be used.
        See https://github.com/pytorch/pytorch/blob/a00faf440888ffb724bad413f329a49e2b6388e7/torch/_inductor/lowering.py#L351

        :param graph: Graph to insert the defunctionalized node into
        :param node: The auto-functionalized node to defunctionalize
        :param args: If we cannot use kwargs, specify args directly.
        If an arg is a string, `node.kwargs[arg]` is used.
        z%node must be auto-functionalized, is z insteadr   N)rC   c              3   `   K   | ](}t          |t                    rj        |         n|V  )d S r   )ri   rr   rC   )r   rt   r`   s     r   r   zCFixFunctionalizationPass.insert_defunctionalized.<locals>.<genexpr>  sO        JM
3(<(<EDK$$#     r   r,   )r   r   inserting_beforer-   call_functionrC   tuple)r^   r   r`   r-   functions     `  r   rN   z0FixFunctionalizationPass.insert_defunctionalized   s2   " t011 	
 	
BDBBB	
 	
1
 ##D)) 		9 		9y|H|##HT[#AAAA     QU     ##H4#888		9 		9 		9 		9 		9 		9 		9 		9 		9 		9 		9 		9 		9 		9 		9 		9 		9 		9s   AB&&B*-B*r   )__name__
__module____qualname____doc__r	   time_and_logr   rj   Graphrf   rk   r   rM   dictintrr   rz   rO   ro   rF   rN    r   r   r   r      s         "W%ehn W% W% W% W% #"W%r7UX]Xehm5L%L 7QU 7 7 7 7 8< x~ hm 3 334	
 EHMC',-4 
    HM15c58=3;N6N1O	    
%(- 
Dehm9K4L 
 
 
 
  8<	9 9x~9 hm9 EHMC',-4	9
 
9 9 9 9 9 9r   r   )rG   collections.abcr   r   *torch._higher_order_ops.auto_functionalizer   vllm.loggerr   vllm.platformsr   fx_utilsr   vllm_inductor_passr	   r|   r?   r   r   r   r   <module>r      s     $ $ $ $ $ $  J J J J J J # # # # # # + + + + + +       0 0 0 0 0 0	X		w9 w9 w9 w9 w9/ w9 w9 w9 w9 w9r   