
    fPi=b                         d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZ d dlmZ  ee          Z G d de          ZdS )	    )	getLoggerN)Fusion)FusionUtils)	NodeProtoTensorProtohelpernumpy_helper)	OnnxModelc                   *    e Zd ZdZdef fdZd!dedefdZded	e	defd
Z
dededefdZdededz  fdZdededz  fdZdedefdZdedeeef         de	fdZdededz  fdZdededz  fdZdededz  fdZdedededededefdZd  Z xZS )"FusionMultiHeadAttentionMMDitzO
    Fuse MultiHeadAttention for Multimodal Diffusion Transformer (MMDiT).
    modelc                 `    t                                          |ddg           i | _        d S )NMultiHeadAttentionSoftmax)fused_op_typesearch_op_types)super__init__unsqueeze_update_map)selfr   	__class__s     }/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/fusion_mha_mmdit.pyr   z&FusionMultiHeadAttentionMMDit.__init__   s5    .BU^T_```$&!!!    r   
start_nodereturnc                 D   | j                             |g d|ddg|          }|dS |d         }t          |j                  dk    rdS | j                             |j        d                   }|dS t          |j                  dk    rdS t          |d                   S )	a  
        Detect num_heads from Reshape & Transpose of q/k/v for both Stable Diffusion 3.x and Flux 1.x:

                MatMul    .. [-1] [24] ..
                 |        |  |  /   /
                Add     Concat(axis=0)
                  |      /
                  Reshape
                     |
                 Transpose(perm=0,1,3,2)
                     |
               (start_node)
        )	TransposeReshapeConcatr      output_name_to_nodeN      )r   match_parent_pathleninputget_constant_valueshapeint)r   r   r"   input_indexnodesconcat_shapevalues          r   get_num_headsz+FusionMultiHeadAttentionMMDit.get_num_heads   s     
,,:::[!Q<Oex - 
 
 =1Ry|!""a''1
--l.@.CDD=1u{q  158}}r   transpose_kconcat_before_transposec                    |rA| j                             |ddgddg|          }|r|                     |d         |          S n>| j                             |dgdg|          }|r|                     |d         |          S dS )a  
                Detect num_heads from subgraph like the following (num_heads=24 in this example):
                               MatMu    .. [-1] [24] ..
                                 |       |  |  /   /
                                Add     Concat
                                  |      /
                                 Reshape
                                    |
                             Transpose(perm=0,2,1,3)
                                    |
                             SimplifiedLayerNormalization
                                    |
                            Transpose(perm=0,1,3,2)

                Another variant is to an extra Concat node to join two symmetrical subgraphs:

                           |              |
                          MatMul        MatMul   .. [-1] [24] ..
                           |              |       |  |  /   /
                          Add  Concat    Add      Concat
                            |  /          |      /
                          Reshape         Reshape
                            |              |
                         Transpose     Transpose(perm=0,2,1,3)
                            |              |
        SimplifiedLayerNormalization  SimplifiedLayerNormalization
                                |     /
                               Concat
                                 |
                            Transpose(perm=0,1,3,2)

                    Both patterns are used in stable diffusion 3.5 model.
        r   SimplifiedLayerNormalizationr   r    r!   )r   r&   r0   )r   r1   r"   r2   r-   s        r   get_num_heads_from_kz2FusionMultiHeadAttentionMMDit.get_num_heads_from_k:   s    D # 	IJ00h(FG!Qex 1  E  I))%(4GHHHI J00<=sXk 1  E  I))%(4GHHHqr   
input_nameoutput_namec                    d}| j                             |          }|Lt          j        t	          j        g dd          |          }| j                             || j                   t          j	        d||g|g| j         
                    d                    }| j                            |           | j        | j        |j        <   |j        d	         S )
a+  Add a Reshape node to convert 4D BxSxNxH to 3D BxSxD.

        Args:
            input_name (str): input name for the 4D tensor of shape BxSxNxH.
            output_name (str): output name for the 3D tensor of shape BxSxD, where D = N * H.

        Returns:
            str: the output name
        bsnh_to_bsd_reshape_dimsN)r   r   r#   int64)dtype)namer   inputsoutputsr<   r   )r   get_initializerr	   
from_arraynparrayadd_initializerthis_graph_namer   	make_nodecreate_node_namenodes_to_addappendnode_name_to_graph_namer<   output)r   r6   r7   new_dims_namenew_dims	reshape_qs         r   reshape_to_3dz+FusionMultiHeadAttentionMMDit.reshape_to_3dk   s     3:--m<<#.rx


'/R/R/RYfgggHJ&&x1EFFF$. M,,Y77	
 
 
	 	  +++7;7K$Y^4""r   mul_qNc                 .   | j                             |ddgddg          }|dS |\  }}t          j        |dg d          sdS |j        d         |j        d<   |j        d         }|dz   |j        d<   |                     |j        d         |dz             S )	a  
        MultiHeadAttenion requires query in BSD format. This function adjusts query from BNSH to BSD format.

        Before:
                               MatMul
                                 |
                               Add      Concat
                                 |      /
                                 Reshape
                                  |
                               Transpose(perm=0,2,1,3)
                                  |
                       SimplifiedLayerNorm
                                  |
                                 Mul

        After:
                               MatMul
                                 |
                                Add      Concat
                                 |      /
                                 Reshape
                                   |
                           SimplifiedLayerNorm
                                   |
                        Reshape (shape=[0, 0, -1])
        r4   r   r   Npermr   r%   r       _BSNH_BSD)r   r&   r   check_node_attributer(   rK   rO   )r   rP   r"   pathsln_atranspose_a
sln_outputs          r   'adjust_query_from_bnsh_to_bsd_no_concatzEFusionMultiHeadAttentionMMDit.adjust_query_from_bnsh_to_bsd_no_concat   s    : z+++[9F
 

 <4!{/V\\\RR 	4 %*1-A\!_
$w.Q!!%,q/:3FGGGr   c                 2   | j                             |g dg d          }|dS |\  }}}t          |j                  dk    rdS | j                             |ddgddg          }|dS |\  }}t	          j        |d	g d
          sdS t	          j        |d	g d
          sdS t	          j        |dd          sdS |j        d         |j        d<   |j        d         |j        d<   t          j        d|j        d         |j        d         g|j        d         dz   g| j         	                    d          d          }	| j
                            |	           | j        | j        |	j        <   |                     |	j        d         |j        d         dz             S )a  
        MultiHeadAttenion requires query in BSD format. This function adjusts query from BNSH to BSD format.

            Before:
                      MatMul      MatMul
                        |            |
                        Add Concat  Add    Concat
                         |    /      |      /
                         Reshape     Reshape
                            |           |
        Transpose(perm=0,2,1,3)      Transpose(perm=0,2,1,3)
                            |           |
            SimplifiedLayerNorm  SimplifiedLayerNorm
                            |     /
                            Concat(axis=2)
                             |
                            Mul

            After:
                   MatMul        MatMul
                     |              |
                    Add Concat     Add     Concat
                     |    /         |     /
                     Reshape       Reshape
                        |            |
           SimplifiedLayerNorm  SimplifiedLayerNorm
                        |       /
                      Concat(axis=1)
                         |
                      Reshape (shape=[0, 0, -1])
        )r   r4   r   )r   r   r   Nr%   r4   r   r    r   rR   rS   axisr   rU   r>   r?   r<   r^   rV   )r   r&   r'   r(   r   rW   r   rF   rK   rG   rH   rI   rE   rJ   r<   rO   )
r   rP   r"   rX   concatrY   rZ   sln_btranspose_bnew_concat_nodes
             r   adjust_query_from_bnsh_to_bsdz;FusionMultiHeadAttentionMMDit.adjust_query_from_bnsh_to_bsd   s   B z++CCCII
 

 <4%)"{v|!!4z+++[9F
 

 <4!{/V\\\RR 	4/V\\\RR 	4/BB 	4 %*1-A$*1-A *LOU\!_5]1%/0,,X66
 
 
 	  111=A=Q$_%9:!!/"8";V]1=MPV=VWWWr   	unsqueezec                    | j                             |j                  }|Ut          |j                  dk    rGt          j        d|j        |j        d         dz   g| j        	                    d          dg          }nd}| j        
                    |          Dt          j        |t          j        dgdg          }| j                            || j                   t          j        d|j        d         |g|j        d         dz   g| j        	                    d          	          }| j                            |           | j        | j        |j        <   |j        d         }|| j         |j        <   |S )
Nr    	Unsqueezer   rU   r%   )r>   r?   r<   axesunsqueeze_axes_2)r<   	data_typedimsvalsr=   )r   getr<   r'   r(   r   rF   rK   r   rG   r@   make_tensorr   INT64rD   rE   rH   rI   rJ   )r   re   updated_unsqueeze_outputnew_nodeinitializer_nameri   s         r   update_unsqueeze_axes_1_to_2z:FusionMultiHeadAttentionMMDit.update_unsqueeze_axes_1_to_2  s   #'#<#@#@#P#P #+9?##q((!+$?&-a07:;44[AA   $6 :--.>??G'-'9-"-"3SS	( ( ($ J../?AUVVV!+%OA.0@A&-a07:;44[AA	   $$X...:>:ND(7'/q'9$8PD%in5''r   addr"   c                    t          |j                  dk    rdS | j                            |g dg d|          }|dS t	          | j                  }|                    |d                   }||dgk    rdS |                    |d                   }||dgk    rdS | j                            |g dg d|          }|dS |                    |d                   }||dgk    rdS |                    |d                   }||dgk    rdS |                     |d                   |d         j        d<   |                     |d                   |d         j        d<   d	S )
a  
        Update axes of Unsqueeze from [1] to [2] in the following pattern:
                  Unsqueeze        Unsqueeze
                  (axes=[0])       (axes=[0])
                     |              |
                  Unsqueeze        Unsqueeze
              ... (axes=[1])  ...  (axes=[1])
                |     /        |   /
                   Mul         Mul
                    |       /
                     Add
        Args:
            add (NodeProto): the Add node
            output_name_to_node (Dict[str, NodeProto]): mapping from output name to node

        Returns:
            bool: True if the pattern is matched and updated successfully, False otherwise.
        r%   F)Mulrg   rg   )r    r    r   Nr    r   )r   r    r   T)r'   r(   r   r&   r   get_squeeze_or_unsqueeze_axesrs   )r   rt   r"   nodes_bfusion_utilsaxes_1axes_0nodes_as           r   update_unsqueeze_axesz3FusionMultiHeadAttentionMMDit.update_unsqueeze_axes(  s   & sy>>Q5 *..s4U4U4UW`W`W`buvv?5"4:..;;GAJGG>Vs]]5;;GAJGG>Vs]]5 *..s4U4U4UW`W`W`buvv?5;;GAJGG>Vs]]5;;GAJGG>Vs]]5"??
KK
"??
KK
tr   c                    | j                             |g dg d          }|dS |\  }}}}}t          |j                  dk    rdS | j                             |ddgddg          }|dS |\  }	}
t	          j        |d	g d
          sdS t	          j        |
d	g d
          sdS t	          j        |dd          sdS |                     ||          sdS |j        d         |j        d<   |
j        d         |	j        d<   t          j        d|j	        d         |	j	        d         g|j	        d         dz   g| j         
                    d          d          }| j                            |           | j        | j        |j        <   | j                             |j	        d         |j	        d                    |                     |j	        d         |j	        d         dz             S )a3  
        Adjust graph to change query format from BNSH to BSD for Flux model.
        Note that the graph pattern is complex, and we only do a shallow match here.

        Before:
                       |               |
        Transpose(perm=0,2,1,3)    Transpose(perm=0,2,1,3)
                        |              |
        SimplifiedLayerNorm  SimplifiedLayerNorm
                        |             /
                        Concat(axis=2)
                         |
                        Mul     Mul
                         |    /
                          Add
                           |
                          Mul

        After (Transpose nods are removed, and a Reshape is added):

                        |           |
            SimplifiedLayerNorm  SimplifiedLayerNorm
                        |         /
                    Concat(axis=1)
                        |
                        Mul    Mul
                         |    /
                          Add
                           |
                       Reshape (shape=[0, 0, -1])
        )Addrv   r   r4   r   )r   r   r   r   r   Nr%   r4   r   r    r   rR   rS   r^   r   rU   r_   rV   )r   r&   r'   r(   r   rW   r}   r   rF   rK   rG   rH   rI   rE   rJ   r<   replace_input_of_all_nodesrO   )r   rP   r"   rX   rt   _mul_ar`   rY   rZ   ra   rb   rc   s               r   "adjust_flux_query_from_bnsh_to_bsdz@FusionMultiHeadAttentionMMDit.adjust_flux_query_from_bnsh_to_bsd]  s   B z++QQQOO
 

 <426/VVUKv|!!4z+++[9F
 

 <4!{/V\\\RR 	4/V\\\RR 	4/BB 	4 ))#/BCC 	4 %*1-A$*1-A *LOU\!_5]1%/0,,X66
 
 
 	  111=A=Q$_%9:
--fmA.>@VWX@YZZZ!!#*Q-A1GHHHr   c                 t   | j                             |g dg d          }|dS |\  }}}}t          j        |dg d          sdS |                     ||          sdS |j        d         |j        d<   |j        d         dz   |j        d<   |                     |j        d         |j        d         dz             S )	a0  
        Adjust graph to change query format from BNSH to BSD for Flux model.
        Note that the graph pattern is complex, and we only do a shallow match here.

        Before:
                      |
                    Transpose(perm=0,2,1,3)
                      |
                    SimplifiedLayerNorm
                      |
                     Mul     Mul
                       |   /
                       Add
                        |
                       Mul

        After (Transpose is removed, and a Reshape is added):

                        |
                      SimplifiedLayerNorm
                        |
                        Mul   Mul
                         |   /
                         Add
                          |
                       Reshape (shape=[0, 0, -1])
        )r   rv   r4   r   )r   r   r   r   NrR   rS   r   rU   rV   )r   r&   r   rW   r}   r(   rK   rO   )r   rP   r"   rX   rt   r   rY   rZ   s           r   )adjust_flux_single_query_from_bnsh_to_bsdzGFusionMultiHeadAttentionMMDit.adjust_flux_single_query_from_bnsh_to_bsd  s    : z++GGGLL
 

 <4*.'VUK/V\\\RR 	4 ))#/BCC 	4 %*1-A
1/
1!!#*Q-A1GHHHr   qc           	      
   t          j        d|g|dz   g| j                            dd          g d          }| j                            |           | j        | j        |j        <   | 	                    |dz   |dz             S )Nr   rU   Transpose_BNSH_to_BSNH)name_prefixrS   )r<   rR   rV   )
r   rF   r   rG   rH   rI   rE   rJ   r<   rO   )r   r   r"   transpose_qs       r   transpose_reshape_bnsh_to_bsdz;FusionMultiHeadAttentionMMDit.transpose_reshape_bnsh_to_bsd  s    &C[M,,[F^,__
 
 
 	  ---9=9M$[%56!!!g+q6z:::r   kvrK   	num_headsc                     |dk    sJ |||g}|g}t          j        d||| j                            d                    }d|_        |j                            t          j        d|          g           |S )a~  
        Create a MultiHeadAttention node.

        Args:
            q (str): name of q
            k (str): name of k
            v (str): name of v
            output (str): output name of MHA
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.

        Returns:
            NodeProto: the node created.
        r   r   r=   zcom.microsoftr   )r   rF   r   rG   domain	attributeextendmake_attribute)	r   r   r   r   rK   r   
mha_inputsmha_outputsmha_nodes	            r   create_multihead_attention_nodez=FusionMultiHeadAttentionMMDit.create_multihead_attention_node  s    , 1}}}} AY
 h# ,,-ABB	
 
 
 *!!6#8i#P#P"QRRR r   c                    |j         dk    sJ |}| j                            |j        d                   rd S | j                            |g dg d|          }|d S |\  }}}t          j        |dg d          sd S | j                            |g dg d          }	|	d S |	\  }
}}}}}}}|j        d         }||j        d         k    rd S | j                            |
d	d
gddg          }|d S |\  }}|j        d         }t          j        |dg d          sd S | j                            |ddgddg          }|d S |d         j        d         |j        d         k    rd S |j        d         }| j        	                    |dd|          }|y| j        	                    |d
d|          }|d S t          j        |dg d          sd S | j        	                    |d
d|          }|d S t          j        |dg d          sd S n<| j        	                    |d
d|          }|d S t          j        |dg d          sd S |r| 
                    ||          n| 
                    ||d          }|dk    r!|                     |||d u          }|dk    rd S ||                     ||          }n|                     ||          }|F|                     ||          }|.|                     ||          }||                     ||          }|                     ||||j        d         |          }| j                            |           | j        | j        |j        <   | j                            |||g           d| _        d S )Nr   r   )MatMulr   r   )r   r   r   r   rR   rS   )r   rv   SqrtDivr   CastSliceShape)r   r   r    r   r    r   r   r   rv   r   r    )r   r    rT   r%   r   r   r   )r,   r"   )r,   )r   r   r   rK   r   T)op_typer   find_graph_outputrK   match_child_pathr   rW   r&   r(   match_parentr0   r5   rd   r\   r   r   r   r   rH   rI   rE   rJ   r<   nodes_to_remover   prune_graph)r   nodeinput_name_to_nodesr"   softmaxr-   
matmul_s_vtranspose_outreshape_outq_nodes	matmul_qkrP   sqrt_q_2div_qsqrt_q_shape_qq_bnshk_nodesmul_kr1   r   k_scale_nodesr   concat_vtranspose_1transpose_2r   queryrq   s                                 r   fusez"FusionMultiHeadAttentionMMDit.fuse  s   |y(((( :''q(9:: 	F
++7779Q9Q9QSf
 
 =F16.
M;/v|||TT 	F*..NNN$$$
 
 ?FCJ@	5(E61aQW]1%%%F*..y5+:NQRTUPVWW?F${a /V\\\RR 	F
44UVUOaQRVTT F!!$q(999FQ :**:xQdw*xx *11+1J] 2  K "3KVV *11+1J] 2  K "3KVV 
 *11KQL_ 2  K "3KVV 
 TDx)<===##J0CQR#SS 	 >>11+?RT\dhThiiIA~~ 66u>QRREE@@H[\\E=;;ECVWWE}FFuNabb= !>>vGZ[[E77%a( 8 
 
 	  ***6:6J$X]3##Z$LMMM  r   )r   )__name__
__module____qualname____doc__r
   r   r   r+   r0   boolr5   strrO   r\   rd   rs   dictr}   r   r   r   r   r   __classcell__)r   s   @r   r   r      s|        'i ' ' ' ' ' ' 	 Z]    B/	 /im /ru / / / /b# ## ## # # # #4.HY .H`cfj`j .H .H .H .H`MX9 MXVY\`V` MX MX MX MX^"(i "(C "( "( "( "(H3 3cS\nI] 3bf 3 3 3 3jRI	 RI[^ae[e RI RI RI RIh1Iy 1Ibehlbl 1I 1I 1I 1If;s ;CRVJ ; ; ; ;)) ) 	)
 ) ) 
) ) ) )V             r   r   )loggingr   numpyrB   fusion_baser   ry   r   onnxr   r   r   r	   
onnx_modelr
   r   loggerr    r   r   <module>r      s   
                 $ $ $ $ $ $ = = = = = = = = = = = =            	8		K
  K
  K
  K
  K
 F K
  K
  K
  K
  K
 r   