
    fPi                         d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	m
Z
 d dlmZmZmZmZ d dlmZ  ee          Z G d d	          Z G d
 de          ZdS )    )	getLoggerN)Fusion)AttentionMaskFormat)FusionUtilsNumpyHelper)	NodeProtoTensorProtohelpernumpy_helper)	OnnxModelc                   L    e Zd ZdZdefdZdefdZd Zd Z	de
d	e
d
z  fdZd
S )AttentionMask:
    Fuse Attention subgraph into one Attention node.
    modelc                     || _         i | _        i | _        t          |          | _        t
          j        | _        |                                | _	        d S N)
r   mask_indicemask_castedr   utilsr   MaskIndexEndmask_formatget_opset_versionopset_version)selfr   s     }/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/fusion_attention.py__init__zAttentionMask.__init__   sM    
 ''
.;"4466    r   c                     || _         d S r   )r   )r   r   s     r   set_mask_formatzAttentionMask.set_mask_format    s    &r   c                 R    || j         v r|| j         |         k    sJ || j         |<   d S r   )r   )r   mask
mask_indexs      r   set_mask_indicezAttentionMask.set_mask_indice#   s<    4###!1$!77777!+r   c                 x    t          | j                  dk    sJ t          t          | j                            S Nr   )lenr   nextiter)r   s    r   get_first_maskzAttentionMask.get_first_mask(   s7    4#$$q((((D)**+++r   mask_2dreturnNc           	      ^   | j         t          j        k    rd S || j        v r| j        |         S | j                            |          r| j                            |          \  }}n| j                            |          \  }}d}|r
|| j	        |<   | j         t          j
        k    r|| j        |<   |S | j                            d          }| j        dk     rwt          j        d|g|g| j                            dd                    }|j                            t          j        ddg          t          j        d	d
          g           nd}| j                            |          =| j                            t          j        |t*          j        dgdgd                     t          j        d||g|g| j                            dd                    }|j                            t          j        d	d
          g           | j                            |           || j        |<   |S )NTr"      	ReduceSumMaskReduceSuminputsoutputsnameaxes   keepdimsr   ort_const_1_reduce_sum_axesFr3   	data_typedimsvalsraw)r   r   NoMaskr   r   find_graph_inputr   cast_graph_input_to_int32cast_input_to_int32r   r   create_node_namer   r
   	make_node	attributeextendmake_attributeget_initializeradd_initializermake_tensorr	   INT64add_node)r   r*   casted
input_name
_cast_nodeoutput_namemask_index_node	axes_names           r   process_maskzAttentionMask.process_mask,   sW   29994d&&&#G,, :&&w// 	!%!E!Eg!N!NFJJ%)Z%C%CG%L%L"J
F 	3(2DW% 2@@@(2DW% j11,??""$."|$Z00oNN	  O %,,f.CFQC.P.PRXRghrtuRvRv-wxxxx 6Iz)))44<
**&&"-"3SS!     %."I.$Z00oNN	  O %,,f.CJPQ.R.R-STTT
O,,,$/!r   )__name__
__module____qualname____doc__r   r   r   r   r#   r)   strrQ    r   r   r   r      s         7i 7 7 7 7'+> ' ' ' ', , ,
, , ,8C 8C$J 8 8 8 8 8 8r   r   c            (       X    e Zd ZdZdddddgfdededed	edz  d
ededee	         f fdZ
dedeeef         fdZdedeeef         fdZdefdZde	fdZde	de	de	fdZde	de	de	fdZdededz  dedz  d e	dedz  f
d!Zd"ed#ed$edededz  dedz  deeeef         fd%Z	 	 	 	 	 	 	 	 d5d"ed#ee	z  dz  d$ee	z  dz  dededz  dedz  deded'e	d(e	de	d)ede	de	d*e	d+e	d,ededz  f$d-Z	 	 	 	 	 	 	 d6d.e	dz  d"ed#ed$edededededed/e	d'e	d0e	d1ede	de	d*e	d+e	d2edz  dedz  f&d3Zd4 Z xZS )7FusionAttentionr   NFSkipLayerNormalizationLayerNormalizationr   hidden_size	num_headsattention_maskuse_multi_head_attention!disable_multi_head_attention_biassearch_op_typesc                    |rdnd}t                                          |||           || _        || _        |r|nt	          |          | _        || _        || _        d | _        d| _	        d| _
        d | _        d| _        d S )NMultiHeadAttention	AttentionT)superr   r\   r]   r   r^   r_   r`   mask_filter_valuenum_heads_warninghidden_size_warningshape_infershape_infer_done)
r   r   r\   r]   r^   r_   r`   ra   attention_op_name	__class__s
            r   r   zFusionAttention.__init__l   s     5M]00R] 1?CCC&"0>XnnMRWDXDX(@%1R.!% "&#'  $r   concatr+   c                    t          |j                  dk    r| j                            |j        d                   }| j                            |j        d                   }t	          |t
          j                  rI|j        dk    r>t	          |t
          j                  r$|j        dk    r|d         |d         |d         z  fS | j        | j	        fS )aU  
        Detect num_heads and hidden_size from Concat node in the following subgraph:

        SkipLayerNormalization or EmbedLayerNormalization
                        /        |
                     MatMul    Shape
                        |        |
                       Add     Gather(indices=0)
                        |        |
                        |      Unsqueeze
                        |        |
                        |     Concat (*, -1, 12, 64)
                        |     /
                       Reshape
                          |
                       Transpose
                 r5   r   )
r&   inputr   get_constant_value
isinstancenpndarraysizer]   r\   )r   rm   r]   	head_sizes       r   )get_num_heads_and_hidden_size_from_concatz9FusionAttention.get_num_heads_and_hidden_size_from_concat   s    $ v|!!
55fl1oFFI
55fl1oFFI9bj11ANa''y"*55 (Na'' |Yq\IaL%@@@~t///r   	reshape_qc                 T   | j                             |j        d                   }|q| j                             |d          }| |j        dk    r|                     |          S t                              d|j        d                    | j        | j	        fS t          |t          j                  r+t          |          dk    s|d         dk    s|d         dk    r)t                              d	|           | j        | j	        fS |d         }|d         }||z  }| j        dk    r:|| j        k    r/| j        r(t                              d
| j        |           d| _        | j	        dk    r:|| j	        k    r/| j        r(t                              d| j	        |           d| _        ||fS )zDetect num_heads and hidden_size from a reshape node.

        Args:
            reshape_q (NodeProto): reshape node for Q

        Returns:
            Tuple[int, int]: num_heads and hidden_size
        r5   NConcatz%s is not initializer.ro   rp   r   rq   zGq_shape_value=%s. Expected value are like [0, 0, num_heads, head_size].z>--num_heads is %d. Detected value is %d. Using detected value.Fz@--hidden_size is %d. Detected value is %d. Using detected value.)r   rs   rr   
get_parentop_typery   loggerdebugr]   r\   rt   ru   rv   r&   rg   warningrh   )r   rz   q_shape_valuerm   r]   rx   r\   s          r   get_num_heads_and_hidden_sizez-FusionAttention.get_num_heads_and_hidden_size   s    
55ioa6HII Z**9a88F!fn&@&@EEfMMMLL19?13EFFF>4#333 M2:66	4=!!Q&&a A%%q)9Q)>)>LLbdqrrr>4#333!!$	!!$	)+>A)t~"="=% /TVZVdfo   */&aK43C$C$C' 1VX\Xhju   ,1(+%%r   add_qkc                    | j         s'| j                            d          | _        d| _         | j        d S | j                            |j        d                   }| j                            |j        d                   }||t                              d|           d S ||k    rt                              d|           d S |j        d         S )NT)updater   r5   zone of the inputs of %s is Nonez)the shape of two inputs of %s is not same)rj   r   infer_runtime_shaperi   get_edge_shaperr   r   r   )r   r   input_0_shapeinput_1_shapes       r   get_add_qk_strzFusionAttention.get_add_qk_str   s    $ 	)#z==T=JJD$(D!#4(77QHH(77QHH M$9LL:FCCC4M))LLDfMMM4|Ar   c                    dz   t          t          fd| j                            }t          |          dk    rS t          |          dk    sJ | j                            d          }t          j        dfdt          | j	                  D             g|d          }| j        
                    |           | j        | j        |<   S )N_maskc                 &    | j         d         k    S r%   )output)nodemask_output_names    r   <lambda>z0FusionAttention.reshape_add_qk.<locals>.<lambda>   s    t{1~AQ/Q r   r5   r   r|   c                     g | ]}S rW   rW   ).0_r   s     r   
<listcomp>z2FusionAttention.reshape_add_qk.<locals>.<listcomp>   s    :::qF:::r   r1   r2   r3   axis)listfilternodes_to_addr&   r   rA   r
   rB   ranger]   appendthis_graph_namenode_name_to_graph_name)r   r   concat_nodeconcat_node_nameconcat_add_qk_fp32r   s    `   @r   reshape_add_qkzFusionAttention.reshape_add_qk   s     "G+ 6"Q"Q"Q"QSWSdeeff{q  ##;1$$$$:66x@@#-::::E$.$9$9:::%&!
 
 
 	  !34449=9M$%56r   past_kpast_vc                     | j                             d          }| j                             d          }|dz                       dd          }|dz                       dd          }t          j        d|g|g|dg          }t          j        d|g|g|dg          }| j                            |           | j                            |           | j        | j        |<   | j        | j        |<   | j                             d          }	|                    dd	                              dd                              d
d          }
t          j        d||g|
g|	d          }| j                            |           | j        | j        |	<   |
S )zConcatenate past_k and past_v inputs to create past_kv input.

        Args:
            past_k (str): name of past K value
            past_v (str): name of past V value

        Returns:
            kv_output_name (str): name of past KV value
        	Unsqueeze_5d.r   r   )r1   r2   r3   r4   r|   z.valuez.kv_value_kvr   )	r   rA   replacer
   rB   r   r   r   r   )r   r   r   unsqueeze_k_nameunsqueeze_v_name	k_5d_name	v_5d_namek_5dv_5dr   kv_output_name	concat_kvs               r   r   zFusionAttention.concat_kv  s     :66{CC:66{CCe^,,S#66	e^,,S#66	8K!
 
 
 8K!
 
 
 	  &&&  &&&9=9M$%569=9M$%56  :66x@@%88@@cJJRRS[]bcc$y)#$!
 
 
	 	  +++9=9M$%56r   present_k_namepresent_v_namekv_nodec                 $   d\  }}| j                             |          }| j                             |          }|Jt          j        t	          j        dd          |          }| j                             || j                   |Jt          j        t	          j        dd          |          }| j                             || j                   | j                             d          }| j                             d          }	t          j
        d||g|g|d	          }
t          j
        d||g|g|	d	          }| j                            |
           | j                            |           | j        | j        |<   | j        | j        |	<   dS )
a?  Split kv_node containing present KV values into separate present K and present V values.

        Args:
            present_k_name (str): name of output to store present K value in
            present_v_name (str): name of output to store present V value in
            kv_node (str): name of present KV values
        )index_0index_1Nr   int64)dtyper3   r5   Gatherr   )r   rF   r   
from_arrayru   arrayrG   r   rA   r
   rB   r   r   r   )r   r   r   r   k_indexv_indexk_dimv_dimgather_k_namegather_v_name	present_k	present_vs               r   split_kvzFusionAttention.split_kv8  s    0
**733
**733= +BHQg,F,F,FWUUUEJ&&ud.BCCC= +BHQg,F,F,FWUUUEJ&&ud.BCCC 
33H==
33H==$W%#$
 
 
	 $W%#$
 
 
	 	  +++  +++6:6J$]36:6J$]333r   q_addk_addv_addname_prefixc                 D   | j                             |j        d                   p$| j                             |j        d                   }t          j        |          }t          j        |          }t          j        |          }|^| j                             |j        d                   p$| j                             |j        d                   }	t          j        |	          }|^| j                             |j        d                   p$| j                             |j        d                   }
t          j        |
          }t          j        |||fd          }dt          j        |j	                  z  }|dz   }| 
                    ||j        |g|           |S )Nr5   r   r   rq   	_qkv_biasr3   r9   r:   r;   )r   rF   rr   r   to_arrayru   
zeros_likestackprodshaperG   r9   )r   r   r   r   r   q_biasqbkbvbk_biasv_biasqkv_biasqkv_bias_dim	bias_names                 r   create_combined_qkv_biasz(FusionAttention.create_combined_qkv_biase  s|    ++EKN;;itz?Y?YZ_ZefgZh?i?i!&))]2]2Z//A??m4:C]C]^c^ijk^lCmCmF%f--BZ//A??m4:C]C]^c^ijk^lCmCmF%f--B8RRLq1112728,,,+-	&	 	 	
 	
 	
 r   q_matmulk_matmulv_matmulc                    | j                             d          }|j        d         |j        d         k    r|j        d         |j        d         k    sJ | j                             |j        d                   }| j                             |j        d                   }	| j                             |j        d                   }
t	          j        |          }t	          j        |	          }t	          j        |
          }|j        |j        k    r|j        |j        k    sJ |j        d         }t          j        |||fd          	                    |d|z  f          }|dz   }| 
                    ||j        |j        d         |j        d         g|           |dz   }t          j        d|j        d         |g|g|	          }| j        | j        |<   |g}|d
z   }| 
                    |t           j        dgdgd           |dz   }| 
                    |t           j        dg|gd           |dz   }| 
                    |t           j        dgd|z  gd           |dz   }| 
                    |t           j        dgd|z  gd           |dz   }| 
                    |t           j        dgdgd           |dz   }t          j        d||||g|g| j                             d          	          }| j        | j        |j        <   |dz   }t          j        d||||g|g| j                             d          	          }| j        | j        |j        <   |dz   }t          j        d||||g|g| j                             d          	          }| j        | j        |j        <   |}|} |}!|                    |||g           | j        r|| j                             |j        d                   rdnd}"t          j        t	          j        | j                             |j        |"                                       r8||j        d|"z
  <   |}|                    |           | j        | j        |j        <   || j                             |j        d                   rdnd}"t          j        t	          j        | j                             |j        |"                                       r8||j        d|"z
  <   |} |                    |           | j        | j        |j        <   || j                             |j        d                   rdnd}"t          j        t	          j        | j                             |j        |"                                       r8||j        d|"z
  <   |}!|                    |           | j        | j        |j        <   | j                            |           || |!fS )a  Create packed QKV MatMul node before MultiHeadAttention node.
           This is for the scenario where an Attention node should be created but cannot be created
           because past_key and past_value are separate inputs and not one concatenated input.

        Args:
            q_matmul (NodeProto): name of MatMul from Q path - (batch_size, sequence_length, hidden_size)
            k_matmul (NodeProto): name of MatMul from K path - (batch_size, sequence_length, hidden_size)
            v_matmul (NodeProto): name of MatMul from V path - (batch_size, sequence_length, hidden_size)
            q_add (NodeProto): name of Add from Q path
            k_add (NodeProto): name of Add from K path
            v_add (NodeProto): name of Add from V path

        Returns:
             q_output (NodeProto): Slice node for Q
             k_output (NodeProto): Slice node for K
             v_output (NodeProto): Slice node for V
        MatMulr   r5   r   rq   _qkv_weightr   _qkv_outr0   _q_start_indexFr8   _k_start_index_v_start_indexrp   _end_of_qkv_index_qkv_last_axis_q_outSlice_k_out_v_out)r   rA   rr   rF   r   r   r   ru   r   reshaperG   r9   r
   rB   r   r   r	   rI   r3   rD   r`   anyr   r   )#r   r   r   r   r   r   r   matmul_node_nameq_weightk_weightv_weightqwkwvwd
qkv_weightqkv_weight_nameqkv_matmul_output
qkv_matmul	qkv_nodesq_slice_namek_slice_namev_slice_nameend_of_qkv_nameqkv_last_axis_nameq_slice_outputq_slicek_slice_outputk_slicev_slice_outputv_sliceq_outputk_outputv_outputinitializer_inputs#                                      r   create_packed_qkv_matmul_nodez-FusionAttention.create_packed_qkv_matmul_node  sP   4  :66x@@ ~a HN1$555(.:Kx~^_O`:`:`:`` :--hnQ.?@@:--hnQ.?@@:--hnQ.?@@!(++!(++!(++x28##BH(<(<(<<HQKXr2rl333;;QAJGG
*]: ("1%z'7':;	 	 	
 	
 	
" -z9%N1%7&'!	
 
 

 :>9M$%56L	 (*::,+:KSTRU]^\_ejkkk'*::,+:KSTRU]^\_ejkkk'*::,+:KSTRU]^ab]b\cinooo*-@@/[=NVWUX`ade`e_flqrrr-0@@"4@QYZX[cebflqrrr)H4"%|\CUV#$,,W55	
 
 
 6:5I$W\2)H4"%|\CUV#$,,W55	
 
 
 6:5I$W\2)H4"%|_FXY#$,,W55	
 
 
 6:5I$W\2'7G45551 	T )-)C)CEKPQN)S)S$ZAAYZ!6+.tz/I/I%+VgJh/i/ijjkk T9GEK$5 56$H$$U+++?C?SD0< )-)C)CEKPQN)S)S$ZAAYZ!6+.tz/I/I%+VgJh/i/ijjkk T9GEK$5 56$H$$U+++?C?SD0< )-)C)CEKPQN)S)S$ZAAYZ!6+.tz/I/I%+VgJh/i/ijjkk T9GEK$5 56$H$$U+++?C?SD0< 	  +++8++r    r   key_padding_maskunidirectionalr   r   
packed_qkvc                 z   |dk    sJ |dk    r'||z  dk    rt                               d||           dS d | j                                        j        D             }| j                            d          }g }|rY|                     ||||||          \  }}}|                    |j        d         |j        d         |j        d         g           n(t          |t                    rt          |t                    r{| j        r:|                    |j        d         |j        d         |j        d         g           n|                    |j        d         |j        d         |j        d         g           nt          |t                    rlt          |t                    rW||v rS||v rO| j        r$|                    |j        d         ||g           n&|                    |j        d         ||g           ndS | j        s.|                     ||||          }|                    |           n|                    d           |r|r|                    |
|||g           n|
s|r|                    |
|g           |	g}|r|r|                    ||g           t          j        d|||          }d	|_        |j                            t          j        d
|                     |r:|j                            t          j        dt)          |                               |                     d           |S )a  Create a MultiHeadAttention node.

        Args:
            q_matmul (NodeProto): name of MatMul from Q path - (batch_size, sequence_length, hidden_size)
            k_matmul (NodeProto): name of MatMul from K path - (batch_size, sequence_length, hidden_size) or (batch_size, num_heads, past_sequence_length, head_size)
            v_matmul (NodeProto): name of MatMul from V path - (batch_size, sequence_length, hidden_size) or (batch_size, num_heads, past_sequence_length, head_size)
            q_add (NodeProto): name of Add from Q path
            k_add (NodeProto): name of Add from K path
            v_add (NodeProto): name of Add from V path
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            output (str): output name of MHA
            key_padding_mask (str): name of key padding mask
            add_qk (str): name of add after Q x K'
            unidirectional (bool): whether to apply causal attention mask automatically or not
            past_k (str): name of past K value - (batch_size, num_heads, past_sequence_length, head_size)
            past_v (str): name of past V value - (batch_size, num_heads, past_sequence_length, head_size)
            present_k (str): name of present K value - (batch_size, num_heads, sequence_length, head_size)
            present_v (str): name of present V value - (batch_size, num_heads, sequence_length, head_size)
            packed_qkv (bool): whether to combine MatMuls from Q, K, V paths
                               Note: This is for the scenario where an Attention node should be created but cannot be created
                               because past_key and past_value are separate inputs and not one concatenated input.

        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        r   9input hidden size %d is not a multiple of num of heads %dNc                     h | ]	}|j         
S rW   r   )r   r   s     r   	<setcomp>zBFusionAttention.create_multihead_attention_node.<locals>.<setcomp>H  s    LLL4TYLLLr   rd   r  rc   r0   com.microsoftr]   r  )r   r   r   graphrr   rA   r  rD   r   rt   r   r`   rV   r   r   r
   rB   domainrC   rE   intincrease_counter)r   r   r   r   r   r   r   r]   r\   r   r  r   r  r   r   r   r   r  graph_input_namesmha_node_name
mha_inputsr  r	  r  r   mha_outputsmha_nodes                              r   create_multihead_attention_nodez/FusionAttention.create_multihead_attention_node  s   ^ 1}}}}??i 7A==LLTVaclmmm4LL4:3C3C3E3E3KLLL
33K@@ 
 	(,(J(J) )%GWg w~a0'.2CW^TUEVWXXXX),, 	Hi1P1P 	5 `!!5<?HOA4FUV"XYYYY!!8?1#5xq7I8?[\K]"^____x%%	8S))	 ------5 L!!5<?Hh"GHHHH!!8?1#5x"JKKKK4 5 	"55eUE=YYIi((((b!!!  	:f 	:/HIIII 	: 	:/8999 h 	7 	7	95666# 	
 
 
 *!!&"7Y"O"OPPP 	d%%f&;<LcR`NaNa&b&bccc2333r   r"   first_input
add_qk_strcausalscalec                    |dk    sJ |	dk    r'|	|z  dk    rt                               d|	|           dS d}|||d}| j                            |j        d                   }| j                            |j        d                   }| j                            |j        d                   }d\  }}}|r| j                            |j        d                   p$| j                            |j        d                   }| j                            |j        d                   p$| j                            |j        d                   }| j                            |j        d                   p$| j                            |j        d                   }|r|r|r|sdS |t          |j        d          d           dS t          j        |          }t          j        |          }t          j        |          }|j        |j        k    sJ |j        d         }|j        d         }|j        d         }||cxk    r|k    sn J |	dk    r"|	|k    rt           	                    d	|	|           d} |j        |j        k    rd} t          j        |j        dd                   }!t          j        |j        dd                   }"t          j        |j        dd                   }#d}$| r"t          j        |||fd
          }%|!|"z   |#z   }$nt          j        |||fd
          }%d|!z  }$d}&d}'|rt          j        |          }(t          j        |          })t          j        |          }*t          j        |(j                  }+t          j        |)j                  },t          j        |*j                  }-|+|,cxk    r|!k    sn J |-|#k    sJ | r"t          j        |(|)|*fd
          }'|+|,z   |-z   }&nt          j        |(|)|*fd
          }'d|+z  }&| j                            d          }.| j        s0|                     |.dz   |j        |t%          |$          g|%           |r/|                     |.dz   |j        t%          |&          g|'           | j        r|rt                               d           dS |j        d         |j        d         |j        d         |.dz   g}/||/                    |           t+          j        d|/|g|.          }0|                     d           n9|
|.dz   |r|.dz   ndg}/||/                    |           n|/                    d           |o|}1|1r+|                     ||          }2|/                    |2           |r,|1s|/                    d           |/                    |           |g}3|rl|rj|                    dd                              dd                              dd          }4|3                    |4           |                     |||4           t+          j        d|/|3|.          }0|                     d           d|0_        |0j                            t+          j        d|          g           |r.|0j                            t+          j        dd          g           |.|0j                            t+          j        d|          g           | r1|0j                            t+          j        d|!|"|#g          g           | j        @|0j                            t+          j        dtA          | j                            g           |0S )a>  Create an Attention node.

        Args:
            mask_index (str | None): mask input
            q_matmul (NodeProto): MatMul node in fully connection for Q
            k_matmul (NodeProto): MatMul node in fully connection for K
            v_matmul (NodeProto): MatMul node in fully connection for V
            q_add (NodeProto): Add bias node in fully connection for Q
            k_add (NodeProto): Add bias node in fully connection for K
            v_add (NodeProto): Add bias node in fully connection for V
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            first_input (str): first input name
            output (str): output name
            add_qk_str (str): name of Add node after Q x K'
            causal: whether it is uni-directional mask.
            past_k (str): name of input for past K value
            past_v (str): name of input for past V value
            present_k (str): name of output to store present K value
            present_v (str): name of output to store present V value
            scale: scale before softmax

        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        r   r  NTFr5   )NNNzl is not an initializer. Please set do_constant_folding=True in torch.onnx.export to unblock attention fusionzInput hidden size (%d) is not same as weight matrix dimension of q,k,v (%d). Please provide a correct input hidden size or pass in 0r   rq   rd   r   r   r   zVMultiHeadAttention does not support relative_position_bias: cannot fuse the attention.rc   r0   r  z.key_keyr   r   r  r]   r  r'  qkv_hidden_sizesrf   )!r   r   r   rF   rr   printr   r   r   r   ru   r   concatenater   rA   r_   rG   r9   r  r   r   r
   rB   r  r   r   r   r  rC   rD   rE   rf   float)5r   r"   r   r   r   r   r   r   r]   r\   r$  r   r%  r&  r   r   r   r   r'  has_biasr   r   r   r   r   r   r   r   r   
qw_in_size
kw_in_size
vw_in_sizeis_qkv_diff_dimsqw_out_sizekw_out_sizevw_out_sizeqkv_weight_dimr   r   r   r   r   r   q_bias_shapek_bias_shapev_bias_shapeattention_node_nameattention_inputsattention_nodepast_existspast_kvattention_outputs
present_kvs5                                                        r   create_attention_nodez%FusionAttention.create_attention_node  sJ   \ 1}}}}??i 7A==LLTVaclmmm4=U]u}H:--hnQ.?@@:--hnQ.?@@:--hnQ.?@@!1 	Z//A??m4:C]C]^c^ijk^lCmCmFZ//A??m4:C]C]^c^ijk^lCmCmFZ//A??m4:C]C]^c^ijk^lCmCmF  f  t>!$ g g g   4!(++!(++!(++ x28####Xa[
Xa[
Xa[
Z5555:555555??{j88NNJ	   !8rx#
 gbhqrrl++gbhqrrl++gbhqrrl++ 	-R1===J(;6DNN2r2,Q777J_N&* 	0%f--B%f--B%f--B728,,L728,,L728,,L<>>>>;>>>>>>;.... 0>2r2,Q???+l:\I8RRLq999 </"j99+FF, 	  (=8", #n"5"56	 !     	  (;6 *,''(	 !    ( 9	/ uvvvt """#k1	  % ''
333#-$'(	  N !!"67777 #m35=E#k112 
 % ''
3333 ''+++ +VK 1..88 ''000 4" 0$++B/// ''
333!' @Y @&..vr::BB62NNVVWZ\_``
!((444iJ???#-')(	  N !!+... / '')>{I)V)V(WXXX 	Z$++V-BCSUV-W-W,XYYY$++V-B7E-R-R,STTT 	$++&'9KVa;bccd   !-$++V-BCVX]^b^tXuXu-v-v,wxxxr   c                 @   |}|}|j         dk    r#| j                            |dd          }||}nd S | j                            |g dg d          }d }|	|\  }	}	}
}}n,| j                            |g dg d          }||\  }	}}}nd S g }t	          |j                  D ]7\  }}||vr
||d         j        d         k    r"|                    |           8t          |          dk    rd S |d         }| j                            |d	d          }|p||j        d                  }|6t          |          d
k    r#|d         }|j         dk    r|j        d         }nWd S |!t          |          dk    r|j        d         }n2d S |j         dk    r%||         }|D ]}|j         dk    r|j        d         }||         }|j         dk    r%t          |j                  dk    r|j        d         }||         }d |D             }|	                    d          dk    rd S | j                            |g dg d          }|t                              d           d S |\  }	}	}}d}d}d}d}g dg dfg dg dfg dg dfg dg dfg dg dfg dg dfd }d } |                                D ]S\  }!}"| j                            ||"d         |"d                   } | 0|!d!k    rd"}n|!d#k    rd"}n|!d$k    rd"}n|!d%k    rd"} | t                              d&           d S d }#d }$d }%d }&|r| \  }	}%}$}	n%|r| \  }	}#}%}$n|r| \  }	}	}$n|r	| \  }	}#}$}&}	n| \  }	}#}	}$|&p|$}&| j                            |&g dg d'          }'|'>| j                            |&g d(g d)          }'|'t                              d*           d S |'d+         }(|'d,         })|'d-         }*|$}+|rC| j                            |$d	d.gdd g          },|,t                              d/           d S |,\  }+}	| j                            |+g d|rdndddd g          }-|->| j                            |$g d0g d1          }-|-t                              d2           d S |-d,         }.|-d-         }/d }0d3}1|r6| j                            |%g d4g dfg d5g dfg d6g d7fg|          \  }	}0}	n|re| j                            |%g d8g d7fg d5g dfg|          \  }	}0}	|#4|                     |#          }1|1t                              d9|#           d S n?|rn<| j                            |#g d:g d;fg d<g d=fg d>g d?fg d@g dAfg|          \  }	}0}	|s|0t                              dB           d S |st          |0          dk    r| j                            |0d                   \  }	}2|28t!          |2t"          j                  r|2j        dk    rt)          |2          dk    rd S t)          |2          dCk    rt)          |2          | _        |j        d         |k    r]|*j        d         |k    rM|/j        d         |k    r=|s+| j                            |0d-         j        d                   nd }3||
n|}4|                     |(          \  }5}6|5dk    s|6dk    rt                              dD           d S |                     |3|*|/||)|.||5|6||4j        d         |1E          }7|7d S | j                            |7           | j        | j        |7j        <   ||j        d         }8dF|8z   }9|                     dG|8z   t@          j!        dgdd|5tE          |6|5z            gdH          }:| j        #                    tI          j%        dI|4j        d         |:j        g|9gdJ|8z             | j                   |9|j        d<   | j&        '                    |4||g           | j&        '                    |            | j&        '                    | j(        s|'n	|'d d-                    | j&        '                    | j(        s|-n	|-d d-                    | j&        '                    | j(        s|n	|d d-                    d"| _)        d S d S d S d S )KNr[   Addr   )rC  r   Reshape	Transposer   )NNr   r   r   )rC  EinsumrE  r   )r5   Nr   r   r5   Mulrp      rZ   ro   c                     g | ]	}|j         
S rW   )r~   )r   childs     r   r   z(FusionAttention.fuse.<locals>.<listcomp>  s    >>>E%->>>r   r   rq   )rE  rD  rC  r   )r5   r   r   Nz&fuse_attention: failed to match v pathF)SoftmaxrC  Divr   )r   r   Nr   )rK  rC  rG  r   )rK  Wherer   rL  )r   r   rp   r   )rK  rC  rM  r   )r   r   r   rp   )rK  rL  r   )r   r   r   )rK  rC  r   rG  Sqrt)r   r   Nr   r5   )path1path2path3path4path5sdparQ  TrR  rS  rT  z'fuse_attention: failed to match qk path)r   r   r   N)rL  rE  rD  rC  r   )r   r   r   r   Nz&fuse_attention: failed to match q pathr   rN  z/fuse_attention: failed to match mul sqrt q path)rE  rE  rD  rC  r   )r5   r   r   r   Nz&fuse_attention: failed to match k pathr  )ExpandrD  Equal)rX  r   r   )CastrW  rD  rX  )r   r   r   r   )rY  rX  r   r   z6fuse_attention: failed to verify shape inference of %s)rG  SubrY  r   r   )Nr   r5   r   r   )rG  rZ  r   r   )Nr   r5   r   )rM  rY  rZ  rW  r   r   )Nr   r   r5   r   r   )rM  rY  rZ  rY  rW  r   r   )Nr   r   r5   r   r   r   z)fuse_attention: failed to match mask pathizmFailed to detect num_heads and hidden_size for Attention fusion. Please specify those parameters in argument.)r"   r   r   r   r   r   r   r]   r\   r$  r   r%  edge_modified_shape_modified_tensorr8   rD  reshape_modified_)*r~   r   match_parentmatch_parent_path	enumeraterr   r   r   r&   countr   r   itemsmatch_parent_pathsr   get_constant_inputrt   ru   rv   rw   r-  rf   r^   rQ   r   r   rA  r   r   r   r3   rG   r	   rI   r  rJ   r
   rB   nodes_to_removerD   r_   prune_graph);r   r   input_name_to_nodesoutput_name_to_nodenormalize_node
start_nodeadd_before_layernormr   einsum_noder   reshape_qkvtranspose_qkv
matmul_qkvother_inputs_i
node_input
root_inputmul_before_layernormmul_childrenlayernorm_nodechildrenrJ  parent_nodechildren_typesv_nodesadd_vmatmul_v
is_distillis_distill_addis_no_mask_attentionis_sdpaqk_pathsqk_nodeskvr   	matmul_qkwhere_qkafter_qq_nodesrz   add_qmatmul_qafter_kmul_k_nodesk_nodesadd_kmatmul_k
mask_nodesr%  mul_valr"   attention_last_nodeq_num_headsq_hidden_sizenew_nodeunique_indexnew_edgeshape_tensors;                                                              r   fusezFusionAttention.fusev  s    #
!%999#':#:#:>5RS#T#T #/1

 J00???!!!
 
	
  =F:Q;zz 
44DDDooo I $>G;K

'
(899 	, 	,NB
!444Yq\0333
++++|!!F!!_
  $z66z5!LL+./C/J1/MNL'C,=,=,B,B!-a!)-AAA!/!6q!9JJF)c,.?.?1.D.D18;

#';;;*:6H! 1 1=$888!&aJ **5":::s;CU?V?VZ[?[?[$+A.J&z2>>X>>>))Q..F*..z;d;d;dfufufuvv?LLABBBF")Auh
$999???K999???K;;;\\\J;;;\\\J222III>@@@BTBTBTU
 
 NN$$ 	 	DAqz33J!adKKHG||!

g!%g'+$$fLLBCCCF	 		1*2'Q)QQ 	1/7,Q))! 	1 (Q99 	119.Q	7AA(0%Q9&Y*..w8a8a8acrcrcrss?j22@@@""" G
 EFFFBK	2; 	'*66y5&/TUW[S\]]K"NOOO&LWa*..>>>gATSTVWYZ\`@a
 
 ?j22FFF""" G
 EFFF2; 

 %	#z<<333YYY?888)))D;;;\\\J
 $   Az11  	#z<<@@@,,,O888)))D $   Az1 !!0088
%LL!Y[abbbF! 	#z<<EEEGYGYGYZ===OQQQShShShiYYY[s[s[st $
  
 Az1 $ 	
(:LLDEEEF# 
	8J!(;(;66z!}EEJAw "7BJ77 !<CLA<M<M'NNa''W~~'').w&>!
**x~a/@J/N/NS[SabcSdhrSrSrZnx,99*R.:Nq:QRRRtxJ1<1D++-)-)K)KI)V)V&Ka=A#5#5C    11%!!!%)&*1!4% 2  H $$X...:>:ND(7&*03+l:#330<?)/QS1L-M-MN  4     
##$!,3A68IJ!
+l:	  (   (0!!$ '')<mZ(XYYY ''111  ''t7T(fZabecebeZfggg ''t7T(fZabecebeZfggg ''t7T(fZabecebeZfggg  $DI +*/N/NSrSrr   )r  r  Fr  r  r  r  F)r  Fr  r  r  r  N)rR   rS   rT   rU   r   r  r   boolr   rV   r   r   tuplery   r   r   r   r   r   r   r  r#  r-  rA  r  __classcell__)rl   s   @r   rY   rY   g   s         04).27&>@T%U% %% % 	%
 &,% #'% ,0% c% % % % % %40	 0eTWY\T\o 0 0 0 0>,&y ,&U3PS8_ ,& ,& ,& ,&\Y    * S        25 5S 5S 5 5 5 5n+Ks +KC +K# +K +K +K +KZ 4 4	
  
T	   <M,M, M, 	M,
 M, 4M, 4M, 
y)Y.	/M, M, M, M,v !#$ %w ww c/D(w c/D(	w
 w 4w 4w w w w w w w w w  !w" #w$ %w& 
T	'w w w wL "'h h$Jh h 	h
 h h h h h h h h h h h  !h" #h$ %h& t|'h( 
T	)h h h hTo$ o$ o$ o$ o$ o$ o$r   rY   )loggingr   numpyru   fusion_baser   fusion_optionsr   fusion_utilsr   r   onnxr   r	   r
   r   
onnx_modelr   rR   r   r   rY   rW   r   r   <module>r     s  
                 . . . . . . 1 1 1 1 1 1 1 1 = = = = = = = = = = = =            	8		S S S S S S S Sl~$ ~$ ~$ ~$ ~$f ~$ ~$ ~$ ~$ ~$r   