
    fPiϐ                         d dl Z d dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
 d dlmZ d dlmZmZmZ d dlmZ d dlmZ  e j        e          Z G d	 d
e          Z G d de          Z G d de          ZdS )    N)AttentionMaskFusionAttention)Fusion)"FusionSimplifiedLayerNormalization&FusionSkipSimplifiedLayerNormalization)NumpyHelper)	NodeProtoTensorProtohelper)	OnnxModel)BertOnnxModelc                        e Zd ZdZdedededef fdZdedz  d	e	d
e	de	dedededededz  de
de	dz  fdZdededededz  dedz  dedz  dedz  dededz  dedz  dedede	dz  fdZd Zd Zd Z xZS )FusionT5Attentionz=
    Fuse T5 Attention subgraph into one Attention node.
    modelhidden_size	num_headsattention_maskc                 f    t                                          ||||ddg           d| _        d S )NFSoftmax)use_multi_head_attentionsearch_op_types   )super__init__	static_kv)selfr   r   r   r   	__class__s        z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/onnx_model_t5.pyr   zFusionT5Attention.__init__   sG     	%*&K 	 	
 	
 	
     
mask_indexNq_matmulk_matmulv_matmulinputoutput	attn_biasscalereturnc                 ,   |dk    sJ |dk    r+||z  dk    r"t                               d| d|            dS | j                            |j        d                   }| j                            |j        d                   }| j                            |j        d                   }|||)||n||n|}t          |j        d          d           dS t          j        |          }t          j        |          }t          j        |          }|j        |j        k    sJ |j        d         }|j        d         }|j        d         }||cxk    r|k    sn J |dk    r'||k    r!t           	                    d| d| d	           t          j        |j        dd                   }t          j        |||fd
          }d|z  }| j                            d          }t          j        |dz   t           j        ||g|                                d          }| j                            || j                   ||dz   dg}|r|                    |           n|                    d           |	r*|                    d           |                    |	           |r.|d         dk    r"|                                 |r|d         dk    "t          j        d||g|          }d|_        |j                            t          j        d|          g           |
.|j                            t          j        d|
          g           | j        @|j                            t          j        dt;          | j                            g           |S )a  Create an Attention node.
        Args:
            mask_index (str): mask input
            q_matmul (NodeProto): MatMul node in fully connection for Q
            k_matmul (NodeProto): MatMul node in fully connection for K
            v_matmul (NodeProto): MatMul node in fully connection for V
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            input (str): input name
            output (str): output name
        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        r   input hidden size # is not a multiple of num of heads Nr   zl is not an initializer. Please set do_constant_folding=True in torch.onnx.export to unblock attention fusionzInput hidden size (z3) is not same as weight matrix dimension of q,k,v (z:). Please provide a correct input hidden size or pass in 0)axis   	Attention_qkv_weightTname	data_typedimsvalsraw inputsoutputsr1   com.microsoftr   r'   mask_filter_value)loggerdebugr   get_initializerr$   printr   to_arrayshapewarningnpprodstackcreate_node_namer   make_tensorr
   FLOATtobytesadd_initializerthis_graph_nameappendpop	make_nodedomain	attributeextendmake_attributer<   float)r   r    r!   r"   r#   r   r   r$   r%   r&   r'   q_weightk_weightv_weightmatmulqwkwvw
qw_in_size
kw_in_size
vw_in_sizeqw_out_size
qkv_weightqkv_weight_dimattention_node_nameweightattention_inputsattention_nodes                               r   make_attention_nodez%FusionT5Attention.make_attention_node)   s   4 1}}}}??i 7A==LLikii^giijjj4:--hnQ.?@@:--hnQ.?@@:--hnQ.?@@x/83C!)!1XX8CSxxYaF<? g g g   4!(++!(++!(++ x28####Xa[
Xa[
Xa[
Z5555:555555??{j88NNJk J Jfp J J J  
 gbhqrrl++Xr2rl333
["j99+FF#$}4!'n-##%%
 
 
 	
""64+?@@@ -/

  	(##J////##B''' 	/##B'''##I... 	##3B#72#=#=  """  	##3B#72#=#=  )#H$	
 
 
 !0 '')>{I)V)V(WXXX$++V-B7E-R-R,STTT!-$++V-BCVX]^b^tXuXu-v-v,wxxxr   querykeyvaluepast_key
past_valuepresent_keypresent_valuec                 |   |dk    r|dk    r|r|r|sJ ||z  dk    r"t                               d| d|            d S | j                            d          }|||dg}|r|                    |           n|                    d           |r|                    |           n|                    d           |r.|sJ |                    |           |                    |           |r.|d         dk    r"|                                 |r|d         dk    "|g}|	r.|
sJ |                    |	           |                    |
           t          d|d|d	|           t          j        d|||
          }d|_	        |j
                            t          j        d|          g           |j
                            t          j        dd          g           | j        @|j
                            t          j        dt          | j                            g           |                     d           |S )Nr   r*   r+   MultiHeadAttentionr6   r7   zattention_inputs=z, attention_outputs=z, attention_node_name=r8   r;   r   r'         ?r<   )r=   r>   r   rG   rM   rN   r@   r   rO   rP   rQ   rR   rS   r<   rT   increase_counter)r   rg   rh   ri   r    r&   rj   rk   r%   rl   rm   r   r   rb   rd   attention_outputsre   s                    r   create_mha_nodez!FusionT5Attention.create_mha_node   s    1}}qUsuL)#))LLikii^giijjj4"j99:NOO	
  	(##J////##B''' 	(##I....##B''' 	0:##H---##J/// 	##3B#72#=#=  """  	##3B#72#=#= $H 	4   =$$[111$$]333R!RR&7RR<ORRSSS) #%$	
 
 
 !0 '')>{I)V)V(WXXX '')>w)L)L(MNNN!-$++V-BCVX]^b^tXuXu-v-v,wxxx2333r   c                 f    |                      |||          rd S |                     |||           d S N)fuse_t5_encoderfuse_t5_decoder)r   nodeinput_name_to_nodesoutput_name_to_nodes       r   fusezFusionT5Attention.fuse   sC    &9;NOO 	FT#68KLLLLLr   c                     |j         dk    sJ | j                            |g dg d|          }|dS |\  }}}| j                            |g dg d|          }|dS |d         }	| j                            |g d	g d
|          }
|
dS |
\  }}}| j                            |g dg d|          }|dS |\  }}}| j                            |g dg d|          }|d u }|	|d         }nP| j                            |g dg d|          }|%| j                            |g dg d|          }|dS |d         }| j                            |          \  }}|dS |dk    rt          |          | _        | j                            |d         g dg d|           | j                            |d         g dg d|          } t           fd| j                                        j	        D                       rK|I d         j	        d         |d         j	        d         k    r!t           d         j	                  dk    rd}n+| j                            |d         j	        d                   }d }| j                            |ddgddg          }|"|r | j                            |g dg d
          }|dS |d         j        d         }| j                            |g d	g d
          }|dS |\  }}}| j                            |g d	g d          }|dS |\  }}}|j	        d         |	j	        d         k    rdS |                     |          \  }}|                     |||||||	j	        d         |j        d         |d 
  
        }|dS | j                            |           | j        | j        |j        <   | j                            |           d!| _        d!S )"Nr   MatMul	TransposeReshaper   r   r   r   edgesry   FConcat	UnsqueezeGatherShaper   r   r   r   r7   r   r   r~   r   r   r   r   Addr~   r   r   r   r   MulSubCastr   r   r   r   r   r   r   r   r   )r   Slicer   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r      )ConstantOfShaper   r   r   r   r   r   r   r   r   )r   r   r   r   r   c              3   P   K   | ] }|j         d          j        d         k    V  !dS )r7   r   N)r1   r$   ).0r$   mask_nodes_2s     r   	<genexpr>z4FusionT5Attention.fuse_t5_encoder.<locals>.<genexpr>F  s7      bbEJ,r"2"8";;bbbbbbr   r   r6   r   RelativePositionBias)r   r   r   rp   )r   r   r$   r%   r&   r'   T)op_typer   match_child_pathmatch_parent_pathget_constant_inputrT   r<   anygraphr$   lenr   process_maskr%   get_num_heads_and_hidden_sizerf   nodes_to_addrM   rL   node_name_to_graph_namer1   nodes_to_removeprune_graph)!r   softmax_nodery   rz   	qkv_nodes
matmul_qkv_reshape_qkvqkv_shape_nodesinput_shape_nodev_nodes	reshape_vmatmul_vqk_nodesadd_qk	matmul_qk
mask_nodesis_pattern_for_one_graph_inputmul_nodemul_valmask_nodes_3r    res_pos_bias	rpb_nodesk_nodesmatmul_kq_nodes	reshape_qmatmul_qq_num_headsq_hidden_sizenew_noder   s!                                   @r   rv   z!FusionT5Attention.fuse_t5_encoder   s5   #y0000J//...*** 3	 0 
 
	 5%."
A{*66666LL	
 
 "5*2.*.....II	
 
 ?5!(9h ://(((II	
 
 5'69Z11CCC	
 

 *4t);&!!!}HH 55HHH"""#	 J !!Z99TTT)))'	 
 % 5!!}HZ228<<
7?5f%*7^^D" z33rNIIIOO	
 
 z33rNIIIOO	
 
 $bbbbIYIYI[I[Iabbbbb %(R &q)\"-=-CA-FFFLO)**a//JJ,99*R.:Nq:QRRJJ00*+F
 
	
 !?
44888		 I
 5 }+A.*.....II
 

 ?5 1h *.....II
 

 ?5!(9h >! 0 6q 9995%)%G%G	%R%R"]++!%"(+%a(" , 
 
 5  ***6:6J$X]3##K000tr   c                 (   |j         dk    sJ | j                            |g dg d|          }|d S |\  }}}| j                            |g dg d          }|d S |d         }	d }
d }d }| j                            |g dg d	          }|| j                            |g d
g d          }|R|\  }}}|j        d         }
|j        d         }d|vrd S |j        d         |	j        d         k    rd| _        npd| _        nh|j        d         }||v rd S d|vrd S d| _        nG|\  }}}}|j        d         }||v rd S d|vrd S |j        d         }d|vrd S |j        d         }
d| _        | j                            |g dg d          }|d S |\  }}}d }d }| j        dk    r| j                            |g dg d          }|	|d         }n,| j                            |g dg d          }|d S |d         }| j                            |          \  }}|dk    r|| _        | j	        
                    |d         j        d                   }n]| j                            |ddgddgfddgddgfg|          \  }}}|dk     rt                              d           d S |j        d         }d }d }d }| j        dk    r| j                            |g d
g d          }|g|\  } }!}|!j        d         }||!j        d                  }"|"D ]2}#| j                            |#j        d                   }$|$	|$j        } n3|d S d|vrd S n| j                            |dgdg          }|d S |d         } | j        d         }||v rd S d |vrd S n| j                            |g d!g d"fg d#g d$fg|          \  }%}}d }&d }"||d         |d%         }!}'|!j        d         }|%dk    r!||'j        d                  }&|&j        d         }n|'j        d         }||v rd S d&|vrd S |%dk    rI||'j        d                  }"|"D ]2}#| j                            |#j        d                   }$|$	|$j        } n3n|'j        d         }|d S d'|vrd S n| j                            |g d
g d          }|d S |\  }}!}|!j        d         }||!j        d                  }"|"D ]2}#| j                            |#j        d                   }$|$	|$j        } n3|d S d'|vrd S | j                            |g d
g d          }(|(d S |(\  })}*}+|+j        d         |	j        d         k    rd S |                     |*          \  },}-| j        dk    r
||}|}
d }d }|r|
r|,dk    r|-dk    sd S |                     |+j        d         ||
|||||j        d         |||,|-(          }.|.r| j                            |.           | j        | j        |.j        <   |s|rp||fD ]k}/|/r| j                            |/          st-          d)|/d*            d S |/|v sJ |/d+z   ||/         j        d<   | j                            |/|/d+z              l| j                            |           d,| _        d S d S )-Nr   r}   r   r   r   r   r7   )r   r   r   r~   )r   r   r   r   r   r   r   rm   r   past_value_crosspast_value_selfpresent_value_selfr   r   r   r   r   r   r   r   r   r   r   zGSkip MultiHeadAttention fusion since attention bias pattern not matchedpresent_key_crossr   past_key_cross)r   r   r   r~   )r   r   r   r   )r   r   r   r   r~   )r   r   r   r   r   past_key_selfpresent_key_self)rg   rh   ri   r    r&   rj   rk   r%   rl   rm   r   r   zgraph_output=z does not exist in graph output_copyF)r   r   r   r   r$   r%   r   r   r<   r   r   match_parent_pathsr=   r>   find_graph_outputr1   r   rs   r   rM   rL   r   r@   replace_input_of_all_nodesr   r   )0r   r   ry   rz   r   r   _transpose_qkvr   r   r   ri   rk   rm   r   transpose_vr   r   concat_vr   r   r   r   r    r   r   r   r   matched_path_indexrh   rj   rl   r   transpose_k	reshape_kpresent_key_transpose_nodespresent_key_transpose_nodepresent_key_candidateidxpast_key_transpose_nodeconcat_kr   transpose_qr   r   r   r   r   graph_outputs0                                                   r   rw   z!FusionT5Attention.fuse_t5_decoder  sw	   #y0000J//...*** 3	 0 
 
	 F2;/
NK*66666LL
 

 "F*2.
*..888LL
 

 ?j22222		 G
 "3:0Y!* + 21 5"-77F>!$(8(>q(AAA%&DNN%&DNN'-a0
!444F%Z77F!"(/%HaA!*J000 
22$OA.M#=88OA&EDN://(((II
 

 F'69
>Q55GGG""" J
 %%a=!Z99TTT))) 

 %F%a=66x@@JAw&  )0&,99*R.:Nq:QRRJJ'+z'D'DW%1v.34q!f= $( ($1 "A%%fggg!<?L>Qj22222		 G
 ",3)Yoa(.A)BRSTBU.V+2M  .,0J,H,HIcIjklIm,n,n),8&;&@ 9 &F&k99F : *66 MC 
 ?F%aj&,Q/222F#833F 4 #j;;AAA<<<PNNNP_P_P_` $ OC! '+#*.'"&-aj'"+)oa(!88.A(.QRBS.T+6<Q?HH'~a0H222F"(22F!882EhoVWFX2Y/6Q " "204
0L0LMgMnopMq0r0r-0<*?*DK!E = #+/!"4K&F%[88F 9 *66666II 
 ?F")9aoa(.A)BRSTBU.V+2M  .,0J,H,HIcIjklIm,n,n),8&;&@ 9 &F%[88F*.....II
 

 ?F+2(Y>! 0 6q 999F%)%G%G	%R%R"]>Q8#7CEHJ 	 	+//ma6G6GF''/!$!"!%a(#'!% ( 
 
  	%$$X...:>:ND(7  `m `%0-$@ ` `L( TZ-I-I,-W-W OOOOPPP'+>>>>>BNQXBX'5<Q?J99,W^H^____ ''444$D	% 	%r   )__name__
__module____qualname____doc__r   intr   r   strr	   rT   rf   rs   r{   rv   rw   __classcell__r   s   @r   r   r      s          	
 &     "o$Jo o 	o
 o o o o o :o o 
T	o o o obDD D 	D
 $JD :D *D $JD D 4ZD TzD D D 
T	D D D DLM M Mi i iVI% I% I% I% I% I% I%r   r   c                   *     e Zd Zdef fdZd Z xZS )FusionRelativePositionBiasBlockr   c                 P    t                                          |ddg           d S )Nr   r   )r   r   )r   r   r   s     r   r   z(FusionRelativePositionBiasBlock.__init__  s(     6DDDDDr   c                    | j                             |g dg d|          }|%| j                             |g dg d|          }|d S |d         }|d         }|d         }|d         }|| j        v rd S | j                             |g d	g d
|          }	|	d S | j                             |	d         j        d                   }
|	d         }| j                             |g dg d|          }d}|'| j                             |g dg d|          }d}|d S |d         }t          t          j        t          j        |
          d|rdndz  z                      }|dk    rt          
                    d| d           | j                             dd|rdndz             }| j                             |j        d                   }|d S t          j        |          }t          j        |          }t!          j        |dz   t$          j        t          j        |          d         t          j        |          d         g|                                d          }| j                             || j                   |j        |j        d         |j        d         g}|d z   }||j        d<   t!          j        d||g|!          }d"|_        |j                            t!          j        d#|          g           |j                            t!          j        d$|          g           | j        | j        |j        <   | j                             |           d| _!        d S )%N)r   r   r   r   r   r   Where)r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r      r7   r   r-   )	Minr   r   r   r   r   DivLogr   )	r   r   r   r   r   r   r   r   r   r   )r   Negr   r   r   r   r   Range)r   r   r   r   r   r   r   r   F)r   Absr   r   r   r   T          zmax_distance is z], which is different from the default value 128. Please double check the model configuration.r   RelPosBias_encoderdecodername_prefixr   _bias_table_weightr0   _rel_pos_biasr8   r;   max_distanceis_bidirectional)"r   r   r   get_constant_valuer$   r   rD   roundexpr=   rC   rG   r?   r   rA   	transposer   rH   r
   rI   rB   rJ   rK   rL   r1   rO   rP   rQ   rR   rS   r   r   rM   r   )r   rx   ry   rz   compute_bias_nodesgatherwhereslice	unsqueezecompute_buckets_nodeslog_maxdivrange_nodesr  
range_noder  	node_nametable_weight_itable_weighttable_weight_t
bias_tabler9   bias_outputrpb_nodes                           r   r{   z$FusionRelativePositionBiasBlock.fuse  s   !Z99PPP!!!	
 
 %!%!=!=[[[(((#	" " ")#A&"2&"1%&q)	 ,,,F $
 < <ZZZ'''	!
 !
 !(F *//0Eb0I0OPQ0RSS#B'j22[[[$$$	
 
 !*66AAA???Tg K  $" _
 28BF7OOrCS>ZaaYZ7[$\]]^^3NN?< ? ? ?  
 J//"N^Amdm0n 0 
 
	 33FLODD!F"+N;;l33'11!'(<((+RXl-C-CA-FG''))
 
 

 	
"":t/CDDDP /:#3A#6
8H8KL  /1$A#" M	
 
 
 *!!6#8#V#V"WXXX!!6#89KM]#^#^"_```6:6J$X]3  ***r   )r   r   r   r   r   r{   r   r   s   @r   r   r     sa        Ei E E E E E ER  R  R  R  R  R  R r   r   c                   \     e Zd Zddedef fdZd Zd ZddZd	 Zd
 Z	d Z
d Zd Z xZS )T5OnnxModelr   r   r   c                    t                                          |||           t          |           | _        t	          | j        j        j                  dk    rddlm	} |j
        | j        _        t          | | j        | j        | j                  | _        t!          |           | _        t%          |           | _        t)          |           | _        d S )Nr   r   )AttentionMaskFormat)r   r   r   r   r   r   r   r$   fusion_optionsr  NoMaskmask_formatr   r   r   attention_fusionr   layer_norm_fusionr   skip_layer_norm_fusionr   
rpb_fusion)r   r   r   r   r  r   s        r   r   zT5OnnxModel.__init__7  s    	;777+D11 tz%&&!++::::::.A.HD+ 1$8H$.Z^Zm n n!CD!I!I&LT&R&R#9$??r   c                 8    | j                                          d S ru   )r"  applyr   s    r   fuse_attentionzT5OnnxModel.fuse_attentionF  s    ##%%%%%r   c                 8    | j                                          d S ru   )r#  r'  r(  s    r   fuse_layer_normzT5OnnxModel.fuse_layer_normI  s    $$&&&&&r   Tc                 8    | j                                          d S ru   )r$  r'  )r   shape_infers     r   fuse_skip_layer_normz T5OnnxModel.fuse_skip_layer_normL  s    #))+++++r   c           
         |                                  D ]Y}|j        dk    rJ|                     |g dg d          }|)d | j        j        j        D             }|d         j        d         |v r|                     dd	          }t          j        d|d         j        d         g|d
z   g|          }t          j	        dt          j        dgdg          }|                     |           t          j        d|d
z   dg|dz   g|                     dd	          d          }|                     |           |                     |           |dz   |j        d<   |dz   |j        d<    d S [d S )Nr   )
r   r   r   r   r   r   r   r   SimplifiedLayerNormalizationr   )
r   r   r   r   r   r   r   r   r   r   c                     g | ]	}|j         
S  )r1   )r   r$   s     r   
<listcomp>z?T5OnnxModel.adjust_rel_pos_bis_length_input.<locals>.<listcomp>f  s    (X(X(X(X(X(Xr   r7   r   r   Added_Shape_r   _Outputr8   Constant_Index_1)r1   r2   r3   r4   r   _Output_Gather_1Added_Gather_r   )r9   r:   r1   r,   r   )nodesr   r   r   r   r$   rG   r   rO   rH   r
   INT64rK   add_node)r   rx   r9  graph_input_namesr  
shape_node	indices_1r  s           r   adjust_rel_pos_bis_length_inputz+T5OnnxModel.adjust_rel_pos_bis_length_inputO  s    JJLL 3	 3	D|555..   322 " $(X(XAQAW(X(X(X%Ryq)->>>$($9$9'~$9$^$^	%+%5#$)"IOA$6#7%.%:$;!*	& & &
 %+$6!3&1&7"#"#	% % %	 ,,Y777!'!1$$-	$9;M#N%.1C%C$D!%!6!6x_!6!]!]!"" " " j111f---(14F(F
1(14F(F
1e 63	 3	r   c                 z   g }|                                  D ]}|j        dk    r|                     |g dg d          }|+|                     |dgdg          }|G|d         }|j        d         |j        d<   |                    |           |                    |           |                     |           d S )Nr   )r   r   r   r   r   LessOrEqualTiler   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r9  r   r   r%   rR   rM   remove_nodesr   r   rx   extended_mask_nodesr   r  s         r   !remove_extended_mask_decoder_initz-T5OnnxModel.remove_extended_mask_decoder_init  s    JJLL 	3 	3D|u$$&*&<&<   655' '#" '. 224:P9QTUSVWW	$$Q<%)[^"&&':;;;&&t,,,!!/222?	3 	3r   c                 ~   g }|                                  D ]}|j        dk    r|                     |g dg d          }|+|                     |ddgddg          }|I|d         }|j        d         |j        d<   |                    |           |                    |           |                     |           d S )Nr   )r   r   r   r   r   r   rA  rB  r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rC  rE  s         r   remove_extended_mask_decoderz(T5OnnxModel.remove_extended_mask_decoder  s    JJLL  	3  	3D|u$$&*&<&<   988!' '#$ '. 224'CY9Z]^`a\bcc	$$Q<%)[^"&&':;;;&&t,,,!!/222A 	3  	3r   c                 `    |                                   | j                                         d S ru   )adjust_reshape_and_expandr%  r'  r(  s    r   
preprocesszT5OnnxModel.preprocess  s.    &&(((r   c                     |                                   |                                  |                                  |                                  d S ru   )rG  rI  r?  r   r(  s    r   postprocesszT5OnnxModel.postprocess  sP    ..000))+++,,...r   r   )T)r   r   r   r   r   r)  r+  r.  r?  rG  rI  rL  rN  r   r   s   @r   r  r  6  s        @ @ @s @ @ @ @ @ @& & &' ' ', , , ,6 6 6r!3 !3 !3F"3 "3 "3H           r   r  )loggingnumpyrD   fusion_attentionr   r   fusion_baser   fusion_simplified_layernormr   r   fusion_utilsr   onnxr	   r
   r   
onnx_modelr   onnx_model_bertr   	getLoggerr   r=   r   r   r  r2  r   r   <module>rY     sh  
      ; ; ; ; ; ; ; ;       r r r r r r r r $ $ $ $ $ $ / / / / / / / / / /             ) ) ) ) ) )		8	$	$G
% G
% G
% G
% G
% G
% G
% G
%TV  V  V  V  V f V  V  V rc c c c c- c c c c cr   