
    fPiM                        d dl mZ d dlmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ d dlmZmZ d dlmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z.m/Z/ d dl0m1Z1m2Z2 d dl3m4Z4 d dl5m6Z6m7Z7m8Z8 d dl9m:Z:  ee;          Z< G d de:          Z=dS )    )	getLogger)PackingMode)AttentionMaskFusionAttention)FusionBartAttention)FusionBiasGelu)FusionConstantFold)FusionEmbedLayerNormalization)FusionFastGelu)
FusionGelu)FusionGeluApproximation)FusionGemmFastGelu)FusionLayerNormalizationFusionLayerNormalizationTF)AttentionMaskFormatFusionOptions)FusionQOrderedAttention)FusionQOrderedGelu) FusionQOrderedLayerNormalization)FusionQOrderedMatMul)FusionQuickGelu)FusionReshape)FusionRotaryEmbeddings)FusionShape)"FusionSimplifiedLayerNormalization&FusionSkipSimplifiedLayerNormalization) FusionBiasSkipLayerNormalizationFusionSkipLayerNormalization)FusionUtils)
ModelProtoTensorProtohelper)	OnnxModelc                       e Zd Zd-dededef fdZd Zd Zd Zd	 Z	d
 Z
d Zd Zd Zd Zd Zd Zd Zd.dZd Zd Zd Zdedee         defdZdefdZd Zd/dZd  Zd! Zd" Zd# Z d0d&e!d$z  d'efd(Z"d) Z#d1d*Z$d2d+efd,Z% xZ&S )3BertOnnxModelr   model	num_headshidden_sizec                    |dk    r|dk    s|dk    r	||z  dk    sJ t                                          |           || _        || _        t	          |           | _        t          | | j        | j        | j                  | _        t          | | j        | j        | j                  | _	        t          |           | _        dS )aG  Initialize BERT ONNX Model.

        Args:
            model (ModelProto): the ONNX model
            num_heads (int, optional): number of attention heads. Defaults to 0 (detect the parameter automatically).
            hidden_size (int, optional): hidden dimension. Defaults to 0 (detect the parameter automatically).
        r   N)super__init__r'   r(   r   attention_maskr   attention_fusionr   qordered_attention_fusionr   utils)selfr&   r'   r(   	__class__s       |/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/onnx_model_bert.pyr+   zBertOnnxModel.__init__&   s     Q;!#3#3Q;YbKbfgKgKgKgh"&+D11 /d6FX\Xk l l)@$"DND4G*
 *
& !&&


    c                 L    t          |           }|                                 d S N)r	   applyr0   fusions     r2   fuse_constant_foldz BertOnnxModel.fuse_constant_fold;        #D))r3   c                 j    | j                                          | j                                         d S r5   )r-   r6   r.   r0   s    r2   fuse_attentionzBertOnnxModel.fuse_attention?   s2    ##%%%&,,.....r3   c                    t          |           }|                                 t          |           }|                                 t          |           }|                                 t	          |           }|                                 d S r5   )r   r6   r   r   r   r7   s     r2   	fuse_geluzBertOnnxModel.fuse_geluD   sn    D!!%% &&#D))r3   c                 N    t          | |          }|                                 d S r5   )r   r6   )r0   is_fastgelur8   s      r2   fuse_bias_geluzBertOnnxModel.fuse_bias_geluO   s"    k22r3   c                 L    t          |           }|                                 d S r5   )r   r6   r7   s     r2   gelu_approximationz BertOnnxModel.gelu_approximationS   s     (..r3   c                 L    t          |           }|                                 d S r5   )r   r6   r7   s     r2   fuse_gemm_fast_geluz!BertOnnxModel.fuse_gemm_fast_geluW   r:   r3   c                 L    t          |           }|                                 d S r5   )r   r6   r7   s     r2   fuse_add_bias_skip_layer_normz+BertOnnxModel.fuse_add_bias_skip_layer_norm[   s     1$77r3   c                 L    t          |           }|                                 d S r5   )r   r6   r7   s     r2   fuse_reshapezBertOnnxModel.fuse_reshape_   s     t$$r3   c                 L    t          |           }|                                 d S r5   )r   r6   r7   s     r2   
fuse_shapezBertOnnxModel.fuse_shapec   s     T""r3   c                 N    t          | |          }|                                 d S r5   )r
   r6   )r0   use_mask_indexr8   s      r2   fuse_embed_layerzBertOnnxModel.fuse_embed_layerg   s"    .t^DDr3   c                     t          |           }|                                 t          |           }|                                 t          |           }|                                 d S r5   )r   r6   r   r   r7   s     r2   fuse_layer_normzBertOnnxModel.fuse_layer_normk   sV    )$//+D11 2$77r3   c                 L    t          |           }|                                 d S r5   )r   r6   r7   s     r2   fuse_simplified_layer_normz(BertOnnxModel.fuse_simplified_layer_normv   s     3D99r3   Tc                 P    t          | |          }|                                 d S )N)shape_infer)r   r6   )r0   rU   r8   s      r2   fuse_skip_layer_normz"BertOnnxModel.fuse_skip_layer_normz   s%    -dLLLr3   c                 L    t          |           }|                                 d S r5   )r   r6   r7   s     r2   fuse_skip_simplified_layer_normz-BertOnnxModel.fuse_skip_simplified_layer_norm~   s     7==r3   c                    t          |           }|                                 t          t          d | j        j        j                            }d |D             }d}|t          | j        j                  k     rh| j        j        |         }d|j	        v r)|j
        |vr | j        j                            |           n|dz  }|t          | j        j                  k     fd S d S )Nc                 .    | j         dk    o
| j        dk    S )NRotaryEmbeddingcom.microsoft)op_typedomain)nodes    r2   <lambda>z6BertOnnxModel.fuse_rotary_embeddings.<locals>.<lambda>   s    T\->>a4;RaCa r3   c                     h | ]	}|j         
S  )r^   ).0r_   s     r2   	<setcomp>z7BertOnnxModel.fuse_rotary_embeddings.<locals>.<setcomp>   s    !H!H!H$$+!H!H!Hr3   r   r[      )r   r6   listfilterr&   graphr_   len	functionsnamer^   remove)r0   r8   rot_emb_nodesnon_ms_domains_to_keepifns         r2   fuse_rotary_embeddingsz$BertOnnxModel.fuse_rotary_embeddings   s    '--aa
 % 
 
 "I!H-!H!H!H#dj*++++%a(B BG++	AW0W0W
$++B////Q #dj*++++++++r3   c                 L    t          |           }|                                 d S r5   )r   r6   r7   s     r2   fuse_qordered_mamtulz"BertOnnxModel.fuse_qordered_mamtul   s     %d++r3   r]   input_indicescastedc                   
 g }|                                  }|                     |          }|D ]

fd|D             }|D ]}|                     |          r|s|                    |           /||v rU||         }	|	j        dk    rB|                     |	j        d                   "|r |                    |	j        d                    |S )z
        Get graph inputs that feed into node type (like EmbedLayerNormalization or Attention).
        Returns a list of the graph input names based on the filter whether it is casted or not.
        c                 Z    g | ]'}|t          j                  k     j        |         (S rb   )ri   input)rc   ro   r_   s     r2   
<listcomp>zABertOnnxModel.get_graph_inputs_from_node_type.<locals>.<listcomp>   s1    WWWQ1s4:CVCV4:a=CVCVCVr3   Castr   )output_name_to_nodeget_nodes_by_op_typefind_graph_inputappendr]   rx   )r0   r]   rt   ru   graph_inputsr{   nodesbert_inputs
bert_inputparentr_   s             @r2   get_graph_inputs_from_node_typez-BertOnnxModel.get_graph_inputs_from_node_type   s   
 "6688))'22 
	A 
	ADWWWW-WWWK) A A
((44 A! 8$++J777#6660<F~//D4I4I&,WX/4Z4Z4f! A(//Q@@@A r3   c                 n    |                      dg d|          }||                      ddg|          z  }|S )NEmbedLayerNormalization)r   re      	Attention   )r   )r0   ru   inputss      r2   !get_graph_inputs_from_fused_nodesz/BertOnnxModel.get_graph_inputs_from_fused_nodes   sE    556OQZQZQZ\bcc$66{QCPPPr3   c                    |                                  }d}d}|j        D ]>}|                     |t          j                  \  }}|r|dz  }|t          |          z  }?t                              d| d| d           dS )zPChange data type of all graph inputs to int32 type, and add Cast node if needed.r   re   z)Graph inputs are changed to int32. Added z Cast nodes, and removed z Cast nodes.N)rh   rx   change_graph_input_typer!   INT32ri   loggerinfo)r0   rh   add_cast_countremove_cast_countgraph_inputnew_noderemoved_nodess          r2   change_graph_inputs_to_int32z*BertOnnxModel.change_graph_inputs_to_int32   s    

 ; 	4 	4K&*&B&B;P[Pa&b&b#Hm $!#]!3!33 A  A  Aar  A  A  A	
 	
 	
 	
 	
r3   
batch_sizemax_seq_lenc                    |                      d          |                      d          z   }| j        j        j        D ]S}|j        |v rH|j        j        j        j        d         }||_	        |#|j        j        j        j        d         }||_	        T| j        j        j
        D ]%}|j        j        j        j        d         }||_	        &dS )zD
        Update input and output shape to use dynamic axes.
        T)ru   Fr   Nre   )r   r&   rh   rx   rk   typetensor_typeshapedim	dim_paramoutput)r0   dynamic_batch_dimdynamic_seq_lenbert_graph_inputsrx   	dim_protor   s          r2   use_dynamic_axeszBertOnnxModel.use_dynamic_axes   s     !BB C 
 
22%2@@A Z%+ 	: 	:Ez...!J28<Q?	&7	#". %
 6 < @ CI*9I'j&- 	4 	4F/59!<I"3I	4 	4r3   c                 .    |                                   d S r5   )adjust_reshape_and_expandr<   s    r2   
preprocesszBertOnnxModel.preprocess   s    &&(((r3   c                 l   g }|                                  D ]X}|j        dk    rI|                     |j        d                   }|N|j        dk    rC|                    |g           |                     |j        d         |j        d                    |                     |g dg d| 	                                          }||d         }|                     |j        d                   }|d         }|                     |j        d                   }|d         }	|O|Mt          |          d	k    r:t          |          dk    r'|d         |d         k    r|	j        d         |j        d<   Z|rA|                     |           t                              d
t          |                      d S d S )NReshapere   r   )Expandr   r   Slice)r   r   r   r      z"Removed Reshape and Expand count: )r   r]   get_constant_valuerx   sizeextendreplace_input_of_all_nodesr   match_parent_pathr{   ri   remove_nodesr   r   )
r0   nodes_to_remover_   reshape_shapereshape_pathexpand_nodeexpand_shape_valuereshape_before_expandshape_value
slice_nodes
             r2   r   z'BertOnnxModel.adjust_reshape_and_expand   s   JJLL !	= !	=D|y(( !% 7 7
1 F F ,1Cq1H1H#**D622233DKNDJqMRRR  $55<<< LL,,..	     +".r"2K)-)@)@ARSTAU)V)V&,8,<)"&"9"9:O:UVW:X"Y"YK!-b!1J*6'3 233q88,,11.q1[^CC(2(9!(<
1 	Uo...KKSS=Q=QSSTTTTT	U 	Ur3   c                 "   |                                  }g }|                                 D ]}dddd}|j        |v r||j                 }|                     |g d|dddddg|          }|e|\  }}}	}
}}|j        d         |                                 j        d         j        k    r)|j        d         |j        d<   |                                  }|j        dk    r|                     |g dg d|          }||d	         j        d         |                                 j        d         j        k    rt          j	        d|j        dt          |j                  dz
           |j        |j        d
z             }d|_        |j                            t          j        d| j                  g           |                     ||                     |          j                   |                    |           |                     |           d S )Nre   r   r   )r   	ReduceSumr   )rz   ConstantOfShapeConcat	UnsqueezeGatherShaper   )r   rz   r   r   )r   r   r   r   r   _remove_mask)r   outputsrk   r\   r'   )r{   r   r]   r   rx   rh   rk   r   r"   	make_noderi   r^   	attributer   make_attributer'   add_nodeget_graph_by_noder~   r   )r0   r{   r   r_   op_input_idro   parent_nodescastconstantOfShapeconcat	unsqueezegatherr   attention_nodes                 r2   clean_graphzBertOnnxModel.clean_graph  s@   "6688JJLL <	5 <	5D 78aVWXXK|{**-#55   1aA&'     + %'!{1~);A)>)CCC38<?-a0.2.F.F.H.H+|{**
  $55EEE LL'	     +#B'-a0DJJLL4Fq4I4NNN)/)9'#':a#dj//A2E.E#F$(K!%^!;	* * * 1@-&0779N{\`\j9k9k8lmmmnd6L6L^6\6\6abbb'..t444/*****r3   c                 V    |                                   |                                  d S r5   )r   prune_graphr<   s    r2   postprocesszBertOnnxModel.postprocessF  s,    r3   NFoptionsadd_dynamic_axesc                 b   ||j         s|                                  | j                                         | j                                         |                                  ||j        r(|                                  |                                  ||j	        r| 
                                 |                                  |                                  ||j        r.|                     |j                    |                                  ||j        r|                                  |l| j                            |j                   |j        rFt-          | j        t0                    s,t3          | | j        | j        | j        |j                  | _        ||j        r|                                  ||j        r|                                  |                                   ||j!        r*|j        tD          j#        k    }| $                    |           | j        %                                 | &                                 ||j'        r,| (                    d           | (                    d           ||j)        r| *                                 ||j+        r| ,                                 ||j-        r| .                                 | /                                 |r| 0                                 tb          2                    d| 3                                            d S )NT)rA   Fzopset version: )4enable_shape_inferencedisable_shape_inferencer/   remove_identity_nodesremove_useless_cast_nodesr9   enable_layer_normrQ   rS   enable_gelur?   r   rJ   enable_skip_layer_normrV   rX   enable_rotary_embeddingsrq   r,   set_mask_formatattention_mask_formatuse_multi_head_attention
isinstancer-   r   r   r(   r'   enable_attentionr=   enable_qordered_matmulrs   rL   enable_embed_layer_normr   MaskIndexEndrO   remove_useless_reshape_nodesr   enable_bias_gelurB   enable_bias_skip_layer_normrH   enable_gelu_approximationrD   enable_gemm_fast_gelurF   remove_unused_constantr   r   r   get_opset_version)r0   r   r   rN   s       r2   optimizezBertOnnxModel.optimizeJ  s+   )G((***
((*** 	
,,... 	!!!O 9O  """++---O 3ONNO >O%%g&DEEE00222O @O'')))//0MNNN/ 
4CXZm8n8n (7$N'4) )% O 8O!!! O >O%%'''O ?O$:>Q>^^N!!.111 	
//111 O 8OD111E222O CO..0007#D##%%%7#@$$&&&##%%%  	$!!###@d&<&<&>&>@@AAAAAr3   c                     i }g d}g d}||z   D ])}|                      |          }t          |          ||<   *t                              d|            |S )z8
        Returns node count of fused operators.
        )r   r   MultiHeadAttentionGeluFastGeluBiasGeluGemmFastGeluLayerNormalizationSimplifiedLayerNormalizationSkipLayerNormalization SkipSimplifiedLayerNormalizationr[   )QOrderedAttentionQOrderedGeluQOrderedLayerNormalizationQOrderedMatMulzOptimized operators: )r|   ri   r   r   )r0   op_countopsq_opsopr   s         r2   get_fused_operator_statisticsz+BertOnnxModel.get_fused_operator_statistics  s     
 
 

 
 
 + 	& 	&B--b11Eu::HRLL6H66777r3   c                    |                                  dt          ffd} |d          } |d           |d          z    |d          z   } |d           |d	          z    |d
          z   } |d           |d          z   } |d           |d          z   }|dk    o|dk    o||k    o|d|z  k    p|d|z  k    }|dk    rt                              d           |dk    rt                              d           |dk    rt                              d           |dk    rt                              d           |dk    rt                              d           |S )zA
        Returns True when the model is fully optimized.
        Nop_namec                 2                         |           pdS )Nr   )get)r  fused_op_counts    r2   r  z2BertOnnxModel.is_fully_optimized.<locals>.op_count  s    !%%g..3!3r3   r   r   r   r   r   r   r   r   r   r   r   r   r   zLayer Normalization not fusedz$Simple Layer Normalization not fusedzGelu (or FastGelu) not fusedz!EmbedLayerNormalization not fusedz+Attention (or MultiHeadAttention) not fused)r  strr   debugwarning)	r0   r
  r  embed	attentiongelu
layer_normsimple_layer_norm
is_perfects	    `       r2   is_fully_optimizedz BertOnnxModel.is_fully_optimized  s    !!??AAN	4c 	4 	4 	4 	4 	4 	4 233H[))HH5I,J,JJXXViMjMjj	x((:"6"66*9M9MMX233hh?W6X6XX
$H%CDDxxPrGsGss QY XQXd"X I-V3DI3U	 	 ??LL8999!!LL?@@@199LL7888A::LL<===>>NNHIIIr3   use_symbolic_shape_inferc                 N    t          |           }|                    |           d S r5   )r   convert)r0   r  packing_modes      r2   convert_to_packing_modez%BertOnnxModel.convert_to_packing_mode  s*    "4((566666r3   )r   r   )T)r   r   )NFr5   )F)'__name__
__module____qualname__r    intr+   r9   r=   r?   rB   rD   rF   rH   rJ   rL   rO   rQ   rS   rV   rX   rq   rs   r  rf   boolr   r   r   r   r   r   r   r   r   r   r  r  r  __classcell__)r1   s   @r2   r%   r%   %   s}       ' 'j 'S '3 ' ' ' ' ' '*  / / /
	 	 	              	 	 	         (  s 4PS9 ^b    ,    

 
 
4 4 4 4(  'U 'U 'UR@+ @+ @+D  RB RB 4 RBt RB RB RB RBh  @& & & &P7 7 7 7 7 7 7 7 7 7r3   r%   N)>loggingr   r  r   fusion_attentionr   r   fusion_bart_attentionr   fusion_biasgelur   fusion_constant_foldr	   fusion_embedlayerr
   fusion_fastgelur   fusion_gelur   fusion_gelu_approximationr   fusion_gemmfastgelur   fusion_layernormr   r   fusion_optionsr   r   fusion_qordered_attentionr   fusion_qordered_gelur   fusion_qordered_layernormr   fusion_qordered_matmulr   fusion_quickgelur   fusion_reshaper   fusion_rotary_attentionr   fusion_shaper   fusion_simplified_layernormr   r   fusion_skiplayernormr   r   fusion_utilsr   onnxr    r!   r"   
onnx_modelr#   r  r   r%   rb   r3   r2   <module>r9     sR         / / / / / / ; ; ; ; ; ; ; ; 5 5 5 5 5 5 * * * * * * 3 3 3 3 3 3 ; ; ; ; ; ; * * * * * * " " " " " " = = = = = = 2 2 2 2 2 2 Q Q Q Q Q Q Q Q = = = = = = = = = = = = = = 3 3 3 3 3 3 F F F F F F 7 7 7 7 7 7 , , , , , , ( ( ( ( ( ( : : : : : : $ $ $ $ $ $ r r r r r r r r _ _ _ _ _ _ _ _ $ $ $ $ $ $ 0 0 0 0 0 0 0 0 0 0            	8		C7 C7 C7 C7 C7I C7 C7 C7 C7 C7r3   