
    fPi3                     v    d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	  ee
          Zd Z G d de          ZdS )	    )	getLoggerN)FusionGptAttentionPastBase)helper)	OnnxModelc                 .    t          | |z
            dk    S )Ngư>)abs)valueexpected_values     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/fusion_gpt_attention_megatron.pyis_closer      s    u~%&&$..    c                   >     e Zd ZdZdedef fdZd Zd Zd Z	 xZ
S )FusionGptAttentionMegatronz^
    Fuse GPT-2 Attention with past state subgraph from Megatron into one Attention node.
    model	num_headsc                 L    t                                          ||           d S )N)super__init__)selfr   r   	__class__s      r   r   z#FusionGptAttentionMegatron.__init__   s#    	*****r   c                     | j                             d          }|                     |          }	|j        d         }
|j        d         |j        d         k    rdnd}t          j        d||j        d         |j        |         |	|g|
|g|          }d|_        |j        	                    t          j
        d| j                  t          j
        dd          g           | j        @|j        	                    t          j
        d	t          | j                            g           |g}| j        	                    |           |D ]}| j        | j        |j        <   | j                            |           d
| _        d S )NGptAttentionr      	Attention)inputsoutputsnamezcom.microsoftr   unidirectionalmask_filter_valueT)r   create_node_namecast_attention_maskoutputinputr   	make_nodedomain	attributeextendmake_attributer   r   floatnodes_to_addthis_graph_namenode_name_to_graph_namer   nodes_to_removeappendprune_graph)r   matmul_before_splitadd_before_splitpastpresentr#   reshape_qkvmaskattention_node_name
int32_maskr"   iattention_noder*   nodes                  r   fuse_attention_nodez.FusionGptAttentionMegatron.fuse_attention_node   s    #j99.II--d33
#A&"(+/B/I!/LLLAAST)#)!, &q) W%$
 
 
 !0 ''%k4>BB%&6::	
 	
 	
 !-$++V-BCVX]^b^tXuXu-v-v,wxxx&'  ...  	K 	KD6:6JD(33##K000  r   c                    | j                             |g dg d          }|t                              d           d S |\  }}}}	t	          |          dk    rB|d         j        dk    r1| j                             |d                   \  }
}|dk    r| | _        |j        d         |j	        d         k    rt                              d           d S | j
                            |dd	          st                              d
           d S | j
                            |dd          st                              d           d S | j                             |	j        d                   st                              d           d S | j
                            |ddg          st                              d           d S | j
                            |ddg          st                              d           dS | j
                            |ddg          st                              d           dS | j
                            |	ddg          st                              d           d S | j
                            |	ddg          st                              d           d S | j                             |g dg d          }||d         |k    rt                              d           d S | j                             |	g dg d          }||d         |k    rt                              d           d S | j                             |	g dg d          }||d         |k    rt                              d           d S | j                             |	g dg d           }| | j                             |	g d!g d           }||d         |k    rt                              d"           d S |	j        d         S )#N)MulSubSlicer?   )r   r   r   r   z8fuse_attention: failed to match unidirectional mask pathr   r   r=   i'  zCfuse_attention failed: mul_qk.input[1] != last_slice_mask.output[0]g     @z?fuse_attention failed: mul_mask input 1 is not constant 10000.0g      ?z;fuse_attention failed: sub_mask input 0 is not constant 1.0z+expect slick_mask input 0 to be graph inputzKfuse_attention failed: last_slice_mask input 1 (starts) is not constant [0]   zIfuse_attention failed: last_slice_mask input 3 (axes) is not constant [3]F   zJfuse_attention failed: last_slice_mask input 4 (steps) is not constant [1]   zDfuse_attention failed: slice_mask input 3 (axes) is not constant [2]zEfuse_attention failed: slice_mask input 4 (steps) is not constant [1])	UnsqueezeGatherShapeMatMul)rB   r   r   r   z/fuse_attention: failed to match last slice pathz0fuse_attention: failed to match first slice path)rC   r>   rD   rE   rF   )r   r   r   r   r   z3fuse_attention: failed to match last slice sub path)rC   r>   rD   rE   LayerNormalization)r   r   r   r   r   )rC   r>   rD   rE   SkipLayerNormalizationz5fuse_attention: failed to match last slice sub path 1)r   match_parent_pathloggerdebuglenop_typeget_constant_inputr   r#   r"   utilscheck_node_input_valuefind_graph_inputinfo)r   sub_qkmul_qk	matmul_qklayernorm_before_attention
mask_nodesmul_masksub_masklast_slice_mask
slice_mask_mul_vallast_slice_pathfirst_slice_pathfirst_slice_subfirst_slice_sub_1s                   r   
match_maskz%FusionGptAttentionMegatron.match_maskJ   s0   Z11&:Z:Z:Z\h\h\hii
LLSTTT4<F98_jz??Q:a=#8E#A#A66z!}EEJAw%*1&<?o4Q777LL^___4z001gFF 	LLZ[[[4z001cBB 	LLVWWW4z**:+;A+>?? 	KKEFFF4z00!aSII 	LLfggg4z00!aSII 	LLdeee5z00!aSII 	LLefff5z00QDD 	LL_```4z00QDD 	LL`aaa4*66GGG
 
 "ob&9Y&F&FLLJKKK4:77BBBLLL
 
 #'7';y'H'HLLKLLL4*66===OO
 

 "ob&9Y&F&FLLNOOO4 J88IIIOO
 
 $ $
 < <QQQ! ! $(9"(=A[([([LLPQQQ4""r   c           	      
   d }d }|j         dk    }d }|s#| j                            |g dg d|          }n"| j                            |g dg d|          }|d S d }|s|\  }	}
}}}}|	j        d         }n|\  }
}}}}|j        d         }| j                            |g dg d	          }| | j                            |g d
g d	          }|t                              d           d S |\  }}}}}}}|j         dk    r-||j        d         k    rt                              d           d S |j         dk    r-||j        d         k    rt                              d           d S | j                            |g dg d          }|t                              d           d S |\  }}}}| j                            |d          dk    rt                              d           d S |                     ||||          }| j                            |g dg d          }|t                              d           d S |\  }}} }!||!k    rt                              d           d S | j                            |g dg d          }"|"t                              d           d S |"\  }#}$}%}&}'}(||(k    rt                              d           d S | j        	                    |'          \  })}*t          |*t          j                  rIt          |*j                  dgk    r0|*d         dk    r$|*d         dk    r|*d         dk    r|*d         dk    st                              d           d S |*d         }+|+| j        k    r,t                              d|+ d | j                    |+| _        |*d         },| j        	                    |#          \  })}*t#          t          j        t          j        |,                              }-t'          |*|-          s"t                              d!|* d"|-            d S | j        	                    |          \  })}*t'          |*|-          s"t                              d#|* d"|-            d S |                     |%||          }|t                              d$           d S | j                            |          st                              d%           |                     ||          }|t                              d&           d S | j                            |          st                              d'           d S |                     |||||j        d         ||           d S )(NrI   )Addre   rF   Reshape	TransposerF   )r   r   Nr   r   r   )output_name_to_node)re   rF   rf   rg   rF   )r   Nr   r   r   r   )Concatrg   rf   Splitre   rF   rH   )r   r   r   r   r   Nr   )ri   rg   rf   rj   re   rF   rI   z&fuse_attention: failed to match v pathrH   zAfuse_attention: skip_input != layernorm_before_attention.input[0]r@   )Softmaxr>   r=   rF   )r   r   r   r   z'fuse_attention: failed to match qk pathaxisz+fuse_attention failed: softmax_qk axis != 3)Divrg   rf   rj   z&fuse_attention: failed to match q pathz-fuse_attention: skip since split_v != split_q)rm   rg   ri   rg   rf   rj   )r   r   r   r   r   r   z&fuse_attention: failed to match k pathz-fuse_attention: skip since split_v != split_krA   r   rB   z:fuse_attention: reshape constant input is not [0, 0, N, H]zDetected num_heads=z. Ignore user specified value zfuse_attention: div_k value=z
 expected=zfuse_attention: div_q value=z!fuse_attention: match past failedz(fuse_attention: past is not graph input.z$fuse_attention: match present failedz1fuse_attention: expect present to be graph output)rN   r   rJ   r#   rK   rL   r"   get_node_attributerc   rO   
isinstancenpndarraylistshaper   rS   r)   sqrtr   match_past_pattern_2rR   match_presentfind_graph_outputr;   ).r   normalize_nodeinput_name_to_nodesrh   r2   r3   is_normalize_node_skiplayernorm	qkv_nodes
skip_inputadd_skipadd_after_attentionmatmul_after_attentionr4   transpose_qkv
matmul_qkvv_nodesconcat_vtranspose_v	reshape_vsplit_vr1   r0   rW   qk_nodes
softmax_qkrT   rU   rV   attention_maskq_nodesdiv_qtranspose_q	reshape_qsplit_qk_nodesdiv_kr]   concat_ktranspose_k	reshape_ksplit_kr8   r	   r   hidden_size_per_headr
   s.                                                 r   fusezFusionGptAttentionMegatron.fuse   sk   *8*@D\*\'	. 	
44JJJ%%%$7	 5  II 
44CCC"""$7	 5  I F
. 	1 #& "*JJ #& (-a0J*..   %$$
 
 ?j22   )(( G ?LLABBBF 	
& '.2FFF8>qAAALL\]]]F '.2JJJ8?BBBLL\]]]F://
<_<_<_amamamnnLLBCCC42:/VVY:((V<<AALLFGGG4D^__*..y:b:b:bdpdpdpqq?LLABBBF3:0YgLLHIIIF*..KKK
 

 ?LLABBBF@G=8[)WgLLHIIIF:00;;5ubj))		U[!!aS((aAaAa1a1LLUVVVF!H	&&KKgiggW[Wegghhh&DN$Qx:00775rwrw/C'D'DEEFF~.. 	LLYYYYYZZZF:00775~.. 	LLYYYYYZZZF ((8=PQQ<LL<===Fz**400 	ELLCDDD $$X/BCC?LL?@@@Fz++G44 	KKKLLLF  &-a0	
 	
 	
 	
 	
r   )__name__
__module____qualname____doc__r   intr   r;   rc   r   __classcell__)r   s   @r   r   r      s         +i +C + + + + + +-  -  - ^X# X# X#t
 
 
 
 
 
 
r   r   )loggingr   numpyrp   fusion_gpt_attentionr   onnxr   
onnx_modelr   r   rK   r   r    r   r   <module>r      s   
           ; ; ; ; ; ;                  	8		/ / /P
 P
 P
 P
 P
!; P
 P
 P
 P
 P
r   