
    fPi@                     0   d dl Z d dlZd dlZd dlZd dlmZmZmZmZm	Z	 d dl
mZmZ d dlmZmZ d dlmZ  ej        e          Z G d d          Z G d d	e          Z G d
 de          Z G d d          Zd Zd Zd Zedk    r e             dS dS )    N)AttentionInputIDsAttentionOutputIDsMultiHeadAttentionInputIDsMultiHeadAttentionOutputIDs	Operators)helper
load_model)	NodeProto	OnnxModel)SymbolicShapeInferenceHelperc                       e Zd ZdedefdZdedz  fdZdedz  fdZdedz  fdZ	de
fd	Zd
ee         dee         ddfdZd
ee         dee         ddfdZdededdfdZdedz  fdZdde
ddfdZdS )PackingAttentionBasemodelattention_op_typec                     || _         g | _        g | _        d| _        i | _        | j         j         j        j        | _        || _        | j         	                    |          | _
        d S )NF)r   nodes_to_removenodes_to_addprune_graphnode_name_to_graph_namegraphnamethis_graph_namer   get_nodes_by_op_typeattention_nodes)selfr   r   s      /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/convert_to_packing_mode.py__init__zPackingAttentionBase.__init__   sc     %
%'"$!&-/$$(J$4$:$?!2#z>>?PQQ    returnNc                 F   | j         t          j        k    rt          j        nt
          j        }|                                 }|rt          |j	                  |k    rd S |j	        |         }| j
        D ].}t          |j	                  |k    s|j	        |         |k    r d S /|S N)r   r   	ATTENTIONr   
MASK_INDEXr   KEY_PADDING_MASK_try_getting_first_attentionleninputr   )r   
mask_indexfirst_attention_nodeattention_masknodes        r   _try_getting_attention_maskz0PackingAttentionBase._try_getting_attention_mask$   s     %)<<< ((+< 	
  $@@BB# 	s+?+E'F'F*'T'T4-3J? ( 	 	D4:*,,
:0F.0X0Xtt 1Y r   c                 P    t          | j                  dk    rd S | j        d         S )Nr   )r&   r   r   s    r   r%   z1PackingAttentionBase._try_getting_first_attention8   s+    t#$$))4#A&&r   c                     d }| j                                         D ].}|j        t          j        k    s|j        t          j        k    r|}/|S r!   )r   nodesop_typer   	LAYERNORMSKIPLAYERNORM)r   last_layernorm_noder+   s      r   _try_getting_last_layernormz0PackingAttentionBase._try_getting_last_layernorm>   sQ    "J$$&& 	+ 	+D|y222dliF]6]6]&*#""r   c                     t                      r!   NotImplementedErrorr.   s    r   _are_attentions_supportedz.PackingAttentionBase._are_attentions_supportedE       !###r   inputsoutputsc                     t          j        t          j        ||| j                            t          j                            }d|_        | j                            |           | j	        | j
        |j        <   d S Nr;   r<   r   com.microsoft)r   	make_noder   REMOVEPADDINGr   create_node_namedomainr   appendr   r   r   r   r;   r<   new_nodes       r   _insert_removepadding_nodez/PackingAttentionBase._insert_removepadding_nodeH   ss    ##,,Y-DEE	
 
 
 *  ***6:6J$X]333r   c                     t          j        t          j        ||| j                            t          j                            }d|_        | j                            |           | j	        | j
        |j        <   d S r>   )r   rA   r   RESTOREPADDINGr   rC   rD   r   rE   r   r   r   rF   s       r   _insert_restorepadding_nodez0PackingAttentionBase._insert_restorepadding_nodeT   ss    #$,,Y-EFF	
 
 
 *  ***6:6J$X]333r   token_offsetcumulative_sequence_lengthc                     t                      r!   r7   )r   rL   rM   s      r   )_replace_attention_with_packing_attentionz>PackingAttentionBase._replace_attention_with_packing_attention`   r:   r   c                 ^    | j         t          j        k    r|j        t          j                 S d S r!   )r   r   r"   r'   r   INPUT)r   r)   s     r   _get_input_to_remove_paddingz1PackingAttentionBase._get_input_to_remove_paddingc   s*    !Y%888'-.?.EFFtr   Tuse_symbolic_shape_inferc                 P   t                               d           |                                 sd S |                                 }|sd S |                                 }|                                 }|sd S |                     |          }|sd S |dz   }|dz   }|dz   }|dz   }	|                     ||g||||	g           | j        	                    ||           t                               d           |j
        d         dz   }
|                     |
|g|j
        d         g           | j                            |j
        d         |
           t                               d	|j         d
           |                     ||           t                               d| j         d| j                    | j                            | j                   | j                            | j        | j                   | j        r| j                                         n'| j        s| j        r| j                                         | j                                         |rMt1          | j        j        d          }|                    | j        j        dd          }|r|| j        _        d S d S d S )Nz$start converting to packing model..._no_padding_token_offset_cumulated_seq_len_max_seq_lenz'inserted RemovePadding before Attentionr   _restore_inputz#inserted RestorePadding after last z layerz	replaced z with PackedverboseTF)
auto_mergeguess_output_rank)loggerdebugr9   r,   r%   r5   rR   rH   r   replace_input_of_all_nodesoutputrK   replace_output_of_all_nodesr1   rO   r   remove_nodesr   	add_nodesr   r   r   update_graphclean_shape_inferr   infer_shapes)r   rS   r*   r)   r4   input_to_remove_paddingoutput_without_paddingrL   cumulated_seq_lenmax_seq_lenrestorepadding_inputshape_infer_helperinferred_models                r   convertzPackingAttentionBase.converth   s   ;<<<--// 	F99;; 	F#@@BB">>@@" 	F #'"C"CDX"Y"Y& 	F!8=!H.@36JJ->''$n5#\3DkR	
 	
 	
 	
--.EG]^^^>???  39!<?OO((*>)MPcPjklPmOnooo
../B/I!/LNbccc^;N;V^^^___ 	66|EVWWW]!7]]TE[]]^^^
 4555
T.0LMMM 	&J""$$$$! 	&T%6 	&J##%%%
$$&&&# 	2 ">dj>NXY!Z!Z!Z/<<TZ=MZ^rw<xxN 2#1
   	2 	2
2 2r   T)__name__
__module____qualname__r   strr   r,   r
   r%   r5   boolr9   listrH   rK   rO   rR   ro    r   r   r   r      s       Ri RC R R R RS4Z    ('i$.> ' ' ' '#Y-= # # # #$4 $ $ $ $
Kc 
KT#Y 
KSW 
K 
K 
K 
K
K$s) 
Kd3i 
KTX 
K 
K 
K 
K$c $gj $os $ $ $ $C$J    
72 72 72 72 72 72 72 72 72r   r   c                   D     e Zd Zdef fdZdefdZdededdfdZ xZ	S )	PackingAttentionr   c                 `    t                                          |t          j                   d S r!   )superr   r   r"   r   r   	__class__s     r   r   zPackingAttention.__init__   s&    	 344444r   r   c                    | j         D ]}t          j        |d           dS t          j        |d           dS t          j        |d          }|	|dk    r dS t          |j                  t
          j        k    r|j        t
          j                 s dS t          |j                  t
          j        k    r|j        t
          j                 s dS dS )Npast_present_share_bufferF	do_rotaryunidirectionalr   T)r   r   get_node_attributer&   r'   r   PASTPAST_SEQUENCE_LENGTH)r   r+   unidirection_attrs      r   r9   z*PackingAttention._are_attentions_supported   s    ( 	 	D+D2MNNZuu+D+>>Juu ) <TCS T T ,1Ba1G1Guu4:!2!777
K\Ka@b7uuDJ"3"HHH
#4#IJ I uutr   rL   rM   Nc           
      R   | j         D ]p}t          |j                  t          j        k    r|j        t          j                 nd}t          j        t          j        |j        t          j	                 |j        t          j
                 |j        t          j                 |||g|j        t          j                 g| j                            t          j                            }g }|j        D ] }|j        dv r|                    |           !|j                            |           d|_        | j                            |           | j                            |           | j        | j        |j        <   rt4                              dt          | j                              d S )N r?   )	num_headsqkv_hidden_sizesscaler@   z0Converted %d Attention nodes to PackedAttention.)r   r&   r'   r   ATTENTION_BIASr   rA   r   PACKEDATTENTIONrQ   WEIGHTSBIASra   r   OUTPUTr   rC   	attributer   rE   extendrD   r   r   r   r   r^   info)r   rL   rM   	attentionattention_biaspacked_attention
attributesattrs           r   rO   z:PackingAttention._replace_attention_with_packing_attention   s   - 	W 	WI y''*;*JJJ  1 @AA 
  &/)O$5$;<O$5$=>O$5$:; ." #)*<*CDEZ001JKK      J!+ , ,9 JJJ%%d+++&--j999&5#$$%5666 ''	222BFBVD()9)>??FDL`HaHabbbbbr   )
rq   rr   rs   r   r   ru   r9   rt   rO   __classcell__r}   s   @r   ry   ry      s        5i 5 5 5 5 5 54    $ cc  cgj  cos  c  c  c  c  c  c  c  cr   ry   c                   v     e Zd Zdef fdZdedefdZdedefdZde	fdZ
d	ed
eddfdZdedz  fdZ xZS )PackingMultiHeadAttentionr   c                 `    t                                          |t          j                   d S r!   )r{   r   r   MULTI_HEAD_ATTENTIONr|   s     r   r   z"PackingMultiHeadAttention.__init__   s&    	 >?????r   indexr   c                     t          |j                  |k    rCt          |j        |                   dk    r%t                              d| d| d|            dS dS )'Check a node does not have given input.r   znode input  (0) is not supported in PackedMultiHeadAttention: FT)r&   r'   r^   errorr   r+   r   r   s       r   _check_empty_inputz,PackingMultiHeadAttention._check_empty_input   se    tz??U""4:e$%%))p5ppDppjnppqqqutr   c                     t          |j                  |k    rCt          |j        |                   dk    r%t                              d| d| d|            dS dS )r   r   znode output r   r   FT)r&   ra   r^   r   r   s       r   _check_empty_outputz-PackingMultiHeadAttention._check_empty_output   sg    t{e##4;u%&&**qEqqTqqkoqqrrrutr   r   c                 :   | j         D ]}|j        D ]4}|j        dvr)t                              d|j         d|              dS 5|j        t          j                 r4|j        t          j                 st                              d            dS | 	                    |t          j
        d          rc| 	                    |t          j        d          rB|                     |t          j        d          r!|                     |t          j        d          s dS dS )	Nr   mask_filter_valuer   znode attribute z/ is not supported in PackedMultiHeadAttention: Fz=packed kv format is not supported in PackedMultiHeadAttentionpast_keypresent_keyT)r   r   r   r^   r   r'   r   KEYVALUEr   PAST_KEY
PAST_VALUEr   r   PRESENT_KEYPRESENT_VALUE)r   r+   r   s      r   r9   z3PackingMultiHeadAttention._are_attentions_supported   s@   ( 	 	D ! !9$OOOLL!s49!s!smq!s!sttt 555 P z489 $*MgMmBn \]]]uu ''.H.QS]^^++D2L2WYcdd ,,T3N3Z\ijj ,,T3N3\^kll	 uu tr   rL   rM   Nc                    d}| j         D ]}t          |j                  t          j        k    r|j        t          j                 nd}t          j        t          j        |j        t          j	                 |j        t          j
                 |j        t          j                 |j        t          j                 |||g|j        t          j                 g| j                            t          j                            }g }|j        D ] }|j        dv r|                    |           !|j                            |           d|_        | j                            |           | j                            |           | j        | j        |j        <   |ri| j                            |t          j                  }	|	rB|	j        dk    r7t          |	j                  dk    r|	j                            |           |dz  }t:                              d	t          | j                              t:                              d
|           d S )Nr   r   r?   r   r@   GatedRelativePositionBias      zBConverted %d MultiHeadAttention nodes to PackedMultiHeadAttention.z=Converted %d GatedRelativePositionBias nodes to packing mode.)r   r&   r'   r   r   r   rA   r   PACKED_MULTI_HEAD_ATTENTIONQUERYr   r   r   ra   r   r   r   rC   r   r   rE   r   rD   r   r   r   r   
get_parentr1   r^   r   )
r   rL   rM   gated_relative_pos_bias_countmhar   
packed_mhar   r   rel_pos_bias_nodes
             r   rO   zCPackingMultiHeadAttention._replace_attention_with_packing_attention  s+   ()%' )	7 )	7C sy>>$>$MMM 	4CDD 
  )5I8>?I8<=I8>?I8=> ." $?$FGHZ001VWW  J J , ,9 KKK%%d+++ ''
333 /J$$Z000 '',,,<@<PD(9  7$(J$9$9#?Y?h$i$i!%7)15PPP-34499%+22<@@@1Q61XZ]^b^rZsZstttSUrsssssr   c                 p    | j                             |d          }|r|j        dk    r|j        d         S d S )Nr   MatMul)r   r   r1   r'   )r   r)   matmuls      r   rR   z6PackingMultiHeadAttention._get_input_to_remove_padding4  s?    &&';Q?? 	#fn00<?"tr   )rq   rr   rs   r   r   intrt   r   r   ru   r9   rO   rR   r   r   s   @r   r   r      s        @i @ @ @ @ @ @c     s #    4    *.tc .tgj .tos .t .t .t .t`C$J        r   r   c                   ,    e Zd ZdefdZddeddfdZdS )	PackingModer   c                     || _         d S r!   )r   )r   r   s     r   r   zPackingMode.__init__=  s    


r   TrS   r   Nc                    | j                             t          j                  ri| j                             t          j                  rt
                              d           d S t          | j                   }|                    |          S | j                             t          j                  r)t          | j                   }|                    |          S t
                              d           d S )NzRPacking mode does not support both Attention and MultiHeadAttention in same graph.zPPacking mode requires either Attention or MultiHeadAttention node in onnx graph.)
r   r   r   r"   r   r^   r   ry   ro   r   )r   rS   packings      r   ro   zPackingMode.convert@  s    :**9+>?? 	z..y/MNN qrrrt&tz22G??#;<<<Z,,Y-KLL 	/
;;G??#;<<<LLklll4r   rp   )rq   rr   rs   r   r   ru   ro   rw   r   r   r   r   <  sV        i            r   r   c                     t          j        d          } |                     ddt          d           |                     ddt          d           |                     d	d
dd           |                     d
           |                     dd
dd           |                     d
           |                                 }|S )Nz_Convert to packing mode tool for ONNX Runtime. It converts BERT like model to use packing mode.)descriptionz--inputTzinput onnx model path)requiredtypehelpz--outputzoptimized onnx model pathz	--verboseF
store_truezshow debug information.)r   actionr   rZ   z--use_external_data_formatz4use external data format to store large model (>2GB)use_external_data_format)argparseArgumentParseradd_argumentrt   set_defaults
parse_args)parserargss     r   _parse_argumentsr   O  s    $u  F 	DsAXYYY

TB]^^^
eLOhiii
&&&
$C	     777DKr   c                 d    | rt          j        dd           d S t          j        d           d S )NDEBUGz8[%(filename)s:%(lineno)s - %(funcName)20s()] %(message)s)levelfmtz%(funcName)20s: %(message)s)r   )coloredlogsinstallrZ   s    r   _setup_loggerr   g  sQ     ?J	
 	
 	
 	
 	
 	

 	 =>>>>>>r   c                      t                      } t          | j                   t                              d|             t
          j                            | j                  t
          j                            | j	                  k    rt          
                    d           t          | j                  }t          t          |                    }|                                 |j                            | j	        | j                   d S )Nz
arguments:zYSpecified the same input and output path. Note that this may overwrite the original modelr   )r   r   r[   r^   r_   ospathrealpathr'   ra   warningr	   r   r   ro   r   save_model_to_filer   )r   r   packing_modes      r   mainr   q  s    D$,
LL$d$$%%%	w
##rw'7'7'D'DDDrssstz""Ey//00L))$+PTPm)nnnnnr   __main__)r   loggingr   r   	constantsr   r   r   r   r   onnxr   r	   
onnx_modelr
   r   rm   r   	getLoggerrq   r^   r   ry   r   r   r   r   r   rw   r   r   <module>r      s     				                  $ # # # # # # # + + + + + + + + ; ; ; ; ; ;		8	$	$F2 F2 F2 F2 F2 F2 F2 F2R6c 6c 6c 6c 6c+ 6c 6c 6cr^ ^ ^ ^ ^ 4 ^ ^ ^B       &  0? ? ?o o o  zDFFFFF r   