
    fPi3                         d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	Z	ddl
Z	ddlmZmZ  ej        d          Z G d d	          ZdS )
zClass for ONNX model.    N)deque)Path   )MAXIMUM_PROTOBUFfind_by_nameneural_compressorc                      e Zd ZdZd Zd Zed             Zed             Zej	        d             Zd Z
ed             Zej	        d	             Zed
             Zed             Zej	        d             Zd Zd Zd Zed             Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d  Z!d! Z"d" Z#dOd$Z$ed%             Z%d& Z&ed'             Z'd( Z(d) Z)dPd+Z*dPd,Z+dPd-Z,d. Z-d/ Z.d0 Z/dOd1Z0e1d2             Z2dQd3Z3e1d4             Z4dQd5Z5d6 Z6dOd7Z7dPd8Z8d9 Z9dOd:Z:d; Z;d< Z<d= Z=d> Z>dPd?Z?	 	 	 	 dRd@Z@	 	 dQdAZAdB ZBdC ZC	 dSdEZDdF ZEdG ZFdH ZGdI ZHdPdJZIdTdLZJdM ZKdN ZLd*S )U	ONNXModelzBuild ONNX model.c                    t          |t                    s|nt          j        |d          | _        t          |t                    sdn|| _        |                                  | j        r7| j        0|                    dd          st          
                    d           | j        rrt          |t                    r]|                    dd          rGt          j                            | j        t          j                            | j                             d| _        t          |t                    rt          j                            t%          |          j                            d                                                    rDd	d
lm} |                    t%          |          j                                                  | _        i | _        i | _        i | _        |                     | j        j        j                   |                     | j        j        j                   i | _         | !                                 d| _"        dS )a;  Initialize an ONNX model.

        Args:
            model (str or ModelProto): path to onnx model or loaded ModelProto model object.
            ignore_warning (bool): ignore large model warning. Default is False.
            load_external_data (bool): load external data for large model. Default is True.
        F)load_external_dataNignore_warningzPModel size > 2GB. Please use model path instead of onnx model object to quantizer   Tconfig.jsonr   )
AutoConfig)#
isinstancestronnxload_model_model_pathcheck_is_large_model_is_large_modelgetloggerwarningexternal_data_helperload_external_data_for_modelospathdirname_configexistsr   parentjoinpathas_posixtransformersr   from_pretrainednode_name_counter_output_name_to_node_input_name_to_nodes_get_input_name_to_nodesgraphnode_get_output_name_to_node_graph_info_get_graph_info	_q_config)selfmodelkwargsr   s       /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/quantization/neural_compressor/onnx_model.py__init__zONNXModel.__init__)   s    $.eS#9#9ieetych?i?i?i'1%'='=H445!!### 	oD$4$<VZZP`bgEhEh$<NNmnnn 	sJuc$:$: 	svzzJ^`d?e?e 	s%BB4;PRPWP_P_`d`pPqPqrrreS!! 	UbgnnT%[[5G5P5PQ^5_5_5h5h5j5j&k&k 	U//////%55d5kk6H6Q6Q6S6STTDL!#$&!$&!%%dk&7&<===%%dk&7&<===    c                    d}| j         j        j        D ]}|                    d          r$|j        t
          j        j        k    r
d| _         dS 	 |	                                }|t          j        |          z  }n3# t          $ r&}dt          |          v rd| _        Y d}~ dS |d}~ww xY w|t          k    r
d| _         dS d| _        dS )zCheck model > 2GB.r   data_locationTNz$exceeds maximum protobuf size of 2GBF)r   r+   initializerHasFieldr8   r   TensorProtoEXTERNALr   SerializeToStringsys	getsizeof	Exceptionr   r   )r1   	init_sizeinit
init_byteses        r4   r   zONNXModel.check_is_large_modelJ   s    	K%1 	 	D}}_-- $2DHXHa2a2a'+$!3355
S]:666		   9SVVCC+/D(FFFFFFG +++'+$ ,  %s   +A<<
B,B'%B''B,c                     | j         S )z!Check the onnx model is over 2GB.)r   r1   s    r4   is_large_modelzONNXModel.is_large_modela   s     ##r6   c                     | j         S )zReturn model path.r   rF   s    r4   
model_pathzONNXModel.model_pathf        r6   c                     || _         dS )zSet model path.NrI   )r1   r   s     r4   rJ   zONNXModel.model_pathk   s      r6   c                     dS )zReturn framework.onnxruntime rF   s    r4   	frameworkzONNXModel.frameworkp   s    }r6   c                     | j         S )zReturn q_config.r0   rF   s    r4   q_configzONNXModel.q_configt   s     ~r6   c                     || _         dS )zSet q_config.NrR   )r1   rS   s     r4   rS   zONNXModel.q_configy   s     "r6   c                     | j         S )z8Return huggingface config if model is Transformer-based.)r    rF   s    r4   	hf_configzONNXModel.hf_config~   s     |r6   c                     | j         S )zReturn model itself.)r   rF   s    r4   r2   zONNXModel.model   s     {r6   c                     || _         i | _        |                                  i | _        i | _        |                     | j         j        j                   |                     | j         j        j                   dS )zSet model itself.N)	r   r.   r/   r(   r)   r*   r+   r,   r-   )r1   r2   s     r4   r2   zONNXModel.model   ss     $&!$&!%%dk&7&<===%%dk&7&<=====r6   c                 8    d | j         j        j        D             S )zReturn input of model.c                     g | ]	}|j         
S rO   name.0is     r4   
<listcomp>z#ONNXModel.input.<locals>.<listcomp>   s    8881888r6   )r   r+   inputrF   s    r4   ra   zONNXModel.input   s    88 1 78888r6   c                 8    d | j         j        j        D             S )zReturn output of model.c                     g | ]	}|j         
S rO   r[   r]   s     r4   r`   z$ONNXModel.output.<locals>.<listcomp>   s    9991999r6   )r   r+   outputrF   s    r4   rd   zONNXModel.output   s    99 1 89999r6   c                     i | _         |                                  i | _        i | _        |                     | j        j        j                   |                     | j        j        j                   dS )zUpdate model info.N)	r.   r/   r(   r)   r*   r   r+   r,   r-   rF   s    r4   updatezONNXModel.update   sj    $&!$&!%%dk&7&<===%%dk&7&<=====r6   c                     | j         S )zEReturn ORT Graph Info object holding information about backend graph.)r.   rF   s    r4   
graph_infozONNXModel.graph_info   rK   r6   c                 z    | j         j        j        D ](}| j                            |j        |j        i           )dS )zUpdate graph info.N)r   r+   r,   rh   rf   r\   op_typer1   r,   s     r4   r/   zONNXModel._get_graph_info   sE    K%* 	> 	>DO""DIt|#<====	> 	>r6   c           	      t   t           j                            |          d         dk    rQt           j                            t           j                            |          d                   st	          d          | j        rt          j                            | j	        t           j                            | j
                  d                    t          j        | j	        |dd|                    d          d         dz   dd	
           nt          j        | j	        |           | j        t          | j        d          sdn| j        j        }|| j        j        _        t#          |          j                            d                                          }| j                            |d	           dS dS )zSave ONNX model.r    z!"root" directory does not exists.T/_data   Fsave_as_external_dataall_tensors_to_one_filelocationsize_thresholdconvert_attributeN
model_typer   )use_diff)r   r   splitr!   
ValueErrorrG   r   r   r   r   r   
save_modelsaver    hasattrrx   	__class__r   r"   r#   r$   to_json_file)r1   rootrx   output_config_files       r4   r}   zONNXModel.save   s~   7==q!R''rw}}T?R?RST?U0V0V'@AAA 	)%BB4;PRPWP]P]^b^nPoPopqPrsssO&*(,C,w6#"'     Idk4(((<##*4<#F#FcDLLcJ0:DL"-!%d!2!;!;M!J!J!S!S!U!UL%%&85%IIIII	 $#r6   c                 $    | j         j        j        S )zReturn model nodes.)r   r+   r,   rF   s    r4   nodeszONNXModel.nodes   s    { %%r6   c                 $    | j         j        j        S )zReturn model initializer.)r   r+   r9   rF   s    r4   r9   zONNXModel.initializer   s    { ,,r6   c                     | j         j        S )zReturn model graph.)r   r+   rF   s    r4   r+   zONNXModel.graph   s    {  r6   c                     | j         j        S )zReturn model ir_version.)r   
ir_versionrF   s    r4   r   zONNXModel.ir_version   s    {%%r6   c                     | j         j        S )zReturn model opset_import.)r   opset_importrF   s    r4   r   zONNXModel.opset_import   s    {''r6   c                 x    || j         j        j        v r&| j         j        j                            |           dS dS )zRemove a node from model.N)r   r+   r,   removerk   s     r4   remove_nodezONNXModel.remove_node   s?    4;$)))K"))$///// *)r6   c                 :    |D ]}|                      |           dS )zRemove nodes from model.N)r   )r1   nodes_to_remover,   s      r4   remove_nodeszONNXModel.remove_nodes   s2    # 	# 	#DT""""	# 	#r6   c                 P    | j         j        j                            |g           dS )zAdd a node to model.Nr   r+   r,   extendrk   s     r4   add_nodezONNXModel.add_node   s&    %%tf-----r6   c                 N    | j         j        j                            |           dS )zAdd nodes to model.Nr   )r1   nodes_to_adds     r4   	add_nodeszONNXModel.add_nodes   s$    %%l33333r6   c                     t          |j        | j        j        j                  '| j        j        j                            |g           dS dS )zAdd a initializer to model.N)r   r\   r   r+   r9   r   r1   tensors     r4   add_initializerzONNXModel.add_initializer   sI    T[%6%BCCKK)00&::::: LKr6   c                 :    |D ]}|                      |           dS )zAdd initializers to model.N)r   )r1   tensorsr   s      r4   add_initializerszONNXModel.add_initializers   s2     	) 	)F  ((((	) 	)r6   c                 L    | j         j        j        D ]}|j        |k    r|c S dS )zGet an initializer by name.N)r   r+   r9   r\   )r1   r\   r   s      r4   get_initializerzONNXModel.get_initializer   s9    k'3 	 	F{d"" #tr6   c                     d}|                      |          |S |                                 D ]}||j        v r|dz  }|S )z(Get the number of shares of initializer.r   Nr   )r   r   ra   )r1   r\   numr,   s       r4   get_initializer_share_numz#ONNXModel.get_initializer_share_num   sS    %%-JJJLL 	 	Dtz!!q
r6   c                 L    | j         j        j        D ]}|j        |k    r|c S dS )zGet a node by name.N)r   r+   r,   r\   )r1   r\   r,   s      r4   get_nodezONNXModel.get_node  s9    K%* 	 	DyD   !tr6   c                 x    || j         j        j        v r&| j         j        j                            |           dS dS )z!Remove an initializer from model.N)r   r+   r9   r   r   s     r4   remove_initializerzONNXModel.remove_initializer  s?    T[&222K)0088888 32r6   c                 :    |D ]}|                      |           dS )zRemove initializers from model.N)r   )r1   init_to_remover9   s      r4   remove_initializerszONNXModel.remove_initializers  s2    ) 	1 	1K##K0000	1 	1r6   Fc                    |                      |          }|                     |           |j        }|j        }|sFt          j                            ||||                                                                          n5t          j                            ||||	                                |          }| 
                    |           dS )zUpdate initializer.)rawN)r   r   dims	data_typer   helpermake_tensorflattentolisttostringr   )r1   r   arrayr   
old_tensorr   r   
new_tensors           r4   set_initializerzONNXModel.set_initializer  s    ))&11

+++(	 ]DK##FItU]]__=S=S=U=UVVV((D%..BRBRX[(\\ 	
 	Z(((((r6   c                     | j         S )zReturn input names of nodes.)r)   rF   s    r4   input_name_to_nodeszONNXModel.input_name_to_nodes&       ((r6   c                 h   |D ]}d |j         D             }t          |          dk    r$|D ]!}|                     |j        j                   "|j        D ]\}t          |                                          dk    r5|| j        vr|g| j        |<   <| j        |                             |           ]dS )zGet input names of nodes.c                 z    g | ]8}|j         t          j        j        k    s|j         t          j        j        k    6|9S rO   typer   AttributeProtoGRAPHGRAPHSr^   attrs     r4   r`   z6ONNXModel._get_input_name_to_nodes.<locals>.<listcomp>.  G       9 3 999TY$J]Jd=d=d =d=d=dr6   r   N)		attributelenr*   gr,   ra   stripr)   append)r1   r   r,   attrsr   
input_names         r4   r*   z"ONNXModel._get_input_name_to_nodes+  s     	K 	KD  N  E
 5zzA~~! ? ?D11$&+>>>>"j K K
z''))**a//!)BBBAE1*==1*=DDTJJJK	K 	Kr6   c                     | j         S )zReturn output names of nodes.)r(   rF   s    r4   output_name_to_nodezONNXModel.output_name_to_node=  r   r6   c                    |D ]}d |j         D             }t          |          dk    r$|D ]!}|                     |j        j                   "|j        D ]1}t          |                                          dk    r
|| j        |<   2dS )zGet output names of nodes.c                 z    g | ]8}|j         t          j        j        k    s|j         t          j        j        k    6|9S rO   r   r   s     r4   r`   z6ONNXModel._get_output_name_to_node.<locals>.<listcomp>E  r   r6   r   N)r   r   r-   r   r,   rd   r   r(   )r1   r   r,   r   r   output_names         r4   r-   z"ONNXModel._get_output_name_to_nodeB  s     	B 	BD  N  E
 5zzA~~! ? ?D11$&+>>>>#{ B B{((**++q00=AD-k:B	B 	Br6   c                     g }|                      |          D ]?}|                     |          D ]'}|j        |j        k    r|                    |           (@|S )zGet siblings nodes.)get_parentsget_childrenr\   r   )r1   r,   siblingsr"   childs        r4   get_siblingszONNXModel.get_siblingsQ  so    &&t,, 	+ 	+F**622 + +:**OOE***+ r6   Nc                 x    || j         }g }|j        D ]&}||v r ||         D ]}|                    |           '|S )zGet children nodes.)r)   rd   r   )r1   r,   r   childrenrd   r   s         r4   r   zONNXModel.get_childrenZ  sb    &"&";k 	+ 	+F,,,08 + +EOOE****r6   c                 n    || j         }g }|j        D ]!}||v r|                    ||                    "|S )zGet parents nodes.)r(   ra   r   )r1   r,   r   parentsra   s        r4   r   zONNXModel.get_parentsf  sQ    &"&";Z 	; 	;E+++259:::r6   c                 ~    || j         }t          |j                  |k    rdS |j        |         }||vrdS ||         S )zGet parent node by idx.N)r(   r   ra   )r1   r,   idxr   ra   s        r4   
get_parentzONNXModel.get_parentq  sP    &"&";tz??c!!4
3+++4"5))r6   c                 x    t          |j                  }|                    |           t          ||          }|S )zFind out node by name.)listr,   r   r   )r1   	node_namenew_nodes_listr+   graph_nodes_listr,   s         r4   find_node_by_namezONNXModel.find_node_by_name  s;    
++///I'788r6   c                 r    g }|j         D ],}|j        D ]"}||j        k    r|                    |           #-|S )z2Find all nodes with given initializer as an input.)r,   ra   r\   r   )r1   r+   r9   r   r,   
node_inputs         r4   find_nodes_by_initializerz#ONNXModel.find_nodes_by_initializer  sV    J 	' 	'D"j ' '
!111LL&&&' r6   c                 v    |                     d          s t                              d| d           dS  fd j        |         d         }|j        dk    r||j        d         k    s|j        d	k    r||j        d
         k    rdS  |          \  }}|sJ d|             |sJ d|             ||fS )z*Help function to get scale and zero_point.
_quantizedzFind z) in the quantized graph is not quantized.NNc                    
j         |          d         }
j                            | d          }g d}|S|j        |v rJ|j        d                             dd                              dd                              dd          }n|j        dv rJ|j        d                             dd                              dd                              dd          }n>|                     dd                              dd                              dd          }|d	z   }
                    |          }|d
z   }
                    |          }||| 	|j        d                   \  }}||fS )z/Search scale and zero point tensor recursively.r   N)Reshape	TransposeSqueeze	UnsqueezeMaxPoolPadSplitr   rm   _QuantizeLinear_QuantizeInput)Gather_scale_zero_point)r)   r(   r   rj   ra   replacerd   r   )tensor_namer,   r"   direct_int8fp32_tensor_namescalescale_tensorzo	zo_tensor	_searcherr1   s            r4   r   z+ONNXModel.get_scale_zero.<locals>._searcher  s   ,[9!<D.22;EEFeeeK!fn&C&CLOW\2..W.33W-r22	 !  ++KNW\2..W.33W-r22	 !   ''b99AABSUWXX``aqsuvv ! %x/E//66L!M1B,,R00I#y'8%.7iQ.H.H+L)**r6   r   QLinearConvro   QGemmzmissing scale for tensor zmissing zero point for tensor )endswithr   debugr)   rj   ra   )r1   r   r,   r   r   r   s   `    @r4   get_scale_zerozONNXModel.get_scale_zero  s   |,, 	LLRRRRSSS:	+ 	+ 	+ 	+ 	+ 	+B (03 LM))f
2.F.FLG##$*R.(@(@:&/i&7&7#L)EE!EV!E!EEE<GGGvGGGG9**r6   c                     |r<t           j                            | j        dt	          |          j        dz              t          j        | j        |           dS )zBSave model to external data, which is needed for model size > 2GB.Tz.data)rt   ru   N)r   r   convert_model_to_external_datar   r   r\   r|   )r1   output_pathuse_external_data_formats      r4   save_model_to_filezONNXModel.save_model_to_file  sa    # 	%DDTDDUDUDZ]dDd E    	[11111r6   c                     t          |t                    rt          |t                    sJ t          t          | j                            D ]}| j        |         |k    r
|| j        |<   dS )zReplace input of a node.N)r   r   ranger   ra   )r,   old_input_namenew_input_namejs       r4   replace_node_inputzONNXModel.replace_node_input  so     .#..R:nc3R3RRRRs4:'' 	/ 	/Az!}.. .
1	/ 	/r6   c                 $   |g }|g }t          |          dk    r;| j        j        j        D ]'}|j        |v rt
                              |||           (dS | j        j        j        D ]'}|j        |vrt
                              |||           (dS )zReplace inputs of all nodes.Nr   )r   r2   r+   r,   rj   r
   r  )r1   r  r  white_optypeblack_optyper,   s         r4   replace_input_of_all_nodesz$ONNXModel.replace_input_of_all_nodes  s    LL|q  
(- W W<<//00~~VVVW W 
(- W W<|3300~~VVVW Wr6   c                     t          |t                    rt          |t                    sJ t          t          | j                            D ]}| j        |         |k    r
|| j        |<   dS )zReplace output of a node.N)r   r   r  r   rd   )r,   old_output_namenew_output_namer  s       r4   replace_node_outputzONNXModel.replace_node_output  sr     /3//TJPS4T4TTTTs4;''(( 	1 	1A{1~00!0A	1 	1r6   c                 $   |g }|g }t          |          dk    r;| j        j        j        D ]'}|j        |v rt
                              |||           (dS | j        j        j        D ]'}|j        |vrt
                              |||           (dS )zReplace outputs of all nodes.Nr   )r   r2   r+   r,   rj   r
   r  )r1   r  r  r  r  r,   s         r4   replace_output_of_all_nodesz%ONNXModel.replace_output_of_all_nodes  s    LL|q  
(- Z Z<<//11$YYYZ Z 
(- Z Z<|3311$YYYZ Zr6   c                 p   g }|                                  }|D ]}|j        dk    rH|j        d         | j        j        j        vr*|j        d         | j        vr|                    |           V|j        dk    rt          |                     |                    dk    r|                     |          d         j        dk    r|j	        d         | j
        vrl|                     |          d         j        d         | j        vr?|                    |           |                    |                     |                     +d}|j        D ]%}|| j        v s||                                 v rd} n&|j	        D ];}|                     |          || j
        v s|| 	                                v rd} n<|r|                    |           |                     |           g }| j        j        j        D ]}|j        | j        vr|j        | j        j        j        vrm|                    |           |                                 j	        D ]>}	|	j        |j        k    r,|                                 j	                            |	           ?|                     |           |                                  dS )	zRemove unused nodes.Constantr   QuantizeLinearr   DequantizeLinearTFN)r   rj   rd   r   r+   r)   r   r   r   ra   r(   r   r   r   r9   r\   r   r   rf   )
r1   unused_nodesr   r,   unusedrd   ra   ununsed_weightswgraph_inputs
             r4   remove_unused_nodeszONNXModel.remove_unused_nodes  s   

 	. 	.D
**KN$+*;*BBBKN$*CCC##D)))) 000))$//00A55%%d++A.6:LLLJqM)BBB%%d++A.5a8@YYY##D)))##D$5$5d$;$;<<<< "k  F!:::f>U>U!& ?V "Z  E++E22> $";;;u

?T?T!& @U  . ''---,'''". 	? 	?AvT66616IZIa;a;a&&q)))#'::<<#5 ? ?K"'1611

*11+>>>  111r6   c                     |si }i  j         j        j        D ]}|j        D ]M}t	          |                                          dk    r&||vr|g||<   2||                             |           N|j        D ],}t	          |                                          dk    r||<   -n j        } j	        i t                      t                      } j         j        j        D ]"}                    ||j                            # j         j        j        D ]8}t           fd|j        D                       r                    |           9r߉                                }t          fd|j        D                       s||vr|                    |           Q||j        <   |j        D ].}	|	|v r(                    fd||	         D                        /t	                    dk    r;t	          |          dk    r(t          j        |          |                                 d                                 D             }
t	          t'          d |
D                                 t	          t'          d  j         j        j        D                                 k    sJ  j         j                            d            j         j        j                            |
           d	S )
zTopological sort the model.r   c              3   L   K   | ]}|vo|                                 vV  d S N)ra   )r^   r_   r   r1   s     r4   	<genexpr>z-ONNXModel.topological_sort.<locals>.<genexpr>D  s;      [[a1//IATZZ\\4I[[[[[[r6   c              3   >   K   | ]}|v |         j         v V  d S r'  r[   )r^   r_   	all_nodesr   s     r4   r(  z-ONNXModel.topological_sort.<locals>.<genexpr>I  s=      mmATUYlTlTl*1-2i?TlTlTlTlmmr6   c                 .    g | ]}|j         v|v|S rO   r[   )r^   r_   r*  qs     r4   r`   z.ONNXModel.topological_sort.<locals>.<listcomp>Q  s2    lllAQVS\E\E\abjkakakaakakakr6   c                     g | ]
}|d          S )r   rO   r]   s     r4   r`   z.ONNXModel.topological_sort.<locals>.<listcomp>U  s    111!1111r6   c                     h | ]	}|j         
S rO   r[   r^   ns     r4   	<setcomp>z-ONNXModel.topological_sort.<locals>.<setcomp>V  s    ///A///r6   c                     h | ]	}|j         
S rO   r[   r/  s     r4   r1  z-ONNXModel.topological_sort.<locals>.<setcomp>V  s    >e>e>e!qv>e>e>er6   r,   N)r2   r+   r,   ra   r   r   r   rd   r)   r(   r   r   r\   allpopleftcopydeepcopyclearitemsr   
ClearField)r1   enable_subgraphr   r,   r   r   waitinpr0  outr   r*  r   r,  s   `          @@@r4   topological_sortzONNXModel.topological_sort*  sC     	<"$"$
(- 	@ 	@"&* I IJ:++--..!33%-@@@?Cf/
;;/
;BB4HHH#'; @ @K;,,..//144;?+K8@	@ #'";"&";	GGww:#) 	4 	4CHH(23333!& 	 	A[[[[[STSZ[[[[[  			Ammmmmmmmmm D==KKNNN !Iafx n n---HHlllll)<S)Alllmmm1vv{{s4yyA~~M$''

  	 21y001114/////0011S>e>etzO_Od>e>e>e9f9f5g5ggggg
##F+++
$$U+++++r6   c                    |g }t                      }|D ]k}t          |t                    r|                    |           -t          |t          j                  r|                    |j                   bJ d            g }|D ]k}t          |t                    r|                    |           -t          |t          j                  r|                    |j                   bJ d            |r|                                }||v r||vr|                    |           n6t          |t          | j
        j        j                            }|                     |          D ]}|                    |j                   ||S )z4Get nodes chain with given start node and stop node.NFzM'get_nodes_chain' function only support list[string]or list[NodeProto] params)r   r   r   r   r   	NodeProtor\   r4  r   r   r2   r+   r,   r   )	r1   startstopresult_chain
start_noder,   	stop_noder   r"   s	            r4   get_nodes_chainzONNXModel.get_nodes_chainZ  s   LWW
 	n 	nD$$$ n!!$''''D$.11 n!!$),,,,mmmmu 	 	n 	nD$$$ n  &&&&D$.11 n  ++++mmmmu 	/"**,,II%%,,##I....	4
0@0E+F+FGGD**400 / /!!&+....  	/ r6   c                    g }| j         j        j        D ]B}d\  }}|j        dk    r8|}|                     |g dg d          |                     |g dg d          g}|j        dk    r|}|                     |g dg d          |                     |g dg d	          |                     |g d
g d| j        g           |                     |g dg d          |                     |g dg d          |                     |g dg d          |                     |g dg d          g}|st          |          s-|                    |           D|S )z,Find split node for layer wise quantization.r   SkipLayerNormalizationMatMulr   r   r   rJ  Nr   r   r   r   AddrJ  r   r   rJ  r   r   r   r   r   rM  r   Nr   r   r   r   Nr   r   r   r   Gemmr   r   r   rJ  Nr   r   r   r   r   r   return_indicerM  rJ  r   r   r   rJ  r   Nr   r   r   r   r   Nr   r   r   r   )rJ  MulrJ  rY  DivrM  )Nr   Nr   Nr   )rJ  rY  rJ  SimplifiedLayerNormalizationrM  )Nr   Nr   r   )r   r+   r,   rj   match_parent_pathr   anyr   )r1   start_nodesr,   rD  qkv_nodes_lists        r4   +find_split_node_for_layer_wise_quantizationz5ONNXModel.find_split_node_for_layer_wise_quantization  s"    K%* >	+ >	+D)3&J|777!
**"OOO*** 
 **"KKK' " |u$$!
 **"KKK*** 
 **"$W$W$WYkYkYk  **"XXX---,0,D&( +   **"VVV--- 
 **"VVV--- 
 **"HHH333 
 **"ZZZ--- E'"P  ~&& z****r6   c                    g }| j         j        j        D ]}|j        dk    r|                    |j        g           *d\  }}|j        dk    r8|}|                     |g dg d          |                     |g dg d          g}|j        dk    r|}|                     |g dg d	          |                     |g dg d
          |                     |g dg d| j        g           |                     |g dg d          |                     |g dg d          g}|st          |          s d |D             d         }g }|j	        D ]9}|| j        vr||d         j
        d         k    r$|                    |           :t          |          dk    r|d         }	| j        }
|
|	         }d |D             }|                    d          dk    r#|                    d |D                        |s n|S )zFind qkv MatMul in Attention.

        Args:
            find_all (bool, optional): find all qkv MatMul. Defaults to False

        Returns:
            qkv (list): qkv MatMul list
        	Attentionr   rH  rI  rK  rL  rN  rM  rO  rP  rQ  rS  rT  rV  rW  rX  c                     g | ]}||S r'  rO   )r^   qkvs     r4   r`   z3ONNXModel.find_qkv_in_attention.<locals>.<listcomp>  s    JJJ#////r6   ro   r   r   c                     g | ]	}|j         
S rO   )rj   r^   r   s     r4   r`   z3ONNXModel.find_qkv_in_attention.<locals>.<listcomp>  s    BBBemBBBr6   rJ     c                 2    g | ]}|j         d k    |j        S )rJ  )rj   r\   rf  s     r4   r`   z3ONNXModel.find_qkv_in_attention.<locals>.<listcomp>  s'    ZZZ5QY@Y@YEJ@Y@Y@Yr6   )r   r+   r,   rj   r   r\   r\  r   r]  ra   rd   r   r   count)r1   find_allrd  r,   rD  r_  	qkv_nodesother_inputsra   
root_inputr   r   children_typess                r4   find_qkv_in_attentionzONNXModel.find_qkv_in_attention  s    K%* H	 H	D|{**

DI;''')3&J|777!
**"OOO*** 
 **"KKK' " |u$$!
 **"KKK*** 
 **"$W$W$WYkYkYk  **"XXX---,0,D&( +   **"VVV--- 
 **"VVV--- 1"<  ~&& JJJJJ2NIL#) + + 888IaL/222##E****<  A%%%aJ"&":*:6HBBBBBN##H--22

ZZHZZZ[[[ E
r6   c                    g }t          t          |                    D ]}|t          |          dz
  k    r>||dz            }|dz
  dk    r)|                    ||dz
           ||dz
           g           V||         }||z   dz
  t          |          k     r/|                    |||z   dz
           |||z   dz
           g           |S )a  Find MatMul in FFN.

        Args:
            attention_index (list): index of Attention
            attention_matmul_list (list): list of Attention and MatMul nodes
            block_len (int): block length

        Returns:
            list: list of MatMul in FFN
        r      r   )r  r   r   )r1   attention_indexattention_matmul_list	block_len
ffn_matmulr   indexs          r4   find_ffn_matmulzONNXModel.find_ffn_matmul"  s     
_--.. 
	 
	Cc/**Q...'a019>>%%'<UQY'GI^_dgh_hIi&jkkk',9$q(3/D+E+EEE%%.uy/@1/DEG\]ben]nqr]rGst   r6   c                    ddl m} ddlm} t	          ||          r || j        | j                  \  }}}|                     |           |                     |           | 	                    |           | 
                                 |                                  |                                  |                     |           dS t                              d           t!          d           dS )zExport Qlinear to QDQ model.r   )ONNXQlinear2QDQConfig)onnx_qlinear_to_qdqzGUnsupported config for export, only ONNXQlinear2QDQConfig is supported!N)neural_compressor.configry  neural_compressor.utils.exportrz  r   r   r)   r   r   r   rf   r$  r>  r}   r   r   exit)r1   	save_pathconfry  rz  r   r   initss           r4   exportzONNXModel.export;  s    BBBBBBFFFFFFd122 	-@-@dNg-h-h*I|UNN9%%%l+++!!%(((KKMMM$$&&&!!###IIi     NNdeeeGGGGGr6   c                     g }|D ]R}||                                  vr:t          j                                        }||_        |                    |           S| j        j        j                             |           dS )zAdd the tensors to the model outputs to gets their values.

        Args:
            tensor_names: The names of tensors to be dumped.
        N)	rd   r   r   ValueInfoProtor\   r   r   r+   r   )r1   tensor_namesadded_outputsr   added_tensors        r4   add_tensors_to_outputsz ONNXModel.add_tensors_to_outputsM  s}     " 	3 	3FT[[]]**#{99;;$*!$$\222 ''66666r6   c                 0   g }|D ]g}||                                  v rO|                    | j        j        j         |                                                      |                              h|D ]&}| j        j        j                             |           'dS )zRemove the tensors from the model outputs.

        Args:
            tensor_names: The names of tensors to be removed.
        N)rd   r   r   r+   rv  r   )r1   r  removed_outputsr   rd   s        r4   remove_tensors_from_outputsz%ONNXModel.remove_tensors_from_outputs[  s     " 	^ 	^F&&&&t{'8'?@S@STZ@[@['\]]]% 	4 	4FK$++F3333	4 	4r6   c                     |g }t          |j                  D ]&\  }}||v r||         }|j        |k    r
||vr||fc S 'dS )a  Find parent node based on constraints on op_type.

        Args:
            node (str): current node name.
            parent_op_type (str): constraint of parent node op_type.
            output_name_to_node (dict): dictionary with output name as key, and node as value.
            exclude (list): list of nodes that are excluded (not allowed to match as parent).

        Returns:
            parent: The matched parent node. None if not found.
            index: The input index of matched parent node. None if not found.
        Nr   )	enumeratera   rj   )r1   r,   parent_op_typer   excluder_   ra   r"   s           r4   match_first_parentzONNXModel.match_first_parenth  sn     ?G!$*-- 	% 	%HAu+++,U3>^33g8M8M!19$$$zr6   c                 0   |J ||dk    sJ |g }|| j         }|4|                     ||||          \  }}||                    |           |S |t          |j                  k    rdS |                     |||          }||j        |k    r||vr|S dS )a|  Find parent node based on constraints on op_type and index.

        Args:
            node (str): current node name.
            parent_op_type (str): constraint of parent node op_type.
            input_index (int or None): only check the parent given input index of current node.
            output_name_to_node (dict): dictionary with output name as key, and node as value.
            exclude (list): list of nodes that are excluded (not allowed to match as parent).
            return_indice (list): a list to append the input index when input_index is None.

        Returns:
            parent: The matched parent node.
        Nr   )r(   r  r   r   ra   r   rj   )	r1   r,   r  input_indexr   r  rU  r"   rv  s	            r4   match_parentzONNXModel.match_parent~  s    , "kQ&6&6&66?G&"&"; 33D.J]_fggMFE($$U+++M#dj//))4{4GHH&.N"B"BvU\G\G\Mtr6   c           	         t          |          t          |          k    sJ || j        }|}g }t          |          D ]B\  }}	|                     ||	||         |g |          }
|
 dS |                    |
           |
}C|S )a  Find a sequence of input edges based on constraints on parent op_type and index.

        Args:
            node (str): current node name.
            parent_op_types (str): constraint of parent node op_type of each input edge.
            parent_input_index (list): constraint of input index of each input edge.
                                       None means no constraint.
            output_name_to_node (dict): dictionary with output name as key, and node as value.
            return_indice (list): a list to append the input index when there is
                                  no constraint on input index of an edge.

        Returns:
            parents: a list of matched parent node.
        N)r  rU  )r   r(   r  r  r   )r1   r,   parent_op_typesparent_input_indexr   rU  current_nodematched_parentsr_   rj   matched_parents              r4   r\  zONNXModel.match_parent_path  s    , %&&#o*>*>>>>>&"&";#O44 	* 	*JAw!.."1%#+ /  N %tt"">222)LLr6   c                 F    | j         j        j        D ]}d|j        v r dS dS )z~Check the model is smooth quantized or not.

        Returns:
            bool: the model is smooth quantized or not.
        _smooth_scaleTF)r2   r+   r9   r\   )r1   rB   s     r4   is_smoothquant_modelzONNXModel.is_smoothquant_model  s9     J$0 	 	D$)++tt ,ur6   c                 .    |                                  }|S )z-Find split nodes for layer-wise quantization.)r`  )r1   split_nodess     r4   find_split_nodeszONNXModel.find_split_nodes  s    FFHHr6   Tc                 	   t          j                    }|                    | j                   |j                            d           t          j                    }|                    | j                   |j                            d           d}d}| j        j        j        D ]a}	|dk    r |j        j                            |	           n%|dk    r|j        j                            |	           |	j        |k    r	|	j	        }d}bt          |          dk    sJ d| dt          |           d            |d         }
|rp	 dd	lm}  || j        d
t          j                            | j                            | _        n.# t"          $ r!}t$                              d           |d}~ww xY w|                     |
          \  }}t           j                            |
||          }t/          |d
          }t/          |d
          }|                                 |                                 |j        j        j	                            |           |j        j        j                            |           g }g }|j        D ]}||j        v r|                     |          \  }}t           j                            |||          }||j        j        j	        vr|                    |           ||j        j        j        vr|                    |           |D ]&}|j        j        j	                            |           '|D ]&}|j        j        j                            |           '|                                 |                                 |                                 |                                 t          j                            |          }|                    |           t          j                             |d          }||_!        |"                    |           |#                                 t$          $                    d| d           |r|                    |           t          j                             |d          }||_!        |"                    |           |#                                 t$          $                    d| d           ||fS ||fS )a[  Split model into two parts at a given node.

        Args:
            split_node_name (str): name of the node where the model is split at>
            path_of_model_to_split (str): path of model to be split.
            shape_infer (bool): do shape inference. Default is True.
            save_both_split_models (bool): whether to save the two split models.
                False means only save the first split model.
                True means save both the two split models.
                Default id True.

        Returns:
            tuple: the first split model, the second split model
        r,   Nr   rq  zJOnly support split at node with 1 output tensor, while current split node z has z output tensorsr   )infer_shapesT)
auto_mergebase_dirzShape infer fails for layer-wise quantization. We would recommend checking the graph optimization level of your model and setting it to 'DISABLE_ALL' or 'ENABLE_BASIC', as this may help avoid this error.)r   zsplit_model_part_1.onnxzsave split model part 1 to z for layer wise quantizationzsplit_model_part_2.onnxzsave split model part 2 to )%r   
ModelProtoCopyFromr   r+   r9  r,   r   r\   rd   r   'neural_compressor.adaptor.ox_utils.utilr  r   r   r   r   r@   r   error%_get_output_type_shape_by_tensor_namer   make_tensor_value_infor
   _remove_unused_input_outputr2   ra   r   r   remove_unused_initrf    load_model_initializer_by_tensorjoinrJ   _save_split_modelr   r  )r1   split_node_namepath_of_model_to_splitshape_infersave_both_split_modelssplit_model_part_1split_model_part_2split_node_outputpart_idxr,   split_tensor_namer  rD   split_tensor_typesplit_tensor_shapesplit_tensorinsert_output_for_model_1insert_input_for_model_2rd   output_typeoutput_shapeoutput_tensorra   dir_of_model_to_splitsplit_model_part_1_pathsplit_model_part_2_paths                             r4   split_model_with_nodezONNXModel.split_model_with_node  sS   * "_..##DK000 ++F333!_..##DK000 ++F333 K%* 	 	D1}}"(-44T::::Q"(-44T:::yO++$(K!$%%*** WYh  W  Wor  tE  pF  pF  W  W  W +** .a0  	PPPPPP*l4;4RTRYRaRabfbrRsRsttt   9    150Z0Z[l0m0m--{99:KM^`rss&'9$OOO&'9$OOO 	6688866888 &-44\BBB &,33LAAA$&!#% (< 	C 	CF+???,0,V,VW],^,^)\ $ B B6;Xd e e (:(@(F(MMM-44]CCC (:(@(F(LLL,33MBBB 0 	A 	AF$*188@@@@ . 	? 	?E$*077>>>> 	--///--///!!###!!### "0F G G;;<QRRR"$',,/DF_"`"`(?%,,-DEEE//111h3Jhhhiii! 		:??@UVVV&(gll3HJc&d&d#,C)001HIII33555LLl7Nlllmmm%'999%'999s   A F 
F.F))F.c           	          t           j                            |dz             rt          j        |dz              t	          j        | j        |dd|                    d          d         dz   dd           dS )	zSave split model as external data for layer wise quantization.

        Args:
            save_path (str): the path to save the split model
        rp   Trn   ro   rq   Frr   N)r   r   r!   r   r   r|   r   rz   )r1   r~  s     r4   r  zONNXModel._save_split_model`  s     7>>)g-.. 	+Ii')***K"&$(__S))"-7#	
 	
 	
 	
 	
 	
r6   c                     t           j        j        }d}| j        j        j        D ]@}|j        |k    r3|j        j        j	        }d |j        j        j
        j        D             } nA||fS )zGet output type and shape with a tensor name.

        Args:
            tensor_name (str): name of a tensor

        Returns:
            tuple: output type and shape
        Nc                 J    g | ] }|                     d           r|j        nd!S )	dim_valuero   )r:   r  )r^   dims     r4   r`   zCONNXModel._get_output_type_shape_by_tensor_name.<locals>.<listcomp>  s<       KNS\\+%>%>FCMMB  r6   )r   r;   FLOATr   r+   
value_infor\   r   tensor_type	elem_typeshaper  )r1   r   r  r  rd   s        r4   r  z/ONNXModel._get_output_type_shape_by_tensor_namer  s     $*	k'2 	 	F{k))"K3=	 RXR]RiRoRs    * %r6   c                    g }g }| j         j        j        D ]%}|j        | j        vr|                    |           &| j         j        j        D ]%}|j        | j        vr|                    |           &|D ]&}| j         j        j                            |           '|D ]&}| j         j        j                            |           'dS )z-Remove unused input & output for split model.N)	r   r+   rd   r\   r   r   ra   r   r   )r1   remove_outputsremove_inputsrd   ra   s        r4   r  z%ONNXModel._remove_unused_input_output  s    k'. 	. 	.F{$":::%%f---[&, 	, 	,Ez!999$$U+++$ 	4 	4FK$++F3333" 	2 	2EK#**51111	2 	2r6   c                     g }| j         j        j        D ]%}|j        | j        vr|                    |           &|                     |           dS )zRemove unused init.N)r   r+   r9   r\   r   r   r   )r1   remov_initsrB   s      r4   r  zONNXModel.remove_unused_init  s]    K%1 	) 	)Dy 888""4(((  -----r6   c                    |$t           j                            | j                  }| j        j        j        D ]Q}|                    d          r:|j        t          j
        j        k    r t          j                            ||           RdS )zLoad model initializer by tensor.

        Args:
            data_path (str, optional): the directory of saved initializer. Defaults to None.
        Nr8   )r   r   r   r   r   r+   r9   r:   r8   r   r;   r<   r   load_external_data_for_tensor)r1   	data_pathrB   s      r4   r  z*ONNXModel.load_model_initializer_by_tensor  s     (899IK%1 	Y 	YD}}_-- Y$2DHXHa2a2a)GGiXXX	Y 	Yr6   external.datac                 t   |rt           j                            t           j                            t           j                            | j                  |                    rTt          j        t           j                            t           j                            | j                  |                     |                                  t          j	        
                    | j        |           t          j	                            | j        t           j                            | j                             dS )a}  Write external data of merged quantized model to new location to save memory.

        Args:
            external_data_location (str, optional): external data location of merged quantized model.
                                                    Defaults to "external.data".
            overwrite (bool, optional): if True, remove existed externa data. Defaults to False.
        )ru   )filepathN)r   r   r!   r  r   r   r   r  r   r   r  r   write_external_data_tensors)r1   external_data_location	overwrites      r4   #write_external_data_to_new_locationz-ONNXModel.write_external_data_to_new_location  s      	_RW__TEU5V5VXn(o(opp 	_Ibgll27??43C#D#DF\]]^^^--///!@@Wm@nnn!==dkTVT[TcTcdhdtTuTu=vvvvvr6   c                    |                                  |                     t          |                                                     |                     t          |                                                     |                                  |                                j        D ]A}|j	        |                                 vr$| j
        j        j                            |           Bg }| j
        j        j        D ]2}|j	        |                                v r|                    |           3|D ]&}| j
        j        j                            |           '|                                j        D ]j}|j	        |                                 vrM|j	        |                                 vr2|j	        | j        vr$| j
        j        j                            |           kdS )z'Merge two split model into final model.N)r  r   r   r   r   r9   rf   r+   rd   r\   r   r   ra   r   r   )r1   to_merge_modelrd   remove_outputra   s        r4   merge_split_modelszONNXModel.merge_split_models  s   ::<<<tN002233444d>#=#=#?#?@@AAA %**,,3 	8 	8F{$++--//!(//777 k'. 	- 	-F{n224444$$V,,,# 	4 	4FK$++F3333 $))++1 	6 	6E
$**,,..Jdkkmm33Jd&>>>!'..u555	6 	6r6   c                 $   i }g }| j         j        j        D ]!}|||j        <   |                    |           "|D ]&}| j         j        j                            |           '|D ],}| j         j        j                            ||                    -dS )z:Re-org output of merged model for layer-wise quantization.N)r   r+   rd   r\   r   r   )r1   origin_outputoutputs
tmp_removerd   out_names         r4   re_org_outputzONNXModel.re_org_output  s    
k'. 	& 	&F#)GFK f%%%%  	4 	4FK$++F3333% 	? 	?HK$++GH,=>>>>	? 	?r6   )Fr'  r   )NNNN)TT)r  F)M__name__
__module____qualname____doc__r5   r   propertyrG   rJ   setterrP   rS   rV   r2   ra   rd   rf   rh   r/   r}   r   r9   r+   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r*   r   r-   r   r   r   r   r   r   r  r
  staticmethodr  r  r  r  r$  r>  rF  r`  ro  rw  r  r  r  r  r  r\  r  r  r  r  r  r  r  r  r  r  r  rO   r6   r4   r
   r
   &   s         B% % %. $ $ X$     X            X _" " _"   X   X \> > \>9 9 9: : :> > >     X > > >
J J J0& & &- - -! ! !& & &( ( (0 0 0
# # #
. . .4 4 4; ; ;
) ) )
  	 	 	  9 9 9
1 1 1
) ) ) ) ) ) X)K K K$ ) ) X)B B B  
 
 
 
	 	 	 	* * * *    2+ 2+ 2+h2 2 2 2 / / \/W W W W 1 1 \1Z Z Z Z/ / /b., ., ., .,`% % % %NJ J JXS S S Sj  2  $7 7 74 4 4   4  * * * *b !, , , ,\	 	 	   aev: v: v: v:p
 
 
$     (2 2 2". . .
Y 
Y 
Y 
Yw w w w6 6 6:? ? ? ? ?r6   r
   )r  r5  loggingr   r>   collectionsr   pathlibr   r   onnx.external_data_helperutilr   r   	getLoggerr   r
   rO   r6   r4   <module>r     s   &     				 



                      0 0 0 0 0 0 0 0		.	/	/
}? }? }? }? }? }? }? }? }? }?r6   