
    fPil                        d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	Z	d dl
Z
d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZ  e j        e           Z!g dZ"ej#        dej$        dej%        diZ& G d de          Z' G d de          Z( G d de          Z) G d de          Z* G d de          Z+e*ddfe+ddfe)ddfdZ, G d d          Z- G d  d!          Z.dS )"    N)Path)	Precision)float_to_float16_max_diff)FusionOptions)IOBindingHelper)	OnnxModel)optimize_model)torch_onnx_export)
GPT2ConfigGPT2LMHeadModel	GPT2ModelTFGPT2Model)
distilgpt2gpt2zgpt2-mediumz
gpt2-largezgpt2-xlMb@?g?g      @c                   ,     e Zd ZdZ fdZ fdZ xZS )GPT2ModelNoPastState2Here we wrap a class to disable past state output.c                 J    t                                          |           d S Nsuper__init__selfconfig	__class__s     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/models/gpt2/gpt2_helper.pyr   zGPT2ModelNoPastState.__init__*   !             c                 L    t                                          |dd          S )NF)	use_cachereturn_dict)r   forwardr   	input_idsr   s     r   r$   zGPT2ModelNoPastState.forward-   s    wwyEuMMMr    __name__
__module____qualname____doc__r   r$   __classcell__r   s   @r   r   r   '   s`        <<! ! ! ! !N N N N N N N N Nr    r   c                   ,     e Zd ZdZ fdZ fdZ xZS )TFGPT2ModelNoPastStater   c                 X    d|_         t                                          |           d S )NF)r"   r   r   r   s     r   r   zTFGPT2ModelNoPastState.__init__4   s)          r    c                 J    t                                          |d          S )NF)r"   )r   callr%   s     r   r$   zTFGPT2ModelNoPastState.forward8   s    ww||I|777r    r'   r-   s   @r   r/   r/   1   sW        <<! ! ! ! !8 8 8 8 8 8 8 8 8r    r/   c                   B     e Zd ZdZ fdZed             Z fdZ xZS )MyGPT2ModelzMHere we wrap a class for Onnx model conversion for GPT2Model with past state.c                 J    t                                          |           d S r   r   r   s     r   r   zMyGPT2Model.__init__?   r   r    c           	      
   t          | d         d         t          t          f          rt          | d                   |k    rt          | d         d                   dk    sJ g }t	          |          D ]w}|                    t          j        | d         |         d                             d          | d         |         d                             d          fd                     x| d         t          |          fS | S )N   r      )dim)	
isinstancetuplelistlenrangeappendtorchcat	unsqueeze)result	num_layerpresentis       r   post_processzMyGPT2Model.post_processB   s    fQilUDM22 	/vay>>Y..3vay|3D3D3I3I3IIG9%%   I1a22155vay|A7P7PQR7S7ST      1IuW~~..r    c                     t                                          ||||d          }t                              || j        j                  S NF)position_idsattention_maskpast_key_valuesr#   r   r$   r4   rG   r   n_layerr   r&   rJ   rK   pastrC   r   s         r   r$   zMyGPT2Model.forwardU   sK    %)  ! 
 
 ''0CDDDr    )	r(   r)   r*   r+   r   staticmethodrG   r$   r,   r-   s   @r   r4   r4   <   s{        WW! ! ! ! !   \$E E E E E E E E Er    r4   c                   ,     e Zd ZdZ fdZ fdZ xZS )MyGPT2LMHeadModelzSHere we wrap a class for Onnx model conversion for GPT2LMHeadModel with past state.c                 J    t                                          |           d S r   r   r   s     r   r   zMyGPT2LMHeadModel.__init__c   r   r    c                     t                                          ||||d          }t                              || j        j                  S rI   rM   rO   s         r   r$   zMyGPT2LMHeadModel.forwardf   sK    %)  ! 
 
 ''0CDDDr    r'   r-   s   @r   rS   rS   `   s`        ]]! ! ! ! !	E 	E 	E 	E 	E 	E 	E 	E 	Er    rS   c                   ,     e Zd ZdZ fdZ fdZ xZS )MyGPT2LMHeadModel_NoPaddinga  Here we wrap a class for Onnx model conversion for GPT2LMHeadModel with past state and no padding.
    When you always use batch_size=1 in inference, there is no padding in inputs. In such case, position_ids
    and attention_mask need no be in inputs.
    c                 J    t                                          |           d S r   r   r   s     r   r   z$MyGPT2LMHeadModel_NoPadding.__init__x   r   r    c                     t                                          ||d          }t                              || j        j                  S )NF)rL   r#   rM   )r   r&   rP   rC   r   s       r   r$   z#MyGPT2LMHeadModel_NoPadding.forward{   s9    DeTT''0CDDDr    r'   r-   s   @r   rW   rW   r   sd         
! ! ! ! !E E E E E E E E Er    rW   logitsTF
last_state)r   GPT2LMHeadModel_NoPaddingr   c                   2    e Zd Zd ZdefdZdefdZd ZdS )
Gpt2Inputsc                 >    || _         || _        || _        || _        d S r   )r&   rJ   rK   rP   )r   r&   rJ   rK   rP   s        r   r   zGpt2Inputs.__init__   s$    +4.:WeFJ			r    returnc                     d | j         | j        | j        fD             }| j        r|                    | j                   |S )Nc                     g | ]}||S r    .0vs     r   
<listcomp>z&Gpt2Inputs.to_list.<locals>.<listcomp>   s    kkkA]^]ja]j]j]jr    )r&   rJ   rK   rP   extend)r   
input_lists     r   to_listzGpt2Inputs.to_list   sJ    kk$.$2CTEX!Ykkk
9 	)di(((r    c                 d    t          d | j        | j        | j        | j        fD                       S )Nc              3      K   | ]}||V  	d S r   rc   rd   s     r   	<genexpr>z&Gpt2Inputs.to_tuple.<locals>.<genexpr>   s(      uu1ghgtQgtgtgtgtuur    )r;   r&   rJ   rK   rP   )r   s    r   to_tuplezGpt2Inputs.to_tuple   s4    uu1BDDWY]Yb cuuuuuur    c                     d }| j         F| j         j        t          j        k    r%| j                             t          j                  n| j         }d | j        D             }t          | j        | j	        ||          S )Ndtypec                 N    g | ]"}|                     t          j                   #S )rp   )tor@   float32re   ps     r   rg   z&Gpt2Inputs.to_fp32.<locals>.<listcomp>   s(    ===a5=))===r    )
rK   rq   r@   float16rs   rt   rP   r^   r&   rJ   )r   rK   rP   s      r   to_fp32zGpt2Inputs.to_fp32   s~    * '->> #&&U]&;;;(  >=49===$.$*;^TRRRr    N)	r(   r)   r*   r   r<   rj   r;   rn   rx   rc   r    r   r^   r^      su        K K K    v% v v v vS S S S Sr    r^   c            "       L   e Zd ZdZedddej        ej        ej        dfdededededed	ed
edej        de	de	de	dej
        dej
        dej
        de	def d            Ze	 dCdedededededeeee         f         fd            Zed             ZedDd            ZedDd            ZedEd            ZedFd            Zeddddej        ej        ej        fd ed!e	d"e	de	de	dej
        dej
        dej
        fd#            Ze	 	 	 dGd%            Zeg d&fd'ed(ee         fd)            ZedHd*ed+efd,            ZedHd*ed+efd-            Zed.             ZedId/            Ze	 	 	 dJd*ed0eeej        f         d1eeee         f         d+ed2e	d3e	fd4            Z ed5             Z!ed6             Z"edd7d7d8d9ddddej        ej        ej        d$ddfd:            Z#edd;ddddej        ej        ej        d<d9d=fd>            Z$edKd?            Z%edddg d@fdefdA            Z&dBS )L
Gpt2HelperzEA helper class for Gpt2 model conversion, inference and verification.FT
batch_sizepast_sequence_lengthsequence_lengthnum_attention_headshidden_sizerD   
vocab_sizedevicerw   has_position_idshas_attention_maskinput_ids_dtypeposition_ids_dtypeattention_mask_dtypeleft_side_paddingr`   c                    |rt           j        nt           j        d| ||t          ||z            gfdt	          |          D             }t          j        d|dz
  | |f|          }d}|
rf||z   }t          j        | |g|          }|dk    rBt	          |           D ]2}t          j        d|dz
            }|r
d||d|f<   &d||||z
  df<   3d}|	re|                                	                    d          dz
  }|
                    |dk     d           |dd|df                             |          }t          ||||          S )	zCreate random inputs for GPT2 model.
        Returns torch tensors of input_ids, position_ids, attention_mask and a list of past state tensors.
        r8   c                 J    g | ]}t          j                   dz  dz
   S )rq   r   g       @      ?)r@   rand)re   _r   
float_type
past_shapes     r   rg   z/Gpt2Helper.get_dummy_inputs.<locals>.<listcomp>   s6    pppZ[JjHHH3NQTTpppr    r   r7   )lowhighsizerq   r   Nr   )r@   rw   rt   intr>   randintonesrandomlongcumsummasked_fill_rs   r^   )r{   r|   r}   r~   r   rD   r   r   rw   r   r   r   r   r   r   rP   r&   rK   total_sequence_lengthrF   padding_lengthrJ   r   r   s          `              @@r   get_dummy_inputszGpt2Helper.get_dummy_inputs   s   * '.@U]]5=
 1122

 qppppp_den_o_opppMao.!
 
 
	  	X$8?$J!"Z23*  N %))z** X XA%+^A7Lq7P%Q%QN( X=>q/>/'9::VWq*?.*P*R*R'RSS  	Y)..0077;;a?L%%lQ&6:::'+?+@+@(@ADDEWXXL)\>4HHHr    r   r   model_classc                    |j         }|j        }|j        }|j        }t          |         d         }	| ||	dk    r|n|g}
d| |||z   t          ||z            g}|	|
i}t          |          D ]}||dt          |          z   <   |S )zAReturns a dictionary with output name as key, and shape as value.r7   rZ   r8   present_)r~   r   num_hidden_layersr   MODEL_CLASSESr   r>   str)r{   r|   r}   r   r   r~   r   rD   r   output_namelast_state_shapepresent_state_shapeoutput_shapesrF   s                 r   get_output_shapeszGpt2Helper.get_output_shapes   s     %8(,	&
#K03 %11JJ{
  ?21122
 %&67y!! 	E 	EA1DM*s1vv-..r    c                    |D ]|}|| v sJ | |         }t          j        ||                   |                                k    r<t          j        t          j        ||                   |j        |j                  | |<   }d S )Nr   )numpyprodnelementr@   emptyrq   r   )output_buffersr   keybuffers       r   auto_increase_buffer_sizez$Gpt2Helper.auto_increase_buffer_size  s      	 	C.((((#C(Fz-,--0A0AAA&+kJ}S122 ,!=' ' 's#		 	r    c                     |rt           j        nt           j        }i }|                                 D ]1\  }}t          j        t          j        |          ||          ||<   2|S )zpReturns a dictionary of output name as key, and 1D tensor as value. The tensor has enough space for given shape.r   )r@   rw   rt   itemsr   r   r   )r   r   
is_float16	data_typer   nameshapes          r   get_output_bufferszGpt2Helper.get_output_buffers  sk     &0BEMMU]	(..00 	b 	bKD%#(;uz%/@/@	Z`#a#a#aN4  r    c                    | d                                                                          }t          j        ||d         z
            }|r,t          j        |t          j        |          dz   z            S t          j        |          S )zGReturns the maximum difference between PyTorch and OnnxRuntime outputs.r   ư>)cpur   absamax)torch_outputsort_outputsrelativeexpected_outputsdiffs        r   diff_outputszGpt2Helper.diff_outputs%  s}     )+//117799y)KN:;; 	$:dei0@&A&AD&HIJJJ:d###r    MbP?c           	         t          j        |d         | d                                                                          ||          }t                              d|            |}t          |          dz
  }t          |          D ]z}t          j        |d|z            | d         |                                                                          ||          }t                              d| d| d|            |o|}{|s9t                              | |          }	t          	                    d|	d	           |S )
zReturns True if torch and ORT outputs are close for given thresholds, and False otherwise.
        Note: need kwargs since Gpt2BeamSearchHelper.compare_outputs has an extra parameter model_class
        r   )rtolatolz9PyTorch and OnnxRuntime output 0 (last_state) are close: r7   zPyTorch and OnnxRuntime layer z state (present_z) are close:z@PyTorch and OnnxRuntime results are not all close: max_abs_diff=.5f)
r   allcloser   loggerdebugr=   r>   rz   r   info)
r   r   r   r   kwargsis_closeis_all_close
num_layerslayermax_abs_diffs
             r   compare_outputszGpt2Helper.compare_outputs/  sT   
 >+a.-2B2F2F2H2H2N2N2P2PW[bfggg[QY[[\\\%%)
:&& 	5 	5E~AI&a '++--3355	  H LLn%nnQVnndlnnooo'4HLL 	o%22=+NNLKKm[gmmmnnnr    r   c                     d}d}g }g }t          t          |                    D ]}||         }|dk    r| d         n| d         |dz
                                                                           }	t          j        ||	|d          }
|                    t          j        t          j        |	|z
                                 |o|
}t          j        |	          	                                rt                              d| d           t          j        |	          	                                rt                              d| d           t          j        |          	                                rt                              d	| d           t          j        |          	                                rt                              d	| d           t          j        ||	z
            }t          j        |                                |j                  }|                    d
||         dd| d||         ddt#          |	|                   d           |dk    rqt          j        t          j        |d          |j                  }t          j        t          j        |	d          |	j                  }t          j        ||          }|                    t)          |                    }|t)          |          |||fS )a  Compare outputs from PyTorch and OnnxRuntime

        Args:
            torch_outputs (Tuple[Torch.Tensor]): PyTorch model output
            ort_outputs (List[numpy.ndarray]): OnnxRuntime output
            atol (float, optional): Absolute tollerance. Defaults to 1e-06.

        Returns:
            is_all_close(bool): whether all elements are close.
            max_abs_diff(float): maximum absolute difference.
            messages(str): a list of debug message for each output
        TFr   r7   )r   r   zPyTorch output z has nanz has infzORT output zdiff=z.9fz index=z ort=z torch=N)axis)r>   r=   r   r   r   r?   r   r   isnananyr   r   isinffabsunravel_indexargmaxr   floatarray_equalindexmax)r   r   r   r   is_top1_matched	max_diffsmessagesrF   
ort_outputtorch_outputr   r   idxort_max_indextorch_max_indexmax_diff_output_indexs                   r   compare_outputs_v2zGpt2Helper.compare_outputs_v2J  s    	s;''(( 	T 	TA$QJ01QM!,,M!<LQQRU<SXXZZ``bbL~j,TPQRRRHUZ	,2K(L(LMMNNN'4HL{<((,,.. <:q:::;;;{<((,,.. <:q:::;;;{:&&**,, 861666777{:&&**,, 861666777:j<788D%dkkmmTZ@@COOrS	rrrcrr
3rrrTYZfgjZkTlTlrrr   Avv % 3ELRV4W4W4WYcYi j j"'"5el<VZ6[6[6[]i]o"p"p"'"3M?"S"S )I ? ?	NN!
 	
r    onnx_model_pathverboseuse_external_data_formatc
                    | j         }
|
j        }t                              ddd|
j        |
j        ||
j        |d|||||	          }|                                }t          j	                    5   | | }ddd           n# 1 swxY w Y   d t          |          D             }d t          |          D             }|d         j        d         |
j        k    s|d         j        d         |
j        k    sJ |d         j        d         |
j        k    rd	nd
g|}dddd|d         dddi}|D ]
}ddd||<   |D ]
}ddd||<   dg}|rddd|d<   |                    d           |rddd|d<   |                    d           |                    |           t          |          dk    rt          |d                   |k    sJ t                              d|j        j         d|j        d         j         d|d         j         d|d         d         j                    t'          |          j                            dd           |rt-          j                    5 }t0          j                            |d          }t'          |          j                            dd           t7          | t9          |          |d|||ddd|           t;          j        |d          } t?          j         | |dd           ddd           dS # 1 swxY w Y   dS t7          | t9          |          |d|||ddd|           dS )z1Export GPT-2 model with past state to ONNX model.r7   F)r{   r|   r}   r~   r   rD   r   r   rw   r   r   r   r   r   Nc                     g | ]}d | S )past_rc   re   rF   s     r   rg   z*Gpt2Helper.export_onnx.<locals>.<listcomp>  s    <<<akakk<<<r    c                     g | ]}d | S )r   rc   r   s     r   rg   z*Gpt2Helper.export_onnx.<locals>.<listcomp>  s    BBBAABBBr    r   r8   rZ   r[   r&   r{   seq_len)r   r7   past_seq_len)r7      total_seq_lenrJ   rK   zShapes: input_ids=z past=z output=z	 present=T)parentsexist_okz	gpt2.onnx   )
argsfexport_paramsinput_namesoutput_namesdynamic_axesopset_versiondo_constant_foldingr   r   )load_external_data)save_as_external_dataall_tensors_to_one_file)!r   rN   rz   r   r~   r   r   rj   r@   no_gradr>   r   r?   rh   r=   r   r   r&   rP   r   parentmkdirtempfileTemporaryDirectoryospathjoinr
   r;   onnx
load_modelr   save)modelr   r   r   r   r   r   r   r   r   r   rD   dummy_inputsri   outputs
past_namespresent_namesr   r   r   r   tmp_dir_nametemp_onnx_model_paths                          r   export_onnxzGpt2Helper.export_onnx  s    #\N	!22!" & :*(-1+1!5 3 
 
  "))++
]__ 	) 	)eZ(G	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) =<5+;+;<<<
BBy1A1ABBB qz"f&77771:;KA;NRXRd;d;d;dd$+AJ$4Q$76;L$L$LR^oano \i88O)<<
  	F 	FD%1n!E!EL! 	G 	GD%1o!F!FL"m 	//;	+J+JL(~... 	11=/-R-RL)*/000:&&&7||q  S__	%A%A%AA Z!7!=  Z  Z\EVWXEYE_  Z  Zipqrisiy  Z  Z  EL  MN  EO  PQ  ER  EX  Z  Z	
 	
 	
 	_$**4$*GGG# (	,.. ,')w||L+'N'N$)**177t7TTT!z***"& +!-!-"$(,-1#    (<QUVVV#*.,0	   '                 4 :&&!"')) $().     s%   -A??BB#BLLLr   c           	          t          d          }	t          | d||d|	d          }
|r5|rt                              |
           nd|vrd|d<    |
j        dddi| |
                    ||           |
S )	zHOptimize ONNX model with an option to convert it to use mixed precision.r   r   F)
model_type	num_headsr   	opt_leveloptimization_optionsuse_gpukeep_io_typesuse_symbolic_shape_inferTrc   )r   r	   rz   auto_mixed_precisionconvert_float_to_float16save_model_to_file)r   optimized_model_pathr   r~   r   r   r  stager   r  ms              r   optimize_onnxzGpt2Helper.optimize_onnx  s      -V44)#!5
 
 
  	T# T//2222"&00.3F?+**SSDSFSSS	13KLLLr    )AddLayerNormalizationSkipLayerNormalizationFastGeluEmbedLayerNormalization
onnx_modelop_block_listc                 \   d |                                  D             }t          |          }|                    |          }t                              d| d|            |                                 j        d         j        }d}|                                 }||v sJ ||         }d}	|j	        dk    r|}	t                              d|j                    d}
|j
        D ]}|                     |          }
|
 nt          |
          }t                              d	|j         d
|            |dk     }n*t                              d|j	         d|j                    g }g }|s|	|g}|	j        g}||||d}t                              d|             | j        dddi| |S )a?  Convert GPT-2 model to mixed precision.
           It detects whether original model has fp16 weights, and set parameters for float16 conversion automatically.
        Args:
            onnx_model (OnnxModel): optimized ONNX model
            op_block_list (List[str], optional): operators to compute in fp32. Defaults to ["Add", "LayerNormalization",
                                                 "SkipLayerNormalization", "FastGelu", "EmbedLayerNormalization"]
        Returns:
            parameters(dict): a dictionary of parameters used in float16 conversion
        c                     h | ]	}|j         
S rc   )op_type)re   nodes     r   	<setcomp>z2Gpt2Helper.auto_mixed_precision.<locals>.<setcomp>0  s    CCCt|CCCr    z	fp32 op: z
 fp16 op: r   FNMatMulz#Found last MatMul node for logits: z3max diff of converting weights in last MatMul node : r   z-Failed to find MatMul node for logits. Found z	 of node )r  r,  node_block_listforce_fp16_initializersz!auto_mixed_precision parameters: r  Trc   )nodesset
differencer   r   graphoutputr   output_name_to_noder/  inputget_initializerr   r   warningr   )r+  r,  op_full_setfp32_op_setfp16_op_setlogits_output_nameis_weight_fp16_precisionr;  r0  last_matmul_nodeinitializerr<  max_diffr  r4  
parameterss                   r   r  zGpt2Helper.auto_mixed_precision  s   ( DC
0@0@0B0BCCC-((!,,[99DDD{DDEEE (--//6q9> $) (<<>>!%88888"#56<8###KKIdiIIJJJK  (88??*E +
 1==HLLftyff\dffggg'/$$$NNm4<mmbfbkmmnnn( 	6/?/K/0M/45O +*.'?	
 

 	D
DDEEE+
+XXTXZXXXr    inputs
total_runsc                    t                               d           |                                                                }t	          j                    5   | | }ddd           n# 1 swxY w Y   |dk    r|S g }t	          j                    5  t          |          D ]C}t          j                    } | | }|                    t          j                    |z
             D	 ddd           n# 1 swxY w Y   t          |          dz  t          |          z  }t                               d                    t          |d                               ||fS )zfRun inference of PyTorch model, and returns average latency in ms when total_runs > 0 besides outputs.zstart pytorch_inferenceNr     zPyTorch inference time = {} ms.2f)r   r   rx   rj   r@   r  r>   timer?   sumr=   format)	r  rH  rI  ri   r  latencyr   startaverage_latencys	            r   pytorch_inferencezGpt2Helper.pytorch_inferenceb  s    	./// ^^%%--//
]__ 	) 	)eZ(G	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) ??N]__ 	4 	4:&& 4 4	%,ty{{U233334	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 g,,-G<5<<VOUZ=[=[\\]]]''s%   A&&A*-A*AC//C36C3c                 X   t                               d           dt          j        |j                                                                                  i}|j        Xt          |j                  D ]C\  }}t          j        |                                                                          |d| <   D|j        @t          j        |j                                                                                  |d<   |j	        @t          j        |j	                                                                                  |d<   | 
                    d|          }|dk    r|S g }t          |          D ]T}t          j                    }	| 
                    d|          }|                    t          j                    |	z
             Ut          |          dz  t          |          z  }
t                               d	                    t!          |
d
                               ||
fS )zcRun inference of ONNX model, and returns average latency in ms when total_runs > 0 besides outputs.zstart onnxruntime_inferencer&   Nr   rK   rJ   r   rK  z"OnnxRuntime Inference time = {} msrL  )r   r   r   ascontiguousarrayr&   r   rP   	enumeraterK   rJ   runr>   rM  r?   rN  r=   rO  )ort_sessionrH  rI  
ort_inputsrF   past_ir   rP  r   rQ  rR  s              r   onnxruntime_inferencez Gpt2Helper.onnxruntime_inference|  s    	2333!5#:6;K;O;O;Q;Q;W;W;Y;Y#Z#Z[
;"&v{33 X X	6*/*A&**,,BTBTBVBV*W*W
;1;;'' ,+0+B6CXC\C\C^C^CdCdCfCf+g+gJ'(*).)@ATAXAXAZAZA`A`AbAb)c)cJ~&!oodJ77??z"" 	0 	0AIKKE%//$
;;KNN49;;.////g,,-G<9@@Y^A_A_``aaaO++r    c           	      6    t          j        | ||||||          S )z)Returnas IO binding object for a session.)r   prepare_io_binding)rX  r&   rJ   rK   rP   r   r   s          r   r]  zGpt2Helper.prepare_io_binding  s.     1
 
 	
r    c                 0    t          j        | |||          S )z3Copy results to cpu. Returns a list of numpy array.)r   "get_outputs_from_io_binding_buffer)rX  r   r   return_numpys       r   r_  z-Gpt2Helper.get_outputs_from_io_binding_buffer  s"     A
 
 	
r    r   r   r`  include_copy_output_latencyc           	         t                               d           t                              | |j        |j        |j        |j        ||          }|                     |           t          	                    | |||          }|dk    r|S g }	t          |          D ]r}
t          j                    }|                     |           |rt          	                    | |||          }
|	                    t          j                    |z
             st          |	          dz  t          |	          z  }t                               d|           ||fS )zUInference with IO binding. Returns outputs, and optional latency when total_runs > 0.z*start onnxruntime_inference_with_binded_ior   rK  z4OnnxRuntime with IO binding inference time = %.2f ms)r   r   rz   r]  r&   rJ   rK   rP   run_with_iobindingr_  r>   rM  r?   rN  r=   )rX  rH  r   r   rI  r`  ra  
io_bindingr   rP  r   rQ  rR  s                r   $onnxruntime_inference_with_binded_ioz/Gpt2Helper.onnxruntime_inference_with_binded_io  sS    	ABBB  22!K
 

 	&&z222 !CC
 
 ??z"" 	0 	0AIKKE**:666* AA  NN49;;.////g,,-G<K_]]]O++r    c                    t          d|  dd          5 }t          j        ||           d d d            n# 1 swxY w Y   t                              d|  d           t          d|  dd          5 }t          j        ||           d d d            n# 1 swxY w Y   t                              d|  d           d S )Nort_outputs_.picklewbz$ORT output are saved to ort_outputs_torch_outputs_z(Torch output are saved to torch_outputs_openpickledumpr   r   )rF   r   r   r   s       r   save_outputszGpt2Helper.save_outputs  sD   ++++T22 	(aKQ'''	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	(E1EEEFFF-1---t44 	*Kq)))	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	*IqIIIJJJJJs   7;;4BBBc                     t          d|  dd          5 }t          j        ||           d d d            n# 1 swxY w Y   t                              d|  d           d S )Ndummy_inputs_rh  ri  z!inputs are saved to dummy_inputs_rk  )rF   r  r   r   r   s        r   save_inputszGpt2Helper.save_inputs  s    ,!,,,d33 	)qKa(((	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	)BBBBCCCCCs   7;;r   i'  r7   c                   , |j         }t                              d| d d| d| d|	 d| d           d}d	}d
}d}|r:t                              |||||	          }t                              |||          }d}d}g ,dg|z  }|z  }t          |          D ]p}t          |z            }t          j	        d|          }|dk    rdnt          j	        d|          }t          j	        d|          } t          
                    d|  d| d           t                              | |||j        |j        |j        |j        |||
||||d          }!t                              ||!          }"|rt                              | |!          }#n;t                              | ||||	          }$t                              | |!||$          }#t                              |"|#|          \  }%}&}'}(})t)          j        |&          s,                    |&           |%r|dz  }|)r|dz  }||xx         dz  cc<   |r|%s~t                              d| d|  d| d| d|& 
           t/          |(          D ]E\  }}*t                              d| d|                                 |         j         d|*            F|rTt)          j        |&          s	|&d|z  k    r7t                              ||!           t                              ||#|"           r,r,fddD             }+nd dD             }+|dz  |z  |+d <   fd!|D             |+d"<   |dz  |z  |+d#<   |t9          ,          z
  dz  |z  |+d$<   t                              d%| d&| d'|t9          ,          z
   d(|            |d)|z  k    r2t                              d*t          |dz  |z            d+d,           |+S )-zKGenerate random inputs and compare the results of PyTorch and Onnx Runtime.zRunning parity test (atol=z, test_cases=z, runs=z, use_io_binding=z, model_class=z, is_float16=z) ...      r8   Nr   r7   z#Running parity test for batch_size=z past_sequence_length=z...T)r   r   r   r   )r   z
test_case=z batch_size=z sequence_length=z	 MaxDiff=	z: Name=z, d   c                 F    i | ]}d | t          j        |          dS )max_diff_percentile_r   )r   
percentile)re   rv   max_abs_diff_lists     r   
<dictcomp>z*Gpt2Helper.test_parity.<locals>.<dictcomp>o  sF       `a*q**u/?@QST/U/U,[,[  r    )2   Z   _   c   c                     i | ]}d | d	S )ry  nanrc   ru   s     r   r|  z*Gpt2Helper.test_parity.<locals>.<dictcomp>s  s#    RRRA0Q00%RRRr    r   top1_match_ratec                      g | ]
}|d z  z  S )r   rc   )re   xtest_cases_per_runs     r   rg   z*Gpt2Helper.test_parity.<locals>.<listcomp>v  s#    ,n,n,naQW7I-I,n,n,nr    top1_match_rate_per_rundiff_pass_ratenan_ratezParity Test Cases=z	; Passed=z; Nan=z; Top1_Matched=gffffff?zParity is good: passed rate=z.0f%)r   r   r   rz   r   r   r>   r   r   r   r   r   r~   r   rN   r   rS  r[  re  r   r   r   r?   rV  get_outputsr   rr  ro  r=   )-rX  r  r   r   r   r   r  rI  use_io_bindingr   r   r   r   r   r   r#  r   enable_pickle_outputr   max_batch_sizemax_past_seq_lenmax_seq_lenr   max_output_shapespassed_test_casestop1_matched_casestop1_matched_cases_per_runtotal_test_casesrF   run_idr}   r|   r{   r  r  r   r   r   r   r   r   r   messagerC   r{  s-         `                                     @r   test_parityzGpt2Helper.test_parity  s   . #\ ~  ~  ~<N  ~  ~Wa  ~  ~  uC  ~  ~  S^  ~  ~  mw  ~  ~  ~	
 	
 	
  	b * < < 0+v{! ! (::;LfV`aaN&'S:%5"-
:'(( C	A C	AA//00F$nQ<<O).!11&.L\:]:] >::JLLqjqqXlqqq   &66$*"! " /#5%9"& 7  L" !225,GGG (>>{LYY * < <(#! ! )MM~}  --g{-NN%;|,, 7!((666 '!Q&! 8"a'"*6222a7222 _| _ b  b  b
  b  bRf  b  b  zI  b  b  T`  b  b   #,H"5"5 _ _JAwKK ]Q ] ]{/F/F/H/H/K/P ] ]T[ ] ]^^^^ $ A\)B)B AlUX[_U_F_F_&&q,777'';@@@ 	S   eu  FF SRAQRRRF$6$<?O$O !,n,n,n,nSm,n,n,n()#4s#:=M#M .5F1G1GG3NQaaz d!1  d  d<M  d  dUehkl}h~h~U~  d  d  Pb  d  d	
 	
 	
 t&6666KKms;Ls;RUe;e7f7fmmmmnnnr    rw  rt      c                    |j         }d}|r:t                              |||||          }t                              |||          }t                              ||||j        |j        |j        |j        |||||	|
|          }|r t          	                    | ||          \  }}n!t          
                    | ||||          \  }}|S )zCGenerate random inputs and measure average latency of Onnx Runtime.N)r   r   r   )r   rz   r   r   r   r~   r   rN   r   r[  re  )rX  r  r   r   rI  r  r   r   r   r   r   r   r{   r}   r|   r   r   r   r  r   rP  s                        r   test_performancezGpt2Helper.test_performance  s    ( #\ 	^&880/6; M (::=&R\]]N!22 &N+1!5 3 
 
"  	#99+|U_``JAww#HH\>=* JAw r    c                     t                               ddd|j        |j        |j        |j        |d||                                          }t          j        	                    | |          S )zJIT trace for TorchScript.r7   F)r{   r|   r}   r~   r   rD   r   r   rw   r   r   )
rz   r   r~   r   rN   r   rj   r@   jittrace)r  r   r   r   r   ri   s         r   torchscriptzGpt2Helper.torchscript  sp      00!" & :*n(-1 1 
 
 ')) 	 yuj111r    rawfp32fp16int8c           
         |}t           j                            |          rt          |          j        d         }n|                    d          d          |dk    r|d|z   z  }|r|dz  }|rdddd	d
}d
D ]}t           j                            | |||         z             }	t           j                            |	          r||v ro	 t          j	        |	           t                              d|	            # t          $ r/}
t                              d|	 d|
j                    Y d}
~
d}
~
ww xY wt                              d| d|	            t           j                            t           j                            | |          |dz             t           j                            t           j                            | |dz             |dz             t           j                            t           j                            | |dz             |dz             t           j                            t           j                            | |d	z             |dz             d
S t           j                            | |dz             t           j                            | |dz             t           j                            | |dz             t           j                            | |dz             d
S )z=Build a  path name for given model based on given attributes.r   /r   r   _past _fp32_fp16_int8r  zRemoved the existed directory: zFailed to remove the directory r3  NzDirectory for z
 existed: z.onnxz
_fp32.onnxz
_fp16.onnxz
_int8.onnx)r	  r
  isdirr   partssplitr  existsshutilrmtreer   r   OSErrorstrerror)
output_dirmodel_name_or_pathr   has_past
new_folderremove_existing
model_namesuffixr  new_dires              r   get_onnx_pathszGpt2Helper.get_onnx_paths  s    (
7==+,, 	&0117;JJS!!"%%+++#++J 	"'!J 	'7SSF= 
V 
V
',,z:z@R3RSS7>>'** V!_44c"M'222"KK(S'(S(STTTT& c c c"KK(a'(a(aUVU_(a(abbbbbbbbc $TZ$T$T7$T$TUUU w||BGLLZ$H$H*W^J^__GLLZ'-ABB-  GLLZ'-ABB-  GLLZ'-ABB-   " 7<<
J,@AAGLLZ,-FGGGLLZ,-FGGGLLZ,-FGG	
 
 	
s   1C::
D3%D..D3N)r   )F)r   r   )r   )FFr   )r   )T)r   TF)TT)'r(   r)   r*   r+   rQ   r@   int32r   r   boolrq   r^   r   r   r   dictr<   r   r   r   r   r   r   r  r%  r   r  rS  r[  r]  r_  Tensorre  ro  rr  r  r  r  r  rc   r    r   rz   rz      s       OO !%#'',{*/+,1K"&>I >I>I!>I >I !	>I
 >I >I >I >I >I >I !>I >I "K>I $k>I  >I  
!>I >I >I \>I@  -    !    	 
   
c49n	      \ D 	 	 \	    \ $ $ $ \$    \4 3
 3
 3
 \3
j 
 ).!%#'',{*/+,1Ku u u 	u
 #'u u !u u "Ku $ku u u \un  "'"! ! ! \!F $
 $
 $
C CCCyC C C \CJ ( ( ( ( ( ( \(2 , ,: ,3 , , , \,> 
 
 \
( 
 
 
 \
  !,10, 0,0, S%,./0, CcN+	0,
 0, 0, &*0, 0, 0, \0,d K K \K D D \D
 
  % ;"["%E E E \EN 
 % ;"[4 4 4 \4l 2 2 2 \2"  -777:
 :
 :
 :
 :
 \:
 :
 :
r    rz   )/loggingr	  rm  r   r  r  rM  pathlibr   r   r  r@   benchmark_helperr   rw   r   fusion_optionsr   io_binding_helperr   r+  r   	optimizerr	   torch_onnx_export_helperr
   transformersr   r   r   r   	getLoggerr(   r   PRETRAINED_GPT2_MODELSFLOAT32FLOAT16INT8DEFAULT_TOLERANCEr   r/   r4   rS   rW   r   r^   rz   rc   r    r   <module>r     s    				               & & & & & & - - - - - - ( ( ( ( ( ( - - - - - -             $ $ $ $ $ $ 6 6 6 6 6 6 L L L L L L L L L L L L		8	$	$WWW  vsNC N N N N N9 N N N8 8 8 8 8[ 8 8 8!E !E !E !E !E) !E !E !EHE E E E E E E E$E E E E E/ E E E" *8T:"=x!O|T2 S S S S S S S S>_
 _
 _
 _
 _
 _
 _
 _
 _
 _
r    