
    fPi<                     .   d dl Z d dlZd dlZd dlZd dlmZ d dlZd dlZd dlmZm	Z	m
Z
mZmZ d dlmZmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ  ej        d	          Zdd
Zd Zedk    r' e            Z eej                     ee           dS dS )    N)datetime)	Precisioncreate_onnxruntime_sessionget_ort_environment_variablesprepare_environmentsetup_logger)DEFAULT_TOLERANCEMODEL_CLASSESPRETRAINED_GPT2_MODELS
Gpt2Helper)version)QuantizeHelper)
AutoConfig)__version__ c                 z   t          j                    }|                    dddt          dd                    t
                    z              |                    ddt          d	t          t          j                              d
d                    t          j                              z              |                    ddt          t          j
                            dd          d           |                    ddt          t          j
                            dd          d           |                    dddt          d           |                    ddddd           |                    ddddd           |                    d            |                    d!t          d"dg d#d$%           |                    d&ddd'           |                    d(           |                    d)d*t          t          j        t          t                    d+,           |                    d-ddd.           |                    d/           |                    d0d1d2t          d3gd45           |                    d6d2t          d3gd75           |                    d8d9d2t          g d:d;5           |                    d<d=dd d>?           |                    d@dt          dAdB           |                    dCddD           |                    dE           |                    dFddD           |                    dG           |                    dHddD           |                    dI           |                    dJddD           |                    dK           |                    |           }|S )LNz-mz--model_name_or_pathTz;Model path, or pretrained model name selected in the list: z, )requiredtypehelpz--model_classFGPT2LMHeadModelz!Model type selected in the list: )r   r   defaultchoicesr   z--cache_dir.cache_modelsz%Directory to cache pre-trained models)r   r   r   r   z
--onnx_dironnx_modelszDirectory to store onnx modelsz--test_timesd   z8Number of repeat times to get average inference latency.)r   r   r   r   z-vz--validate_onnx
store_truezValidate ONNX model)r   actionr   z-oz--optimize_onnxz'Use optimizer.py to optimize onnx model)optimize_onnxz--stager   )r         a6  Stage in generation: 1 (initial decoder), 2 (decoder), 0 (both). 1 - decode the first token when past_sequence_length is zero; 2 - decode the remaining tokens when past_sequence_length is not zero; 0 - one onnx model for both stages 1 and 2. Note that we will optimize 1 and 2 differently for best performance.)r   r   r   r   r   z	--use_gpuzuse GPU for inference)use_gpuz-pz--precisionzfPrecision of model to run. fp32 for full precision, fp16 for half precision, and int8 for quantization)r   r   r   r   z--torchscriptzuse Torchscript)torchscriptz-bz--batch_sizes+r    z
batch size)nargsr   r   r   z--sequence_lengthsz!sequence lengths (excluding past)z-sz--past_sequence_lengths)          @         zpast sequence lengthsz-rz--result_csvz$CSV file for saving summary results.)r   r   r   z--thread_numzThreads to usez--include_copy_output_latency)r   r   )include_copy_output_latencyz	--verbose)verbosez--output_torch_latency)output_torch_latencyz--disable_io_binding)disable_io_binding)argparseArgumentParseradd_argumentstrjoinr   listr
   keysospathintset_defaultsr   FLOAT32
parse_args)argvparserargss      /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/models/gpt2/benchmark_gpt2.pyparse_argumentsrB   !   sb   $&&F
JTYYWmMnMnn     !]'))**0499]=O=Q=Q3R3RR     S.114     S-00-     G     "     6     e,,,
		O     eLOfggg
&&&
!Yu     %Sdeee
E***
oSsQCVbccc
0     !)))$     3     S"Scddd
7%P\]]]
E:::
eLIII
&&&
05VVV
U333
.|TTT
5111T""DK    c                    t          j        t                    t          j        d          k     rt          d          t                              d|             | j        t          j        k    r| j	        r| j
        s
J d            | j        t          j        k    r| j
        r
J d            | j        dk    r| j        dgk    s
J d            t          j        | j        dk    rt#          j        d	
          n| j                   t'          t          j                                                   | j        }| j        }t1          ||| j
                   t2          | j                 d         }t6          }t9          j        | j        | j        |          }|                    | j        ||          }t          j         | j
        rdnd          }|!                    |           |j"        dk    }|#                    || j        | j        d	|          }	|	d         }
t2          | j                 d         }|$                    |||
| j%        |||           | j	        s| j        t          j&        k    r|	| j        t          j        k    rtO          | j                  nd         }
|	                    |	d         |
| j        t          j        k    |j(        j)        |j(        j*        |d	| j                   | j        t          j        k    rlt                              d           tW          j,        |
|	d         |           tW          j-        |          }t                              d           |	d         }
| j        r|                    |||||          }t]          |
| j
        d| j        | j%                  }|d S |/                    ta          | j1                  ta          | j                  ta          | j2                  || j                  }|3                    ||| j        t          j        k              }| j4        p8d5                    tm          j7                    8                    d                    }ts          |dd          5 }g d }tu          j;        ||!          }|<                                 | j1        D ]F}| j2        D ]:}| j        D ].}|dk    r|dk    r|dk    sJ t          =                    d"|||           |>                    ||||j)        |j*        |j"        |j?        || j        t          j        k    ||#          }|/                    ||||| j                  }	 | j@        s| jA        r|B                    ||| jC                  \  }}t          |          D ]{\  }}t          |t                    r<t          =                    d$| d%t          |           d&|d         jH                    Vt          =                    d$| d'|jH                    |nd }d }| jI        r |J                    ||| jC                  \  }}n)|K                    ||||| jC        d| jL        (          \  }}| j@        r|}| jI        s@g }|D ];}|M                    |N                                O                                           <|P                    ||| j        t          | j                 t          | j                 )          r.t                              d*t          | j                  d+           t                              d,||||| jI        rd-nd|rd.nd           | j        | j        | j        t                      | j
        | j        | j	        | j        |||| jI        |r|d/nd0|d/d } |S                    |            # t          $ r. t          U                    d1d	2           Y    d d d            d S w xY w<H	 d d d            n# 1 swxY w Y   t                              d3|            |S )4Nz3.1.0z/This tool requires transformers 3.1.0 or later.z
Arguments:z'fp16 requires --optimize_onnx --use_gpuzquantization only supports CPUr    r   z<past_sequence_lengths shall be 0 for stage==1 (init decoder)T)logical)r#   	cache_dir)configrF   zcuda:0cpu   )has_past
new_folderrawr!   )has_position_idshas_attention_maskfp32)auto_mixed_precisionstagezquantizing model...int8zfinished quantizing modelF)enable_all_optimizationnum_threadsr.   zbenchmark_result_{}.csvz%Y%m%d-%H%M%Sar   )modenewline)
model_namemodel_classrQ   environment_variablesgpu	precision	optimizerr#   
batch_sizesequence_lengthpast_sequence_lengthr0   torch_latencyonnxruntime_latency)
fieldnameszMRunning test for batch_size=%d sequence_length=%d past_sequence_length=%d ...)float16rM   rN   ztorch output z is tuple of size z, shape z shape )return_numpyr-   )rY   rtolatolz:Pytorch and ONNX Runtime outputs are all close (tolerance=z).zZbatch_size=%d, sequence_length=%d, past_sequence_length=%d, onnxruntime_latency=%.2f %s %sz(disable_io_binding)z, torch_latency={torch_latency}z.2fNone	Exception)exc_infozResults are saved to file )Vr   parsetransformers_versionRuntimeErrorloggerinfor\   r   FLOAT16r   r"   INT8rQ   past_sequence_lengthstorchset_num_threads
thread_numpsutil	cpu_countprint
__config__parallel_inforF   onnx_dirr   r
   rY   r   r   from_pretrainedmodel_name_or_pathr#   deviceton_layerget_onnx_pathsexport_onnxr.   r<   r4   rG   num_attention_headshidden_sizer   quantize_onnx_modelquantize_torch_modelr   get_output_shapesmaxbatch_sizessequence_lengthsget_output_buffers
result_csvformatr   nowstrftimeopencsv
DictWriterwriteheaderdebugget_dummy_inputs
vocab_sizevalidate_onnxr/   pytorch_inference
test_times	enumerate
isinstancetuplelenshaper0   onnxruntime_inference$onnxruntime_inference_with_binded_ior-   appendrH   numpycompare_outputsr	   r   writerowri   error)!r@   rF   
output_dirrY   
gpt2helperrG   modelr~   use_external_data_formatonnx_model_pathsonnx_model_pathuse_paddingsessionmax_output_shapesoutput_bufferscsv_filenamecsv_filecolumn_names
csv_writerr^   r_   r`   dummy_inputsoutput_shapesoutputsra   ivalueort_outputsort_latencycopy_outputsoutputrows!                                    rA   mainr      s	   })**W]. .   LMMM
KK#T##$$$~***!]dl]]4]]]2~''<AA!AAAzQ)aS0002p000	DOq<P<P&*48888VZVefff	%

(
(
*
*+++IJ	:t|<<< 01!4KJ'(?TM]irsssF''(?Zc'ddE \dl=((>>F	HHV%~2!00+ 1   'u-O 01!4K $&      7T^y/@@@*$.T]TbBbBb3t~+>+>+>hno  U#Ni//L,L$$!%* 	! 		
 		
 		
 >Y^++KK-....@PQW@XZrsss"7>>EKK3444.v6O 
&&(* ' 
 
 ) %O  G  #44DD&''D!""   223Dfdn`i`qNqrrN?p&?&F&Fx|~~G^G^_nGoGo&p&pL	lb	1	1	1 C$X
 
 
  ^HFFF
   * o	$ o	$J#'#8 n$ n$,0,F m$ m$(%>>o.A.AFZ^_F_F_F__LLg"',	   $.#>#>",'2*)!%93D!D)4+6 $? $ $L %/$@$@",'(% %MO$- 11J 15?5Q5QRWYegkgv5w5w2G] -6g,>,> Z Z5#-eU#;#; !Z$*LL(q(q(qSQVZZ(q(qafghaiao(q(q%& %& %& %& %+LL1X1X1X5;1X1X$Y$Y$Y$YZ '+G,0M2 7A7W7W 't8 84K 8B7f7f ' , . - $-2<@<\ 8g 8 84K  - "+6L#'#: N/1.9 !N !NF$0$7$7

8J8J8L8L$M$M$M$M)99 ' ,,0,<%6t~%F%6t~%F  :     	" !' %Garsw  tB  bC  %G  %G  %G!" !" !" x&+0'6:6MU22SUANV==TV   +/*A+/+;%)Z5R5T5T#'<)-)-);+/+;*4/>4H262IGT-`-C-C-CZ`6A3G3G   #++C0000$ $ $ $[4@@@#tttGC$ C$ C$ C$ C$ C$ C$ C$B$Wm$n$o	$)C$ C$ C$ C$ C$ C$ C$ C$ C$ C$ C$ C$ C$ C$ C$J KK;\;;<<<s8   C#`1I_
`&`2`````__main__)N)!r1   r   loggingr8   r   rv   rs   benchmark_helperr   r   r   r   r   gpt2_helperr	   r
   r   r   	packagingr   quantize_helperr   transformersr   r   rl   	getLoggerrn   rB   r   __name__r@   r.    rC   rA   <module>r      s    



  				                      ] \ \ \ \ \ \ \ \ \ \ \       * * * * * * # # # # # # < < < < < <		2		D D D DNo o od z?DLDJJJJJ rC   