
    fPi;                         d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
  ej        e          Zd Zd Zd Zd Zd Zed	k    r e             dS dS )
    N)setup_logger)BenchmarkRecordc                  Z   t          j                    } |                     ddt          d           |                     ddt          d           |                     dd	t          d
           |                     ddt          d           |                     dddd           |                     dddd           |                     dt          dd           |                     dt          dd           |                     dt          dd           |                     dt          dd           |                     d t          d!d"#           |                     d$t          d!g d%d&'           |                     d(t          d!g d)d*'           |                     d+t          d,d-           |                     d.ddd/           |                     d0t          d1d2           |                     d3t          d d4           |                                 }t          |d5|j                            d6          d7         	                    d8d9                     d:|j
         d;|j         }|j        s||_        t          j        |j        d!<           |xj        d=z  c_        |S )>Nz-b--batch-sizesz1 2)typedefaultz-s--sequence-lengthsz8 16 32 64 128 256 512z-w--warmup-runs   z-n
--num-runs  z--hf-pt-eagerF
store_truez,Benchmark in PyTorch without `torch.compile`)r   actionhelpz--hf-pt-compilez)Benchmark in PyTorch with `torch.compile`--hf-ort-dir-path zDPath to folder containing ONNX models for Optimum + ORT benchmarking)r   r   r   z--ort-msft-model-pathzAPath to ONNX model from https://github.com/microsoft/Llama-2-Onnxz --ort-convert-to-onnx-model-pathz'Path to ONNX model from convert_to_onnx--cache-dirz./model_cachez-Cache dir where Hugging Face files are stored--model-nameTzModel name in Hugging Face)r   requiredr   --precision)int4int8fp16fp32zPrecision to run model)r   r   choicesr   --device)cpucudarocmzDevice to benchmark modelsz--device-idr   zGPU device IDz	--verbosezPrint detailed logsz	--timeout
   z8Number of mins to attempt the benchmark before moving on--log-folderz'Path to folder to save logs and results
model_size/.-z./_)exist_ok<   )argparseArgumentParseradd_argumentstrint
parse_argssetattr
model_namesplitreplacer"   	precision
log_folderosmakedirstimeout)parserargslog_folder_names      /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/models/llama/benchmark_all.pyget_argsr=      s   $&&F
	     (	     	     	     ;	     8	     S	     P	     *6	     <	     )	     000%     ''')     	     "	     G	     6	     DD, 5 5c : :2 > F FsC P PQQQ=4?==T^==O? *)K$//// 	LLBLLK    c           
      D   g }d\  }}}d\  }}}	}
d}d}d}d}d}d}d	}t          |          5 }|D ]}|                    d
d          }||v r%t          |t          |          d                    }B||v r%t          |t          |          d                    }k||v rd}r||v rd}y||v r=t	          |t          |          |                    d                             }|dz  }||v r8t	          |t          |          |                    d                             }	||v rd|v rDt	          ||                    d          dz   |                    d                             dz  }
ns||                    |          t          |          z   d                              dd          }t          j        |          |          d         }t	          |          dz  }
|||||||	|
gz   }|	                    |           	 d d d            n# 1 swxY w Y   |S )N)NNN)NNNNzBatch Size: zSequence Length: zto get past_key_valueszwith past_key_valuesz	Latency: zThroughput: zpeak=
r   promptz	per-token r   CPU=   z MB'"max_used_MB)
openr3   r.   lenfloatrfindfindjsonloadsappend)	device_idlog_filebase_resultsentries
batch_sizesequence_lengthstep	latency_s
latency_ms
throughputmemorybatch_patternsequence_patternprompt_step_patternper_token_step_patternlatency_patternthroughput_patternmemory_patternf
input_linelinepeakusageentrys                           r<   process_log_fileri      s   G(8%J0F-Iz:v"M*23!O'N	h '&1 &	& &	&J%%dB//D$$ c-&8&8&:&:!;<<

!T))"%d3/?+@+@+B+B&C"D"D$,,'4//" D((!$s?';';djjoo'M"NOO	&-

#t++"4,>(?(?$**S//(Q#RSS

4''D== #4

3!(;djj>O>O(O#PQQTXXFF  		. 9 9C<O<O O Q QRZZ[^`cddD Jt,,Y7FE"5\\D0F %#(  u%%%M&	&'& '& '& '& '& '& '& '& '& '& '& '& '& '& '&R Ns   GHHHc           	         dd l }|                    | g d          }|d                             d          |d<   |d                             d          |d<   |d                             d          |d<   |d                             d          |d<   |d	                             d
          |d	<   |d                             d
          |d<   |d                             d
          |d<   |d                             d
          |d<   dd l}|j        }t          d |D                       }d}d}|rB|d                             d          d         }|d                             d          d         }g }	|                                D ]}\  }
}|d         dv r't          |d         |d         d|d         ||          }nq|d         dv r;t          |d         |d         d|d         t          j
        t          j                  }n,t          |d         |d         |d         |d         dd          }|d         |j        _        |d         |j        _        |d         |j        _        |d         |j        _        |d         |j        j        d<   |d         |j        j        d<   |d	         |j        j        d<   |d         |j        _        |d         |j        j        d<   |d         |j        _        |	                    |           t          j        ||	           t          j        |                    dd           |	           t2                              d!| d"           d S )#Nr   )Warmup RunsMeasured Runs
Model NameEngine	PrecisionDevice
Batch SizeSequence LengthStepLatency (s)Latency (ms)Throughput (tps)Memory (GB))columnsrk   r.   rl   rq   rr   rt   rK   ru   rv   rw   c                 B    g | ]}|j         d v |j          d|j         S ))onnxruntimezonnxruntime-gpu==)keyversion).0is     r<   
<listcomp>z save_results.<locals>.<listcomp>   s7    lllQIk@k@kAE	 	 QY	 	 @k@k@kr>   r   r{   rE   rn   )optimum-ortrz   rm   ro   rz   rp   )pytorch-eagerpytorch-compilepytorchrs   measure_stepenginelatency_s_meanthroughput_tps.csvz.jsonzResults saved in !)pandas	DataFrameastypepkg_resourcesworking_setsortedr2   iterrowsr   torch__name____version__configwarmup_runsmeasured_runsrU   
seq_length
customizedmetricslatency_ms_meanmax_memory_usage_GBrP   save_as_csvsave_as_jsonr3   loggerinfo)resultsfilenamepddfr   installed_packagesinstalled_packages_listort_pkg_nameort_pkg_versionrecordsr'   rowrecords                r<   save_resultsr      s   	
 
 
 
 
 
B( =)0077B}_-44U;;B,'..u55B|0188??B=)0099B}N+227;;B~ 23::7CCB=)0099B} &2$ll*<lll  LO D.q177==a@1!4::4@@C G++--  3x=:::$L!3{#3]CMS_ap FF ]BBB$L!3{#3YHu~_d_p FF %S%6K8H#h-Y\]eYfhjlnooF$'$6!&)/&:##&|#4 #&'8#9 36v; 0-0] *69-6H!"23),^)<&69:L6M!"23-0-?*v'222 !1!1&'!B!BGLLL
KK/H///00000r>   c                 D   | dt           j                                         dd}t          j                            | j        |          }t          |d          5 }t          j        |||          }	 |	                    | j
                   n)# t          j        $ r |                                 Y nw xY wd d d            n# 1 swxY w Y   t                              d           | j        | j        | j        || j        | j        g}t)          | j        ||          }|S )Nr'   %Y-%m-%d_%H:%M:%Sz.logw)stdoutstderrz Gathering data from log files...)datetimenowr6   pathjoinr5   rI   
subprocessPopenwaitr8   TimeoutExpiredkillr   r   r   num_runsr1   r4   deviceri   rQ   )	r:   benchmark_cmdr   log_filenamelog_pathrR   processrS   r   s	            r<   	benchmarkr   $  sQ   NNx04466NNNNLw||DO\::H	h		 "=(SSS	LL&&&&( 	 	 	LLNNNNN		               KK2333$dmT_fdn^b^ijLt~xFFGNs6   C4BC#B52C4B55CCCc                  z   t                      } t          | j                   t                              | j                   dt          j        j        _	        g }t          | j                  t          j        d<   | j        rdddddd| j        d	| j        d
| j        d| j        d| j        dt          | j                  dt          | j                  d| j        d| j        dg}t                              d           t          | |d          }|                    |           | j        rdddddd| j        d	| j        d
| j        d| j        d| j        dt          | j                  dt          | j                  d| j        d| j        dg}t                              d           t          | |d          }|                    |           | j        rdddddd| j        d| j        d	| j        d
| j        d| j        d| j        dt          | j                  dt          | j                  d| j        d| j        dg}t                              d           t          | |d          }|                    |           | j        rdddddd| j        d| j        d	| j        d
| j        d| j        d| j        dt          | j                  dt          | j                  d| j        d| j        g}t                              d           t          | |d          }|                    |           | j        rdddddd| j        d| j        d	| j        d
| j        d| j        d| j        dt          | j                  dt          | j                  d| j        d| j        g}t                              d           t          | |d           }|                    |           | j         d!| j         d!t<          j                                        d"d#}tA          |t          j!        "                    | j        |                     d S )$NTCUDA_VISIBLE_DEVICESpythonz-mzmodels.llama.benchmarkz--benchmark-typezhf-pt-eagerr   r   r   r	   r   r
   r   r!   r   z--authz'Benchmark PyTorch without torch.compiler   zhf-pt-compilez$Benchmark PyTorch with torch.compiler   zhf-ortr   z Benchmark Optimum + ONNX Runtimer   zort-msftz--ort-model-pathz)Benchmark Microsoft model in ONNX Runtimezort-convert-to-onnxz/Benchmark convert_to_onnx model in ONNX Runtimerz   r'   r   r   )#r=   r   verboser   r   __dict__r   backendscudnnr   r-   rQ   r6   environhf_pt_eagerr1   r4   batch_sizessequence_lengthsr   r   r   r5   	cache_dirextendhf_pt_compilehf_ort_dir_pathort_msft_model_pathort_convert_to_onnx_model_pathr"   r   r   r   r   r   )r:   all_resultsr   r   csv_files        r<   mainr   6  sq   ::D
KK%)EN"K),T^)<)<BJ%&  $$ON !K !!ON1
4 	=>>>D-AA7###  $$ON !K !!ON1
4 	:;;;D-1BCC7###  $$ ON !K !!ON5
8 	6777D-??7###  $$$ON !K !!ON3
6 	?@@@D-<<7### * $$!/ON !K !!ON3
6 	EFFFD-??7###/ddDNddX5F5J5J5L5LddddHbgll4?HEEFFFFFr>   __main__)r*   r   rN   loggingr6   r   r   benchmark_helperr   r   r   	getLoggerr   r   r=   ri   r   r   r    r>   r<   <module>r      s        				      ) ) ) ) ) ) # # # # # #		8	$	$G G GT6 6 6rJ1 J1 J1Z  $nG nG nGb zDFFFFF r>   