
    fPi&1                        d Z ddlZddlZ ed          ZddZd ZddZddZd	 Z	d
 Z
edk    rO e            Z ede           ddlmZ  eej                   ej        Z e
ee          ZeD ]Z ee           dS dS )zThis profiler result processor print out the kernel time spent on each Node of the model.
Example of importing profile result file from onnxruntime_perf_test:
    python profile_result_processor.py --input profile_2021-10-25_12-02-41.json
    N)ScanLoopIfc                    t          j                    }|                    dddt          d           |                    ddt          dd	           |                    d
dt          dd	           |                    dddd           |                    d           |                    dddd           |                    d           |                    |           S )Nz-iz--inputFz2Set the input file for reading the profile results)requiredtypehelpz--thresholdg{Gz?zfThreshold of run time ratio among all nodes. Nodes with larger ratio will show in top expensive nodes.)r   r   defaultr	   z
--providercudazExecution provider to usez--kernel_time_only
store_truez.Only include the kernel time and no fence time)r   actionr	   )kernel_time_onlyz-vz	--verbose)r   r   )verbose)argparseArgumentParseradd_argumentstrfloatset_defaults
parse_args)argvparsers     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/onnxruntime/transformers/profile_result_processor.pyparse_argumentsr      s$   $&&F
A     u     (     =	     ///
kE,OOO
&&&T"""    c                     t          d|  d           t          |           5 }t          j        |          }d d d            n# 1 swxY w Y   t	          |t
                    sJ |S )Nzloading profile output z ...)printopenjsonload
isinstancelist)profile_fileopened_file	sess_times      r   load_profile_jsonr&   ;   s    	
6L
6
6
6777	l		 +{Ik**	+ + + + + + + + + + + + + + + i&&&&&s   AAAc                    i }i }i }d}d}| D ]}|d         dk    r|d         dk    rd}|s|d         dk    rd	|v rd
|v r}d|d
         v rs|d         }|d
         d         }	|	t           v r]|	sd| d}	||v r'||xx         |d	         z  cc<   ||xx         dz  cc<   n|d	         ||<   d||<   |	||<   ||d	         z  }|sdgS g }
|
                    d|dz  dd           |
                    d           |
                    d           t          |                                d d          D ]U\  }}||z  }||k     r||         }|t	          |          z  }|
                    |dd|dz  dd|dd|dd| 	           Vi }|                                D ]'\  }}	||         }|	|v r||	xx         |z  cc<   "|||	<   (|
                    d           |
                    d           |
                    d           t          |                                d  d          D ],\  }	}||z  }|
                    |dd|dz  dd|	            -|
S )!a<  Parse profile data and output nodes in two sections - nodes in the original order, and top expensive nodes.

    Args:
        sess_time (List[Dict]): profile data
        threshold (int, optional): Minimum ratio of duration among all. Defaults to 0.

    Returns:
        List[str]: lines of string for output.
    r   FcatSessionnamesession_initializationTKerneldurargsop_name()   zNo kernel record found!z%
Top expensive kernels with Time% >= d   .2f:@----------------------------------------------------------------u&   Total(μs)	Time%	Calls	Avg(μs)	Kernelc                     | d         S Nr2    xs    r   <lambda>z&parse_kernel_results.<locals>.<lambda>x   s
    1Q4 r   keyreverse10d	      Y@5.2f5d8.1fz
Group kernel time by operator:u   Total(μs)	Time%	Operatorc                     | d         S r8   r9   r:   s    r   r<   z&parse_kernel_results.<locals>.<lambda>   s
    1Q4 r   )_NODES_TYPE_CONTAINING_SUBGRAPHappendsorteditemsr   )r%   	thresholdkernel_name_to_op_namekernel_timekernel_freqtotalsession_inititemkernel_namer/   linesdurationratiocallsavg_timeop_times                   r   parse_kernel_resultsrY   E   so     KKEL ! !;)##V8P(P(PL 	;(""u}}4IY]^dYeLeLev,K6l9-G999  -,k,,,k))K(((DK7(((K(((A-((((+/;K(+,K(6=&{3T%[ E +)** E	LLP)c/PPPPQQQ	LL	LL=>>>!'(9(9(;(;Y]!^!^!^ j jX5 9K(eEll*hhhhhhehhh(hhh[fhhiiii G 6 < < > > ( (W{+gG('GG	LL3444	LL	LL.///#GMMOOQUVVV I I5 GGGGGGgGGHHHHLr   Fc                 b   g }i }i }i }d}| D ]@}|d         dk    r0d|v r+d|v r&d|d         v r|d                              dd	                               d
d	                               dd	          }	d|d         v rW|d         d         dk    rd}
n)|d         d         dk    rd}
n|d         d         dk    rd}
|	|vr|
||	<   n||	         |
k    sJ n|r|d         d         }|t          v r|	|v r'||	xx         |d         z  cc<   ||	xx         dz  cc<   n%|d         ||	<   d||	<   |                    |	           ||d         z  }Bg d}d}|D ]v}	||	         }||	         }|t          |          z  }||z  dz  }|                    |	d	          }||z  }|                    |dd|dd|dd|dd|dd|dd|	            w|                    d|dz  dd            |                    d!           |                    d"           t          |                                d# d$%          D ]t\  }	}||z  }||k     r||	         }|t          |          z  }||z  dz  }|                    |	d	          }|                    |dd|dd|dd|dd|dd|	            u|S )&a  Parse profile data and output nodes in two sections - nodes in the original order, and top expensive nodes.

    Args:
        sess_time (List[Dict]): profile data
        kernel_time_only (bool, optional): Only include items for kernel time. Defaults to False.
        threshold (int, optional): Minimum ratio of duration among all. Defaults to 0.

    Returns:
        List[str]: lines of string for output.
    r   r(   Noder-   r.   r/   r*   _kernel_time _fence_before_fence_afterproviderCPUExecutionProviderCPUCUDAExecutionProviderCUDADmlExecutionProviderDMLr2   )z
Nodes in the original order:r6   u3   Total(μs)	Time%	Acc %	Avg(μs)	Calls	Provider	Nodeg        rB   r@   rA   rC   rE   rD   8sz#
Top expensive nodes with Time% >= r3   r4   r5   r6   u-   Total(μs)	Time%	Avg(μs)	Calls	Provider	Nodec                     | d         S r8   r9   r:   s    r   r<   z$parse_node_results.<locals>.<lambda>   s
    qt r   Tr=   )replacerG   rH   r   getrI   rJ   )r%   r   rK   node_name_list	node_time	node_freqnode_providerrO   rQ   	node_namedevicer/   rS   before_percentagerT   rV   rW   
percentager`   rU   s                       r   parse_node_resultsrs      s.    NIIME !! !!;&  Ud]]v~~)W[\bWcJcJcV$$^R88@@RTUU]]^lnpqq  T&\))<
+/EEE"FF&\*-1HHH#FF&\*-1GGG"FM11/5M),,(3v=====! 6l9-G999I%%)$$$U3$$$)$$$)$$$$'+E{	)$'(	)$%%i000T%[ E  E
 # 	
 	
	Y')$eEll*&%/
 $$Y33Z'  D  D  Dz  D  D  D2C  D  D  DH  D  D  D]b  D  D  Diq  D  D  D  yB  D  D	
 	
 	
 	

 
LLN	CNNNNOOO	LL	LLEFFF%ioo&7&7^^UYZZZ 	t 	t	85 9)$eEll*&%/
 $$Y33rrr
rrr8rrrErrrX`rrrgprrssssLr   c                 H   i }i }d}i }i }i }i }d}i }	| D ]g}
|
d         dk    rWd|
v rRd|
v rMd|
d         v rB|
d         d         }|t           v r=d|
d         vr<d|
d	         v r1||v r||xx         |
d         z  cc<   n|
d         ||<   ||
d         z  }|
d                             dd
          }||	v r|	|xx         dz  cc<   nd|	|<   | d| }||v r'||xx         |
d         z  cc<   ||xx         dz  cc<   n|
d         ||<   d||<   ||v r||xx         |
d         z  cc<   n|
d         ||<   ||v r'||xx         |
d         z  cc<   ||xx         dz  cc<   n|
d         ||<   d||<   ||
d         z  }id
dg}|                    d           |                    d           t          |                                d d          D ]s\  }}|                    |d          }||z  }||z   }|||z   z  }||         }||z  }|                    |dd|dz  dd|dd|dz  dd|dd|dd|dd|            t|d
dgz  }|                    d           |                    d           t          |                                d d          D ]\  }}|                    d          }|d         }|d         }|                    dd
          }||         }||z  }|||         z  }|                    |dd|dz  dd|dd|dd|dd|            |S ) zGroup results by operator name.

    Args:
        sess_time (List[Dict]): profile data

    Returns:
        List[str]: lines of string for output.
    r   r(   r[   r-   r.   r/   r`   fencer*   r]   r2   r5   zGrouped by operatorr6   uM   Total(μs)	Time%	Kernel(μs)	Kernel%	Calls	AvgKernel(μs)	Fence(μs)	Operatorc                     | d         S r8   r9   r:   s    r   r<   z$group_node_results.<locals>.<lambda>0  s
    QqT r   Tr=   r@   rA   rB   rC   11drD   z14.1fzGrouped by provider + operatoru<   Kernel(μs)	Provider%	Calls	AvgKernel(μs)	Provider	Operatorc                     | d         S r8   r9   r:   s    r   r<   z$group_node_results.<locals>.<lambda>>  s    RSTURV r   ExecutionProviderz9.2frg   )rG   rj   rH   rI   rJ   splitri   )r%   op_kernel_timeop_kernel_recordstotal_kernel_timeprovider_op_kernel_timeprovider_op_kernel_recordsprovider_kernel_timeop_fence_timetotal_fence_timeprovider_counterrQ   r/   r`   r>   rS   rM   
fence_timekernel_time_ratio
total_time
time_ratiokernel_callsavg_kernel_timepartsshort_eprV   provider_time_ratios                             r   group_node_resultsr      sf    N !#M +- +-;&  Ud]]v~~)W[\bWcJcJc6l9-G 999f--d6l**-//%g...$u+=....15eg.$U3$F|''
B77H+++ ***a/****-. *))))C---',,,U;,,,*3///14/////3E{',23*3////$X...$u+=....15e$X..((w'''4;6'''!'***a/*****.u+w'-.!'*e,&'E	LL	LLghhh &~';';'='=>>[_ ` ` ` 	
 	
"&&w22
'*;; :-
#47G#GH
(1%4  y  y  ye!3  y  y  yK  y  y  yO`chOh  y  y  yq}  y  y  y  ET  y  y  y  ^h  y  y  y  pw  y  y	
 	
 	
 	
 
b233E	LL	LLTUUU"#:#@#@#B#B`deee 

 

[		#8(##$7<<*3/%-),@,JJ  D  D  D"5"=  D  D  De  D  D  DRa  D  D  Dks  D  D  D  {B  D  D	
 	
 	
 	
 Lr   c                     t          |           }t          ||j                  }|t          ||j        |j                  z  }|t          |          z  }|S N)r&   rY   rK   rs   r   r   )r#   r.   profile_recordsrS   s       r   process_resultsr   M  sT    '55O $.AAE	1FWWWE	000ELr   __main__	Arguments)setup_loggerr   )r   )Fr   )__doc__r   r   	frozensetrG   r   r&   rY   rs   r   r   __name__	argumentsr   benchmark_helperr   r   inputr#   resultsliner9   r   r   <module>r      sE   
  "+),B"C"C '# '# '# '#T  L L L L^T T T Tn_ _ _D	 	 	 z!!I	E+y!!!------L"###?LolI66G  d  r   