
    .`ip4                        U d dl Z d dlmZ d dlmZ d dlmZmZmZ d dl	m
Z
mZmZ d dlmZmZmZ d dlmZmZmZ d dlmZ d d	lmZmZ d d
lmZmZmZmZmZm Z  d dl!m"Z" 	 d dl#Z$n# e%$ r  e"d          Z$Y nw xY we G d d                      Z&e G d d                      Z'e G d d                      Z(e(e'z  Z)ee*d<   e G d d                      Z+e G d de                      Z, G d de          Z-dS )    N)defaultdict)Callable)asdict	dataclassfield)AnyOptional	TypeAlias)
DeviceType_KinetoEvent_ProfilerResult)
_EventType_ExperimentalConfig_ProfilerEvent)FunctionEvent)ProfilerActivityprofile)TablePrinterevent_has_moduleevent_is_torch_opevent_module_reprevent_torch_op_stack_traceindent_string)PlaceholderModulepandasc                       e Zd ZU eed<   dZed          ed<    ee          Z	ed          ed<   dZ
eed<   ed             Zed	             Zed
             ZdS )_ModuleTreeNodeeventNparent)default_factorychildren tracec                 X    | j         j        d u pt          | j         j                  dk    S Nr   )r   r!   lenselfs    s/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/profiler/layerwise_profile.pyis_leafz_ModuleTreeNode.is_leaf&   s)    z"d*Kc$*2E.F.F!.KK    c                 *    t          | j                  S N)r   r   r'   s    r)   is_torch_opz_ModuleTreeNode.is_torch_op*   s     ,,,r+   c                     | j         j        t          j        k    o$| j         j        d         j        t          j        k    S )N   )r   tagr   Kinetotypeddevice_typer   CUDAr'   s    r)   is_cudaz_ModuleTreeNode.is_cuda.   s5     JNj// C
 #/:?B	
r+   )__name__
__module____qualname__r   __annotations__r   r	   r   listr!   r#   strpropertyr*   r.   r6    r+   r)   r   r      s         *.FH&'...(-d(C(C(CHd$%CCCE3OOOL L XL - - X- 
 
 X
 
 
r+   r   c                   8    e Zd ZU eed<   eed<   eed<   eed<   dS )SummaryStatsEntrynamecuda_time_uspct_cuda_timeinvocationsN)r7   r8   r9   r<   r:   floatintr>   r+   r)   r@   r@   6   s?         
IIIr+   r@   c                   B    e Zd ZU eed<   eed<   eed<   eed<   eed<   dS )ModelStatsEntryrA   cpu_time_usrB   rC   r#   N)r7   r8   r9   r<   r:   rE   r>   r+   r)   rH   rH   >   sF         
IIIJJJJJr+   rH   
StatsEntryc                   @    e Zd ZU eed<   ee         ed<   edz  ed<   dS )_StatsTreeNodeentryr!   Nr   )r7   r8   r9   rJ   r:   r;   r>   r+   r)   rL   rL   J   s@         :r+   rL   c                      e Zd ZU eed<    ed          Zeee	e
         f         ed<    ed          Zeee	e         f         ed<    ed          Ze	e         ed<    ed          Ze	e         ed<    ed          Ze	e         ed<   d	Zed	z  ed
<   d Zd"deeef         fdZd"deeef         fdZdefdZdefdZdeeef         fdZe	 d#de	eeef                  deegef         ez  fd            Zd Z d Z!defdZ"defdZ#d Z$d Z%de	e         de	eeef                  fd Z&de	e         de	e         fd!Z'd	S )$LayerwiseProfileResults_kineto_resultsF)init_kineto_event_correlation_map_event_correlation_map_module_tree_model_stats_tree_summary_stats_treeNnum_running_seqsc                 ~    |                                   |                                  |                                  d S r-   )_build_correlation_map_build_module_tree_build_stats_treesr'   s    r)   __post_init__z%LayerwiseProfileResults.__post_init__]   s>    ##%%%!!!!!!!!r+   column_widthsc                    t          ddddd          }|r |j        di | d |                     | j                  D             }t	          t
          |                              |                     |d                      d S )N<      rA   rI   rB   rC   r#   c                 H    g | ]\  }}|j         d k    s|j        d k    ||f S r   )rB   rI   .0depthrows      r)   
<listcomp>z=LayerwiseProfileResults.print_model_table.<locals>.<listcomp>h   sD      
  
  
s!##s':': CL':':':r+   c                     dd| z  z   dz   S N|- r>   indents    r)   <lambda>z;LayerwiseProfileResults.print_model_table.<locals>.<lambda>p       C#,,>,D r+   indent_styler>   )dictupdate_flatten_stats_treerU   r   rH   print_table _indent_row_names_based_on_depth)r(   r]   _column_widthsfiltered_model_tables       r)   print_model_tablez)LayerwiseProfileResults.print_model_tableb   s    "Bb
 
 
  	3!N!22M222 
  
"66t7MNN 
  
  

 	_n55AA11$DD 2  	
 	
 	
 	
 	
r+   c                    t          dddd          }|r |j        di | d |                     | j                  D             }t	          t
          |                              |                     |d                      d S )	NP   r`      rA   rB   rC   rD   c                 2    g | ]\  }}|j         d k    ||fS rc   )rB   rd   s      r)   rh   z?LayerwiseProfileResults.print_summary_table.<locals>.<listcomp>z   s8     "
 "
 "
s!## CL###r+   c                     dd| z  z   dz   S rj   r>   rn   s    r)   rp   z=LayerwiseProfileResults.print_summary_table.<locals>.<lambda>   rq   r+   rr   r>   )rt   ru   rv   rV   r   r@   rw   rx   )r(   r]   ry   filtered_summary_tables       r)   print_summary_tablez+LayerwiseProfileResults.print_summary_tablet   s    "BB
 
 
  	3!N!22M222"
 "
"66t7OPP"
 "
 "

 	&77CC11&DD 2  	
 	
 	
 	
 	
r+   filenamec                     t          j        d |                     | j                  D                       }|                    |           d S )Nc                 2    g | ]\  }}t          |          S r>   r   re   _rg   s      r)   rh   zHLayerwiseProfileResults.export_model_stats_table_csv.<locals>.<listcomp>   s"    XXXVQVC[[XXXr+   )pd	DataFramerv   rU   to_csvr(   r   dfs      r)   export_model_stats_table_csvz4LayerwiseProfileResults.export_model_stats_table_csv   sO    \XXt'?'?@V'W'WXXX
 
 			(r+   c                     t          j        d |                     | j                  D                       }|                    |           d S )Nc                 2    g | ]\  }}t          |          S r>   r   r   s      r)   rh   zJLayerwiseProfileResults.export_summary_stats_table_csv.<locals>.<listcomp>   s2       As s  r+   )r   r   rv   rV   r   r   s      r)   export_summary_stats_table_csvz6LayerwiseProfileResults.export_summary_stats_table_csv   sZ    \ "66t7OPP  
 
 			(r+   returnc                 |    d| j         i|                     | j                  |                     | j                  dS )NrW   )metadatasummary_statsmodel_stats)rW   _convert_stats_tree_to_dictrV   rU   r'   s    r)   convert_stats_to_dictz-LayerwiseProfileResults.convert_stats_to_dict   sD    +T-BC!==d>VWW;;D<RSS
 
 	
r+   rm   depths_rowsrs   c                     g }| D ]U\  }}|j         dk    rt          j        |          }t          |j        ||          |_        |                    |           V|S r%   )rB   copydeepcopyr   rA   append)r   rs   indented_rowsrf   rg   indented_rows         r)   rx   z8LayerwiseProfileResults._indent_row_names_based_on_depth   sp    
 % 	/ 	/JE31$$=--L -l.? U UL  ....r+   c                     t          t                    | _        | j                                        D ]4}| j        |                                                             |           5d S r-   )r   r;   rR   rP   eventscorrelation_idr   )r(   r   s     r)   rY   z.LayerwiseProfileResults._build_correlation_map   si    -8->->*)0022 	U 	UE.u/C/C/E/EFMMeTTTT	U 	Ur+   c                      g  _          j                                        }	 ddt          dt          d z  f fd|D ]} |           d S )Nr   	curr_nodec                    | j         dk    rd S t          |           rJt          | |          }|r|j                            |           nj                            |           |}| j        d u pt          | j                  dk    }|r@|r>t          | |t          | d                     }|j                            |           |}| j        D ]} ||           d S )Nr0   )r   r   r   c                      t          |           S r-   )r   )xs    r)   rp   zSLayerwiseProfileResults._build_module_tree.<locals>._df_traversal.<locals>.<lambda>   s    /?/B/B r+   )until)r   r   r#   )	start_tidr   r   r!   r   rT   r&   r   )r   r   noder*   child_df_traversalr(   s        r)   r   zALayerwiseProfileResults._build_module_tree.<locals>._df_traversal   s)    !##&& !&U9EEE 3&--d3333%,,T222 	n,HEN0C0Cq0HG 	!9 	!&$4%B%B     "))$/// 	 0 0eY////0 0r+   r-   )rT   rP   experimental_event_treer   r   )r(   
event_treerootr   s   `  @r)   rZ   z*LayerwiseProfileResults._build_module_tree   s    )AACC
 HL	0 	0!	0.=.D	0 	0 	0 	0 	0 	0 	0<  	  	 DM$	  	 r+   r   c                     j         j        t          j        k    rd S | j                            j         j        g           }fd|D             }t          |d           S )Nc              3      K   | ]J}|                                 t          j        k    $|                                j        j        k    F|V  Kd S r-   )r4   r   r5   rA   r   )re   r   r   s     r)   	<genexpr>z@LayerwiseProfileResults._get_kineto_gpu_event.<locals>.<genexpr>   sZ       
 
}}*/11affhh$*/6Q6Q 6Q6Q6Q6Q
 
r+   )r   r1   r   r2   rR   getr   next)r(   r   correlated_kineto_eventsiterators    `  r)   _get_kineto_gpu_eventz-LayerwiseProfileResults._get_kineto_gpu_event   sw    :>Z...4#'#E#I#IJ%r$
 $
 
 
 
 
-
 
 

 Hd###r+   c                 8     dt           f fd |          S )z Return cuda time in microsecondsr   c                     | j         r.                    |           x}r|                                dz  S d}| j        D ]}| |          z  }|S )N     @@r   )r*   r   duration_nsr!   )r   gpu_kineto_eventcumulative_cuda_timer   _cumulative_cuda_time_recursiver(   s       r)   r   zVLayerwiseProfileResults._cumulative_cuda_time.<locals>._cumulative_cuda_time_recursive   sy    | ,T5O5OPT5U5U!U!1 ,'3355>>'($!] S SE(,K,KE,R,RR((++r+   )r   )r(   r   r   s   ` @r)   _cumulative_cuda_timez-LayerwiseProfileResults._cumulative_cuda_time   sA    	,/ 	, 	, 	, 	, 	, 	, 	, /.t444r+   c                 D     t           fd j        D                       S )Nc                 :    g | ]}                     |          S r>   )r   )re   r   r(   s     r)   rh   z<LayerwiseProfileResults._total_cuda_time.<locals>.<listcomp>   s'    SSSD..t44SSSr+   )sumrT   r'   s   `r)   _total_cuda_timez(LayerwiseProfileResults._total_cuda_time   s)    SSSSARSSSTTTr+   c                     i                                   fd	 	 ddt          dt          d z  dt          t                   f fdg  _         j        D ]%} j                             |                     &	 d	dt          dt          d z  f fdg  _         j        D ]%} j                             |                     &d S )
Nc                     | z  dz  S )Nd   r>   )rB   total_cuda_times    r)   rC   zALayerwiseProfileResults._build_stats_trees.<locals>.pct_cuda_time   s     ?2c99r+   r>   r   r   summary_tracec           	         t          | j                  r*t          | j                  }                    |           }nE                    |           x}r,|                                }|                                dz  }nd S ||fz   }|v rC|         j        }|xj        |z  c_        |xj	        dz  c_	         
|j                  |_
        nMt          t          || 
|          d          g |          }|r|j                            |           ||<   | j        D ]} 	||         |           |         S )Nr   r0   r   )rM   r!   r   )r   r   r   r   r   rA   r   rM   rB   rD   rC   rL   r@   r!   r   )r   r   r   rA   rB   r   rM   new_noder   build_summary_stats_tree_dfrC   r(   summary_dicts            r)   r   zOLayerwiseProfileResults._build_stats_trees.<locals>.build_summary_stats_tree_df   s   
  
++ (44#99$??%)%?%?%E%EE! ',,../;;==Ft)TG3M,,$]39""l2""!!Q&!!&3mE4F&G&G##)+!%1&3mL&A&A$%	    !	 	 	  5O**8444.6]+  ++<6     ..r+   c           
          t          | j                  r;t          | j                  }                    |           }| j        j        dz  }d}nN                    |           x}r5|                                }|                                dz  }d}| j        }nd S t          t          ||| 
|          |          |g           }|r|j                            |           | j        D ]} 	||           |S )Ni  r"   r   r   ra   )rM   r   r!   )r   r   r   r   duration_time_nsr   rA   r   r#   rL   rH   r!   r   )r   r   rA   rB   rI   r#   r   r   r   build_model_stats_tree_dfrC   r(   s            r)   r   zMLayerwiseProfileResults._build_stats_trees.<locals>.build_model_stats_tree_df'  s?     
   )44#99$??"j9D@%)%?%?%E%EE! ',,../;;==F
t%% +!-"/-"="=   
 
 
H  1&&x000 ; ;))%::::Or+   )Nr>   r-   )	r   r   rL   tupler<   rV   rT   r   rU   )r(   r   r   r   rC   r   r   s   ` @@@@@r)   r[   z*LayerwiseProfileResults._build_stats_trees   ss   24//11	: 	: 	: 	: 	:
 -1(*(	/ (	/!(	/"T)(	/ !:(	/ (	/ (	/ (	/ (	/ (	/ (	/ (	/ (	/T $& % 	O 	OD$++,G,G,M,MNNNN DH#	 #	!#	+9D+@#	 #	 #	 #	 #	 #	 #	 #	J "$% 	K 	KD"))*C*CD*I*IJJJJ	K 	Kr+   treec                 L    g ddt           ffd|D ]} |           S )Nr   r   c                 t                         || j        f           | j        D ]} ||dz              d S )Nr0   )rf   )r   rM   r!   )r   rf   r   df_traversalentriess      r)   r   zALayerwiseProfileResults._flatten_stats_tree.<locals>.df_traversalU  sS    NNE4:./// 5 5U%!)444445 5r+   rc   )rL   )r(   r   r   r   r   s      @@r)   rv   z+LayerwiseProfileResults._flatten_stats_treeP  sc     13	5 	5~ 	5 	5 	5 	5 	5 	5 	5
  	 	DLr+   c                 l    g }dt           dt          t                   ffd|D ]} ||           |S )Nr   curr_json_listc                     |                     t          | j                  g d           | j        D ]} ||d         d                    d S )N)rM   r!   r!   )r   r   rM   r!   )r   r   r   r   s      r)   r   zILayerwiseProfileResults._convert_stats_tree_to_dict.<locals>.df_traversalb  sg    !!F4:,>,>B"O"OPPP D DUN2$6z$BCCCCD Dr+   )rL   r;   rt   )r(   r   
root_dictsr   r   s       @r)   r   z3LayerwiseProfileResults._convert_stats_tree_to_dict_  sl    !#
	D~ 	DtDz 	D 	D 	D 	D 	D 	D
  	+ 	+DLz****r+   r-   )rm   )(r7   r8   r9   r   r:   r   rR   rt   rF   r;   r   rS   r   rT   r   rU   rL   rV   rW   r\   r<   r{   r   r   r   r   r   staticmethodr   rJ   r   rx   rY   rZ   r   r   r   r[   rv   r   r>   r+   r)   rO   rO   Q   s        $$$$CH5eCTCTCT!4T,-?(?#@TTT=BU=N=N=NDd=&9!9:NNN*/%U*;*;*;L$';;;.3e.?.?.?tN+???0550A0A0An-AAA $(cDj'''" " "

 
tCH~ 
 
 
 
$
 
c3h 
 
 
 
$S    s    
tCH~ 
 
 
 
  47 %Z01ucz*S0   \U U U
#  #  # J$/ $ $ $ $5/ 5 5 5 5U U U\K \K \K|(	eCO$	%   ^0D d      r+   rO   c                   @     e Zd Zddedz  f fdZ fdZ fdZ xZS )layerwise_profileNrW   c           	          t                                          t          j        t          j        gdddt          d                     || _        dS )a  
        layerwise profile constructor.

        Args:
            num_running_seqs (Optional[int], optional): When given,
                num_running_seqs will be passed to LayerProfileResults
                for metadata update. Defaults to None.
        T)verbose)
activitiesrecord_shapes
with_stackwith_modulesexperimental_configN)super__init__r   CPUr5   r   rW   )r(   rW   	__class__s     r)   r   zlayerwise_profile.__init__n  s\     	(,.>.CD 3D A A A 	 	
 	
 	
 !1r+   c                 D    t                                                      S r-   )r   	__enter__)r(   r   s    r)   r   zlayerwise_profile.__enter__  s    ww  """r+   c                     t                                          |||           t          | j        j        | j                  | _        d S )N)rW   )r   __exit__rO   profilerkineto_resultsrW   results)r(   exc_typeexc_valexc_tbr   s       r)   r   zlayerwise_profile.__exit__  sF    7F333.M(4;P
 
 
r+   r-   )r7   r8   r9   rF   r   r   r   __classcell__)r   s   @r)   r   r   m  s        1 1t 1 1 1 1 1 1&# # # # #
 
 
 
 
 
 
 
 
r+   r   ).r   collectionsr   collections.abcr   dataclassesr   r   r   typingr   r	   r
   torch._C._autogradr   r   r   torch._C._profilerr   r   r   torch.autograd.profilerr   torch.profilerr   r   vllm.profiler.utilsr   r   r   r   r   r   vllm.utils.import_utilsr   r   r   ImportErrorr   r@   rH   rJ   r:   rL   rO   r   r>   r+   r)   <module>r      s    # # # # # # $ $ $ $ $ $ 0 0 0 0 0 0 0 0 0 0 + + + + + + + + + + H H H H H H H H H H N N N N N N N N N N 1 1 1 1 1 1 4 4 4 4 4 4 4 4                6 5 5 5 5 5% % % %		8	$	$BBB% 
 
 
 
 
 
 
 
,                 (*;;
I ; ; ;         X X X X Xg X X Xv
 
 
 
 
 
 
 
 
 
s   A$ $A76A7