
    `i                         d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZmZ d a		 	 	 	 ddZ
d Zd Zd Zd	 Zd
 Zg adedefdZ	 	 	 ddZdS )    N)profileProfilerActivityc                      d S )N r       t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torch/_functorch/benchmark_utils.pysynchronizer	      s    Dr      c	                 L   |dg}|dgk    r/t           j                                        rt           j        j        a|i }|i }|5  t          j        d           t          d          D ]}	 | |fi | t                       t          j        d           t          j                    }
t          |          D ]}	 | |fi | t                       t          j                    }ddd           n# 1 swxY w Y   ||
z
  }t          dd|i|5 }|5  t                       t          j        d           t          |          D ]}	 | |fi | t                       	 ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   |	                    |           |S )a0  
    Output the chrome trace of running f(input, **kwargs_for_f) with [optimize_ctx]
    [num_runs] times to [trace_filename].

    [activities] are the activities that the profiler will record, e.g. ProfilerActivity.CUDA.
    Return total runtime without the profiler

    Outputs to trace_filename
    Ncudacpui9     
activitiesr   )
torchr   is_availabler	   manual_seedrangetimeperf_counterr   export_chrome_trace)finputtrace_filenameoptimize_ctxr   num_runsdeviceskwargs_for_fkwargs_for_profiler_t0t1timingprofs                 r   dump_chrome_tracer$      s   * ( 5'ej5577j," 	 
! 
!$q 	 	AAe$$|$$$MMMM$  x 	 	AAe$$|$$$MMMM  
! 
! 
! 
! 
! 
! 
! 
! 
! 
! 
! 
! 
! 
! 
! "WF		>	>J	>*=	>	> $ 	 	MMMd###8__  %((<(((	 	 	 	 	 	 	 	 	 	 	 	 	 	 	               	^,,,MsJ   B!C33C7:C7FAE-!F-E1	1F4E1	5FFFc                 \    t          |           }t          j        |          }|d         }|S )NtraceEvents)openjsonload)filenamer   dataeventss       r   get_chrome_trace_eventsr-   K   s)    XA9Q<<D- FMr   c                 H    d| v o| d         t           v od| v o| d         dk    S )NpidphX)gpu_pidsevents    r   is_gpu_compute_eventr5   R   s@     	 	%LH$	EM	 $K3	r   c                     g }| D ]'}t          |          s|                    |           (t          |t          j        d                    S )Nts)key)r5   appendsortedoperator
itemgetter)r,   sorted_gpu_eventsr4   s      r   get_sorted_gpu_eventsr>   \   s`     ( (#E** 	  ''''#)<T)B)BCCCCr   c                 &   t          |           dk    rdS | d         }|d         |d         z   }|d         }| dd          D ]O}t          |d         |          }|d         |d         z   }|t          ||z
  d          z   }t          ||          }P|S )Nr   r7   durr
   )lenmax)r=   r4   current_end_timetotal_duration
start_timeend_times         r   get_durationrG   e   s    
""qa ET{U5\15\N"122& ; ;t&677
;u-'#h.CQ*G*GG/::r   c                 z    d }t          |           }g }|D ]#} ||          s|                    |           $|S )Nc                 Z    d| v o'd| d         v pd| d         v pd| d         v p	d| d         v S )Nnamegemmconvcutlasswgradr   r3   s    r   is_mm_conv_eventz7get_sorted_gpu_mm_conv_events.<locals>.is_mm_conv_eventt   sT     
eFm# (v&(E&M)( %-'		
r   )r>   r9   )r,   rO   
gpu_eventssorted_eventsr4   s        r   get_sorted_gpu_mm_conv_eventsrR   s   sg    
 
 
 'v..JM $ $&& 	U####r   r*   total_lengthc                 F   t          |           }g a|D ]C}d|vr|d         dk    r0d|d         d         v r t                              |d                    D|dz  }t          |          }t	          |          |z  }t          |          }t	          |          |z  }||fS )a  
    Process the chrome traces outputs by the pytorch profiler to compute GPU Utilization
    and percent of times spent on matmul and convolution

    Args:
        filename(str): Name of chrome traces file produced by pytorch profiler

        total_length(float): total length of the process without profiler in second

    Return:
        tuple: (GPU Utilization, percent of time spent on matmul and convolution)
    rJ   process_labelsGPUargslabelsr/   g    .A)r-   r2   r9   r>   rG   rR   )r*   rS   r,   r4   r=   utilizationsorted_gpu_mm_conv_eventsmm_conv_utilizations           r   compute_utilizationr\      s     %X..F H * *=,,,%-:Q1Q1QOOE%L)))#%L-f55011L@K =f E E&'@AALP+++r   tmp_chrome_tracec           	      r   t           j                            |          }|s&t          j        |           t	          d|z              |t          j                    }t           j                            ||dz             }t          | |||t          j
        g|dg          }t          ||          \  }	}
|	|
fS )a  
    Benchmark the GPU Utilization and percent of time spent on matmul and convolution operations of
    running f(input, **kwargs_for_f) with [optimize_ctx] [num_runs] times.
    It will produce a chrome trace file in trace_folder/trace_file_name.json

    Example:

    ```
    def f(a):
        return a.sum()


    a = torch.rand(2**20, device="cuda")
    utilization, mm_conv_utilization = benchmark_utilization(
        f, a, "tmp", trace_file_name="tmp_chrome_trace"
    )
    ```

    Args:
        f: function to benchmark

        input: input to :attr:`f`

        trace_folder: name of the folder to store the chrome trace

        optimize_ctx: the context in which f will run

        trace_file_name: name of the dumped chrome trace file, default to "tmp_chrome_trace"

        num_runs: number of times to run f, excluding the warm-up runs, default to 1.

    Return:
        tuple: (GPU Utilization, percent of time spent on matmul and convolution)

    zcreate folder Nz.jsonr   )r   r   )ospathexistsmakedirsprint
contextlibnullcontextjoinr$   r   CUDAr\   )r   r   trace_folderr   trace_file_namer   isExistchrome_trace_file_namerS   rY   r[   s              r   benchmark_utilizationrl      s    V gnn\**G /
L!!!-...!-//W\\,'8QRR$			  L (;( ($K$ +++r   )r
   NNN)Nr]   r
   )rd   r(   r;   r_   r   r   torch.profilerr   r   r	   r$   r-   r5   r>   rG   rR   r2   strfloatr\   rl   r   r   r   <module>rp      s1         				   4 4 4 4 4 4 4 4	 	 	 7 7 7 7t    D D D    $ ,# ,U , , , ,L &A, A, A, A, A, A,r   