
    Pi\7                        U d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	 d dl
Z
d dlZ
d dlmZ d dlmZ d dlmZ d dlmZmZ  ed	          Zd
Ze
j        j        j        e
j        j        j        e
j        j        j        hZdddddZeed<   dddddZ eed<   dZ!e"ed<   de"fdZ#	 	 d/de
j        j$        de"de"de%fdZ& G d d           Z'dddde d!         e d"         e d#         e d$         dddddfd%e(d&e(d'e(d(e(d!e(d"e(d#e(d$e(d)ee%         d*ee%         d+ee%         d,ee%         dee"         d-e	e
j        j$        ef         fd.Z)dS )0    N)partial)Path)OptionalTuple)
DictConfig)_ExperimentalConfig)tensorboard_trace_handler)
get_loggerget_world_size_and_rankINFOprofiler         
wait_stepswarmup_stepsactive_steps
num_cyclesDEFAULT_SCHEDULEFT)profile_memory
with_stackrecord_shapes
with_flopsDEFAULT_TRACE_OPTSprofiler_outputDEFAULT_PROFILE_DIRmsgc                 l    t                      \  }}|dk    rt                              |            d S d S )Nr   )r   logwarning)r   _ranks      p/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/training/_profiler.py_warnr%   2   s9    %''GAtqyyC y    self_cuda_time_total   prof
output_dirmetric	row_limitc                    t                      \  }}dt          | j                  z   }t          j                            ||          }t          j                            |          st          j        |d           |dk    r"t          	                    d| j                    t          j                    }t          j                                        }	t          |d|	j         d|	j         d|	j         d|	j         d|	j         
d          }
 |
|            |dk    r3t          	                    d	t          j                    |z
  d
d           | j        rt*          j                                        r|dk    r{	 |                     | d| d           n4# t2          $ r'}t                              d|            Y d}~nd}~ww xY wt*          j        j                            | d| d           | j        r|                     | d| d|           |                     | j         d          !                    ||          }tE          | d| dd          5 }tG          ||           ddd           n# 1 swxY w Y   |dk    rt          	                    d|            |dk    r t*          j$        %                                 dS dS )a~  
    Handles export of artifacts from ``torch.profiler.profile``.

    The following artifacts are exported:
    - chrome / tensorboard trace - viewable through tensorboard or perfetto.dev / chrome::/tracing
    - trace event table
    - memory timeline and snapshot.pickle if ``profile_memory``
    - stacks if ``with_stack`` (note that ``profile_memory`` requires ``with_stack`` to be ``True``),
    viewable as a flamegraph see (https://pytorch.org/docs/stable/profiler.html#torch.profiler._KinetoProfile.export_stacks).

    Notes:
    - Each profiling cycle is exported as a sub-directory in output_dir
        - E.g., profiling in 5-step cycle (wait=2, warmup=2, active=1, repeat=0) will result in
        sub-directories iteration_5, iteration_10, etc.
    - If profiling in a distributed setting, each artifact will be prefixed with rank.
    - Memory timeline is only exported for rank 0 (error if exporting from multiple ranks on single node)

    See profiler documentation (https://pytorch.org/docs/stable/profiler.html#torch.profiler.profile) for more details

    Args:
        prof (torch.profiler.profile): instance of torch profiler to use
        output_dir (str):  directory to store artifacts
        metric (str): metric to order trace event table by, see ``torch.profiler.profile.key_averages().table`` for
        row_limit (int): number of rows to display in trace event table

    
iteration_T)exist_okr   zDumping traces at step zr0--)worker_nameuse_gzipzFinished dumping traces in z.2fz secondsz/rankz_memory-timeline.htmlz# Failed to export memory timeline: Nz_memory_snapshot.picklez_stacks.txt)r+   r   )group_by_input_shapegroup_by_stack_n)sort_byr,   z_key_averages.txtw)filezSaving profiling results to r   )&r   strstep_numospathjoinexistsmakedirsr    infotime	monotonicdatetimenowr	   yearmonthdayhourminuter   torchcudais_availableexport_memory_timeline	Exceptionwarnmemory_dump_snapshotr   export_stackskey_averagesr   tableopenprintdistributedbarrier)r)   r*   r+   r,   
world_sizer#   curr_trace_dir_namecurr_trace_dirbeginrC   exporterekey_avgsfs                 r$   trace_handlerr`   8   sw   @ /00J&T]););;W\\*.ABBN7>>.)) 3
NT2222 qyy:4=::;;;NE
 



!
!C(R#(RRSYRRRR38RRcjRR  H
 HTNNNqyyUt~/?/?%/GUUUUVVV  uz6688 199D++%GGDGGG     D D DBqBBCCCCCCCCD J,,!EEEEE  
  UnDD4DDDVTTT   !/! !  eFie00  
==d===s	C	C  qhQ                             qyy@@@AAA A~~!!##### ~s*   F 
G'G		G&JJJc                   0    e Zd ZdZd Zd Zd Zd Zd ZdS )DummyProfilera{  
    Drop-in replacement for torch.profiler.profile that functions as a nullcontext / object
    with no-op methods for ``start``, ``stop``, and ``step``.

    This is helpful for instrumenting profiling in a recipe without requiring changes to the
    code independent of whether profiling is on / off.

    E.g.,
    ```
        profiler = DummyProfiler()
        #profiler = torch.profiler.profile()

        # Below is same regardless of profiler object type
        with profiler as prof:
            for epoch in epochs:
                for batch in batches:
                    train.step()
                    prof.step()

    c                     | S N selfs    r$   	__enter__zDummyProfiler.__enter__   s    r&   c                     d S rd   re   )rg   argss     r$   __exit__zDummyProfiler.__exit__       r&   c                     d S rd   re   rf   s    r$   startzDummyProfiler.start   rl   r&   c                     d S rd   re   rf   s    r$   stopzDummyProfiler.stop   rl   r&   c                     d S rd   re   rf   s    r$   stepzDummyProfiler.step   rl   r&   N)	__name__
__module____qualname____doc__rh   rk   rn   rp   rr   re   r&   r$   rb   rb      si         *            r&   rb   r   r   r   r   enabledcpurJ   xpur   r   r   r   returnc                 4   | s.t          d           t                      t          ddi          fS g }|r)|                    t          j        j        j                   |r)|                    t          j        j        j                   |r)|                    t          j        j        j	                   t          |          dk    rt          d           t          }dx}x}}t          |du|	du|
du|dug           }|r[t          t          d                    d	                    fd
                                D                                            n||	|
|dfd                                D             }t          |          dk    rj|D ]}t          |         |<   t          d                    d	                    |          d	                    fd|D                                            t          j                            d         d         d         d                   }|rt          d           |p|}|p|}|rt%          d          nd}|t          dt&                      t&          }t)          |          }|                    dd           t-          |          }t/          t0          |          }t          j                            ||||||||          }t          | ||||||||d	          }||fS )aC  
    Sets up :class:`~torch.profiler.profile` and returns the profiler config with post-setup updates.

    The profiler config can be provided in configs under the ``profiler`` key with the following layout:

    .. code-block:: yaml

        profiler:
          _component_: torchtune.training.setup_torch_profiler
          enabled: bool
          # Output directory of trace artifacts
          output_dir: str

          # torch.profiler.ProfilerActivity types to trace
          cpu: bool
          cuda: bool

          # Trace options
          profile_memory: bool
          with_stack: bool
          record_shapes: bool
          with_flops: bool

          # torch.profiler.schedule args
          wait_steps: int
          warmup_steps: int
          active_steps: int
          num_cycles: int

    The profiler schedule updates with respect to an optimizer step (e.g., if
    ``gradient_accumulation = 2``, then the profiler will step every 2 batches).

    Sensible defaults will be chosen if the config is missing options:

    - If no activities are specified, profiler will default to CPU + CUDA
    - If no schedule is specified, profiler will default to ``DEFAULT_SCHEDULE``
    - Certain options will be overridden (``with_stack`` and ``record_shapes``)     depending on requirements of other options (e.g., ``profile_memory`` requires     ``with_stack`` and ``record_shapes``).


    Note:
        - Enabling the profiler will result in training speed reduction.
        - Setting ``profile_memory: True`` will generate large trace files.
        - The profiler schedule is context dependent. Calling ``profiler.step()``         at each batch iteration but **outside** the gradient accumulation scope will         ``step`` the profiler each forward / backward step. Calling ``profiler.step()``         each batch iteration but **within** the gradient accumulation scope  will ``step``         the profiler each optimizer update step such that each ``step`` contains multiple         forward / backward passes.

    Args:
        enabled (bool): Enable pytorch profiler. Default is False.
        cpu (bool): Enable cpu profiling. Default is True.
        cuda (bool): Enable cuda profiling. Default is True.
        xpu (bool): Enable xpu profiling. Default is True.
        profile_memory (bool): Profile memory usage. Default is False.
        with_stack (bool): Profile stack. Default is False.
        record_shapes (bool): Record shapes. Default is True.
        with_flops (bool): Profile flops. Default is False.
        wait_steps (Optional[int]): Wait time in steps. Maps to ``wait`` kwarg of ``torch.profiler.schedule``.
        warmup_steps (Optional[int]): Warmup time in steps. Maps to ``warmup`` kwarg of ``torch.profiler.schedule``.
        active_steps (Optional[int]): Active time in steps. Maps to ``active`` kwarg of ``torch.profiler.schedule``.
        num_cycles (Optional[int]): Number of profiling cycles. Maps to ``repeat`` kwarg of ``torch.profiler.schedule``.
        output_dir (Optional[str]): Tracing file output path.

    Returns:
        Tuple[torch.profiler.profile, DictConfig]
    z Profiling disabled.rw   Fr   z1No activities specified, defaulting to CPU + CUDATNz. No schedule found in config, defaulting to {}z, c              3   2   K   | ]}| d |          V  dS z = Nre   .0kschedule_argss     r$   	<genexpr>z'setup_torch_profiler.<locals>.<genexpr>6  s6      TT!Q55=#355TTTTTTr&   r   c                 $    g | ]}|         
|S rd   re   r~   s     r$   
<listcomp>z(setup_torch_profiler.<locals>.<listcomp>@  s#    TTTa=;K;S;S;S;Sr&   z= Missing keys in torch profiler schedule {}: defaulting to {}c              3   2   K   | ]}| d |          V  dS r}   re   r~   s     r$   r   z'setup_torch_profiler.<locals>.<genexpr>G  s6      PPa99}Q'799PPPPPPr&   r   r   r   r   )waitwarmupactiverepeatzp`profile_memory` requires `with_stack` and `record_shapes`, these will be enabled since `profile_memory` is True)verbosez= No output directory found in profiler config, defaulting to )parentsr/   )r*   )
activitiesr   r   r   r   scheduleexperimental_configon_trace_ready)	rw   r*   rx   rJ   ry   r   r   r   r   )r%   rb   r   appendrI   r   ProfilerActivityCPUCUDAXPUlenDEFAULT_PROFILER_ACTIVITIESanyr   formatr<   keysr   r   r   r   mkdirr8   r   r`   profile)rw   rx   rJ   ry   r   r   r   r   r   r   r   r   r*   r   use_default_schedulemissing_keysr   r   r   callbackr   profiler_cfgr   s                         @r$   setup_torch_profilerr      s   n  ?$%%%
Iu+= > >>> J
 ?%.9=>>> @%.9>???
 ?%.9=>>>
:!ABBB0
dS  #d"$$d"		
      (<CC		TTTT}?Q?Q?S?STTTTT 	
 	
 	
 	
 %(($	
 
 UTTT=#5#5#7#7TTT|q  ! 7 7#3A#6a  OVVIIl++IIPPPP<PPPPP    ~&&<(^,^,\*	 '  H  
~	
 	
 	
 -~J!3^M?IS-d;;;;t aL_aa	
 	
 	
 )
j!!JTD111ZJ }<<<H~%%%#/ & 	 	H $,$*$	
 	
 	
 L l##r&   )r'   r(   )*rB   r:   r@   	functoolsr   pathlibr   typingr   r   rI   torch.distributed	omegaconfr   torch._C._profilerr   torch.profilerr	   torchtune.utilsr
   r   r    PROFILER_KEYr   r   r   r   r   r   r   dict__annotations__r   r   r8   r%   r   intr`   rb   boolr   re   r&   r$   <module>r      s&    				              " " " " " " " "                  2 2 2 2 2 2 4 4 4 4 4 4 ? ? ? ? ? ? ? ?j	N#'	N#(	N#'  	  $    	  D    - S , , ,s     )	X$ X$
.
 X$X$ X$ 	X$ X$ X$ X$v# # # # # # # #N -.>?),7,_=),7 !%"&"& $ $L$ L$L$	L$ L$ 
	L$
 L$ L$ L$ L$ L$ 3-L$ 3-L$ L$ L$  5>!:-.!L$ L$ L$ L$ L$ L$r&   