
    Pi                         d dl Z d dlZd dlZd dlZd dlZ ej        dd          Z	 	 	 	 	 	 ddZdad Z	d	 Z
d
 Zd Zd ZddZd Zd ZdS )    NTORCHAO_AUTOTUNER_DATA_PATH   d   Tmeanc                    |dv sJ dd l 	  |              j                                         |r& j        t	          d          j        d          }n% j        t	          d          j        d          }j                            d          }j                            d          }	|                                 t          d	          D ] }
|	                                  |              !|	                                 j                                         |
                    |	          d	z  }t          d
t	          ||z                      }t          d
t	          ||z                      }fdt          |          D             }fdt          |          D             }	t          |          D ]}
 |              t          |          D ]b}||D ]	}d |_        
|	                                 ||                                           |              |	|                                          cj                                          j        d t          ||	          D             j                  }|U j        | j        |j                                                            }t%          |          d
k    r|d         }|S  t'          |          |                                          S )N)minmaxr   medianr       Acudadtypedeviceg    ATenable_timing      c                 F    g | ]}j                             d           S Tr   r   Event.0itorchs     l/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/kernel/autotuner.py
<listcomp>z#do_bench_triton.<locals>.<listcomp>G   s,    QQQA5:##$#77QQQ    c                 F    g | ]}j                             d           S r   r   r   s     r   r   z#do_bench_triton.<locals>.<listcomp>H   s,    OOO!!!!55OOOr   c                 >    g | ]\  }}|                     |          S  )elapsed_time)r   ses      r   r   z#do_bench_triton.<locals>.<listcomp>]   s(    CCCtq!		CCCr   )r   )r   r   synchronizeemptyintint8r   recordrangezero_r"   r	   gradtensorzipfloatquantiletolistlengetattritem)fnwarmuprepgrad_to_none	quantiles
fast_flushreturn_modecachestart_event	end_event_estimate_msn_warmupn_repeatr   xtimesretr   s                     @r   do_bench_tritonrF      s"    :::::LLL$ BDDD	J
  IC
OO59VLLLCJJejHHH *"""66K
  t 44I1XX  
	J**9559K 1c&;.//00H1c#+,,--HQQQQxQQQKOOOOuXOOOI8__  
8__   #!  A
!	JELCCs;	'B'BCCC5;  E enULEL%+$N$N$NOOVVXXs88q==a&C
&75+&&u--22444r   c                    t           j                                        }t          "t          j                                        dz  }nt	          j        t                    }t          j        d| d|            t          |d          5 }dd l
}t          j        d|            |                    | |           d d d            d S # 1 swxY w Y   d S )Nzdata.pklzTrying to store configs for z locally under wbr   zSaving best configs to file )r   r   get_device_nameAUTOTUNER_DATA_PATHpathlibPathcwdlogginginfoopenpickledump)best_configsdevice_namesaved_configsfrQ   s        r   _save_best_configsrW   j   s   *,,..K"((**Z7%899LR{RR=RR   
mT	"	" %aCMCCDDDL!$$$	% % % % % % % % % % % % % % % % % %s   2C

CCc                  8   t           j                                        } dd l}t          O|j                            d          }|dz  dz  dz  }|                     d          st          j	        d           nt          j        t                    }t          j	        d|  d	|            |                                rZdd l}t          |d
          5 }t          j	        d|            |                    |          cd d d            S # 1 swxY w Y   d S d S )Nr   torchaokernelconfigszdata_a100.pklzNVIDIA A100z6Warning! Loaded configurations are optimized for A100!zTrying to load configs for z from rbzLoading best configs from file )r   r   rI   	importlibrJ   	resourcesfiles
startswithrN   rO   rK   rL   is_filerQ   rP   load)rT   r]   rU   rQ   rV   s        r   _load_best_configsrc   z   si   *,,..K"!+11)<<%09<N%%m44 	SLQRRR%899LQ{QQ-QQRRR "-&& 	"!LJ=JJKKK;;q>>	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	"" "s   ,DDDc                     t          j        |           r.| j        |                                 |                                 fS | fS N)r   	is_tensorr   sizestride)as    r   get_arg_keyrj      s:    q /188::..4Kr   c                 P    t          t          d | D                       d          S )Nc              3   4   K   | ]}t          |          V  d S re   )rj   )r   ri   s     r   	<genexpr>zget_args_key.<locals>.<genexpr>   s(      22[^^222222r   r!   )sumtuple)argss    r   get_args_keyrq      s)    u22T22222B777r   c                 N    |              t           j                                         t          j        t	          d          t           j        d          }|                                 t           j                            d          }t           j                            d          }|                                 t          |          D ]} |              |                                 t           j                                         |	                    |          |z  }|S )Nr   r   r   Tr   )
r   r   r%   r&   r'   r+   r   r)   r*   r"   )r5   r7   r<   r=   r>   r?   r@   s          r   do_bench_basicrs      s    BDDD	JKJuyHHHE	KKMMM*"""66K
  t 44I3ZZ  
	J**955;Kr   c                 6     fd}	 t          |d          }n)# t          $ r d }Y nt          j        j        $ r d }Y nw xY w||||dz  k    rt          d          S t          |d          }|||dz  k    rt          d          S t          |          S )Nc                        gz    S re   r!   )rp   configr5   s   r   
wrapped_fnzdo_bench.<locals>.wrapped_fn   s    rDF8O%%r   r   r   infr   
   )rs   RuntimeErrortritonruntimeOutOfResourcesr/   rF   )r5   rp   rv   	best_timerw   times   ```   r   do_benchr      s    & & & & & & &j!,,   >(   |	-$S2H2HU|| *a((D	B!6!6U|| :&&&s    AAAc                 >    | t           v rt           |          d         S d S )Nr   )BEST_CONFIGS)keys    r   get_best_config_by_keyr      s%    
lC ## r   c           
      r   t           t                      a t           i a t          |          dk    rd S t          |          }t	          |          }||S t          j        d| d           |d         }t          | ||d                   }t          j        d                    t          t          |||g                               d}|dd          D ]p}t          | |||          }t          j        d                    |ddt          |          d|dt          |          g                     ||k     r|}|}|dz  }q||ft           |<   t          j        d	           t          j        d                    t          t          ||g                               t          t                      |S )
Nr   z2Starting autotune search. No config found for key . r   4d/z6.3fz-- perfetto --)r   rc   r2   rq   r   rN   rO   r   joinmapstrrW   )	r5   rp   r[   r   best_configr~   r   rv   r   s	            r   get_best_config_fnr      s   )++ 
7||qt
t

C(--KLLcLLLMMM !*KT71:..IL#cCK#@AABBCCC	A !""+  D&)44HH111G111d>>3v;;OPP	
 	
 	
 )I K	Q$i0LL!"""L#cI{#;<<==>>>|$$$r   )r   r   NNTr   re   )rN   osrK   r   r{   getenvrJ   rF   r   rW   rc   rj   rq   rs   r   r   r   r!   r   r   <module>r      s    				   bi =tDD 
 T5 T5 T5 T5n % % % " " "(  8 8 8  $' ' ' '4$ $ $
) ) ) ) )r   