
    -`i-                        d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	m
Z
 ddlmZ ddlZddlmZ ddlmZmZ ddlmZ e
d             Zd	 Zd
ej        deeef         ddfdZdej        fdZd
ej        fdZdS )aB  Benchmark the cold and warm startup time of vLLM models.

This script measures total startup time (including model loading, compilation,
and cache operations) for both cold and warm scenarios:
- Cold startup: Fresh start with no caches (temporary cache directories)
- Warm startup: Using cached compilation and model info
    N)contextmanager)Any)tqdm)#convert_to_pytorch_benchmark_formatwrite_to_json)
EngineArgsc               #     K   ddl m}  t          j                            d          }t          j        d          }	 |t          j        d<    |             5  dV  ddd           n# 1 swxY w Y   t          j        |d           |r|t          j        d<   dS t          j        	                    dd           dS # t          j        |d           |r|t          j        d<   w t          j        	                    dd           w xY w)	z
    Context manager to measure cold startup time:
    1. Uses a temporary directory for vLLM cache to avoid any pollution
       between cold startup iterations.
    2. Uses inductor's fresh_cache to clear torch.compile caches.
    r   )fresh_cacheVLLM_CACHE_ROOTvllm_startup_bench_cold_)prefixNT)ignore_errors)
torch._inductor.utilsr
   osenvirongettempfilemkdtempshutilrmtreepop)r
   original_cache_roottemp_cache_dirs      k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/benchmarks/startup.pycold_startupr       s]      211111 *..):;;%-GHHHN
4(6
$%[]] 	 	EEE	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	nD9999 	4,?BJ()))JNN,d33333	 	nD9999 	4,?BJ())JNN,d3333s0   B> A(B> (A,,B> /A,0B> >A
Dc                    	 ddl m} t          j                    } |di t	          j        |           }t          j                    |z
  }d}t          |j        d          r/|j        j        }t          |d          r|j	        |j	        j
        }|                    ||d           dS # t          $ rB}|                    d           |                    t          |                     Y d}~dS d}~ww xY w)	z
    Run LLM startup in a subprocess and return timing metrics via a queue.
    This ensures complete isolation between iterations.
    r   )LLMg        vllm_configcompilation_configN)total_startup_timecompilation_time )vllmr   timeperf_counterdataclassesasdicthasattr
llm_enginer   r   r!   put	Exceptionstr)	engine_argsresult_queuer   
start_timellmr    r!   r   es	            r   run_startup_in_subprocessr2   :   s@   
! &((
c44K&{3344!.00:= 3>=11 	S.4K%9::S2>#.#A#R &8$4 	
 	
 	
 	
 	
  ! ! !Q         !s   B'B+ +
C757C22C7argsresultsreturnc                 N   t           j                            | j                  d         }t	          | d|d         i|d         |d         d          }|rt          | d|           t	          | d|d         i|d	         |d
         d          }|rt          | d|           t	          | d|d         i|d         |d         d          }|rt          | d|           t	          | d|d         i|d         |d         d          }|rt          | d|           d S d S )Nr   avg_cold_startup_timecold_startup_timescold_startup_percentiles)r8   r9   )r3   metrics
extra_infoz.cold_startup.pytorch.jsonavg_cold_compilation_timecold_compilation_timescold_compilation_percentiles)r=   r>   z.cold_compilation.pytorch.jsonavg_warm_startup_timewarm_startup_timeswarm_startup_percentiles)r@   rA   z.warm_startup.pytorch.jsonavg_warm_compilation_timewarm_compilation_timeswarm_compilation_percentiles)rC   rD   z.warm_compilation.pytorch.json)r   pathsplitextoutput_jsonr   r   )r3   r4   	base_namecold_startup_recordscold_compilation_recordswarm_startup_recordswarm_compilation_recordss          r    save_to_pytorch_benchmark_formatrM   `   s      !12215I>#W-D%E
 #**>"?(/0J(K
 
	 	 	  V>>>@TUUUB'1L)M
 '..F&G,34R,S
 
	  	  	    
888:R	
 	
 	
 ?#W-D%E
 #**>"?(/0J(K
 
	 	 	  V>>>@TUUUB'1L)M
 '..F&G,34R,S
 
	  	  	    
888:R	
 	
 	
 	
 	

 
    parserc                    |                      dt          dd           |                      dt          dd           |                      dt          dd	           |                      d
t          d d           t          j        |           } | S )Nz--num-iters-cold   z"Number of cold startup iterations.)typedefaulthelpz--num-iters-warmup   z>Number of warmup iterations before benchmarking warm startups.z--num-iters-warmz"Number of warm startup iterations.z--output-jsonz5Path to save the startup time results in JSON format.)add_argumentintr,   r   add_cli_args)rO   s    r   rX   rX      s    
1	     M	     1	     D	     $V,,FMrN   c                    t          j        dd           t          j        |           fd}dt          j        d<   t          d           t          d           g }g }t          t          | j	                  d	
          D ]h}t                      5   |            }|                    |d                    |                    |d                    d d d            n# 1 swxY w Y   it          d           t          t          | j                  d
          D ]} |             t          d           g }g }t          t          | j                  d
          D ]B} |            }|                    |d                    |                    |d                    Ct          j        |          }	t          j        |          }
t          j        |          }t          j        |          }t          j        |	          }t          j        |
          }t          j        |          }t          j        |          }g d}t          j        |	|          }t          j        |
|          }t          j        ||          }t          j        ||          }t          d           t          d           t          d           t          d           t          d|dd           t          d|dd           t          d           t%          ||          D ]\  }}t          d| d|dd           t          d           t%          ||          D ]\  }}t          d| d|dd           t          d           t          d|dd           t          d|dd           t          d           t%          ||          D ]\  }}t          d| d|dd           t          d           t%          ||          D ]\  }}t          d| d|dd           t          d           | j        rMt)          |          t)          |          ||t+          t%          ||                                                    t+          t%          ||                                                    t)          |          t)          |          ||t+          t%          ||                                                    t+          t%          ||                                                    d}t/          | j        d           5 }t1          j        ||d!"           d d d            n# 1 swxY w Y   t5          | |           d S d S )#NspawnT)forcec                     t          j                    } t          j        t          | f          }|                                 |                                 |                                 sa|                                 }|I|                                 s&|                                 }t          d|           t          d          |S t          d          )z
        Create LLM instance in a subprocess and measure startup time.
        Returns timing metrics, using subprocess for complete isolation.
        )targetr3   NzSubprocess failed: z$Subprocess failed with unknown errorz"Subprocess did not return a result)	multiprocessingQueueProcessr2   startjoinemptyr   RuntimeError)r.   processresult	error_msgr-   s       r   create_llm_and_measure_startupz,main.<locals>.create_llm_and_measure_startup   s     ',..!),
 
 
 	!!## 
	E!%%''F~#))++ O , 0 0 2 2I&'HY'H'HIII&'MNNNMCDDDrN   0VLLM_ENABLE_V1_MULTIPROCESSINGzESetting VLLM_ENABLE_V1_MULTIPROCESSING=0 to collect startup metrics.
zMeasuring cold startup time...
zCold startup iterations)descr    r!   z,
Warming up for warm startup measurement...
zWarmup iterationsz 
Measuring warm startup time...
zWarm startup iterations)
      2   K   Z   c   z=
============================================================zSTARTUP TIME BENCHMARK RESULTSz<============================================================z
COLD STARTUP:zAvg total startup time: z.2fz secondszAvg compilation time:   zStartup time percentiles:z  z%: zCompilation time percentiles:z
WARM STARTUP:)r7   r<   r8   r=   r9   r>   r?   rB   r@   rC   rA   rD   w   )indent)r^   set_start_methodr   from_cli_argsr   r   printr   rangenum_iters_coldr   appendnum_iters_warmupnum_iters_warmnparraymean
percentileziprG   floatdicttolistopenjsondumprM   )r3   rh   r8   r=   ir:   _r@   rC   cold_startup_arraycold_compilation_arraywarm_startup_arraywarm_compilation_arrayavg_cold_startupavg_cold_compilationavg_warm_startupavg_warm_compilationpercentagesr9   r>   rA   rD   
percentager   r4   fr-   s                             @r   mainr      s    $WD9999*400KE E E E E< 47BJ/0	
RSSS	
,---%+,,3LMMM G G^^ 	G 	G4466G%%g.B&CDDD"))'2D*EFFF	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 

:;;;%-..5HIII ) )&&((((	
.///%+,,3LMMM C C0022!!'*>"?@@@%%g.@&ABBBB "455X&<=="455X&<==w1227#9::w1227#9::***K!}-?MM#%=1G#U#U !}-?MM#%=1G#U#U 	/	
*+++	(OOO 

	
C%5
C
C
C
CDDD	
G%9
G
G
G
GHHH	
%&&&"%k3K"L"L < <
J::::*::::;;;;	
)***"%k3O"P"P < <
J::::*::::;;;; 

	
C%5
C
C
C
CDDD	
G%9
G
G
G
GHHH	
%&&&"%k3K"L"L < <
J::::*::::;;;;	
)***"%k3O"P"P < <
J::::*::::;;;;	(OOO  8%*+;%<%<)./C)D)D"4&<(,K!9!@!@!B!BCC) ) -1K!=!D!D!F!FGG- - &++;%<%<)./C)D)D"4&<(,K!9!@!@!B!BCC) ) -1K!=!D!D!F!FGG- -%
 
, $"C(( 	,AIgq++++	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	,(w7777738 8s%   AC""C&	)C&	U00U47U4)__doc__argparser&   r   r^   r   r   r   r$   
contextlibr   typingr   numpyr}   r   vllm.benchmarks.lib.utilsr   r   vllm.engine.arg_utilsr   r   r2   	Namespacer   r,   rM   ArgumentParserrX   r   r"   rN   r   <module>r      s              				    % % % % % %                        - , , , , , 4 4 42#! #! #!L;


;
'+CH~;
	;
 ;
 ;
 ;
|0    <E8x! E8 E8 E8 E8 E8 E8rN   