
    .`i$              
          U d dl Z d dlZd dlmZmZ d dlmZ d dlmZ d dl	m
Z
mZmZ d dlmZ d dlmZ d dlmZmZ d dlmZmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z#m$Z$m%Z%m&Z& d dl'm(Z(m)Z)  ee*          Z+eee,gdf         Z-e.d         Z/e/e-z  Z0 G d de          Z1de2e0         fdZ3 G d de1          Z4 G d de1          Z5 G d de5e4          Z6 G d de4          Z7 G d de4          Z8ee
z  ez  Z9ee:d<   de9d e2e,         d!e;de<e,e9f         fd"Z=d#e2e,         d$e,de2e,         fd%Z>d$e,de2e,         fd&Z? G d' d(          Z@dS ))    N)ABCabstractmethod)Callable)	TypeAlias)CounterGauge	Histogram)CUDAGraphLogging)SupportsMetricsInfo
VllmConfig)KVConnectorLoggingKVConnectorPrometheus)init_logger)STAT_LOGGER_PLUGINS_GROUPload_plugins_by_group)FinishReason)PerfMetricsLogging)unregister_vllm_metrics)CachingMetricsIterationStatsMultiModalCacheStatsSchedulerStats)SpecDecodingLoggingSpecDecodingPromStatLoggerBaseAggregateStatLoggerBasec            
           e Zd ZdZeddedefd            Ze	 	 ddedz  de	dz  d	e
dz  d
efd            Zed             Zd ZdedefdZdS )r   a   Interface for logging metrics.

    API users may define custom loggers that implement this interface.
    However, note that the `SchedulerStats` and `IterationStats` classes
    are not considered stable interfaces and may change in future versions.
    r   vllm_configengine_indexc                     d S N )selfr   r   s      k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/metrics/loggers.py__init__zStatLoggerBase.__init__/   s    HK    Nscheduler_statsiteration_statsmm_cache_stats
engine_idxc                     d S r!   r"   r#   r'   r(   r)   r*   s        r$   recordzStatLoggerBase.record2   s	     sr&   c                     d S r!   r"   r#   s    r$   log_engine_initializedz%StatLoggerBase.log_engine_initialized;   s    &)cr&   c                     d S r!   r"   r/   s    r$   logzStatLoggerBase.log>       r&   is_awakelevelc                     d S r!   r"   )r#   r4   r5   s      r$   record_sleep_statez!StatLoggerBase.record_sleep_stateA   r3   r&   r   Nr   )__name__
__module____qualname____doc__r   r   intr%   r   r   r   r-   r0   r2   r7   r"   r&   r$   r   r   '   s          KKJKcKKK ^K
 7; '$. ($. -t3	
    ^ )) ^)  3 s      r&   returnc                     g } t          t                                                    D ]Z\  }}t          |t                    rt          |t                    st          d|d|d          |                     |           [| S )NzStat logger plugin z+ must be a subclass of StatLoggerBase (got z).)	r   r   items
isinstancetype
issubclassr   	TypeErrorappend)	factoriesnameplugin_classs      r$   !load_stat_logger_plugin_factoriesrJ   E   s    )+I34MNNTTVV 	' 	'l,-- 	Z.6
 6
 	 :d : :'3: : :  
 	&&&&r&   c                   >    e Zd ZdZededee         fd            ZdS )r   zNAbstract base class for loggers that
    aggregate across multiple DP engines.r   engine_indexesc                     d S r!   r"   r#   r   rL   s      r$   r%   z AggregateStatLoggerBase.__init__Z   s    LOCr&   N)	r:   r;   r<   r=   r   r   listr>   r%   r"   r&   r$   r   r   V   s@        - - OJOS	OOO ^OOOr&   c            	           e Zd ZddedefdZd ZdefdZde	fd	Z
d
ededefdZed             Z	 	 ddedz  de	dz  dedz  defdZd Zd Zd Zd ZdS )LoggingStatLoggerr   r   r   c                    || _         || _        |                     t          j                               t                      | _        t                      | _        t                      | _	        t                      | _
        t                      | _        | j        j        }t          |          | _        d | _        | j        j        j        r3t'          | j        j        j        | j        j        j                  | _        d| _        d| _        d| _        d| _        |                                 rt9          |          | _        d S d S )N        F)r   r   _resettime	monotonicr   last_scheduler_statsr   prefix_caching_metrics connector_prefix_caching_metricsmm_caching_metricsr   spec_decoding_loggingkv_transfer_configr   kv_connector_loggingcudagraph_loggingobservability_configcudagraph_metricsr
   compilation_configcudagraph_modecudagraph_capture_sizeslast_prompt_throughputlast_generation_throughputengine_is_idle
aggregated_enable_perf_statsr   perf_metrics_logging)r#   r   r   r\   s       r$   r%   zLoggingStatLogger.__init___   s,   (&DN$$%%%$2$4$4! '5&6&6#0>0@0@-"0"2"2%8%:%:"!-@$67I$J$J!!%0B 	%5 3B 3K& &D" .1#14'#""$$ 	H(:;(G(GD%%%	H 	Hr&   c                 L    || _         d| _        d| _        d| _        d| _        d S r9   )last_log_timenum_prompt_tokensnum_generation_tokensnum_corrupted_reqsnum_preemptions)r#   nows     r$   rT   zLoggingStatLogger._reset}   s2      '(*+"'($%r&   r?   c                 $    | j         j        j        S r!   )r   r_   enable_mfu_metricsr/   s    r$   rh   z$LoggingStatLogger._enable_perf_stats   s    4GGr&   r(   c                     | xj         |j         z  c_         | xj        |j        z  c_        | xj        |j        z  c_        | xj        |j        z  c_        d S r!   )rl   rm   rn   ro   num_preempted_reqs)r#   r(   s     r$   _track_iteration_statsz(LoggingStatLogger._track_iteration_stats   sd    /"CC""o&KK""?#EE BBr&   tracked_statsrp   c                 J    || j         z
  }|dk    rdS t          ||z            S )NrS   )rk   float)r#   rv   rp   
delta_times       r$   _get_throughputz!LoggingStatLogger._get_throughput   s2    4--
3]Z/000r&   c                 6    d                     | j                  S )NzEngine {:03d}: )formatr   r/   s    r$   
log_prefixzLoggingStatLogger.log_prefix   s     ''(9:::r&   Nr'   r)   r*   c                 v   |r|                      |           | | j                            |j                   |j        | j                            |j                   |j        | j                            |j                   |j        x}r| j	                            |           | j
        &|j        | j
                            |j                   | j        s|| _        |j        x}r.|                                 r| j                            |           |r| j                            |           dS dS )zLog Stats to standard output.N)ru   rX   observeprefix_cache_statsconnector_prefix_cache_statsrY   spec_decoding_statsr[   kv_connector_statsr]   r^   cudagraph_statsrg   rW   
perf_statsrh   ri   rZ   )r#   r'   r(   r)   r*   r   r   s          r$   r-   zLoggingStatLogger.record   s`     	9''888&'//0RSSS;G5==#@   2>*22?3VWWW%4%GG! F)112DEEE&2#3?&../NOOO? <,;)-88
 >d>U>U>W>W >)11*=== 	<#++N;;;;;	< 	<r&   c                 $   t          j                    }|                     | j        |          }|                     | j        |          }|                     |           t          ||| j        | j        f           | _	        || _        || _        d S r!   )
rU   rV   rz   rl   rm   rT   anyrd   re   rf   )r#   rp   prompt_throughputgeneration_throughputs       r$   _update_statszLoggingStatLogger._update_stats   s    n 001GMM $ 4 4T5OQT U UC"%!%+/	#
 #
 
 +@'&7###r&   c                     d S r!   r"   r/   s    r$   aggregate_scheduler_statsz+LoggingStatLogger.aggregate_scheduler_stats   s    r&   c                    |                                   |                                  | j        rt          j        nt          j        }g d}| j        | j        | j        j	        | j        j
        g}| j        dk    r/|                    d           |                    | j                   |                    ddg           |                    | j        j        dz  | j        j        dz  g           t"          j        r/|                    d           |                    | j                   | j        j        s7|                    d           |                    | j        j        dz             | j        j        s7|                    d	           |                    | j        j        dz              || j        d
                    |          z   g|R   | j                            |           | j                            |           | j        | j                            |           |                                 r#| j                            || j                   d S d S )N)z$Avg prompt throughput: %.1f tokens/sz(Avg generation throughput: %.1f tokens/szRunning: %d reqszWaiting: %d reqsr   zPreemptions: %dzGPU KV cache usage: %.1f%%zPrefix cache hit rate: %.1f%%d   zCorrupted: %d reqsz&External prefix cache hit rate: %.1f%%zMM cache hit rate: %.1f%%z, )log_fn)r   r}   )r   r   rf   loggerdebuginford   re   rW   num_running_reqsnum_waiting_reqsro   rF   extendkv_cache_usagerX   hit_rateenvsVLLM_COMPUTE_NANS_IN_LOGITSrn   rY   emptyrZ   r}   joinr[   r2   r]   r^   rh   ri   )r#   r   	log_partslog_argss       r$   r2   zLoggingStatLogger.log   s   &&(((!%!4E&+
 
 
	 '+%6%6	-
 !##.///OOD0111,/	
 	
 	
 	)83>+4s:	
 	
 	
 + 	51222OOD34444: 	REFFFOODAJSPQQQ&, 	D8999OOD3<sBCCCOdii	222	
	
 	
 	
 	

 	"&&f&555!%%V%444!-"&&f&555""$$ 	U%))DO)TTTTT	U 	Ur&   c                     | j         j        j        r2t                              d| j        | j         j        j                   d S d S )NzSEngine %03d: vllm cache_config_info with initialization after num_gpu_blocks is: %d)r   cache_confignum_gpu_blocksr   r   r   r/   s    r$   r0   z(LoggingStatLogger.log_engine_initialized  sT    (7 	LL.! -<	    	 	r&   r8   r9   )r:   r;   r<   r   r>   r%   rT   boolrh   r   ru   rx   rz   propertyr}   r   r   r-   r   r   r2   r0   r"   r&   r$   rQ   rQ   ^   sl       H HJ Hc H H H H<& & &HD H H H HCn C C C C1S 1u 1 1 1 1 1 ; ; X; 7;!< !<'$.!< ($.!< -t3	!<
 !< !< !< !<F8 8 8"  8U 8U 8Ut    r&   rQ   c            	           e Zd Zdedee         fdZed             Zde	fdZ
	 	 dd	edz  d
edz  dedz  defdZd Zd Zd ZdS )AggregatedLoggingStatLoggerr   rL   c                     || _         d | j         D             | _        t                              | |d           d| _        d S )Nc                 ,    i | ]}|t                      S r"   )r   ).0idxs     r$   
<dictcomp>z8AggregatedLoggingStatLogger.__init__.<locals>.<dictcomp>  s3     E
 E
 E
&)C!!E
 E
 E
r&   )r   T)rL   last_scheduler_stats_dictrQ   r%   rg   rN   s      r$   r%   z$AggregatedLoggingStatLogger.__init__  s]    
 -E
 E
-1-@E
 E
 E
& 	""42"FFFr&   c                 P    d                     t          | j                            S )Nz{} Engines Aggregated: )r|   lenrL   r/   s    r$   r}   z&AggregatedLoggingStatLogger.log_prefix$  s!    (//D4G0H0HIIIr&   r?   c                     dS )NFr"   r/   s    r$   rh   z.AggregatedLoggingStatLogger._enable_perf_stats(  s    ur&   Nr   r'   r(   r)   r*   c                     || j         vrt                              d|           d S t                              | ||||           ||| j        |<   d S d S NzUnexpected engine_idx: %dr)   r*   )rL   r   warningrQ   r-   r   r,   s        r$   r-   z"AggregatedLoggingStatLogger.record,  s{     T000NN6
CCCF  )! 	! 	
 	
 	
 &9HD*:666 '&r&   c                 N   t                      | _        | j                                        D ]P}| j        xj        |j        z  c_        | j        xj        |j        z  c_        | j        xj        |j        z  c_        Q| j        xj        t          | j                  z  c_        d S r!   )r   rW   r   valuesr   r   r   r   )r#   rW   s     r$   r   z5AggregatedLoggingStatLogger.aggregate_scheduler_stats@  s    $2$4$4!$($B$I$I$K$K 		 		 %66$566 %66$566 %44$3444 	!00C8V4W4WW0000r&   c                 :    t                               |            d S r!   )rQ   r2   r/   s    r$   r2   zAggregatedLoggingStatLogger.logN  s    d#####r&   c                     | j         j        j        r?t                              dt          | j                  | j         j        j                   d S d S )NzR%d Engines: vllm cache_config_info with initialization after num_gpu_blocks is: %d)r   r   r   r   r   r   rL   r/   s    r$   r0   z2AggregatedLoggingStatLogger.log_engine_initializedQ  s\    (7 	KK.D'(( -<	    	 	r&   r9   )r:   r;   r<   r   rO   r>   r%   r   r}   r   rh   r   r   r   r-   r   r2   r0   r"   r&   r$   r   r     s       

 S	
 
 
 
 J J XJD     7;I I'$.I ($.I -t3	I
 I I I I(X X X$ $ $    r&   r   c            	       n    e Zd Zdedee         deddfdZ	 	 ddedz  d	e	dz  d
e
dz  defdZd Zd ZdS )PerEngineStatLoggerAdapterr   rL   per_engine_stat_logger_factoryr?   Nc                 T    i | _         || _        |D ]} |||          | j         |<   d S r!   )per_engine_stat_loggersrL   )r#   r   rL   r   r   s        r$   r%   z#PerEngineStatLoggerAdapter.__init__\  sO     (*$,* 	 	L9W9W\: :D(66	 	r&   r   r'   r(   r)   r*   c                     || j         vrt                              d|           d S | j         |                             ||||           d S r   )r   r   r   r-   r,   s        r$   r-   z!PerEngineStatLoggerAdapter.recordi  sg     T999NN6
CCCF$Z077)!	 	8 	
 	
 	
 	
 	
r&   c                 f    | j                                         D ]}|                                 d S r!   )r   r   r2   r#   per_engine_stat_loggers     r$   r2   zPerEngineStatLoggerAdapter.logz  s@    &*&B&I&I&K&K 	) 	)""&&((((	) 	)r&   c                 f    | j                                         D ]}|                                 d S r!   )r   r   r0   r   s     r$   r0   z1PerEngineStatLoggerAdapter.log_engine_initialized~  s@    &*&B&I&I&K&K 	< 	<""99;;;;	< 	<r&   r9   )r:   r;   r<   r   rO   r>   PerEngineStatLoggerFactoryr%   r   r   r   r-   r2   r0   r"   r&   r$   r   r   [  s         S	 )C	
 
   " 7;
 
'$.
 ($.
 -t3	

 
 
 
 
") ) )< < < < <r&   r   c            	           e Zd ZeZeZeZe	Z
eZ	 ddedee         dz  fdZdedefdZ	 	 dd	edz  d
edz  dedz  defdZddedefdZd ZdS )PrometheusStatLoggerNr   rL   c                   ()*+, |dg}|| _         t                       || _        |j        j        | _        |j        j        | _        ddg}|j        j        *|j        j	        }*fd|D             }| 
                    |j        ||          | _        |                     |||          | _        |                     ddd|          }t!          ||*          | _        |                     d	d
d|          }t!          ||*          | _        |                     dd|dgz   d          )i | _        g d}|D ],)*,fd|D             | j        ,<   |                                  |                     ddd|          }	t!          |	|*          | _        t,          j        r.|                     dd|          }
t!          |
|*          | _        |                     dd|          }t!          ||*          | _        |                     dd|          }t!          ||*          | _        |                     dd|          }t!          ||*          | _        |                     dd|          }t!          ||*          | _        |                     dd|          }t!          ||*          | _        |                     d d!|          }t!          ||*          | _        |                     d"d#|          }t!          ||*          | _         |                     d$d%|          }t!          ||*          | _!        |                     d&d'|          }t!          ||*          | _"        i | _#        |                     d(d)|d*gz             (tH          D ]+(*+fd+|D             | j#        +<   | %                    d,d%tM          |          |-          }t!          ||*          | _'        | %                    d.d'tM          |          |-          }t!          ||*          | _(        | %                    d/d0g d1|-          }t!          ||*          | _)        | %                    d2d3tM          |          |-          }t!          ||*          | _*        | %                    d4d5g d6|-          }t!          ||*          | _+        | %                    d7d8tM          |          |-          }t!          ||*          | _,        | %                    d9d:g d;|-          }t!          ||*          | _-        | %                    d<d=g d>|-          }t!          ||*          | _.        | %                    d?d@g d>|-          }t!          ||*          | _/        g dA}| %                    dBdC||-          }t!          ||*          | _0        | %                    dDdE||-          }t!          ||*          | _1        | %                    dFdG||-          } t!          | |*          | _2        | %                    dHdI||-          }!t!          |!|*          | _3        | %                    dJdK||-          }"t!          |"|*          | _4        | %                    dLdMtM          |          |-          }#t!          |#|*          | _5        | j        rg dN}$| %                    dOdP|$|-          }%t!          |%|*          | _6        | %                    dQdR|$|-          }&t!          |&|*          | _7        | %                    dSdT|$|-          }'t!          |'|*          | _8        ni | _6        i | _7        i | _8        d | _9        |j:        tw          | j                   dUk    rtx          =                    dV           dW| _>        dX| _?        dY| _@        |j:        jA        | _B        |                     dZd[d\| j>        | j?        | j@        g          | _9        d S d S )]Nr   
model_nameenginec                 4    i | ]}|t          |          gS r"   )str)r   r   r   s     r$   r   z1PrometheusStatLogger.__init__.<locals>.<dictcomp>  s3     ;
 ;
 ;
,/C*c#hh';
 ;
 ;
r&   zvllm:num_requests_runningz.Number of requests in model execution batches.
mostrecentrH   documentationmultiprocess_mode
labelnameszvllm:num_requests_waitingz+Number of requests waiting to be processed.zvllm:engine_sleep_statezEngine sleep state; awake = 0 means engine is sleeping; awake = 1 means engine is awake; weights_offloaded = 1 means sleep level 1; discard_all = 1 means sleep level 2.sleep_state)rH   r   r   r   )awakeweights_offloadeddiscard_allc                 B    i | ]}|                     |           S ))r   r   r   )labels)r   r   gauge_engine_sleep_stater   ss     r$   r   z1PrometheusStatLogger.__init__.<locals>.<dictcomp>  sJ     0 0 0  -44:1 5  0 0 0r&   zvllm:kv_cache_usage_percz*KV-cache usage. 1 means 100 percent usage.zvllm:corrupted_requestszMCorrupted requests, in terms of total number of requests with NaNs in logits.)rH   r   r   zvllm:prefix_cache_queriesz;Prefix cache queries, in terms of number of queried tokens.zvllm:prefix_cache_hitsz7Prefix cache hits, in terms of number of cached tokens.z"vllm:external_prefix_cache_querieszsExternal prefix cache queries from KV connector cross-instance cache sharing, in terms of number of queried tokens.zvllm:external_prefix_cache_hitszoExternal prefix cache hits from KV connector cross-instance cache sharing, in terms of number of cached tokens.zvllm:mm_cache_queriesz?Multi-modal cache queries, in terms of number of queried items.zvllm:mm_cache_hitsz;Multi-modal cache hits, in terms of number of cached items.zvllm:num_preemptionsz0Cumulative number of preemption from the engine.zvllm:prompt_tokensz#Number of prefill tokens processed.zvllm:generation_tokensz&Number of generation tokens processed.zvllm:request_successz)Count of successfully processed requests.finished_reasonc           
      t    i | ]4}|                     t          |          t                              5S r"   r   r   )r   r   counter_request_success_baser   reasons     r$   r   z1PrometheusStatLogger.__init__.<locals>.<dictcomp>a  sQ     4 4 4  188C#f++ 4 4 4r&   zvllm:request_prompt_tokens)rH   r   bucketsr   zvllm:request_generation_tokenszvllm:iteration_tokens_totalz.Histogram of number of tokens per engine_step.)             @         i   i   i   i   i    i @  z&vllm:request_max_num_generation_tokensz;Histogram of maximum number of requested generation tokens.zvllm:request_params_nz%Histogram of the n request parameter.)r         
      zvllm:request_params_max_tokensz.Histogram of the max_tokens request parameter.z vllm:time_to_first_token_secondsz,Histogram of time to first token in seconds.)MbP?{Gzt?{Gz?{Gz?g{Gz?gQ?g{Gz?皙?g      ?      ?      ?      ?      @      @      @      $@      4@      D@      T@g      d@g      @g      @z vllm:inter_token_latency_secondsz,Histogram of inter-token latency in seconds.)r   g?皙?g333333?r   g333333?皙?333333?g?r   r   r   r   r   r   r   r   r   r   z*vllm:request_time_per_output_token_secondsz7Histogram of time_per_output_token_seconds per request.)r   r   g?r   g      ?g       @r   r   r   g      .@r   g      >@r   g      I@g      N@g      ^@g      n@g      ~@g      @g      @g      @z vllm:e2e_request_latency_secondsz,Histogram of e2e request latency in seconds.zvllm:request_queue_time_secondsz5Histogram of time spent in WAITING phase for request.z#vllm:request_inference_time_secondsz5Histogram of time spent in RUNNING phase for request.z!vllm:request_prefill_time_secondsz5Histogram of time spent in PREFILL phase for request.z vllm:request_decode_time_secondsz4Histogram of time spent in DECODE phase for request.z'vllm:request_prefill_kv_computed_tokenszMHistogram of new KV tokens computed during prefill (excluding cached tokens).)r   gMb`?r   r   r   r   r   r   r   r   r   r   r   r      <   x   i,  iX  i  i  zvllm:kv_block_lifetime_secondsz|Histogram of KV cache block lifetime from allocation to eviction. Sampled metrics (controlled by --kv-cache-metrics-sample).z'vllm:kv_block_idle_before_evict_secondszqHistogram of idle time before KV cache block eviction. Sampled metrics (controlled by --kv-cache-metrics-sample).zvllm:kv_block_reuse_gap_secondszHistogram of time gaps between consecutive KV cache block accesses. Only the most recent accesses are recorded (ring buffer). Sampled metrics (controlled by --kv-cache-metrics-sample).r   zfvllm:lora_requests_info prometheus metrics may be incorrect/misleading with data parallel deployments.max_lorawaiting_lora_adaptersrunning_lora_adapterszvllm:lora_requests_infozRunning stats on lora requests.sum)CrL   r   r   r_   show_hidden_metricskv_cache_metricskv_cache_metrics_enabledmodel_configserved_model_namemax_model_len_spec_decoding_clsspeculative_configspec_decoding_prom_kv_connector_clskv_connector_prom
_gauge_clsmake_per_enginegauge_scheduler_runninggauge_scheduler_waitingr   r7   gauge_kv_cache_usager   r   _counter_clscounter_corrupted_requestscounter_prefix_cache_queriescounter_prefix_cache_hits&counter_connector_prefix_cache_queries#counter_connector_prefix_cache_hitscounter_mm_cache_queriescounter_mm_cache_hitscounter_num_preempted_reqscounter_prompt_tokenscounter_generation_tokenscounter_request_successr   _histogram_clsbuild_1_2_5_buckets#histogram_num_prompt_tokens_request'histogram_num_generation_tokens_requesthistogram_iteration_tokens+histogram_max_num_generation_tokens_requesthistogram_n_requesthistogram_max_tokens_requesthistogram_time_to_first_tokenhistogram_inter_token_latency'histogram_request_time_per_output_tokenhistogram_e2e_time_requesthistogram_queue_time_request histogram_inference_time_requesthistogram_prefill_time_requesthistogram_decode_time_request%histogram_prefill_kv_computed_requesthistogram_kv_block_lifetime$histogram_kv_block_idle_before_evicthistogram_kv_block_reuse_gapgauge_lora_infolora_configr   r   r   labelname_max_loralabelname_waiting_lora_adapterslabelname_running_lora_adapters	max_lorasr   )-r#   r   rL   r   r  per_engine_labelvaluesr  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r   r!  r"  r#  r$  r%  r&  request_latency_bucketsr'  r(  r)  r*  r+  r,  kv_cache_residency_bucketsr-  r.  r/  r   r   r   r   r   s-                                           @@@@@r$   r%   zPrometheusStatLogger.__init__  s    !SN,!!!& $/#C#W ,= 	% #H-
 -?
#0>;
 ;
 ;
 ;
3A;
 ;
 ;
 #'"9"9*J8N#
 #
 "&!7!7%;"
 "
 #'//,J*!	 #2 #
 #
 (7#^Z(
 (
$ #'//,G*!	 #2 #
 #
 (7#^Z(
 (
$ $(??*7
 "]O3* $3 
$
 
$
  )+%CCC 	 	A0 0 0 0 0 0 *	0 0 0D)!,, 	!!!#+F*!	  /  
  
 %4 .*%
 %
! + 	)-):):.+ & *; * *& />*NJ/ /D+ (,'8'8,M! (9 (
 (
$ -<(.*-
 -
) %)$5$5)T! %6 %
 %
!
 *9%~z*
 *
& 261B1B5V " 2C 2
 2
. 7F2NJ7
 7
3 /3.?.?2U " /@ /
 /
+ 4C/4
 4
0 $(#4#4(Q! $5 $
 $
  )8$nj)
 )
% !% 1 1%M! !2 !
 !
 &5!>:&
 &
" &*%6%6'L! &7 &
 &
"
 +:&
+
 +
' !% 1 1%?! !2 !
 !

 &5!>:&
 &
" %)$5$5)B! %6 %
 %
!
 *9%~z*
 *
& PR$'+'8'8'E!%6$77 (9 (
 (
$
 # 	 	F4 4 4 4 4 4 *	4 4 4D(00 /3.A.A-?'66!	 /B /
 /
+ 4C/4
 4
0 372E2E1B'66!	 3F 3
 3
/ 8G3^Z8
 8
4 &*%8%8.JTTT!	 &9 &
 &
" +:&
+
 +
' 7;6I6I9W'66!	 7J 7
 7
3 <K7<
 <
8 #11(A%%%!	 2 
 
 $3$
 $
  (,':':1J'66!	 (; (
 (
$ -<(.*-
 -
) )-(;(;3H  0 "7 )< )
 )
%: .=)>:.
 .
* )-(;(;3H  * "1 )< )
 )
%4 .=)>:.
 .
* 372E2E=S  * "1 3F 3
 3
/4 8G3^Z8
 8
4#
 #
 #
. &*%8%83H+!	 &9 &
 &
" +:&
+
 +
' (,':':2Q+!	 (; (
 (
$ -<(.*-
 -
) ,0+>+>6Q+!	 ,? ,
 ,
( 1@,nj1
 1
- *.)<)<4Q+!	 *= *
 *
& />*NJ/
 /
+ )-(;(;3P+!	 )< )
 )
% .=)>:.
 .
* 150C0C:- (66! 1D 1
 1
- 6E1>:6
 6
2 ( D	3* * *&0 +/*=*=5Q 3% +> + +' 0?+^Z0 0D, 483F3F>Q 3% 4G 4 40 9H4nj9 9D5 ,0+>+>62
 3% ,? 
, 
,( 1@,nj1 1D-- 02D,8:D502D- .2".4&''!++K   '1D#3JD03JD0'3=DM#'??.?"'+88	 $3 	$ 	$D    /.r&   rC   
config_objc                 j   |                                 }d|d<   d\  }}|dk    rd}d}|J d|             |                     ||d|                                	          }| j        D ]H}|                                 }t	          |          |d<    |j        di |                    d
           Id S )N r   NNr   zvllm:cache_config_infoz(Information of the LLMEngine CacheConfigzUnknown metrics info type r   r   r   r"   )metrics_infor  keysrL   r   r   set)r#   rC   r9  r=  rH   r   
info_gauger   s           r$   log_metrics_infoz%PrometheusStatLogger.log_metrics_info  s    !..00!#X(m>!!+DFM!Dd!D!D
 __'*#((**	 % 
 

 !/ 	5 	5L%2244L%(%6%6L"J----11!4444	5 	5r&   r   r'   r(   r)   r*   c                 ^   || j         |                             |j                   | j        |                             |j                   | j        |                             |j                   | j        |                             |j	        j
                   | j        |                             |j	        j                   |j        T| j        |                             |j        j
                   | j        |                             |j        j                   |j         | j                            |j        |           |j         | j                            |j        |           | j        r|j        r| j        |         }| j        |         }| j        |         }|j        D ]U}|                    |j                   |                    |j                   |j        D ]}	|                    |	           V| j        d                    |j                                                   }
d                    |j!                                                   }| j"        |
| j#        || j$        | j%        i} | j        j&        di |'                                 |J| j(        |                             |j
                   | j)        |                             |j                   |dS tT          j+        r%| j,        |                             |j-                   | j.        |                             |j/                   | j0        |                             |j1                   | j2        |                             |j3                   | j4        |                             |j1        |j3        z              |j5        D ]"}| j6        |                             |           #|j7        D ]"}| j8        |                             |           #|j9        D ]"}| j:        |                             |           #|j;        D ]"}| j<        |                             |           #|j=        D ]}| j>        |j?                 |                                          | j@        |                             |jA                   | jB        |                             |jC                   | jD        |                             |jE                   | jF        |                             |jG                   | jH        |                             |jI                   |j1        t          |jK        d          z
  }| jL        |                             |           | jM        |                             |j1                   | jN        |                             |j3                   | jO        |                             |jP                   |jQ        r%| jR        |                             |jQ                   dS )zLog to prometheus.N,r   r"   )Sr  r?  r   r  r   r  r   r  incr   queriesr  hitsr   r  r  r   r  r   r   r
  r  kv_cache_eviction_eventsr-  r.  r/  lifetime_secondsidle_secondsreuse_gaps_secondsr0  r   r   r>  r   r4  r3  r2  r   r   set_to_current_timer  r  r   r   r  rn   r  rt   r  rl   r  rm   r   max_num_generation_tokens_iterr!  n_params_iterr"  time_to_first_tokens_iterr$  inter_token_latencies_iterr%  finished_requestsr  finish_reasonr'  e2e_latencyr(  queued_timer*  prefill_timer)  inference_timer+  decode_timemaxnum_cached_tokensr,  r  r  r&  mean_time_per_output_tokenmax_tokens_paramr#  )r#   r'   r(   r)   r*   lifetime_hist	idle_hist
reuse_histeventgapr   r   lora_info_labelsmax_gen_tokensn_paramttftitlfinished_requestprefill_kv_computeds                      r$   r-   zPrometheusStatLogger.record  s    &(4880   (4880   %j155o6TUUU-j9==2:   *:6::27   ;G;JGKK#@H   8DHH#@E   2>'//#7   1=&..#6
  
 -0#<0 !% @ L EjQ	!>zJ
,E 0 0E!))%*@AAA%%e&8999$7 0 0"**3////0 #/(+#9>>@@) )% ),#9>>@@) )% 8:O8:O+T]$ 
 ,$+??.>??SSUUU%)*599.:PQQQ&z266~7JKKK"F+ 	+J7;;2   	'
377.	
 	
 	
 	":.22?3TUUU&z2661	
 	
 	
 	'
3;;-0UU	
 	
 	
 .L 	 	N<ZHPP    '4 	B 	BG$Z088AAAA#= 	I 	ID.z:BB4HHHH"= 	H 	HC.z:BB3GGGG / A &	 &	()9)GHceee+J7?? ,   -j9AA ,   /
;CC -   1*=EE /   .z:BB ,   #3"Ds 2AH H # 6zBJJ#   4Z@HH 2   8DLL 6   8DLL ;    0 1*=EE$5  I&	 &	r&   sleepr5   c                 <   d}d}d}|dk    rd}|dk    rd}n|dk    rd}| j         D ]t}| j        d         |                             |           | j        d         |                             |           | j        d         |                             |           ud S )Nr   r   r   r   r   r   )rL   r   r?  )r#   rg  r5   r   r   r   r*   s          r$   r7   z'PrometheusStatLogger.record_sleep_statek  s    A::Ezz$%!!!- 	J 	JJ)-8DHHUUU)*=>zJNN!   )'2:>BB5IIII	J 	Jr&   c                 F    |                      d| j        j                   d S )Nr   )rA  r   r   r/   s    r$   r0   z+PrometheusStatLogger.log_engine_initialized~  s$    nd.>.KLLLLLr&   r!   r9   r   r   )r:   r;   r<   r   r  r   r  r	   r  r   r  r   r	  r   rO   r>   r%   r   r   rA  r   r   r   r-   r7   r0   r"   r&   r$   r   r     s5       JLN)- KOy y%y7;Cy47Gy y y yv5S 56I 5 5 5 58 7;L L'$.L ($.L -t3	L
 L L L L\J J J J J J J&M M M M Mr&   r   
PromMetricmetricengine_idxsr   c                 $      fd|D             S )Nc           	      X    i | ]&}|                     t          |                    'S r"   r   )r   r   rl  r   s     r$   r   z#make_per_engine.<locals>.<dictcomp>  s/    LLLCz3s8844LLLr&   r"   )rl  rm  r   s   ` `r$   r  r    s$     MLLLLLLLLr&   mantissa_lst	max_valuec                 p    d}g }	 | D ])}|d|z  z  }||k    r|                     |           &|c S |dz  }2)z
    Builds a list of buckets with increasing powers of 10 multiplied by
    mantissa values until the value exceeds the specified maximum.

    r   Tr   r   )rF   )rp  rq  exponentr   mvalues         r$   build_bucketsrv    sg     HG 	 	AH$E	!!u%%%%Ar&   c                 &    t          g d|           S )zR
    Example:
    >>> build_1_2_5_buckets(100)
    [1, 2, 5, 10, 20, 50, 100]
    )r   r   r   )rv  )rq  s    r$   r  r    s     I...r&   c                       e Zd ZdZ	 	 	 	 	 ddedee         dz  dee         dz  d	ed
edefdZ		 	 dde
dz  dedz  dedz  dedz  fdZddedefdZd Zd ZdS )StatLoggerManagera  
    StatLoggerManager:
        Logging happens at the level of the EngineCore (per scheduler).
         * DP: >1 EngineCore per AsyncLLM - loggers for each EngineCore.
         * With Local Logger, just make N copies for N EngineCores.
         * With Prometheus, we need a single logger with N "labels"

        This class abstracts away this implementation detail from
        the AsyncLLM, allowing the AsyncLLM to just call .record()
        and .log() to a simple interface.
    NTFr   r   rm  custom_stat_loggersenable_default_loggersaggregate_engine_loggingclient_countc                    |r|ndg| _         g | _        g }||                    |           |rjt                              t
          j                  rF|dk    rt                              d           n%|rt          nt          }|
                    |           d}	|D ]}
t          |
t                    r?t          |
t                    r* |
|| j                   }t          |t                    rd}	nt!          || j         |
          }| j        
                    |           |	s/| j        
                    t          || j                              d S d S )Nr   r   zfAsyncLLM created with api_server_count more than 1; disabling stats logging to avoid incomplete stats.F)r   rL   T)r   rL   r   )rL   stat_loggersr   r   isEnabledForloggingINFOr   r   rQ   rF   rB   rC   rD   r   r   r   )r#   r   rm  rz  r{  r|  r}  stat_logger_factoriesdefault_logger_factorycustom_prometheus_loggerstat_logger_factoryglobal_stat_loggers               r$   r%   zStatLoggerManager.__init__  s    .9Akkqc;=9;*!(()<===! 	Ef&9&9',&G&G 	EaI    0+//* '
 &,,-CDDD). #8 	9 	9-t44 #%<: :  &9%8 +#'#6& & &" 02FGG 4/3, &@ +#'#63F& & &"
 $$%78888' 	$$$[$2EFF    	 	r&   r'   r(   r)   r*   c                 T    |d}| j         D ]}|                    ||||           d S )Nr   r   )r  r-   )r#   r'   r(   r)   r*   r   s         r$   r-   zStatLoggerManager.record  sW     J' 	 	FMM-%	     	 	r&   r   rg  r5   c                 F    | j         D ]}|                    ||           d S r!   )r  r7   )r#   rg  r5   r   s       r$   r7   z$StatLoggerManager.record_sleep_state  s7    ' 	4 	4F%%eU3333	4 	4r&   c                 B    | j         D ]}|                                 d S r!   )r  r2   )r#   r   s     r$   r2   zStatLoggerManager.log  s-    ' 	 	FJJLLLL	 	r&   c                 B    | j         D ]}|                                 d S r!   )r  r0   )r#   
agg_loggers     r$   r0   z(StatLoggerManager.log_engine_initialized  s3    + 	0 	0J--////	0 	0r&   )NNTFr   r<  rj  )r:   r;   r<   r=   r   rO   r>   StatLoggerFactoryr   r%   r   r   r   r-   r7   r2   r0   r"   r&   r$   ry  ry    s6       
 
 )->B'+).1 11 #Y%1 ""34t;	1
 !%1 #'1 1 1 1 1n 7;!% '$. ($. -t3	
 $J   "4 4 4 4 4 4 4  0 0 0 0 0r&   ry  )Ar  rU   abcr   r   collections.abcr   typingr   prometheus_clientr   r   r	   	vllm.envsr   vllm.compilation.cuda_graphr
   vllm.configr   r   4vllm.distributed.kv_transfer.kv_connector.v1.metricsr   r   vllm.loggerr   vllm.pluginsr   r   vllm.v1.enginer   vllm.v1.metrics.perfr   vllm.v1.metrics.prometheusr   vllm.v1.metrics.statsr   r   r   r   vllm.v1.spec_decode.metricsr   r   r:   r   r>   r   rC   AggregateStatLoggerFactoryr  r   rO   rJ   r   rQ   r   r   r   rk  __annotations__objectdictr  rv  r  ry  r"   r&   r$   <module>r     s     # # # # # # # # $ $ $ $ $ $       7 7 7 7 7 7 7 7 7 7       8 8 8 8 8 8 7 7 7 7 7 7 7 7        $ # # # # # I I I I I I I I ' ' ' ' ' ' 3 3 3 3 3 3 > > > > > >            N M M M M M M M	X		%z3&79I&IJ !";< .1KK     S   <40A+B    "P P P P Pn P P Pv v v v v v v vrA A A A A"35L A A AH%< %< %< %< %<!8 %< %< %<P|M |M |M |M |M2 |M |M |M~ )3
I 3 3 3MM%)#YM<BM	#z/M M M MS	 c d3i    $/3 /49 / / / /[0 [0 [0 [0 [0 [0 [0 [0 [0 [0r&   