
    .`iW:                        d dl Z d dlmZmZ d dlmZmZ d dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ er
d dlmZmZmZ e G d	 d
                      Z G d d          Ze G d de                      Ze G d de                      Ze G d d                      Ze G d d                      Ze G d d                      Ze G d d                      Z G d d          Z G d d          Z G d d          Z dS )    N)defaultdictdeque)	dataclassfield)TYPE_CHECKINGAny)CUDAGraphStat)	PerfStats)SpecDecodingStats)EngineCoreEventEngineCoreOutputFinishReasonc                   R    e Zd ZU dZdZeed<   	 dZeed<   	 dZ	eed<   	 dZ
eed<   dS )	BaseCacheStatszStores cache hit statistics.Fresetr   requestsquerieshitsN)__name__
__module____qualname____doc__r   bool__annotations__r   intr   r        i/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/metrics/stats.pyr   r      s^         &&E4&Hc0GS2D#MMM//r   r   c                   x     e Zd ZdZddeddf fdZdefdZd	 Ze	de
fd
            Ze	defd            Z xZS )CachingMetricszMetrics for caching with a hit rate of the most recent N requests.
    Args:
        interval: The number of the most recent requests to aggregate.
            Defaults to 1000.
      max_recent_requestsreturnNc                     t                                                       || _        d| _        d| _        d| _        t          t          t          t          t          f                              | _	        d S Nr   )
super__init__r"   aggregated_requestsaggregated_query_totalaggregated_query_hitr   tupler   query_queue)selfr"   	__class__s     r   r'   zCachingMetrics.__init__*   s_    #6 #$ &'#$%! !sC}!5688r   statsc                 r   |j         r|                                   |j        dk    rdS | j                            |j        |j        |j        f           | xj        |j        z  c_        | xj        |j        z  c_        | xj        |j        z  c_        t          | j                  dk    r| j        | j
        k    r{| j                                        \  }}}| xj        |z  c_        | xj        |z  c_        | xj        |z  c_        t          | j                  dk    r| j        | j
        k    udS dS dS dS )a  Observe the prefix caching for a set of requests.

        This function is called with information gathered when new requests
        are being scheduled and are looking for computed blocks.

        When there are more than `max_recent_requests` requests, the oldest set
        of requests are removed from the metrics.

        Args:
            stats: The prefix cache stats.
        r   N   )r   r   r,   appendr   r   r(   r)   r*   lenr"   popleft)r-   r/   old_requestsold_queriesold_hitss        r   observezCachingMetrics.observe6   sZ    ; 	JJLLL >QF 	
 KLLL  EN2  ##u}4##!!UZ/!!  !!A%%(4+CCC262B2J2J2L2L/L+x$$4$$'';6''%%1%%  !!A%%(4+CCCCC &%%%CCr   c                 b    d| _         d| _        d| _        | j                                         dS )zReset the metrics.r   N)r(   r)   r*   r,   clearr-   s    r   r   zCachingMetrics.reset^   s5    #$ &'#$%!     r   c                     | j         dk    S )z.Return true if no requests have been observed.r   )r(   r;   s    r   emptyzCachingMetrics.emptye   s     '1,,r   c                 :    | j         dk    rdS | j        | j         z  S )z/Calculate the hit rate for the past N requests.r           )r)   r*   r;   s    r   hit_ratezCachingMetrics.hit_ratej   s(     &!++3(4+FFFr   )r!   )r   r   r   r   r   r'   r   r8   r   propertyr   r=   floatr@   __classcell__)r.   s   @r   r    r    #   s         
9 
9C 
94 
9 
9 
9 
9 
9 
9&2^ &2 &2 &2 &2P! ! ! -t - - - X- G% G G G XG G G G Gr   r    c                   \    e Zd ZU dZdZeed<   	 dZeed<   	 dZeed<   	 dedede	d	d
fdZ
d
S )PrefixCacheStatsz
    Stores prefix cache hit statistics.
    - `reset`: Whether `reset_prefix_cache` was invoked.
    - `queries`: Refers to the number of tokens that were queried.
    r   preempted_requestspreempted_queriespreempted_hits
num_tokensnum_hits	preemptedr#   Nc                     |r2| xj         dz  c_         | xj        |z  c_        | xj        |z  c_        dS | xj        dz  c_        | xj        |z  c_        | xj        |z  c_        dS )z-Aggregate request information into the stats.r1   N)rF   rG   rH   r   r   r   )r-   rI   rJ   rK   s       r   recordzPrefixCacheStats.record   s     		"##q(##""j0""8+ MMQMMLLJ&LLII!IIIIr   )r   r   r   r   rF   r   r   rG   rH   r   rM   r   r   r   rE   rE   r   s            Es6NC3" " " " " " " " " "r   rE   c                       e Zd ZdZdS )MultiModalCacheStatsz
    Stores multi-modal cache hit statistics.
    - `reset`: Whether `reset_mm_cache` was invoked.
    - `queries`: Refers to the number of multi-modal data items
      that were queried.
    N)r   r   r   r   r   r   r   rO   rO      s           r   rO   c                   B    e Zd ZU dZeed<   eed<   eedf         ed<   dS )KVCacheEvictionEventz&Single KV cache block eviction sample.lifetime_secondsidle_seconds.reuse_gaps_secondsN)r   r   r   r   rB   r   r+   r   r   r   rQ   rQ      sE         00eSj))))))r   rQ   c                      e Zd ZU dZdZeed<   dZeed<   dZeed<   dZ	eed<   dZ
eed<    ee	          Zeed
<   dZedz  ed<    ee	          Zee         ed<   dZedz  ed<   dZeeef         dz  ed<    ee	          Zeeef         ed<    ee	          Zeeef         ed<   dZedz  ed<   dZedz  ed<   dS )SchedulerStatsz$Stats associated with the scheduler.r   num_running_reqsnum_waiting_reqsstep_countercurrent_waver?   kv_cache_usage)default_factoryprefix_cache_statsNconnector_prefix_cache_statskv_cache_eviction_eventsspec_decoding_statskv_connector_statswaiting_lora_adaptersrunning_lora_adapterscudagraph_stats
perf_stats)r   r   r   r   rW   r   r   rX   rY   rZ   r[   rB   r   rE   r]   r^   listr_   rQ   r`   r   ra   dictstrr   rb   rc   rd   r	   re   r
   r   r   r   rV   rV      ss        ..cc L#L#NE+05AQ+R+R+R(RRR<@ "2T"9@@@;@5QU;V;V;Vd#78VVV48*T188804S#X-444,1E$,G,G,G4S>GGG,1E$,G,G,G4S>GGG,0O]T)000#'J	D '''''r   rV   c                       e Zd ZU dZdZeed<   dZeed<   dZ	eed<   dZ
eed<   dZeed<   dZeed	<   dZeed
<   dZeed<   dS )RequestStateStatsz3Stats that need to be tracked across delta updates.r   num_generation_tokensr?   arrival_time	queued_tsscheduled_tsfirst_token_tslast_token_tsfirst_token_latencyFis_corruptedN)r   r   r   r   rk   r   r   rl   rB   rm   rn   ro   rp   rq   rr   r   r   r   r   rj   rj      s         ==!"3""" L% IuL%NEM5 "%$$$ L$r   rj   c                       e Zd ZU dZded<   dZeed<   dZeed<   dZ	eed<   d	Z
ed	z  ed
<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   d	S )FinishedRequestStatsz)Stats associated with a finished request.r   finish_reasonr?   e2e_latencyr   num_prompt_tokensrk   Nmax_tokens_paramqueued_timeprefill_timeinference_timedecode_timemean_time_per_output_tokenFrr   num_cached_tokens)r   r   r   r   r   rv   rB   rw   r   rk   rx   ry   rz   r{   r|   r}   rr   r   r~   r   r   r   rt   rt      s         33!!!!Ks!"3"""#'cDj'''KL%NEK(++++L$sr   rt   c                       e Zd ZdZd ZdefdZdedefdZddd	ed
e	de
dedddedz  fdZdeded         d
e	dedddedz  fdZ	 dddde
de
dz  dede
f
dZdS )IterationStatsz8Stats associated with a single set of EngineCoreOutputs.c                     t          j                     | _        d| _        d| _        d| _        g | _        g | _        g | _        g | _        g | _	        d| _
        d S r%   )timeiteration_timestamprk   rw   num_preempted_reqsfinished_requestsmax_num_generation_tokens_itern_params_itertime_to_first_tokens_iterinter_token_latencies_iternum_corrupted_reqsr;   s    r   r'   zIterationStats.__init__   s^    #'9;; %&"!""#=?9;+(*68&79''(r   r#   c                     d                     d t          |                                           D                       }| j        j         d| dS )Nz, c              3   *   K   | ]\  }}| d | V  dS )=Nr   ).0kvs      r   	<genexpr>z*IterationStats.__repr__.<locals>.<genexpr>   s0      &Q&Qda!zzazz&Q&Q&Q&Q&Q&Qr   ())joinvarsitemsr.   r   )r-   field_to_value_strs     r   __repr__zIterationStats.__repr__   sR    !YY&Q&Qd4jj>N>N>P>P&Q&Q&QQQ.)AA,>AAAAr   startc                     | j         |z
  S )z=Calculate an interval relative to this iteration's timestamp.)r   )r-   r   s     r   _time_sincezIterationStats._time_since   s    '%//r   outputr   engine_core_timestampis_prefilling
prompt_len	req_statslora_statesLoRARequestStates	lora_nameNc                    t          |j                  }| xj        |z  c_        |rK| xj        |z  c_        |                     |j                  }	| j                            |	           |	|_        |xj        |z  c_        t          j
        r|j        s|j        dk    rd|_        |j        $|                     |j        |j        ||||           |r||_        n$||j        z
  }
| j                            |
           ||_        d S )Nr   T)r3   new_token_idsrk   rw   r   rl   r   r2   rq   envsVLLM_COMPUTE_NANS_IN_LOGITSrr   num_nans_in_logitseventsupdate_from_events
request_idro   rp   r   )r-   r   r   r   r   r   r   r   num_new_generation_tokensrq   itls              r   update_from_outputz!IterationStats.update_from_output   sC    %((<$=$=!""&??"" 	@""j0"""&"2"293I"J"J*112EFFF,?I)''+DD''
 ,	**	* )A--%)I" =$##!    	8'<I$$')*AAC+223777"7	r   req_idr   r   c                 j   ddl m} |D ]}|j        |j        k    r#|j        |_        |                    ||           5|j        |j        k    r.|j        dk    r|j        |_        |	                    ||           s|j        |j
        k    r&| xj        dz  c_        |                    ||           d S )Nr   )EngineCoreEventTyper?   r1   )vllm.v1.enginer   typeQUEUED	timestamprm   request_waiting	SCHEDULEDrn   request_running	PREEMPTEDr   )	r-   r   r   r   r   r   r   r   events	            r   r   z!IterationStats.update_from_events3  s     	766666 
	? 
	?Ez0777&+o	#++FI>>>>2<<<)S00-2_I*++FI>>>>2<<<''1,''++FI>>>
	? 
	?r   r   ru   r   rw   rx   r~   c                    |                      |j                  }|j        |j        z
  }|j        |j        z
  }|j        |j        z
  }	|j        |j        z
  }
|j        dz
  dk    r|	|j        dz
  z  nd}t          ||||j        ||||
|	||j        |          }| j	        
                    |           |j        r| xj        dz  c_        d S d S )Nr1   r   )ru   rv   rw   rk   rx   ry   rz   r{   r|   r}   rr   r~   )r   rl   rn   rm   ro   rp   rk   rt   rr   r   r2   r   )r-   ru   rw   rx   r   r~   rv   ry   rz   r|   r{   r}   finished_reqs                r   update_from_finished_requestz+IterationStats.update_from_finished_requestK  s    &&y'=>>  ,y/BB !/)2HH  -	0HH #093II
 .2Q66 9:Q>?? 	# ,'#/"+"A-#%)#'A"//
 
 
 	%%l333 ! 	)##q(####	) 	)r   )r   )r   r   r   r   r'   rh   r   rB   r   r   r   rj   r   rf   r   r   r   r   r   r   r      sr       BB
) 
) 
)B# B B B B0 05 0 0 0 018"18  %18 	18
 18 %18 )18 :18 18 18 18f?? &'? 	?
 %? )? :? ? ? ?< "#2) 2)%2) 2) *	2)
 %2) 2) 2) 2) 2) 2) 2)r   r   c                   H    e Zd ZdZd ZdededefdZedefd            Z	d	S )
	LoRAStatsz9Tracks waiting and running request IDs for a single LoRA.c                 R    t                      | _        t                      | _        d S N)setwaitingrunningr;   s    r   r'   zLoRAStats.__init__  s    !$!$r   r   r   r   c                     |r|rJ |r| j                             |           n| j                             |           |r| j                            |           d S | j                            |           d S r   )r   adddiscardr   )r-   r   r   r   s       r   updatezLoRAStats.update  s    (((( 	)LV$$$$L  ((( 	)LV$$$$$L  (((((r   r#   c                      | j         p| j         S r   r   r   r;   s    r   r=   zLoRAStats.empty  s    L0DL11r   N)
r   r   r   r   r'   rh   r   r   rA   r=   r   r   r   r   r     sy        CC' ' '
)S 
)4 
)$ 
) 
) 
) 
) 2t 2 2 2 X2 2 2r   r   c                       e Zd ZdZddefdZdededz  ded	efd
Zdededz  fdZdededz  fdZ	dededz  fdZ
dedz  fdZdS )r   z1A per-LoRA count of running and waiting requests.F	log_statsc                 F    || _         t          t                    | _        d S r   )r   r   r   r   )r-   r   s     r   r'   zLoRARequestStates.__init__  s    "5@5K5Kr   r   r   Nr   r   c                     | j         r|d S | j        |         }|                    |||           |j        r
| j        |= d S d S r   )r   r   r   r=   )r-   r   r   r   r   
lora_statss         r   _request_updatez!LoRARequestStates._request_update  sa     ~ 	!2F]9-
&'7333 	)i(((	) 	)r   c                 8    |                      ||dd           d S )NTFr   r   r-   r   r   s      r   r   z!LoRARequestStates.request_waiting  s%    VYeLLLLLr   c                 8    |                      ||dd           d S )NFTr   r   r   s      r   r   z!LoRARequestStates.request_running  s%    VYtLLLLLr   c                 8    |                      ||dd           d S )NFr   r   r   s      r   request_finishedz"LoRARequestStates.request_finished  s%    VYuMMMMMr   scheduler_statsc                     | j         r|d S | j                                        D ]=\  }}t          |j                  |j        |<   t          |j                  |j        |<   >d S r   )r   r   r   r3   r   rb   r   rc   )r-   r   r   r/   s       r   update_scheduler_statsz(LoRARequestStates.update_scheduler_stats  sw    ~ 	!8F $ 3 3 5 5 	R 	RIu?B5=?Q?QO1)<?B5=?Q?QO1)<<	R 	Rr   )F)r   r   r   r   r   r'   rh   r   r   r   r   rV   r   r   r   r   r   r     s#       ;;L L$ L L L L	)	)&)Dj	);?	)JN	) 	) 	) 	)Mc McDj M M M MMc McDj M M M MNs NsTz N N N NRnt6K R R R R R Rr   r   )!r   collectionsr   r   dataclassesr   r   typingr   r   	vllm.envsr   vllm.compilation.cuda_graphr	   vllm.v1.metrics.perfr
   vllm.v1.spec_decode.metricsr   r   r   r   r   r   r    rE   rO   rQ   rV   rj   rt   r   r   r   r   r   r   <module>r      s    * * * * * * * * ( ( ( ( ( ( ( ( % % % % % % % %       5 5 5 5 5 5 * * * * * * 9 9 9 9 9 9 ONNNNNNNNNN 0 0 0 0 0 0 0 0 LG LG LG LG LG LG LG LG^ " " " " "~ " " "<     >    * * * * * * * * ( ( ( ( ( ( ( (8        *        "T) T) T) T) T) T) T) T)n2 2 2 2 2 2 2 20 R  R  R  R  R  R  R  R  R  Rr   