
    .`i                         d dl Z d dlmZmZ d dlZd dlZd dlmZ d dl	m
Z
  e
e          Ze G d d                      Z G d d          Z G d	 d
          Zdej        deeee         f         fdZdS )    N)	dataclassfield)SpeculativeConfig)init_loggerc                       e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<    e	e
          Ze
e         ed<   eded	d fd
            ZdedefdZdS )SpecDecodingStatszPer-step iteration decoding stats from scheduler.

    Each scheduler step, statistics on spec decoding performance are
    aggregated across requests by the scheduler and returned to the
    frontend in EngineCoreOutputs->SchedulerStats.
    num_spec_tokensr   
num_draftsnum_draft_tokensnum_accepted_tokens)default_factorynum_accepted_tokens_per_posreturnc                 $     | |dg|z            S )Nr   )r	   r    )clsr	   s     o/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/spec_decode/metrics.pynewzSpecDecodingStats.new   s(    s+)*o(=
 
 
 	
    c                     | xj         dz  c_         | xj        |z  c_        | xj        |z  c_        || j        k    sJ t	          |          D ]}| j        |xx         dz  cc<   d S )N   )r
   r   r   r	   ranger   )selfr   r   is       r   observe_draftzSpecDecodingStats.observe_draft&   s    1!11  $77  "d&:::::*++ 	5 	5A,Q///14////	5 	5r   N)__name__
__module____qualname____doc__int__annotations__r
   r   r   r   listr   classmethodr   r   r   r   r   r   r      s           Jc    -2U4-H-H-HcHHH
# 
*= 
 
 
 [
5c 5 5 5 5 5 5 5r   r   c                   >    e Zd ZdZd Zd ZdefdZej	        fdZ
dS )SpecDecodingLoggingzAggregate and log spec decoding metrics.

    LoggingStatLogger aggregates per-iteration metrics over a set
    time interval using observe() and then logs them using log()
    before resetting to zero.
    c                 .    |                                   d S N)resetr   s    r   __init__zSpecDecodingLogging.__init__7   s    

r   c                 n    g | _         g | _        g | _        g | _        t	          j                    | _        d S r'   )r
   r   r   accepted_tokens_per_pos_liststime	monotoniclast_log_timer)   s    r   r(   zSpecDecodingLogging.reset:   s7    %'+-.0 >@*!^--r   spec_decoding_statsc                     | j                             |j                    | j                            |j                   | j                            |j                   | j                            |j                   d S r'   )r
   appendr   r   r,   r   )r   r0   s     r   observezSpecDecodingLogging.observeA   s}    2=>>>$$%8%IJJJ ''(;(OPPP*11;	
 	
 	
 	
 	
r   c           
      D   | j         sd S t          j        | j                   }t          j        | j                  }t          j        | j                  }d}d}t          j                    | j        z
  }|dk    r
||z  }||z  }|dk    r||z  dz  nt          d          }d||z  z   }	t          j	        | j
                  }
t          j        |
d          |z  }d                    d |D                       } |d|	||||||           |                                  d S )	Nr   d   nanr   )axisz, c              3      K   | ]}|d V  	dS )z.3fNr   ).0ps     r   	<genexpr>z*SpecDecodingLogging.log.<locals>.<genexpr>b   s&      CCQjjCCCCCCr   zSpecDecoding metrics: Mean acceptance length: %.2f, Accepted throughput: %.2f tokens/s, Drafted throughput: %.2f tokens/s, Accepted: %d tokens, Drafted: %d tokens, Per-position acceptance rate: %s, Avg Draft acceptance rate: %.1f%%)r
   npsumr   r   r-   r.   r/   floatarrayr,   joinr(   )r   log_fnr
   r   r   draft_throughputaccepted_throughputelapsed_timedraft_acceptance_ratemean_acceptance_length
pos_matrixacceptance_rates	rates_strs                r   logzSpecDecodingLogging.logI   s[    	FVDO,,
6$"788 fT%=>>~''$*<<!/,>"5"D  !##  "22S88u 	 "#&9J&F!GXd@AA
6*1555
BIICC2BCCCCC	0 #!	
 	
 	
" 	

r   N)r   r   r   r   r*   r(   r   r3   loggerinforJ   r   r   r   r%   r%   /   ss           . . .
+< 
 
 
 
   , , , , , ,r   r%   c            	       t    e Zd ZdZej        Zdedz  dee	         de
eee         f         fdZdded	efd
ZdS )SpecDecodingProma  Record spec decoding metrics in Prometheus.

    The acceptance rate can be calculated using a PromQL query:

      rate(vllm:spec_decode_num_accepted_tokens_total[$interval]) /
      rate(vllm:spec_decode_num_draft_tokens_total[$interval])

    The mean acceptance length (conventionally including bonus tokens)
    can be calculated using:

      1 + (
      rate(vllm:spec_decode_num_accepted_tokens_total[$interval]) /
      rate(vllm:spec_decode_num_drafts[$interval]))

    A per-position acceptance rate vector can be computed using

      vllm:spec_decode_num_accepted_tokens_per_pos[$interval] /
      vllm:spec_decode_num_drafts[$interval]
    speculative_configN
labelnamesper_engine_labelvaluesc                   	 |d u| _         | j         sd S |                     dd|          }t          ||          | _        |                     dd|          }t          ||          | _        |                     dd|          }t          ||          | _        |J | j         r|j        nd	|d	gz   }|                     d
d|          	fd|                                D             | _        d S )Nzvllm:spec_decode_num_draftszNumber of spec decoding drafts.)namedocumentationrP   z!vllm:spec_decode_num_draft_tokenszNumber of draft tokens.z$vllm:spec_decode_num_accepted_tokenszNumber of accepted tokens.r   positionz,vllm:spec_decode_num_accepted_tokens_per_posz#Accepted tokens per draft position.c                 R    i | ]"\  }|fd t                    D             #S )c                 H    g | ]} j         g t          |          R  S r   )labelsstr)r9   posbase_counterlvs     r   
<listcomp>z8SpecDecodingProm.__init__.<locals>.<dictcomp>.<listcomp>   s8    WWW%,%4r43s88444WWWr   )r   )r9   idxr\   r[   r	   s     @r   
<dictcomp>z-SpecDecodingProm.__init__.<locals>.<dictcomp>   sP     
 
 
R WWWWWo@V@VWWW
 
 
r   )	spec_decoding_enabled_counter_clsmake_per_enginecounter_spec_decode_num_drafts$counter_spec_decode_num_draft_tokens'counter_spec_decode_num_accepted_tokensnum_speculative_tokensitems/counter_spec_decode_num_accepted_tokens_per_pos)
r   rO   rP   rQ   counter_draftscounter_draft_tokenscounter_accepted_tokenspos_labelnamesr[   r	   s
           @@r   r*   zSpecDecodingProm.__init__   s    &8t%C") 	F**.;! + 
 

 />2/
 /
+  $0043!  1  
  

 5D "85
 5
1 #'"3"376! #4 #
 #

 8G#%;8
 8
4 "--- )55 	
 $zl2((??% ) 
 

 
 
 
 
17799
 
 
 	<<<r   r   r0   
engine_idxc                 v   | j         sd S | j        |                             |j                   | j        |                             |j                   | j        |                             |j                   t          | j	        |                   D ]%\  }}|                    |j
        |                    &d S r'   )r`   rc   incr
   rd   r   re   r   	enumeraterh   r   )r   r0   rm   rZ   counters        r   r3   zSpecDecodingProm.observe   s    ) 	F+J7;;*	
 	
 	
 	1*=AA0	
 	
 	
 	4Z@DD3	
 	
 	
 &@L
 
 	N 	NLC KK+GLMMMM	N 	Nr   )r   )r   r   r   r   prometheus_clientCounterra   r   r"   rY   dictr    objectr*   r   r3   r   r   r   rN   rN   x   s         ( %,L6
-46
 I6
 !%S$v,%6 7	6
 6
 6
 6
pN N+< N# N N N N N Nr   rN   rq   rQ   c                 D      fd|                                 D             S )z&Create a counter for each label value.c                 ,    i | ]\  }}| j         | S r   )rX   )r9   r^   labelvaluesrq   s      r   r_   z#make_per_engine.<locals>.<dictcomp>   s8       C 	^W^[)  r   )rg   )rq   rQ   s   ` r   rb   rb      s:    
    6 < < > >   r   )r-   dataclassesr   r   numpyr<   rr   vllm.configr   vllm.loggerr   r   rK   r   r%   rN   rs   rt   r    r"   ru   rb   r   r   r   <module>r}      sP    ( ( ( ( ( ( ( (         ) ) ) ) ) ) # # # # # #	X		 5 5 5 5 5 5 5 5<F F F F F F F FR^N ^N ^N ^N ^N ^N ^N ^NB& d6l!23     r   