
    .`i|                        d dl Z d dlmZmZ d dlmZ d dlmZ d dlm	Z	m
Z
 d dlZd dlZd dlmZ d dlmZmZmZmZmZ d dlmZ d d	lmZ d d
lmZmZmZmZ d dlm Z  d dl!m"Z"m#Z#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z,m-Z-m.Z.m/Z/  ej0        d d          Z1 G d d          Z2e G d d                      Z3e G d d                      Z4 G d d          Z5 G d d          Z6dS )    N)defaultdictdeque)Iterable)	dataclass)Anycast)LoRARequest)STREAM_FINISHEDCompletionOutputPoolingOutputPoolingRequestOutputRequestOutput)RequestOutputKind)TokenizerLike)SpanAttributesSpanKindTracerextract_trace_context)&length_from_prompt_token_ids_or_embeds)EngineCoreOutputEngineCoreRequestFinishReason)IncrementalDetokenizer)LogprobsProcessor)ParentRequest)IterationStatsLoRARequestStatesRequestStateStatsSchedulerStatscpu)devicec                   t    e Zd ZdZdedefdZdeez  e	z  ddfdZ
deez  fd	Zdeez  dz  fd
Zd Zd ZdS )RequestOutputCollectorz
    Collects streamed RequestOutputs per individual request,
    for hand-off to the consuming asyncio generate task.

    When streaming deltas, RequestOutputs are merged if the
    producer gets ahead of the consumer.
    output_kind
request_idc                     |t           j        k    | _        || _        d | _        t          j                    | _        d | _        d S N)	r   DELTA	aggregater%   outputasyncioEventready_input_stream_task)selfr$   r%   s      s/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/engine/output_processor.py__init__zRequestOutputCollector.__init__1   s;    $(9(??$OS]__
7;    r*   returnNc                    | j         t          |t                    r"|| _         | j                                         dS t          | j         t
                    r8t          |t
                    r#| j                             || j                   dS t          | j         t                    rt          |t                    r|| _         dS dS dS )zNon-blocking put operation.N)r)   )	r*   
isinstance	Exceptionr-   setr   addr)   r   r/   r*   s     r0   putzRequestOutputCollector.put9   s    ;*VY"?"? DKJNN]33 		!
M9
 9
 		!
 KOOFdnO=====%9:: 	!z(@
 @
 	! !DKKK	! 	! 	! 	!r2   c                    K   | j         x}(| j                                         d{V  | j         x}(d| _         | j                                         t	          |t
                    r||S )z"Get operation blocks on put event.N)r*   r-   waitclearr5   r6   r9   s     r0   getzRequestOutputCollector.getI   s}      $v-*//######### $v-
fi(( 	Lr2   c                     | j         }| d| _         | j                                         t          |t                    r||S )zNon-blocking get operation.N)r*   r-   r=   r5   r6   r9   s     r0   
get_nowaitz!RequestOutputCollector.get_nowaitS   sF    DKJfi(( 	Lr2   c                 T    | j         | j                                          d | _         d S r'   )r.   cancelr/   s    r0   closezRequestOutputCollector.close]   s.    ".#**,,,"&r2   c                     | j         x}5|                                                    |j                   d | _         d S d S r'   )r.   get_loopcall_soon_threadsaferB   )r/   tasks     r0   __del__zRequestOutputCollector.__del__b   sD    ++D8MMOO00===&*D### 98r2   )__name__
__module____qualname____doc__r   strr1   r   r   r6   r:   r>   r@   rD   rI    r2   r0   r#   r#   (   s         <$5 <3 < < < <!-*>>J !t ! ! ! ! =+??    M,@@4G    ' ' '
+ + + + +r2   r#   c                   B    e Zd ZU eeez           ed<   ee         ed<   dS )OutputProcessorOutputrequest_outputsreqs_to_abortN)rJ   rK   rL   listr   r   __annotations__rN   rO   r2   r0   rQ   rQ   h   s9         -*>>????9r2   rQ   c                   X    e Zd ZU dZedz  ed<   ee         dz  ed<   eed<   dZ	e
ed<   dS )StreamingUpdatezStreaming input update data for output processor.

    Contains the incremental prompt data to be applied to a request state
    when the current sub-request completes.
    Npromptprompt_token_idsarrival_timeFfinal)rJ   rK   rL   rM   rN   rU   rT   intfloatr[   boolrO   r2   r0   rW   rW   n   s_           $J3i$&&&&E4r2   rW   c            )       ~   e Zd Z	 	 	 	 d+dedededz  dededz  ded	edz  d
ee         dz  de	j
        dz  dedz  dedz  dedz  dededz  dedededz  dedz  dedz  def(dZdeddfdZededz  ded	edz  dedz  dededz  dededd fd            Z	 	 d,dee         de	j
        dz  d edz  d!eez  dz  d"eeef         dz  d#ej        dz  deez  dz  fd$Z	 d-ded%ee         ee          z  d&ed"eeef         dz  deez  f
d'Z!	 d-d(ee         d edz  d!eez  dz  d#ej        dz  def
d)Z"de	j
        de fd*Z#dS ).RequestStateNFr%   external_req_id
parent_reqrequest_indexlora_requestr$   rX   rY   prompt_embedslogprobs_processordetokenizermax_tokens_paramrZ   queue	log_statsstream_intervaltop_pntemperaturestream_inputc                    || _         || _        || _        || _        || _        ||j        nd | _        || _        || _        || _        |	| _	        t          | j        | j	                  | _        |
| _        || _        || _        || _        || _        || _        d| _        || _        d| _        |rt+          |          nd | _        || _        d| _        || _        |rt5                      nd | _        d S )NTr   )rZ   )r%   ra   rb   rc   rd   	lora_namer$   rX   rY   re   r   
prompt_lenrf   rg   rh   rl   rm   rn   is_prefillingri   num_cached_tokensr   statsrk   sent_tokens_offsetstreaming_inputr   input_chunk_queue)r/   r%   ra   rb   rc   rd   r$   rX   rY   re   rf   rg   rh   rZ   ri   rj   rk   rl   rm   rn   ro   s                        r0   r1   zRequestState.__init__}   s   . %.$*(3?3K//QU& 0*@!4#5
 
 #5& 0
&!
!"ENX&LAAAATX
  /"#  ,#-EGGG 	r2   updater3   c                 b   |j          | _        |j        r"| j        r| j        |j        z   n|j        | _        | j        r"| j                            |j        pd           n|j        pg | _        | j        J t          | j                  | _        | j        |j        | j        _        d| _	        d S )NrO   T)
r[   rw   rX   rY   extendlenrr   ru   rZ   rs   )r/   ry   s     r0   apply_streaming_updatez#RequestState.apply_streaming_update   s    #)</ = 	15Ov},,&- K   	B!(()@)FBGGGG$*$;$ArD!$000d344:!&,&9DJ#!r2   	tokenizerrequestc	                    |j         x}	rY|	j        sd }|	j        }
t          j        ||          }t          j        ||          }|	j        }|	j        }|	j        }|	j	        }n!d }d }d }d }d }d }|j
        J |j
        j        }
|j        J  | di d|j        d|j        d|d|d|j        d|
d|d	|j        d
|j        d|d|d|d|d|d|d|j        d|d|d|d|j        S )N)r~   r   r%   ra   rb   rc   rd   r$   rX   rY   re   rf   rg   rh   rl   rm   rn   rZ   ri   rj   rk   ro   rO   )sampling_params
detokenizer$   r   from_new_requestr   
max_tokensrl   rm   rn   pooling_paramsra   r%   rd   rY   re   rZ   	resumable)clsr~   r   rX   rb   rc   ri   rj   rk   r   r$   rf   rg   rh   rl   rm   rn   s                    r0   r   zRequestState.from_new_request   s    &55? 	="- ! 	)5K!2!C#" " " 1A#  K  /9#)E!A)5KK!%K#EAK)555!0<K&222s 
 
 
))
#33
 "z
 (-	

 !--
 $
 6
 %55
 "//
  21
 $
 .-
 %
 a
 $
  !--!
" %#
$  i%
& ,O'
( !**)
 	
r2   new_token_idspooling_outputfinish_reasonstop_reasonkv_transfer_paramsrouted_expertsc                    |d u}| j         t          j        k    }|s|rd S | j        dk    r| j        J |s7| j        dk    s,t          | j        j                  | j        z
  | j        k    sd S | j         t          j        k    r7| j        j        | j        d          }t          | j        j                  | _        | j	        }	|+| 
                    |	|                     |          g|          S |                     ||||          }
| j        |
g}n3| j                            | j        |
          \  }}|sd S | j        j	        }	| 
                    |	|||          S )N   r   )r$   r   
FINAL_ONLYrk   rg   rv   r|   output_token_idsr(   ra   _new_request_output_new_pooling_output_new_completion_outputrb   get_outputsr%   )r/   r   r   r   r   r   r   finished
final_onlyra   r*   outputss               r0   make_request_outputz RequestState.make_request_output  s    !,%):)EE
 	J 	4!###/// *a//t'899D<SS'( ( t#4#::: !% 0 A+--! +.d.>.O*P*P'.%++)).99:   ,,=+~
 
 ?"hGG $ ; ;DOV T TGX t"o=O''Wh0B
 
 	
r2   r   r   c                 "   |d         }t          |t                    r<t          |          dk    sJ | j        J t	          ||| j        | j        |          S | j        J | j        t          j	        k    r| j        
                                }n| j        j        }| j        }|| j        dgt          | j                  z  }t          || j        | j        ||t!          t"          t$                   |          ||| j        | j        
  
        S )Nr   r   )r%   r   rt   rY   r   )
r%   rd   rX   rY   prompt_logprobsr   r   r   rt   metrics)r5   r   r|   rY   r   rt   rf   r$   r   r(   pop_prompt_logprobsr   re   r   rd   rX   r   rT   r   ru   )r/   ra   r   r   r   first_outputr   rY   s           r0   r   z RequestState._new_request_outputH  s5    qzlM22 
	w<<1$$$$(444'*$"&"8!%!6!    &2220666"5IIKKOO"5EO  0#(:(F !sS);%<%<<&*;-+./991"4J
 
 
 	
r2   	token_idsc                 p   | j         J | j        J |d u}| j        t          j        k    }| j                             ||          }|s| j         j        }| j        j        }|r|r|t          |           d          }t          | j
        ||||| j        j        |rt          |          nd |r|nd           S )N)indextextr   r   logprobscumulative_logprobr   r   )rg   rf   r$   r   r(   get_next_output_textr   r   r|   r   rc   r   rN   )	r/   r   r   r   r   r   deltar   r   s	            r0   r   z#RequestState._new_completion_outputt  s     +++&222 , $5$;; 44XuEE 	:(9I *3 	3X 	3Y 1 12H$)#6I08B#m,,,d'/9T	
 	
 	
 		
r2   c                 "    t          |          S )N)data)r   )r/   r   s     r0   r   z RequestState._new_pooling_output  s    .1111r2   )NNNFNNr'   )$rJ   rK   rL   rN   r   r\   r	   r   rT   torchTensorr   r   r]   r#   r^   r1   rW   r}   classmethodr   r   r   r   dictr   npndarrayr   r   r   r   r   r   r   r   rO   r2   r0   r`   r`   |   s       & #$("+8
 8
8
 8
 "D(	8

 8
 "D(8
 '8
 d
8
 s)d*8
 |d*8
 .48
 ,d28
 *8
 8
 &,8
  !8
" #8
$ t|%8
& :'8
( T\)8
* +8
 8
 8
 8
t"_ " " " " "& ;
 4';
 #;
 d
	;

 "D(;
 ;
 &,;
 ;
 ;
 
;
 ;
 ;
 [;
F 59,0>
 >
Cy>
 t+>
 $d*	>

 3Y%>
 !cNT1>
 
T)>
 
-	-	4>
 >
 >
 >
J 59*
 *
*
 &'$}*==*
 	*

 !cNT1*
 
-	-*
 *
 *
 *
b -1
 
9
 $d*
 3Y%	

 
T)
 

 
 
 
B2%, 2= 2 2 2 2 2 2r2   r`   c                      e Zd ZdZ	 d&dedz  dedefdZd Zd	efd
Z	d'dZ
defdZdee         ded	ee         fdZ	 	 	 d(dededz  dedz  dededz  d	dfdZdedededz  d	dfdZ	 	 d)dee         dedz  dedz  d	efdZded	dfdZdedz  fd Zd!edededz  d	dfd"Zded!ededz  dedz  fd#Z ded$e!dz  dedz  fd%Z"dS )*OutputProcessorz.Process EngineCoreOutputs into RequestOutputs.r   r~   Nrj   rk   c                    || _         || _        || _        i | _        i | _        t          t                    | _        t          |          | _	        d | _
        t          j                    | _        | j                                         d S r'   )rj   r~   rk   request_statesparent_requestsr   rT   external_req_idsr   lora_statestracerr+   r,   _requests_drainedr7   )r/   r~   rj   rk   s       r0   r1   zOutputProcessor.__init__  sz     #".799;=H=N=N,Y77%)!(""$$$$$r2   c                 *    t          | j                  S r'   r|   r   rC   s    r0   get_num_unfinished_requestsz+OutputProcessor.get_num_unfinished_requests  s    4&'''r2   r3   c                 2    t          | j                  dk    S )Nr   r   rC   s    r0   has_unfinished_requestsz'OutputProcessor.has_unfinished_requests  s    4&''!++r2   c                 Z   K   | j         sd S | j                                         d {V  d S r'   )r   r   r<   rC   s    r0   wait_for_requests_to_drainz*OutputProcessor.wait_for_requests_to_drain  sC      " 	F$))+++++++++++r2   ec                     | j                                         D ](\  }}|j        J |j                            |           )dS )z(Propagate error to all generate() tasks.N)r   itemsri   r:   )r/   r   _states       r0   propagate_errorzOutputProcessor.propagate_error  sU     +1133 	 	HAu;***KOOA	 	r2   request_idsinternalc                    g }|D ]}|re|                     |           | j                            |          x}r3|j        }| j        |         }|                    |           |s| j        |= i| j                            |g           x}r|                    |           g }|D ])}| j                            |d          }|| j        	                    ||j
                   |                     |           |j        N|                    g |j        t          ndt          j        dd          x}	r|j                            |	           | j                            |          x}
rb|
j        r@t)          |
j                  }|                     |d          }|                    |           | j                            |d           +| j        s| j                                         |S )ah  Abort a list of requests.

        The request_ids may be either external request IDs (those passed to
        InputProcessor.process_inputs()) or internal request IDs (those randomly
        generated when creating the EngineCoreRequest).

        If an external request ID is provided, and that external request ID
        was used for multiple requests, all requests associated with that external
        request ID are aborted.

        In the case of parallel sampling, a request ID may be used to identify
        a parent request, in which case the associated child requests are aborted
        also.
        N)r   r   r   r   r   T)r   )appendr   r>   ra   r   removepopr{   r   request_finishedrq   ri   r   rg   EMPTY_CPU_TENSORr   ABORTr:   r   child_requestsrT   abort_requestsr   r7   )r/   r   r   internal_req_idsr%   	req_statera   internal_idsrequest_ids_to_abortrequest_outputparent
child_reqss               r0   r   zOutputProcessor.abort_requests  s<    % 	6 	6J 6 ''
333 !% 3 7 7
 C CC9 C&/&?O#'#8#IL ''
333' C 1/B!%!6!:!::r!J!JJ 6 ''555!* 	; 	;J+//
DAAI$ 11*i>QRRR$++J777?.&/&C&C&( %08 (8'7!&2&8$(+/ 'D 
' 
' 
N / O''777/33J??? ;( <!%f&;!<!<J!%!4!4Z$!4!O!OJ(//
;;;$((T:::" 	)"&&(((##r2   r   r   rX   rb   rc   ri   c           
         |j         }| j                            |          }||                     |||           d S t                              | j        |||||| j        | j                  }| j	        
                                r| j	                                         || j        |<   |r|| j        |j         <   | j        |j                                     |           d S )N)r~   r   rX   rb   rc   ri   rj   rk   )r%   r   r>   _update_streaming_request_stater`   r   r~   rj   rk   r   is_setr=   r   r   ra   r   )r/   r   rX   rb   rc   ri   r%   r   s           r0   add_requestzOutputProcessor.add_request  s     '
'++J77	 00GVLLLF 11n!'n 0 2 	
 	
	 !((** 	+"((****3J' 	E:DD !67 	i78??
KKKKKr2   r   c                    |j         sf|j        <|                     |           |j        |j                            t
                     n!|j        rd|j        d         _        nd|_        dS t          ||j	        |j
                  }|j        *|                    |           t                      |_        dS |j                            |           dS )z<Queue a streaming update instead of immediately applying it.NTF)rX   rY   rZ   )r   rx   _finish_requestri   r:   r
   r[   rw   rW   rY   rZ   r}   r   r   )r/   r   r   rX   ry   s        r0   r   z/OutputProcessor._update_streaming_request_state   s       	*2$$Y///?. O''888, 28<	+B/55,1	)F $5 -
 
 
 &.,,V444*/''I''' '..v66666r2   engine_core_outputsengine_core_timestampiteration_statsc           	         g }g }|D ]}|j         }| j                            |          }|'|                     ||||           |j        }	|j        }
|j        }|j        }|j        }|j	        }|j
        |_
        d|_        |
e|j        J |j        J |j                            |	|t          j        k              }|rt          j        }|}|j                            |           |                    |	|
||||          x}rE|j        rd|_        |j        |j                            |           n|                    |           ||j        r@|j        r0|j                                        }|                    |           d|_        |                     |           |j        s|                    |           |                     |||           | j        r|                     |||           t?          ||          S )a  
        Process the EngineCoreOutputs:
        1) Compute stats for logging
        2) Detokenize
        3) Create and handle RequestOutput objects:
            * If there is a queue (for usage with AsyncLLM),
              put the RequestOutput objects into the queue for
              handling by the per-request generate() tasks.

            * If there is no queue (for usage with LLMEngine),
              return a list of RequestOutput objects.

        NOTE FOR DEVELOPERS

        vLLM V1 minimizes the number of python loops over the full
        batch to ensure system overheads are minimized. This is the
        only function that should loop over EngineCoreOutputs.

        If you need to touch every element of the batch, do it from
        within the loop below.
        NF)rR   rS   ) r%   r   r>   _update_stats_from_outputr   r   r   r   r   r   rt   rs   rg   rf   ry   r   STOPupdate_from_outputr   rw   r   ri   r:   r   rx   popleftr}   r   _update_stats_from_finishedr   
do_tracingrQ   )r/   r   r   r   rR   rS   engine_core_outputreq_idr   r   r   r   r   r   r   stop_stringr   ry   s                     r0   process_outputszOutputProcessor.process_outputsA  s   8 GI#%"5 K	X K	X'2F+//77I  **-/Do   /<M/>N.<M,8K!3!F/>N*<*NI'&+I#% ,888 3???'3::!=L4E#E   .$0$5M"-K ,??@RSSS "+!>!>"" " ~ ; , 4.3N+?.O''7777 $**>::: (, X 2 ;!*!<!D!D!F!F!88@@@@6:	33((333-6 5 &,,V444 44!=/   { X(:IWWW$+'
 
 
 	
r2   c                 X   |j         }| j                            |           | j        |j                 }|                    |           |s| j        |j        = |j        }|r'|j        s | j                            |j         d            | j        s| j	        
                                 d S d S r'   )r%   r   r   r   ra   r   rb   r   r   r   r7   )r/   r   r   r   rb   s        r0   r   zOutputProcessor._finish_request  s    %''',Y-FGF### 	A%i&?@ )
 	Bj7 	B $$Z%:DAAA" 	)"&&(((((	) 	)r2   scheduler_statsc                 :    | j                             |           d S r'   )r   update_scheduler_stats)r/   r   s     r0   r   z&OutputProcessor.update_scheduler_stats  s    //@@@@@r2   r   c                    |j         J |J | j        J t          |j         j        dz            }t	          |j                  }t          |j        |j                  }| j        	                    dt          j        ||          5 }|j         }|j        |j        z
  }	|j        |j        z
  }
|j        |j        z
  }|j        |j        z
  }|j        |j        z
  }|                    t$          j        |j                   |                    t$          j        |	           |                    t$          j        |
           |                    t$          j        |           |                    t$          j        |j                   |                    t$          j        |           |                    t$          j        |           |                    t$          j        |           |                    t$          j        |j                   |j        r%|                    t$          j         |j                   |j!        r%|                    t$          j"        |j!                   |j#        r%|                    t$          j$        |j#                   |j%        r%|                    t$          j&        |j%                   d d d            d S # 1 swxY w Y   d S )Ng    eAllm_request)kindcontext
start_time)'ru   r   r\   rZ   r   trace_headersr   rY   re   start_as_current_spanr   SERVERiteration_timestampscheduled_ts	queued_tsfirst_token_tslast_token_tsset_attributer   "GEN_AI_LATENCY_TIME_TO_FIRST_TOKENfirst_token_latencyGEN_AI_LATENCY_E2EGEN_AI_LATENCY_TIME_IN_QUEUEGEN_AI_USAGE_PROMPT_TOKENSGEN_AI_USAGE_COMPLETION_TOKENSnum_generation_tokens$GEN_AI_LATENCY_TIME_IN_MODEL_PREFILL#GEN_AI_LATENCY_TIME_IN_MODEL_DECODE&GEN_AI_LATENCY_TIME_IN_MODEL_INFERENCEGEN_AI_REQUEST_IDra   rl   GEN_AI_REQUEST_TOP_Prh   GEN_AI_REQUEST_MAX_TOKENSrn   GEN_AI_REQUEST_TEMPERATURErm   GEN_AI_REQUEST_N)r/   r   r   r   arrival_time_nano_secondstrace_contextprompt_lengthspanr   e2e_timequeued_timeprefill_timedecode_timeinference_times                 r0   r   zOutputProcessor.do_tracing  s9    ******{&&&$'	(Ds(J$K$K!-.@.NOO>&	(?
 
 [..!0	 / 
 
 0	Q
 oG&:W=QQH!.1BBK"1G4HHL!/'2HHK$2W5IINA+   ~@(KKK~JKXXX~H-XXX=-   C\   BK   E~  
 0)2K    Y"">#F	XXX) """<i>X   $ """=y?T   { Q"">#BIKPPPa0	Q 0	Q 0	Q 0	Q 0	Q 0	Q 0	Q 0	Q 0	Q 0	Q 0	Q 0	Q 0	Q 0	Q 0	Q 0	Q 0	Q 0	Qs   	H2KKKc           	          |d S |J |j         J |                    |||j        |j        |j         | j        |j                   d S r'   )ru   r   rs   rr   r   rq   )r/   r   r   r   r   s        r0   r   z)OutputProcessor._update_stats_from_output  sn     "F$000*****!# O	
 	
 	
 	
 	
r2   r   c                    |d S |J |j         J |                    ||j        |j        |j         |j                   | j                            |j        |j                   t          j
        |j        ||j         j                   d S )N)r   num_prompt_tokensrh   	req_statsrt   )ru   update_from_finished_requestrr   rh   rt   r   r   r%   rq   r   observe_finished_requestrb   r  )r/   r   r   r   s       r0   r   z+OutputProcessor._update_stats_from_finished  s     "F(((***44''2&7o'9 	5 	
 	
 	
 	)))*>	@STTT. /9?3X	
 	
 	
 	
 	
r2   )r   )r3   N)Nr   Nr   )#rJ   rK   rL   rM   r   r^   r\   r1   r   r   r   r6   r   r   rN   rT   r   r   r   r#   r   r`   r   r   r]   r   rQ   r   r   r   r   r   r   r   r   rO   r2   r0   r   r     s       88  !	% % 4'% % 	% % % %"( ( (, , , , ,, , , ,
    >$(3- >$4 >$DQTI >$ >$ >$ >$H ,0/3L L"L d
L "D(	L
 L &,L 
L L L LB7%70A7KNQU:7	7 7 7 7H /315	n
 n
!"23n
  %t|n
 ($.	n

 
n
 n
 n
 n
`) )$ ) ) ) )"Ant6K A A A A?Q,?Q  ?Q ($.	?Q
 
?Q ?Q ?Q ?QB

 -
  %t|	

 ($.
 
 
 
,

 $d*
 ($.	
 
 
 
 
 
r2   r   )7r+   collectionsr   r   collections.abcr   dataclassesr   typingr   r   numpyr   r   vllm.lora.requestr	   vllm.outputsr
   r   r   r   r   vllm.sampling_paramsr   vllm.tokenizersr   vllm.tracingr   r   r   r   
vllm.utilsr   vllm.v1.enginer   r   r   vllm.v1.engine.detokenizerr   vllm.v1.engine.logprobsr    vllm.v1.engine.parallel_samplingr   vllm.v1.metrics.statsr   r   r   r   emptyr   r#   rQ   rW   r`   r   rO   r2   r0   <module>r.     s    * * * * * * * * $ $ $ $ $ $ ! ! ! ! ! !              ) ) ) ) ) )              3 2 2 2 2 2 ) ) ) ) ) ) P P P P P P P P P P P P = = = = = = L L L L L L L L L L = = = = = = 5 5 5 5 5 5 : : : : : :            5;q/// =+ =+ =+ =+ =+ =+ =+ =+@        
 
 
 
 
 
 
 
 
Z2 Z2 Z2 Z2 Z2 Z2 Z2 Z2zY
 Y
 Y
 Y
 Y
 Y
 Y
 Y
 Y
 Y
r2   