
    .`i                     <   d dl Z d dlZd dlmZ d dlmZ d dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ dZ G d de j                  Z G d dej        ddd          Z G d de j                  Z G d dej                  Z G d dej        ddd          Z  G d dej        dd          Z! G d dej        ddd          Z" G d de j#                  Z$ G d  d!ej                  Z% G d" d#e j                  Z&dS )$    N)Mapping)Any)LoRARequest)MultiModalFeatureSpec)PoolingParams)SamplingParams)SchedulerStats)LogprobsListsLogprobsTensors)UtilityResult)stoplengthaborterrorc                   (    e Zd ZdZdZdZdZdZd ZdS )FinishReasona  
    Reason a request finished - stop, length, abort, or error.

    Int rather than Str for more compact serialization.

    stop - a stop string was emitted
    length - max_tokens was consumed, or max_model_len was reached
    abort - aborted by client
    error - retryable request-level internal error (e.g., KV load failure).
            Invariant: always converted to 500 Internal Server Error.

    r            c                 &    t           | j                 S N)FINISH_REASON_STRINGSvalueselfs    k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/engine/__init__.py__str__zFinishReason.__str__-   s    $TZ00    N)	__name__
__module____qualname____doc__STOPLENGTHABORTERRORr    r   r   r   r      sC          DFEE1 1 1 1 1r   r   c                   l   e Zd ZU eed<   ee         dz  ed<   ee         dz  ed<   edz  ed<   e	dz  ed<   edz  ed<   e
ed<   edz  ed	<   edz  ed
<   edz  ed<   dZej        dz  ed<   dZeed<   dZeed<   dZeed<   dZeeef         dz  ed<   dZeed<   dZedz  ed<   edee	z  fd            ZdS )EngineCoreRequest
request_idNprompt_token_idsmm_featuressampling_paramspooling_paramseos_token_idarrival_timelora_request
cache_saltdata_parallel_rankprompt_embedsr   client_indexcurrent_waveprioritytrace_headersF	resumableexternal_req_idreturnc                 >    | j         | j         S | j        J | j        S )z2Return the processed params (sampling or pooling).)r-   r.   r   s    r   paramszEngineCoreRequest.paramsV   s.     +''"...""r   )r   r    r!   str__annotations__listintr   r   r   floatr   r4   torchTensorr5   r6   r7   r8   r   r9   boolr:   propertyr=   r'   r   r   r)   r)   1   su         OOO3i$&&&&+,t3333#d****!D((((*$$$$d
d
""")-M5<$&--- L#
 L#Hc.2M738$t+222It #'OS4Z&&&#6 # # # X# # #r   r)   TF)
array_likeomit_defaultsgcc                       e Zd ZdZdZdZdZdS )EngineCoreEventTypez&The type of engine core request event.r   r   r   N)r   r    r!   r"   QUEUED	SCHEDULED	PREEMPTEDr'   r   r   rK   rK   _   s#        00FIIIIr   rK   c                   V    e Zd ZU dZeed<   eed<   e	 ddededz  dd fd            ZdS )	EngineCoreEventa  A timestamped engine core event associated with a request.

    The timestamp is a monotonic timestamps and is used for by the engine
    frontend to calculate intervals between engine core events. These
    timestamps should not be compared with timestamps from other processes.
    type	timestampN
event_typer;   c                 H    |t          j                    n|} | ||          S r   )time	monotonic)clsrS   rR   s      r   	new_eventzEngineCoreEvent.new_eventr   s-     )2(9DN$$$y	s:y)))r   r   )	r   r    r!   r"   rK   r?   rB   classmethodrX   r'   r   r   rP   rP   g   s|           HL* *,*9>*	* * * [* * *r   rP   c                   b   e Zd ZU eed<   ee         ed<   dZedz  ed<   dZ	e
dz  ed<   dZej        dz  ed<   dZedz  ed<   dZeez  dz  ed<   dZee         dz  ed	<   dZeeef         dz  ed
<   dZeeef         dz  ed<   dZeed<   dZej        dz  ed<   dZeed<   edefd            ZdS )EngineCoreOutputr*   new_token_idsNnew_logprobsnew_prompt_logprobs_tensorspooling_outputfinish_reasonstop_reasoneventskv_transfer_paramsr8   r   num_cached_tokensrouted_expertsnum_nans_in_logitsr;   c                     | j         d uS r   )r`   r   s    r   finishedzEngineCoreOutput.finished   s    !--r   ) r   r    r!   r>   r?   r@   rA   r]   r
   r^   r   r_   rC   rD   r`   r   ra   rb   rP   rc   dictr   r8   r   rd   re   npndarrayrf   rF   rE   rh   r'   r   r   r[   r[   z   sW         OOO9)-L-$&---:>4!7>>>*.NEL4'...)-M<$&---$(KsT!(((+/FD!D(///04S#X-444.2M738$t+222s(,NBJ%,,,  .$ . . . X. . .r   r[   c                   B    e Zd ZU eed<   dZedz  ed<   dZedz  ed<   dS )UtilityOutputcall_idNfailure_messageresult)	r   r    r!   rA   r?   ro   r>   rp   r   r'   r   r   rm   rm      sH         
 LLL #'OS4Z&&&#'FMD '''''r   rm   )rG   rI   c                       e Zd ZU dZeed<   g Zee         ed<   dZ	e
dz  ed<   dZeed<   dZedz  ed<   dZee         dz  ed	<   dZedz  ed
<   dZedz  ed<   d ZdS )EngineCoreOutputsr   engine_indexoutputsNscheduler_stats        rR   utility_outputfinished_requestswave_complete
start_wavec                 P    | j         dk    rt          j                    | _         d S d S )Nrv   )rR   rU   rV   r   s    r   __post_init__zEngineCoreOutputs.__post_init__   s*    >S  !^--DNNN ! r   )r   r    r!   rs   rA   r?   rt   r@   r[   ru   r	   rR   rB   rw   rm   rx   setr>   ry   rz   r|   r'   r   r   rr   rr      s          L# ')GT"#(((-1O^d*111Iu+/NMD(///)-s3x$--- !%M3:$$$ "Jd
!!!. . . . .r   rr   c                   &    e Zd ZdZdZdZdZdZdZdS )EngineCoreRequestTypezw
    Request types defined as hex byte strings, so it can be sent over sockets
    without separate encoding step.
                    N)	r   r    r!   r"   ADDr%   START_DP_WAVEUTILITYEXECUTOR_FAILEDr'   r   r   r   r      s3         
 CEMGOOOr   r   c                   B    e Zd ZU eed<   eed<   eed<   eed<   eed<   dS )ReconfigureDistributedRequestnew_data_parallel_sizenew_data_parallel_ranknew_data_parallel_rank_localnew_data_parallel_master_ipnew_data_parallel_master_portN)r   r    r!   rA   r?   r>   r'   r   r   r   r      sN         "%%%%!$$$$#&&&&&&r   r   c                       e Zd ZdZdZdZdS )ReconfigureRankTypez:
    Rank type for reconfiguring distributed request.
    N)r   r    r!   r"   KEEP_CURRENT_RANKSHUTDOWN_CURRENT_RANKr'   r   r   r   r      s(          r   r   )'enumrU   collections.abcr   typingr   msgspecnumpyrj   rC   vllm.lora.requestr   vllm.multimodal.inputsr   vllm.pooling_paramsr   vllm.sampling_paramsr   vllm.v1.metrics.statsr	   vllm.v1.outputsr
   r   vllm.v1.serial_utilsr   r   IntEnumr   Structr)   rK   rP   r[   rm   rr   Enumr   r   r   r'   r   r   <module>r      s     # # # # # #             ) ) ) ) ) ) 8 8 8 8 8 8 - - - - - - / / / / / / 0 0 0 0 0 0 : : : : : : : : . . . . . . = 1 1 1 1 14< 1 1 1.+# +# +# +# +#N	+# +# +# +#\    $,   * * * * *gn * * *&. . . . .N	. . . .@	( 	( 	( 	( 	(N	( 	( 	( 	(. . . . .N	. . . .>    DI   ' ' ' ' 'GN ' ' '    $,     r   