
    -`i                         d dl mZmZ d dlmZmZmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlm Z   G d de          Z!dS )    )ABCabstractmethod)AsyncGeneratorIterableMapping)Any)ModelConfig
VllmConfig)
PromptType)LoRARequest)PoolingRequestOutputRequestOutput)IOProcessor)PoolingParams)RendererLike)SamplingParams)SupportedTask)EngineCoreRequest)InputProcessorc                      e Zd ZU dZeed<   eed<   eed<   edz  ed<   e	e
defd                        Ze	e
defd	                        Ze	e
defd
                        Ze	e
defd                        Ze	e
defd                        Ze
ddddddddeez  dedededz  dedz  deeef         dz  deeef         dz  dededz  deedf         fd            Ze
	 	 	 	 	 dAdededededz  deeef         dz  dededz  deeef         dz  dee df         fd            Z!e
dee"e         z  ddfd            Z#e
defd            Z$e
dBd            Z%e
dBd            Z&e
dBd             Z'e
dBd!            Z(e
dBd"            Z)e
	 dCd$ed%edefd&            Z*e
dDd(eddfd)            Z+e
dEd*e,e         dz  ddfd+            Z-e
defd,            Z.e
dedefd-            Z/e
d#d.d/d0ed1eddfd2            Z0e
dBd3            Z1e
defd4            Z2	 dFd6ed7eddfd8Z3	 	 	 dGd:ed;e4dz  d<e5d=edz  fd>Z6de5e7d?f         fd@Z8dS )HEngineClientz$Protocol class for Clients to Enginevllm_configmodel_configinput_processorNio_processorreturnc                     d S N selfs    h/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/engine/protocol.pyrendererzEngineClient.renderer   s    (+    c                     d S r   r   r    s    r"   
is_runningzEngineClient.is_running!       "%#r$   c                     d S r   r   r    s    r"   
is_stoppedzEngineClient.is_stopped%   r'   r$   c                     d S r   r   r    s    r"   erroredzEngineClient.errored)   s    "sr$   c                     d S r   r   r    s    r"   
dead_errorzEngineClient.dead_error-   s    +.3r$   r   )prompt_textlora_requesttokenization_kwargstrace_headersprioritydata_parallel_rankpromptsampling_params
request_idr.   r/   r0   r1   r2   r3   c                    dS )zGenerate outputs for a request.Nr   )
r!   r4   r5   r6   r.   r/   r0   r1   r2   r3   s
             r"   generatezEngineClient.generate1   s	     	r$   pooling_paramstruncate_prompt_tokensc	                     dS )zGenerate outputs for a request from a pooling model.

        NOTE: truncate_prompt_tokens is deprecated in v0.14.
        TODO: Remove this argument in v0.15.
        Nr   )	r!   r4   r9   r6   r/   r1   r2   r:   r0   s	            r"   encodezEngineClient.encodeB   s	    " 	r$   c                 
   K   dS )zAbort a request.

        Args:
            request_id: The unique id of the request,
                        or an iterable of such ids.
        Nr   )r!   r6   s     r"   abortzEngineClient.abortU   s       	r$   c                 
   K   d S r   r   r    s    r"   is_tracing_enabledzEngineClient.is_tracing_enabled_   s      03r$   c                 
   K   d S r   r   r    s    r"   do_log_statszEngineClient.do_log_statsb   s      *-#r$   c                 
   K   dS )zRaise if unhealthyNr   r    s    r"   check_healthzEngineClient.check_healthe          	r$   c                 
   K   dS )zStart profiling the engineNr   r    s    r"   start_profilezEngineClient.start_profilej   rE   r$   c                 
   K   dS )zStop profiling the engineNr   r    s    r"   stop_profilezEngineClient.stop_profileo   rE   r$   c                 
   K   dS )zReset the multi-modal cacheNr   r    s    r"   reset_mm_cachezEngineClient.reset_mm_cachet   rE   r$   Freset_running_requestsreset_connectorc                 
   K   dS )zDReset the prefix cache and optionally any configured connector cacheNr   )r!   rL   rM   s      r"   reset_prefix_cachezEngineClient.reset_prefix_cachey   s      
 	r$      levelc                 
   K   dS )zSleep the engineNr   )r!   rQ   s     r"   sleepzEngineClient.sleep   rE   r$   tagsc                 
   K   dS )zWake up the engineNr   )r!   rT   s     r"   wake_upzEngineClient.wake_up   rE   r$   c                 
   K   dS )z$Check whether the engine is sleepingNr   r    s    r"   is_sleepingzEngineClient.is_sleeping   rE   r$   c                 
   K   dS )z<Load a new LoRA adapter into the engine for future requests.Nr   )r!   r/   s     r"   add_lorazEngineClient.add_lora   rE   r$   T)wait_for_inflight_requestsclear_cacher[   r\   c                
   K   dS )a\  Pause new generation/encoding requests.

        Args:
            wait_for_inflight_requests: When ``True`` waits for in-flight requests
                to finish before pausing. When ``False`` (default), aborts in-flight
                requests immediately.
            clear_cache: Whether to clear KV and prefix caches after draining.
        Nr   )r!   r[   r\   s      r"   pause_generationzEngineClient.pause_generation   s       	r$   c                 
   K   dS )z.Resume accepting generation/encoding requests.Nr   r    s    r"   resume_generationzEngineClient.resume_generation   rE   r$   c                 
   K   dS )z.Return whether the engine is currently paused.Nr   r    s    r"   	is_pausedzEngineClient.is_paused   rE   r$   ,  new_data_parallel_sizedrain_timeoutc                    K   t           )zScale the engineNotImplementedError)r!   rd   re   s      r"   scale_elastic_epzEngineClient.scale_elastic_ep   s       "!r$   r   methodtimeoutargskwargsc                    K   t           )z0Perform a collective RPC call to the given path.rg   )r!   rj   rk   rl   rm   s        r"   collective_rpczEngineClient.collective_rpc   s       "!r$   .c                    K   t           )zGet supported tasksrg   r    s    r"   get_supported_tasksz EngineClient.get_supported_tasks   s      !!r$   )NNr   NN)r   N)FF)rP   r   )rc   )Nr   N)9__name__
__module____qualname____doc__r
   __annotations__r	   r   r   propertyr   r   r#   boolr&   r)   r+   BaseExceptionr-   r   r   r   strr   dictr   r   intr   r   r8   r   r   r<   r   r>   r@   rB   rD   rG   rI   rK   rO   rS   listrV   rX   rZ   r^   r`   rb   ri   floattuplero   r   rq   r   r$   r"   r   r      s        ..####$$$$+,+++ ^ X+%D%%% ^ X%%D%%% ^ X%"""" ^ X".M... ^ X. #'+/5926)-  !J. ( 	 4Z "D( "#s(^d2 sCx(4/   $J 
t+	,   ^   ,026-159  & 	
 "D( sCx(4/  !$d
 "#s(^d2 
,d2	3   ^$ cHSM&9 d    ^ 3$333 ^3--- ^-   ^    ^    ^    ^ LQ &*EI	   ^   T    ^  $s)d"2 d    ^ 4    ^ ; 4    ^  ,1 	   %) 	
 
   ^     ^     ^
 AD" "&)":="	" " " " !%"" "" " 	"
 t" " " ""51C+D " " " " " "r$   r   N)"abcr   r   collections.abcr   r   r   typingr   vllm.configr	   r
   vllm.inputs.datar   vllm.lora.requestr   vllm.outputsr   r   vllm.plugins.io_processorsr   vllm.pooling_paramsr   vllm.renderersr   vllm.sampling_paramsr   
vllm.tasksr   vllm.v1.enginer   vllm.v1.engine.input_processorr   r   r   r$   r"   <module>r      s_   $ # # # # # # # = = = = = = = = = =       / / / / / / / / ' ' ' ' ' ' ) ) ) ) ) ) < < < < < < < < 2 2 2 2 2 2 - - - - - - ' ' ' ' ' ' / / / / / / $ $ $ $ $ $ , , , , , , 9 9 9 9 9 9l" l" l" l" l"3 l" l" l" l" l"r$   