
    .`i=                        d dl Z d dlmZmZ d dlmZ d dlmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZmZ d dl m!Z!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4 d dl5m6Z6 d dl7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z> d dl?m@Z@mAZA d dlBmCZCmDZD d dlEmFZF d d lGmHZH d d!lImJZJ  eeK          ZL ed"e#          ZM G d$ d%          ZNdS )&    N)CallableMapping)copy)Anycast)TypeVar)ParallelConfig
VllmConfig)1stateless_destroy_torch_distributed_process_group)get_dp_group)
EngineArgs)
PromptType)init_logger)LoRARequest)MULTIMODAL_REGISTRYMultiModalRegistry)PoolingRequestOutputRequestOutput)get_io_processor)PoolingParams)RendererLike)SamplingParams)SupportedTask)TokenizerLike)init_tracer)UsageContext)EngineCoreRequest)EngineCoreClient)InputProcessor)OutputProcessor)ParentRequest)Executor)StatLoggerFactoryStatLoggerManager)Metricget_metrics_snapshot)IterationStats)record_function_or_nullcontext)
WorkerBase_R)defaultc                      e Zd ZdZdej        deddfdedee	         de
de
ded	ee         dz  d
ede
de
ddfdZeej        ddfdeded	ee         dz  de
dd f
d            Zeej        ddfdeded	ee         dz  de
dd f
d            ZdefdZde
fdZde
de
fdZed             Zdeedf         fdZdJdee         de
ddfdZ	 	 	 	 	 	 dKded eez  d!ee z  d"e!dz  d#e"dz  d$e#ee$f         dz  d%e%eef         dz  d&ed'edz  ddfd(Z&dee'e(z           fd)Z)d* Z*d+ Z+d, Z,	 dLd-e
d.e
de
fd/Z-dMd1efd2Z.dNd3ee         dz  fd4Z/de
fd5Z0dee1         fd6Z2e3de4dz  fd7            Z5de4fd8Z6e3de7fd9            Z8dOd:Z9dOd;Z:d#e"de
fd<Z;d=ede
fd>Z<de=e         fd?Z>d=ede
fd@Z?	 	 	 dPdBee@eAgeBf         z  dCe!dz  dDedEe#ee$f         dz  deeB         f
dFZCdGe@eDjE        geBf         deeB         fdHZFdI ZGdS )Q	LLMEnginez-Legacy LLMEngine for backwards compatibility.FNvllm_configexecutor_class	log_statsaggregate_engine_loggingusage_contextstat_loggersmm_registryuse_cached_outputsmultiprocess_modereturnc
                    || _         |j        | _        |j        | _        |j        | _        || _        |j        }
|
j        }|
j        dk    o|dk    | _        |	s,|
j        dk    r!| j        s|
	                                | _
        nd | _
        d| _        t          | j                   | _        t          | j         | j        j                  | _        t#          | j        | j        | j         j        j                  | _        | j        j        }|t/          d|          }|| j        _        t3          j        |	d||| j                  | _        d | _        | j        r1t;          ||||          | _        | j                                         |	s| j        j        j        | _        | j        rtA                      j!        | _
        | "                                 d S )N   external_launcherF)r0   stream_intervalzvllm.llm_engine)r6   asyncio_moder.   r/   r0   )r.   custom_stat_loggersenable_default_loggersr1   )#r.   observability_configmodel_configcache_configr0   parallel_configdistributed_executor_backenddata_parallel_sizeexternal_launcher_dpstateless_init_dp_groupdp_groupshould_execute_dummy_batchr   input_processorr   io_processor_pluginio_processorr    	tokenizerscheduler_configr;   output_processorotlp_traces_endpointr   tracerr   make_clientengine_corelogger_managerr$   log_engine_initializedmodel_executorr   	cpu_groupreset_mm_cache)selfr.   r/   r0   r1   r2   r3   r4   r5   r6   rB   executor_backendendpointrP   s                 m/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/engine/llm_engine.py__init__zLLMEngine.__init__2   s    '$/$D!'4'4"%5*G .2 8 $77 	! "	!2Q66- 7 ,CCEEDMM DM*/'-d.>??,1
 
 !0Nn ,=M!
 !
 !

 ,A !2H==F+1D!( ,7/#)n
 
 
 9=> 	9"3'$0'0)A	# # #D 66888  	N"&"2">"MD$ 	5 )NN4DM 	    disable_log_statsc                 ^     | |t          j        |          | ||t          j                  S )Nr.   r/   r0   r2   r3   r6   )r"   	get_classenvsVLLM_ENABLE_V1_MULTIPROCESSING)clsr.   r2   r3   r^   s        r[   from_vllm_configzLLMEngine.from_vllm_config   s@     s##-k::,,'%"A
 
 
 	
r]   engine_argsenable_multiprocessingc                     |                     |          }t          j        |          }t          j        rt
                              d           d} | |||j         |||          S )z0Creates an LLM engine from the engine arguments.z'Enabling multiprocessing for LLMEngine.Tr`   )create_engine_configr"   ra   rb   rc   loggerdebugr^   )rd   rf   r2   r3   rg   r.   r/   s          r[   from_engine_argszLLMEngine.from_engine_args   s~     "66}EE!+K88. 	*LLBCCC%)" s#)%77'%4
 
 
 	
r]   c                 4    | j                                         S N)rN   get_num_unfinished_requestsrX   s    r[   ro   z%LLMEngine.get_num_unfinished_requests   s    $@@BBBr]   c                     | j                                         }| j        |p| j                                        S |                     |          S rn   )rN   has_unfinished_requestsrG   rR   dp_engines_runninghas_unfinished_requests_dp)rX   has_unfinisheds     r[   rr   z!LLMEngine.has_unfinished_requests   sL    .FFHH= !JT%5%H%H%J%JJ..~>>>r]   ru   c                 P    t          j        | j        |          }|s	|rd| _        |S NT)r	   has_unfinished_dprG   rH   )rX   ru   aggregated_has_unfinisheds      r[   rt   z$LLMEngine.has_unfinished_requests_dp   s<    $2$DM>%
 %
!  	3"; 	3.2D+((r]   c                     |S rn    )rd   outputsoutput_types      r[   validate_outputszLLMEngine.validate_outputs   s    r]   .c                 4    | j                                         S rn   )rR   get_supported_tasksrp   s    r[   r   zLLMEngine.get_supported_tasks   s    33555r]   request_idsinternalc                 p    | j                             ||          }| j                            |           dS )z3Remove request_ids from EngineCore and Detokenizer.N)rN   abort_requestsrR   )rX   r   r   s      r[   abort_requestzLLMEngine.abort_request   s9     +::;QQ''44444r]   r   
request_idpromptparamsarrival_timelora_requesttokenization_kwargstrace_headerspriorityprompt_textc
           
         t          |t                    st          dt          |                     t          |t                    r(|}
||
j        k    rt                              d           n}|	J | j        	                    ||||||||          }
t          |t                    r|}	n@t          |t                    r+t          t          d z  |                    d                    }	| j                            |
           |
j        }t          |t                    r|j        nd}|dk    r9| j                            |
|	d d           | j                            |
           d S t)          |
          }t+          |          D ]y}|                    |          \  }}||dz
  k    r|
nt/          |
          }||_        ||_        | j                            ||	||           | j                            |           zd S )Nz!request_id must be a string, got zAsyncLLM.add_request() was passed a request_id parameter that does not match the EngineCoreRequest.request_id attribute. The latter will be used, and the former will be ignored.r   r9   r   )
isinstancestr	TypeErrortyper   r   rj   warning_oncerI   process_inputsr   r   getassign_request_idr   r   nrN   add_requestrR   r!   rangeget_child_infor   sampling_params)rX   r   r   r   r   r   r   r   r   r   requestr   
parent_reqidxchild_paramschild_requests                   r[   r   zLLMEngine.add_request   s9    *c** 	TRZ@P@PRRSSS f/00 	EGW///##K   &&&*99#	 	G &#&& E$FG,, E"3:vzz(/C/CDD..w777 "6>::AFHH66!--g{D!LLL((111F #7++
88 	8 	8C'1'@'@'E'E$J'*a!e||GGgM'1M$,8M) !--{J   ((7777	8 	8r]   c                 z   | j         r"d| _         | j                                         g S t          d          5  | j                                        }d d d            n# 1 swxY w Y   t          d          5  | j        rt                      nd }| j                            |j	        |j
        |          }| j                            |j                   d d d            n# 1 swxY w Y   t          d          5  | j                            |j                   d d d            n# 1 swxY w Y   t          d          5  | j        T|j        M| j                            |j        || j                                                   |                                  d d d            n# 1 swxY w Y   |j        S )NFzllm_engine step: get_outputz llm_engine step: process_outputs)engine_core_timestampiteration_statszllm_engine step: abort_requestszllm_engine step: record_stats)scheduler_statsr   mm_cache_stats)rH   rR   execute_dummy_batchr(   
get_outputr0   r'   rN   process_outputsr|   	timestampupdate_scheduler_statsr   r   reqs_to_abortrS   recordrI   stat_mm_cachedo_log_stats_with_intervalrequest_outputs)rX   r|   r   processed_outputss       r[   stepzLLMEngine.step  s   * 	.3D+00222I ,,IJJ 	4 	4&1133G	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 ,,NOO 	R 	R26.Jn...dO $ 5 E E&-&7 / !F ! !
 !889PQQQ	R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	R 	R ,,MNN 	M 	M++,=,KLLL	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M ,,KLL 	2 	2".73J3V#**$+$;$3#'#7#E#E#G#G +   
 //111	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 !00sI   AA#&A#9AC##C'*C'= D))D-0D-AF++F/2F/c                 :    | j                             d           d S rw   rR   profilerp   s    r[   start_profilezLLMEngine.start_profile>  s      &&&&&r]   c                 :    | j                             d           d S )NFr   rp   s    r[   stop_profilezLLMEngine.stop_profileA  s      '''''r]   c                 j    | j                                          | j                                         d S rn   )rI   clear_mm_cacherR   rW   rp   s    r[   rW   zLLMEngine.reset_mm_cacheD  s2    ++---'')))))r]   reset_running_requestsreset_connectorc                 8    | j                             ||          S rn   )rR   reset_prefix_cache)rX   r   r   s      r[   r   zLLMEngine.reset_prefix_cacheH  s$     22"O
 
 	
r]   r9   levelc                     | j                             |           | j        | j                            d|           d S d S )Nr9   )rR   sleeprS   record_sleep_state)rX   r   s     r[   r   zLLMEngine.sleepO  sI    u%%%*221e<<<<< +*r]   tagsc                     | j                             |           | j        | j                            dd           d S d S )Nr   )rR   wake_uprS   r   )rX   r   s     r[   r   zLLMEngine.wake_upU  sI      &&&*221a88888 +*r]   c                 4    | j                                         S rn   )rR   is_sleepingrp   s    r[   r   zLLMEngine.is_sleeping[  s    ++---r]   c                 @    | j         s
J d            t                      S )NzStat logging disabled)r0   r&   rp   s    r[   get_metricszLLMEngine.get_metrics^  s$    ~66666~#%%%r]   c                     | j         j        S rn   )rI   rL   rp   s    r[   rL   zLLMEngine.tokenizerb  s    #--r]   c                 4    | j                                         S rn   )rI   get_tokenizerrp   s    r[   r   zLLMEngine.get_tokenizerf  s    #11333r]   c                     | j         j        S rn   )rI   rendererrp   s    r[   r   zLLMEngine.rendereri  s    #,,r]   c                 J    | j         r| j                                          dS dS )z Log stats if logging is enabled.N)rS   logrp   s    r[   do_log_statszLLMEngine.do_log_statsm  s2     	&##%%%%%	& 	&r]   c                     t          j                     }t          | d          s|| _        || j        z
  t          j        k    r|                                  || _        dS dS )z,Log stats when the time interval has passed._last_log_timeN)timehasattrr   rb   VLLM_LOG_STATS_INTERVALr   )rX   nows     r[   r   z$LLMEngine.do_log_stats_with_intervalr  sg    ikkt-.. 	&"%D$$(DDD"%D EDr]   c                 6    | j                             |          S )z<Load a new LoRA adapter into the engine for future requests.)rR   add_lora)rX   r   s     r[   r   zLLMEngine.add_lora{  s    ((666r]   lora_idc                 6    | j                             |          S )z&Remove an already loaded LoRA adapter.)rR   remove_lorarX   r   s     r[   r   zLLMEngine.remove_lora  s    ++G444r]   c                 4    | j                                         S )zList all registered adapters.)rR   
list_lorasrp   s    r[   r   zLLMEngine.list_loras  s    **,,,r]   c                 6    | j                             |          S )z&Prevent an adapter from being evicted.)rR   pin_lorar   s     r[   r   zLLMEngine.pin_lora  s    ((111r]   r{   methodtimeoutargskwargsc                 <    | j                             ||||          S rn   )rR   collective_rpc)rX   r   r   r   r   s        r[   r   zLLMEngine.collective_rpc  s!     ..vwfMMMr]   funcc                 2    |                      d|f          S )Napply_model)r   )r   )rX   r   s     r[   r   zLLMEngine.apply_model  s    ""=w"???r]   c                 `    t          | dd           }|| j        st          |           d S d S d S )NrG   )getattrrE   r   )rX   rG   s     r[   __del__zLLMEngine.__del__  sC    4T22(A=hGGGGG  r]   )F)NNNNr   N)FF)r9   rn   )r7   N)Nr{   N)H__name__
__module____qualname____doc__r   ENGINE_CONTEXTr   r
   r   r"   boollistr#   r   r\   classmethodre   r   rl   intro   rr   rt   r~   tupler   r   r   r   r   r   r   r   floatr   dictr   r   r   r   r   r   r   r   rW   r   r   r   r   r%   r   propertyr   rL   r   r   r   r   r   r   r   setr   r   r   r)   r*   r   nnModuler   r   r{   r]   r[   r-   r-   /   sB       77 */&2&A7;*=#("'T TT XT 	T
 #'T $T ,-4T (T !T  T 
T T T Tl  '3&A7;"'
 

 $
 ,-4	

  
 

 
 
 [
   '3&A7;',
 

 $
 ,-4	

 !%
 

 
 
 [
6CS C C C C? ? ? ? ?) )$ ) ) ) )   [6U=#+=%> 6 6 6 65 5c 5d 5t 5 5 5 5 &*+/5926"&E8 E8E8 "J.E8 .	E8
 dlE8 "D(E8 "#s(^d2E8 sCx(4/E8 E8 4ZE8 
E8 E8 E8 E8N"1d=+??@ "1 "1 "1 "1H' ' '( ( (* * *
 MR
 
&*
EI
	
 
 
 
= =3 = = = =9 9DI, 9 9 9 9.T . . . .&T&\ & & & & .=4/ . . . X.4} 4 4 4 4 -, - - - X-& & & &
& & & &7[ 7T 7 7 7 753 54 5 5 5 5-CH - - - -2 2 2 2 2 2 !%(,N Nh
|R/00N N 	N
 S#X%N 
bN N N N@")b 9 @d2h @ @ @ @H H H H Hr]   r-   )Or   collections.abcr   r   r   typingr   r   torch.nnr   typing_extensionsr   	vllm.envsrb   vllm.configr	   r
   vllm.distributedr   vllm.distributed.parallel_stater   vllm.engine.arg_utilsr   vllm.inputsr   vllm.loggerr   vllm.lora.requestr   vllm.multimodalr   r   vllm.outputsr   r   vllm.plugins.io_processorsr   vllm.pooling_paramsr   vllm.renderersr   vllm.sampling_paramsr   
vllm.tasksr   vllm.tokenizersr   vllm.tracingr   vllm.usage.usage_libr   vllm.v1.enginer   vllm.v1.engine.core_clientr   vllm.v1.engine.input_processorr   vllm.v1.engine.output_processorr     vllm.v1.engine.parallel_samplingr!   vllm.v1.executorr"   vllm.v1.metrics.loggersr#   r$   vllm.v1.metrics.readerr%   r&   vllm.v1.metrics.statsr'   vllm.v1.utilsr(   vllm.v1.worker.worker_baser)   r   rj   r*   r-   r{   r]   r[   <module>r     s    - - - - - - - -                     % % % % % %       2 2 2 2 2 2 2 2 N N N N N N 8 8 8 8 8 8 , , , , , , " " " " " " # # # # # # ) ) ) ) ) ) C C C C C C C C < < < < < < < < 7 7 7 7 7 7 - - - - - - ' ' ' ' ' ' / / / / / / $ $ $ $ $ $ ) ) ) ) ) ) $ $ $ $ $ $ - - - - - - , , , , , , 7 7 7 7 7 7 9 9 9 9 9 9 ; ; ; ; ; ; : : : : : : % % % % % % H H H H H H H H ? ? ? ? ? ? ? ? 0 0 0 0 0 0 8 8 8 8 8 8 1 1 1 1 1 1	X		WT3kH kH kH kH kH kH kH kH kH kHr]   