
    .`iE                        d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	 d dl
mZ d dlmZmZ d dlmZmZ d dlmZ d dlmZmZmZ d dlZd dlZd d	lmZmZ d d
lmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0 d dl1m2Z2 d dl3m4Z4 d dl5m6Z6m7Z7 d dl8m9Z9m:Z:m;Z;m<Z<m=Z= d dl>m?Z? d dl@mAZA d dlBmCZCmDZDmEZEmFZFmGZGmHZHmIZImJZJmKZK d dlLmMZMmNZNmOZO d dlPmQZQ d dlRmSZS d dlTmUZU d dlVmWZW d dlXmYZYmZZZ d d l[m\Z\m]Z] d d!l^m_Z_ d d"l`maZa d d#lbmcZd  e"ee          Zfd$Zgd%Zh ed&          Zi G d' d(          Zj G d) d*ej          Zk G d+ d,ek          Zl G d- d.          Zm G d/ d0emel          Zn G d1 d2emek          ZodS )3    N)deque)Callable	Generator)Future)	ExitStackcontextmanager)isclass	signature)DEBUG)AnyTypeVarcast)ParallelConfig
VllmConfig)1stateless_destroy_torch_distributed_process_group)enable_envs_cache)init_logger)dump_engine_exception)LoRARequest)MULTIMODAL_REGISTRY)POOLING_TASKSSupportedTask)(maybe_register_config_serialize_by_value)freeze_gc_heapmaybe_attach_gc_debug_callback)get_hash_fn_by_name)make_zmq_socket)decorate_logsset_process_title)	BlockHash"generate_scheduler_kv_cache_configget_kv_cache_configsget_request_block_hasherinit_none_hash)SchedulerInterface)SchedulerOutput)	EngineCoreOutputEngineCoreOutputsEngineCoreRequestEngineCoreRequestTypeFinishReasonReconfigureDistributedRequestReconfigureRankTypeUtilityOutputUtilityResult)EngineHandshakeMetadataEngineZmqAddressesget_device_indices)Executor)KVCacheConfig)SchedulerStats)ModelRunnerOutput)RequestRequestStatus)MsgpackDecoderMsgpackEncoder)StructuredOutputManager)compute_iteration_details)__version__g      @   _Rc                      e Zd ZdZ	 	 d<dedee         dededz  def
d	Z	ded
e
eeef         fdZd
e
edf         fdZd=dedefdZdee         fdZedefd            Zedefd            Zd
e
eeef         ef         fdZded
dfdZd
e
eeef         dz  ef         fdZd Zd Zd>defdZ d  Z!	 d?d!ed"ed
efd#Z"d@d%efd&Z#dAd'ee         dz  fd(Z$d
efd)Z%d* Z&d+e'd
efd,Z(d-ed
efd.Z)d
e*e         fd/Z+d-ed
efd0Z,	 	 dBd1ed2edz  d3edz  d
dfd4Z-	 	 	 dCd6eede.f         z  d7e/dz  d8e
d9eee0f         dz  d
ee.         f
d:Z1de2d
e
eef         fd;Z3dS )D
EngineCorezInner loop of vLLM's Engine.NFvllm_configexecutor_class	log_statsexecutor_fail_callbackinclude_finished_setc                 p   ddl m}  |             || _        |j        j        s!t
                              dt          |           || _         ||          | _	        || j	        
                    |           d| _        |                     |          \  }}}	||j        _        ||j        _        |                     d||f           t#          |          | _        |j                                        }
t+          |	j                  dk    r2|j        j        r&t
                              d           d|j        _        |j        j        |j        j        z  |j        j        z  } |
||	| j        || j        |	          | _        |j        d u| _        | j        j        $| j	                             | j        j                   tB          x| _"        }|#                    |          | _$        | j        %                                }|N| j	        &                                }|r3i }|D ]}||'                    |           |(                    |           | j	        j)        | _*        d | _+        | j*        d
k    r:t
          ,                    d| j*                   t[          | j*                  | _+        |j.        d uo|j.        j/        | _/        |j0        j1        dk    | _2        d | _3        |j        j4        s|=tk          |j        j6                  }to          |           tq          ||          | _3        | j+        | j9        n| j:        | _;        |j        j<        | _<        t{          j>        t~          t                                        | _A        t                       t                       t                       d S )Nr   )load_general_pluginsz2Initializing a V1 LLM engine (v%s) with config: %sinitialize_cacheargsz3Disabling chunked prefill for model without KVCacheF)rB   kv_cache_configstructured_output_managerrF   rD   
block_size   z#Batch queue is enabled with size %d)maxlenpooling)Evllm.pluginsrH   rB   parallel_configdata_parallel_rank_localloggerinfoVLLM_VERSIONrD   model_executorregister_failure_callback!available_gpu_memory_for_kv_cache_initialize_kv_cachescache_confignum_gpu_blocksnum_cpu_blockscollective_rpcr;   rN   scheduler_configget_scheduler_clslenkv_cache_groupsenable_chunked_prefillwarningrO   decode_context_parallel_sizeprefill_context_parallel_size	schedulerspeculative_configuse_spec_decode	connectorinit_kv_output_aggregatorr   mm_registry!engine_receiver_cache_from_configmm_receiver_cacheget_kv_connector#get_kv_connector_handshake_metadataupdateset_xfer_handshake_metadatamax_concurrent_batchesbatch_queue_sizebatch_queuedebugr   ec_transfer_configis_ec_producermodel_configrunner_typeis_pooling_modelrequest_block_hasherenable_prefix_cachingr   prefix_caching_hash_algor$   r#   stepstep_with_batch_queuestep_fnasync_schedulingqueueQueueliststraborts_queuer   r   r   )selfrB   rC   rD   rE   rF   rH   r^   r_   rM   	Schedulerscheduler_block_sizern   kv_connectorxfer_handshake_metadatacontentworker_dictcaching_hash_fns                     g/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/engine/core.py__init__zEngineCore.__init__Q   s    	655555&*C 	KKD   # -n[99!-99:PQQQ13. ;?:T:T;
 ;
7 3A /2@ /.nn5UVVV)@)M)M&  0BBDD	.//144 +B LTUUUFK,C $/)FG)GH 	 .7Y#+&*&D!5n+.
 .
 .
  +=TI>#/99$.:RSSS)<<;!,!N!N"
 "
 ~6688# #GGII $ ' B +-#: 4 4K".{33388AAA !% 3 J  	  1$$LL>@UVVV$D,ABBBD *$6 >.= 	 !, 8 D	 QQU!#9 	\=U1(A O ?+++(@$o) )D%
 )1DIIt7Q 	 !, < M!KS	244 	&((( 	    returnc                    t          j                     }| j                                        }t          d |D                       }|rt          j                            d          dk    rHt          | dd           }|J t          j	        |d          | _
        | j
        gt          |          z  }n:| j                                        }|d         | _
        ndgt          |          z  }t          |          t          |          k    sJ |j        j        }t          |||          }|j        j        }	|	|k    r|                     d|	f           t#          |          }
|
j        }d}| j                            |           t          j                     |z
  }t(                              d	|d
           |||
fS )Nc              3      K   | ]}|V  d S N ).0kv_cache_specs     r   	<genexpr>z3EngineCore._initialize_kv_caches.<locals>.<genexpr>   s"      MM]=MMMMMMr   VLLM_ELASTIC_EP_SCALE_UP_LAUNCH1dp_grouprI   r   update_max_model_lenrK   zFinit engine (profile, create kv cache, warmup model) took %.2f secondslocal)scope)timerY   get_kv_cache_specsanyosenvirongetgetattrr   sync_kv_cache_memory_sizer[   rc   determine_available_memoryr{   max_model_lenr"   r`   r!   
num_blocksinitialize_from_configrV   	info_once)r   rB   startkv_cache_specshas_kv_cacher   available_gpu_memorymax_model_len_beforekv_cache_configsmax_model_len_afterscheduler_kv_cache_configr^   r_   elapseds                 r   r\   z EngineCore._initialize_kv_caches   s    	 ,??AAMMnMMMMM 	=z~~?@@CGG"4T::+++"<XrJJ 6 )-(N'ORU"S S ($$ (,':'U'U'W'W$9Ma9P66 %&3^)<)<#< >""c*>&?&?????  +7E/)=
 
 *6D"666 6>Q=STTT$FGW$X$X!2= 	223CDDD)++%T 	 	
 	
 	

 ~/HHHr   .c                     | j         j        S r   )rY   supported_tasksr   s    r   get_supported_taskszEngineCore.get_supported_tasks  s    "22r   r   requestrequest_wavec                    t          |j        t                    s$t          dt	          |j                             |j        x}rAd |                                 D             }|j        |vrt          d|j        d|           |j	        3| j
                                        st                              d           | j
                            |           dS )zAdd request to the scheduler.

        `request_wave`: indicate which wave of requests this is expected to
        belong to in DP case
        z!request_id must be a string, got c                 $    g | ]}|t           v |S r   )r   )r   tasks     r   
<listcomp>z*EngineCore.add_request.<locals>.<listcomp>'  s)     ' ' 't}?T?T?T?T?Tr   zUnsupported task: z Supported tasks: NzXGot kv_transfer_params, but no KVConnector found. Disabling KVTransfer for this request.)
isinstance
request_idr   	TypeErrortypepooling_paramsr   r   
ValueErrorkv_transfer_paramsri   rq   rV   rf   add_request)r   r   r   r   supported_pooling_taskss        r   r   zEngineCore.add_request  s#    ',c22 	ND9K4L4LNN   %33> 		' '!%!9!9!;!;' ' '# "*AAA B)< B B(?B B  
 %1//11 2 NN9  
 	""7+++++r   request_idsc                 P    | j                             |t          j                   dS )z"Abort requests from the scheduler.N)ri   finish_requestsr8   FINISHED_ABORTED)r   r   s     r   abort_requestszEngineCore.abort_requests;  s%     	&&{M4RSSSSSr   scheduler_outputc              #      K   	 dV  dS # t           $ r4}t          | j        || j                                                   |d}~ww xY w)z3Execute the model and log detailed info on failure.N)	Exceptionr   rB   ri   
make_stats)r   r   errs      r   log_error_detailzEngineCore.log_error_detailC  sk      	EEEEE 		 		 		 " "2DN4M4M4O4O   I		s   
 
A/AAc              #   B  K   | j         j        j        sd V  d S t          | dd          | _        t          |          }t          j                    }d V  t          	                    d
                    dt          | j                  dt          |j                  dt          |j                  dt          |j                  dt          |j                  d	t!          t          j                    |z
  d
z  d          dg                     | xj        dz  c_        d S )N_iteration_indexr    z
Iteration(z): z context requests, z context tokens, z generation requests, z, generation tokens, iteration elapsed time: i  z.2fz msrP   )rB   observability_config enable_logging_iteration_detailsr   r   r<   r   	monotonicrV   rW   joinr   num_ctx_requestsnum_ctx_tokensnum_generation_requestsnum_generation_tokensformat)r   r   iteration_detailsbefores       r   log_iteration_detailsz EngineCore.log_iteration_detailsS  s%     4U 	EEEF '.@! D D56FGG!!GG -..):;;))899')ABB,)?@@BDN,,v5=uEE 	
 	
 	
& 	"r   c                 f   | j                                         si dfS | j                                         }| j                            |d          }| j                             |          }|                     |          5  |                     |          5  |                                }|| j        	                    |          }ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   | 
                                 | j                             ||          }||j        dk    fS )zSchedule, execute, and make output.

        Returns tuple of outputs and a flag indicating whether the model
        was executed.
        FT	non_blockNr   )ri   has_requestsschedulerY   execute_modelget_grammar_bitmaskr   r   resultsample_tokens_process_aborts_queueupdate_from_outputtotal_num_scheduled_tokens)r   r   futuregrammar_outputmodel_outputengine_core_outputss         r   r   zEngineCore.stepq  s    ~**,, 	u9>2244$223Ct2TT;;<LMM!!"233	Q 	Q&&'788	Q 	Q "==??L##2@@PP	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	Q 	""$$$"n??l
 
 #$4$ORS$SSSs6   C,1C	C,C	C,C	C,,C03C0model_executedc                     | j         s@| j        r;|r;| j                                        }|"| j                            |           d S d S d S d S d S r   )r   rk   rY   take_draft_token_idsri   update_draft_token_ids)r   r   draft_token_idss      r   	post_stepzEngineCore.post_step  s     $ 	G)= 	G. 	G"1FFHHO*55oFFFFF		G 	G 	G 	G 	G 	G +*r   c                 |   | j         }|J t          |          | j        k     sJ d}d}| j                                        r| j                                        }| j                            |d          }| j        s|j	        dk    }| j
        s|s!t          t          t                   |          }n@|j        s7| j                            |          }| j                            |d          }n|}|sT|                    |||f           |r:t          |          | j        k     r"|d         d                                         sdS n|sdS |                                \  }}}|                     |          5  |                     |          5  |                                }	|	#|                                 t/          d	          	 ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   |                                  | j                            ||	          }
|r| j        r8| j                                        }|J | j                            ||           | j                            |          }| j                            |d          }|                    |||f           |
|fS )
a  Schedule and execute batches with the batch queue.
        Note that if nothing to output in this step, None is returned.

        The execution flow is as follows:
        1. Try to schedule a new batch if the batch queue is not full.
        If a new batch is scheduled, directly return an empty engine core
        output. In other words, fulfilling the batch queue has a higher priority
        than getting model outputs.
        2. If there is no new scheduled batch, meaning that the batch queue
        is full or no other requests can be scheduled, we block until the first
        batch in the job queue is finished.
        3. Update the scheduler from the output.
        NFTr   r   rI   NTNFzunexpected error)rw   rc   rv   ri   r   r   rY   r   rz   r   r}   r   r   r6    pending_structured_output_tokensr   r   
appendleftdonepopr   r   r   RuntimeErrorr   r   rk   r    update_draft_token_ids_in_output)r   rw   r   deferred_scheduler_outputr   exec_futurer   r   exec_model_futr   r   r   s               r   r   z EngineCore.step_with_batch_queue  s     &&&&
 ;$"77777$(!>&&(( *	#~6688-;; D <  K & Q!1!Lq!P$ AN Af%67EE'H A &*^%G%G(& &N "0>>&$ ?  FF 1A-, 
&&&0@+'NOOO"&K((4+@@@'OA.3355 A
 &: 	 ; 4???3D3D0 .!!"233		7 		7&&'788		7 		7 "==??L# %%'''"#5666	 $		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 		7 	""$$$"n??l
 
 % 	U # "&"5"J"J"L"L&222 ??#%>  
 "^??) N (66~QU6VVF""F,E{#STTT"N22s6   =G1:GG1G	G1!G	"G11G58G5c                 Z   | j                                         sg }| j                                         s_| j                                         }|                    t	          |t
                    r|fn|           | j                                         _|                     |           d S d S r   )r   empty
get_nowaitextendr   r   r   )r   r   idss      r   r   z EngineCore._process_aborts_queue  s     &&(( 	-K'--// L'2244""ZS-A-A#JC66sKKK '--// L
 ,,,,,	- 	-r   c                     | j                                          | j        r| j                                         | j        r| j                                         d S d S r   )rN   clear_backendrY   shutdownri   r   s    r   r  zEngineCore.shutdown  se    &44666 	+((***> 	&N##%%%%%	& 	&r   Tis_startc                 :    | j                             |           d S r   )rY   profile)r   r  s     r   r  zEngineCore.profile  s    ##H-----r   c                     | j                                         rt                              d           | j        | j                                         | j                                         d S )NzcResetting the multi-modal cache when requests are in progress may lead to desynced internal caches.)ri   has_unfinished_requestsrV   rf   rp   clear_cacherY   reset_mm_cacher   s    r   r  zEngineCore.reset_mm_cache"  sq     >1133 	NND   !-"..000**,,,,,r   reset_running_requestsreset_connectorc                 8    | j                             ||          S r   )ri   reset_prefix_cache)r   r  r  s      r   r  zEngineCore.reset_prefix_cache1  s#     ~00"O
 
 	
r   rP   levelc                 :    | j                             |           d S r   )rY   sleep)r   r  s     r   r  zEngineCore.sleep8  s    !!%(((((r   tagsc                 :    | j                             |           d S r   )rY   wake_up)r   r  s     r   r   zEngineCore.wake_up;  s    ##D)))))r   c                     | j         j        S r   )rY   is_sleepingr   s    r   r"  zEngineCore.is_sleeping>  s    "..r   c                 8    | j                                          d S r   )rY   execute_dummy_batchr   s    r   r$  zEngineCore.execute_dummy_batchA  s    //11111r   lora_requestc                 6    | j                             |          S r   )rY   add_lora)r   r%  s     r   r'  zEngineCore.add_loraD  s    "++L999r   lora_idc                 6    | j                             |          S r   )rY   remove_lorar   r(  s     r   r*  zEngineCore.remove_loraG  s    "..w777r   c                 4    | j                                         S r   )rY   
list_lorasr   s    r   r-  zEngineCore.list_lorasJ  s    "--///r   c                 6    | j                             |          S r   )rY   pin_lorar+  s     r   r/  zEngineCore.pin_loraM  s    "++G444r   pathpatternmax_sizec                 @    | j                             |||           d S )N)r0  r1  r2  )rY   save_sharded_state)r   r0  r1  r2  s       r   r4  zEngineCore.save_sharded_stateP  s5     	..w 	/ 	
 	
 	
 	
 	
r   r   methodtimeoutrL   kwargsc                 <    | j                             ||||          S r   )rY   r`   )r   r5  r6  rL   r7  s        r   r`   zEngineCore.collective_rpcZ  s!     "11&'4PPPr   c                     | j         +|j        r$| j                             |j                  |_        t          j        || j                  }|j        r| j                            |           ||j	        fS )zPreprocess the request.

        This function could be directly used in input processing thread to allow
        request initialization running in parallel with Model forward
        )
rp   mm_featuresget_and_update_featuresr7   from_engine_core_requestr~   use_structured_outputrN   grammar_initcurrent_wave)r   r   reqs      r   preprocess_add_requestz!EngineCore.preprocess_add_requestc  s     !-'2E-"&"8"P"P## #G .w8QRR$ 	= *77<<<G(((r   r   r   )T)FF)rP   r   )NN)Nr   N)4__name__
__module____qualname____doc__r   r   r3   boolr   r   tupleintr4   r\   r   r   r7   r   r   r   r   r   r&   r   r   dictr(   r   r   r   r   r  r  r  r  r  r   r"  r$  r   r'  r*  setr-  r/  r4  r?   floatr   r`   r)   rA  r   r   r   rA   rA   N   s       && 37%*I II XI 	I
 !)4I #I I I IV9I%9I	sC&	'9I 9I 9I 9Iv3U=#+=%> 3 3 3 3, ,7 ,# , , , ,BT$s) T T T T     ^ #o # # # ^#:TeD&7!78$>? T T T T>G G G G G Gr3	tC**+d2D8	9r3 r3 r3 r3h- - -& & &. . . . . .- - -  MR
 
&*
EI
	
 
 
 
) )3 ) ) ) )* *DI, * * * */T / / / /2 2 2:[ :T : : : :83 84 8 8 8 80CH 0 0 0 05 5 5 5 5 5 ##	
 

 t
 *	

 

 
 
 
 !%(,Q QhsBw''Q Q 	Q
 S#X%Q 
bQ Q Q Q).? )E'SV,DW ) ) ) ) ) )r   rA   c                   ,    e Zd ZdZdZ	 d-dddededed	ee	         d
ededz  de
f fdZededededededz  deeddf         fd            Ze	 d-dej        dededededededz  deeddf         fd            Ze	 d-dej        dedededz  def
d            Zedddde
de
fd            ZdefdZd Zd ZdefdZd ed!eddfd"Zed#             Z d$ Z!d%e"e         d&edz  ded'e#j$        fd(Z%d)e"e         d*edz  de
fd+Z&d!e'ddfd,Z( xZ)S ).EngineCoreProcz9ZMQ-wrapper for running EngineCore in background process.s   ENGINE_CORE_DEADNr   engine_indexrB   local_clienthandshake_addressrC   rD   client_handshake_addressrP  c                *    t          j        t          t          t          f                               _        t          j        t          t          t          f         t          z                        _	         fd}| _
         j
                            dd          }	d _                             ||	|||          5 }
t          |
j                   _        |
j        d u _        |
j         _        t(                              d j         j                    j        o|j        j         }| _                             |           t5                                          |||||           t9          j                    }t9          j         j        |
j         |
j!        |	|fd          }|"                                 t9          j         j#        |
j        |
j         j
        fd           _$         j$        "                                 |%                    d	
          s\|&                                stO          d          |
j!        J t(          (                    d           |%                    d	
          \d d d            d S # 1 swxY w Y   d S )Nc                  P     j                             t          j        df          S )Nr   )input_queue
put_nowaitr*   EXECUTOR_FAILEDr   s   r   <lambda>z)EngineCoreProc.__init__.<locals>.<lambda>  s%    )9)D)D"2C8*
 *
 r      little)length	byteorderFz1Has DP Coordinator: %s, stats publish address: %sT)targetrL   daemon
   r6  z'Input socket thread died during startupz0Waiting for READY message from DP Coordinator...))r   r   rH  r*   r   rV  rI  r(   bytesoutput_queuerP  to_bytesengines_running_perform_handshakesrc   outputsclient_countcoordinator_outputhas_coordinatorfrontend_stats_publish_addressrV   rx   rT   data_parallel_external_lbpublish_dp_lb_stats_init_data_parallelsuperr   	threadingEventThreadprocess_input_socketsinputscoordinator_inputr   process_output_socketsoutput_threadwaitis_aliver  rW   )r   rB   rQ  rR  rC   rD   rS  rP  rE   identity	addressesinternal_dp_balancingready_eventinput_thread	__class__s   `             r   r   zEngineCoreProc.__init__  s.    !;u-BC-G'HIKK!Kc3D.D(E(MNPP"
 "
 "
 "
 )$--Q(-KK$%%$
 
 H	P  #I$5 6 6D $-#?t#KD 8 / LLC$3   $ N#3MM " (=D$$$[111GG&%   $/++K$+1$/	 	 	 	L    !*!12%0%
 " " "D $$&&& "&&r&22 P#,,.. R&'PQQQ 2>>>NOOO	 "&&r&22 PIH	P H	P H	P H	P H	P H	P H	P H	P H	P H	P H	P H	P H	P H	P H	P H	P H	P H	Ps   =F>JJJrz  r   c           	   #     K   t          j                    }|o|du }| }|                     |||||||j                  }	||	5 }
|
V  ddd           n# 1 swxY w Y   nn|sJ |                     |||dd|          }|	5 }
|5 }|j        |
_        |j        |
_        |
V  ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   |                                 dS )a  
        Perform startup handshakes.

        For DP=1 or offline mode, this is with the colocated front-end process.

        For DP>1 with internal load-balancing this is with the shared front-end
        process which may reside on a different node.

        For DP>1 with external or hybrid load-balancing, two handshakes are
        performed:
            - With the rank 0 front-end process which retrieves the
              DP Coordinator ZMQ addresses and DP process group address.
            - With the colocated front-end process which retrieves the
              client input/output socket addresses.
        with the exception of the rank 0 and colocated engines themselves which
        don't require the second handshake.

        Here, "front-end" process can mean the process containing the engine
        core client (which is the API server process in the case the API
        server is not scaled out), OR the launcher process running the
        run_multi_api_server() function in serve.py.
        NTF)zmqContext_perform_handshakerT   rt  rg  __post_init__)r   rR  rz  rQ  rB   rS  	input_ctxis_localheadless	handshaker{  local_handshakeclient_addressess                r   rf  z"EngineCoreProc._perform_handshakes  s     > KMM	D$<$D##++'
 
	 $+  i                               <"553XtUK O   i  <L#3#:	 $4$<	!                                                            	!!#####sH   AAA CB, C,B0	0C3B0	4CC
Cctxr  parallel_config_to_updatec              #     K   t          ||t          j        |dd          5 }|                     ||||          }	|	V  |j        j        }
| j        }d|||
|d}|j        j        dk    r|j        	                                |d<   |
                    t          j                            |                     d d d            d S # 1 swxY w Y   d S )Ni  F)rz  lingerbindREADY)statusr   r  r^   dp_stats_addressrP   parallel_config_hash)r   r  DEALERstartup_handshaker]   r^   rk  rT   data_parallel_sizecompute_hashsendmsgspecmsgpackencode)r   r  rR  rz  rQ  r  rB   r  handshake_socketr{  r^   r  	ready_msgs                r   r  z!EngineCoreProc._perform_handshake  s^      J
 
 
 "	E .. ,:S I OOO )5DN  $B "%$"0$4 I *=AA/<<>> 01 !!'/"8"8"C"CDDDE"	E "	E "	E "	E "	E "	E "	E "	E "	E "	E "	E "	E "	E "	E "	E "	E "	E "	Es   BCC	C	r  rT   c                 *   |                      t          j                            d||d                     t                              d           |                     t          dz            st          dt           d          | 	                                }t          j        
                    |t                    }t                              d	|           |0|j                                        D ]\  }}t          |||           |j        S )
NHELLO)r  r   r  z(Waiting for init message from front-end.i`  ra  z7Did not receive response from front-end process within z minutesr   zReceived init message: %s)r  r  r  r  rV   rx   pollHANDSHAKE_TIMEOUT_MINSr  recvdecoder0   rT   itemssetattrr{  )r  rQ  r  rT   
init_bytesinit_messagekeyvalues           r   r  z EngineCoreProc.startup_handshakeJ  s4    	O""%) (  	
 	
 	
 	?@@@$$-Cf-L$MM 	"8    
 &**,,
070F0F4 1G 1
 1
 	0,???&*:@@BB 5 5
Ue4444%%r   )dp_ranklocal_dp_rankr  r  c                 ,  
 d
t                       
fd}t          j        t          j        |           t          j        t          j        |           d}	 |d         }|j        }|j        dk    p| dk    }|r||_        t          dd|             nt          d           t                       |rG|j	        @|j	        j
         d	| |j	        _
        t                              d
|j	        j
                   | |_        |r |j        j        r| |_        t#          |i |}n#d|_        d|_        d|_        t'          |d| i|}|                                 n# t*          $ r t                              d            t,          $ rR}	|t                              d           n.t                              d           |                                 |	d}	~	ww xY w	 ||                                 dS dS # ||                                 w w xY w)z2Launch EngineCore busy loop in background process.Fc                 ,    sdt                      d S r   )
SystemExit)signumframeshutdown_requesteds     r   signal_handlerz6EngineCoreProc.run_engine_core.<locals>.signal_handler|  s%    % #%)" ll"# #r   NrB   rP   r   rA   DP_dpz*Setting kv_transfer_config.engine_id to %srP  EngineCore exiting.zEngineCore failed to start.%EngineCore encountered a fatal error.)r   signalSIGTERMSIGINTrT   r  rU   r   r   kv_transfer_config	engine_idrV   rx   data_parallel_indexr{   is_moedata_parallel_rankDPEngineCoreProcdata_parallel_size_localrN  run_busy_loopr  r   	exception_send_engine_deadr  )r  r  rL   r7  r  engine_corerB   rT   data_paralleler  s             @r   run_engine_corezEngineCoreProc.run_engine_corep  s    # 	1222	# 	# 	# 	# 	# 	fnn555fm^444-12	'&,]&;K.9.IO+>BQgPQkM 0;H8!,W????!,///OOO 	!?!K #5?SSMSS .8 @2<  
 3:O/ T!9!@ T5<2.???
 672;<8562,dSSFSS%%'''' 	 	 	LL./// 	 	 	"  !>????  !HIII--///G	 ( &$$&&&&& '&{&$$&&&& 's,   DE G: .G
AGGG: :Hc                     d S r   r   )r   rB   s     r   rn  z"EngineCoreProc._init_data_parallel  s    r   c                 V    	 |                                   |                                  ))z!Core busy loop of the EngineCore.)_process_input_queue_process_engine_stepr   s    r   r  zEngineCoreProc.run_busy_loop  s0    	(%%'''%%'''		(r   c                     d}| j         s | j                                        s| j        s| j                                        r}| j        j        5  | j        j        	                                 ddd           n# 1 swxY w Y   t                              t                    rt                              d           d}| j                                        } | j        |  | j         s | j                                        s| j        |rt                              d           | j                                        s>| j                                        } | j        |  | j                                        <dS dS )z0Exits when an engine step needs to be performed.FNzEngineCore waiting for work.TzEngineCore loop active.)re  ri   r   rw   rV  r	  r   mutexr   clearrV   isEnabledForr   rx   r   _handle_client_requestr
  )r   waitedr@  s      r   r  z#EngineCoreProc._process_input_queue  s    $	.N//11	. $	.
 %%'' "&, 4 4%+113334 4 4 4 4 4 4 4 4 4 4 4 4 4 4&&u-- "LL!?@@@!F"&&((C'D'-- $	.N//11	. $	.  	4LL2333 "((** 	."--//C'D'-- "((** 	. 	. 	. 	. 	.s   A;;A?A?c                 &   |                                  \  }}|r|                                ndD ]}| j                            |           |                     |           |s-| j                                        rt          j        d           |S )z5Called only when there are unfinished local requests.r   gMbP?)	r   r  rc  rW  r   ri   r  r   r  )r   rg  r   outputs       r   r  z#EngineCoreProc._process_engine_step  s     #',,..)08gmmooob 	1 	1F((0000~&&&  	$."H"H"J"J 	Jur   request_typer   c                    |t           j        k    r|\  }}|                     ||           dS |t           j        k    r|                     |           dS |t           j        k    r|\  }}}}t          |          }		 t          | |          }
 |
|                     |
|           }t          |          |	_
        nL# t          $ r?}t                              d|           d| dt          |           |	_        Y d}~nd}~ww xY w| j                            |t%          |	          f           dS |t           j        k    rt)          d          t                              d|           dS )zDispatch request from client.zInvocation of %s method failedzCall to z method failed: N)utility_outputzExecutor failed.z/Unrecognized input request type encountered: %s)r*   ADDr   ABORTr   UTILITYr.   r   _convert_msgspec_argsr/   r   BaseExceptionrV   r  r   failure_messagerc  rW  r(   rX  r  error)r   r  r   r@  r   
client_idxcall_idmethod_namerL   r  r5  r   r  s                r   r  z%EngineCoreProc._handle_client_request  s   
 0444 'CS,/////2888(((((2:::5<2Jd"7++F {33!;!;FD!I!IJ -f 5 5      !A;OOOD{DDCFFDD &&&&&&
 ((.fEEEF     2BBB1222LLA<    s   <=B: :
D5C>>Dc                     |s|S t          |           j                                        }t          |          t          |          k    sJ t	          d t          ||          D                       S )ztIf a provided arg type doesn't match corresponding target method
        arg type, try converting to msgspec object.c              3      K   | ]l\  }}t          |j                  rOt          |j        t          j                  r0t          ||j                  st          j        ||j                   n|V  mdS )r  N)r	   
annotation
issubclassr  Structr   convert)r   vps      r   r   z7EngineCoreProc._convert_msgspec_args.<locals>.<genexpr>  s       
 
 1	 q|$$1<88 q!,//GOAAL1111 
 
 
 
 
 
r   )r
   
parametersvaluesrc   rH  zip)r5  rL   	arg_typess      r   r  z$EngineCoreProc._convert_msgspec_args  s      	Kf%%07799	4yyC	NN**** 
 
 D),,
 
 
 
 
 	
r   c                     | j                             t          j                   | j                            d           | j                                        rt                              d           dS dS )z/Send EngineDead status to the EngineCoreClient.g      @ra  zNvLLM shutdown signal from EngineCore failed to send. Please report this issue.N)	rc  rW  rN  ENGINE_CORE_DEADrw  r   ry  rV   fatalr   s    r   r  z EngineCoreProc._send_engine_dead(  s     	$$^%DEEE 	,,,&&(( 	LL5    	 	r   input_addressescoord_input_addressr}  c                    t          t                    }t                      }t                      5 t          j                    5 fd|D             }|d}nF                    t          |t          j        d                    }|                    d           t          j	                    }	|D ]7}
|
                    d           |	
                    |
t          j                   8|:|                                dk    sJ |	
                    |t          j                   |                                 ~	 |	                                D ]\  }
}|
                    d	          ^}}t!          t#          |j                            }|t           j        k    rQ|                    |          }	 |                     |          }nd# t,          $ r |                     |           Y w xY w|                    |          }|t           j        k    r| j                            |           | j                            ||f           # 1 swxY w Y   ddd           dS # 1 swxY w Y   dS )
zInput socket IO thread.c                 r    g | ]3}                     t          |t          j        d                     4S )Frz  r  )enter_contextr   r  r  )r   input_addressr  rz  stacks     r   r   z8EngineCoreProc.process_input_sockets.<locals>.<listcomp>D  s[        " ###]CJPU     r   NFr     r   s   READYT)copy)r9   r)   r   r  r  r  r   XSUBr  PollerregisterPOLLINr  rK  r  recv_multipartr*   rb  bufferr  r  rA  r   _handle_request_preproc_errorr  r   rW  rV  )r   r  r  rz  r}  add_request_decodergeneric_decoderinput_socketscoord_socketpollerinput_socket_
type_framedata_framesr  r@  r   r  r  s      `             @@r   rs  z$EngineCoreProc.process_input_sockets6  sT    -->??(**[[ B	IE3;== B	IC      &5  M #*#$22#+!)"      !!'*** Z\\F - : : !!#&&&cj9999'#((**h6666cj999OOI'-{{}} I IOL!/;/J/JPU/J/V/V,J#8z?P9Q9Q#R#RL $'<'@@@1D1K1KK1X1X%&*&A&A#&F&FGG( % % % >>sCCC$H% #2"8"8"E"E'+@+FFF
 !-88AAA $//w0GHHHH5IQB	I B	I B	I B	I B	I B	I B	I B	I B	I B	I B	I B	I B	I B	I B	I B	I B	I B	I B	I B	I B	I B	I B	I B	I B	I B	IsU   I,E2I;GIG3	0I2G3	3A!II	I,I	I,,I03I0output_pathscoord_output_pathc                 @   t                      }g }t          t          t          j        t
          t          f                              }t                      5 t          j                    5 fd|D             }|1	                    t          |t          j        dd                    nd}t          |          dz   }		 | j                                        }
|
t          j        k    r|D ]}|                    |
           nft%          |
t&                    rJ |
\  }}||_        |dk    r-|J |                    |                    |                     |rU|d         d	         j        rB|                    |                                d
                    |r|d         d	         j        B|r|                                nt                      }|                    ||          }||                             |dd          }|j        s0t          |          dk    r|nd}|                    |||f           n(t          |          |	k     r|                    |           	 ddd           n# 1 swxY w Y   ddd           dS # 1 swxY w Y   dS )zOutput socket IO thread.c           
      p    g | ]2}                     t          |t          j        d                     3S )  )r  )r  r   r  PUSH)r   output_pathr  r  s     r   r   z9EngineCoreProc.process_output_sockets.<locals>.<listcomp>  sQ          ###CchtLLL   r   NFr  )r  r  rP   TrI   r   rZ  )r  track)r:   r   rH  r  MessageTrackerr   	bytearrayr   r  r  r   r  rc   rc  r   rN  r  r  r   rb  rP  send_multipartr  r  appendr  encode_intor   )r   r  r  rP  encoderreuse_bufferspendingsocketsr  max_reuse_bufsr  socketclient_indexrg  r   bufferstrackerrefr  r  s                     @@r   rv  z%EngineCoreProc.process_output_sockets  si    !"")+ c0#y@ABDD [[ 1	1E3;== 1	1C     $0	  G %0 ###.uT       !\\A-N1*..00^<<<") , ,F++++%fe44444(.%g'3$2%% (333 //w0G0GHHH  ;'"+a."5 ;!((q)9:::  ;'"+a."5 ; 1>N**,,,9;;!--gv>>!,/>>%t ?   | 1%(\\A%5%5''4C&&f'=>>>>''.88!((000?1
 /1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1 1	1s7   J+HI;/J;I?	?JI?	JJJc                     t                               d|j                   | j                            |j        t          | j        |j        ht          |j        g t          j
                  g          f           dS )zLog and return a request-scoped error response for exceptions raised
        from the add request preprocessing in the input socket processing thread.
        z*Unexpected error pre-processing request %s)r   new_token_idsfinish_reason)rP  finished_requestsrg  N)rV   r  r   rc  rW  r  r(   rP  r'   r+   ERROR)r   r   s     r   r  z,EngineCoreProc._handle_request_preproc_error  s     	8':L	
 	
 	
 	$$$!!%!2'.'9&:('.'9*,*6*<  
 
 
	
 	
 	
 	
 	
r   r   )*rC  rD  rE  rF  r  r   rG  r   r   r3   rI  r   r   rb  r   r1   rf  r  r  r   r  staticmethodSocketr  r  rn  r  r  r  r*   r   r  r  r  r   rp  rq  rs  rv  r)   r  __classcell__r  s   @r   rN  rN  |  s       CC* 04]P ]P ]P ]P]P ]P 	]P
 X]P ]P #&*]P ]P ]P ]P ]P ]P ]P~ 8$8$ 8$ 	8$
  8$ #&*8$ 
%tT1	28$ 8$ 8$ ^8$t  <@,E ,E[,E ,E 	,E
 ,E ,E  ,E $2D#8,E 
%tT1	2,E ,E ,E ^,E\ 
 26	#& #&*#&#& #& ($.	#&
 
#& #& #& \#&J ./a H' H' H' H' H' H' H' \H'Tz    ( ( (. . .6d    (1<?	   @ 
 
 \
   OIcOI !4ZOI 	OI
 _OI OI OI OIbD13iD1 :D1 	D1 D1 D1 D1L
5F 
4 
 
 
 
 
 
 
 
r   rN  c                        e Zd ZdZ	 ddedededee         dededz  f fd	Z	defd
Z
 fdZddedef fdZdededdf fdZd Zd ZdedefdZdeddfdZ xZS )r  zXZMQ-wrapper for running EngineCore in background process
    in a data parallel context.NrB   rQ  rR  rC   rD   rS  c           	          |j         j        s
J d            d| _        d| _        d| _        |j        j        }t                                          |||||||           d S )Nz3DPEngineCoreProc should only be used for MoE modelsr   r   r   rO  )	r{   r  step_counterr?  last_countsrT   r  ro  r   )	r   rB   rQ  rR  rC   rD   rS  r  r  s	           r   r   zDPEngineCoreProc.__init__  s     '. 	
 	
A	
 	
. ! -@$  	 	
 	
 	
 	
 	
r   c                     |j         j        }|j         j        }|j         j        }|dk    sJ |J d|cxk    r|cxk    r|k     sn J || _        |j                                         | _        d S )NrP   r   )rT   r  r  rU   r  stateless_init_dp_groupr   )r   rB   r  dp_sizer  s        r   rn  z$DPEngineCoreProc._init_data_parallel	  s    -@-@#3L{{{{(((M6666W6666w666666#3KKMMr   c                     t                                                       t          | dd           x}rt          |           d S d S )Nr   )ro  r  r   r   )r   r   r  s     r   r  zDPEngineCoreProc.shutdown  sS    tZ6668 	H=hGGGGG	H 	Hr   r   r   r   c                    | j         rT|| j        k    rI|| j        k    r|| _        n6| j        s/| j                            dt          | j                  f           t                                          ||           d S )NrI   )
start_wave)rj  r?  re  rc  rW  r(   ro  r   )r   r   r   r  s      r   r   zDPEngineCoreProc.add_request  s     	LD4E$E$Ed///$0!!)  !,,*d6GHHHI   	G\22222r   r  r   c                    |t           j        k    rS|\  }}|| j        k    r=|| j        k    r4|| _        | j        s(t
                              d|           d| _        d S d S d S d S t                                          ||           d S )Nz*EngineCore starting idle loop for wave %d.T)	r*   START_DP_WAVErP  r?  re  rV   rx   ro  r  )r   r  r   new_waveexclude_eng_indexr  s        r   r  z'DPEngineCoreProc._handle_client_request(  s     0>>>*1'H' D$555D---$,!+ 0LL!MxXXX+/D((( 65--0 0 GG**<AAAAAr   c                     | j         sd S | j                                        }|| j        k    rJ|| _        t	          || j        | j        d}| j                            dt          |          f           d S d S )N)r.  r?  rI   )scheduler_stats)
rm  ri   get_request_countsr/  r5   r.  r?  rc  rW  r(   )r   countsstatss      r   _maybe_publish_request_countsz.DPEngineCoreProc._maybe_publish_request_counts7  s    ' 	F 2244T%%%%D"d&7dFW  E ((".?PU.V.V.V)WXXXXX &%r   c                 :   	 |                                   |                                 }|                                  | j                                        }|s|s| j        sb|                                  |                     |          | _        | j        s| j        dk    s| j	        sZt                              d| j                   | j	        rdnd}| j                            |t          | j                  f           | xj        dz  c_        d| _        )z8Core busy loop of the EngineCore for data parallel case.Tr   z&Wave %d finished, pausing engine loop.rI   )wave_completerP   )r  r  r?  ri   r  re  r$  _has_global_unfinished_reqsr  rj  rV   rx   r?  rc  rW  r(   r.  )r   executedlocal_unfinished_reqsr  s       r   r  zDPEngineCoreProc.run_busy_loopD  sK   )	&%%''' 0022H..000$(N$J$J$L$L! +, T5I  ((*** $(#C#C%$ $D  ' &<1$$D,@$LL@$BS   *.)=#D221L%00(-D<MNNN   !!Q&!!$%!S)	&r   local_unfinishedc                 v    | xj         dz  c_         | j         dz  dk    rdS t          j        | j        |          S )NrP       r   T)r.  r   has_unfinished_dpr   )r   rE  s     r   rB  z,DPEngineCoreProc._has_global_unfinished_reqss  sD    Qr!Q&&4/?OPPPr   reconfig_requestc                 D   t          | j                   |                                  | j        j        }|j        }|j        |_        |j        dk    r|j        |_        |j	        t          j        k    sJ |j        |_        |j        |_        |j        dk    r%|j        | _        |                                | _        |j        |_        | j                            |           |j        |k    rF| j        dk    sJ t+          j        | j        | j                   | j                            d           |j        t          j        k    r6|                                  t2                              d| j                   d S t2                              d| j                   d S )NrI   r   compile_or_warm_up_modelzDPEngineCoreProc %s shutdownz4Distributed environment reinitialized for DP rank %s)r   r   r  rB   rT   r  new_data_parallel_sizenew_data_parallel_rankr  new_data_parallel_rank_localr-   KEEP_CURRENT_RANKnew_data_parallel_master_ipdata_parallel_master_ipnew_data_parallel_master_portdata_parallel_master_portr  r1  rY   reinitialize_distributedr[   r   r   r`   SHUTDOWN_CURRENT_RANKrV   rW   )r   rI  rT   old_dp_sizes       r   rU  z)DPEngineCoreProc.reinitialize_distributed{  s    	:$-HHH*:%8-=-T*2b881A1XO. 9"45 5 5 5 8 	/ : 	1 2b88*=DL+CCEEDM5 	6 	445EFFF2[@@9A==== 4tE  
 ../IJJJ3"89 9 MMOOOKK6EEEEEKKF    r   r   rB  )rC  rD  rE  rF  r   rG  r   r   r3   r   rn  r  r7   rI  r   r*   r   r  r?  r  rB  r,   rU  r)  r*  s   @r   r  r    s       # # 04
 

 
 	

 X
 
 #&*
 
 
 
 
 
>Nz N N N NH H H H H
3 37 3# 3 3 3 3 3 3B1B<?B	B B B B B BY Y Y-& -& -&^QD QT Q Q Q Q2 =2	2 2 2 2 2 2 2 2r   r  c                       e Zd ZdZ	 	 ddedededefdZdedefdZdeded	e	fd
Z
ede	dededede	dz  f
d            Zd Zd ZdS )EngineCoreActorMixinzE
    Ray actor for running EngineCore in a data parallel context
    r   rB   r{  r  r  c                 p    || _         ||j        _        ||j        _        |                     ||           d S r   )r{  rT   r  rU   _set_visible_devices)r   rB   r{  r  r  s        r   r   zEngineCoreActorMixin.__init__  s?     #:A#7?L#<& 	!!+}=====r   c                 z    ddl m} |                                rd S |j        }|                     |||           d S )Nr   )current_platform)vllm.platformsr]  is_xpudevice_control_env_var_set_cuda_visible_devices)r   rB   r  r]  r`  s        r   r[  z)EngineCoreActorMixin._set_visible_devices  s`    333333""$$ 	D%5%L"**],B    r   r`  c                     |j         j        }	 t          |||          }|t          j        |<   d S # t
          $ r=}t          d| d||z   d|dz   |z   dt          j        |           d	          |d }~ww xY w)NzError setting z: local range: [z, rP   z) base value: "")rT   
world_sizer2   r   r   
IndexErrorr   getenv)r   rB   r  r`  rd  r  r  s          r   ra  z.EngineCoreActorMixin._set_cuda_visible_devices  s     !0;
	&&z E 27BJ-... 	 	 	E!7 E E!.!;E E!A%3E E !#	*@ A AE E E 
 	s    0 
A78A22A7rR  rz  rQ  rS  Nc              #      K   | j         V  dS )z
        For Ray, we don't need to actually perform handshake.
        All addresses information is known before the actor creation.
        Therefore, we simply yield these addresses.
        N)r{  )r   rR  rz  rQ  rB   rS  s         r   rf  z(EngineCoreActorMixin._perform_handshakes  s       nr   c                     dS )a   
        Wait until the engine core is initialized.

        This is just an empty method. When ray.get() on this method
        (or any other method of the actor) returns, it is guaranteed
        that actor creation (i.e., __init__) is complete.
        Nr   r   s    r   wait_for_initz"EngineCoreActorMixin.wait_for_init   s	     	r   c                 (   	 |                                   nM# t          $ r t                              d            t          $ r t                              d            w xY w	 |                                  dS # |                                  w xY w)z0
        Run the engine core busy loop.
        r  r  N)r  r  rV   rx   r   r  r  r   s    r   runzEngineCoreActorMixin.run
  s    		     	 	 	LL./// 	 	 	DEEE		 ! MMOOOOODMMOOOOs    A; A
A!!A; ;Br-  )rC  rD  rE  rF  r   r1   rI  r   r[  r   ra  r   rb  rG  rf  ri  rk  r   r   r   rY  rY    s%         > >> &> 	>
 > > > ><	
 	3 	 	 	 	%69SV   $   	
   #&*   ^      r   rY  c                   H    e Zd ZdZ	 	 ddedededee         dede	d	e	fd
Z
dS )DPMoEEngineCoreActorz'Used for MoE model data parallel cases.r   rB   rQ  r{  rC   rD   r  r  c                     ||j         _        t                              | ||||           t                              | ||d||           d S )Nr   )rT   r  rY  r   r  r   rB   rQ  r{  rC   rD   r  r  s           r   r   zDPMoEEngineCoreActor.__init__  sd     :A#6%%+y'=	
 	
 	
 	!!+|R	
 	
 	
 	
 	
r   Nr-  rC  rD  rE  rF  r   rG  r1   r   r3   rI  r   r   r   r   rm  rm    s        11 
 

 
 &	

 X
 
 
 
 
 
 
 
 
r   rm  c                   H    e Zd ZdZ	 	 ddedededee         dede	d	e	fd
Z
dS )EngineCoreActorz%Used for non-MoE and/or non-DP cases.r   rB   rQ  r{  rC   rD   r  r  c           	          d|j         _        d|j         _        d|j         _        t                              | ||||           t                              | ||d|||           d S )NrP   r   r   rO  )rT   r  r  r  rY  r   rN  ro  s           r   r   zEngineCoreActor.__init__4  s     :;#6?@#<9:#6%%+y'=	
 	
 	
 	  	  	
 	
 	
 	
 	
r   Nr-  rp  r   r   r   rr  rr  1  s        // 
 

 
 &	

 X
 
 
 
 
 
 
 
 
r   rr  )pr   r   r  rp  r   collectionsr   collections.abcr   r   concurrent.futuresr   
contextlibr   r   inspectr	   r
   loggingr   typingr   r   r   r  r  vllm.configr   r   vllm.distributedr   	vllm.envsr   vllm.loggerr   vllm.logging_utils.dump_inputr   vllm.lora.requestr   vllm.multimodalr   
vllm.tasksr   r   vllm.transformers_utils.configr   vllm.utils.gc_utilsr   r   vllm.utils.hashingr   vllm.utils.network_utilsr   vllm.utils.system_utilsr   r   vllm.v1.core.kv_cache_utilsr    r!   r"   r#   r$   vllm.v1.core.sched.interfacer%   vllm.v1.core.sched.outputr&   vllm.v1.enginer'   r(   r)   r*   r+   r,   r-   r.   r/   vllm.v1.engine.utilsr0   r1   r2   vllm.v1.executorr3   vllm.v1.kv_cache_interfacer4   vllm.v1.metrics.statsr5   vllm.v1.outputsr6   vllm.v1.requestr7   r8   vllm.v1.serial_utilsr9   r:   vllm.v1.structured_outputr;   vllm.v1.utilsr<   vllm.versionr=   rX   rC  rV   POLLING_TIMEOUT_Sr  r?   rA   rN  r  rY  rm  rr  r   r   r   <module>r     s
   
			              / / / / / / / / % % % % % % 0 0 0 0 0 0 0 0 & & & & & & & &       % % % % % % % % % %  



 2 2 2 2 2 2 2 2 N N N N N N ' ' ' ' ' ' # # # # # # ? ? ? ? ? ? ) ) ) ) ) ) / / / / / / 3 3 3 3 3 3 3 3 S S S S S S        3 2 2 2 2 2 4 4 4 4 4 4 D D D D D D D D              < ; ; ; ; ; 5 5 5 5 5 5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
         
 & % % % % % 4 4 4 4 4 4 0 0 0 0 0 0 - - - - - - 2 2 2 2 2 2 2 2 ? ? ? ? ? ? ? ? = = = = = = 3 3 3 3 3 3 4 4 4 4 4 4	X		  WT]]k) k) k) k) k) k) k) k)\g	
 g	
 g	
 g	
 g	
Z g	
 g	
 g	
TG G G G G~ G G GTg g g g g g g gT
 
 
 
 
/1A 
 
 
.
 
 
 
 
*N 
 
 
 
 
r   