
    .`i8                     <   d dl Z d dlZd dlZd dlmZmZmZ d dlmZ d dl	m
Z
mZ d dlmZmZ d dlmZ d dlmZmZmZ d dlmZ d dlZd dlZd d	lmZ d d
lmZmZmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0 erd dl1m2Z2  e e3          Z4dZ5 G d de
          Z6 G d d          Z7e G d d                      Z8e G d d                      Z9 G d d          Z:e j;        ded e<d!ed         fd"            Z=	 d9d#e>d e<d$e<d%e<dz  fd&Z?d'ed!e>dz  fd(Z@ G d) d*          ZAe j;        	 d:ded,eBe-         d-eCd.e<d!eeDe:eAz  dz  e+dz  e8f                  f
d/            ZEd0ejF        d1e8d2eGe7         d3ed4eCd5ed6e:dz  d7edz  fd8ZHdS );    N)CallableIteratorMapping)	dataclass)Enumauto)Process
connection)BaseProcess)TYPE_CHECKINGAnycast)patch)envs)CacheConfigParallelConfig
VllmConfig)init_logger)current_platform)get_env_vars_to_copy)get_open_zmq_ipc_pathzmq_socket_ctx)get_mp_context)DPCoordinator)Executor)get_engine_client_zmq_addrshutdown)PlacementGroupi'  c                   J    e Zd Z e            Z e            Z e            ZdS )CoreEngineStateN)__name__
__module____qualname__r   NEW	CONNECTEDREADY     h/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/v1/engine/utils.pyr    r    %   s/        
$&&CIDFFEEEr(   r    c                   $    e Zd ZdZddedefdZdS )	
CoreEnginezCOne per data parallel rank, used to track state during handshaking.r   Tindexlocalc                 l    || _         |                    dd          | _        t          j        | _        d S )N   little)r-   to_bytesidentityr    r$   state)selfr,   r-   s      r)   __init__zCoreEngine.__init__.   s,    
q(33$(


r(   N)r   T)r!   r"   r#   __doc__intboolr5   r'   r(   r)   r+   r+   +   sA        MM) )c )d ) ) ) ) ) )r(   r+   c                   x    e Zd ZU ee         ed<   ee         ed<   dZedz  ed<   dZedz  ed<   dZedz  ed<   dS )EngineZmqAddressesinputsoutputsNcoordinator_inputcoordinator_outputfrontend_stats_publish_address)	r!   r"   r#   liststr__annotations__r=   r>   r?   r'   r(   r)   r:   r:   5   su          I#Y$(sTz(((%)d
))) 26"C$J55555r(   r:   c                   P    e Zd ZU dZeed<   eeeez  e	e         z  f         ed<   dS )EngineHandshakeMetadatazMetadata sent to each engine process during startup handshake,
    including addresses of the front-end ZMQ queues that they should
    connect to.
    	addressesparallel_configN)
r!   r"   r#   r6   r:   rB   dictrA   r7   r@   r'   r(   r)   rD   rD   E   sM          
 "!!!#sSy4944555555r(   rD   c                       e Zd ZdZ	 ddedededededed	ed
e	e
         dededz  fdZd Zd ZdefdZdeeef         fdZdS )CoreEngineProcManagerz
    Utility class to handle creation, readiness, and shutdown
    of background processes used by the AsyncLLM and LLMEngine.
    N	target_fnlocal_engine_countstart_indexlocal_start_indexvllm_configlocal_clienthandshake_addressexecutor_class	log_statsclient_handshake_addressc                 Z   t                      }|||||	d}|
r|
|d<   g | _        g }t          |          D ]Z}||z   }||z   }|                    |           | j                            |                    |d| |||dz                       [t          j        | t          | j                  | _        |j	        j
        dk    }	 t          | j        |          D ]u\  }}|r/t          j                    r|j	        j        rt          ||          nt!          j                    5  |                                 d d d            n# 1 swxY w Y   v	 |                                 r|                                  d S d S # |                                 r|                                  w w xY w)N)rN   rO   rP   rQ   rR   rS   EngineCore_DP)dp_ranklocal_dp_rank)targetnamekwargs   )r   	processesrangeappendr	   weakreffinalizer   
_finalizerrF   data_parallel_sizezipr   is_cuda_alikeuse_rayset_device_control_env_var
contextlibnullcontextstartfinished_procsclose)r4   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   contextcommon_kwargslocal_dp_ranksr,   local_indexglobal_indexdata_parallelprocrW   s                       r)   r5   zCoreEngineProcManager.__init__V   sL    !""&(!2,"
 
 $ 	Q8PM45,.-.. 	 	E+e3K&.L !!+...N!!$777(#/)4     
 
 
 
 "*44>JJ#3FJ	'*4>>'J'J ! !#m &2 !1 > @ @	2
  +:B2.{MJJJ $/11! ! JJLLL! ! ! ! ! ! ! ! ! ! ! ! ! ! !	!$ ""$$ 

 t""$$ 

s1   AE? %E:E? E
	
E? E
	E? ?+F*c                 .    |                                   dS )zShutdown all procs.N)ra   r4   s    r)   rk   zCoreEngineProcManager.close   s    r(   c                 L    t          j        d | j        D                        dS )zWait for any process to exit.c              3   $   K   | ]}|j         V  d S Nsentinel.0rr   s     r)   	<genexpr>z3CoreEngineProcManager.join_first.<locals>.<genexpr>   s$      AA$AAAAAAr(   N)r
   waitr\   rt   s    r)   
join_firstz CoreEngineProcManager.join_first   s)    AA$.AAAAAAAAr(   returnc                 $    d | j         D             S )Nc                     g | ]	}|j         
S r'   rx   rz   s     r)   
<listcomp>z3CoreEngineProcManager.sentinels.<locals>.<listcomp>   s    999$999r(   r\   rt   s    r)   	sentinelszCoreEngineProcManager.sentinels   s    99$.9999r(   c                 $    d | j         D             S )z>Returns dict of proc name -> exit code for any finished procs.c                 6    i | ]}|j         	|j        |j         S rw   )exitcoderY   rz   s     r)   
<dictcomp>z8CoreEngineProcManager.finished_procs.<locals>.<dictcomp>   s0     
 
 
}( It}(((r(   r   rt   s    r)   rj   z$CoreEngineProcManager.finished_procs   s%    
 

 
 
 	
r(   rw   )r!   r"   r#   r6   r   r7   r   r8   rA   typer   r5   rk   r~   r@   r   rG   rj   r'   r(   r)   rI   rI   P   s          04D DD  D 	D
 D  D D D XD D #&*D D D DL  B B B:4 : : : :
S#X 
 
 
 
 
 
r(   rI   rN   rW   r   c              #      K   | j         j        }| j         j        }t          j        }t          ||||          }t          j        t          j	        ||ff          5  dV  ddd           dS # 1 swxY w Y   dS )zW
    Temporarily set CUDA_VISIBLE_DEVICES or equivalent
    for engine subprocess.
    )valuesN)
rF   
world_sizelocal_world_sizer   device_control_env_varget_device_indicesr   rG   osenviron)rN   rW   r   r   evarvalues         r)   rf   rf      s       ,7J"2C2Dt]J@PQQE	BJu'7	8	8	8                   s   A..A25A2r   r   r   c                    ||}	 d                     d t          ||z  ||z  |z             D                       }nJ# t          $ r=}t          d|  d||z   d|dz   |z   dt	          j        |            d		          |d}~ww xY w|S )
z
    Returns a comma-separated string of device indices for the specified
    data parallel rank.

    For example, if world_size=2 and local_dp_rank=1, and there are 4 devices,
    this will select devices 2 and 3 for local_dp_rank=1.
    N,c              3   X   K   | ]%}t          t          j        |                    V  &d S rw   )rA   r   device_id_to_physical_device_idr{   is     r)   r|   z%get_device_indices.<locals>.<genexpr>   sJ       
 
  @CCDD
 
 
 
 
 
r(   zError setting z: local range: [z, r[   z) base value: "")joinr]   
IndexError	Exceptionr   getenv)r   rW   r   r   r   es         r)   r   r      s     % 
 

*
*-== 
 
 
 
 
    53 5 5*Z75 5!Z/5 5 	011	5 5 5
 
 	 Ls   6= 
B8A??Bpromptc                     t          | t                    r| S t          | t                    r+t          t          d z  |                     d                    S d S )Nr   )
isinstancerA   r   r   get)r   s    r)   get_prompt_textr      sP    &# &'"" 6C$J

8 4 45554r(   c                   0   e Zd ZdZ	 	 ddededee         dede	d         dz  d	e	e
         dz  fd
Zededee	d         e	e
         f         fd            Zedede
dee	d         e	e
         f         fd            Zdede
ddfdZde
de
ddfdZd Zd ZdS )CoreEngineActorManagerz
    Utility class to handle creation, readiness, and shutdown
    of core engine Ray actors used by the AsyncLLM and LLMEngine.

    Different from CoreEngineProcManager, this class manages
    core engines for both local and remote nodes.
    NrN   rE   rQ   rR   placement_groupsr   rn   c           
      *   dd l }dd l}ddlm}	 ddlm}
 ddlm}m} |j	        j
        }|dk    r|j        j        r|n|}g | _        g | _        t          |j                  }d |D             | _         |	| j                  }|| _        || _        || _        |j	        j        }|j	        j        }|                                rt0                              d	           n|                                 |X|
J d
            t7          |          t7          |          k    s
J d            t0                              d           g | _        n$t:                              |          \  }}|| _        t7          |          |k    s
J d            g | _        g }tA          tC          |          ||          D ]|\  }}}|"                    |          }||j	        _#        ||k     }|dk    r"|j$        |j$        j%         d| |j$        _%        tM          j'                    rGtL          j(        }tS          |||          }| j                                         }|||<    |	|          }|*                    |          +                     |
||          |          *                    |||||||          }|r| j        ,                    |           n| j        ,                    |           | j        ,                    |           |,                    |j-        *                                           ~|.                    |           g | _/        | j        | j        z   D ]3}| j/        ,                    |j0        *                                           4d S )Nr   
RuntimeEnv PlacementGroupSchedulingStrategyDPMoEEngineCoreActorEngineCoreActorr[   )destinationc                 P    i | ]#}|t           j        v |t           j        |         $S r'   )r   r   )r{   rY   s     r)   r   z3CoreEngineActorManager.__init__.<locals>.<dictcomp>  s4     
 
 
'+
@R@RD"*T"@R@R@Rr(   env_varsz8Ray is already initialized. Skipping Ray initialization.z?local_dp_ranks must be provided if placement_groups is providedz=placement_groups and local_dp_ranks must have the same lengthzUsing provided placement groupsz8Number of placement groups must match data parallel size_dpplacement_groupplacement_group_bundle_indexscheduling_strategyruntime_envrN   rQ   rR   rO   rE   rV   rW   )1copyrayray.runtime_envr   ray.util.scheduling_strategiesr   vllm.v1.engine.corer   r   rF   rb   model_configis_moelocal_engine_actorsremote_engine_actorsr   r!   env_vars_dictrE   rQ   rR   data_parallel_size_localr   is_initializedloggerinfoinitlencreated_placement_groupsr   create_dp_placement_groupsplacement_group_is_localrc   r]   deepcopyr   kv_transfer_config	engine_idr   is_xpur   r   remoteoptionsr^   wait_for_initr   run_refsrun)r4   rN   rE   rQ   rR   r   rn   r   r   r   r   r   r   dp_sizeactor_classenv_vars_listr   rK   r   refsr,   ro   pgdp_vllm_configrO   device_evardevice_indicesactor_env_varsactors                                r)   r5   zCoreEngineActorManager.__init__   sG    	


......SSSSSSMMMMMMMM-@ {{{7>{ !   	 ;= ;=!,9MNNN
 
/<
 
 
 !j$*<===","(8Q 0;
 	KKRSSSSHHJJJ'!--Q .-- '((C,?,????O @?? KK9:::,.D)) 'AA+NN -n -=D)#$$///F 0// )+%&)'NNN,<'
 '
 3	6 3	6"E; "]];77N=?N*: #55L{{~@L &8BTT{TT 1;  &(( B.E!3j" " "&!3!8!8!:!:.<{+(j.AAA 

;''(H(H(*5?) ) ) !,     .#1'!-'!"-    &  8(//6666)00777)00>>>KK+22445555-0II 	5 	5EM  !1!1!3!34444	5 	5r(   r   c           
         ddl }ddlm} t                              d           | j        j        }| j        j        }| j        j        } |            }| j        j	        }g }g }	d| t          |                                fd          }
t          |
          dk    s
J d            |
d         v sJ d	| d
            t          j        fd|
D             }|sJ d d            t          |          }t           j        }d}||vrt%          t           j         d|           | j        j        }|dk    r|dk    s|dk    rt%          d          |dv rd}nd}||k    sJ d| d| d            t)          |          |hk    sJ d|
             ||z  dk    sJ d| d| d            t          |          |z  ||z  k    s#J dt          |           d| d | d!|             |d"k    sJ d#| d$            g }|
D ]}d% |D             }t          |          d"k    sJ d&|             |d         }|                    d'          d"         }t-          |                    d                    }|d(k    r	|dk    rd"}n||z  }||k    r"||k     rt%          d)| d*| d+| d,          |}n/|d-k    r'||k     rt                              d.|||           |}n|}t1          |          D ]}d/d|z   d0ig}|d(k    rW|||z  z  }t          |          |k    sJ d1t          |          d2|            t          |          |k     r^|d3d/igz   }g }n||z  d3d/igz   }|j                            d4t          |           ||5          }|                    |           |	                    |           t          |          |k    r nt          |          |k     r%t%          d)| d6t          |           d7|           t          |          |k    sJ d8t          |           d9|             t          |	          |k    sJ d:t          |	           d;|             ||	fS )<z<
        Create placement groups for data parallel.
        r   N)available_resources_per_nodez+Creating placement groups for data parallelnode:c                     | vS rw   r'   )xdp_master_ip_keys    r)   <lambda>zCCoreEngineActorManager.create_dp_placement_groups.<locals>.<lambda>  s    8HPQ8Q r(   keyz-No nodes with resources found in Ray cluster.zThe DP master node (ip: z) is missing or deadc                 B    g | ]}|v t          |                   S r'   )r7   )r{   node_resources
device_strs     r)   r   zECoreEngineActorManager.create_dp_placement_groups.<locals>.<listcomp>  s;     %
 %
 %
^++ z*+++++r(   zNo z found in Ray cluster.)strictfillspanzJ is not supported. Make sure to set `VLLM_RAY_DP_PACK_STRATEGY` to one of r   deepep_high_throughputdeepep_low_latencyzDeepEP kernels require EP ranks [0,7] (same for [8,15], ...) to be on the same node, but VLLM_RAY_DP_PACK_STRATEGY=fill does not guarantee that. Please use VLLM_RAY_DP_PACK_STRATEGY=strict instead.)r   r   STRICT_PACKPACKzWorld size z8 is smaller than the maximum number of devices per node zD. Make sure to set `VLLM_RAY_DP_PACK_STRATEGY` to `strict` or `fill`zNodes are not homogenous, z1For multi-node data parallel groups, world_size (z4) must be a multiple of number of devices per node (z).z"Not enough total available nodes (z) and devices per node (z!) to satisfy required world size z and data parallel size r[   zdata-parallel-size-local z should be set as the default (1) for VLLM_RAY_DP_PACK_STRATEGY=span. The actual data-parallel-size-local will be auto determined.c                 H    g | ]}|d k    |                     d          | S )znode:__internal_head__r   )
startswith)r{   r   s     r)   r   zECoreEngineActorManager.create_dp_placement_groups.<locals>.<listcomp>  s?       222s~~g7N7N2 222r(   z7Zero or multiple node IP keys found in node resources: :r   z!Not enough resources to allocate z DP ranks on DP master node z, possible to fit z
 DP ranks.r   zJSkipping node %s as %s DP ranks could not fit, possible to fit %s DP ranks      ?MbP?zJcollected_bundles should be <= world_size, but got len(collected_bundles)=z and world_size=CPUdp_rank_rY   strategybundlesz  placement groups, only created z( placement groups. Available resources: zCreated z DP placement groups, expected zlocal_dp_ranks length z does not match expected )r   ray._private.stater   r   r   rF   data_parallel_master_iprb   r   r   sortedr   r   r   ray_device_keymaxr   VLLM_RAY_DP_PACK_STRATEGY
ValueErrorall2all_backendsetsplitr7   r   r]   utilr   r^   )rN   r   r   dp_master_ipr   dp_size_localavailable_resourcesr   r   rn   nodesn_node_devicesmax_device_per_nodepack_strategy_supported_pack_strategiesr  placement_strategycollected_bundlesr   node_ip_keysnode_ip_keynode_ipn_device_on_nodedp_size_availabledp_size_to_allocater   device_bundler   r   r   r   s                                @@r)   r   z1CoreEngineActorManager.create_dp_placement_groupso  sL    	


CCCCCCABBB"2J-@#3L::<< 0;
13$&1<11&&((.Q.Q.Q.Q
 
 
 5zzA~~~N~~~58+++I|III ,++ &4
%
 %
 %
 %
"'%
 %
 %

 GGGZGGGGG~!.116%?" :::1 : :7: :   &5EF""777"666G   ...!.!' 3333Dj D D&D D D 433 ~&&+>*????4U44 @??  33q888XJ X X@SX X X 988 ~&&)<<
W@TTTTS5H5H  )< 2<    UTT !A%%%OM O O O &%% # G	 G	N )  L
 |$$)))X,XX *)) 'q/K!'',,Q/G">#5#5j!#D#DEE&&+;q+@+@ %&!!$4
$B!,&&$}44$9M 9 9-99 9,9 9 9  
 '4##(**$}44KK6%)   &3## '8#.//  ",c7W3De!L M F**%9I)II%011Z???O#&'8#9#9O OAKO O @?? ,--
:: /E3<.@G(*%%+j8UCL>IGX--;C(8$9$9;;/# .  
 !''+++%%a((('((G33E 4   7**)G ) )'(() ) '	) )   #$$///Vs+,,VVWVV 0// >""g---"S%8%8 " "" " .--  //r(   old_vllm_confignew_data_parallel_sizec                    ddl }ddlm}m} ddlm} | j        j        }||z
  }|dk    rg g fS | j        j        | j        j	        } |            }	t          |	fd          }	|	d         j        k    s
J d            t          |	          dk    s|	d         j        k    s
J d	             |            }
 |            }g }g }d}t          j        }|	D ]}||k    r n|j        }|j        }t!          |
|         |                   }t!          ||         |                   }t#          d||z
            }||z  }||z  }t%          |          D ]}||k    r n||z   }|k    r|d
dz   dig|z  dd
igz   }n|d
ig|z  dd
igz   }|j                            d| d|          }|                    |           ||z   }|                    |           |dz  }	||fS )zB
        Add placement groups for new data parallel size.
        r   N)r   total_resources_per_node)
list_nodesc                     | j         k    S rw   )r  )noder
  s    r)   r   z@CoreEngineActorManager.add_dp_placement_groups.<locals>.<lambda>?  s    t||/K r(   r   z$The first node must be the head noder[   zThere can only be one head noder   r   r   r   r   r   r   )r   r   r   r  ray.util.stater  rF   rb   r   r   r  r  r   r   r  node_idr7   r  r]   r	  r   r^   )r  r  r   r   r  r  old_dp_sizenum_pg_to_creater   r  r  total_resourcesr   rn   num_pg_createdr   r!  r  r#  available_gpus
total_gpus	used_gpusused_engines_on_nodeavailable_engine_countr   rankr   r   
local_rankr
  s                                @r)   add_dp_placement_groupsz.CoreEngineActorManager.add_dp_placement_groups'  s    	


	
 	
 	
 	
 	
 	
 	
 	
 	.-----%5H1K?q  r6M&6N$4?

u"K"K"K"KLLLQx<///1W///5zzQ%("2l"B"B"B- #C"BB ;:<<2244%4
 -	$ -	$D!111lGlG !4W!=j!IJJN _W5jABBJ AzN:;;I#,
#:  &4z%A" 122 $ $!%555E"^3 l**#S'L*@%H"#',cl^4GG !+C01J>5#,OGX--*D***# .  
 !''+++ 2A5
%%j111!#//r(   cur_vllm_configc           
         dd l }dd l}ddlm} ddlm} ddlm}m} |j	        j
        r|n|}	t          | j                  t          | j                  z   }
||
k    sJ d| d|
 d            |                     ||          \  }}|j        j        }|j        j        d} || j        dd	iz  
          }t'          t)          ||                    D ]D\  }\  }}|
|z   }|                    |          }||j        _        ||j        _        t1          fd|j        D                       }|r|dz  }|j        j        |z   |j        _        |                    |	                               |||          |                              || j        | j        || j        ||          }|r| j                             |           n| j                             |           | j!                             |           | j"                             |           F|#                    d |dk    r| j        | d          ng | j        t          |          |z
   d          z   D                        |dk    r| j        | d          ng | j        t          |          |z
   d          z   }|D ]3}| j$                             |j%                                                   4||j        _        |dk    r|j        xj        |z  c_        d S d S )Nr   r   r   r   zNew data parallel size z1 must be greater than current data parallel size z for scale upVLLM_ELASTIC_EP_SCALE_UP_LAUNCH1r   c              3   R   K   | ]!}|                     d z   d          dk    V  "dS )r   r   N)r   )r{   bundler
  s     r)   r|   z=CoreEngineActorManager.scale_up_elastic_ep.<locals>.<genexpr>  sL        >D

7\11559     r(   r[   r   r   r   c                 @    g | ]}|j                                         S r'   )r   r   )r{   r   s     r)   r   z>CoreEngineActorManager.scale_up_elastic_ep.<locals>.<listcomp>  s7     
 
 
 #**,,
 
 
r(   )&r   r   r   r   r   r   r   r   r   r   r   r   r   r   r/  rF   r   r   r   	enumeraterc   r   rb   r   anybundle_specsr   r   r   rQ   rR   rE   r^   r   r   r   r   r   )r4   r0  r  r   r   r   r   r   r   r   cur_data_parallel_sizer   rn   r   new_local_enginesr   r   r   r.  r-  r   rO   r   actorsr
  s                           @r)   scale_up_elastic_epz*CoreEngineActorManager.scale_up_elastic_ep~  sI    	


......SSSSSSMMMMMMMM +2!    	 "%T%=!>!>%B
 B
 "
 &(>>>>&<  /E   ?>> ,0+G+G3,
 ,
(. %4?
&6N j'+Lc*RR
 
 
 $-S1A>-R-R#S#S ,	? ,	?AJ)A-D!]]?;;N@VN*==?N*:     HJ    L  !Q&! $3L'( .G 

;''(H(H(*5?) ) ) !,     .#'#6"n!-"n ",    (  8(//6666)00777)00444)00>>>>
 
 )1,, ,.?-?-@-@AA+*++.??@BB
 
 
	
 	
 	
  !1$$ $&7%7%8%899%,<(=(=@Q(Q&R&T&TU	V  	5 	5EM  !1!1!3!34444=S': q  +DD!DDDD ! r(   r:  c                 h   dd l }||k    sJ d| d| d            t          ||z
            D ]}| j                                        }| j                                        }|r| j                                         n| j                                         |j                            |           d S )Nr   zcur_data_parallel_size z- must be greater than new_data_parallel_size z for scale down)	r   r]   r   popr   r   r   r	  remove_placement_group)r4   r:  r  r   _r   is_locals          r)   scale_down_elastic_epz,CoreEngineActorManager.scale_down_elastic_ep  s     	


%(>>>>&<  +A   ?>>
 -0FFGG 	0 	0A.2244B488::H 0(,,....)--///H++B////	0 	0r(   c                     | j         S rw   )r   rt   s    r)   get_run_refsz#CoreEngineActorManager.get_run_refs  s
    }r(   c                     dd l }| j        | j        z   D ]}|                    |           | j        D ]}|j                            |           d S )Nr   )r   r   r   killr   r	  r@  )r4   r   r   r   s       r)   rk   zCoreEngineActorManager.close  sk    


-0II 	 	EHHUOOOO/ 	0 	0BH++B////	0 	0r(   )NN)r!   r"   r#   r6   r   r:   r   r   r8   r@   r7   r5   staticmethodtupler   r/  r=  rC  rE  rk   r'   r(   r)   r   r      s         ;?+/y5 y5y5 &y5 X	y5
 y5 /047y5 S	D(y5 y5 y5 y5v u0u0	t$%tCy0	1u0 u0 u0 \u0n T0#T0=@T0	t$%tCy0	1T0 T0 T0 \T0lq)qCFq	q q q qf0&)0CF0	0 0 0 0&  0 0 0 0 0r(   r   r[   rQ   rR   num_api_serversc              #   <  K   | j         }|j        }|j        |j        }|j        }|j        |j        }|du}	|	p|p|k    t          fdt          |          D             fdt          |          D                       }
| j	        o|	 o|dk    }|r{t          || j        j                  }|                                \  |
_        |
_        |                                |
_        t$                              d|j        j                   nd}|j        dk    r6t$                              d	           t/          | |
||
          }|||
fV  dS |	rdk    sJ t1          |d          g}nK|dk    rfdt          |          D             }n)|s
J d            d t          ||z             D             }|	p|k    }t3          ||j                  }|r|dk    r|rJ t7                      }|}n|}d}t9          |t:          j        d          5 }ddlm } r!tC          |j"        | ||||d||pd
  
        }nd}|||
fV  tG          ||
|||dk    o| j        j        | j$        ||r|j        nd           ddd           dS # 1 swxY w Y   dS )z5Launch engine and DP coordinator processes as needed.Nc                 0    g | ]}t                    S r'   r   r{   rA  client_local_onlyhosts     r)   r   z'launch_core_engines.<locals>.<listcomp>4  s4     
 
 
 ''8$??
 
 
r(   c                 0    g | ]}t                    S r'   rM  rN  s     r)   r   z'launch_core_engines.<locals>.<listcomp>8  s4     
 
 
 ''8$??
 
 
r(   )r;   r<   r   )enable_wave_coordinationz(Started DP Coordinator process (PID: %d)r   z(Starting ray-based data parallel backend)rN   rE   rQ   rR   r[   Tr,   r-   c                 :    g | ]}t          ||k                S )rS  r+   )r{   r   rK   s     r)   r   z'launch_core_engines.<locals>.<listcomp>l  s9      
  
  
DEJQq+='=??? 
  
  
r(   zcAttempting to launch core_engines from dp_rank > 0, but found internal DPLB, which is incompatible.c                 0    g | ]}t          |d           S )TrS  rU  r   s     r)   r   z'launch_core_engines.<locals>.<listcomp>u  s5      
  
  
 Qd+++ 
  
  
r(   )bind)EngineCoreProc)	rN   rQ   rR   rP   rS   rO   rK   rL   rM   )%rF   rb   r   data_parallel_rank_localdata_parallel_rankr   local_engines_onlyr:   r]   needs_dp_coordinatorr   r   r   get_engine_socket_addressesr=   r>   get_stats_publish_addressr?   r   r   rr   piddata_parallel_backendr   r+   r   data_parallel_rpc_portr   r   zmqROUTERr   rX  rI   run_engine_corewait_for_engine_startupcache_config)rN   rQ   rR   rJ  rF   r   rM   rV   r[  offline_moderE   run_coordinatorcoordinatorengine_actor_managerengines_to_handshakehandshake_local_onlyrP   local_handshake_addressrS   handshake_socketrX  local_engine_managerrO  rP  rK   s                         @@@r)   launch_core_enginesrp    sU      "1O0G(A'@0G2D(;
 %D0L
 	M*M/AW/L 
 #
 
 
 
 
?++
 
 

 
 
 
 
?++
 
 
	 	 	I  	(N-=N'Q,   #%0%=%D
 
 
 3355 	B	#Y%A 1133 	0 	>@P@TUUUU,55>???5#)	 
  
  
 #K:::: 
!Q&&&& * E E EF	A
 
  
  
  
INw 
  
  

 " 	
 	
:	
 	
! 
  
7G.@$@AA 
  
  
 (H+=+H2dO$J   (gkk''''"7"9"9#:  "3#' 	$
 
 
 "
	666666  	(#8.'-#"3)A!#5#"3"8q$ $ $   $( "K:::: 	  aK;K4;$  +5K		
 		
 		
3"
 "
 "
 "
 "
 "
 "
 "
 "
 "
 "
 "
 "
 "
 "
 "
 "
 "
s   A'JJJrn  rE   core_enginesrF   coordinated_dprf  proc_managercoord_processc                 	   j         }t          |          |z
  }	||	gddg}}
t          j                    }|                    | t          j                   j         oj         }|7|                                D ]"}|                    |t          j                   #|%|                    |j	        t          j                   t          |
          st          |          r|                    t                    }|sCt          |
          rt          j        dg|
R   t          |          rt          j        dg|R   ~t          |          dk    s|d         d         | k    rB|r|                                ni }||j        |j        ||j        <   t%          d|           |                                 \  }t(                              d          }t-          fd|D             d           }|t%          d|           t.          j                            |          }|d	         |d
         |d         }}}||j        k    r't%          | d|rd
nd d| d|j        rd
nd           |s.||k    r(|rt%          d| d          t%          d| d          |dk    r|j        t8          j        k    rt.          j                            t?          ||rfddD             ni                     }|                      |fd           |
|rdndxx         dz  cc<   ||rdndxx         dz  cc<   t8          j!        |_        n|dk    r|j        t8          j!        k    r|j"        pd}||d         z  }||_"        |j#        |$                    d          |_#        |rH|$                    d          }%                                }||k    rt%          d| d| d| d           ||rdndxx         dz  cc<   t8          j&        |_        n%t%          d!| d"|rd
nd d| d#|j         d$	          t                              d%||rd
nd|           t          |
          t          |          d S d S )&Nr   z?Waiting for %d local, %d remote core engine proc(s) to connect.z=Waiting for %d local, %d remote core engine proc(s) to start.r[   zNEngine core initialization failed. See root cause above. Failed core proc(s): r0   c              3   2   K   | ]}|j         k    |V  d S rw   )r2   )r{   r   eng_identitys     r)   r|   z*wait_for_engine_startup.<locals>.<genexpr>  s/      MMQ!*2L2Lq2L2L2L2LMMr(   z8Message from engine with unexpected data parallel rank: statusr-   headlessz message from r   z engine z, expected it to be zRemote engine z9 must not use --headless in external or hybrid dp lb modez< must use --headless unless in external or hybrid dp lb modeHELLOc                 2    i | ]}|t          |          S r'   )getattr)r{   krF   s     r)   r   z+wait_for_engine_startup.<locals>.<dictcomp>  s5     % % % 7?A66% % %r(   )r   data_parallel_master_port_data_parallel_master_port_listrb   )rE   rF   F)r   r&   num_gpu_blocksdp_stats_addressparallel_config_hashz+Configuration mismatch detected for engine z. All DP workers must have identical configurations for parameters that affect collective communication (e.g., enable_eplb, eplb_config.log_balancedness). Worker hash: z, Expected hash: zM. Please ensure all workers are started with the same command-line arguments.zUnexpected z message for z in z state.z"%s from %s core engine process %s.)'r   r   rb  PollerregisterPOLLINdata_parallel_hybrid_lbdata_parallel_external_lbr   ry   r8  pollSTARTUP_POLL_PERIOD_MSr   debugrj   r   rY   RuntimeErrorrecv_multipartr7   
from_bytesnextmsgspecmsgpackdecoder-   r3   r    r$   encoderD   send_multipartr%   r  r?   r   compute_hashr&   )rn  rE   rq  rF   rr  rf  rs  rt  local_countremote_countconn_pendingstart_pendingpollerremote_should_be_headlessry   eventsfinishedready_msg_bytes	eng_indexenginemsgrx  r-   ry  init_messager  worker_config_hashexpected_hashrw  s      `                        @r)   re  re    s@    ":K|$${2L#."=1v-LZ\\F
OO$cj111 33 	:99 
 $..00 	2 	2HOOHcj1111 .
;;;
l

 }
s=11 }
344 	<   U!    =!! S"    v;;??fQil.>>>8DL|22444"H(]-C-O/</E+,3(03 3   )9(G(G(I(I%oNN<::	MMMM,MMMtTT>V9VV   o$$_55"%h-Ws:xFL   ; ;#177; ;#; ; %l877; ;    	%>>> "Y      #"Y " " "   W1D!D!D"?11'' &
% % % % %"% % % %    L  ++\<,Hu+UUUe*+++q0+++u+!!!,,,1,,,*4FLLw6<?3L#L#L *8=ANc"233N*8L' 7?;>77CU;V;V	8  %(WW-C%D%D" / < < > >%66&3$3 3
 );3 3 +83 3 3
 
 
 u+!!!,,,1,,,*0FLL8f 8 8#1778 88 8"(,8 8 8   	0*GG(		
 	
 	
q l

 }
s=11 }
 }
 }
 }
 }
r(   rw   )r[   )Irg   r   r_   collections.abcr   r   r   dataclassesr   enumr   r   multiprocessingr	   r
   multiprocessing.processr   typingr   r   r   unittest.mockr   r  rb  vllmr   vllm.configr   r   r   vllm.loggerr   vllm.platformsr   vllm.ray.ray_envr   vllm.utils.network_utilsr   r   vllm.utils.system_utilsr   vllm.v1.engine.coordinatorr   vllm.v1.executorr   vllm.v1.utilsr   r   ray.util.placement_groupr   r!   r   r  r    r+   r:   rD   rI   contextmanagerr7   rf   rA   r   r   r   r   r8   rI  rp  Socketr@   re  r'   r(   r)   <module>r     s       				  7 7 7 7 7 7 7 7 7 7 ! ! ! ! ! !         / / / / / / / / / / / / / / + + + + + + + + + +        



       ? ? ? ? ? ? ? ? ? ? # # # # # # + + + + + + 1 1 1 1 1 1 J J J J J J J J 2 2 2 2 2 2 4 4 4 4 4 4 % % % % % % > > > > > > > > 8777777	X		     d   ) ) ) ) ) ) ) ) 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6]
 ]
 ]
 ]
 ]
 ]
 ]
 ]
@ ,/d^   ( $(	   Dj	   DC C$J    b0 b0 b0 b0 b0 b0 b0 b0J 
 	]
 ]
]
N]
 ]
 	]

 	 66=	]
 ]
 ]
 ]
@Y
jY
!Y
 z"Y
 $	Y

 Y
 Y
 ($.Y
 T>Y
 Y
 Y
 Y
 Y
 Y
r(   