
    -`iw,                        d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZ d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z(m)Z)  ee*          Z+dZ, G d de          Z-de.e         fdZ/de j0        fdZ1de j0        fdZ2	 d	 ddZ3dS )    N)CLISubcommand)
run_serverrun_server_workersetup_server)make_arg_parservalidate_parsed_serve_args)VLLM_SUBCMD_PARSER_EPILOG)init_logger)UsageContext)FlexibleArgumentParser)get_tcp_uri)decorate_logsset_process_title)EngineCoreProc)CoreEngineProcManagerlaunch_core_engines)Executor)MultiprocExecutor)setup_multiprocess_prometheus)APIServerProcessManagerwait_for_completion_or_failurea0  Launch a local OpenAI-compatible API server to serve LLM
completions via HTTP. Defaults to Qwen/Qwen3-0.6B if no model is specified.

Search by using: `--help=<ConfigGroup>` to explore options by section (e.g.,
--help=ModelConfig, --help=Frontend)
  Use `--help=all` to show all available flags at once.
c                   t    e Zd ZdZdZedej        ddfd            Zdej        ddfdZ	dej
        defd	ZdS )
ServeSubcommandz(The `serve` subcommand for the vLLM CLI.serveargsreturnNc                    t          | d          r| j        | j        | _        | j        r1| j        #| j        dk    rt          d| j         d          d| _        | j        p| j        d u}| j        p| j	        d u}|r|rt          d          | j        }|rd| _        ns|r:| j
        pd| _        | j        dk    r t                              d| j                   n7| j        | _        | j        dk    r t                              d| j                   | j        dk     rt          |            d S | j        dk    rt          |            d S t!          j        t%          |                      d S )	N	model_tagr   z--api-server-count=zN cannot be used with --headless (no API servers are started in headless mode).a  Cannot use both external and hybrid data parallel load balancing modes. External LB is enabled via --data-parallel-external-lb or --data-parallel-rank. Hybrid LB is enabled via --data-parallel-hybrid-lb or --data-parallel-start-rank. Use one mode or the other.   zPDefaulting api_server_count to data_parallel_size_local (%d) for hybrid LB mode.z7Defaulting api_server_count to data_parallel_size (%d).)hasattrr   modelheadlessapi_server_count
ValueErrordata_parallel_external_lbdata_parallel_rankdata_parallel_hybrid_lbdata_parallel_start_rankdata_parallel_size_localloggerinfodata_parallel_sizerun_headlessrun_multi_api_serveruvlooprunr   )r   is_external_lbis_hybrid_lbs      n/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/entrypoints/cli/serve.pycmdzServeSubcommand.cmd/   s    4%% 	($.*DDJ= 	&$0T5JQ5N5N &$*? & & &   %&D! *Qd.ET.Q 	 (UD,IQU,U 	  	l 	I    ( ()%% (,(E(J%(1,,KK3-   )-(?%(1,,KKQ-  
  1$$"Q&& &&&&& Jz$''(((((    c                 $    t          |           d S N)r   )selfr   s     r3   validatezServeSubcommand.validateq   s    "4(((((r5   
subparsersc                     |                     | j        dt          d          }t          |          }t	          j        | j                  |_        |S )NzNLaunch a local OpenAI-compatible API server to serve LLM completions via HTTP.z vllm serve [model_tag] [options])helpdescriptionusage)subcmd)
add_parsernameDESCRIPTIONr   r	   formatepilog)r8   r:   serve_parsers      r3   subparser_initzServeSubcommand.subparser_initt   s[     ",,I$#4 - 
 
 '|447>diPPPr5   )__name__
__module____qualname____doc__rA   staticmethodargparse	Namespacer4   r9   _SubParsersActionr   rF    r5   r3   r   r   *   s        22D?)($ ?) ?) ?) ?) \?)B)X/ )D ) ) ) )"4	     r5   r   r   c                       t                      gS r7   )r   rO   r5   r3   cmd_initrQ      s    r5   r   c                 v   | j         dk    rt          d          t          j                            |           }t
          j        }|                    |d          }|j        rt          d          |j	        }|j
        }|dk    rt          d          dfd	}t          j        t          j        |           t          j        t          j        |           |j        dk    r^dd
lm} |j        }| d|j         }	t&                              d||	           t+          |d          }
|
                    d           d S |j        }|j        }t3          ||          }t&                              d||           t5          t6          j        ||j	        j        d|d|t=          j        |          |j          	  	        }	 |!                                 t&                              d           |"                                 d S # t&                              d           |"                                 w xY w)Nr   z.api_server_count can't be set in headless modeT)usage_contextr"   z:data_parallel_hybrid_lb is not applicable in headless moder   z5data_parallel_size_local must be > 0 in headless modeFc                 T    t                               d|            s	dt          d S )NzReceived %d signal.T)r*   debug
SystemExit)signumframeshutdown_requesteds     r3   signal_handlerz$run_headless.<locals>.signal_handler   s6    *F333! 	!%	 	r5   )__version__:zpLaunching vLLM (v%s) headless multiproc executor, with head node address %s for torch.distributed process group.)monitor_workers)inlinezQLaunching %d data parallel engine(s) in headless mode, with head node address %s.)		target_fnlocal_engine_countstart_indexlocal_start_indexvllm_configlocal_clienthandshake_addressexecutor_class	log_statszShutting down.)#r#   r$   vllmAsyncEngineArgsfrom_cli_argsr   OPENAI_API_SERVERcreate_engine_configr'   parallel_configr)   signalSIGTERMSIGINTnode_rank_within_dpvllm.versionr[   master_addrmaster_portr*   r+   r   start_worker_monitordata_parallel_master_ipdata_parallel_rpc_portr   r   r   run_engine_corer&   r   	get_classdisable_log_stats
join_firstclose)r   engine_argsrS   rc   rm   r`   rZ   VLLM_VERSIONhosthead_node_addressexecutorportre   engine_managerrY   s                 @r3   r-   r-      s   q  IJJJ &44T::K 2M22#d 3  K * WUVVV!1O(AQPQQQ     M&..111
M&-000*Q..<<<<<< *#CCo&ACCM		
 	
 	
 %[%HHH%%T%2222D1D#D$//
KK	%	   + 0-/B+)+66!33
 
 
N!!###$%%% 	$%%%s   H 0H8c                 2   | j         rJ | j        }|dk    sJ |dk    rt                       t          |           \  }}t          j                            |           }||_        d|_        t          j
        }|                    |          }|dk    rt          j        rt          d          t          j        |          }|j         }|j        }	|	j        }
|	j        s|
dk    sJ d }t+          ||||          5 \  }}}t-          t.          ||| ||j        |j        |r|                                nd           }|
dk    s|	j        st7          d	i |}d d d            n# 1 swxY w Y   ||j        |d<   t7          d	i |}t;          |||           d S )
Nr   r   )rS   zIVLLM_ALLOW_RUNTIME_LORA_UPDATING cannot be used with api_server_count > 1)target_server_fnlisten_addresssockr   num_serversinput_addressesoutput_addressesstats_update_addressr   )api_server_managerr   coordinatorrO   )r"   r#   r   r   rh   ri   rj   _api_process_count_api_process_rankr   rk   rl   envs VLLM_ALLOW_RUNTIME_LORA_UPDATINGr$   r   ry   rz   rm   r&   local_engines_onlyr   dictrun_api_server_worker_procinputsoutputsget_stats_publish_addressr   frontend_stats_publish_addressr   )r   num_api_serversr   r   r}   rS   rc   rf   rg   rm   dp_rankr   local_engine_managerr   	addressesapi_server_manager_kwargss                   r3   r.   r.      sY   }0OQ%''''--ND&44T::K%4K"$&K! 2M222OOKtDW
 
 	
 '44N11I!1O0G-=A=9=	^Y
 
 V	7
Y$(7)'%,&."!F!F!H!H!H%
 %
 %
!$ a<<A<!8!U!U;T!U!U1V V V V V V V V V V V V V V V6 !4 	""89 5QQ7PQQ #-+     s   <AE""E&)E&c                     |pi }|                     dd          }t          dt          |                     t                       t	          j        t          | |||fi |           dS )z6Entrypoint for individual API server worker processes.client_indexr   	APIServerN)getr   strr   r/   r0   r   )r   r   r   client_configuvicorn_kwargsserver_indexs         r3   r   r   !  s}     "'RM $$^Q77L k3|#4#4555OOO
J.$mVV~VV    r5   r7   )r   N)4rL   rn   r/   rh   	vllm.envsr   vllm.entrypoints.cli.typesr   "vllm.entrypoints.openai.api_serverr   r   r    vllm.entrypoints.openai.cli_argsr   r   vllm.entrypoints.utilsr	   vllm.loggerr
   vllm.usage.usage_libr   vllm.utils.argparse_utilsr   vllm.utils.network_utilsr   vllm.utils.system_utilsr   r   vllm.v1.engine.corer   vllm.v1.engine.utilsr   r   vllm.v1.executorr   #vllm.v1.executor.multiproc_executorr   vllm.v1.metrics.prometheusr   vllm.v1.utilsr   r   rG   r*   rB   r   listrQ   rM   r-   r.   r   rO   r5   r3   <module>r      sb             4 4 4 4 4 4         
 Y X X X X X X X < < < < < < # # # # # # - - - - - - < < < < < < 0 0 0 0 0 0 D D D D D D D D . . . . . . K K K K K K K K % % % % % % A A A A A A D D D D D D Q Q Q Q Q Q Q Q	X		W W W W Wm W W Wt$}%    Nx) N N N NbEx1 E E E ER /3	     r5   