
    -`i                        U d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlZd dlZd dlmZm Z m!Z!m"Z" d dl#m$Z$ d d	l%m&Z& d d
l'm(Z( d dl)m*Z* d dl+m,Z,m-Z-m.Z.m/Z/ d dl0m1Z1m2Z2m3Z3m4Z4m5Z5 d dl6m7Z7 d dl8m9Z9 d dl:m;Z; d dl<m=Z= d dl>m?Z? d dl@mAZA d dlBmCZC d dlDmEZEmFZFmGZG d dlHmIZI d dlJmKZKmLZL d dlMmNZN d dlOmPZPmQZQ d dlRmSZS d dlTmUZU d dlVmWZW d dlXmYZY d dlZm[Z[m\Z\ d dl]m^Z^ d dl_m`Z` d d lambZb d d!lcmdZdmeZemfZfmgZgmhZh d d"limjZj d d#lkmlZl d d$lmmnZn d d%lompZp d d&lqmrZr d d'lsmtZt d d(lumvZv d d)lwmxZx d d*lymzZzm{Z{ d d+l|m}Z~ ej        ed,<    eld-          Z e            Zee j                 ed.<   ed/e fd0            Zeerj        ddd1d2ed3erd4edz  d5eeef         dz  d6ee;         f
d7            Zeerj        d8dd1d9e9d3erd4ed5eeef         dz  d6ee;         f
d:            Z e            Zd;e"d6eSfd<Zd;e"d6ebfd=Zd;e"d6e;fd>Ze                    d?          d;e"fd@            Ze                    dA          dB             ZdCedz  d6edz  fdDZ G dE dF          Z G dG dH          ZdIed6efdJZ G dK dL          ZdMed6dfdNZdMed6dfdOZd2ed6e fdPZdQe;dRe/d2ed6dfdSZdTeeef         d6ej        fdUZdVed6ej        fdWZdX ZdY Zd_dZZ	 d`	 d_d[Zed\k    r[ ed              etd]^          Z eKe          Ze                                Z eLe            ej         ee                     dS dS )a    N)	Namespace)AsyncIterator	Awaitable)asynccontextmanager)
HTTPStatus)Any)	APIRouterFastAPIHTTPExceptionRequest)RequestValidationError)CORSMiddleware)JSONResponseiterate_in_threadpool)URLHeadersMutableHeadersState)ASGIAppMessageReceiveScopeSend)AsyncEngineArgs)EngineClient)AnthropicServingMessages)load_chat_template)
serve_http)RequestLogger)DemoToolServerMCPToolServer
ToolServer)OpenAIServingChat)make_arg_parservalidate_parsed_serve_args)OpenAIServingCompletion)	ErrorInfoErrorResponse)OpenAIServing)BaseModelPath)OpenAIServingModels)OpenAIServingResponses)OpenAIServingTranscriptionOpenAIServingTranslation)ServingTokens)ScalingMiddleware)OpenAIServingTokenization)cli_env_setuplog_non_default_argslog_version_and_modelprocess_lora_modulessanitize_message)VLLMValidationError)init_logger)ReasoningParserManager)ToolParserManager)UsageContext)FlexibleArgumentParser)freeze_gc_heap)is_valid_ipv6_address)decorate_logs
set_ulimit)__version__prometheus_multiproc_dirz"vllm.entrypoints.openai.api_server_running_tasksappc                  K   	 | j         j        rg| j         j        fd}t          j         |                      }t
                              |           |                    t
          j                   nd }t                       	 d W V  ||
                                 n# ||
                                 w w xY w| ` d S # | ` w xY w)Nc                     K   	 t          j        t          j                   d {V                                    d {V  ?N)asynciosleepenvsVLLM_LOG_STATS_INTERVALdo_log_stats)engine_clients   v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/entrypoints/openai/api_server.py
_force_logzlifespan.<locals>._force_log`   sZ      7!-(DEEEEEEEEE'446666666667    )state	log_statsrN   rI   create_taskrD   addadd_done_callbackremover>   cancel)rE   rP   taskrN   s      @rO   lifespanrZ   Z   s      9 	*-)*AM7 7 7 7 7
 &zz||44Dt$$$"">#89999D 		EEEE    IIICIs$   BC 	B% C %B>>C C	usage_context disable_frontend_multiprocessingclient_configargsr\   r]   r^   returnc               f  K   t          j        d          dk    rpt                              d           t	          j        d           t	          j        dg           t          j                     t                              d           t          j
        |           }|r6|                    dd          |_        |                    dd	          |_        |t          | j                  }t!          ||||
          4 d {V 	 }|W V  d d d           d {V  d S # 1 d {V swxY w Y   d S )NVLLM_WORKER_MULTIPROC_METHOD
forkserverz!Setup forkserver with pre-importszvllm.v1.engine.async_llmzForkserver setup complete!client_count   client_indexr   r[   )osgetenvloggerdebugmultiprocessingset_start_methodset_forkserver_preloadrc   ensure_runningr   from_cli_argsget_api_process_count_api_process_rankboolr]   *build_async_engine_client_from_engine_args)r_   r\   r]   r^   engine_argsengines         rO   build_async_engine_clientrw   x   s      
y/00L@@ 	8999(666.0J/KLLL!###1222 "/55K M)6):):>1)M)M&(5(9(9.!(L(L%'/+/0U+V+V(9#)I#	          
 
                             s   D  
D*-D*Fru   c          
       K   |                      |          }|rt                              d           ddlm} d}|rt          |          ni }|                    dd          }|                    dd          }	 |                    ||| j        | j	        | j
        |||	          }|J |                                 d{V  |W V  |r|                                 dS dS # |r|                                 w w xY w)
z
    Create EngineClient, either:
        - in-process using the AsyncLLMEngine Directly
        - multiprocess using AsyncLLMEngine RPC

    Returns the Client or None if the creation failed.
    )r\   z:V1 is enabled, but got --disable-frontend-multiprocessing.r   )AsyncLLMNrd   re   rf   )vllm_configr\   enable_log_requestsaggregate_engine_loggingdisable_log_statsclient_addressesrd   rf   )create_engine_configri   warningvllm.v1.engine.async_llmry   dictpopfrom_vllm_configr{   r|   r}   reset_mm_cacheshutdown)	ru   r\   r]   r^   rz   ry   	async_llmrd   rf   s	            rO   rt   rt      s`     " 222OOK' USTTT111111!%I ,9@D'''bM $$^Q77L $$^Q77L!--#' + ?%0%I);*%% . 	
 	
	 $$$&&((((((((( 	!     	! 	!9 	!    	!s   =AC& &C?requestc                      t          |           S rH   )tokenizationr   s    rO   baser      s       rQ   c                 $    | j         j        j        S rH   )rE   rR   openai_serving_tokenizationr   s    rO   r   r      s    ;88rQ   c                 $    | j         j        j        S rH   )rE   rR   rN   r   s    rO   rN   rN      s    ;**rQ   z/loadc                 H   K   t          d| j        j        j        i          S )Nserver_loadcontent)r   rE   rR   server_load_metricsr   s    rO   get_server_load_metricsr      s&      ( 0A0U VWWWWrQ   z/versionc                  8   K   dt           i} t          |           S )Nversionr   )VLLM_VERSIONr   )vers    rO   show_versionr      s!      l
#C$$$$rQ   log_config_filec                     | sd S 	 t          |           5 }t          j        |          cd d d            S # 1 swxY w Y   d S # t          $ r'}t                              d| |           Y d }~d S d }~ww xY w)Nz0Failed to load log config from file %s: error %s)openjsonload	Exceptionri   r   )r   fes      rO   load_log_configr      s     t/"" 	 a9Q<<	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	    >QR	
 	
 	
 ttttt	s2   A 6A :A :A 
A4A//A4c            	       f    e Zd ZdZdedee         ddfdZdede	fdZ
d	ed
ededed         fdZdS )AuthenticationMiddlewareaP  
    Pure ASGI middleware that authenticates each request by checking
    if the Authorization Bearer token exists and equals anyof "{api_key}".

    Notes
    -----
    There are two cases in which authentication is skipped:
        1. The HTTP method is OPTIONS.
        2. The request path doesn't start with /v1 (e.g. /health).
    rE   tokensr`   Nc                 6    || _         d |D             | _        d S )Nc                     g | ];}t          j        |                    d                                                     <S )utf-8)hashlibsha256encodedigest).0ts     rO   
<listcomp>z5AuthenticationMiddleware.__init__.<locals>.<listcomp>  s:    VVV!7>!((7*;*;<<CCEEVVVrQ   )rE   
api_tokens)selfrE   r   s      rO   __init__z!AuthenticationMiddleware.__init__  s"    VVvVVVrQ   headersc                 X   |                     d          }|sdS |                    d          \  }}}|                                dk    rdS t          j        |                    d                                                    }d}| j        D ]}|t          j	        ||          z  }|S )NAuthorizationF bearerr   )
rp   	partitionlowerr   r   r   r   r   secretscompare_digest)	r   r   authorization_header_valuescheme_param
param_hashtoken_match
token_hashs	            rO   verify_tokenz%AuthenticationMiddleware.verify_token  s    %,[[%A%A") 	55??DD5<<>>X%%5^ELL$9$9::AACC
/ 	J 	JJ71*jIIIKKrQ   scopereceivesendc                    |d         dvs|d         dk    r|                      |||          S |                    dd          }t          |          j                            |          }t          |          }|                    d          r5|                     |          s t          d	d
id          } ||||          S |                      |||          S )Ntypehttp	websocketmethodOPTIONS	root_path r   z/v1errorUnauthorizedi  )r   status_code)	rE   rp   r   pathremoveprefixr   
startswithr   r   )r   r   r   r   r   url_pathr   responses           rO   __call__z!AuthenticationMiddleware.__call__,  s    = 555xI9U9U 88E7D111IIk2..	U###(55i@@&&&u%% 	2d.?.?.H.H 	2#Wn,ESVWWWH8E7D111xxw---rQ   )__name__
__module____qualname____doc__r   liststrr   r   rs   r   r   r   r   r   r    rQ   rO   r   r     s        	 	WG WT#Y W4 W W W WG     ".e .g .T .iPTo . . . . . .rQ   r   c            	       F    e Zd ZdZdeddfdZdededede	d         fd	Z
dS )
XRequestIdMiddlewarez
    Middleware the set's the X-Request-Id header for each response
    to a random uuid4 (hex) value if the header isn't already
    present in the request, otherwise use the provided request id.
    rE   r`   Nc                     || _         d S rH   )rE   )r   rE   s     rO   r   zXRequestIdMiddleware.__init__B  s    rQ   r   r   r   c                     |d         dvr|                      ||          S t          |          dt          dd ffd}|                      |||          S )Nr   r   r   messager`   c                    K   | d         dk    rXt          | d                   }                    dt          j                    j                  }|                    d|            |            d{V  dS )zx
            Custom send function to mutate the response headers
            and append X-Request-Id to it.
            r   zhttp.response.startr   )rawzX-Request-IdN)r   rp   uuiduuid4hexappend)r   response_headers
request_idrequest_headersr   s      rO   send_with_request_idz;XRequestIdMiddleware.__call__.<locals>.send_with_request_idL  s      
 v"777#1gi6H#I#I#I ,00AQRR
 ''
CCC$w--rQ   )rE   r   r   )r   r   r   r   r   r   s      ` @rO   r   zXRequestIdMiddleware.__call__E  s    = 55588E7D111 "...		  		 D 		  		  		  		  		  		  		  xxw(<===rQ   )r   r   r   r   r   r   r   r   r   r   r   r   rQ   rO   r   r   ;  su         G     >e >g >T >iPTo > > > > > >rQ   r   
chunk_datac                    	 ddl m} ddlm} |                     d          dk    rK|                    |           }|j        r.|j        d         j        j        r|j        d         j        j        S nY|                     d          dk    r@|                    |           }|j        r$|j        d         j	        r|j        d         j	        S n}# t          j        $ rk d| v rd| d         r\| d         d         }d|v r+|d                             d	          r|d         d	         cY S |                    d
          r
|d
         cY S Y nw xY wdS )z0Extract content from a streaming response chunk.r   )ChatCompletionStreamResponse)CompletionStreamResponseobjectzchat.completion.chunktext_completionchoicesdeltar   textr   )0vllm.entrypoints.openai.chat_completion.protocolr   +vllm.entrypoints.openai.completion.protocolr   rp   model_validater   r   r   r   pydanticValidationError)r   r   r   chat_responsecompletion_responsechoices         rO   _extract_content_from_chunkr  Z  s   &	
 	
 	
 	
 	
 	
	
 	
 	
 	
 	
 	

 >>(##'>>>8GG
SSM$ >)>q)A)G)O >$,Q/5==^^H%%):::":"I"I*"U"U"* ;/B/J1/M/R ;*215::# & & &
""z)'<"	*1-F&  VG_%8%8%C%C gy1111F## &f~%%%& 2s    A.C 1AC AE$EEc                   `    e Zd ZdZd Zdedee         fdZdede	fdZ
de	dd	fd
Zde	fdZd	S )
SSEDecoderz:Robust Server-Sent Events decoder for streaming responses.c                 "    d| _         g | _        d S )Nr   )buffercontent_bufferr   s    rO   r   zSSEDecoder.__init__{  s     rQ   chunkr`   c                 .   ddl }	 |                    d          }n# t          $ r g cY S w xY w| xj        |z  c_        g }d| j        v r| j                            dd          \  }| _        |                    d          }|                    d          rv|dd                                         }|d	k    r|                    d
di           n<|r:	  |j	        |          }|                    d|d           n# |j
        $ r Y w xY wd| j        v |S )z4Decode a chunk of SSE data and return parsed events.r   Nr   
re   zdata:    z[DONE]r   donedata)r   r  )r   decodeUnicodeDecodeErrorr  splitrstripr   stripr   loadsJSONDecodeError)r   r	  r   	chunk_streventslinedata_str
event_datas           rO   decode_chunkzSSEDecoder.decode_chunk  s`   	W--II! 	 	 	III	 	y  dk!! $ 1 1$ : :D$+;;t$$Dx(( 
!8>>++x''MM66"23333 !!%/TZ%9%9
vz&J&JKKKK/ ! ! ! ! dk!!  s    ++(C< <
D	D	r  c                      t          |          S )z Extract content from event data.)r  )r   r  s     rO   extract_contentzSSEDecoder.extract_content  s    *:666rQ   r   Nc                 B    |r| j                             |           dS dS )zAdd content to the buffer.N)r  r   )r   r   s     rO   add_contentzSSEDecoder.add_content  s1     	0&&w/////	0 	0rQ   c                 6    d                     | j                  S )z"Get the complete buffered content.r   )joinr  r  s    rO   get_complete_contentzSSEDecoder.get_complete_content  s    wwt*+++rQ   )r   r   r   r   r   bytesr   r   r  r   r  r   r#  r   rQ   rO   r  r  x  s        DD! ! !% DJ    @7$ 73 7 7 7 703 04 0 0 0 0
,c , , , , , ,rQ   r  response_bodyc                     ddl m} t                      dfd} | |                      | _        t                              dt                               dS )z/Log streaming response with robust SSE parsing.r   r   c               3     K   D ]} dz  | V                       |           }|D ]}|d         dk    r1                    |d                   }                    |           ?|d         dk    rs                                }|r>t	          |          dk    r|d d         dz   }	 t
                              d|           nt
                              d             d S d S )	Nre   r   r  r  i   r   z9response_body={streaming_complete: content=%r, chunks=%d}z9response_body={streaming_complete: no_content, chunks=%d})r  r  r   r#  lenri   info)r	  r  eventr   full_contentchunk_countr%  sse_decoders        rO   buffered_iteratorz2_log_streaming_response.<locals>.buffered_iterator  s>      # 	 	E1KKKK !--e44F  =F**)99%-HHG++G44446]f,,#.#C#C#E#EL# |,,t33+7+>+CL,W('    W'   FFF% -		 	rQ   z,response_body={streaming_started: chunks=%d}N)starlette.concurrencyr   r  body_iteratorri   r)  r(  )r   r%  r   r.  r,  r-  s    `  @@rO   _log_streaming_responser1    s    ;;;;;;,,KK             D 323D3D3F3FGGH
KK>M@R@RSSSSSrQ   c                     	 | d                                          }t                              d|           dS # t          $ r t                              d           Y dS w xY w)zLog non-streaming response.r   zresponse_body={%s}zresponse_body={<binary_data>}N)r  ri   r)  r  )r%  decoded_bodys     rO   _log_non_streaming_responser4    so    5$Q'..00(,77777 5 5 534444445s   59 $A! A!c                    | j         rt          d d d t                    }n4| j        rt          d d t                    }nt          t                    }| |j        _        ddlm}  ||           ddlm	}  ||           ddl
m	}  ||           ddlm	}  ||           ddlm	}  ||           ddlm	}  ||           ddlm	}  ||           ddlm}	  |	t"                     |                    t"                     | j        |_        ddlm}
  |
|           |                    t.          | j        | j        | j        | j        	           |                    t:                    d
t<          dt:          fd            }|                    t>                    d
t<          dt>          fd            }d | j         ptB          j"        gD             x}r|                    tF          |           | j$        r|                    tJ                     |                    tL                     tB          j'        r@tP          )                    d           |*                    d          dt<          fd            }| j*        D ]}|+                    dd          \  }}tY          t[          j.        |          |          }t_          j0        |          r|                    |           gt_          j1        |          r |*                    d          |           te          d| d          tg          j4        |          }|S )N)openapi_urldocs_url	redoc_urlrZ   )r7  r8  rZ   )rZ   r   )register_vllm_serve_api_routers)attach_router)register_sagemaker_routes)register_pooling_api_routers)allow_originsallow_credentialsallow_methodsallow_headersr   excc                    K   t          t          t          |j                  t	          |j                  j        |j                            }t          |                                |j                  S )N)r   r   coder   r   )	r)   r(   r7   detailr   r   phraser   
model_dump)r   rA  errs      rO   http_exception_handlerz)build_app.<locals>.http_exception_handler#  so      (44007_  
 
 
 CNN,,#/JJJJrQ   c                   K   d }|                                 }|D ]<}d|v r6d|d         v r,|d         d         }t          |t                    r	|j        } n=t	          |          }t	          |          }|r|r||k    r| d| }n|}t          t          t          |          t          j	        j
        t          j	        |                    }	t          |	                                t          j	                  S )Nctxr   r   )r   r   rC  r   rD  rE  )errors
isinstancer8   	parameterr   r)   r(   r7   r   BAD_REQUESTrG  r   rH  )
r   rA  r   rM  r   	ctx_errorexc_str
errors_strr   rI  s
             rO   validation_exception_handlerz/build_app.<locals>.validation_exception_handler.  s      	 	E~~'U5\"9"9!%L1	i)<== %/EEc(([[
 	j 	Z7%:%: //://GGG(11+2+	  
 
 
 CNN,,*:PQQQQrQ   c                     g | ]}||S r   r   )r   keys     rO   r   zbuild_app.<locals>.<listcomp>L  s    NNN##N#NNNrQ   )r   z}CAUTION: Enabling log response in the API Server. This can include sensitive information and should be avoided in production.r   r   c                 \  K    ||            d {V }d |j         2              d {V }t          t          |                    |_         |j                            dd          }|dk    }|st
                              d           n"|rt          ||           nt          |           |S )Nc                 "   K   g | 3 d {V }|
6 S rH   r   )r   sections     rO   r   z3build_app.<locals>.log_response.<locals>.<listcomp>_  s.      QQQQQQQQwWQQQQs   zcontent-typer   z text/event-stream; charset=utf-8zresponse_body={<empty>})	r0  r   iterr   rp   ri   r)  r1  r4  )r   	call_nextr   r%  content_typeis_streamings         rO   log_responsezbuild_app.<locals>.log_response\  s      &Yw////////HQQ(:PQQQQQQQQQM%:4;N;N%O%OH"#+//CCL'+MML ! ;56666 ;'-@@@@+M:::OrQ   .re   zInvalid middleware z . Must be a function or a class.)5disable_fastapi_docsr
   rZ   enable_offline_docsrR   r_   vllm.entrypoints.server9  2vllm.entrypoints.openai.chat_completion.api_routerr:  ,vllm.entrypoints.openai.responses.api_router/vllm.entrypoints.openai.translations.api_router-vllm.entrypoints.openai.completion.api_router%vllm.entrypoints.anthropic.api_router)vllm.entrypoints.openai.models.api_router!vllm.entrypoints.sagemaker.routesr;  routerinclude_routerr   vllm.entrypoints.poolingr<  add_middlewarer   allowed_originsr>  allowed_methodsallowed_headersexception_handlerr   r   r   api_keyrK   VLLM_API_KEYr   enable_request_id_headersr   r1   "VLLM_DEBUG_LOG_API_SERVER_RESPONSEri   r   
middlewarersplitgetattr	importlibimport_moduleinspectisclassiscoroutinefunction
ValueErrorsagemaker_standards	bootstrap)r_   rE   r9  register_chat_api_routerregister_responses_api_router register_translations_api_routerregister_completion_api_routerregister_anthropic_api_routerregister_models_api_routerr;  r<  rJ  rT  r   r^  rv  module_pathobject_nameimporteds                      rO   	build_appr    s     )tth
 
 
 
	! )tthGGGx(((CINFFFFFF##C(((      S!!!      "!#&&&      %$S)))      #"3'''      "!#&&&      s###KKKKKKf%%%vNCMEEEEEE  %%%*0**     	=))K Km K K K *)K 	122Rg R<R R R R 32R: ON$,"E43D2ENNNNv D3FCCC% 1/000 ())). %	
 	
 	
 
			 	 	 	 
 		" o 
 

#-#4#4S!#<#< [92;??MM?8$$ 	x(((((22 	"CNN6""8,,,,RjRRR   
'
,
,CJrQ   rN   rR   c                 t  K   | j         }j        j        }nj        g}j        rt	          j                  }nd }fd|D             }| |_        j         |_        ||_         |_	        | 
                                 d {V }t                              d|           t          j                  }j        dk    r@t!                      }	t#          |	t                     sJ |	                                 d {V  n8j        r/t'                      }	|	                    j                   d {V  nd }	|j        |j        j        ni }
|j        |j        j        ni }
t/          j        |
          }t3          | ||          |_        |j                                         d {V  d|v rTt9          | |j        ||j        j        j        j         |	j!        j"        j#        j$        j%        j&                  nd |_'        d|v rmtQ          | |j        j)        f||j        j*        j+        j        j        j,        j         j!        j"        j#        j$        j%        j-        j&        dnd |_.        |j.        |j.        /                                 d {V  d|v r/ta          | |j        |j        j#        j$        j&        	          nd |_1        te          | |j        ||j        j+        j&        
          |_3        d|v r#ti          | |j        |j&        j$                  nd |_5        d|v r#tm          | |j        |j&        j$                  nd |_7        d|v rMtq          | |j        j)        ||j        j        j        j         j!        j"        j#        j$                  nd |_9        d|v r5tu          | |j        |j        j&        j#        j%        j;                  nd |_<        ddl=m>}  || |           d {V  j?        |_?        d|_@        d S )N)max_log_lenc                 <    g | ]}t          |j                   S ))name
model_path)r+   model)r   r  r_   s     rO   r   z"init_app_state.<locals>.<listcomp>  s5       <@4DJ777  rQ   zSupported tasks: %sdemo)rN   base_model_pathslora_modulesgenerate)request_loggerchat_templatechat_template_content_formatreturn_tokens_as_token_idsenable_auto_toolstool_parsertool_serverreasoning_parserenable_prompt_tokens_detailsenable_force_include_usageenable_log_outputslog_error_stack)r  r  r  default_chat_template_kwargstrust_request_chat_templater  r  #exclude_tools_when_tool_choice_noner  r  r  r  r  enable_log_deltasr  )r  r  r  r  r  )r  r  r  r  r  transcription)r  r  r  )	r  r  r  r  r  r  r  r  r  )r  r  r  r  r  force_no_detokenizer   )init_pooling_state)Arz   served_model_namer  r{   r    r  rN   r}   rS   r_   get_supported_tasksri   r)  r   r  r  r!   rN  init_and_validater"   add_tool_serverlora_configdefault_mm_lorasr6   r  r,   openai_serving_modelsinit_static_lorasr-   r  r  enable_auto_tool_choicetool_call_parserstructured_outputs_configr  r  r  r  r  openai_serving_responsesr$   response_roler  r  r  r  openai_serving_chatwarmupr'   openai_serving_completionr2   r   r.   openai_serving_transcriptionr/   openai_serving_translationr   anthropic_serving_messagesr0   tokens_onlyserving_tokensrl  r  enable_server_load_trackingr   )rN   rR   r_   rz   served_model_namesr  r  supported_tasksresolved_chat_templater  r  r  r  s     `          rO   init_app_stater    s     
  +K)!3"j\ &43CDDD   DV   (E00EO#EEJ)==????????O
KK%777/0BCC6!!)7)9)9+~66666++----------		 #oo))$*:;;;;;;;;;;
 ". 	00  ". 	00 
 ((9;KLLL"5#)!# # #E
 
%
7
7
9
99999999$ ((! 	')0)-)J'+'F":-#!;L)-)J'+'F#6 0	
 	
 	
 	
" % 
"R (() 	'	
 *0)-)J)-)J(,(H'+'F":040X-!;L)-)J'+'F#6"4 0%	
 	
 	
 	
* - 
2  ,'..000000000 (( 	 ')'+'F)-)J'+'F 0	
 	
 	
 	
  
# )B#%,%)%F$($D,) ) )E%" o-- 	#') 0'+'F	
 	
 	
 	
  
&& o-- 	!') 0'+'F	
 	
 	
 	
  
$4 (( 	!')0)-)J'+'F":-!;L)-)J'+'F	
 	
 	
 	
 ! 
$: (( 	')'+'F 0)-)J#6 $ 0		
 		
 		
 		
  
 <;;;;;

]E4
8
88888888(,(HE% !ErQ   addrc                 v   t           j        }t          | d                   rt           j        }t          j         |t           j                  }|                    t           j        t           j        d           |                    t           j        t           j        d           |	                    |            |S )Nr   familyr   re   )
socketAF_INETr?   AF_INET6SOCK_STREAM
setsockopt
SOL_SOCKETSO_REUSEADDRSO_REUSEPORTbind)r  r  socks      rO   create_server_socketr  ?  s    ^FT!W%% !=V-?@@@DOOF%v':A>>>OOF%v':A>>>IIdOOOKrQ   r   c                     t          j         t           j        t           j                  }|                    |            |S )Nr  )r  AF_UNIXr  r  )r   r  s     rO   create_server_unix_socketr  L  s/    =V5GHHHDIIdOOOKrQ   c                 H   t          j                    }| j        r7| j        |vr.t	          d| j         dd                    |           d          t          j                    }| j        j        x}r-||vr+t	          d| dd                    |           d          d S d S )Nzinvalid tool call parser: z (chose from { ,z })zinvalid reasoning parser: )	r;   list_registeredr  r  KeyErrorr"  r:   r  r  )r_   valid_tool_parsesvalid_reasoning_parsersr  s       rO   validate_api_server_argsr  R  s    )9;;# 
(=EV(V(V@)> @ @!hh'899@ @ @
 
 	

 5DFF :KK

"9
9
9F)9 F F!hh'>??F F F
 
 	

 

9
9rQ   c                    t          t          t          | j                   t	          |            | j        r1t          | j                  dk    rt          j        | j                   | j	        r1t          | j	                  dk    rt          j        | j	                   t          |            | j        rt          | j                  }n| j        pd| j        f}t#          |          }t%                       dd}t'          j        t&          j        |           | j        rd| j         }n;|\  }}| j        o| j        }t/          |          rd| dn|pd	}d
|rdnd d| d| }||fS )zRValidate API server args, set up signal handler, create socket
    ready to serve.   r   r`   Nc                       t          d          )N
terminated)KeyboardInterrupt)r   s    rO   signal_handlerz$setup_server.<locals>.signal_handler  s    ---rQ   zunix:[]z0.0.0.0r   sz://:r`   N)r5   ri   r   r  r4   tool_parser_pluginr(  r;   import_tool_parserreasoning_parser_pluginr:   import_reasoning_parserr  udsr  hostportr  rA   signalSIGTERMssl_keyfilessl_certfiler?   )	r_   r  	sock_addrr  listen_addressr  r  is_ssl	host_parts	            rO   setup_serverr  d  s    &,
;;; F3t'>#?#?!#C#C,T-DEEE# UD,H(I(IA(M(M6t7STTTT"""
 x /(22Y_"di0	#I.. LLL. . . . M&..111x M+++
d!7d&7#8#>#>UKKKKKDDUI	Lv 52LL)LLdLL4rQ   c                 v   K   t          d           t          |           \  }}t          ||| fi | d{V  dS )zRun a single-worker API server.	APIServerN)r@   r  run_server_worker)r_   uvicorn_kwargsr  r  s       rO   
run_serverr    sZ       +'--ND
ND$
I
I.
I
IIIIIIIIIIrQ   c                   K   |j         r1t          |j                   dk    rt          j        |j                    |j        r1t          |j                  dk    rt          j        |j                   t          |j                  }|||d<   t          ||          4 d{V 	 }t          |          }t          ||j        |           d{V  t                              d|j        j        j        |            t%          |f||j        |j        |j        |j        |j         t0          j        |j        |j        |j        |j        |j        |j        |j         d| d{V }ddd          d{V  n# 1 d{V swxY w Y   	 | d{V  |!                                 dS # |!                                 w xY w)zRun a single API server worker.r  N
log_config)r^   z!Starting vLLM API server %d on %s)r  enable_ssl_refreshr  r  	log_level
access_logtimeout_keep_aliver  r  ssl_ca_certsssl_cert_reqsssl_ciphersh11_max_incomplete_event_sizeh11_max_header_count)"r  r(  r;   r  r  r:   r  r   r   rw   r  r  rR   ri   r)  rz   parallel_configrr   r   r  r  r  uvicorn_log_leveldisable_uvicorn_access_logrK   VLLM_HTTP_TIMEOUT_KEEP_ALIVEr  r  r  r  r  r  r  close)	r  r  r_   r^   r  r  rN   rE   shutdown_tasks	            rO   r  r    s     
  F3t'>#?#?!#C#C,T-DEEE# UD,H(I(IA(M(M6t7STTT !!566J'1|$(#    
  
  
  
  
  
  
  
 
oo]CIt<<<<<<<<</%5G	
 	
 	

 )
#6,  ::#@(**,(*.*L!%!:#
 
$ %
 
 
 
 
 
 
 
 
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
F



s   &C E88
FF
F( (F>__main__z*vLLM OpenAI-Compatible RESTful API server.)descriptionr  rH   )rI   r   ry  r{  r   rk   multiprocessing.forkserverrc   rg   r   r  r  tempfiler   argparser   collections.abcr   r   
contextlibr   r   r   typingr   +model_hosting_container_standards.sagemaker	sagemakerr  r   uvloopfastapir	   r
   r   r   fastapi.exceptionsr   fastapi.middleware.corsr   fastapi.responsesr   r/  r   starlette.datastructuresr   r   r   r   starlette.typesr   r   r   r   r   	vllm.envsrK   vllm.engine.arg_utilsr   vllm.engine.protocolr   "vllm.entrypoints.anthropic.servingr   vllm.entrypoints.chat_utilsr   vllm.entrypoints.launcherr   vllm.entrypoints.loggerr     vllm.entrypoints.mcp.tool_serverr!   r"   r#   /vllm.entrypoints.openai.chat_completion.servingr$    vllm.entrypoints.openai.cli_argsr%   r&   *vllm.entrypoints.openai.completion.servingr'   'vllm.entrypoints.openai.engine.protocolr(   r)   &vllm.entrypoints.openai.engine.servingr*   'vllm.entrypoints.openai.models.protocolr+   &vllm.entrypoints.openai.models.servingr,   )vllm.entrypoints.openai.responses.servingr-   ,vllm.entrypoints.openai.translations.servingr.   r/   %vllm.entrypoints.serve.disagg.servingr0   ,vllm.entrypoints.serve.elastic_ep.middlewarer1   'vllm.entrypoints.serve.tokenize.servingr2   vllm.entrypoints.utilsr3   r4   r5   r6   r7   vllm.exceptionsr8   vllm.loggerr9   vllm.reasoningr:   vllm.tool_parsersr;   vllm.usage.usage_libr<   vllm.utils.argparse_utilsr=   vllm.utils.gc_utilsr>   vllm.utils.network_utilsr?   vllm.utils.system_utilsr@   rA   vllm.versionrB   r   TemporaryDirectory__annotations__ri   setrD   TaskrZ   OPENAI_API_SERVERrs   r   r   rw   rt   rj  r   r   rN   rp   r   r   r   r   r   r  r  r   r1  r4  r  r  tupleintr  r  r  r  r  r  r   parser
parse_argsr_   runr   rQ   rO   <module>rF     s	               / / / / / / 				            4 4 4 4 4 4 4 4 * * * * * *             I I I I I I   > > > > > > > > > > > > 5 5 5 5 5 5 2 2 2 2 2 2 * * * * * * 7 7 7 7 7 7 H H H H H H H H H H H H B B B B B B B B B B B B B B       1 1 1 1 1 1 - - - - - - G G G G G G : : : : : : 0 0 0 0 0 0 1 1 1 1 1 1 V V V V V V V V V V M M M M M M X X X X X X X X N N N N N N        A @ @ @ @ @ A A A A A A      M L L L L L        @ ? ? ? ? ?      N M M M M M              0 / / / / / # # # # # # 1 1 1 1 1 1 / / / / / / - - - - - - < < < < < < . . . . . . : : : : : : = = = = = = = = 4 4 4 4 4 4"5 5 5 5 
9	:	: %(CEEGL! ) ) )     :  #/"@48+/     
     '+Tk	 
 S>D(  <        F  #/"@-2+/1! 1! 1! 1!  1! '+	1!
 S>D(1! < 1! 1! 1! 1!h 
!' !m ! ! ! !
9' 9&? 9 9 9 9+7 +| + + + + GX7 X X X X, J% % %

S4Z 
D4K 
 
 
 
-. -. -. -. -. -. -. -.`> > > > > > > >>D S    <2, 2, 2, 2, 2, 2, 2, 2,j*TT *Td *T *T *T *TZ5t 5 5 5 5 5YI Y' Y Y Y Yx}"}"}" }" 
	}" }" }" }"@
uS#X 
6= 
 
 
 
C FM    
 
 
$)  )  ) XJ J J J /36	6 6 6 6r z MOOO##@  F _V$$FDt$$$FJzz$      rQ   