
    -`iT                     T   U d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlZd dlZd dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ d dlmZ d dlm Z m!Z! d dl"m#Z# d dl$m%Z%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z,m-Z- d dl.m/Z/ d dl0m1Z1m2Z2m3Z3m4Z4 d dl5m6Z6 d dl7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z> d dl?m@ZA  e8eB          ZCe e,z  e3z  e1z  ZDeeEd<    G d de&          ZF G d d e&          ZG G d! d"e&          ZHd#e>fd$ZId% ZJd&ZK G d' d(          ZLd)eMd*eMfd+ZNd,eMd-eOeH         d*dfd.ZPd/eMd0eMd1eQd*dfd2ZRd)eMd-eOeH         d3eMd*dfd4ZSd5eFd6eMd*eHfd7ZTd5eFd6eMd*eHfd8ZUd9ed5eFd:eLd*eHfd;ZVd< ZWd=ed>ed*dfd?ZXd>efd@ZYeBdAk    r eJ            ZZeC[                    dBeA           eC[                    dCeZ           eZj\        r-eC[                    dD            eeZj]        eZj^        E           neC[                    dF            e j_         eYeZ                     dS dS )G    N)	Namespace)	AwaitableCallable)
HTTPStatus)StringIO)Any	TypeAlias)start_http_server)TypeAdapterfield_validator)ValidationInfo)tqdm)AsyncEngineArgsoptional_type)EngineClient)RequestLogger)ChatCompletionRequestChatCompletionResponse)OpenAIServingChat)ErrorResponseOpenAIBaseModel)BaseModelPath)OpenAIServingModels)EmbeddingRequestEmbeddingResponse)OpenAIServingEmbedding)RerankRequestRerankResponseScoreRequestScoreResponse)ServingScores)init_logger)ReasoningParserManager)random_uuid)FlexibleArgumentParser)__version__BatchRequestInputBodyc                       e Zd ZU dZeed<   eed<   eed<   eed<    edd          ede	d	e
fd
                        ZdS )BatchRequestInputz
    The per-line object of the batch input file.

    NOTE: Currently only the `/v1/chat/completions` endpoint is supported.
    	custom_idmethodurlbodyplain)modevalueinfoc                    |j         d         }|dk    rt          j        |          S |dk    r't          t                                        |          S |                    d          r't          t                                        |          S |                    d          rt          j        |          S t          t                                        |          S )Nr,   /v1/chat/completions/v1/embeddings/score/rerank)
datar   model_validater   r   validate_pythonendswithr   r   r'   )clsr0   r1   r,   s       u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/entrypoints/openai/run_batch.pycheck_type_for_urlz$BatchRequestInput.check_type_for_urlN   s     9U#((((7>>>"""/00@@GGG<<!! 	D|,,<<UCCC<<	"" 	7 /666011AA%HHH    N)__name__
__module____qualname____doc__str__annotations__r'   r   classmethodr   r   r=    r>   r<   r)   r)   8   s           NNN KKK 
HHH  _V'***Is I. I I I [ +*I I Ir>   r)   c                   N    e Zd ZU dZeed<   eed<   dZee	z  e
z  ez  dz  ed<   dS )BatchResponseData   status_code
request_idNr-   )r?   r@   rA   rJ   intrD   rC   r-   r   r   r    r   rF   r>   r<   rH   rH   ^   sq         K OOO 	 	
	
	 	 		    r>   rH   c                   H    e Zd ZU dZeed<   eed<   edz  ed<   edz  ed<   dS )BatchRequestOutputzA
    The per-line object of the batch output and error files
    idr*   Nresponseerror)r?   r@   rA   rB   rC   rD   rH   r   rF   r>   r<   rN   rN   o   sU           	GGG NNN$&&&& :r>   rN   parserc                    |                      dddt          d           |                      dddt          d           |                      d	t          d d
           |                      dt          t                    dd           t          j        |           } |                      dt
          d d           |                      ddd           |                      dt          dd           |                      dt
          dd           |                      dddd           |                      dddd            | S )!Nz-iz--input-fileTzThe path or url to a single input file. Currently supports local file paths, or the http protocol (http or https). If a URL is specified, the file should be available via HTTP GET.)requiredtypehelpz-oz--output-filezThe path or url to a single output file. Currently supports local file paths, or web (http or https) urls. If a URL is specified, the file should be available via HTTP PUT.z--output-tmp-dirzMThe directory to store the output file before uploading it to the output URL.)rU   defaultrV   z--response-role	assistantz@The role name to return if `request.add_generation_prompt=True`.z--max-log-lenz^Max number of prompt characters or prompt ID numbers being printed in log.

Default: Unlimitedz--enable-metrics
store_truezEnable Prometheus metrics)actionrV   z--urlz0.0.0.0zLURL to the Prometheus metrics server (only needed if enable-metrics is set).z--porti@  zUPort number for the Prometheus metrics server (only needed if enable-metrics is set).z--enable-prompt-tokens-detailsFz6If set to True, enable prompt_tokens_details in usage.)rZ   rW   rV   z--enable-force-include-usagezZIf set to True, include usage on every request (even when stream_options is not specified))add_argumentrC   r   r   add_cli_argsrL   rR   s    r<   make_arg_parserr^      s   
5     6     	     3O	     )&11F
!	     <6Q     2	     2	     (E	     &6	     Mr>   c                  d    t          d          } t          |                                           S )Nz$vLLM OpenAI-Compatible batch runner.)description)r%   r^   
parse_argsr]   s    r<   ra   ra      s-    #0VWWWF6""--///r>   z_{desc}: {percentage:3.0f}% Completed | {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]
c                   ,    e Zd Zd Zd Zd ZdefdZdS )BatchProgressTrackerc                 "    d| _         d | _        d S )Nr   )_total_pbarselfs    r<   __init__zBatchProgressTracker.__init__   s    "&


r>   c                 &    | xj         dz  c_         d S )N   )re   rg   s    r<   	submittedzBatchProgressTracker.submitted   s    qr>   c                 J    | j         r| j                                          d S d S N)rf   updaterg   s    r<   	completedzBatchProgressTracker.completed   s0    : 	 J	  	 r>   returnc                     t           j                                         p!t           j                                        dk    }t	          | j        ddd| t                    | _        | j        S )Nr   reqzRunning batch   )totalunitdescminintervaldisable
bar_format)torchdistributedis_initializedget_rankr   re   _BAR_FORMATrf   )rh   enable_tqdms     r<   pbarzBatchProgressTracker.pbar   sn    !00222We6G6P6P6R6RVW6W 	 + #O"
 
 

 zr>   N)r?   r@   rA   ri   rl   rp   r   r   rF   r>   r<   rc   rc      s\        ' ' '       d      r>   rc   path_or_urlrq   c                 .  K   |                      d          s|                      d          rt          j                    4 d {V }|                    |           4 d {V }|                                 d {V cd d d           d {V  cd d d           d {V  S # 1 d {V swxY w Y   d d d           d {V  d S # 1 d {V swxY w Y   d S t          | d          5 }|                                cd d d            S # 1 swxY w Y   d S )Nhttp://https://utf-8encoding)
startswithaiohttpClientSessiongettextopenread)r   sessionrespfs       r<   	read_filer      s     i(( K,B,B:,N,N (** 	% 	% 	% 	% 	% 	% 	%gw{{;7O7O 	% 	% 	% 	% 	% 	% 	%SW$$$$$$	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% +000 	A6688	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	sH   C"B!<C!
B+	+C.B+	/C
CC)D

DDoutput_pathbatch_outputsc                    K   t          | dd          5 }|D ]%}t          |                                |           &	 ddd           dS # 1 swxY w Y   dS )z
    Write the responses to a local file.
    output_path: The path to write the responses to.
    batch_outputs: The list of batch outputs to write.
    wr   r   fileN)r   printmodel_dump_json)r   r   r   os       r<   write_local_filer      s       
k3	1	1	1 /Q 	/ 	/A!##%%A.....	// / / / / / / / / / / / / / / / / /s   )AAA
output_urldata_or_file	from_filec                   K   d}d}t          d|dz             D ]}	 t          j        t          j        d                    4 d{V 	 }|rt	          |d          5 }|                    | |          4 d{V }|j        d	k    r,t          d
|j         d|                                           	 ddd          d{V  n# 1 d{V swxY w Y   ddd           n# 1 swxY w Y   ny|                    | |          4 d{V }|j        d	k    r,t          d|j         d|                                           	 ddd          d{V  n# 1 d{V swxY w Y   ddd          d{V  n# 1 d{V swxY w Y   v# t          $ rm}	||k     r8t          
                    d||	|           t          j        |           d{V  n$t          d| dt          |	           d          |	Y d}	~	d}	~	ww xY wdS )z
    Upload a local file to a URL.
    output_url: The URL to upload the file to.
    data_or_file: Either the data to upload or the path to the file to upload.
    from_file: If True, data_or_file is the path to the file to upload.
    rt   rk   i  )ru   )timeoutNrb)r7   rI   zFailed to upload file.
Status: z
Response: zFailed to upload data.
Status: zPFailed to upload data (attempt %d). Error message: %s.
Retrying in %d seconds...zFailed to upload data (attempt z). Error message: .)ranger   r   ClientTimeoutr   putstatus	Exceptionr   loggerrQ   asynciosleeprC   )
r   r   r   max_retriesdelayattemptr   r   rP   es
             r<   upload_datar     sg      KEK!O,, % %$	 ,-D999           lD11 "T#*;;z;#E#E " " " " " " "'#55&/%C/7%C %C19%C %C'" '" !" !"" " " " " " " " " " " " " " " " " " " " " " " " " " "" " " " " " " " " " " " " " "  '{{:L{II       X#?c11"+!?+3?!? !?-5]]__!? !?# #                                                      *  	 	 	$$g	   mE**********ZgZZQTUVQWQWZZZ  +****	3% %s   .FE= C&>8C		7C&	
CC&CC&E=&C**E=-C*.!E=8EE=
E$$E='E$(E=+F=
F	F
F	F
HA"HHoutput_tmp_dirc                   K   |                      d          s|                      d          rz|t                              d           t                      }|D ]%}t	          |                                |           &|                    d           t                              d|            t          | |                                	                                
                    d          d	
           d{V  dS t          j        dd|dd          5 }t                              d|j                   t          |j        |           d{V  t                              d|            t          | |j        d
           d{V  ddd           dS # 1 swxY w Y   dS t                              d|            t          | |           d{V  dS )a  
    Write batch_outputs to a file or upload to a URL.
    path_or_url: The path or URL to write batch_outputs to.
    batch_outputs: The list of batch outputs to write.
    output_tmp_dir: The directory to store the output file before uploading it
    to the output URL.
    r   r   Nz Writing outputs to memory bufferr   r   zUploading outputs to %sr   F)r   r   tmp_batch_output_z.jsonl)r/   r   dirprefixsuffixz*Writing outputs to temporary local file %sTz Writing outputs to local file %s)r   r   r1   r   r   r   seekr   r   stripencodetempfileNamedTemporaryFilenamer   )r   r   r   output_bufferr   r   s         r<   
write_filer   C  su      i(( ;K,B,B:,N,N ;!KK:;;;$JJM" ? ?a''))>>>>>q!!!KK1;???""$$**,,33G<<            , "*   
G H!&QQQ&qv}=========5{CCC!+qvFFFFFFFFFF
G 
G 
G 
G 
G 
G 
G 
G 
G 
G 
G 
G 
G 
G 
G 
G 
G 
G 	6DDD{M:::::::::::s   A4FF"Frequest	error_msgc           
          t          dt                       | j        t          t          j        dt                                 |          }|S )Nvllm-vllm-batch-rJ   rK   rO   r*   rP   rQ   )rN   r$   r*   rH   r   BAD_REQUEST)r   r   batch_outputs      r<   make_error_request_outputr   l  sa     &";==""#"".4[]]44
 
 
   L r>   c                 &   K   t          | |          S rn   )r   )r   r   s     r<   make_async_error_request_outputr   {  s       %Wi888r>   serving_engine_functrackerc           
      &  K    | |j                    d {V }t          |t          t          t          t
          f          rFt          dt                       |j        t          |dt                                 d           }nvt          |t                    rPt          dt                       |j        t          |j        j        dt                                 |          }nt          |d          }|                                 |S )Nr   r   )r-   rK   r   r   z'Request must not be sent in stream moder   )r-   
isinstancer   r   r    r   rN   r$   r*   rH   r   rQ   coder   rp   )r   r   r   rP   r   s        r<   run_requestr     sG     
 )(66666666H	!2M>R  
 *&{}}&&'&*G*G*G   
 
 
 
Hm	,	, 
)&{}}&&'&$N/888   
 
 
 1H
 
 
 r>   c                     t          j                    }| j        j        x}r-||vr+t	          d| dd                    |           d          d S d S )Nzinvalid reasoning parser: z (chose from { ,z }))r#   list_registeredstructured_outputs_configreasoning_parserKeyErrorjoin)argsvalid_reasoning_parsersr   s      r<   validate_run_batch_argsr     s    4DFF :KK

"9
9
9F)9 F F!hh'>??F F F
 
 	

 

9
9r>   engine_clientr   c                   K   j         j         }nj        g}j        rt          j                  }nd }fd|D             }| j        }|                                  d {V }t                              d|           t          | |d           }d|v rAt          | |j        |d dj        j        j        j        t!          dd           
  
        nd }d	|v rt#          | ||d d
          nd }	d|v ot!          |j        dd          dk    }
d	|v s|
rt'          | ||d           nd }t)                      }t                              dj                   g }t-          j                   d {V                                                     d          D ]\}|                                }|st2                              |          }|j        dk    rk||j        nd }|%|                    t=          |d                     q|                    t?          |||                     |                                  |j        dk    rl|	|	j!        nd }|%|                    t=          |d                     |                    t?          |||                     |                                  !|j        "                    d          rm||j#        nd }|&|                    t=          |d                     n|                    t?          |||                     |                                  |j        "                    d          rm||j$        nd }|&|                    t=          |d                     |                    t?          |||                     |                                  /|                    t=          |d|j         d                     ^|%                                5  tM          j'        |  d {V }d d d            n# 1 swxY w Y   tQ          j)        |j*                   d {V  d S )N)max_log_lenc                 <    g | ]}t          |j                   S ))r   
model_path)r   model).0r   r   s     r<   
<listcomp>zrun_batch.<locals>.<listcomp>  s5       <@4DJ777  r>   zSupported tasks: %s)r   base_model_pathslora_modulesgenerateautodefault_chat_template_kwargs)request_loggerchat_templatechat_template_content_formatr   enable_prompt_tokens_detailsenable_force_include_usager   embed)r   r   r   classify
num_labelsr   rk   )r   score_templatezReading batch from %s...
r3   z/The model does not support Chat Completions APIr   r4   z)The model does not support Embeddings APIr5   z%The model does not support Scores APIr6   z%The model does not support Rerank APIzURL z was used. Supported endpoints: /v1/chat/completions, /v1/embeddings, /score, /rerank .See vllm/entrypoints/openai/api_server.py for supported score/rerank versions.)+served_model_namer   enable_log_requestsr   r   model_configget_supported_tasksr   r1   r   r   response_roler   r   r   r   getattrr   	hf_configr!   rc   
input_filer   r   splitr)   model_validate_jsonr,   create_chat_completionappendr   r   rl   create_embeddingr:   create_score	do_rerankr   r   gatherr   output_filer   )r   r   served_model_namesr   r   r   supported_tasksopenai_serving_modelsopenai_serving_chatopenai_serving_embeddingenable_serving_rerankingopenai_serving_scoresr   response_futuresrequest_jsonr   chat_handler_fnembed_handler_fnscore_handler_fnrerank_handler_fn	responsess    `                   r<   	run_batchr    s      )!3"j\ &43CDDD   DV   !-L)==????????O
KK%777 0#)  * (( 	!))/!;L)-)J'+'F)04d* *	
 	
 	
 	
 ! 6 o%% 	!))/	
 	
 	
 	
   	o% 	BL*L!<<A  &&*B& 	!)		
 	
 	
 	
   #$$G
KK*DO<<< =?(99999999@@BBHHNN W W#))++ 	#77EE ;000 '2 $:: 
 & ''3"S     ##K'$R$RSSS[,,, ,7 )99 
  ' ''3"M     ##K0@'7$S$STTT[!!(++ ,	 )4 &22 
  ' ''3"I     ##K0@'7$S$STTT[!!),, 	 )4 &// 
 !( ''3"I     ##K0A7G$T$TUUU##/-W[ - - -  	 	 	 	 
 < <!.*:;;;;;;;	< < < < < < < < < < < < < < < T%y$2E
F
FFFFFFFFFFs   P22P69P6c                    K   ddl m} ddlm} t	          |             || |j        d          4 d {V 	 }t          ||            d {V  d d d           d {V  d S # 1 d {V swxY w Y   d S )Nr   )build_async_engine_client)UsageContextF)usage_context disable_frontend_multiprocessing)"vllm.entrypoints.openai.api_serverr  vllm.usage.usage_libr  r   OPENAI_BATCH_RUNNERr  )r   r  r  r   s       r<   mainr  b  sA     LLLLLL111111D!!!(("6).   - - - - - - - - 
t,,,,,,,,,- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -s   A""
A,/A,__main__z$vLLM batch processing API version %szargs: %szPrometheus metrics enabled)portaddrzPrometheus metrics disabled)`r   r   argparser   collections.abcr   r   httpr   ior   typingr   r	   r   r{   prometheus_clientr
   pydanticr   r   pydantic_core.core_schemar   r   vllm.engine.arg_utilsr   r   vllm.engine.protocolr   vllm.entrypoints.loggerr   0vllm.entrypoints.openai.chat_completion.protocolr   r   /vllm.entrypoints.openai.chat_completion.servingr   'vllm.entrypoints.openai.engine.protocolr   r   'vllm.entrypoints.openai.models.protocolr   &vllm.entrypoints.openai.models.servingr   'vllm.entrypoints.pooling.embed.protocolr   r   &vllm.entrypoints.pooling.embed.servingr   'vllm.entrypoints.pooling.score.protocolr   r   r   r    &vllm.entrypoints.pooling.score.servingr!   vllm.loggerr"   vllm.reasoningr#   
vllm.utilsr$   vllm.utils.argparse_utilsr%   vllm.versionr&   VLLM_VERSIONr?   r   r'   rD   r)   rH   rN   r^   ra   r   rc   rC   r   listr   boolr   r   r   r   r   r   r  r  r   r1   enable_metricsr  r,   runrF   r>   r<   <module>r>     s           / / / / / / / /             ! ! ! ! ! ! ! !   / / / / / / 1 1 1 1 1 1 1 1 4 4 4 4 4 4       @ @ @ @ @ @ @ @ - - - - - - 1 1 1 1 1 1        N M M M M M        B A A A A A F F F F F F W W W W W W W W I I I I I I            A @ @ @ @ @ # # # # # # 1 1 1 1 1 1 " " " " " " < < < < < < 4 4 4 4 4 4	X		 ,,|;mK y   
#I #I #I #I #I #I #I #IL       "       $K2 K K K K\0 0 0 q       6     //%)*<%=/	/ / / /2# 2S 2T 2d 2 2 2 2j&;&;%)*<%=&;OR&;	&; &; &; &;R+.   99+.99 9 9 9#!## "# 	# # # #L
 
 
mGmG
mG 
mG mG mG mG`-Y - - - - z:<<D
KK6EEE
KK
D!!!  30111tytx888881222GKT

 r>   