
    -`i              	          d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	 ddl
mZmZ ddlmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z)  ed          Z* e            a+da,tV          -                    d          defd            Z.tV          /                    d          dedefd            Z0ede1dedefd            Z2dedefdZ3	 d8dededz  defd Z4	 d8dededz  d!e	ddfd"Z5e6d#k    r8 e$            Z7e78                    d$e9d%           e78                    d&e7j:        d'%           e78                    d(e9d%           e78                    d)e9d%           e78                    d*e9dd+,           e78                    d-d.d/d01           e78                    d2e; e;ej<                  d3,           e78                    d4e9dd5,           e78                    d6e9d7%            ej=        e7          Z7e7>                                Z? ej@         e5e?                     dS dS )9aR  
NOTE: This API server is used only for demonstrating usage of AsyncEngine
and simple performance benchmarks. It is not intended for production use.
For production use, we recommend using our OpenAI compatible server.
We are also not going to accept PRs modifying this file, please
change `vllm/entrypoints/openai/api_server.py` instead.
    N)	Namespace)AsyncGenerator)Any)FastAPIRequest)JSONResponseResponseStreamingResponse)AsyncEngineArgs)AsyncLLMEngine)
serve_http)with_cancellation)init_logger)SamplingParams)UsageContext)random_uuid)FlexibleArgumentParser)
set_ulimit)__version__zvllm.entrypoints.api_serverz/healthreturnc                  &   K   t          d          S )zHealth check.   status_code)r	        o/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/entrypoints/api_server.pyhealthr   (   s       $$$$r   z	/generaterequestc                 h   K   |                                   d{V }t          ||            d{V S )a%  Generate completion for the request.

    The request should be a JSON object with the following fields:
    - prompt: the prompt to use for the generation.
    - stream: whether to stream the results or not.
    - other fields: the sampling parameters (See `SamplingParams` for details).
    N)raw_request)json	_generate)r   request_dicts     r   generater%   .   sL       !''''''L<W==========r   r$   r!   c                 "  
K   |                      d          
|                      dd          }t          di | ddi}t                      }t          J t                              
||          dt
          t          d f         ffd}|rt           |                      S d }	 2 3 d {V }|}
6 n%# t          j	        $ r t          d	          cY S w xY w|J |j        

J 
fd
|j        D             }d|i}	t          |	          S )NpromptstreamF
skip_cloneTr   c                    K   2 3 d {V } | j         J fd| j        D             }d|i}t          j        |          dz                       d          W V  W6 d S )Nc                 $    g | ]}|j         z   S r   text.0outputr'   s     r   
<listcomp>z5_generate.<locals>.stream_results.<locals>.<listcomp>K   s     VVVVFV[0VVVr   r-   
zutf-8)r'   outputsr"   dumpsencode)request_outputtext_outputsretr'   results_generators      @r   stream_resultsz!_generate.<locals>.stream_resultsG   s      $5 	; 	; 	; 	; 	; 	; 	;.#*F%%%VVVV~?UVVVL<(C:c??T)11'::::::: %6$5$5s   Ai  r   c                 $    g | ]}|j         z   S r   r,   r.   s     r   r1   z_generate.<locals>.<listcomp>]   s     LLLVFV[(LLLr   r-   r   )popr   r   enginer%   r   bytesr
   asyncioCancelledErrorr	   r'   r3   r   )r$   r!   r(   sampling_params
request_idr:   final_outputr6   r7   r8   r'   r9   s             @@r   r#   r#   ;   s     h''Fh..F$EE|EEEEEOJLL;."= ; ; ; ; ; ;  3 !1!1222 L)$5 	* 	* 	* 	* 	* 	* 	*.)LL %6$5! ) ) )C(((((() ### FLLLL|7KLLLL<
 Cs   )B7 +B51B7 7CCargsc                 2    | j         t          _         t          S N)	root_pathapp)rD   s    r   	build_apprI   b   s     NCMJr   
llm_enginec                    K   t          |           }t          j        |           }||nt          j        |t
          j                  at          |j        _	        | |j        _
        |S )N)usage_context)rI   r   from_cli_argsr   from_engine_argsr   
API_SERVERr=   stateengine_clientrD   )rD   rJ   rH   engine_argss       r   init_apprS   i   sp       D//C "/55K ! 	
,|'>
 
 
  %CICINJr   uvicorn_kwargsc                   K   t                               dt                     t                               d|            t                       t	          | |           d {V }t
          J t          |fd | j        | j        | j	        | j
        t          j        | j        | j        | j        | j        d
| d {V }| d {V  d S )NzvLLM API server version %szargs: %s)
sockenable_ssl_refreshhostport	log_leveltimeout_keep_alivessl_keyfilessl_certfilessl_ca_certsssl_cert_reqs)loggerinfoVLLM_VERSIONr   rS   r=   r   rW   rX   rY   rZ   envsVLLM_HTTP_TIMEOUT_KEEP_ALIVEr\   r]   r^   r_   )rD   rJ   rT   rH   shutdown_tasks        r   
run_serverrf   ~   s      KK,l;;;
KK
D!!!LLLz**
*
*
*
*
*
*C$2YY.<$&&(         M r   __main__z--host)typedefaultz--porti@  z--ssl-keyfilez--ssl-certfilez--ssl-ca-certszThe CA certificates file)rh   ri   helpz--enable-ssl-refresh
store_trueFz5Refresh SSL Context when SSL certificate files change)actionri   rj   z--ssl-cert-reqsz@Whether client certificate is required (see stdlib ssl module's)z--root-pathz?FastAPI root_path when app is behind a path based routing proxyz--log-leveldebugrF   )A__doc__r?   r"   sslargparser   collections.abcr   typingr   fastapir   r   fastapi.responsesr   r	   r
   	vllm.envsrc   vllm.engine.arg_utilsr   vllm.engine.async_llm_enginer   vllm.entrypoints.launcherr   vllm.entrypoints.utilsr   vllm.loggerr   vllm.sampling_paramsr   vllm.usage.usage_libr   
vllm.utilsr   vllm.utils.argparse_utilsr   vllm.utils.system_utilsr   vllm.versionr   rb   r`   rH   r=   getr   postr%   dictr#   rI   rS   rf   __name__parseradd_argumentstr
check_portint	CERT_NONEadd_cli_args
parse_argsrD   runr   r   r   <module>r      s      



       * * * * * *       $ $ $ $ $ $ $ $ G G G G G G G G G G       1 1 1 1 1 1 7 7 7 7 7 7 0 0 0 0 0 0 4 4 4 4 4 4 # # # # # # / / / / / / - - - - - - " " " " " " < < < < < < . . . . . . 4 4 4 4 4 4	2	3	3gii	 %h % % % %
 +	>G 	> 	> 	> 	> 	> #$ #W # # # # #LI '     )- 
%    , :> 
!/$!6QT	   : z##%%F
sD999
v'8$GGG
c4@@@
(sDAAA
sD7Q     D	     CM""O	     N	     CAAA)_)&11FDGK

4  !!!!!? r   