
    -`i              	       B   d dl Z d dlZd dlZd dlmZ d dlmZ d dlZd dlm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ  ee          Z	 dde	dej        dz  dedefdZdej         defdZ!dej         defdZ"de	dej         ddfdZ#dS )    N)
HTTPStatus)Any)FastAPIRequestResponse)envs)EngineClient)H11_MAX_HEADER_COUNT_DEFAULT%H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT)SSLCertRefresher)init_logger)find_process_using_port)EngineDeadErrorEngineGenerateErrorFappsockenable_ssl_refreshuvicorn_kwargsc           
        K   t                               d           | j        D ]X}t          |dd          }t          |dd          }||)t                               d|d                    |                     Y|                    dd          }|                    dd          }|t          }|t          }t          j	        | fi |}	||	_
        ||	_        |	                                 t          j        |	          }
t          | |
           t          j                    }|                    t%          |
| j        j                            |                    |
                    |r|gnd	                    |sdn&t-          |	j        |	j        |	j        |	j        
          dfd}dd}|                    t8          j        |           |                    t8          j        |           	  d{V   |                                             S # t          j         $ r |d         }tC          |          }|Bt           "                    d||d                    |#                                                     t                               d           |
$                                cY                                  S w xY w#                                  w xY w)z
    Start a FastAPI app using Uvicorn, with support for custom Uvicorn config
    options.  Supports http header limits via h11_max_incomplete_event_size and
    h11_max_header_count.
    zAvailable routes are:methodsNpathzRoute: %s, Methods: %sz, h11_max_incomplete_event_sizeh11_max_header_count)sockets)ssl_contextkey_path	cert_pathca_pathreturnc                                                                                           r                                 d S d S N)cancelstop)server_taskssl_cert_refresherwatchdog_tasks   m/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/entrypoints/launcher.pysignal_handlerz"serve_http.<locals>.signal_handlerT   sQ     	&##%%%%%	& 	&    c                  
   K   d S r!    r+   r)   r'   dummy_shutdownz"serve_http.<locals>.dummy_shutdown[   s      r)   portz7port %s is used by process %s launched with command:
%s z"Shutting down FastAPI HTTP server.)r   N)%loggerinforoutesgetattrjoinpopr   r
   uvicornConfigr   r   loadServer_add_shutdown_handlersasyncioget_running_loopcreate_taskwatchdog_loopstateengine_clientserver   sslssl_keyfilessl_certfilessl_ca_certsadd_signal_handlersignalSIGINTSIGTERMr"   CancelledErrorr   warningcmdlineshutdown)r   r   r   r   router   r   r   r   configserverloopr(   r,   r-   processr$   r%   r&   s                   @@@r'   
serve_httprR      s:      KK'((( H H%D11ufd++?dl,dDIIg4F4FGGGG %3$6$6'% %! *--.DdKK %,(M%#;^C22>22F+HF("6F
KKMMM^F##F3'''#%%D$$]639;R%S%STTM""6<<$8PD<#Q#QRRK "	

')'	
 
 
 & & & & & & & &    	FM>:::FNN;;;~ 	 ! ! ! !f%)$//NNJ**++	   	8999    ! 	s%   <H" "BK:K KK K*rO   enginec                 b   K   d}	 t          j        |           d{V  t          | |           +)z
    # Watchdog task that runs in the background, checking
    # for error state in the engine. Needed to trigger shutdown
    # if an exception arises is StreamingResponse() generator.
    g      @TN)r:   sleepterminate_if_errored)rO   rS   VLLM_WATCHDOG_TIME_Ss      r'   r=   r=   t   sK       -m0111111111VV,,,-r)   c                 V    |j         o|j         }t          j        s|rd| _        dS dS dS )a  
    See discussions here on shutting down a uvicorn server
    https://github.com/encode/uvicorn/discussions/1103
    In this case we cannot await the server shutdown here
    because handler must first return to close the connection
    for this request.
    TN)errored
is_runningr   VLLM_KEEP_ALIVE_ON_ENGINE_DEATHshould_exit)rO   rS   engine_erroreds      r'   rV   rV      sJ     ^=F,=(=N/ "N "!" " " "r)   r   c                     |                      t                    |                      t                    |                      t                    dt          ffd                                    }dS )a)  
    VLLM V1 AsyncLLM catches exceptions and returns
    only two types: EngineGenerateError and EngineDeadError.

    EngineGenerateError is raised by the per request generate()
    method. This error could be request specific (and therefore
    recoverable - e.g. if there is an error in input processing).

    EngineDeadError is raised by the background output_handler
    method. This error is global and therefore not recoverable.

    We register these @app.exception_handlers to return nice
    responses to the end user if they occur and shut down if needed.
    See https://fastapi.tiangolo.com/tutorial/handling-errors/
    for more details on how exception handlers work.

    If an exception is encountered in a StreamingResponse
    generator, the exception is not raised, since we already sent
    a 200 status. Rather, we send an error message as the next chunk.
    Since the exception is not raised, this means that the server
    will not automatically shut down. Instead, we use the watchdog
    background task for check for errored state.
    requestc                 |   K   t          | j        j        j                   t	          t
          j                  S )N)rO   rS   )status_code)rV   r   r>   r?   r   r   INTERNAL_SERVER_ERROR)r_   __rO   s     r'   runtime_exception_handlerz9_add_shutdown_handlers.<locals>.runtime_exception_handler   sD       	;$2	
 	
 	
 	

 J$DEEEEr)   N)exception_handlerRuntimeErrorr   r   r   )r   rO   rd   s    ` r'   r9   r9      s    2 	<((?++.//F F F F F F 0/ ,+ )(F F Fr)   )F)$r:   rF   sockethttpr   typingr   r5   fastapir   r   r   vllmr   vllm.engine.protocolr	   vllm.entrypoints.constantsr
   r   vllm.entrypoints.sslr   vllm.loggerr   vllm.utils.network_utilsr   vllm.v1.engine.exceptionsr   r   __name__r/   boolrR   r8   r=   rV   r9   r+   r)   r'   <module>rt      s                   . . . . . . . . . .       - - - - - -        2 1 1 1 1 1 # # # # # # < < < < < < J J J J J J J J	X		  %V V	V
-$
V V 	V V V Vr	- 	- 	- 	- 	- 	-
" 
" 
" 
" 
" 
""F "F "FD "F "F "F "F "F "Fr)   