
    -`is0              
       V   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZmZ d dlZd dlmZ d dlmZmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ d dl m!Z! erd dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) ne*Z#e*Z%e*Z'e*Z) ee+          Z,dZ-deddfdZ.d Z/defdZ0d Z1d Z2	 d1de3de3dz  de4e5ef         dz  de3dz  fdZ6de3ddde3d e4de3f
d!Z7d"eez  fd#Z8d$d%d&e9de:e9e9f         fd'Z;d(e<e)         d)e4e5e5f         dz  de<e)         fd*Z=d+e5de5fd,Z>d-ed.e5d/e5ddfd0Z?dS )2    N)	Namespace)Logger)Template)TYPE_CHECKINGAny)Request)JSONResponseStreamingResponse)BackgroundTaskBackgroundTasks)envs)
EngineArgs)current_formatter_typeinit_logger)current_platform)FlexibleArgumentParser)ChatCompletionRequest)CompletionRequest)StreamOptionsLoRAModulePatha  For full list:            vllm {subcmd} --help=all
For a section:            vllm {subcmd} --help=ModelConfig    (case-insensitive)
For a flag:               vllm {subcmd} --help=max-model-len  (_ or - accepted)
Documentation:            https://docs.vllm.ai
requestreturnc                    K   	 |                                   d{V }|d         dk    rQt          | j        j        dd          r4t	          | j        j        d          r| j        j        xj        dz  c_        dS x)	z+Returns if a disconnect message is receivedTNtypezhttp.disconnectenable_server_load_trackingFserver_load_metrics   )receivegetattrappstatehasattrr   )r   messages     j/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/entrypoints/utils.pylisten_for_disconnectr&   4   s      
))))))))6?/// !#@%  ;'++-BCC; !55:55E
    c                 F     t          j                    fd            }|S )a  Decorator that allows a route handler to be cancelled by client
    disconnections.

    This does _not_ use request.is_disconnected, which does not work with
    middleware. Instead this follows the pattern from
    starlette.StreamingResponse, which simultaneously awaits on two tasks- one
    to wait for an http disconnect message, and the other to do the work that we
    want done. When the first task finishes, the other is cancelled.

    A core assumption of this method is that the body of the request has already
    been read. This is a safe assumption to make for fastapi handlers that have
    already parsed the body of the request into a pydantic model for us.
    This decorator is unsafe to use elsewhere, as it will consume and throw away
    all incoming messages for the request while it looks for a disconnect
    message.

    In the case where a `StreamingResponse` is returned by the handler, this
    wrapper will stop listening for disconnects and instead the response object
    will start listening for disconnects.
    c                    K   t          |           dk    r| d         n|d         }t          j         | i |          }t          j        t          |                    }t          j        ||gt          j                   d {V \  }}|D ]}|                                 ||v r|                                S d S )Nr   raw_request)return_when)lenasynciocreate_taskr&   waitFIRST_COMPLETEDcancelresult)	argskwargsr   handler_taskcancellation_taskdonependingtaskhandler_funcs	           r%   wrapperz"with_cancellation.<locals>.wrapper[   s       !YY]]$q''}0E*<<+H+H+HII#/0Eg0N0NOO%l,-7;R
 
 
 
 
 
 
 
 
g  	 	DKKMMMM4&&(((tr'   	functoolswraps)r:   r;   s   ` r%   with_cancellationr?   C   s:    0 _\""    #"" Nr'   c                 :    | j         j        xj        dz  c_        d S )Nr   )r!   r"   r   )r   s    r%   decrement_server_loadrA   p   s!    K))Q.))))r'   c                 F     t          j                    fd            }|S )Nc                    K   |                     dt          |           dk    r| d         nd           }|t          d          t          |j        j        dd          s | i | d {V S t          |j        j        d          sd|j        j        _        |j        j        xj        dz  c_        	  | i | d {V }n)# t          $ r |j        j        xj        dz  c_         w xY wt          |t          t          f          r|j        t          t          |          |_        nt          |j        t                    r!|j                            t          |           nt          |j        t                    r_t                      } |j        |j        j        g|j        j        R i |j        j         |                    t          |           ||_        n|j        j        xj        dz  c_        |S )Nr*   r   z9raw_request required when server load tracking is enabledr   Fr   r   )getr,   
ValueErrorr    r!   r"   r#   r   	Exception
isinstancer	   r
   
backgroundr   rA   r   add_taskfuncr3   r4   )r3   r4   r*   responsetasksrJ   s        r%   r;   z load_aware_call.<locals>.wrapperu   sD     jj3t99q==QdSSK   {,.KUSS 	/t.v......... {,.CDD 	:89KO!511Q611	!T42622222222HH 	 	 	O!55:55	 h/@ ABB 	;"*&45JK&X&X##H/AA ,#,,-BKPPPPH/@@ 
, ())',(-   )0  
 4kBBB&+#O!55:55s   5C &C*r<   )rJ   r;   s   ` r%   load_aware_callrM   t   s9    _T) ) ) ) )V Nr'   c                  x    dt           j        vr+t                              d           dt           j        d<   d S d S )NVLLM_WORKER_MULTIPROC_METHODz/Setting VLLM_WORKER_MULTIPROC_METHOD to 'spawn'spawn)osenvironloggerdebug r'   r%   cli_env_setuprV      s?      &RZ77FGGG5<
1222 87r'   max_model_lentruncate_prompt_tokenstokenization_kwargsc                 z    |1|dk    r| }|| k    rt          d| d|  d          |
d|d<   ||d<   n|d|d<   |S )	Nztruncate_prompt_tokens value (z!) is greater than max_model_len (z,). Please, select a smaller truncation size.T
truncation
max_lengthF)rE   )rW   rX   rY   s      r%   _validate_truncation_sizer^      s    
 )!R''%2"!M11>1G > >2?> > >   *04-0F- *05-!!r'   z)ChatCompletionRequest | CompletionRequestinput_lengthdefault_sampling_paramsc           	          t          |dd           p|j        }| |z
  }t          j        |          }t	          d ||||                    d          fD                       S )Nmax_completion_tokensc              3      K   | ]}||V  	d S NrU   ).0vals     r%   	<genexpr>z!get_max_tokens.<locals>.<genexpr>   s4       	 	 ? 	 ???	 	r'   
max_tokens)r    rh   r   get_max_output_tokensminrD   )rW   r   r_   r`   rh   default_max_tokensmax_output_tokenss          r%   get_max_tokensrm      s     "94@@VGDVJ&5(>|LL 	 	 #''55	
	 	 	 	 	 	r'   r3   c                    ddl m} i }t          | t                    ry |t	                                }t          |                    g                                                     D ],\  }}|t          | |          k    rt          | |          ||<   -nt          | t                    rt          | j
                  }t          j        |           D ]<}t          | |j                  }t          ||j                  }	||	k    r
|||j        <   =|j
        t          j
        k    r
|j
        |d<   nt          d          t                              d|           d S )Nr   )make_arg_parser)modelrp   zDUnsupported argument type. Must be Namespace or EngineArgs instance.znon-default args: %s) vllm.entrypoints.openai.cli_argsro   rG   r   r   vars
parse_argsitemsr    r   rp   dataclassesfieldsname	TypeErrorrS   info)
r3   ro   non_default_argsparserargdefaultdefault_argsfieldcurrent_valdefault_vals
             r%   log_non_default_argsr      sw   @@@@@@ $	"" 
 !7!9!9:: !2!22!6!677==?? 	; 	;LC'$,,,,(/c(:(: %	;
 
D*	%	% 
!
333 '-- 	; 	;E!$
33K!,
;;Kk))/: ,!111(4(:W%R
 
 	
 KK&(899999r'   stream_optionszStreamOptions | Noneenable_force_include_usagec                 V    | r | j         p|}|ot          | j                  }n|d}}||fS )NF)include_usageboolcontinuous_usage_stats)r   r   r   include_continuous_usages       r%   should_include_usager     sT      T&4R8R#0 $
T16
 6
   3Me/222r'   args_lora_modulesdefault_mm_lorasc                 p    ddl m | }|r*fd|                                D             }| |}n||z  }|S )Nr   r   c                 0    g | ]\  }} ||           S ))rw   pathrU   )re   modality	lora_pathr   s      r%   
<listcomp>z(process_lora_modules.<locals>.<listcomp>  sF     !
 !
 !

 $)	 N  !
 !
 !
r'   )&vllm.entrypoints.openai.models.servingr   rt   )r   r   lora_modulesdefault_mm_lora_pathsr   s       @r%   process_lora_modulesr     s|     FEEEEE$L 2!
 !
 !
 !

 (8'='='?'?!
 !
 !
 $0LL11Lr'   r$   c                 .    t          j        dd|           S )Nz at 0x[0-9a-f]+>>)resub)r$   s    r%   sanitize_messager   *  s    6%sG444r'   lgrversion
model_namec                    t           j        st          |           x}d}nLt          d          }ddddd}|dk    rt                              |d	          }|                    |          }|                     |||           d S )
Nz(vLLM server version %s, serving model %su+  
       ${w}█     █     █▄   ▄█${r}
 ${o}▄▄${r} ${b}▄█${r} ${w}█     █     █ ▀▄▀ █${r}  version ${w}%s${r}
  ${o}█${r}${b}▄█▀${r} ${w}█     █     █     █${r}  model   ${w}%s${r}
   ${b}▀▀${r}  ${w}▀▀▀▀▀ ▀▀▀▀▀ ▀     ▀${r}
z[97;1mz[93mz[94mz[0m)wobrcolor )r   VLLM_DISABLE_LOG_LOGOr   r   dictfromkeys
substitutery   )r   r   r   	formatterr$   logo_templatecolorss          r%   log_version_and_modelr   /  s    ! 33I#3N3N&Ni%W< W
 
 	
 
 ]]62..F**622HHWgz*****r'   rd   )@r-   ru   r=   rQ   argparser   loggingr   stringr   typingr   r   regexr   fastapir   fastapi.responsesr	   r
   starlette.backgroundr   r   vllmr   vllm.engine.arg_utilsr   vllm.loggerr   r   vllm.platformsr   vllm.utils.argparse_utilsr   0vllm.entrypoints.openai.chat_completion.protocolr   +vllm.entrypoints.openai.completion.protocolr   'vllm.entrypoints.openai.engine.protocolr   'vllm.entrypoints.openai.models.protocolr   object__name__rS   VLLM_SUBCMD_PARSER_EPILOGr&   r?   rA   rM   rV   intr   strr^   rm   r   r   tupler   listr   r   r   rU   r'   r%   <module>r      s            				                   % % % % % % % %           = = = = = = = = @ @ @ @ @ @ @ @       , , , , , , ; ; ; ; ; ; ; ; + + + + + + < < < < < <                 GFFFFFF"MN 
X		7  T    * * *Z/7 / / / /- - -`= = =0 26" ""$J" c3h$." 	4Z	" " " "68  "	
 	   ,:y:5 : : : :<
3*
3HL
3
4:
3 
3 
3 
3N+?CCH~PT?T	.   *5c 5c 5 5 5 5
+v + + + + + + + + +r'   