
    .`i)                     (   d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZmZmZ d dlmZmZmZmZ d d	lmZmZ d d
lmZ d dlmZmZmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z)  e!e*          Z+ G d de          Z,dS )    N)AsyncGenerator)Sequence)Request)EngineClient)RequestLogger)ChatCompletionLogProbChatCompletionLogProbsChatCompletionLogProbsContent)ErrorResponsePromptTokenUsageInfoRequestResponseMetadata	UsageInfo)OpenAIServingclamp_prompt_logprobs)OpenAIServingModels)GenerateRequestGenerateResponseGenerateResponseChoice)TokensPrompt)init_logger)Logprob)RequestOutput)SamplingParams)as_listc                       e Zd ZdZdddddddedededz  ded	ed
ededef fdZ	 dde	de
dz  deez  fdZde	deedf         dedededeez  fdZ	 ddee         deeeef         dz           dedz  defdZ xZS )ServingTokensz;Provides Tokens IN <> Tokens OUT functionality to vLLM API.F)force_no_detokenizereturn_tokens_as_token_idslog_error_stackenable_prompt_tokens_detailsenable_log_outputsengine_clientmodelsrequest_loggerNr   r   r   r    r!   c                    t                                          |||||           || _        || _        || _        |rt
                              d           d S d S )N)r"   r#   r$   r   r   zPTokens-only mode is enabled, skipping detokenization step for incoming requests.)super__init__r    r!   r   loggerinfo)
selfr"   r#   r$   r   r   r   r    r!   	__class__s
            y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/entrypoints/serve/disagg/serving.pyr'   zServingTokens.__init__-   s     	')'A+ 	 	
 	
 	
 -I)"4#6  	KK.    	 	    requestraw_requestreturnc                 T  K   |                      |           d {V }|t                              d|           |S | j        j        r| j        j        d }|                     |d          }| j                            |          }d| 	                    ||j
                   }t          |          }|r||j        _        t          |j                  }|j        d |d<   t#          |d          r|j        
|j        |d<   d }		 |j        }
| j        rd	|
_        |                     |t          |j                  |
|
           |d n|                     |j                   d {V }| j                            ||
||||j                  }	n9# t6          $ r,}|                     t;          |                    cY d }~S d }~ww xY w	 |	J |                     ||	|||           d {V S # t6          $ r,}|                     t;          |                    cY d }~S d }~ww xY w)NzError with model %sT)supports_default_mm_loraszgenerate-tokens-)
request_id)prompt_token_idsmulti_modal_data
cache_saltF)paramslora_request)r8   trace_headerspriority)_check_modelr(   errorr"   errored
dead_error_maybe_get_adaptersr#   
model_name_base_request_idr3   r   staterequest_metadatar   	token_idsfeatureshasattrr6   sampling_paramsr   
detokenize_log_inputs_get_trace_headersheadersgenerater:   
ValueErrorcreate_error_responsestrserve_tokens_full_generator)r*   r.   r/   error_check_retr8   r@   r3   rC   engine_promptresult_generatorrG   r9   es                r,   serve_tokenszServingTokens.serve_tokensI   s     
 !% 1 1' : :::::::&LL.@@@""
 % 	0$////SW/XX[++L99
 Xt44['BTUUWW 	 3jIII 	B1AK. %g6GHHH'04M,-7L)) 	=g.@.L*1*<M,' HL	6%5O' 3-2*g.?@@@&)	     & 22;3FGGGGGGGG   $1::)+ )  ;      	6 	6 	6--c!ff55555555	6
	6#///99):zCS          	6 	6 	6--c!ff55555555	6s=   	B
F 
G
!G?G
G
"G1 1
H';!H"H'"H'rS   r3   r@   rC   c           
        K   t          t          j                              }d }|j        }	 |2 3 d {V }	|	}
6 n^# t          j        $ r |                     d          cY S t          $ r,}
|                     t          |
                    cY d }
~
S d }
~
ww xY w|J g }d}|j        D ]}|j	        }|j
        }|j
        r*|
J d            |                     |||j
                  }nd }t          |j        ||j        r|j        ndt          |j	                            }|                    |           |t#          |j	                  z  }|j        J t#          |j                  }|j        |t#          |j                  z  }t)          ||||z             }| j        r!|j        rt/          |j                  |_        ||_        t5          |||||t7          |j                  |j        	          }| j        ri| j        rb|D ]_}d }|j        t#          |j                  k     r|j        |j                 j	        }|r%| j                             |d
||j        dd           `|S )NzClient disconnectedr   zDid not output logprobs)rD   top_logprobsnum_output_top_logprobsstop)indexlogprobsfinish_reasonrD   )prompt_tokenscompletion_tokenstotal_tokens)cached_tokens)idcreatedmodelchoicesusageprompt_logprobskv_transfer_params F)r3   outputsoutput_token_idsr\   is_streamingdelta)!inttimerG   asyncioCancelledErrorrN   rM   rO   ri   rD   r[   _create_tokens_logprobsr   rZ   r\   r   appendlenr4   encoder_prompt_token_idsr   r    num_cached_tokensr   prompt_tokens_detailsfinal_usage_infor   r   rf   rg   r!   r$   log_outputs)r*   r.   rS   r3   r@   rC   created_time	final_resrG   resrT   rd   num_generated_tokensoutputrD   out_logprobsr[   choice_datanum_prompt_tokensre   responsechoicerj   s                          r,   rP   z)ServingTokens.serve_tokens_full_generator   sI      49;;''*.	*1*A	6-              c		 .-% 	E 	E 	E--.CDDDDD 	6 	6 	6--c!ff55555555	6 $$$02 ' 	: 	:F(I!?L '  #//1J///77'!-,;,D 8    0l!6<6JVf22PV!&"233	  K NN;''' C(8$9$99  )555	 :;;-9Y%G!H!HH+2*-AA
 
 

 , 	1L 	*>'9+ + +E' -2)# 1)2KLL(;
 
 
 " 	t': 	!  #' <#i&7"8"888'0'8'F'P$# 	'33#- ")9&,&:%*# 4    s)   ; 9; $B!	B*!BBBrD   rW   rX   c                   	 g }t          |          D ]\  }}d| 	||         }||                    |          $|                    t          	                     M||         }|                    t          	t	          |j        d          	fdt          |                                          D                                  t          |          S )zCreate OpenAI-style logprobs.z	token_id:N)token    c           	      z    g | ]7\  }}r0|k     t          t          |d          j        d                    8S )   r   )r   logprob)r   maxr   ).0iprX   r   s      r,   
<listcomp>z9ServingTokens._create_tokens_logprobs.<locals>.<listcomp>  sg     & & &
 !%16& <=?V;V;V 2&+(+AaDL'(B(B  
 <W;V;Vr-   )r   r   rW   )content)	enumerategetrr   r
   r   r   itemsr	   )
r*   rD   rW   rX   logprobs_contentr   token_idstep_top_logprobs
step_tokenr   s
      `     @r,   rq   z%ServingTokens._create_tokens_logprobs   s0    AC$Y// 	 	KAx***E ,Q (,=,A,A(,K,K,S ''1#      /x8
 ''1# #J$6 @ @& & & & &
 )22C2I2I2K2K(L(L& & &      &.>????r-   )N)__name__
__module____qualname____doc__r   r   r   boolr'   r   r   r   r   rU   r   r   rO   r   rP   GenericSequencerm   dictr   r	   rq   __classcell__)r+   s   @r,   r   r   *   s       EE %*+0 %-2#(  # $
 &, " %)  '+ !     > '+N6 N6 N6 t^N6 
M	)	N6 N6 N6 N6`` ` ))<=` 	`
 ` 2` 
)	)` ` ` `L /3	$@ $@"3'$@ &d3<&84&?@$@ "%t	$@
 
 $@ $@ $@ $@ $@ $@ $@ $@r-   r   )-ro   rn   collections.abcr   r   r   fastapir   vllm.engine.protocolr   vllm.entrypoints.loggerr   0vllm.entrypoints.openai.chat_completion.protocolr   r	   r
   'vllm.entrypoints.openai.engine.protocolr   r   r   r   &vllm.entrypoints.openai.engine.servingr   r   &vllm.entrypoints.openai.models.servingr   &vllm.entrypoints.serve.disagg.protocolr   r   r   vllm.inputs.datar   vllm.loggerr   vllm.logprobsr   vllm.outputsr   vllm.sampling_paramsr   vllm.utils.collection_utilsr   r   r(   r    r-   r,   <module>r      s  
   * * * * * * 7 7 7 7 7 7       - - - - - - 1 1 1 1 1 1         
            X W W W W W W W F F F F F F         
 * ) ) ) ) ) # # # # # # ! ! ! ! ! ! & & & & & & / / / / / / / / / / / /	X		u@ u@ u@ u@ u@M u@ u@ u@ u@ u@r-   