
    .`iqv                        d dl Z d dlZd dlmZmZ d dlmZ d dlmZ d dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZmZmZmZmZmZ d d	lmZmZmZmZ d d
lmZmZmZ d dl m!Z! d dl"m#Z# d dl$m%Z%m&Z& d dl'm(Z( d dl)m*Z*m+Z+m,Z, d dl-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4m5Z5 d dl6m7Z7 d dl8m9Z9 d dl:m;Z; d dl<m=Z=  e.e>          Z? G d de          Z@dS )    N)AsyncGeneratorAsyncIterator)Sequence)cast)Request)EngineClient)RequestLogger)CompletionLogProbsCompletionRequestCompletionResponseCompletionResponseChoiceCompletionResponseStreamChoiceCompletionStreamResponse)ErrorResponsePromptTokenUsageInfoRequestResponseMetadata	UsageInfo)GenerationErrorOpenAIServingclamp_prompt_logprobs)OpenAIServingModels)RenderConfig)get_max_tokensshould_include_usage)VLLMValidationError)EmbedsPromptTokensPromptis_embeds_prompt)init_logger)Logprob)RequestOutput)BeamSearchParamsSamplingParams)TokenizerLike)merge_async_iterators)as_list)%validate_logits_processors_parametersc                       e Zd Zddddddedededz  deded	ed
ef fdZdede	e
ez           ez  fdZ	 d%dededz  deedf         ez  ez  fdZdede	e
ez           deeeef                  dedededededz  dedeedf         fdZde	e         dedededededz  dedefdZ	 	 d&dee         deeeef         dz           dededz  d ed!edz  defd"Z	 d%ded#edz  de fd$Z! xZ"S )'OpenAIServingCompletionF)return_tokens_as_token_idsenable_prompt_tokens_detailsenable_force_include_usagelog_error_stackengine_clientmodelsrequest_loggerNr*   r+   r,   r-   c                    t                                          |||||           | j        j        | _        || _        || _        | j                                        | _        d S )N)r.   r/   r0   r*   r-   )super__init__model_configlogits_processorsr+   r,   get_diff_sampling_paramdefault_sampling_params)	selfr.   r/   r0   r*   r+   r,   r-   	__class__s	           ~/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/entrypoints/openai/completion/serving.pyr3   z OpenAIServingCompletion.__init__4   sr     	')'A+ 	 	
 	
 	
 "&!2!D,H)*D''+'8'P'P'R'R$$$    requestreturnc                   K   |                      |           d{V }||S | j        j        r| j        j        |j        |                     d          S |j        r|j        |                     d          S |j        |j        |                     d          S 	 | 	                                }|
                    |j        |j        |                     |                     d{V }n^# t          t          t          t           j        f$ r9}t$                              d           |                     |          cY d}~S d}~ww xY w|S )z
        render completion request by validating and preprocessing inputs.

        Returns:
            A list of engine_prompts on success,
            or an ErrorResponse on failure.
        Nz!suffix is not currently supportedz'Echo is unsupported with prompt embeds.z5prompt_logprobs is not compatible with prompt embeds.)prompt_or_promptsprompt_embedsconfigz$Error in preprocessing prompt inputs)_check_modelr.   errored
dead_errorsuffixcreate_error_responseechor@   prompt_logprobs_get_completion_rendererrender_prompt_and_embedsprompt_build_render_config
ValueError	TypeErrorRuntimeErrorjinja2TemplateErrorlogger	exception)r8   r<   error_check_retrendererengine_promptses         r:   render_completion_requestz1OpenAIServingCompletion.render_completion_requestO   s      !% 1 1' : :::::::&""
 % 	0$// >%--.QRRR< 	YG1=--.WXXX".73H3T--G  		14466H#+#D#D").%30099 $E $ $      NN
 I|V5IJ 	1 	1 	1CDDD--a00000000	1 s   AC- -"E.E=EEraw_requestc                 	  $K   |                      |           d{V }t          |t                    r|S |}d|                     ||j                   }t          t          j                              }t          |          }|r||j        _	        	 | 
                    |          }nS# t          t          t          f$ r9}	t                              d           |                     |	          cY d}	~	S d}	~	ww xY w|                     |          }
g }	 t%          |          D ]\  }}|                     |          \  }}}d}|t)          |          }n|t)          |          }nt*          | j        i | _        t/          | j        ||| j                  }|j        r|                    || j                  }n;|                    || j        j        | j                  }t=          | j        |           | d| }|                      ||||           |dn| !                    |j"                   d{V }tG          tH          tJ          z  |          }t          |tL                    r| '                    |||||          }nR| (                    ||||||j)        |
	           d{V \  }}| j*        +                    ||||||j)        |||

	  	        }|,                    |           n,# t          $ r}	|                     |	          cY d}	~	S d}	~	ww xY wt[          | }| j.        /                    |          }t)          |          }|j0        o|j         }| j1        j2        }|r| 3                    |||||||||	  	        S dg|z  }	 |2 3 d{V \  }}|||<   6 t%          |          D ]C\  }} | J | j4        3||         }tk          |          rdn|6                    d          | _4        DtG          tn          tp                   |          }!| 9                    |!||||||          }"nx# tt          j;        $ r |                     d          cY S tx          $ r}	| =                    |	          cY d}	~	S d}	~	wt          $ r}	|                     |	          cY d}	~	S d}	~	ww xY w|j0        r8|">                                $dt~          t          df         f$fd}# |#            S |"S )aq  Completion API similar to OpenAI's API.

        See https://platform.openai.com/docs/api-reference/completions/create
        for the API specification. This API mimics the OpenAI Completion API.

        NOTE: Currently we do not support the following feature:
            - suffix (the language models we currently support do not support
            suffix)
        Nzcmpl-)
request_idz"Error preparing request components)max_model_lenr<   input_lengthr7   -)paramslora_request)rK   r[   r_   r`   trace_headers)r`   ra   prioritydata_parallel_rank)r`   ra   rb   prompt_texttokenization_kwargsrc   )num_prompts	tokenizerrequest_metadatarK   zClient disconnectedr=   c                 (   K   d  dW V  dW V  d S )Ndata: 

data: [DONE]

 )response_jsons   r:   fake_stream_generatorzHOpenAIServingCompletion.create_completion.<locals>.fake_stream_generatorD  s6      2}2222222(((((((r;   )ArX   
isinstancer   _base_request_idr[   inttimer   staterh   _maybe_get_adaptersrM   rN   rO   rR   rS   rF   _get_data_parallel_rank	enumerate_get_prompt_componentslenNotImplementedErrorr7   r   r\   use_beam_searchto_beam_search_paramsto_sampling_paramsr4   logits_processor_patternr'   r5   _log_inputs_get_trace_headersheadersr   r   r   r"   beam_search_process_inputsrb   r.   generateappendr%   r/   
model_namestreamrU   rg   completion_stream_generatorrK   r   getlistr!   %request_output_to_completion_responseasyncioCancelledErrorr   %_convert_generation_error_to_responsemodel_dump_jsonr   str)%r8   r<   rY   resultrV   r[   created_timerh   r`   rW   rc   
generatorsiengine_promptrd   prompt_token_idsr@   r]   
max_tokenssampling_paramsrequest_id_itemra   	generatorengine_requestre   result_generatorr   rf   r   rg   final_res_batchres	final_resfinal_res_batch_checkedresponsero   rn   s%                                       @r:   create_completionz)OpenAIServingCompletion.create_completion}   s      55g>>>>>>>>fm,, 	MUT22;@RSSUU
49;;''2jIII 	B1AK.	133G<<LLI|4 	1 	1 	1ABBB--a00000000	1
 "99+FF AC
\	1$-n$=$= Y- Y- =//>> =-}  $#/#&'7#8#8LL".#&}#5#5LL--/735D0+"&"4#!-,0,H	  
 * &-&C&C"D$@' 'OO '.&@&@")B4' 'O
 :.'  
 &0"5"5!"5"5  #!*!-	 !    #* D#66{7JKKKKKKKK  !%\L%@- P Po/?@@  $ 0 0,#-.%1&3 !1 ! !II AE@T@T'%'%1&3!(!1+= AU A A ; ; ; ; ; ;7N$7 !% 2 ; ;&''%1&3!(!1$/,?+= !< 
! 
!I !!),,,,sY-t  	1 	1 	1--a00000000	1 1*=[++L99
.)) ?(?$? M+	 	33 '#!1 4 
 
 
 8<f{6J"	1 0 ) ) ) ) ) ) )fa%("" !1 !*/ : :  9 ,,,
 #+$21$5M ,M::9*..x88 $ '+4+>&P&P#AA'  HH % 	E 	E 	E--.CDDDDD 	A 	A 	A==a@@@@@@@@ 	1 	1 	1--a00000000	1
 > 	+$4466M)T	1J ) ) ) ) ) ) )(***s   B+ +C;.C60C;6C;GK5 5
L?LLLP7 N'BP7 7$R,	R,&R :R, R,R'!R,'R,rV   r   r[   r   r   rf   rg   rh   c
                  K   |j         dn|j         }
dg|
z  |z  }dg|
z  |z  }dg|
z  |z  }dg|z  }d }d}|j        }t          || j                  \  }}	 |2 3 d {V \  }}|j        }|j        }|r	|j        }d}|j        }|.||         }t          |          rd n|	                    d          }|t          |          ||<   |j        D ]}|j        ||
z  z   }d }|j        J |j        rW||         sO|J |j        rd}|J |j        dk    r|}|}|}n$||j        z   }g ||j        }g |pg |j        pg }|}d||<   n8|j        }|j        }|j        }||         s|j        r|}d||<   |s|s	||         s|j        8|
J d            |                     |||j        |||         |j                  } nd } ||xx         t          |j                  z  cc<   ||xx         t          |j                  z  cc<   |j        }!|j        }"|                     |!|           t1          |||t3          ||| |!|"||j        rt5          |j                  nd 	          g
          }#|r*||         }$||         }%t7          |$|%|$|%z             |#_        |#                    d          }&d|& dW V  a6 t=          |          }'t=          |          }(t7          |'|(|'|(z             })| j        r|rtA          |          |)_!        |r4t1          |||g |)          }*|*                    dd          }+d|+ dW V  |)|	_"        n}# tF          $ r&},d| $                    |,           dW V  Y d },~,nRd },~,wtJ          $ rB},tL          '                    d           | (                    |,          }-d|- dW V  Y d },~,nd },~,ww xY wdW V  d S )N   r   FTrK    Did not output logprobs)	token_idstop_logprobsnum_output_top_logprobsrg   initial_text_offsetreturn_as_token_id)indextextlogprobsfinish_reasonstop_reasonr   r   )idcreatedmodelchoicesprompt_tokenscompletion_tokenstotal_tokens)exclude_unsetrj   rk   cached_tokens)r   r   r   r   usage)r   exclude_nonez%Error in completion stream generator.rl   ))nstream_optionsr   r,   r   rH   num_cached_tokensrK   r   r   ry   outputsr   r   rG   return_token_idsr   r   r   _create_completion_logprobsr*   r   r   _raise_if_errorr   r   r&   r   r   r   sumr+   r   prompt_tokens_detailsfinal_usage_infor   /_convert_generation_error_to_streaming_response	ExceptionrR   rS   create_streaming_error_response).r8   r<   rV   r   r[   r   r   rf   rg   rh   num_choicesprevious_text_lensprevious_num_tokens
has_echoednum_prompt_tokensr   first_iterationr   include_usageinclude_continuous_usage
prompt_idxr   r   rH   rd   r   outputr   prompt_token_ids_to_return
delta_textdelta_token_idsout_logprobsr   r   r   chunkr   r   rn   total_prompt_tokenstotal_completion_tokensr   final_usage_chunkfinal_usage_datarW   datas.                                                 r:   r   z3OpenAIServingCompletion.completion_stream_generatorL  s1      #9,aa')S;.< cK/+=W{*[8
C+-  /2FD;3
 3
//c	&)9 ~7 ~7 ~7 ~7 ~7 ~7 ~7oj##&#7 "%"5" ,(+(=%&+O!j&$2:$>M ,M::9*..x88   $/478H4I4I%j1
 "k f7 f7FzK'??A
 DH."-999| )%JqM )%/;;;"3 -*,K*666"-22)4J.>O+:LL *5v{)BJ/!1/!'!1/O,"1"7R,"(/"7R,L 6F2(,
1 &,[
*0*:'-  *!} 11I 19I6,0JqM !+%$3% %8$:% %'3+779R777#'#C#C&5)54;4D&/0B10E/6/Q $D $ $ $(&q)))S-=-==)))'***c&2B.C.CC***$*$8M"("4K((
CCC4% ,(:&'%/)1.;,71K (/'?%.GF,<$=$=$=)-  !	  E( 0 (9*(E,?,B)&/*7.?)69J)J' ' ' %*$9$9$9$N$NM6=66666666Mf71 *:@ #&&7"8"8&)*=&>&>#(1"903JJ      0 5F 9M"3: : : 6  6$<!($*% % %! $5#D#D"'d $E $ $  6/5555555 1A-- 	Y 	Y 	YX4OOPQRRXXXXXXXXXXXX 	& 	& 	&DEEE77::D%4%%%%%%%%%%%%	& !      s1   M J; K*M 
OM66O8O  Or   c                 ,   g }d}	d}
d }d }|D ]}|}|j         }|J t          |j                  }|j        }|j        D ]W}|                     |j        |           |j        J |j        rW|j	        rd}|J |j        dk    r|}|}|}nMg ||j
        }|j        d }n|J |j        J g ||j        }||j        z   }n|j
        }|j        }|j        }|j        1|
J d            |                     ||||j        |j                  }nd }t          t!          |          |||j        |j        |j        |j	        r|nd |j	        rt%          |j
                  nd           }|                    |           |
t!          |j
                  z  }
Y|	t!          |          z  }	t)          |	|
|	|
z             }| j        r#|r!|j        rt/          |j                  |_        ||_        |r|d         j        }t7          ||||||          S )	Nr   r   r   )r   r   rg   r   r   )r   r   r   r   r   rH   r   r   r   r   )r   r   r   r   r   kv_transfer_params)r   r   rH   rK   r   r   r   r   rG   r   r   r   r   r   r*   r   ry   r   r&   r   r   r+   r   r   r   r   r   r   )r8   r   r<   r[   r   r   rg   rh   r   r   num_generated_tokensr   last_final_resr   r   rH   rd   r   r   r   output_textr   choice_datar   s                           r:   r   z=OpenAIServingCompletion.request_output_to_completion_response  s    35 !( G	7 G	7I&N(9#///3I4MNNO#*K
 $+ ;> ;>$$V%9:FFF)555< ./ )&(&222)Q..$4	'6&1$J&6$J9I$J	"+3+/LL#2#>#>#>#)?#>#>#>,!0,!',L
 '2FK&? & 0I#)?L"(+K#/'335N333#??"+%1"+070@+2+M  @    HH  $H6g,,$%"("6 & 2$-$=,3,DN(($ 6=5MW 0111SW   {+++$F,<(=(==$$%5!6!66+2*-AA
 
 
 -		 0	
 +?,>+ + +E' -2) 	G!0!3!F! 1
 
 
 	
r;   r   r   r   r   r   r   c                 p    g }g }g }	g }
d}||n j         t          |          D ]z\  }}||         }|qrd| }n)t          ddd                              |          }|	                    |           |                    d           |
                    d           n||         }                     ||          }t          |j        d	          }|	                    |           |                    |           |
                     fd
t          |                                          D                        t          |          dk    r|                    |           n|                    |d         |z              t          |          }|t          |||	|
          S )z*Create logprobs for OpenAI Completion API.r   Nz	token_id:z:Unable to get tokenizer because `skip_tokenizer_init=True`skip_tokenizer_initT	parametervaluer       c                     i | ]J\  }}|k                         |d          |d                   t          |d          j        d          KS )r   r   r   r   )_get_decoded_tokenmaxlogprob).0r   top_lpr   r8   should_return_as_token_idrg   s      r:   
<dictcomp>zGOpenAIServingCompletion._create_completion_logprobs.<locals>.<dictcomp>  ss        &Av2a77 //"1I"1I%/H	 0  
 vay0'::777r;   )text_offsettoken_logprobstokensr   )r*   rw   r   decoder   r   r   r   itemsry   r
   )r8   r   r   r   rg   r   r   out_text_offsetout_token_logprobs
out_tokensout_top_logprobslast_token_lenr   token_idstep_top_logprobstoken
step_tokentoken_logprobr   s   `  ``             @r:   r   z3OpenAIServingCompletion._create_completion_logprobs~  sK    &(13 "
:< "- 0 	"
 %Y// 8	( 8	(KAx ,Q (, 7222EE (19&;"&	    &,,X66E!!%((("))$/// ''----.x8
//'@	 0   !$J$6 @ @!!%((("))-888 !''       *33D3J3J3L3L)M)M     ?##q((&&':;;;;&&r':^'KLLL ZZNN!'-)	
 
 
 	
r;   max_input_lengthc           	         |j         8|j         | j        k    r(t          d|j          d| j         dd|j                   | j        |j         pdz
  }t          ||j        |j        |j        t          |j        o|j	                             S )Nz'max_tokens' (z=) cannot be greater than the model's maximum context length (z).r   r   r   )
max_lengthtruncate_prompt_tokensadd_special_tokens
cache_saltneeds_detokenization)
r   r\   r   r   r  r  r	  boolrG   r   )r8   r<   r  max_input_tokens_lens       r:   rL   z,OpenAIServingCompletion._build_render_config  s     )g.@4CU.U.U%N!3 N N7;7IN N N&(	     $1W5G5L1M+#*#A&9)!%gl&S7;S7S!T!T
 
 
 	
r;   )N)r   N)#__name__
__module____qualname__r   r   r	   r  r3   r   r   r   r   r   rX   r   r   r   r   r   r   tuplerr   r!   r$   r   r   r   GenericSequencedictr    r
   r   r   rL   __classcell__)r9   s   @r:   r)   r)   3   s4        ,1-2+0 %S S S#S $S
 &,S %)S '+S %)S S S S S S S6,", 
l\)	*]	:, , , ,b '+M M"M t^M 
T		"%7	7-	G	M M M M^}!"}! \L89}! (c=.@(AB	}!
 }! }! }! }! !4'}! 2}! 
T		"}! }! }! }!~q
m,q
 #q
 	q

 q
 q
 !4'q
 2q
 
q
 q
 q
 q
r $%*.U
 U
"3'U
 &d3<&84&?@U
 "%	U

 !4'U
 !U
 !4KU
 
U
 U
 U
 U
t (,
 
"
 *
 
	
 
 
 
 
 
 
 
r;   r)   )Ar   rs   collections.abcr   r   r   r  typingr   rP   fastapir   vllm.engine.protocolr   vllm.entrypoints.loggerr	   +vllm.entrypoints.openai.completion.protocolr
   r   r   r   r   r   'vllm.entrypoints.openai.engine.protocolr   r   r   r   &vllm.entrypoints.openai.engine.servingr   r   r   &vllm.entrypoints.openai.models.servingr   vllm.entrypoints.rendererr   vllm.entrypoints.utilsr   r   vllm.exceptionsr   vllm.inputs.datar   r   r   vllm.loggerr   vllm.logprobsr    vllm.outputsr!   vllm.sampling_paramsr"   r#   vllm.tokenizersr$   vllm.utils.async_utilsr%   vllm.utils.collection_utilsr&   vllm.v1.sample.logits_processorr'   r  rR   r)   rm   r;   r:   <module>r)     s     9 9 9 9 9 9 9 9 7 7 7 7 7 7              - - - - - - 1 1 1 1 1 1                                   
 G F F F F F 2 2 2 2 2 2 G G G G G G G G / / / / / / I I I I I I I I I I # # # # # # ! ! ! ! ! ! & & & & & & A A A A A A A A ) ) ) ) ) ) 8 8 8 8 8 8 / / / / / / Q Q Q Q Q Q	X		w

 w

 w

 w

 w

m w

 w

 w

 w

 w

r;   