
    .`iK                    T   d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
 d dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZmZ d d
lmZ d dlm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z) d dl*m+Z+m,Z, d dl-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5 d dl6m7Z7m8Z8m9Z9 d dl:m;Z; d dl<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZC d dlDmEZE d dlFmGZGmHZH d dlImJZJ d dlKmLZL d dlMmNZN d dlOmPZPmQZQ d dlRmSZSmTZT d dlUmVZV d dlWmXZXmYZYmZZZm[Z[ d dl\m]Z] d dl^m_Z_ d dl`maZa d dlbmcZc d dldmeZe  eLef          Zg G d d e8          ZhdS )!    N)AsyncGeneratorAsyncIterator)Sequence)AnyFinal)Request)Message)Allow)EngineClient)ChatTemplateContentFormatOptionConversationMessageget_history_tool_calls_cntmake_tool_call_id)RequestLogger)
ChatCompletionLogProbChatCompletionLogProbsChatCompletionLogProbsContent"ChatCompletionNamedToolChoiceParamChatCompletionRequestChatCompletionResponseChatCompletionResponseChoice"ChatCompletionResponseStreamChoiceChatCompletionStreamResponseChatMessage)
TokenStateextract_harmony_streaming_delta)DeltaFunctionCallDeltaMessageDeltaToolCallErrorResponsePromptTokenUsageInfoRequestResponseMetadataToolCall	UsageInfo)GenerationErrorOpenAIServingclamp_prompt_logprobs)OpenAIServingModels)get_developer_message%get_stop_tokens_for_assistant_actions#get_streamable_parser_for_assistantget_system_message%parse_chat_inputs_to_harmony_messagesparse_chat_outputrender_for_completion) maybe_filter_parallel_tool_calls)get_max_tokensshould_include_usage)TokensPrompt)init_logger)Logprob)CompletionOutputRequestOutput)BeamSearchParamsSamplingParams)TokenizerLike)MistralTokenizermaybe_serialize_tool_callstruncate_tool_call_idsvalidate_request_params)
ToolParser)MistralToolCall)partial_json_loads)as_list)%validate_logits_processors_parametersc            '           e Zd Zddddddddddddddededed	edz  d
edz  dedededededededz  dededededede	ee
f         dz  ddf& fdZdDdZdedeee         ee
         f         ez  fdZ	 dEdededz  deedf         ez  ez  fdZdedefdZedFd"edefd#            Zed$ed%edeeef         fd&            Z	 dEd%ed'edz  d$ed(ed)edz  deedz  ef         fd*Zded+ee         d,ed-ed.ee         d/e dz  d0e!deedf         fd1Z"ded+ee         d,ed-ed.ee         d/e dz  d0e!deez  fd2Z#d3e	ee$f         d4edz  d/e dz  d5edee%         f
d6Z&	 	 dGd7e'e         d4e'e	ee$f         dz           d/e dz  d8edz  d9edz  de(fd:Z)defd;Z*d<edz  d=e+defd>Z,ed<ed?ed@edefdA            Z-	 dHdedBefdCZ. xZ/S )IOpenAIServingChatF NT)trust_request_chat_templatereturn_tokens_as_token_idsreasoning_parserenable_auto_tools#exclude_tools_when_tool_choice_nonetool_parserenable_prompt_tokens_detailsenable_force_include_usageenable_log_outputsenable_log_deltaslog_error_stackdefault_chat_template_kwargsengine_clientmodelsresponse_rolerequest_loggerchat_templatechat_template_content_formatrG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   returnc                   t                                          |||||           || _        || _        || _        || _        |pi | _        || _        || _        | j	        j
        | _
        |                     |	          | _        |
| _        |                     ||
          | _        || _        || _        || _        | j	                                        | _        | j	        j        j        dk    rd| _        nd| _        | j	        j        j        dk    | _        | j        r?d| j        vr
g | j        d<   | j        d                             t5                                 d| _        d | _        d| _        d | _        d S )	N)rS   rT   rV   rH   rQ   )reasoning_parser_name)tool_parser_namerJ   kimi_k2randomgpt_ossstop_token_idsF)super__init__rU   rW   rX   rG   rR   rO   rP   model_configlogits_processors_get_reasoning_parserrI   rJ   _get_tool_parserrL   rK   rM   rN   get_diff_sampling_paramdefault_sampling_params	hf_config
model_typetool_call_id_typeuse_harmonyextendr*   supports_browsingbrowser_toolsupports_code_interpreterpython_tool)selfrS   rT   rU   rV   rW   rX   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   	__class__s                      /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/entrypoints/openai/chat_completion/serving.pyrb   zOpenAIServingChat.__init__[   s   , 	')'A+ 	 	
 	
 	
 +*3O)+F(,H,NB)"4!2 "&!2!D !% : :"2 !; !
 !
 (900(<M 1 
 
 4W0,H)*D''+'8'P'P'R'R$&1Y>>%.D""%-D",6AYN 	t'CCCAC,-=>()9:AA577   "'  */&    c                   K   t                               d           t          j                    }	 | j        j        }t          dddgdd          }|                     |||j        | j	        | j
        dd	ddd| j        dd	
           d{V  t          j                    |z
  dz  }t                               d|           dS # t          $ r t                               d           Y dS w xY w)a$  
        Warm up the chat template processing to avoid first-request latency.

        This method triggers Jinja2 template compilation and content format
        detection that would otherwise happen on the first real request,
        causing increased latency on the first request.
        z&Warming up chat template processing...userwarmuprolecontentN   )messagesmodelmax_completion_tokensTF
rW   rX   add_generation_promptcontinue_final_message
tool_dicts	documentschat_template_kwargsrR   rL   add_special_tokensi  z(Chat template warmup completed in %.1fmszChat template warmup failed)loggerinfotimeperf_counterrS   rendererr   _preprocess_chatr}   rW   rX   rR   	Exception	exception)rr   
start_timer   dummy_requestelapseds        rt   rx   zOpenAIServingChat.warmup   sA      	<===&((
$	<)2H 2#)h??@&'  M ''&"0-1-N&*',%)-1-N #( (           (**Z74?GKKBGLLLLL 	< 	< 	<:;;;;;;	<s   BC $C-,C-requestc                   K   |                      |           d{V }|t                              d|           |S | j        j        r| j        j        	 | j        j        }|j        }| j        }t          |t                    r-t          |           t          |           t          |           |du ot          |t                     o| j         }|rY|j        dvrP|j        dk    r| j        s|                     d          S |j        dk    r|                     d|j         d          S |j        |j        dk    r
| j        rd}nd	 |j        D             }| j        s|                     |j        |j        | j        
          }||S |j        pi }|                    |j                   |                     |||j        |j        p| j        | j        |j        |j        ||j        || j         ||j!                   d{V \  }	}
n|du}| "                    ||          \  }	}
n^# tF          tH          tJ          tL          j'        f$ r9}t          (                    d           |                     |          cY d}~S d}~ww xY w|	|
fS )z
        render chat request by validating and preprocessing inputs.

        Returns:
            A tuple of (conversation, engine_prompts) on success,
            or an ErrorResponse on failure.
        NzError with model %s)NnoneautozV"auto" tool choice requires --enable-auto-tool-choice and --tool-call-parser to be setztool_choice="z'" requires --tool-call-parser to be setr   c                 6    g | ]}|                                 S  )
model_dump).0tools     rt   
<listcomp>z9OpenAIServingChat.render_chat_request.<locals>.<listcomp>  s"    JJJDdoo//JJJru   )request_chat_templater   rG   )reasoning_effortr   z$Error in preprocessing prompt inputs))_check_modelr   errorrS   errored
dead_errorr   	tokenizerrL   
isinstancer;   r<   r=   r>   rl   tool_choicerJ   create_error_responsetoolsrK   _validate_chat_templaterW   r   rG   updater   r   r}   rX   r   r   r   rR   r   _make_request_with_harmony
ValueError	TypeErrorRuntimeErrorjinja2TemplateErrorr   )rr   r   error_check_retr   r   rL   tool_parsing_unavailabler   r   conversationengine_promptsshould_include_toolses                rt   render_chat_requestz%OpenAIServingChat.render_chat_request   sj      !% 1 1' : :::::::&LL.@@@""
 % 	0$//T	1)2H *I*K)%566 1 +7333&w///'000 t# )"9.>???)(( % ( G,? H - - &&009O0  55U   (F22557(; 7 7 7  
 }$#v--< . "

JJGMJJJ
# !"&">">*1*?)0)E040P #? # #
 #.**'.'C'Ir$$++W=U+VVV595J5J$")"7"M4;M151R*1*G+2+I)%/)=151R +'.'A 6K 6 6 0 0 0 0 0 0,nn" (2'=$/3/N/N10 0,n I|V5IJ 	1 	1 	1CDDD--a00000000	1 ^++s3   B/H+ (H+ /AH+ BH+ +"J.J;JJraw_requestc                 r  K   |                      |           d{V }t          |t                    r|S |\  }}d|                     ||j                   }t          |          }|r||j        _        	 |                     |d          }| j	        
                    |          }	nS# t          t          t          f$ r9}
t                              d           |                     |
          cY d}
~
S d}
~
ww xY w|                     |          }g }	 t%          |          D ]\  }}|                     |          \  }}}t)          |          dk    r|n| d| }| j        i | _        t-          | j        |t)          |d	                   | j        
          }|j        r|                    || j                  }n;|                    || j        j        | j                  }t;          | j        |           |                     ||||           |dn|                      |j!                   d{V }t          |tD                    r| #                    |||||          }nR| $                    ||||||j%        |           d{V \  }}| j&        '                    ||||||j%        |||	  	        }|(                    |           n,# t          $ r}
|                     |
          cY d}
~
S d}
~
ww xY wt)          |          dk    sJ |\  }| j)        j*        }|j+        r| ,                    ||||	|||          S 	 | -                    ||||	|||           d{V S # t\          $ r}
| /                    |
          cY d}
~
S d}
~
wt          $ r}
|                     |
          cY d}
~
S d}
~
ww xY w)z
        Chat Completion API similar to OpenAI's API.

        See https://platform.openai.com/docs/api-reference/chat/create
        for the API specification. This API mimics the OpenAI
        Chat Completion API.
        Nz	chatcmpl-)
request_idT)supports_default_mm_lorasz"Error preparing request componentsr|   _prompt_token_ids)max_model_lenr   input_lengthrh   )paramslora_request)promptr   r   r   trace_headers)r   r   prioritydata_parallel_rank)r   r   r   prompt_texttokenization_kwargsr   )0r   r   r    _base_request_idr   r"   staterequest_metadata_maybe_get_adaptersrT   
model_namer   r   r   r   r   r   _get_data_parallel_rank	enumerate_get_prompt_componentslenrh   r1   r   use_beam_searchto_beam_search_paramsto_sampling_paramsrc   logits_processor_patternrC   rd   _log_inputs_get_trace_headersheadersr8   beam_search_process_inputsr   rS   generateappendr   r   stream chat_completion_stream_generatorchat_completion_full_generatorr%   %_convert_generation_error_to_response)rr   r   r   resultr   r   r   r   r   r   r   r   
generatorsiengine_promptr   r   sub_request_id
max_tokenssampling_paramsr   	generatorengine_requestr   result_generatorr   s                             rt   create_chat_completionz(OpenAIServingChat.create_chat_completionH  sU      //88888888fm,, 	M'-$n Q--k7;MNNPP 	 3jIII 	B1AK.	1334 4  L //==JJI|4 	1 	1 	1ABBB--a00000000	1
 "99+FF AC
Q	1$-n$=$= N- N- =$($?$?$N$N!Q #&n"5"5":":JJ:@S@SPQ@S@S  /735D0+"&"4#!$]3E%F!G!G,0,H	  
 * &-&C&C"D$@' 'OO '.&@&@")B4' 'O
 :.'  
   "!*!-	 !    #* D#66{7JKKKKKKKK  o/?@@  $ 0 0,#1.%1&3 !1 ! !II AE@T@T&%'%1&3!(!1+= AU A A ; ; ; ; ; ;7N$7 !% 2 ; ;&'&%1&3!(!1$/,?+= !< 
! 
!I !!),,,,]N-^  	1 	1 	1--a00000000	1 :!####(	 M+	> 		88    	1<<            	A 	A 	A==a@@@@@@@@ 	1 	1 	1--a00000000	1sm   71B) )C9 .C4.C94C9F<K 
K:K5/K:5K: M& &
N60N
N6
N6N1+N61N6c                 D    |j         r| j        S |j        d         d         S )Nrz   )r   rU   r}   rr   r   s     rt   get_chat_request_rolez'OpenAIServingChat.get_chat_request_role  s)    ( 	&%%#F++ru   {}sc                 B    d}| D ]}||k    r|dz  }||k    r|dz  }|S )zS
        Calculate the current level of nested brackets in a given string.
        r   r|   r   )r   openingclosinglevelchars        rt   _bracket_levelz OpenAIServingChat._bracket_level  sD    
  	 	Dw

ru   
delta_textprevious_textc                     t                               |          }d\  }}| D ]9}|dk    r|dz  }|dk    }n|dk    r|dz  }|dk    }|dk    r||z  }1|dk    r n:||fS )N)rF   Fr   r|   r   r   ,)rE   r   )r   r   bracket_levelupdated_deltapassed_zerocs         rt   _filter_delta_textz$OpenAIServingChat._filter_delta_text  s     *88GG%."{ 	 	ACxx"+q0c"+q0!!" 88E k))ru   current_textfunction_name_returnedtool_call_idxc           	      &   ||dk    rd |fS 	 t           j        }t          ||          \  }}nG# t          j        j        j        t          j        f$ r t          
                    d           d }Y nw xY w|(t          |t                    rt          |          dk    sd}d }	njt                              ||          \  }}
|d         }|
sd|vsd|vrd}d }	n4|st!          j        d|t           j                  }|r|                    d	          nd}t                              ||          \  }}|
rd|vr|d
         }d}t)          | j        |d         |          }t-          t/          |t1          |d         |          t          |          d	z
  d          g          }	nft                              ||          \  }}|dk    r@t-          t/          t1          d |          t          |          d	z
            g          }	nd }	|	|fS )NrF   z(not enough tokens to parse into JSON yetr   Fr   name
parametersz.*"parameters":\s*(.*)r|   Tid_type	func_nameidxr  	argumentsfunction)idr  indextype
tool_callsr  r  )r
   ALLrA   partial_json_parsercore
exceptionsMalformedJSONjsonJSONDecodeErrorr   debugr   listr   rE   r   researchDOTALLgroupr   rk   r   r   r   )rr   r   r   r   r   r  flagsobjr   delta_messagefinishes_previous_toolcurrent_tool_callparam_matchr  tool_call_ids                  rt   $extract_tool_call_required_streamingz6OpenAIServingChat.extract_tool_call_required_streaming  s    <2#5#5///	IE'e<<FC$/= 
 	 	 	 LLCDDDCCC	 ;jd33;3s88a<<%*" MM(9(L(LM) )%A% !$B * =-///<GX3X3X).& $- 7-"$)1<# #K 9D K 1 1! 4 4 4I#4#G#G!=$ $LIq . 4,FW2W2W,/G)-1*#4 $ 6"3F";)$ $ $L
 %1)#/):):6)Bi*" *" *" '*#hhl%/  	$% % %MM %6$H$H"M% %MJ "R''(4 --> .22<	.& .& .& +.c((Q,!" !" !"
() ) ) )-444s   . AA21A2r   r   r   r   r   r   c                  K   t          t          j                              }d}	d}
|j        dn|j        }dg|z  }dg|z  }d}d }| j        rd t	          |          D             }dg|z  }dg|z  }t          |j        t                    r|j        j        j	        }nd }| o| 
                    |          }dg|z  }| j        dk    rt          |          }nd}dg|z  }|s| j        rg g|z  }dg|z  }dg|z  }nd }	 | j        rH|t          d	          |                     |j        | j                  }|                     ||
          }nb# t$          $ rU}t&                              d           |                     t-          |                    }d| dW V  dW V  Y d }~d S d }~ww xY w	 |r2| j        r+|t          d	          |                     |          g|z  }nd g|z  }nU# t0          $ rH}t&                              d           |                     |          }d| dW V  dW V  Y d }~d S d }~ww xY w|j        } t5          | | j                  \  }!}"	 |2 3 d {V }#|#j        2t;          |#j                  }|#j        |t;          |#j                  z  }|
rt|#j        }|                      |          }$t	          |          D ]}%tC          |%tE          |$d          d d           }&tG          ||	||&g||j$        r|#j        nd           }'|"rtK          |d|          |'_&        |''                    d          }d| dW V  |j(        rd}(|r9d|d         v r/|d         )                    d          |$k    r|d         d         pd}(|(rt	          |          D ]p}%tC          |%tE          |(          d d           }&tG          ||	||&g|          }'|"rtK          |d|          |'_&        |''                    d          }d| dW V  qd}
|#j*        D 	]E})|)j+        }%||%         }*||%         r|j,        rG|j-        @|)j,        
J d            | .                    |)j/        |)j,        ||j-        |j0                  }+nd }+| j        r||%         },|,j1        }-g }.|)j/        D ]N}/|,2                    |/           |,j3        pd}0|.4                    tk          |,j6        |,j1        |0                     Od7                    d |.D                       }1|,j6        }2|2s|1rd}2n|)j8        }1|1s|)j/        s
||%         s |s| j        rK|J |J ||%         }3||%         }4|3|1z   }5|4r|4ts          |)j/                  z   }6nts          |)j/                  }6| j        r-tu          |,|.|-|j;                  \  }7}8||%xx         |8z  cc<   n|r2| j        r||%         s|<                    |4          s|J |=                    |3|5|1|4|6|)j/                  }7|<                    ts          |)j/                            s!|#j        r:|<                    |#j                  r d||%<   |7r|7j>        r|7j>        }5d |7_>        nd}5n| j        r|3|1z   }1d}5||%         r t          t          |1          |%           }9n3t          t                      d!t          ||1"          |%#          }9d||%<   tE          |9g$          }7d||%<   n|j        d%k    r|J ||%         }3|3|1z   }5||%         }:ts          |)j/                  };| j        .||%         s&|#j        r|<                    |#j                  rd||%<   | j        rY||%         sQ|=                    |3|5|1|4|6|;          }7|<                    |;          r d||%<   |7r|7j>        r|7j>        }5d |7_>        nd}5n|5}<| B                    |3|<|1|:|&          \  }7||%<   |7r#|7jC        r|7jC        d         jD        
|dz  }d||%<   nz|r
| j        r|*J |J |J |J ts          |)j/                  };||%         s|#j        r"|<                    |#j                  rd||%<   |;}6nc|=                    |3|5|1|4|6|;          }7|<                    |;          r4d||%<   |E                    |;          }6|7r|7j>        r|7j>        }5d |7_>        nd}5||%         rA|;}=||%         sd||%<   d}3g }4|5}1|6}=|*F                    |3|5|1|4|6|=|'          }7|7r|7jC        rd||%<   nm|r4|*J |*F                    |3|5|1|4|6|)j/        |'          }7|7r|7jC        rd||%<   n7| j        r |=                    |3|5|1|4|6|)j/                  }7ntE          |1          }7|s| j        r| j        s|J |J |5||%<   |6||%<   n|J ||%xx         |1z  cc<   ||%xx         t;          |)j/                  z  cc<   |7|)jG        	|j$        stE                      }7| jH        r| jI        rg }>|7j>        r|>4                    |7j>                   |7jJ        r |7jJ        }?|>4                    d(|? d)           |7jC        r?d7                    d* |7jC        D                       }@|@r|>4                    d+|@ d)           |>rS| jK        rLd,7                    |>          }A| jI        L                    ||Ats          |)j/                  |)jG        dd-           |)jG        1tC          |%|7|+d |j$        rts          |)j/                  nd .          }&n| M                    |)jG        |           d}B|*r4t;          |*jN                  dk    }B|Brt;          |*jN                  dz
  nd}Cnd}C| O                    |7|)          r|*rd}Dt          |7jC        d         j        t                    rNt          |7jC        d         j        jP        t,                    r$t;          |7jC        d         j        jP                  }Dt          jR        |*jN        |C         )                    d/i           d0          }E|*jS        |C         }F|Ddk    r|Fd |D          }F|ET                    |Fdd          }G| U                    |7|G|C          }7|Bs||%         r|r| j        r||%         rd1}Hn|)jG        r|)jG        nd2}HtC          |%|7|+|H|)jV        |j$        rts          |)j/                  nd 3          }&d||%<   t          |&|          }&tG          ||	||&g|          }'|"r"||%         }ItK          ||I||Iz             |'_&        |''                    d          }d| dW V  	G6 |!rwt          |          }ItK          ||I||Iz             }J| jY        r|rt          |4          |J_[        tG          ||	|g ||J5          }K|K'                    dd6          }Ld|L dW V  t          |          }MtK          ||M||Mz             |_\        | jH        rb| jI        r[t	          |          D ]K}%|r|%t;          |          k     r||%         nd7||%          d8}N| jI        L                    ||Nd d9dd-           Ln}# t          $ r&}d| ^                    |           dW V  Y d }~nRd }~wt0          $ rB}t&                              d:           |                     |          }d| dW V  Y d }~nd }~ww xY wdW V  d S );Nzchat.completion.chunkTr|   r   Fc                 *    g | ]}t                      S r   )r+   )r   r   s     rt   r   zFOpenAIServingChat.chat_completion_stream_generator.<locals>.<listcomp>  s,       :;355  ru   r]   rF   7Tokenizer not available when `skip_tokenizer_init=True`r   #Error in reasoning parser creation.zdata: z

zdata: [DONE]

zError in tool parser creation.ry   )r  deltalogprobsfinish_reason)r  objectcreatedchoicesr~   r   prompt_tokenscompletion_tokenstotal_tokens)exclude_unsetr{   r   rz   r{   )r  r0  r1  r2  r~   Did not output logprobs)	token_idstop_logprobsr   num_output_top_logprobsreturn_as_token_idc              3   "   K   | ]
\  }}}|V  d S Nr   )r   r   r-  s      rt   	<genexpr>zEOpenAIServingChat.chat_completion_stream_generator.<locals>.<genexpr>\  s(      ,S,S{q!UU,S,S,S,S,S,Sru   final)harmony_parsertoken_statesprev_recipientinclude_reasoning)r  r  r  r
  )r  r  r  r  r  required)r   r   r   r   r  )r   r   r   previous_token_idscurrent_token_idsdelta_token_idsr   z[reasoning: ]c              3   T   K   | ]#}|j         	|j         j        |j         j        V  $d S r?  )r  r  r   tcs     rt   r@  zEOpenAIServingChat.chat_completion_stream_generator.<locals>.<genexpr>  sR       0 0$&#%;0 46;3H0 " 50 0 0 0 0 0ru   [tool_calls:  r   outputsoutput_token_idsr/  is_streamingr-  )r  r-  r.  r/  r:  r  )ensure_asciir  stop)r  r-  r.  r/  stop_reasonr:  cached_tokens)r  r0  r1  r2  r~   usage)r7  exclude_nonez<streaming_complete: z tokens>streaming_completez*Error in chat completion stream generator.)_intr   nrl   ranger   r   r   r  r  %_should_stream_with_auto_tool_parsingrk   r   rI   r   #_prepare_extra_chat_template_kwargsr   rR   r   r   r   create_streaming_error_responsestrrL   r   stream_optionsr2   rN   r   r   encoder_prompt_token_idsnum_cached_tokensr   r   r   r   return_token_idsr$   rY  model_dump_jsonechogetrQ  r  r.  r;  _create_chat_logprobsr:  rH   current_recipientprocesslast_content_deltar   r   current_channeljointextrB   r   rE  is_reasoning_endextract_reasoning_streamingr{   r   r   r   r'  r  r  extract_content_idsextract_tool_calls_streamingr/  rO   rV   reasoning_contentrP   log_outputs_raise_if_errorprev_tool_call_arr,_should_check_for_unstreamed_tool_arg_tokensr  r  dumpsstreamed_args_for_toolreplace_create_remaining_args_deltarV  r0   sumrM   r!   prompt_tokens_detailsfinal_usage_infor%   /_convert_generation_error_to_streaming_response)Orr   r   r   r   r   r   r   r   created_timechunk_object_typefirst_iterationnum_choicesprevious_num_tokensfinish_reason_sentnum_prompt_tokensre  harmony_parsersharmony_tools_streamedtools_streamedtool_choice_function_nametool_choice_autor   history_tool_call_cntprevious_textsall_previous_token_idsadded_content_delta_arrreasoning_end_arrr   rI   r   datatool_parsersrc  include_usageinclude_continuous_usageresrz   r   choice_datachunklast_msg_contentoutputrL   r.  rB  rD  rC  token_idtoken_deltar   cur_channelr   rG  r   rH  r"  tools_streamed_flagdelta_tool_callfn_name_returnedrR  r{   rI  delta_content_parts	reasoning	tool_argsdelta_contentauto_tools_calledr  latest_delta_lenexpected_callactual_callremaining_callfinish_reason_r5  final_usagefinal_usage_chunkfinal_usage_datanum_completion_tokens	full_textsO                                                                                  rt   r   z2OpenAIServingChat.chat_completion_stream_generatort  s      49;;''#: #9,aa') cK/#W{2  	; ?D[?Q?Q  O ',W{%:";.g)+MNN 	-(/(;(D(I%%(,% *) D::7CC 	 #(;!6!Y..$>|$L$L!!$%! +  	*t4 	*&(TK%7"',g&;#!&+ 5%)"	$ $$Q  
 (,'O'O05( ($ $(#8#8)= $9 $ $   	 	 	BCCC77A??D%4%%%%%%%$$$$$FFFFF		 
4D$4 
4$$Q  
 $$Y//99  !%v3 	 	 	=>>>77::D%4%%%%%%%$$$$$FFFFF	 !/2FD;3
 3
//E
	&- H	. H	. H	. H	. H	. H	. H	.c'3(+C,@(A(A%3?)S1M-N-NN)
 # O,(+(=%  55g>>D #;// "2 "2&H"#".%)(*# # # &**.' ' ' !=)#4$0%0M", $+#;!* 4 4%)! ! ! 4 *3.?23->+ + +EK  %4444HH1t11111111 |  :GI((Q )\"-= = = ,R 0 4 4V < < D D/;B/?	/J/Pb,+ :%*;%7%7 : :.P*+*6?O*P*P*P-126	/" /" /" )E'1+<,8-8M*4)" )" )" $< !&2;6G:;5F3& 3& 3&EK (-'<'<4'<'P'P&9t&9&9&9 9 9 9 9 9&+O!k n. n.FA".q/K)!, ! ' 
(G,@,L%::<U:::#'#=#=&,&6)/&/4;4H/6/Q $> $ $ $(' 1)8);)7)I :<(.(8 	 	H*228<<<*8*K*QrK(// *$2$B$2$D$/!" !"    &(WW,S,Sl,S,S,S%S%S
&4&D
  + 2z 2*1K%+[
 '! & 0! !4A 6! !
 ( J4+@ J-9995AAA(6q(9-CA-F*'4z'A- J0BW & 0F F 1-- 188H0I0I-' mI;/=-9/=292K	   ;': /q1115HH11112 bI 1B5$5a$8B5 %5$E$E 2% %B5 $4#?#?#? 0 L L$1$0$.$6$5$*$4!" !" *  0@@ '(8 9 9    6 !$ 46 %5$E$E$'$8%" %"	6 8< 1! 4#0 !6]5J !63@3HL<@M$9$935L  $4 2-:Z-G
/15a8 A2?->-T-T-T*+3" 3" 3"
 3@'8':':)3->-F2<.& .& .& +,3" 3" 3" =A 6q 9,8$3,"- - -M
 15N1-- ,
::-999(6q(9'4z'A+A!+D(+263C+D+D( !1=$5a$8 > # 4 > !1 A A#BV W W >
 48-a00 '99J19M '9 0 L L$1$0$.$6$5$4!" !" *  0@@AQRR 67; 1! 4#0 !6]5J !63@3HL<@M$9$9 46L '3G !% I I2?18/9;K2G !J !" !" EM+A!+D !.9$1$<9 %2$<Q$?$B$N 5 : 548q 1 * cId.C cI*666/;;;6BBB0<<<+263C+D+D(03 (:
 !$ 4$:$4$E$E$'$8%" %"$: 8< 1! 44D 1 1 %5$P$P(5(4(2(:(9(8%& %& !. $4#D#DEU#V#V !:;?$5a$8(8(L(L,<)* )* %6
 (5 %:9N %:7D7L@D(=(=79 -Q/ 9.>O $;1#= D=A 7 :0257 2-9
2C,7,T,T.;-9+53E2C0?(/ -U - -M  - 91I 948q 1) I*666(3(P(P*7)5'1/A.?,2,<$+ )Q ) ) ) 5]-E 504N1- . I(8(T(T)(&.-",) ) )5Z(H(H(H )
8,0,A
8".
8  .9995AAA,8q)4E.q11  .999&q)))Z7))) (***c&2B.C.CC*** %,
 #08$+$< 9 %(4 . 43F .0+(0 N/66}7LMMM(: T(5(GI/667Ri7R7R7RSSS(3 Y(* 0 0*7*B0 0 0 ) )I
  ) Y 3 : :;W9;W;W;W X X X. 	43I 	,/HH5H,I,IM /;;+5(5189I1J1J.4.B-1&* <    +3&H"#"/%-*. $+#;!*(8 9 9 9%)
' 
' 
'  ,,V-A:NNN -2)& &03K4R0S0SVW0W- $5!'K$B C Ca G G%& "E %&E !MM -v ' !,	' 01, *$1$<Q$?$H$5!" !"
"
 #- - 8 ; D NPS# #
" 47$1$<Q$?$H$R4" 4" 0 -1J + >u E I I$/!" !" .3	- - -M +6*LU*SK/!33.9:L<L;L:L.M .;-B-B;PRTU-V-VN,0,M,M -~u- -M .	 .q 1	:S	 !% 0	 6LA5N	
 .:NN 9?8L X 4 4RX + 'I"#"/%-*8(.(: $+#;!*(8 9 9 9%)' ' ' 15*1-"B;PW"X"XK8%0 ,!,(  E 0 ,?,B)&/*;.?):=N)N' ' ' !00t0DDD-4--------]n.u .X  6$'(;$<$<!'"3&7!25F!F  
 4 9J 8L&79 9 9K5 %A!,($%% % %! $5#D#D"&T $E $ $  6/5555555 %((;$<$<!09/"7.1FF1 1 1- & 4+> {++  A *V./#n2E2E.E.E 'q))U5H5KUUU 
 '33#- ))-&:%)# 4      	Y 	Y 	YX4OOPQRRXXXXXXXXXXXX 	& 	& 	&IJJJ77::D%4%%%%%%%%%%%%	&
 !      sc   AE 
F0A
F++F04:G/ /
I9=H<<I$y: &u-,py: :
{4z%%{428{//{4c                   .K   t          t          j                              }d }		 |2 3 d {V }
|
}	
6 nQ# t          j        $ r |                     d          cY S t
          $ r}|                     |          cY d }~S d }~ww xY w|	J g }| j        dk    rt          |          }nd}|                     |          }|	j	        D ]&}| 
                    |j        |           |j        }|j        }d }|j        r8|j        1|
J d            |                     |||j        ||j                  }nd }| j        rt%          |          \  }}}|j        sd }| j        ^|t          d          |                     |          }|                    d||          }|j        }t/          ||||j        	          }nt/          |||
          }t3          |j        |||	|j        rdn|j        r|j        nd|j        |j        rt=          |j                  nd           }|                    |           s| j         r	 |t          d          | !                    |j"        | j#                  }|                      ||          }nU# tH          $ rH}tJ          &                    d           |                     tO          |                    cY d }~c S d }~ww xY w|(                    |j)        |          \  }}|j        sd }n	d }|j)        }d}| *                    |||| j+        | j                  \  }}tY          |tZ                    rt\          nt^          .| j+        r| j        s9tY          |j0        tb                    s|j0        dk    rt/          |||
          }n|j0        rSte          |j0                  tb          u r8|tg          |          dk    sJ t/          ||d.fd|D             	          }n^|j0        r||j0        dk    rqg }|tg          |          dk    sJ |D ]A}|                     .ti          | j        |j5        |          |                     |dz  }Bt/          |d||          }n|j0        r|j0        dk    rt/          |||
          }n|j6        r|j0        dk    s|j0        q| j+        rj| j        rc|d uotg          |          dk    }|rt/          |||d |D             	          }nX|} |rtg          |          dk    r|} t/          ||| 
          }n,tJ          7                    d           t/          |||
          }|p|j0        o|j0        dk    o
|j        dk    }!t3          |j        |||!rdn|j        r|j        nd|j        |j        rt=          |j                  nd           }tq          ||          }|                    |           (|j9        rd}"|r9d|d         v r/|d         :                    d          |k    r|d         d         pd}"tY          |"tv                    rd <                    d! |"D                       }"|D ]}#|"|#j=        j        pdz   }$|$|#j=        _         |	j>        J tg          |	j>                  }%|	j?        |%tg          |	j?                  z  }%t          d" |	j	        D                       }&t          |%|&|%|&z   #          }'| jB        r!|	jC        rt          |	jC        $          |'_E        |'|_F        t          |||||'t          |	jI                  |j        r|	j>        nd |	jJ        %          }(| jK        r| jL        r|D ]}#d})|#j=        j        r|#j=        j        })n|#j=        j        rg }*|#j=        j        D ][}+t          |+jN        d&          rDt          |+jN        d'          r/|*                    |+jN        j5         d(|+jN        jO         d)           \d*<                    |*          },d+|, d,})|)r[d }-|#j        tg          |	j	                  k     r|	j	        |#j                 j        }-| jL        P                    ||)|-|#j        dd-           |(S ).NzClient disconnectedr]   r   r9  )r:  r;  r<  r   r=  r*  rF   )r   r:  )rz   r  r{   r  )rz   r  r{   r  rU  )r  messager.  r/  rV  r:  r+  r,  )r   F)r   r   r{   rJ   tool_parser_clsrF  c                 (    g | ]} |           S ))r  r   )r   rM  tool_call_classs     rt   r   zDOpenAIServingChat.chat_completion_full_generator.<locals>.<listcomp>  s&    RRR < < <RRRru   r  )r  r  r|   )rz   r{   r  r  r   r   c                 0    g | ]}t          |d           S )r  )r  r  )r#   rL  s     rt   r   zDOpenAIServingChat.chat_completion_full_generator.<locals>.<listcomp>5  s?     $ $ $
 !#	 %)+%/  $ $ $ru   z~Error in chat_completion_full_generator - cannot determine if tools should be extracted. Returning a standard chat completion.r{   r   rz   
c              3   &   K   | ]}|d          V  dS )rp  Nr   )r   msgs     rt   r@  zCOpenAIServingChat.chat_completion_full_generator.<locals>.<genexpr>y  s&      ,U,USS[,U,U,U,U,U,Uru   c              3   >   K   | ]}t          |j                  V  d S r?  )r   r:  )r   r  s     rt   r@  zCOpenAIServingChat.chat_completion_full_generator.<locals>.<genexpr>  s>       #
 #
&,C !!#
 #
 #
 #
 #
 #
ru   r3  rW  )r  r1  r~   r2  rY  prompt_logprobsr   kv_transfer_paramsr  r  ()z, rN  rJ  rP  )Qr\  r   asyncioCancelledErrorr   r   rk   r   r   rQ  rw  r/  r:  r.  r;  rj  rH   rl   r.   rE  rL   extract_tool_callsr{   r   r  r   r  tools_calledrV  rf  rB   r   rI   r`  r   rR   r   r   r   rb  extract_reasoningrp  _parse_tool_calls_from_contentrJ   r   r;   r@   r#   r   r   r  r   r   r  r   r   r0   rh  ri  r  ro  r  r   rd  r~  r$   rM   re  r!   r  r  r   r'   r  r  rO   rV   hasattrr  r  rv  )/rr   r   r   r   r   r   r   r   r  	final_resr  r   r2  r  rz   r  r:  out_logprobstool_call_infor.  r  r{   r   rL   r  r  r   rI   r  r  tool_call_class_items	tool_callret_contentis_finish_reason_tool_callsr  choicefull_messager  num_generated_tokensrY  responseoutput_texttool_call_descriptionsrM  tool_calls_strrR  r  s/                                                 @rt   r   z0OpenAIServingChat.chat_completion_full_generatorg  s      49;;''*.		1-              c		 .-% 	E 	E 	E--.CDDDDD 	1 	1 	1--a00000000	1 $$$68!Y..$>|$L$L!!$%!))'22' i	( i	(F   !5zBBB(I!?L!N 
 G$8$D#//1J///55'!-,3,@''.'I 6     1(9)(D(D%	7A0 % $I#/ ((U   #'"2"29"="=K%0%C%C '"+ &D & &N
 -4G)!"+ '#1#<	  GG *!"+ '  G ; ,#% +6>;V6 % "/$V11# & 25<5MW 0111SW    {+++$ &> ((U  
 ,0+S+S49, ,( (,'<'<!-A (= ( ($$ $ > > >$$%JKKK55c!ff==========>
 &6%G%GK &H & &"	7 0 % $I 	 + % #'"E"E#"&"8 $ 0 #F # #J $.i9I#J#JXPX  * ]W$2B ]Ww24VWW]W':55%49gVVV #VW,--1SSS!-#j//A2E2E2EE%'RRRRzRRR	   $ JW)<
)J)J(*%!-#j//A2E2E2EE!+ / /I)00'0(,(>*3.$9     
 &/  	 	 	 *Q.))%4'	   ( 2WG,?6,I,I%49gVVV .W(F22g6I6Q* 7R$ 7R %/d$:$Rs:QR?R! )!"+ '$ $
 '1$ $ $	  GG  #*K  .3w<<!#3#3&-)!"+ +  GG "  
 &49gVVV +< +# 3':53(F2 ( 7l!.ll 'V))".181ISGF,---t  K ;;PPKNN;''''< 	6;=Eb!111 $((00D88#/#3I#>#D" *D11 V#'99,U,UDT,U,U,U#U#U ! 6 6/6>3I3ORP)5&&)555	 :;;-9Y%G!H!HH" #
 #
090A#
 #
 #
  
  
 +2*-AA
 
 

 , 	1L 	*>'9+ + +E' -2)) 1)2KLL.5.FP	**D(;
 
 
 " 	t': 	!   >) D"(."8KK^. D-/*$n7  "2;77 GK= =  399#%;#3 N Nbk6K N N N   &*YY/E%F%FN"C."C"C"CK '+$|c)*;&<&<<<+4+<V\+J+T('33#- +)9&,&:%*# 4    sH   5 35 $B	B$A>8B>B AJ		
K;KKKr.  r;  should_return_as_token_idc                 n      fdt          |                                          D             S )Nc                    g | ]\  }}r|k     sd k    t                              |d         |d                   xt          |d         j        d          t	                              dd                              S )	r   r|   r   )r=      utf-8r|  errors)tokenlogprobbytes)r   _get_decoded_tokenmaxr  r  encode)r   r   prr   r  r  r   r;  s      rt   r   z7OpenAIServingChat._get_top_logprobs.<locals>.<listcomp>  s     
 
 
 1
 "#\!1!1\R5G5G "!44!!!+D	 5   E AaDL'225<<	<BBCC   6H5G5Gru   )r   items)rr   r.  r;  r   r  r  s   ` ```@rt   _get_top_logprobsz#OpenAIServingChat._get_top_logprobs  sZ    
 
 
 
 
 
 
 
 "(.."2"233
 
 
 	
ru   r:  r<  r=  c                    g }||n| j         }t          |          D ]4\  }}	||         }
|
|
                    |	          u|rd|	 }n&|t          d          |                    |	          }|                    t          |t          |                    dd                                         |
|	         }|j	        }|                    t          | 
                    ||	||          t          |j        d          |dn#t          |                    dd                    |                     |
|||          	                     6t          |
          S )zCreate OpenAI-style logprobs.Nz	token_id:z:Unable to get tokenizer because `skip_tokenizer_init=True`r  r|  r  )r  r  r  )r  r  r  r;  r8  )rH   r   ri  r   decoder   r   r  r  decoded_tokenr  r  r  r  r   )rr   r:  r;  r   r<  r=  logprobs_contentr  r   r  step_top_logprobsr  
step_tokenstep_decodeds                 rt   rj  z'OpenAIServingChat._create_chat_logprobs  s    AC "- 0 	"
 %Y// ,	 ,	KAx ,Q (,=,A,A(,K,K,S, 7222EE ((X   &,,X66E ''1#"5<<	<#J#JKK      /x8
)7 ''1"55&$%5	  !$J$6 @ @  ,3 !D!%l&9&9')&9&T&T!U!U%)%;%;-3%5	& &     . &.>????ru   c                 >    |j         o| j        o| j        o|j        dv S )ae  
        Utility function to check if streamed tokens should go through the tool
        call parser that was configured.

        We only want to do this IF user-provided tools are set, a tool parser
        is configured, "auto" tool choice is enabled, and the request's tool
        choice field indicates that "auto" tool choice should be used.
        )r   N)r   rL   rJ   r   r   s     rt   r_  z7OpenAIServingChat._should_stream_with_auto_tool_parsing  s6     M 6 6&6 #~5		
ru   r"  r  c                     t          |j        duoN| j        oG| j        o@|o>|j        o7|j        d         o*|j        d         j        o|j        d         j        j        du          S )z
        Check to see if we should check for unstreamed tool arguments tokens.
        This is only applicable when auto tool parsing is enabled, the delta
        is a tool call with arguments.
        Nr   )boolr/  rJ   rL   r  r  r  )rr   r"  r  s      rt   ry  z>OpenAIServingChat._should_check_for_unstreamed_tool_arg_tokens+  s       , K&K K K (	K
 (+K (+4K (+4>dJ
 
 	
ru   r  r  c                     t          fd| j        D             d          }|r|j        nd}t          t	          |r|j        nd|r|j        ndt          |r|j        nd|                    g          S )z
        Create a delta message for remaining tool arguments, preserving
        id/type/name from the original delta.
        c              3   2   K   | ]}|j         k    |V  d S r?  )r  )r   rM  r  s     rt   r@  zAOpenAIServingChat._create_remaining_args_delta.<locals>.<genexpr>N  s/      HHBbh%6G6GR6G6G6G6GHHru   Nr
  )r  r  r  r  r  )	nextr  r  r   r   r  r  r   r  )r"  r  r  original_tcoriginal_fns     `  rt   r}  z.OpenAIServingChat._create_remaining_args_deltaC  s     HHHH-2HHH
 
 /:Ck**t)4>{~~$-8B))d.1<F[--$"0  	  

 
 
 	
ru   r   c                    g }t          |           | j        rJ | j        rJ t          |j        d d |          }|                    |           |j        r.t          |r|j        nd           }|                    |           |                    t          |j
                             t          |          }t          |          }|j        
|j        |d<   ||gfS )N)r   browser_descriptionpython_descriptionwith_custom_tools)r   )r   
cache_salt)r<   rn   rp   r,   r   r   r   r)   rm   r-   r}   r/   r3   r  )rr   r   r   r}   sys_msgdev_msgr   r   s           rt   r   z,OpenAIServingChat._make_request_with_harmony`  s   
 )+
 	#7+++
 ))))1111$$5 $#2	
 
 
 	    = 	%+';Egmm  G OOG$$$ 	=g>NOOPPP 1::$6FGGG )*1*<M,'-((ru   )rY   Nr?  )r   r   )NN)T)0__name__
__module____qualname__r   r(   rb  r   r   r  dictr   rb   rx   r   tupler  r   r    r   r   r   r   r   r   staticmethodr\  r   r   r   r'  r   r7   r:   r"   r   r   r5   r   r  GenericSequencer   rj  r_  r6   ry  r}  r   __classcell__)rs   s   @rt   rE   rE   Z   s        -2+0 ""'49"&-2+0#("& %>B)L  L  L #L  $L  	L  &,L  TzL  'FL  &*L  %)L  L   L  .2L  4ZL  '+L   %)!L " !#L $  %L & 'L ( '+38nt&;)L * 
+L  L  L  L  L  L \/< /< /< /<bl,&l, 
t'($s)3	4}	Dl, l, l, l,b '+Z1 Z1&Z1 t^Z1 
T		"%;	;m	K	Z1 Z1 Z1 Z1x,-B ,s , , , ,
 
 
# 
C 
 
 
 \
 *s *3 *5dCS * * * \*> %)b5 b5b5 Djb5 	b5
 !%b5 Tzb5 
|d"D(	)b5 b5 b5 b5Hq!&q! (6q! 	q!
 q! ./q! !4'q! 2q! 
T		"q! q! q! q!fZ&Z (6Z 	Z
 Z ./Z !4'Z 2Z 
/	/Z Z Z Zx

sG|$
 Dj
 !4'	

 $(
 
#	$
 
 
 
: /3*.>@ >@"3'>@ &d3<&84&?@>@ !4'	>@
 "%t>@ !4K>@ 
 >@ >@ >@ >@@
=R 
 
 
 
 
#d*
 !
 
	
 
 
 
0 
#

 
 
	
 
 
 \
> &*+) +)&+) #+) +) +) +) +) +) +) +)ru   rE   )ir  r  r   collections.abcr   r   r   r   typingr   r   r   r  regexr  fastapir   openai_harmonyr	   OpenAIMessage partial_json_parser.core.optionsr
   vllm.engine.protocolr   vllm.entrypoints.chat_utilsr   r   r   r   vllm.entrypoints.loggerr   0vllm.entrypoints.openai.chat_completion.protocolr   r   r   r   r   r   r   r   r   r   6vllm.entrypoints.openai.chat_completion.stream_harmonyr   r   'vllm.entrypoints.openai.engine.protocolr   r   r   r    r!   r"   r#   r$   &vllm.entrypoints.openai.engine.servingr%   r&   r'   &vllm.entrypoints.openai.models.servingr(   ,vllm.entrypoints.openai.parser.harmony_utilsr)   r*   r+   r,   r-   r.   r/   vllm.entrypoints.openai.utilsr0   vllm.entrypoints.utilsr1   r2   vllm.inputs.datar3   vllm.loggerr4   vllm.logprobsr5   vllm.outputsr6   r7   vllm.sampling_paramsr8   r9   vllm.tokenizersr:   vllm.tokenizers.mistralr;   r<   r=   r>   vllm.tool_parsersr?   %vllm.tool_parsers.mistral_tool_parserr@   vllm.tool_parsers.utilsrA   vllm.utils.collection_utilsrB   vllm.v1.sample.logits_processorrC   r  r   rE   r   ru   rt   <module>r      sL      9 9 9 9 9 9 9 9 7 7 7 7 7 7                        3 3 3 3 3 3 2 2 2 2 2 2 - - - - - -            2 1 1 1 1 1                              	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	         
 G F F F F F                  K J J J J J G G G G G G G G ) ) ) ) ) ) # # # # # # ! ! ! ! ! ! 8 8 8 8 8 8 8 8 A A A A A A A A ) ) ) ) ) )            ) ( ( ( ( ( A A A A A A 6 6 6 6 6 6 / / / / / / Q Q Q Q Q Q	X		q) q) q) q) q) q) q) q) q) q)ru   