
    .`ig                        d dl Z d dlmZmZ d dlmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZmZmZ d dlmZ d dlm Z m!Z!m"Z"m#Z#m$Z$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z-m.Z.m/Z/m0Z0 d dl1m2Z2 d dl3m4Z4 d dl5m6Z6 d dl7m8Z8m9Z9m:Z:m;Z;m<Z<  e+e=          Z> G d de          Z? G d de?          Z@dS )    N)AsyncGeneratorMapping)AnyFinalcast)Request)Response)assert_neveroverride)EngineClient)ChatTemplateContentFormatOption)RequestLogger)ErrorResponse	UsageInfo)EmbeddingServeContextOpenAIServingServeContext)OpenAIServingModels)EmbeddingBytesResponseEmbeddingChatRequestEmbeddingCompletionRequestEmbeddingRequestEmbeddingResponseEmbeddingResponseData)RenderConfig)TokensPrompt)init_logger)EmbeddingRequestOutputPoolingOutputPoolingRequestOutputRequestOutput)PoolingParams)merge_async_iterators)
chunk_list)
EmbedDTypeEncodingFormat
Endiannessencode_pooling_bytesencode_pooling_outputc                       e Zd Z fdZedededz  fd            Zdede	fdZ
ededeez  ez  fd            Zdefd	Zdefd
Zdedee         dedeeedf                  fdZdee         dedef fdZdedededeeef         dz  dedeeez  df         fdZedededz  f fd            Zedededz  f fd            Z xZ S )EmbeddingMixinc                      t                      j        |i | | j        j        }t	          |o|j                  | _        |r|j        r|j        nd | _        d S N)super__init__model_configpooler_configboolenable_chunked_processingsupports_chunked_processingmax_embed_len)selfargskwargsr1   	__class__s       z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/entrypoints/pooling/embed/serving.pyr/   zEmbeddingMixin.__init__9   sw    $)&))))7 ,0EmE,
 ,
(
 !.!<M'' 	    ctxreturnNc           
        K   t          t          |          }	 |                     |j                  |_        t          |j        t                    rt|                     |j        | j        |j        j	        |j        j
        p|j
        |j        |j        j        |j        j        |j        j                   d {V \  }|_        nX|                                 }|                    |j        j        |                     |j                             d {V |_        d S # t(          t*          f$ rF}t,                              d           |                     t3          |                    cY d }~S d }~ww xY w)N)chat_templatechat_template_content_formatadd_generation_promptcontinue_final_messageadd_special_tokens)prompt_or_promptsconfigz$Error in preprocessing prompt inputs)r   r   _maybe_get_adaptersrequestlora_request
isinstancer   _preprocess_chatrenderermessagesr?   r@   rA   rB   rC   engine_prompts_get_completion_rendererrender_promptinput_build_render_config
ValueError	TypeErrorlogger	exceptioncreate_error_responsestr)r6   r<   _rK   es        r:   _preprocesszEmbeddingMixin._preprocessH   s     
 (#..	6#77DDC#+';<< .2.C.CKMK("%+";"Ps?P141Q*-+*K+.;+M'*{'E /D 	/ 	/ 	) 	) 	) 	) 	) 	)%3%%  88::+3+A+A&)k&744S[AA ,B , , & & & & & &" 4I& 	6 	6 	6CDDD--c!ff55555555	6s   DD   E71;E2,E72E7rG   c                     |                      |          rd }n| j        p| j        }t          ||j        |j                  S )N)
max_lengthtruncate_prompt_tokensrC   )_should_use_chunked_processingr5   max_model_lenr   r]   rC   )r6   rG   r\   s      r:   rQ   z#EmbeddingMixin._build_render_configg   sV    ..w77 	BJJ+At/AJ!#*#A&9
 
 
 	
r;   c                 d   t          t          t                   j                  j        j        j        j        j        j        fd}dt          dt          ffd}dk    sdk    r
 |            S dk    sdk    r |dk              S t                     d S )	Nc            
      >   g } d}t                    D ]U\  }}t          |t          |	
                    }|j        }|                     |           |t          |          z  }Vt          ||          }t          j        j	        j
        | |          S )Nr   )encoding_formatembed_dtype
endianness)index	embedding)prompt_tokenstotal_tokensidcreatedmodeldatausage)	enumerater   r)   prompt_token_idsappendlenr   r   
request_idcreated_time
model_name)itemsnum_prompt_tokensidx	final_resitemrp   rn   r<   rc   rb   rd   final_res_batch_checkeds          r:   encode_float_base64z;EmbeddingMixin._build_response.<locals>.encode_float_base64   s    13E !"+,C"D"D ; ;Y,3!(7$/#-	     $-#= T"""!S)9%:%::!!/.  E
 %>(n   r;   
bytes_onlyr=   c           	          t                    \  }}}| rd n*dt          j        j        j        j        ||d          i}t          ||          S )N)pooling_outputsrc   rd   metadatari   )contentheaders)r(   jsondumpsrs   rt   ru   r   )	r}   r   rv   rn   r   r<   rc   rd   r{   s	        r:   encode_bytesz4EmbeddingMixin._build_response.<locals>.encode_bytes   s    $8 7'%% % %!GUE  
"%.'*'7%(^$)%* ! !
   *'7KKKKr;   floatbase64bytes)r}   )r   listr    final_res_batchrG   rb   rc   rd   r2   r   r
   )r6   r<   r|   r   rc   rb   rd   r{   s    `  @@@@r:   _build_responsezEmbeddingMixin._build_responset   s   
 #'t,@'A3CV"W"W*-+*E"%+"9!$!7
	 	 	 	 	 	 	 	 	@	LT 	L.D 	L 	L 	L 	L 	L 	L 	L 	L 	L2 g%%H)D)D&&(((''?l+J+J<?l+JKKKK)))))r;   c                     | j         j        S )z?Get the model's effective maximum sequence length for chunking.)r0   r_   )r6   s    r:   _get_max_position_embeddingsz+EmbeddingMixin._get_max_position_embeddings   s     ..r;   c                 H    t          |t          t          f          o| j        S )z<Check if chunked processing should be used for this request.)rI   r   r   r4   )r6   rG   s     r:   r^   z-EmbeddingMixin._should_use_chunked_processing   s(     w!;=Q RSS 10	
r;   	token_ids
prompt_idxc                   K   g }|                                  }t          t          ||                    D ]\  }}	|j         d| d| }
t	          |	          }|                     |
|||j                   | j                            |||
|j        |t          |j
        dd                    }|                    |           |S )z1Process a single prompt using chunked processing.z-prompt--chunk-)rp   paramsrH   priorityr   rH   trace_headersr   )r   ro   r$   rs   r   _log_inputsrH   engine_clientencodegetattrrG   rq   )r6   r<   r   pooling_paramsr   r   
generatorsmax_pos_embeddings	chunk_idxchunk_tokenschunk_request_idchunk_engine_promptoriginal_generators                r:   _process_chunked_requestz'EmbeddingMixin._process_chunked_request   s      HJ
 ">>@@'0y"455(
 (
 	2 	2#I| #&.XX*XXYXX #/"M"M"M  #% -	     "&!3!:!:#  -+ j!<< "; " " 01111r;   	input_ids
input_textc                 :   t          |          }t          |t          t          f          r|                     |          }|                                 }| j        
d}| j        }n	d}| j        }d}	d}
||k    r%t          |		                    |||                    ||k    rD|rt                              d||           n%t          |
	                    d||                    t          ||	          S t                                          |||          S )
z>Override to support chunked processing for embedding requests.Nzmaximum embedding input lengthzmaximum context lengthzThis model's {length_type} is {max_length_value} tokens. However, you requested {token_num} tokens in the input for embedding generation. Please reduce the length of the input.zThis model's {length_type} is {max_length_value} tokens. However, you requested {token_num} tokens in the input for embedding generation. Please reduce the length of the input or enable chunked processing.)length_typemax_length_value	token_numzOInput length %s exceeds max_position_embeddings %s, will use chunked processingz"maximum position embeddings length)promptrp   )rr   rI   r   r   r^   r   r5   r_   rR   formatrT   infor   r.   _validate_input)r6   rG   r   r   r   enable_chunkedr   r   r   validation_error_msgchunked_processing_error_msgr9   s              r:   r   zEmbeddingMixin._validate_input   sy    	NN	 g :<PQRR <	O!@@IIN "&!B!B!D!D !->#'#5   7#'#5 O !0 ) +++ (//$/)9"+ 0     ---! KK:!*	    %4;;(L-?&/ <      zINNNN ww&&w	:FFFr;   engine_promptr   r   prompt_indexc                    K   |j          d| }|                     ||||j                   | j                            ||||j        |t          |j        dd                    S )zACreate a generator for a single prompt using standard processing.-r   r   r   r   )rs   r   rH   r   r   r   rG   )r6   r<   r   r   r   r   request_id_items          r:   _create_single_prompt_generatorz.EmbeddingMixin._create_single_prompt_generatorB  s       !^<<l<<!)	 	 	
 	
 	
 !(()'S[*a88 ) 
 
 	
r;   c                 T  K   t          t          |          }|                     |j                  }|s't	                                          |           d{V S g }	 |j        dn$|                     |j        j                   d{V }| 	                    |          }t          |t                    r|S 	 |                    d| j                   n9# t          $ r,}|                     t!          |                    cY d}~S d}~ww xY w|j        |                     d          S |                                 }t'          |j                  D ]\  }}	d|	v rP|	d         }
t)          |
          |k    r5|                     ||
|||           d{V }|                    |           Y|                     ||	|||           d{V }|                    |           t3          | |_        dS # t6          $ r,}|                     t!          |                    cY d}~S d}~ww xY w)z'Override to support chunked processing.NembedEngine prompts not availablerp   )r   r   r^   rG   r.   _prepare_generatorsraw_request_get_trace_headersr   _create_pooling_paramsrI   r   verifyr0   rR   rV   rW   rM   r   ro   rr   r   extendr   rq   r#   result_generator	Exception)r6   r<   use_chunkedr   r   r   rY   r   ir   rp   chunk_generators	generatorr9   s                r:   r   z"EmbeddingMixin._prepare_generators^  s      (#.. 99#+FF  	:44S999999999
  	2	6 ?* 223?3JKKKKKKKK  "88==N.-88 &%%:%%gt/@AAAA : : :11#a&&99999999: !)112PQQQ!%!B!B!D!D$-c.@$A$A - - =%66'45G'H$+,,/AAA151N1N,*)2 2 , , , , , ,( #))*:;;;  #'"F"Fq# #      	 !!),,,,#8*#EC 4 	6 	6 	6--c!ff55555555	6sV   AG1 :C G1 
D !DDG1 DG1 +CG1 1
H';!H"H'"H'c                 	  K   t          t          |          }	 |j        |                     d          S |                     |j                  }|s(t                                          |           d{V S |j        |                     d          S i }i }|j        2 3 d{V \  }}d|j	        v r|j	        
                    d          }	 t          ||                    d          dz                      }n# t          t          f$ r |}Y nw xY w||vr(dd	d	|j	        
                    d          d	         d
||<   ||         }	t          |t                     s,|                     dt#          |          j                   c S t'          |j        d          r|j        j        }
nSt'          |j        d          r|j        j        }
n1|                     dt#          |j                  j                   c S t          |
t.          j                  s t/          j        |
t.          j                  }
|j        |                     d          c S t9          |j                  }|
                    t.          j                  |z  }|	d         ||	d<   n|	dxx         |z  cc<   |	dxx         |z  cc<   |	dxx         dz  cc<   -|j	        
                    d          }	 t          |d                   }n# t          t          f$ r |}Y nw xY wt          t           |          ||<   6 g }t9          |j                  }t=          |          D ]<}||v r||         }	|	d         }|	d         }|t          |t.          j                  rt          |t          t>          f          r|d	k    ry||z  }tA          |          }|j        |         }d|vr|                     d| d          c S |d         }t!          |	d         ||d	d          }|!                    |           |                     d|           c S ||v r0|!                    t          t           ||                              $|                     d|           c S t          tD          tF          t           z           |          |_$        dS # tJ          $ r,}|                     tM          |                    cY d}~S d}~ww xY w)zCollect and aggregate batch results
        with support for chunked processing.

        For chunked requests, performs online aggregation to
        minimize memory usage.
        For regular requests, collects results normally.
        Nr   )r<   zResult generator not availabler   r   r      r   )weighted_sumtotal_weightchunk_countrs   z9Expected PoolingRequestOutput for chunked embedding, got rm   rf   zUnsupported output type: )dtypez6prompt_token_ids cannot be None for chunked processingr   r   r   )rm   rp   zChunked prompt z does not contain token IDsrs   T)rs   rp   outputsnum_cached_tokensfinishedz&Failed to aggregate chunks for prompt zResult not found for prompt )'r   r   rM   rV   r^   rG   r.   _collect_batchr   rs   splitintre   rR   
IndexErrorrI   r    type__name__hasattrr   rm   rf   torchTensortensorfloat32rp   rr   toranger   r   rq   r   r!   r   r   rW   )r6   r<   r   prompt_aggregatorsshort_prompts_results
result_idxresultpartsr   
aggregatorembedding_dataweightweighted_embeddingr   num_promptsr   r   final_embeddingpooling_output_dataoriginal_promptoriginal_token_idspooling_request_outputrY   r9   s                          r:   r   zEmbeddingMixin._collect_batch  sn      (#..c	6!)112PQQQ ==ckJJK ="WW333<<<<<<<<<#+112RSSS
 =?EG!,/,@ N N N N N N N(j& 111"-33C88E0%(u{{8/D/Dq/H)I%J%J

&
3 0 0 0%/


0
 ");;;,0,-+,*0*;*A*A)*L*LQ*O	: :*:6 "4J!?J
 &f.BCC #997#F||47 7       v~v66 
)/)< ==  *0)A#99WV^8L8L8UWW       &nelCC ).*%-* * * .6#99T       !!899F)7):):):)O)ORX)X&!.195G
>22 #>2226HH222~...&8...}---2---- #-33C88E0%(r^^

&
3 0 0 0%/


0 9=,f9 9)*55Y -Ab TVOc011K#K00 3 3
!333!3J!?J#-n#=L#-n#=L %0&|U\BB 1&|c5\BB 1 )1,, +7*E /<.Q.Q.Q+ +.*<Z*H-_DD#'#=#=!,* !, !, !,$ $   
 .==O-P*1E'1,'?-?$7./%)2 2 2. (../EFFFF#99QZQQ        #888#**13H3TUU     55CzCC     #']%99:O# #C 4 	6 	6 	6--c!ff55555555	6s   S AS :S S !L2'(S +C<;S <DS DA7S 
A4S ?AS BS )K?>S ?LS LC S 6AS AS -S 
S;!S60S;6S;)!r   
__module____qualname__r/   r   r   r   rZ   r   r   rQ   r   r	   r   r   r   r2   r^   r   r   r   r    r   rW   r   r   r"   r   r!   r   r   r   __classcell__r9   s   @r:   r+   r+   8   s       
 
 
 
 
 66 
	6 6 6 X6<
,F 
< 
 
 
 
 H*H* 
X	%	5H* H* H* XH*T/c / / / /
 
 
 
 
+"+ 9+ + 
n1478	9+ + + +ZIG 9IG 	IG
 
IG IG IG IG IG IGV
"
 $
 &	

 sCx(4/
 
 
(<<dB	C
 
 
 
8 E6E6 
	E6 E6 E6 E6 E6 XE6N o6o6 
	o6 o6 o6 o6 o6 Xo6 o6 o6 o6 o6r;   r+   c                        e Zd ZdZddddedededz  dedz  d	ed
e	de	ddf fdZ
	 ddededz  deez  f fdZedee         deez  f fd            Zdededz  f fdZ xZS )OpenAIServingEmbeddingembdF)trust_request_chat_templatelog_error_stackr   modelsrequest_loggerNr?   r@   r   r   r=   c                |    t                                          ||||           || _        || _        || _        d S )N)r   r   r   r   )r.   r/   r?   r@   r   )	r6   r   r   r   r?   r@   r   r   r9   s	           r:   r/   zOpenAIServingEmbedding.__init__\  sR     	')+	 	 	
 	
 	
 +3O)+F(((r;   rG   r   c                   K   | j                                         }| j         d|                     ||j                   }t          ||||| j        | j                  }t                      	                    |           d{V S )z
        Embedding API similar to OpenAI's API.

        See https://platform.openai.com/docs/api-reference/embeddings/create
        for the API specification. This API mimics the OpenAI Embedding API.
        r   )rG   r   ru   rs   r?   r@   N)
r   ru   request_id_prefix_base_request_idrs   r   r?   r@   r.   handle)r6   rG   r   ru   rs   r<   r9   s         r:   create_embeddingz'OpenAIServingEmbedding.create_embeddingr  s       [++--
% H H$$['2DEEH H 	
 $#!!,)-)J
 
 
 WW^^C(((((((((r;   r<   c                 $   t                                          |          }t          |t                    r|S 	 |                    d| j                   n9# t          $ r,}|                     t          |                    cY d }~S d }~ww xY w|S )Nr   )	r.   r   rI   r   r   r0   rR   rV   rW   )r6   r<   r   rY   r9   s       r:   r   z-OpenAIServingEmbedding._create_pooling_params  s    
 77<<nm44 	"!!	6!!'4+<==== 	6 	6 	6--c!ff55555555	6 s   A 
B!!BBBc                    K   t          |j        t                    r5|                     |j        j        |j        j        | j                  }||S t                                          |           d {V S )N)request_chat_templatechat_template_kwargsr   )	rI   rG   r   _validate_chat_templater?   r  r   r.   rZ   )r6   r<   error_check_retr9   s      r:   rZ   z"OpenAIServingEmbedding._preprocess  s       ck#788 	'"::&)k&?%([%E,0,L ;  O
 *&&WW((---------r;   r-   )r   r   r   r   r   r   r   rW   r   r2   r/   r   r   r   r   r   r   r   r"   r   rZ   r   r   s   @r:   r   r   Y  s        -2 %G G G#G $G
 &,G TzG 'FG &*G G 
G G G G G G2 '+) )!) t^) 
]	*	) ) ) ) ) )8 *+ 
	&     X.. 
	. . . . . . . . . .r;   r   )Ar   collections.abcr   r   typingr   r   r   r   fastapir   fastapi.responsesr	   typing_extensionsr
   r   vllm.engine.protocolr   vllm.entrypoints.chat_utilsr   vllm.entrypoints.loggerr   'vllm.entrypoints.openai.engine.protocolr   r   &vllm.entrypoints.openai.engine.servingr   r   r   &vllm.entrypoints.openai.models.servingr   'vllm.entrypoints.pooling.embed.protocolr   r   r   r   r   r   vllm.entrypoints.rendererr   vllm.inputs.datar   vllm.loggerr   vllm.outputsr   r   r    r!   vllm.pooling_paramsr"   vllm.utils.async_utilsr#   vllm.utils.collection_utilsr$   vllm.utils.serial_utilsr%   r&   r'   r(   r)   r   rT   r+   r    r;   r:   <module>r     s    3 3 3 3 3 3 3 3 # # # # # # # # # #        & & & & & & 4 4 4 4 4 4 4 4 - - - - - - G G G G G G 1 1 1 1 1 1                
 G F F F F F                3 2 2 2 2 2 ) ) ) ) ) ) # # # # # #            . - - - - - 8 8 8 8 8 8 2 2 2 2 2 2              
X		^6 ^6 ^6 ^6 ^6] ^6 ^6 ^6BQ. Q. Q. Q. Q.^ Q. Q. Q. Q. Q.r;   