
    -`if                        U d Z ddlZddlZddlZddlZddlZddlZddlmZ ddl	m
Z
mZ ddlmZmZmZ ddlZddlZddlmZ  ej        d          Z G d	 d
          Ze
 G d d                      Ze
 G d d                      Z G d de          Zdededeee         z  ddfdZdeeef         deddfdZdeeef         deddfdZ 	 d2dedej!        dedz  defdZ"	 d3deded          de#eeef                  fd!Z$	 	 d4dedej!        dedz  ded          def
d"Z%	 d2dedej!        dedz  defd#Z&	 d2dej!        dedeeef         deeef         dedz  defd$Z'	 d2dedej!        dedz  defd%Z(	 d2dedej!        dedz  defd&Z)	 	 d4dedej!        dedz  ded          def
d'Z*defd(Z+defd)Z,defd*Z-	 d2dedej!        dedz  defd+Z.	 d2dedej!        dedz  defd,Z/	 d2dedej!        dedz  defd-Z0	 d2dedej!        dedz  defd.Z1e"e"e%e&e(e*e.e/e0e1e)d/Z2eeef         e3d0<   d1 e24                                D             Z5dS )5z'The request function for API endpoints.    N)	Awaitable)	dataclassfield)AnyLiteralProtocol)tqdmi`T  )totalc                   4    e Zd ZdZd Zdedee         fdZdS )StreamedResponseHandlerzbHandles streaming HTTP responses by accumulating chunks until complete
    messages are available.c                     d| _         d S N )buffer)selfs    }/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/benchmarks/lib/endpoint_request_func.py__init__z StreamedResponseHandler.__init__   s        chunk_bytesreturnc                    |                     d          }| xj        |z  c_        g }d| j        v rW| j                            dd          \  }| _        |                                }|r|                    |           d| j        v W| j                            d          r| j                            d                                          }|dk    r4|                    | j                                                   d| _        n`|r^	 t          j        |           |                    | j                                                   d| _        n# t          j	        $ r Y nw xY w|S )zLAdd a chunk of bytes to the buffer and return any complete
        messages.utf-8z

   data: [DONE]r   )
decoder   splitstripappend
startswithremoveprefixjsonloadsJSONDecodeError)r   r   	chunk_strmessagesmessagemessage_contents         r   	add_chunkz!StreamedResponseHandler.add_chunk   sn     &&w//	y  ###';#4#4VQ#?#? GT[mmooG )(((	 ## ;!!(++ 	"k66x@@FFHHO(** 1 1 3 3444   J///OODK$5$5$7$7888"$DKK+   D s   AE E%$E%N)	__name__
__module____qualname____doc__r   bytesliststrr)    r   r   r   r      sS           U tCy      r   r   c                      e Zd ZU dZeee         z  ed<   eed<   eed<   eed<   eed<   dZedz  ed<   dZ	edz  ed	<   dZ
edz  ed
<   dZedz  ed<   dZeee         z  dz  ed<   dZeed<   dZedz  ed<   dZedz  ed<   dS )RequestFuncInputz#The input for the request function.promptapi_url
prompt_len
output_lenmodelN
model_namelogprobsextra_headers
extra_bodymulti_modal_contentF
ignore_eoslanguage
request_id)r*   r+   r,   r-   r0   r/   __annotations__intr9   r:   r;   dictr<   r=   r>   boolr?   r@   r1   r   r   r3   r3   ?   s         --$s)OLLLOOOOOOJJJ!Jd
!!!HcDj!%M4$;%%%"Jt"""48T
*T1888JHcDj!Jd
!!!!!r   r3   c                       e Zd ZU dZdZeed<   dZeed<   dZ	e
ed<   dZeed	<   dZe
ed
<    ee          Zee
         ed<   dZe
ed<   dZeed<   dZeed<   dZe
ed<   dS )RequestFuncOutputz5The output of the request function including metrics.r   generated_textFsuccess        latencyr   output_tokensttft)default_factoryitltpotr6   error
start_timeN)r*   r+   r,   r-   rG   r0   rA   rH   rD   rJ   floatrK   rB   rL   r   r/   rN   rO   r6   rP   rQ   r1   r   r   rF   rF   R   s         ??NCGTGUM3D%uT222Ce222D%JE3OOOJr   rF   c            
       F    e Zd Z	 ddedej        dedz  dee         fdZ	dS )RequestFuncNrequest_func_inputsessionpbarr   c                     d S Nr1   )r   rU   rV   rW   s       r   __call__zRequestFunc.__call__c   s	    
 (+sr   rY   )
r*   r+   r,   r3   aiohttpClientSessionr	   r   rF   rZ   r1   r   r   rT   rT   b   se        
 !	+ +,+ &+ Tk	+
 
$	%+ + + + + +r   rT   r5   api_nameexpected_suffixesr   c                     t          |t                    r|h}h |d}|                     t          |                    st	          | d| d          d S )Nprofilez URL must end with one of: .)
isinstancer0   endswithtuple
ValueError)r5   r]   r^   s      r   _validate_api_urlrf   k   s}    
 #S)) 0./7+7Y7E"34455 WHUUARUUUVVVW Wr   payloadrU   c                 n    |j         r
|j         | d<   |j        r|                     |j                   d S d S )Nr>   )r>   r<   update)rg   rU   s     r   _update_payload_commonrj   y   sM     $ > 2 =$ 6)4555556 6r   headersc                 N    |j         r
| |j         z  } |j        r|j        | d<   d S d S )Nzx-request-id)r;   r@   )rk   rU   s     r   _update_headers_commonrm      sH     ' 4%33$ @"4"?@ @r   rV   rW   c                 P  K   | j         }t          |dd           | j        r| j        n| j        | j        d| j        | j        dddid}t          ||            ddt          j	        
                    d	           i}t          ||            t                      }| j        |_        d
}t          j                    }||_        |}		 |                    |||          4 d{V }
|
j        dk    rd}t'                      }|
j                                        2 3 d{V }|                                }|s|                    |          }|D ]}|                    d          r|                    d          }|dk    rt5          j        |          }|
                    d          x}rw|d         
                    d          }t          j                    }|s d}t          j                    |z
  }||_        n|j                            ||	z
             |}	||pd
z  }|
                    d          x}r|
                    d          |_        	@6 |rd|_         nd|_         d|_!        ||_"        |	|z
  |_#        n|
j$        pd
|_!        d|_         	 ddd          d{V  n# 1 d{V swxY w Y   nP# tJ          $ rC d|_         tM          j'                    }d
(                    tS          j*        |           |_!        Y nw xY w|r|+                    d           |S )zThe async request function for the OpenAI Completions API.

    Args:
        request_func_input: The input for the request function.
        pbar: The progress bar to display the progress.

    Returns:
        The output of the request function.
    zOpenAI Completions APIcompletionsg      ?Tinclude_usage)r8   r4   repetition_penalty
max_tokensr:   streamstream_optionsAuthorizationBearer OPENAI_API_KEYr   urlr"   rk   N   F:r   r   choicesr   textusagecompletion_tokenszVNever received a valid chunk to calculate TTFT.This response will be marked as failed!r   ),r5   rf   r9   r8   r4   r7   r:   rj   osenvirongetrm   rF   r6   timeperf_counterrQ   poststatusr   contentiter_anyr   r)   r    r!   r"   r#   rL   rN   r   rK   rH   rP   rG   rJ   reason	Exceptionsysexc_infojoin	tracebackformat_exceptionri   )rU   rV   rW   r5   rg   rk   outputrG   stmost_recent_timestampresponsefirst_chunk_receivedhandlerr   r&   r'   chunkdatar|   r}   	timestamprL   r~   r   s                           r    async_request_openai_completionsr      sF      !(Gg7GGG (&#..%$+!(3&/T
 G 7$6777 	E2:>>2B#C#CEEG 7$6777  F*5FN				BF>F<<G'7<KK 9	' 9	' 9	' 9	' 9	' 9	' 9	'x#%%',$133)1)9)B)B)D)D 'V 'V 'V 'V 'V 'V 'V+"-"3"3"5"5K& ! &00==H#+ !V !V #--c22 %$ ' 4 4X > > H,,#':e#4#4D
 +/((9*=*==w V (/qz~~f'='=,0,=,?,?	'; !Y;?$8+/+<+>+>+CD26FKK %+J$5$5iBW6W$X$X$X8A 5 .$*" <*.((7*;*;!; V7<yyAT7U7U 4C!V *EP ( %)FNN%*FNB L )7%!6!;'4"!&&s9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	' 9	't  F F F<>>wwy98DEEF
  AMsD   J? $6J-I E:J-J? -
J77J? :J7;J? ?A
LLlastmm_position)firstr   c                 6   d| j         dg}g }| j        rv| j        }t          |t                    r|                    | j                   n?t          |t
                    r|                    | j                   nt          d          |dk    r||z   S ||z   S )Nr}   typer}   z@multi_modal_content must be a dict or list[dict] for openai-chatr   )r4   r=   rb   r/   extendrC   r   	TypeError)rU   r   text_contentsmm_contents
mm_contents        r   _get_chat_contentr      s     %.@.GHHIMK- 	';
j$'' 	1EFFFF
D)) 	1EFFFFR   g]**;&&r   c                 ,  K   | j         }t          |dd           t          | |          }| j        r| j        n| j        d|dg| j        dddid}t          ||            d	d
t          j        	                    d           d}t          ||            t                      }| j        |_        d}	d}
t          j                    }||_        |}	 |                    |||          4 d {V }|j        dk    rvt%                      }|j                                        2 3 d {V }|                                }|s|                    |          }|D ]}|                    d          r|                    d          }|dk    rt          j                    }t3          j        |          }|	                    d          x}rY|d         d         	                    d          }|
dk    r||z
  }
|
|_        n|j                            ||z
             |	|pdz  }	n1|	                    d          x}r|	                    d          |_        |}56 |	|_        d|_         ||z
  |_!        n|j"        pd|_#        d|_         	 d d d           d {V  n# 1 d {V swxY w Y   nP# tH          $ rC d|_         tK          j&                    }d'                    tQ          j)        |           |_#        Y nw xY w|r|*                    d           |S )NzOpenAI Chat Completions APIzchat/completionsr   userroler   Trp   )r8   r&   max_completion_tokensrs   rt   application/jsonrv   rw   zContent-Typeru   r   rI   rx   rz   r{   r   r   r|   r   deltar   r~   r   Fr   )+r5   rf   r   r9   r8   r7   rj   r   r   r   rm   rF   r6   r   r   rQ   r   r   r   r   r   r   r)   r    r!   r"   r#   rL   rN   r   rK   rG   rH   rJ   r   rP   r   r   r   r   r   r   ri   )rU   rV   rW   r   r5   r   rg   rk   r   rG   rL   r   r   r   r   r   r&   r'   r   r   r   r|   r~   r   s                           r   %async_request_openai_chat_completionsr     s)      !(Gg<>PQQQ 2LLLG (&#..%00
 "4!>T
 G 7$6777 +E2:>>2B#C#CEE G 7$6777  F*5FND				BF1F<<G'7<KK ,	' ,	' ,	' ,	' ,	' ,	' ,	'x#%%133)1)9)B)B)D)D "> "> "> "> "> "> ">+"-"3"3"5"5K& ! &00==H#+ > > #--c22 %$ ' 4 4X > > H,,(,(9(;(;I#':e#4#4D*.((9*=*==w V*1!*W*=*A*A)*L*L#'3;;+4r>D26FKK %+J$5$5iBW6W$X$X$X .'-R ?*.((7*;*;!; V7<yyAT7U7U 44=19> *EH )7%!%!6!;'4"!&&Y,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	' ,	'Z  F F F<>>wwy98DEEF
  AMsD   J- 04J$I*EJ	J- 
J%%J- (J%)J- -A
K:9K:c                   K   dd l | j        }t          |dddh           d| j        dg}| j        r| j        n| j        | j        ddddd	}t          ||            d
dt          j	        
                    d           i}t          ||            fd}| j        }t          |t                    rd|vrt          d           ||d          5 }	t!          j                    }
|
                    d|	d           |                                D ](\  }}|
                    |t)          |                     )t+                      }| j        |_        d}d}t/          j                    }||_        |}	 |                    ||
|          4 d {V 	 }|j        dk    rst9                      }|j                                        2 3 d {V }|                                }|s|                     |          }|D ]}|!                    d          "                    d          }|dk    rt/          j                    }tG          j$        |          }|
                    d          x}rY|d         d         
                    d          }|dk    r||z
  }||_%        n|j&        '                    ||z
             ||pdz  }n1|
                    d          x}r|
                    d          |_(        |}26 ||_)        d|_*        ||z
  |_+        n|j,        pd|_-        d|_*        	 d d d           d {V  n# 1 d {V swxY w Y   nP# t\          $ rC d|_*        t_          j0                    }d1                    te          j3        |           |_-        Y nw xY wd d d            n# 1 swxY w Y   |r|4                    d            |S )!Nr   zOpenAI Audio APItranscriptionstranslationsr}   r   Ten)r8   r   rs   r?   stream_include_usagestream_continuous_usage_statsru   rv   rw   c                     t          j                    }                    || |d           |                    d           |S )NWAV)formatr   )ioBytesIOwriteseek)ysrr   	soundfiles      r   to_bytesz,async_request_openai_audio.<locals>.to_bytes  s<    2e444Ar   audioz5multi_modal_content must be a dict containing 'audio'filez	audio/wav)content_typer   rI   )ry   r   rk   rz   r   r   r   r|   r   r   r~   r   Fr   )5r   r5   rf   r4   r9   r8   r7   rj   r   r   r   rm   r=   rb   rC   r   r[   FormData	add_fielditemsr0   rF   r6   r   r   rQ   r   r   r   r   r   r   r)   r   r!   r"   r#   rL   rN   r   rK   rG   rH   rJ   r   rP   r   r   r   r   r   r   ri   )rU   rV   rW   r5   r   rg   rk   r   mm_audiofformkeyvaluer   rG   rL   r   r   r   r   r   r&   r'   r   r   r   r|   r~   r   r   s                                @r   async_request_openai_audior   v  s^       (Gg14Dn3UVVV(:(ABBCG (&#..%!3!> $)-
 
G 7$6777 	E2:>>2B#C#CEEG 7$6777     "5Hh%% Q)@)@OPPP	8G$	% ?J!!vq{;;;!--// 	, 	,JCNN3E

++++"$$.9   "1	J||$ $   ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+?c))577G-5-=-F-F-H-H B B B B B B Bk&1&7&7&9&9* %$#*#4#4[#A#A'/ B BG$+NN7$;$;$H$H$R$RE$00,0,=,?,?	'+z%'8'8.2hhy.A.A#A7 !&.5aj.A.E.Ei.P.PG'+s{{/82~6: )/
(9(9,58M,M)* )* )* %3gm$CNN.2hhw.?.?%?U !&;@99(;<& <&F$8 9B 53B .IB -;F)%)FN%:R%?FNN#+?#8bFL%*FN*Y,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+ ,+Z  	J 	J 	J"FN|~~H779#=x#HIIFLLL	Jy?J ?J ?J ?J ?J ?J ?J ?J ?J ?J ?J ?J ?J ?J ?JB  AMsi   B#N*5M4L<K:EL<*M<
M	M	M	
MN*A
NN*NN**N.1N.c                 N  K   t                      }t          j                    }||_        	 |                     |||          4 d {V }|j        dk    rt          j                    |z
  x|_        |_        |                    dd          dk    r6t          j
        |j        d                   }|                    di           }	n0|	                                 d {V }
|
                    di           }	d|_        d	|_        |	                    d
d          |_        nd|_        |j        pd	|_        	 d d d           d {V  n# 1 d {V swxY w Y   n2# t"          $ r%}d|_        t%          |          |_        Y d }~nd }~ww xY w|r|                    d           |S )N)ry   rk   r"   rz   encoding_formatrR   r.   metadatar~   Tr   prompt_tokensr   Fr   )rF   r   r   rQ   r   r   rL   rJ   r   r"   r#   rk   rH   rG   r6   r   rP   r   r0   ri   )rV   r5   rg   rk   rW   r   r   r   r   r~   r   es               r   _run_pooling_requestr     s1        F				BF<<GW7<KK 	5 	5 	5 	5 	5 	5 	5x#%%/3/@/B/BR/GGfn;;0'::gEE#z(*::*FGGH$LL"55EE!)000000D HHWb11E!%(*%$)IIoq$A$A!!!&'4"4!	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5"    1vv  AMs<   E 
C-E
8E 

EE EE 
F&FFc                 0  K   | j         }t          |dd           | j        r| j        n| j        | j        dd}t          ||            ddt          j                            d           d}t          ||            t          |||||	           d {V S )
NOpenAI Embeddings API
embeddings)r8   inputtruncate_prompt_tokensr   rv   rw   r   rg   rk   rW   )r5   rf   r9   r8   r4   rj   r   r   r   rm   r   rU   rV   rW   r5   rg   rk   s         r   async_request_openai_embeddingsr   	  s      
 !(Gg6EEE (&#..%#* #% G 7$6777 +E2:>>2B#C#CEE G 7$6777%         r   c                   K   | j         }t          |dd           t          | j        t                    rt          | j                  dk    sJ | j        r| j        n| j        | j        d         | j        dd          dd}ddt          j	        
                    d	           d
}t          ||            t          |||||           d {V S )NzvLLM score APIrerankr   r   r   )r8   query	documentsr   r   rv   rw   r   r   )r5   rf   rb   r4   r/   lenr9   r8   r   r   r   rm   r   r   s         r   async_request_vllm_rerankr   +  s#     
 !(Gg/::: 	%,d33")**Q...	/ (&#..%#*1-'.qrr2 #%	 	G +E2:>>2B#C#CEE G 7$6777%         r   c                 P  K   | j         }t          |dd           t          | |          }| j        r| j        n| j        d|dgdd}t          ||            dd	t          j                            d
           d}t          ||            t          |||||           d {V S )Nr   r   r   r   r   r   )r8   r&   r   r   rv   rw   r   r   )r5   rf   r   r9   r8   rj   r   r   r   rm   r   )rU   rV   rW   r   r5   r   rg   rk   s           r   $async_request_openai_embeddings_chatr   R  s      !(Gg6EEE 2LLLG (&#..%00

 #%
 
G 7$6777 +E2:>>2B#C#CEE G 7$6777%         r   c                     | j         rOt          j        d| j                   }|r3	 t          |                    d                    S # t
          $ r Y nw xY wd S )Nz(\d+)$r   )r@   researchrB   groupre   )rU   matchs     r   _try_extract_request_idxr   y  sn    $ 	)%7%BCC 	5;;q>>***    4s   !A 
AAc                 &    | j         r	d| _        d S d S r   )r=   r4   )rU   s    r   _preprocess_clipr     s&    - '$&!!!' 'r   c                     | j         r8t          |           }|d u p|dz  dk    }|r	d| _        d S d| j         | _        d S d S )N   r   zRepresent the given image.z7Represent the given image with the following question: )r=   r   r4   )rU   request_idxuse_image_only_prompts      r   _preprocess_vlm2vecr     sz    - ./ABB !,t 3 K{Q!7K  	(D%%%/%,/ / %%% r   c                 T   K   t          |            t          | ||           d {V S N)rW   )r   r   rU   rV   rW   s      r   $async_request_openai_embeddings_clipr     sU      
 '(((5         r   c                 V   K   t          |            t          | ||d           d {V S )Nr   )rW   r   )r   r   r   s      r   'async_request_openai_embeddings_vlm2vecr     sX      
 *+++5	         r   c                   K   | j         }t          |dd           d| j        r| j        n| j        i}| j        r| j        |d<   nV| j        }t          |t                    sJ |d         }||         d         |d<   |                    dd          d	         |d
<   t          ||            ddt          j                            d           d}t          ||            t          |||||           d {V S )NzInfinity Embeddings APIr   r8   r   r   ry   _r   r   modalityr   rv   rw   r   r   )r5   rf   r9   r8   r4   r=   rb   rC   r   rj   r   r   r   rm   r   )rU   rV   rW   r5   rg   r   mm_typerk   s           r   !async_request_infinity_embeddingsr     sR     
 !(Gg8,GGG 	(&#..%G   7-4';
*d+++++V$%g.u5%mmC33A6
7$6777 +E2:>>2B#C#CEE G 7$6777%         r   c                 T   K   t          |            t          | ||           d {V S r   )r   r   r   s      r   &async_request_infinity_embeddings_clipr     sU      
 '(((2         r   )vllmopenaizopenai-chatzopenai-audiozopenai-embeddingszopenai-embeddings-chatzopenai-embeddings-clipzopenai-embeddings-vlm2veczinfinity-embeddingszinfinity-embeddings-clipzvllm-rerankASYNC_REQUEST_FUNCSc                 8    g | ]\  }}|t           t          fv |S r1   )r   r   ).0kvs      r   
<listcomp>r    s8       1-/TUUU UUUr   rY   )r   )Nr   )6r-   r   r"   r   r   r   r   collections.abcr   dataclassesr   r   typingr   r   r   r[   regexr   tqdm.asyncior	   ClientTimeoutAIOHTTP_TIMEOUTr   r3   rF   rT   r0   setrf   rC   rj   rm   r\   r   r/   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rA   r   OPENAI_COMPATIBLE_BACKENDSr1   r   r   <module>r     s   . - - 				  				 



      % % % % % % ( ( ( ( ( ( ( ( ) ) ) ) ) ) ) ) ) )           '''k:::& & & & & & & &R " " " " " " " "$        + + + + +( + + +WWW SX~W 
	W W W W6#s(^6(6 
6 6 6 6@#s(^@(@ 
@ @ @ @ n n(n"n +n 	n n n nf -3' '(')' 
$sCx.' ' ' '6 ,2	] ](]"] +] )	]
 ] ] ] ]F k k(k"k +k 	k k k kf " """" #s(^" #s(^	"
 +" " " " "P  (" + 	   J $ $($"$ +$ 	$ $ $ $T ,2	$ $($"$ +$ )	$
 $ $ $ $N	1A 	 	 	 	')9 ' ' ' ',<    *  (" + 	   "  (" + 	   $ & &(&"& +& 	& & & &X  (" + 	     -.8.8BB!H< F,/ / T#{*+     #))++     r   