
    &`i
#                     R   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	m
Z
mZmZmZmZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d	d
de
de de dej!        fdZ"	 	 	 d6de
g ee#         f         de de de#dee#e#ej!        f         f
dZ$d	dddde de%de&dee#         fdZ'd	ddde d e%fd!Z(d"efd#Z)d	ej*        ej+        z   fd$e fd%Z, G d& d'          Z-ej.         G d( d)                      Z/ej.         G d* d+                      Z0ej.         G d, d-                      Z1ej.         G d. d/                      Z2ej.         G d0 d1                      Z3ej.         G d2 d3                      Z4ej.         G d4 d5                      Z5dS )7    N)partial)AnyCallable	CoroutineListOptionalTuple)StreamingResponse)tqdm)serve)	serve_pb2serve_pb2_grpc)DeploymentHandled   )num_warmup_requestsfnum_requestsr   returnc                ^   K   t          j                   r }n fd}g }t          t          ||z                       D ]Y}t	          j                    } |             d {V  t	          j                    }||k    r|                    d||z
  z             Zt          j        |          S )Nc                      K                  d S N )r   s   x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/serve/_private/benchmarks/common.pyto_callz&run_latency_benchmark.<locals>.to_call   s      ACCCCC      )	inspectiscoroutinefunctionr   rangetimeperf_counterappendpdSeries)r   r   r   r   	latenciesistartends   `       r   run_latency_benchmarkr)      s       "1%% 	 	 	 	 	 I%'::;;<< 3 3!##gii!! ###TS5[12229Yr      
   fn
multiplier
num_trialstrial_runtimec                   K   t          j                     }t          j                     |z
  dk     r* |              d{V  t          j                     |z
  dk     *g }g }t          t          |                    D ]}t          j                    }d}t          j                    |z
  |k     rF |              d{V }	|	r|                    |	           |dz  }t          j                    |z
  |k     Ft          j                    }
|                    ||z  |
|z
  z             t          t          j        |          d          t          t          j	        |          d          t          j        |          fS )a;  Benchmarks throughput of a function.

    Args:
        fn: The function to benchmark. If this returns anything, it must
            return a list of latencies.
        multiplier: The number of requests or tokens (or whatever unit
            is appropriate for this throughput benchmark) that is
            completed in one call to `fn`.
        num_trials: The number of trials to run.
        trial_runtime: How long each trial should run for. During the
            duration of one trial, `fn` will be repeatedly called.

    Returns (mean, stddev, latencies).
    g?Nr   r*      )r    r   r   r!   extendr"   roundnpmeanstdr#   r$   )r,   r-   r.   r/   r'   statsr%   _countresr(   s              r   run_throughput_benchmarkr;   .   st     * IKKE
)++

#
#bdd






 )++

#
# EI%
##$$ 
9 
9!##!!E)M99******C &  %%%QJE !!E)M99 !!Z%'3;78888##U26%==!%<%<bi	>R>RRRr   zhttp://localhost:8000F)
batch_sizeurlstreamr<   r=   r>   c                 &  K   t          j        |           }t          j        |d          4 d{V 	 fdt          j        fdt          |           D               d{V cddd          d{V  S # 1 d{V swxY w Y   dS )z9Sends a batch of http requests and returns e2e latencies.)limitT)	connectorraise_for_statusNc                    K   t          j                    } 	                               4 d {V }r&|j                                        2 3 d {V \  }}6 n|                                 d {V  d d d           d {V  n# 1 d {V swxY w Y   n# t          j        j        $ r Y nw xY wt          j                    }d|| z
  z  S Nr   )	r    r!   getcontentiter_chunksreadaiohttpclient_exceptionsClientConnectionError)r'   rchunkr8   r(   sessionr>   r=   s        r   do_queryz&do_single_http_batch.<locals>.do_queryi   s     %''E	";;s++ ' ' ' ' ' ' 'q './i.C.C.E.E ! ! ! ! ! ! !(%  /F.E  ffhh' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ,B    #%%C3;''s@   B BA!B6B 
BB BB B10B1c                 $    g | ]}             S r   r   .0r8   rO   s     r   
<listcomp>z(do_single_http_batch.<locals>.<listcomp>y   s    %L%L%LQhhjj%L%L%Lr   )rI   TCPConnectorClientSessionasynciogatherr   )r<   r=   r>   rA   rO   rN   s    `` @@r   do_single_http_batchrX   Y   s      $:666I$d   N N N N N N N N		( 	( 	( 	( 	( 	( 	(  ^%L%L%L%L%
:K:K%L%L%LMMMMMMM)N N N N N N N N N N N N N N N N N N N N N N N N N N N N N Ns   4B  
B
B
zlocalhost:9000)r<   targetrY   c                   K   t           j                            |          }t          j        |          t          j        d          fdt          j        fdt          |           D               d {V S )N )datac                     K   t          j                    }                                d {V  t          j                    }d|| z
  z  S rD   )r    r!   	grpc_call)r'   r(   payloadstubs     r   rO   z&do_single_grpc_batch.<locals>.do_query   sV      !##nnW%%%%%%%%%!!sU{##r   c                 $    g | ]}             S r   r   rQ   s     r   rS   z(do_single_grpc_batch.<locals>.<listcomp>   s    !H!H!H((**!H!H!Hr   )
grpcaioinsecure_channelr   RayServeBenchmarkServiceStubr   
StringDatarV   rW   r   )r<   rY   channelrO   r_   r`   s      @@@r   do_single_grpc_batchrh   |   s       h''//G6w??D"+++G$ $ $ $ $ $ !H!H!H!HeJ6G6G!H!H!HIIIIIIIIr   coroc                    K   ddl m}  |            }|                                 |  d{V  |                                 |                                 dS )z)Collects profiling events using Viztracerr   )	VizTracerN)	viztracerrk   r'   stopsave)ri   rk   tracers      r   collect_profile_eventsrp      sa       $#####Y[[F
LLNNN
JJJJJJJ
KKMMM
KKMMMMMr   sizec                 `    d                     fdt          |           D                       S )Nr[   c              3   @   K   | ]}t          j                  V  d S r   )randomchoice)rR   r8   charss     r   	<genexpr>z#generate_payload.<locals>.<genexpr>   s-      ==A6=''======r   )joinr   )rq   rv   s    `r   generate_payloadry      s0    77====t======r   c                       e Zd Zd ZdS )	Blackholec                     d S r   r   )selfos     r   sinkzBlackhole.sink   s    r   N)__name__
__module____qualname__r   r   r   r   r{   r{      s#            r   r{   c                       e Zd Zd Zd ZdS )Noopc                 h    t          j        d                              t           j                   d S N	ray.servelogging	getLoggersetLevelWARNINGr}   s    r   __init__zNoop.__init__   )    +&&//@@@@@r   c                     dS )Nr   r   r}   argskwargss      r   __call__zNoop.__call__   s    sr   Nr   r   r   r   r   r   r   r   r   r      s5        A A A    r   r   c                       e Zd Zd Zd ZdS )	ModelCompc                 v    t          j        d                              t           j                   || _        d S r   r   r   r   r   _childr}   childs     r   r   zModelComp.__init__   .    +&&//@@@r   c                 D   K   | j                                          d {V S r   )r   remoter   s      r   r   zModelComp.__call__   s,      ['')))))))))r   Nr   r   r   r   r   r      s2          * * * * *r   r   c                        e Zd Zd Zd Zd ZdS )GrpcDeploymentc                 h    t          j        d                              t           j                   d S r   r   r   s    r   r   zGrpcDeployment.__init__   r   r   c                 0   K   t          j        d          S N	   )outputr   ModelOutputr}   user_messages     r   r^   zGrpcDeployment.grpc_call         $A....r   c                 0   K   t          j        d          S r   r   r   s     r   call_with_stringzGrpcDeployment.call_with_string   r   r   Nr   r   r   r   r^   r   r   r   r   r   r      sD        A A A/ / // / / / /r   r   c                        e Zd Zd Zd Zd ZdS )GrpcModelCompc                 v    t          j        d                              t           j                   || _        d S r   r   r   s     r   r   zGrpcModelComp.__init__   r   r   c                 n   K   | j                                          d {V  t          j        d          S r   r   r   r   r   r   s     r   r^   zGrpcModelComp.grpc_call   @      k  """""""""$A....r   c                 n   K   | j                                          d {V  t          j        d          S r   r   r   s     r   r   zGrpcModelComp.call_with_string   r   r   Nr   r   r   r   r   r      sA          / / // / / / /r   r   c                   ,    e Zd ZddedefdZd Zd ZdS )	Streamerr+   tokens_per_requestinter_token_delay_msc                     t          j        d                              t           j                   || _        |dz  | _        d S )Nr   r   )r   r   r   r   _tokens_per_request_inter_token_delay_s)r}   r   r   s      r   r   zStreamer.__init__   s>    +&&//@@@#5 $84$?!!!r   c                   K   t          | j                  D ]&}t          j        | j                   d {V  dW V  'd S )Ns   hi)r   r   rV   sleepr   )r}   r8   s     r   r>   zStreamer.stream   s\      t/00 	 	A- 9:::::::::KKKKK	 	r   c                 H   K   t          |                                           S r   r
   r>   r   s    r   r   zStreamer.__call__   s       ///r   N)r+   )r   r   r   intr   r>   r   r   r   r   r   r      s`        @ @3 @c @ @ @ @
  
0 0 0 0 0r   r   c                   &    e Zd ZdefdZd Zd ZdS )IntermediateRouterhandlec                     t          j        d                              t           j                   |                    d          | _        d S )Nr   Tr>   )r   r   r   r   options_handle)r}   r   s     r   r   zIntermediateRouter.__init__   s<    +&&//@@@~~T~22r   c                b   K   | j         j                                        2 3 d {V }|W V  6 d S r   )r   r>   r   )r}   tokens     r   r>   zIntermediateRouter.stream   sW      <.5577 	 	 	 	 	 	 	%KKKKK 877s   .c                 D    t          |                                           S r   r   r   s    r   r   zIntermediateRouter.__call__   s     ///r   N)r   r   r   r   r   r>   r   r   r   r   r   r      sN        3/ 3 3 3 3  0 0 0 0 0r   r   c                       e Zd Z	 ddedefdZddedefdZdefd	Z	d
e
dee         fdZddde
dedej        fdZddd
e
de
dedee         deeef         f
dZdS )BenchmarkerFr   r>   c                     t          j        d                              t           j                   |                    |          | _        || _        d S )Nr   r   )r   r   r   r   r   r   _stream)r}   r   r>   s      r   r   zBenchmarker.__init__   sE    
 	+&&//@@@~~V~44r   Nr_   r   c                    K   t          j                    }| | j                                         d{V  n | j                            |           d{V  t          j                    }d||z
  z  S )z<Completes a single unary request. Returns e2e latency in ms.Nr   )r    r!   r   r   )r}   r_   r'   r(   s       r   do_single_requestzBenchmarker.do_single_request   s      !##?,%%'''''''''',%%g.........!!sU{##r   c                    K   t          j                    }| j        j                                        2 3 d{V }6 t          j                    }d||z
  z  S )z?Consumes a single streaming request. Returns e2e latency in ms.Nr   )r    r!   r   r>   r   )r}   r'   rL   r(   s       r   _do_single_streamzBenchmarker._do_single_stream  st      !##|*1133 	 	 	 	 	 	 	! 4 !!sU{##s   <r<   c                     K    j         r-t          j         fdt          |          D               d {V S t          j         fdt          |          D               d {V S )Nc                 8    g | ]}                                 S r   )r   rR   r8   r}   s     r   rS   z0Benchmarker._do_single_batch.<locals>.<listcomp>  %    FFFq$((**FFFr   c                 8    g | ]}                                 S r   r   r   s     r   rS   z0Benchmarker._do_single_batch.<locals>.<listcomp>  r   r   )r   rV   rW   r   )r}   r<   s   ` r   _do_single_batchzBenchmarker._do_single_batch  s      < 	 FFFFE*4E4EFFF        !FFFFE*4E4EFFF       r   )r_   r   c                D    K    fd}t          ||           d {V S )Nc                  B   K                                    d {V  d S r   r   )r_   r}   s   r   r   z,Benchmarker.run_latency_benchmark.<locals>.f  s3      ((11111111111r   )r   )r)   )r}   r   r_   r   s   ` ` r   r)   z!Benchmarker.run_latency_benchmark  sM      	2 	2 	2 	2 	2 	2 +1<HHHHHHHHHHr   )r   r.   r/   r   c                   K   | j         r
|sJ ||z  }n|}t          t          | j        |          |||           d {V S )N)r<   )r,   r-   r.   r/   )r   r;   r   r   )r}   r<   r.   r/   r   r-   s         r   r;   z$Benchmarker.run_throughput_benchmark$  s       < 	$%%%%+j8JJ#J-%%   "!'
 
 
 
 
 
 
 
 
 	
r   )Fr   )r   r   r   r   boolr   r   floatr   r   r   r   r   r#   r$   r)   r   r	   r;   r   r   r   r   r      sT       
       
$ 
$s 
$e 
$ 
$ 
$ 
$$ $ $ $ $ e     48I I I"I-0I	I I I I /3
 
 
 
 	

 
 %UO
 
ue|	
 
 
 
 
 
r   r   )r*   r+   r*   )6rV   r   r   rt   stringr    	functoolsr   typingr   r   r   r   r   r	   rI   aiohttp.client_exceptionsrb   numpyr4   pandasr#   starlette.responsesr
   r   rayr   ray.serve.generatedr   r   ray.serve.handler   r   r$   r)   r   r;   strr   rX   rh   rp   ascii_uppercasedigitsry   r{   
deploymentr   r   r   r   r   r   r   r   r   r   <module>r      s~               B B B B B B B B B B B B B B B B                   1 1 1 1 1 1             9 9 9 9 9 9 9 9 - - - - - - CF      " <? Y       2 	(S (ST%[!(S(S (S 	(S
 5%"#(S (S (S (SZ &	 N  N  N N 
 N 	 N
 
%[ N  N  N  NH ,<J J JJ&)J J J J$y     "%F,BV],R > >3 > > > >       
         * * * * * * * * / / / / / / / / / / / / / / / / 0 0 0 0 0 0 0 0 
0 
0 
0 
0 
0 
0 
0 
0 H
 H
 H
 H
 H
 H
 H
 H
 H
 H
r   