
    &`i+                         d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
 d dlmZ d dlmZmZ d dlmZmZmZ d dlmZ d dlmZ d d	lmZmZ  ej        e          Z G d
 d          ZdS )    N)OrderedDict)AnyCallableListSet)metrics)	ReplicaIDRequestRoutingInfo)MODEL_LOAD_LATENCY_BUCKETS_MS%PUSH_MULTIPLEXED_MODEL_IDS_INTERVAL_SSERVE_LOGGER_NAME)MetricsPusher)ServeUsageTag)_get_global_client_get_internal_replica_contextc                   x    e Zd ZdZdZdeegef         dedefdZ	de
e         fdZd	 Zd
 ZdedefdZddZdS )_ModelMultiplexWrappera  A wrapper class that wraps the model load function and
    provides the LRU caching functionality.

    The model multiplexer is a wrapper class that wraps the model load function
    and provides the LRU caching functionality, and the model load function should
    be a coroutine function that takes the model ID as the first argument and
    returns the user-constructed model object.
    The model multiplexer will also ensure that the number of models on the current
    replica does not exceed the specified limit.
    The model will be unloaded in the LRU order, the model multiplexer will call the
    model's __del__ attribute if it exists to clean up the model resources eagerly.

    push_multiplexed_model_idsmodel_load_funcself_argmax_num_models_per_replicac                    t           j                            d           t                      | _        || _        || _        || _        t          	                    dt                      t          j        ddt                    | _        t          j        ddt                    | _        t          j        dd	
          | _        t          j        ddd          | _        t          j        dd
          | _        t          j        dd
          | _        t          j        dd
          | _        t-                      }|t/          d          |j        | _        |j        | _        |j        | _        d| _        t?          j                     | _!        tE                      | _#        tI                      | _%        | j%        &                    | j'        | j(        tR                     | j%        *                                 dS )a  Initialize the model multiplexer.
        Args:
            model_load_func: the model load async function.
            self_arg: self argument when model_load_func is class method.
            max_num_models_per_replica: the maximum number of models to be loaded on the
                current replica. If it is -1, there is no limit for the number of models
                per replica.
        1zMODEL_LOAD_LATENCY_BUCKET_MS: 'serve_multiplexed_model_load_latency_msz"The time it takes to load a model.)description
boundaries)serve_multiplexed_model_unload_latency_msz$The time it takes to unload a model.serve_num_multiplexed_modelsz3The number of models loaded on the current replica.)r   %serve_registered_multiplexed_model_idz/The model id registered on the current replica.)model_id)r   tag_keys,serve_multiplexed_get_model_requests_counterz:The counter for get model requests on the current replica.'serve_multiplexed_models_unload_counterz7The counter for unloaded models on the current replica.%serve_multiplexed_models_load_counterz5The counter for loaded models on the current replica.Nzc`@serve.multiplex` can only be used within a deployment (failed to retrieve Serve replica context).F)+r   MULTIPLEXED_API_USEDrecordr   models_funcr   r   loggerdebugr   r   	Histogrammodel_load_latency_msmodel_unload_latency_msGaugenum_models_gaugeregistered_model_gaugeCounterget_model_requests_countermodels_unload_countermodels_load_counterr   RuntimeErrorapp_name	_app_name
deployment_deployment_name
replica_id_replica_id_push_multiplexed_replica_infoasyncioLock_model_cache_lockset_model_load_tasksr   metrics_pusherregister_or_update_task%_PUSH_MULTIPLEXED_MODEL_IDS_TASK_NAME_push_model_ids_infor   start)selfr   r   r   contexts        g/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/serve/multiplex.py__init__z_ModelMultiplexWrapper.__init__'   s    	*11#666!mm.
%/I' 	U6SUUVVV%,%65<4&
 &
 &
"
 (/'87>4(
 (
 (
$
 !(*M!
 !
 !

 '.m3I"'
 '
 '
#
 +2/:T+
 +
 +
' &-_5Q&
 &
 &
" $+?3O$
 $
 $
 
 011?>  
 &.%,%7&-&8 5:+ ")
 ,/55+oo336%1	
 	
 	

 	!!#####    returnc                     t          | j                                                  }|                    | j                   t          |          S )zGet the model IDs of the loaded models & loading models in the replica.
        This is to push the model id information early to the controller, so that
        requests can be routed to the replica.
        )r@   r'   keysupdaterA   list)rG   models_lists     rI   !_get_loading_and_loaded_model_idsz8_ModelMultiplexWrapper._get_loading_and_loaded_model_ids~   sD    
 $+**,,--41222K   rK   c                    	 | j                             t          | j                             | j        D ] }| j                            dd|i           !| j        rPt                                          t          | j	        | 
                                                     d| _        dS dS # t          $ r(}t                              d|            Y d}~dS d}~ww xY w)z4Push the multiplexed replica info to the controller.   r    tags)r:   multiplexed_model_idsFzFFailed to push the multiplexed replica info to the controller. Error: N)r/   r@   lenr'   r0   r<   r   record_request_routing_infor
   r;   rR   	Exceptionr)   warning)rG   r    es      rI   rE   z+_ModelMultiplexWrapper._push_model_ids_info   s0   	!%%c$+&6&6777 K P P+//X8N/OOOO2 <"$$@@&#'#3.2.T.T.V.V     7<333< <  	 	 	NN1-.1 1        	s   B)B/ /
C!9CC!c                   K   t          | j                  dk    rj	 |                                  d{V  n4# t          $ r'}t                              d|            Y d}~nd}~ww xY wt          | j                  dk    hdS dS )z<Unload all the models when the model multiplexer is deleted.r   NzFailed to unload model. Error: )rX   r'   unload_model_lrurZ   r)   	exception)rG   r\   s     rI   shutdownz_ModelMultiplexWrapper.shutdown   s      $+""++----------     9a99        $+""""""s   7 
A(A##A(r    c                 t  K   t          |          t          urt          d          |st          d          | j                                         || j        v r1| j                            |          }|| j        |<   | j        |         S d| _        | j	        
                    |           | j        4 d{V  || j        v r| j        |         cddd          d{V  S 	 | j        dk    r>t          | j                  | j        k    r!|                                  d{V  d| _        t                              d| d           | j                                         t%          j                    }| j        $|                     |           d{V | j        |<   n)|                     | j        |           d{V | j        |<   t%          j                    |z
  dz  }t                              d	| d
|dd           | j	                            |           | j                            |           | j        |         cddd          d{V  S # t0          $ rA}t                              d| d|            | j	                            |           |d}~ww xY w# 1 d{V swxY w Y   dS )zLoad the model if it is not loaded yet, and return
            the user-constructed model object.

        Args:
            model_id: the model ID.

        Returns:
            The user-constructed model object.
        zThe model ID must be a string.zThe model ID cannot be empty.TNr   zLoading model ''.     @@zSuccessfully loaded model '' in .1fms.zFailed to load model 'z
'. Error: )typestr	TypeError
ValueErrorr2   incr'   popr<   rA   addr?   r   rX   r^   r)   infor4   timer   r(   discardr,   observerZ   error)rG   r    modelload_start_timeload_latency_msr\   s         rI   
load_modelz!_ModelMultiplexWrapper.load_model   s      >>$$<=== 	><==='++---t{""KOOH--E$)DK!;x(( 37D/"&&x000- ' ' ' ' ' ' ' 't{**;x0' ' ' ' ' ' ' ' ' ' ' ' ' '#
 7!;;,,0OOO #33555555555>B; KK >( > > >???,00222&*ikkO},6:jj6J6J0J0J0J0J0J0JH--6:jj M87 7 1 1 1 1 1 1H- (,y{{_'D&NOKK4h 4 4*34 4 4   *228<<<.66GGG;x0C' ' ' ' ' ' ' ' ' ' ' ' ' 'D !   LLHHHQHH   *228<<<GE' ' ' ' ' ' ' ' ' ' ' ' ' ' ' 's1   :J'$E"I
J$#<JJ$$J''
J14J1Nc                   K   | j                                          t          j                    }| j                            d          \  }}t
                              d| d           t          |d          rnt          j	        |j
                  s3t          j                                        d|j
                   d{V  n|
                                 d{V  d |_
        t          j                    |z
  dz  }| j                            |           t
                              d	| d
|dd           | j                            dd|i           dS )z%Unload the least recently used model.F)lastzUnloading model 'rb   __del__Nc                     d S )N )_s    rI   <lambda>z9_ModelMultiplexWrapper.unload_model_lru.<locals>.<lambda>   s    d rK   rc   zSuccessfully unloaded model 'rd   re   rf   r   r    rU   )r3   rk   ro   r'   popitemr)   rn   hasattrinspectiscoroutinefunctionry   r=   get_running_looprun_in_executorr-   rq   r0   r@   )rG   unload_start_timer    rs   unload_latency_mss        rI   r^   z'_ModelMultiplexWrapper.unload_model_lru   ss      	"&&((( IKK+--5-99%4444555 5)$$ 	+.u}== &.00@@u}UUUUUUUUUUmmoo%%%%%%%*NEM!Y[[+<<F$,,->???UHUU;LUUUU	
 	
 	
 	#''X0F'GGGGGrK   )rL   N)__name__
__module____qualname____doc__rD   r   rh   r   intrJ   r   rR   rE   r`   rv   r^   r{   rK   rI   r   r      s          -I)U$!3%*-U$ U$ %(	U$ U$ U$ U$n!49 ! ! ! !  ,  F F F F F FPH H H H H HrK   r   )r=   r   loggingro   collectionsr   typingr   r   r   r   	ray.server   ray.serve._private.commonr	   r
   ray.serve._private.constantsr   r   r    ray.serve._private.metrics_utilsr   ray.serve._private.usager   ray.serve.contextr   r   	getLoggerr)   r   r{   rK   rI   <module>r      sR       # # # # # # + + + + + + + + + + + +       C C C C C C C C         
 ; : : : : : 2 2 2 2 2 2 O O O O O O O O		,	-	-nH nH nH nH nH nH nH nH nH nHrK   