
    &`i}                     @   d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
mZ d dlmZmZmZmZmZmZmZmZ d dlmZmZmZ d dlmZ d dlmZmZ d dlmZ d d	l m!Z! d d
l"m#Z#m$Z$ d dl%m&Z&  e j'        e          Z( G d d          Z) G d d          Z* G d d          Z+dS )    N)defaultdict)AnyCallableDictListOptionalSetTuple)RUNNING_REQUESTS_KEYApplicationNameDeploymentIDHandleMetricReport	ReplicaIDReplicaMetricReportTargetCapacityDirection
TimeSeries))RAY_SERVE_AGGREGATE_METRICS_AT_CONTROLLER&RAY_SERVE_MIN_HANDLE_METRICS_TIMEOUT_SSERVE_LOGGER_NAME)DeploymentInfo)aggregate_timeseriesmerge_instantaneous_total)ServeUsageTag)"get_capacity_adjusted_num_replicas)AutoscalingContextAutoscalingPolicy)metricsc                   F   e Zd ZdZdefdZdededefdZde	fd	Z
defd
ZdefdZdee	         fdZd Zd ZdefdZdedefdZdeddfdZdeddfdZdee         ddfdZdedededefdZ	 d1ded!edefd"Z	 d2d#eeee f                  de!fd$Z"dee#         fd%Z$dee#         fd&Z%dee#         fd'Z&d(ee#         defd)Z'defd*Z(defd+Z)defd,Z*dee	ee#         f         fd-Z+defd.Z,deeee	ef         f         fd/Z-deeee	e#f         f         fd0Z.dS )3DeploymentAutoscalingStatez,Manages autoscaling for a single deployment.deployment_idc                    || _         t                      | _        t                      | _        d | _        d | _        d | _        d | _        g | _        d | _	        d | _
        d | _        d | _        t          j        ddd          | _        t          j        ddd          | _        t          j        ddd	          | _        d S )
N"serve_autoscaling_desired_replicaszmThe raw autoscaling decision (number of replicas) from the autoscaling policy before applying min/max bounds.
deploymentapplication)descriptiontag_keys serve_autoscaling_total_requestszbTotal number of requests as seen by the autoscaler. This is the input to the autoscaling decision.*serve_autoscaling_policy_execution_time_mszpTime taken to execute the autoscaling policy in milliseconds. High values may indicate a slow or complex policy.)r$   r%   policy_scope)_deployment_iddict_handle_requests_replica_metrics_deployment_info_config_policy_policy_state_running_replicas_target_capacity_target_capacity_direction_last_scale_up_time_last_scale_down_timer   Gaugeautoscaling_decision_gauge autoscaling_total_requests_gauge'autoscaling_policy_execution_time_gaugeselfr    s     x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/serve/_private/autoscaling_state.py__init__z#DeploymentAutoscalingState.__init__%   s    +
 @Dvv GKff $  	
 8<2415MQ'48 6:"*1-09 3+
 +
 +
' 18./ 31
 1
 1
- 8?}8E C8
 8
 8
444    infocurr_target_num_replicasreturnc                 ^   |j         j        }|t          d| j                   | j        | j                            |          r|j        |j        }n|}|| _        || _        | j        j        	                                | _
        |j        | _        |j        | _        i | _        | j        j                                        sTt"                              d| j        j        j         d| j         d           t(          j                            d           |                     |          S )zyRegisters an autoscaling deployment's info.

        Returns the number of replicas the target should be set to.
        N-Autoscaling config is not set for deployment !Using custom autoscaling policy 'z' for deployment ''.1)deployment_configautoscaling_config
ValueErrorr+   r/   config_changedinitial_replicasr0   policy
get_policyr1   target_capacityr4   target_capacity_directionr5   r2   is_default_policy_functionloggerrA   policy_functionr   CUSTOM_AUTOSCALING_POLICY_USEDrecordapply_bounds)r=   rA   rB   configtarget_num_replicass        r>   registerz#DeploymentAutoscalingState.register[   sD    ':>U@SUU   !)T-B-Q-QRV-W-W)%1"("9": $|*5577 $ 4*.*H' |"==?? 	EKK;DL4G4W ; ;#'#6; ; ;  
 8??DDD  !4555r@   
replica_idc                 ,    || j         v r
| j         |= d S d S N)r.   )r=   r[   s     r>   on_replica_stoppedz-DeploymentAutoscalingState.on_replica_stopped   s)    ...%j111 /.r@   c                     | j         j        4| j        t          j        k    rt          | j         j        | j                  S t          | j         j        | j                  S r]   )r0   rM   r5   r   UPr   r4   min_replicasr=   s    r>   get_num_replicas_lower_boundz7DeploymentAutoscalingState.get_num_replicas_lower_bound   sa    <(4+/F/III5-%  
 6)%  r@   c                 @    t          | j        j        | j                  S r]   )r   r0   max_replicasr4   rb   s    r>   get_num_replicas_upper_boundz7DeploymentAutoscalingState.get_num_replicas_upper_bound   s"    1L%!
 
 	
r@   running_replicasc                     || _         dS )z=Update cached set of running replica IDs for this deployment.N)r3   )r=   rg   s     r>   update_running_replica_idsz5DeploymentAutoscalingState.update_running_replica_ids   s    !1r@   c                 6    t          j                     | _        dS )z2Record a scale up event by updating the timestamp.N)timer6   rb   s    r>   record_scale_upz*DeploymentAutoscalingState.record_scale_up   s    #'9;;   r@   c                 6    t          j                     | _        dS )z4Record a scale down event by updating the timestamp.N)rk   r7   rb   s    r>   record_scale_downz,DeploymentAutoscalingState.record_scale_down   s    %)Y[["""r@   &num_replicas_running_at_target_versionc                 4    |                      |          |k    S )zWhether or not this deployment is within the autoscaling bounds.

        Returns: True if the number of running replicas for the current
            deployment version is within the autoscaling bounds. False
            otherwise.
        )rW   )r=   ro   s     r>   is_within_boundsz+DeploymentAutoscalingState.is_within_bounds   s#     DEE56	
r@   num_replicasc                     t          |                                 t          |                                 |                    S )zqClips a replica count with current autoscaling bounds.

        This takes into account target capacity.
        )maxrc   minrf   )r=   rr   s     r>   rW   z'DeploymentAutoscalingState.apply_bounds   s>     --//1133\BB
 
 	
r@   replica_metric_reportNc                 x    |j         }|j        }|| j        vs|| j        |         j        k    r|| j        |<   dS dS )z8Records average number of ongoing requests at a replica.N)r[   	timestampr.   )r=   rv   r[   send_timestamps       r>   "record_request_metrics_for_replicaz=DeploymentAutoscalingState.record_request_metrics_for_replica   sV     +5
.8 d333 5j A KKK0ED!*--- LKr@   handle_metric_reportc                 x    |j         }|j        }|| j        vs|| j        |         j        k    r|| j        |<   dS dS )zgRecords average number of queued and running requests at a handle for this
        deployment.
        N)	handle_idrx   r-   )r=   r{   r}   ry   s       r>   !record_request_metrics_for_handlez<DeploymentAutoscalingState.record_request_metrics_for_handle   sT     )2	-7T222 5i @ JJJ/CD!),,, KJr@   alive_serve_actor_idsc                 4   t          d| j        j        z  t                    }t	          | j                                                  D ]\  }}|j        rR|j        K|j        |vrB| j        |= |j	        dk    r.t                              d| d|j         d|j	         d           ^t          j                    |j        z
  |k    rQ| j        |= |j	        dk    r>|j        }|rd| d	nd
}t                              d| d	| d|dd|j	         d	           dS )Drops handle metrics that are no longer valid.

        This includes handles that live on Serve Proxy or replica actors
        that have died AND handles from which the controller hasn't
        received an update for too long.
           Nr   zDropping metrics for handle 'z%' because the Serve actor it was on (z) is no longer alive. It had z ongoing requestsz
on actor 'z'  z#Dropping stale metrics for handle 'z#because no update was received for z.1fzs. Ongoing requests was: .)rt   r0   metrics_interval_sr   listr-   itemsis_serve_component_sourceactor_idtotal_requestsrS   debugrk   rx   rA   )r=   r   	timeout_sr}   handle_metricr   
actor_infos          r>   drop_stale_handle_metricsz4DeploymentAutoscalingState.drop_stale_handle_metrics   s    //2
 
	 )-T-B-H-H-J-J(K(K 	 	$I} 7!*6!*2GGG))4 /!33LLY	 Y Y,9,BY Y)6)EY Y Y   }66)CC))4 /!33,5H>F!N!:h!:!:!:!:BJKKQi Q Q: Q Q>GPQ Q1>1MQ Q Q  /	 	r@   decision_num_replicastotal_num_requestspolicy_execution_time_msr*   c                     | j         j        | j         j        d}| j                            ||           | j                            ||           | j                            |i |d|i           d S )Nr#   )tagsr*   )r+   nameapp_namer9   setr:   r;   )r=   r   r   r   r*   r   s         r>   record_autoscaling_metricsz5DeploymentAutoscalingState.record_autoscaling_metrics   s     -2.7
 
 	'++,A+MMM-112D41PPP488$+Qd+QNL+Q+Q 	9 	
 	
 	
 	
 	
r@   F_skip_bound_checkc                 h   | j         t          d| j         d          |                     |          }t	          j                    }|                      |          \  }| _        t	          j                    |z
  dz  }|                     ||j        |d           |r|S |                     |          S )a  Decide the target number of replicas to autoscale to.

        The decision is based off of the number of requests received
        for this deployment. After the decision number of replicas is
        returned by the policy, it is then bounded by the bounds min
        and max adjusted by the target capacity and returned. If
        `_skip_bound_check` is True, then the bounds are not applied.
        Nz!Policy is not set for deployment r     r$   )	r1   rK   r+   get_autoscaling_contextrk   r2   r   r   rW   )r=   rB   r   autoscaling_context
start_timer   r   s          r>   get_decision_num_replicasz4DeploymentAutoscalingState.get_decision_num_replicas  s     <WATWWWXXX"::;STT Y[[
48LLAT4U4U1t1$(IKK*$<#D ''!2$		
 	
 	
  	)((  !6777r@   override_policy_statec                     ||                                 }n#| j        | j                                         }ni }t          di d| j        d| j        j        d| j        j        dt          | j                  d|d| j        d| j        d| 	                                d	| 
                                d
|dt          j                    d| j        d| j        d| j        d| j        d| j        d| j        S )Nr    deployment_namer   current_num_replicasrY   rg   r   capacity_adjusted_min_replicascapacity_adjusted_max_replicaspolicy_statecurrent_timerX   total_queued_requestsaggregated_metricsraw_metricslast_scale_up_timelast_scale_down_time )copyr2   r   r+   r   r   lenr3   get_total_num_requestsrc   rf   rk   r0   _get_queued_requests_get_aggregated_custom_metrics_get_raw_custom_metricsr6   r7   )r=   rB   r   current_policy_states       r>   r   z2DeploymentAutoscalingState.get_autoscaling_context1  s    !,#8#=#=#?#?  +#'#5#:#:#<#<  #% ! 
 
 
--
 /44
 (11
 "%T%;!<!<!<	

 !9 8
 "33
  $::
 ,0+L+L+N+N+N
 ,0+L+L+N+N+N
 .-
 
 <<
 #'";";
  $BB
 44
   $77!
" "&!;!;#
 	
r@   c                     g }| j         D ]R}| j                            |d          }|3t          |j        v r%|                    |j        t                              S|S )zCollect running requests timeseries from replicas for aggregation.

        Returns:
            List of timeseries data.
        N)r3   r.   getr   r   append)r=   timeseries_listr[   rv   s       r>   !_collect_replica_running_requestsz<DeploymentAutoscalingState._collect_replica_running_requestsQ  sv     0 	 	J$($9$=$=j$$O$O!%1(,A,III&&)12FG   r@   c                 v    g }| j                                         D ]}|                    |j                   |S )ztCollect queued requests timeseries from all handles.

        Returns:
            List of timeseries data.
        )r-   valuesr   queued_requests)r=   r   r{   s      r>   _collect_handle_queued_requestsz:DeploymentAutoscalingState._collect_handle_queued_requestse  sK     $($9$@$@$B$B 	I 	I ""#7#GHHHHr@   c                     g }| j                                         D ]Z}| j        D ]P}t          |j        vs||j        t                   vr%|                    |j        t                   |                    Q[|S )a  Collect running requests timeseries from handles when not collected on replicas.

        Returns:
            List of timeseries data.

        Example:
            If there are 2 handles, each managing 2 replicas, and the running requests metrics are:
            - Handle 1: Replica 1: 5, Replica 2: 7
            - Handle 2: Replica 1: 3, Replica 2: 1
            and the timestamp is 0.1 and 0.2 respectively
            Then the returned list will be:
            [
                [TimeStampedValue(timestamp=0.1, value=5.0)],
                [TimeStampedValue(timestamp=0.2, value=7.0)],
                [TimeStampedValue(timestamp=0.1, value=3.0)],
                [TimeStampedValue(timestamp=0.2, value=1.0)]
            ]
        )r-   r   r3   r   r   r   )r=   r   r   r[   s       r>    _collect_handle_running_requestsz;DeploymentAutoscalingState._collect_handle_running_requestsp  s    & !299;; 		 		M"4  
(0EEE!)>?S)TTT&&!)*>?
K    r@   r   c                     |sdS t          |          }|rM|d         j        }t          j                    |z
  }|dk    rd}t          || j        j        |          }||ndS dS )a  Aggregate and average a metric from timeseries data using instantaneous merge.

        Args:
            timeseries_list: A list of TimeSeries (TimeSeries), where each
                TimeSeries represents measurements from a single source (replica, handle, etc.).
                Each list is sorted by timestamp ascending.

        Returns:
            The time-weighted average of the metric

        Example:
            If the timeseries_list is:
            [
                [
                    TimeStampedValue(timestamp=0.1, value=5.0),
                    TimeStampedValue(timestamp=0.2, value=7.0),
                ],
                [
                    TimeStampedValue(timestamp=0.2, value=3.0),
                    TimeStampedValue(timestamp=0.3, value=1.0),
                ]
            ]
            Then the returned value will be:
            (5.0*0.1 + 7.0*0.2 + 3.0*0.2 + 1.0*0.3) / (0.1 + 0.2 + 0.2 + 0.3) = 4.5 / 0.8 = 5.625
                r   gMbP?)aggregation_functionlast_window_s)r   rx   rk   r   r0   r   )r=   r   merged_timeserieslast_metric_timer   values         r>   _merge_and_aggregate_timeseriesz:DeploymentAutoscalingState._merge_and_aggregate_timeseries  s    <  	3 6oFF 	704> !IKK*::M !! $(!%)\%F+  E
 "-5536sr@   c                 ^   |                                  }t          |          dk    }|                                 }|s|                                 }ng }g }|                    |           |s|                    |           |                    |           |                     |          }|S )a  Calculate total requests using aggregate metrics mode with timeseries data.

        This method works with raw timeseries metrics data and performs aggregation
        at the controller level, providing more accurate and stable metrics compared
        to simple mode.

        Processing Steps:
            1. Collect raw timeseries data (eg: running request) from replicas (if available)
            2. Collect queued requests from handles (always tracked at handle level)
            3. Collect raw timeseries data (eg: running request) from handles (if not available from replicas)
            4. Merge timeseries using instantaneous approach for mathematically correct totals
            5. Calculate time-weighted average running requests from the merged timeseries

        Key Differences from Simple Mode:
            - Uses raw timeseries data instead of pre-aggregated metrics
            - Performs instantaneous merging for exact gauge semantics
            - Aggregates at the controller level rather than using pre-computed averages
            - Uses time-weighted averaging over the look_back_period_s interval for accurate calculations

        Metrics Collection:
            Running requests are collected with either replica-level or handle-level metrics.

            Queued requests are always collected from handles regardless of where
            running requests are collected.

        Timeseries Aggregation:
            Raw timeseries data from multiple sources is merged using an instantaneous
            approach that treats gauges as right-continuous step functions. This provides
            mathematically correct totals without arbitrary windowing bias.

        Example with Numbers:
            Assume metrics_interval_s = 0.5s, current time = 2.0s

            Step 1: Collect raw timeseries from 2 replicas (r1, r2)
            replica_metrics = [
                {"running_requests": [(t=0.2, val=5), (t=0.8, val=7), (t=1.5, val=6)]},  # r1
                {"running_requests": [(t=0.1, val=3), (t=0.9, val=4), (t=1.4, val=8)]}   # r2
            ]

            Step 2: Collect queued requests from handles
            handle_queued = 2 + 3 = 5  # total from all handles

            Step 3: No handle metrics needed (replica metrics available)
            handle_metrics = []

            Step 4: Merge timeseries using instantaneous approach
            # Create delta events: r1 starts at 5 (t=0.2), changes to 7 (t=0.8), then 6 (t=1.5)
            #                      r2 starts at 3 (t=0.1), changes to 4 (t=0.9), then 8 (t=1.4)
            # Merged instantaneous total: [(t=0.1, val=3), (t=0.2, val=8), (t=0.8, val=10), (t=0.9, val=11), (t=1.4, val=15), (t=1.5, val=14)]
            merged_timeseries = {"running_requests": [(0.1, 3), (0.2, 8), (0.8, 10), (0.9, 11), (1.4, 15), (1.5, 14)]}

            Step 5: Calculate time-weighted average over full timeseries (t=0.1 to t=1.5+0.5=2.0)
            # Time-weighted calculation: (3*0.1 + 8*0.6 + 10*0.1 + 11*0.5 + 15*0.1 + 14*0.5) / 2.0 = 10.05
            avg_running = 10.05

            Final result: total_requests = avg_running + queued = 10.05 + 5 = 15.05

        Returns:
            Total number of requests (average running + queued) calculated from
            timeseries data aggregation.
        r   )r   r   r   r   extendr   )r=   replica_timeseriesmetrics_collected_on_replicasqueued_timeserieshandle_timeseriesongoing_requests_timeseriesongoing_requestss          r>   (_calculate_total_requests_aggregate_modezCDeploymentAutoscalingState._calculate_total_requests_aggregate_mode  s    ~ "CCEE(+,>(?(?!(C% !@@BB, 	# $ E E G G " ')# 	$**+=>>> - 	B'../@AAA 	$**+<===  ??'
 
  r@   c                    d}| j         D ]9}|| j        v r.|| j        |         j                            t          d          z  }:|dk    }| j                                        D ]o}||j        z  }|sa| j         D ]Y}||j                            t          i           v r5||j                            t                                        |          z  }Zp|S )ag  Calculate total requests using simple aggregated metrics mode.

        This method works with pre-aggregated metrics that are computed by averaging
        (or other functions) over the past look_back_period_s seconds.

        Metrics Collection:
            Metrics can be collected at two levels:
            1. Replica level: Each replica reports one aggregated metric value
            2. Handle level: Each handle reports metrics for multiple replicas

        Replica-Level Metrics Example:
            For 3 replicas (r1, r2, r3), metrics might look like:
            {
                "r1": 10,
                "r2": 20,
                "r3": 30
            }
            Total requests = 10 + 20 + 30 = 60

        Handle-Level Metrics Example:
            For 3 handles (h1, h2, h3), each managing 2 replicas:
            - h1 manages r1, r2
            - h2 manages r2, r3
            - h3 manages r3, r1

            Metrics structure:
            {
                "h1": {"r1": 10, "r2": 20},
                "h2": {"r2": 20, "r3": 30},
                "h3": {"r3": 30, "r1": 10}
            }

            Total requests = 10 + 20 + 20 + 30 + 30 + 10 = 120

            Note: We can safely sum all handle metrics because each unique request
            is counted only once across all handles (no double-counting).

        Queued Requests:
            Queued request metrics are always tracked at the handle level, regardless
            of whether running request metrics are collected at replicas or handles.

        Returns:
            Total number of requests (running + queued) across all replicas/handles.
        r   )r3   r.   r   r   r   r-   r   aggregated_queued_requests)r=   r   idr   r   r[   s         r>   %_calculate_total_requests_simple_modez@DeploymentAutoscalingState._calculate_total_requests_simple_mode)  s   Z ( 	 	BT***$"7";"N"R"R(!# #  )7(:% "299;; 
	* 
	*MmFFN0 *"&"8 * *J!]%E%I%I,b& &   '-*J*N*N0+ +#j//* r@   c                 `    t           r|                                 S |                                 S )a  Get average total number of requests aggregated over the past
        `look_back_period_s` number of seconds.

        If there are 0 running replicas, then returns the total number
        of requests queued at handles

        This code assumes that the metrics are either emmited on handles
        or on replicas, but not both. Its the responsibility of the writer
        to ensure enclusivity of the metrics.
        )r   r   r   rb   s    r>   r   z1DeploymentAutoscalingState.get_total_num_requestsn  s1     5 	@@@BBB==???r@   c                    t          t                    }| j        D ]b}|| j        v rW| j        |         j        rE| j        |         j                                        D ] \  }}||                             |           !c|S )z!Get the raw replica metrics dict.)r   r   r3   r.   r   r   r   )r=   metric_valuesr   kvs        r>   get_replica_metricsz.DeploymentAutoscalingState.get_replica_metrics~  s    #D))( 	/ 	/BT***t/DR/H/P* 1"5=CCEE / /DAq!!$++A....r@   c                     t           r-|                                 }|sdS |                     |          S t          d | j                                        D                       S )zCalculate the total number of queued requests across all handles.

        Returns:
            Sum of queued requests at all handles. Uses aggregated values in simple mode,
            or aggregates timeseries data in aggregate mode.
        r   c              3   $   K   | ]}|j         V  d S r]   )r   ).0r   s     r>   	<genexpr>zBDeploymentAutoscalingState._get_queued_requests.<locals>.<genexpr>  s9        ! 8     r@   )r   r   r   sumr-   r   )r=   r   s     r>   r   z/DeploymentAutoscalingState._get_queued_requests  s     5 	 $ D D F F$ s778IJJJ   %)%:%A%A%C%C     r@   c                    t          t                    }| j        D ]_}| j                            |          }||j                                        D ]&\  }}|                     |g          }|||         |<   '`t          |          S )a4  Aggregate custom metrics from replica metric reports.

        This method aggregates raw timeseries data from replicas on the controller,
        similar to how ongoing requests are aggregated.

        Returns:
            Dict mapping metric name to dict of replica ID to aggregated metric value.
        )r   r,   r3   r.   r   r   r   r   )r=   r   r[   rv   metric_name
timeseriesaggregated_values          r>   r   z9DeploymentAutoscalingState._get_aggregated_custom_metrics  s     )..0 	O 	OJ$($9$=$=j$I$I!$,+@+H+N+N+P+P O O'Z#'#G#G#U#U >N";/
;;O
 &'''r@   c                     t          t                    }| j        D ]I}| j                            |          }||j                                        D ]\  }}|||         |<   Jt          |          S )zExtract raw custom metric values from replica metric reports.

        Returns:
            Dict mapping metric name to dict of replica ID to raw metric timeseries.
        )r   r,   r3   r.   r   r   r   )r=   r   r[   rv   r   r   s         r>   r   z2DeploymentAutoscalingState._get_raw_custom_metrics  s     "$''0 	B 	BJ$($9$=$=j$I$I!$,+@+H+N+N+P+P B B'Z7AK(44B K   r@   Fr]   )/__name__
__module____qualname____doc__r   r?   r   intrZ   r   r^   rc   rf   r   ri   rl   rn   rq   rW   r   rz   r   r~   r	   strr   floatr   boolr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r@   r>   r   r   "   s       664
l 4
 4
 4
 4
l"6^ "6s "6s "6 "6 "6 "6H2Y 2 2 2 2c    
c 
 
 
 
24	? 2 2 2 2/ / /1 1 1
s 
 
 
 
	
 	
 	
 	
 	
 	
F%8F	F F F FD0D 
D D D D's3x 'D ' ' ' 'R
"
 "
 #(	

 
 
 
 
$ HM8 8(+8@D8	8 8 8 8F ;?
 
  (S#X7
 
	
 
 
 
@4
3C    (	j1A 	 	 	 	 $z2B        D6j)6 
6 6 6 6p] % ]  ]  ]  ] ~Cu C C C CJ@ @ @ @ @ T)T*5E*E%F    e    *(S$y%?O:P5P0Q ( ( ( (.!	c4	:-..	/! ! ! ! ! !r@   r   c            	          e Zd ZdZdefdZed             ZdefdZ	de
fdZd	ed
ededefdZd	efdZd	efdZdeeeeeef         f                  fdZ	 d&deeef         de
deeef         fdZd	edee         fdZd	efdZd	efdZdefdZd	edefdZd	efdZ d	edede
fdZ!de"fd Z#d!e$fd"Z%d#e&e         fd$Z'd%S )'ApplicationAutoscalingStatez-Manages autoscaling for a single application.r   c                 >    || _         i | _        d | _        d | _        d S r]   )	_app_name_deployment_autoscaling_statesr1   r2   r=   r   s     r>   r?   z$ApplicationAutoscalingState.__init__  s6     "  	+  	 BFr@   c                 4    | j                                         S r]   )r   keysrb   s    r>   deploymentsz'ApplicationAutoscalingState.deployments  s    277999r@   autoscaling_policyc                    |                                 | _        i | _        |                                sLt                              d|j         d| j         d           t          j	        
                    d           dS dS )a  Register or update application-level autoscaling config and deployments.

        This will overwrite the deployment-level policies with the application-level policy.

        Args:
            autoscaling_policy: The autoscaling policy to register.
        rF   z' for application 'rG   rH   N)rO   r1   r2   rR   rS   rA   rT   r   r   rU   rV   )r=   r   s     r>   rZ   z$ApplicationAutoscalingState.register  s     *4466 "<<>> 	EKK74F4V 7 7$(N7 7 7  
 8??DDDDD	E 	Er@   rC   c                     | j         d uS r]   )r1   rb   s    r>   
has_policyz&ApplicationAutoscalingState.has_policy  s    |4''r@   r    rA   rB   c                 j   || j         vrt          |          | j         |<   |j        j        t	          d|           |j        j        j                                        s2|                                 rt          	                    d| d           | j         |         
                    ||          S )z4Register a single deployment under this application.NrE   zUUser provided both a deployment-level and an application-level policy for deployment z4. The application-level policy will take precedence.)r   r   rI   rJ   rK   rN   rR   r   rS   warningrZ   )r=   r    rA   rB   s       r>   register_deploymentz/ApplicationAutoscalingState.register_deployment  s      CCC +=99 / !4<OOO   &9@[[]]	!!	 NNEhu E E E  
 2=AJJ$
 
 	
r@   c                     || j         vr t                              d| d           d S | j                             |           d S )Nz3Cannot deregister autoscaling state for deployment z because it is not registered)r   rS   r   popr<   s     r>   deregister_deploymentz1ApplicationAutoscalingState.deregister_deployment  sX     CCCNNrmrrr   F+//>>>>>r@   c                     || j         v S r]   )r   r<   s     r>   should_autoscale_deploymentz7ApplicationAutoscalingState.should_autoscale_deployment&  s     CCCr@   r   c           	      "   |dS t          |t                    s
J d            |                                D ]V}|| j        v sJ d|             t          ||         t                    s#J d| dt	          ||                                WdS )z`Validate that the returned policy_state from an application-level policy is correctly formatted.NzcApplication-level autoscaling policy must return policy_state as Dict[DeploymentID, Dict[str, Any]]z-Policy state contains invalid deployment ID: zPolicy state for deployment z must be a dictionary, got )
isinstancer,   r   r   type)r=   r   r    s      r>   _validate_policy_statez2ApplicationAutoscalingState._validate_policy_state)  s     F$
 
 	q 	qp	q 	q 

 *..00 	| 	|M!DDDDN}NN EDD]+T  | |{m{{X\]ijw]xXyXy{{| |  |		| 	|r@   F!deployment_to_target_num_replicasr   c                 4                                      r[ fd j                                        D             }t          j                    }                     |          \  }}t          j                    |z
  dz  }                     |           | _        t          |          t          u s
J d            |	                                D ]+}| j        v sJ d| d            |v sJ d| d            ,i }	|                                D ]\\  }}
 j        |         }|
                    |
||         j        |d           s  j        |                             |
          n|
|	|<   ]|	S fd j                                        D             S )	z
        Decide scaling for all deployments in this application by calling
        each deployment's autoscaling policy.
        c           
          i | ]D\  }}||                     |         j        rj                            |i           ni           ES r   )r   r2   r   )r   r    stater  r=   s      r>   
<dictcomp>zIApplicationAutoscalingState.get_decision_num_replicas.<locals>.<dictcomp>H  so     $ $ $ )M5 u<<5mD)D&**="===	   $ $ $r@   r   zWAutoscaling policy must return a dictionary of deployment_name -> decision_num_replicaszDeployment z is not registeredz is invalidr%   c                 R    i | ]#\  }}||                     |                    $S ))rB   r   )r   )r   r    deployment_autoscaling_stater   r  s      r>   r  zIApplicationAutoscalingState.get_decision_num_replicas.<locals>.<dictcomp>~  sZ        @M#? ;UU-N%. '8	  V      r@   )r   r   r   rk   r1   r  r2   r  r,   r   r   r   rW   )r=   r  r   autoscaling_contextsr   	decisionsreturned_policy_stater   r    resultsrr   r  s   ```         r>   r   z5ApplicationAutoscalingState.get_decision_num_replicas=  s7    ?? @	$ $ $ $ $ -1,O,U,U,W,W$ $ $  J 04||<P/Q/Q,I,(,	j(@D'H$''(=>>>!6D Y4'''h ('' "+!1!1 < <!T%HHHHBBBB IHH "%FFFF;;;; GFFF G/8/@/@  +|/3/R!0, -GG (7J,!	   -&D7FSS$   & && N     DHCfClClCnCn   r@   rg   c                 F    | j         |                             |           d S r]   )r   ri   )r=   r    rg   s      r>   ri   z6ApplicationAutoscalingState.update_running_replica_ids  s1     	+M:UU	
 	
 	
 	
 	
r@   c                 Z    || j         v r!| j         |                                          dS dS )z)Record a scale up event for a deployment.N)r   rl   r<   s     r>   rl   z+ApplicationAutoscalingState.record_scale_up  s9    D???/>NNPPPPP @?r@   c                 Z    || j         v r!| j         |                                          dS dS )z+Record a scale down event for a deployment.N)r   rn   r<   s     r>   rn   z-ApplicationAutoscalingState.record_scale_down  s9    D???/>PPRRRRR @?r@   r[   c                 j    |j         }|| j        v r"| j        |                             |           d S d S r]   )r    r   r^   )r=   r[   dep_ids      r>   r^   z.ApplicationAutoscalingState.on_replica_stopped  sB    )T888/7JJ:VVVVV 98r@   c                 @    | j         |                                         S r]   )r   r   r<   s     r>   %get_total_num_requests_for_deploymentzAApplicationAutoscalingState.get_total_num_requests_for_deployment  s#     2

 
 
"
"	#r@   c                 @    | j         |                                         S r]   )r   r   r<   s     r>   $get_replica_metrics_by_deployment_idz@ApplicationAutoscalingState.get_replica_metrics_by_deployment_id  s    2=AUUWWWr@   ro   c                 B    | j         |                             |          S r]   )r   rq   )r=   r    ro   s      r>   rq   z,ApplicationAutoscalingState.is_within_bounds  s'     2=ARR2
 
 	
r@   rv   c                 t    |j         j        }|| j        v r"| j        |                             |           d S d S r]   )r[   r    r   rz   )r=   rv   r  s      r>   rz   z>ApplicationAutoscalingState.record_request_metrics_for_replica  sM     '1? T888/001FGGGGG 98r@   r{   c                 j    |j         }|| j        v r"| j        |                             |           d S d S r]   )r    r   r~   )r=   r{   r  s      r>   r~   z=ApplicationAutoscalingState.record_request_metrics_for_handle  sH     &3T888///0DEEEEE 98r@   r   c                 h    | j                                         D ]}|                    |           dS )r   N)r   r   r   )r=   r   	dep_states      r>   r   z5ApplicationAutoscalingState.drop_stale_handle_metrics  sH     <CCEE 	G 	GI//0EFFFF	G 	Gr@   Nr   )(r   r   r   r   r   r?   propertyr   r   rZ   r   r   r   r   r   r   r  r  r   r   r   r   r  r   r   r   ri   rl   rn   r^   r   r  r  rq   r   rz   r   r~   r	   r   r   r@   r>   r   r     s       77F!F F F F$ : : X:E-E E E E.(D ( ( ( (
#
 
 #&	

 

 
 
 
B?< ? ? ? ?D D D D D|$T,S#X*F%GH| | | |. #(I I+/c0A+BI  I 
lC	 	I I I IV
)
=A)_
 
 
 
Q\ Q Q Q Q
S| S S S S
WY W W W W
#)#	# # # #X, X X X X
)
SV
	
 
 
 
	H%8	H 	H 	H 	HF$6F F F FGs3x G G G G G Gr@   r   c                      e Zd ZdZd ZdedededefdZdefdZ	d	e
d
efdZd	e
fdZd	e
defdZd	e
deeef         deeef         fdZd	e
fdZdefdZdedee         fdZdefdZdefdZdefdZdedeeee         f         fdZdedefdZdededefdZdeddfdZ de!ddfd Z"d!e#e$         ddfd"Z%dS )#AutoscalingStateManagerzManages all things autoscaling related.

    Keeps track of request metrics for each application and its deployments,
    and decides on the target number of replicas to autoscale to.
    c                     i | _         d S r]   _app_autoscaling_statesrb   s    r>   r?   z AutoscalingStateManager.__init__  s      	$$$r@   r    rA   rB   rC   c                     |j         j        sJ |j        }| j                            |t          |                    }t                              d|            |                    |||          S )z%Register autoscaling deployment info.z-Registering autoscaling state for deployment )	rI   rJ   r   r&  
setdefaultr   rS   rA   r   )r=   r    rA   rB   r   	app_states         r>   r   z+AutoscalingStateManager.register_deployment  s     %8888 )0;;1(;;
 
	 	SMSSTTT,,4!9
 
 	
r@   c                     | j                             |j                  }|r4t                              d|            |                    |           dS dS )z Remove deployment from tracking.z/Deregistering autoscaling state for deployment N)r&  r   r   rS   rA   r  r=   r    r)  s      r>   r  z-AutoscalingStateManager.deregister_deployment  sj    044]5KLL	 	;KKQ-QQ   ++M:::::		; 	;r@   r   r   c                     | j                             |t          |                    }t                              d|            |                    |           d S )Nz.Registering autoscaling state for application )r&  r(  r   rS   rA   rZ   )r=   r   r   r)  s       r>   register_applicationz,AutoscalingStateManager.register_application  sb    
 0;;1(;;
 
	 	OXOOPPP-.....r@   c                     || j         v r:t                              d|            | j                             |d           dS dS )z!Remove application from tracking.z0Deregistering autoscaling state for application N)r&  rS   rA   r   r   s     r>   deregister_applicationz.AutoscalingStateManager.deregister_application  sR    t333KKU8UUVVV(,,Xt<<<<< 43r@   c                 R    || j         v o| j         |                                         S r]   )r&  r   r   s     r>   _application_has_policyz/AutoscalingStateManager._application_has_policy  s/    44 D,X6AACC	
r@   r  c                 B    | j         |                             |          S )aQ  
        Decide scaling for all deployments in the application.

        Args:
            app_name: The name of the application.
            deployment_to_target_num_replicas: A dictionary of deployment_id to target number of replicas.

        Returns:
            A dictionary of deployment_id to decision number of replicas.
        )r&  r   )r=   r   r  s      r>   r   z1AutoscalingStateManager.get_decision_num_replicas  s'     +H5OO-
 
 	
r@   c                     || j         v S r]   r%  r   s     r>   should_autoscale_applicationz4AutoscalingStateManager.should_autoscale_application  s    4777r@   c                 h    |j         | j        v o$| j        |j                                      |          S r]   )r   r&  r  r<   s     r>   r  z3AutoscalingStateManager.should_autoscale_deployment!  s9    "d&BB 9,&))-88		
r@   rg   c                 x    | j                             |j                  }|r|                    ||           d S d S r]   )r&  r   r   ri   )r=   r    rg   r)  s       r>   ri   z2AutoscalingStateManager.update_running_replica_ids)  sO     044]5KLL	 	R00@PQQQQQ	R 	Rr@   c                 v    | j                             |j                  }|r|                    |           dS dS )zRecord a scale up event for a deployment.

        Args:
            deployment_id: The ID of the deployment being scaled up.
        N)r&  r   r   rl   r+  s      r>   rl   z'AutoscalingStateManager.record_scale_up0  sI     044]5KLL	 	5%%m44444	5 	5r@   c                 v    | j                             |j                  }|r|                    |           dS dS )zRecord a scale down event for a deployment.

        Args:
            deployment_id: The ID of the deployment being scaled down.
        N)r&  r   r   rn   r+  s      r>   rn   z)AutoscalingStateManager.record_scale_down:  sI     044]5KLL	 	7''66666	7 	7r@   r[   c                     | j                             |j        j                  }|r|                    |           d S d S r]   )r&  r   r    r   r^   )r=   r[   r)  s      r>   r^   z*AutoscalingStateManager.on_replica_stoppedD  sJ    044Z5M5VWW	 	5((44444	5 	5r@   c                 l    |j         | j        v r%| j        |j                                      |          S i S r]   )r   r&  r  r<   s     r>   get_metrics_for_deploymentz2AutoscalingStateManager.get_metrics_for_deploymentI  sA     !T%AAA/&22=AAB Ir@   c                 l    |j         | j        v r%| j        |j                                      |          S dS )Nr   )r   r&  r  r<   s     r>   r  z=AutoscalingStateManager.get_total_num_requests_for_deploymentS  sA     !T%AAA/&33MBBC 1r@   ro   c                 R    | j         |j                 }|                    ||          S r]   )r&  r   rq   )r=   r    ro   r)  s       r>   rq   z(AutoscalingStateManager.is_within_bounds]  s2     01GH	))A
 
 	
r@   rv   Nc                     | j                             |j        j        j                  }|r|                    |           d S d S r]   )r&  r   r[   r    r   rz   )r=   rv   r)  s      r>   rz   z:AutoscalingStateManager.record_request_metrics_for_replicae  sZ     044!,:C
 
	  	P889NOOOOO	P 	Pr@   r{   c                     | j                             |j        j                  }|r|                    |           dS dS )z,Update request metric for a specific handle.N)r&  r   r    r   r~   )r=   r{   r)  s      r>   r~   z9AutoscalingStateManager.record_request_metrics_for_handlen  sW    
 044 .7
 
	  	N778LMMMMM	N 	Nr@   r   c                 h    | j                                         D ]}|                    |           d S r]   )r&  r   r   )r=   r   r)  s      r>   r   z1AutoscalingStateManager.drop_stale_handle_metricsy  sF    5<<>> 	G 	GI//0EFFFF	G 	Gr@   )&r   r   r   r   r?   r   r   r   r   r  r   r   r-  r/  r   r1  r   r   r4  r  r   r   ri   rl   rn   r^   r   r;  r   r  rq   r   rz   r   r~   r	   r   r   r   r@   r>   r#  r#    s          

#
 
 #&	

 

 
 
 
";< ; ; ; ;	/!	/ .	/ 	/ 	/ 	/= = = = =
 
D 
 
 
 

!
 ,0c0A+B
 
lC	 	
 
 
 
&8_ 8 8 8 8
 
 
 
 
R)R=A)_R R R R5\ 5 5 5 57| 7 7 7 75Y 5 5 5 5
)	ij))	*   )	   
)
SV
	
 
 
 
P%8P	P P P P	N0	N 
	N 	N 	N 	NGs3x GD G G G G G Gr@   r#  ),loggingrk   collectionsr   typingr   r   r   r   r   r	   r
   ray.serve._private.commonr   r   r   r   r   r   r   r   ray.serve._private.constantsr   r   r   "ray.serve._private.deployment_infor    ray.serve._private.metrics_utilsr   r   ray.serve._private.usager   ray.serve._private.utilsr   ray.serve.configr   r   ray.utilr   	getLoggerrS   r   r   r#  r   r@   r>   <module>rM     sD     # # # # # # B B B B B B B B B B B B B B B B B B	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	         
 > = = = = =        3 2 2 2 2 2 G G G G G G B B B B B B B B      		,	-	-e
! e
! e
! e
! e
! e
! e
! e
!PAG AG AG AG AG AG AG AGHmG mG mG mG mG mG mG mG mG mGr@   