
    &`i                        d dl Z d dlZd dlmZmZmZmZ d dlmZm	Z	m
Z
 d dlmZmZ d dlmZ  e j        e
          Z	 	 ddededed	ee         d
ee         defdZ ed          dedeeeeef         f         fd            ZeZdS )    N)AnyDictOptionalTuple)CONTROL_LOOP_INTERVAL_S'SERVE_AUTOSCALING_DECISION_COUNTERS_KEYSERVE_LOGGER_NAME)AutoscalingConfigAutoscalingContext)	PublicAPIautoscaling_configtotal_num_requestsnum_running_replicasoverride_min_replicasoverride_max_replicasreturnc                    |dk    rt          d          |                                 |z  }||z  }|dk    r|                                 }n|                                 }d|dz
  |z  z   }t	          j        ||z            }	t	          j        ||z            |k     r|	|k    r|	dz  }	| j        }
| j        }||}
||}t          |
t          ||	                    }	|	S )a8  Returns the number of replicas to scale to based on the given metrics.

    Args:
        autoscaling_config: The autoscaling parameters to use for this
            calculation.
        current_num_ongoing_requests (List[float]): A list of the number of
            ongoing requests for each replica.  Assumes each entry has already
            been time-averaged over the desired lookback window.
        override_min_replicas: Overrides min_replicas from the config
            when calculating the final number of replicas.
        override_max_replicas: Overrides max_replicas from the config
            when calculating the final number of replicas.

    Returns:
        desired_num_replicas: The desired number of replicas to scale to, based
            on the input metrics and the current number of replicas.

    r   z!Number of replicas cannot be zero   )

ValueErrorget_target_ongoing_requestsget_upscaling_factorget_downscaling_factormathceilmin_replicasmax_replicasmaxmin)r   r   r   r   r   target_num_requestserror_ratioscaling_factorsmoothed_error_ratiodesired_num_replicasr   r   s               p/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/serve/autoscaling_policy.py_calculate_desired_num_replicasr%      s#   2 q  <===
 	6688;OO  ,.AAK a+@@BB+BBDD qN BC9%9<P%PQQ 		&4558LLL $888!%2L%2L(,(, |S?S-T-TUU    alpha)	stabilityctxc                    | j         }| j        }| j        }| j        }| j        }| j        }| j        }|                    t          d          }|dk    rC|dk    r9t          t          j        d|                                z            |          |fS ||fS |}	t          |||||          }
|
|k    r2|dk     rd}|dz  }|t          |j        t           z            k    rd}|
}	nl|
|k     rd|dk    rd}|dz  }|dk    }|r|j        |j        }n|j        }n|j        }t          d|
          }
|t          |t           z             k     rd}|
}	nd}||t          <   |	|fS )a  The default autoscaling policy based on basic thresholds for scaling.
    There is a minimum threshold for the average queue length in the cluster
    to scale up and a maximum threshold to scale down. Each period, a 'scale
    up' or 'scale down' decision is made. This decision must be made for a
    specified number of periods in a row before the number of replicas is
    actually scaled. See config options for more details.  Assumes
    `get_decision_num_replicas` is called once every CONTROL_LOOP_PERIOD_S
    seconds.
    r   r   )r   r   r   )target_num_replicasr   current_num_replicasconfigcapacity_adjusted_min_replicascapacity_adjusted_max_replicaspolicy_stategetr   r   r   r   r   r%   intupscale_delay_sr   downscale_to_zero_delay_sdownscale_delay_s)r)   curr_target_num_replicasr   r   r-   r.   r/   r0   decision_counterdecision_num_replicasr#   is_scaling_to_zerodelay_ss                r$   'replica_queue_length_autoscaling_policyr;   X   s    %($;!4 # 8*-*F*-*L"*-*L"#&#3L#''(OQRSSq  !!Ia&"="="?"??@@,    (554:1<<   666 a A c&"8;R"RSSSS $8! 
 8	8	8 a A5: 		@/; : 2.G#&q*>#?#?  s7-D#DEEEEE $8! <LL89 ,..r&   )NN)loggingr   typingr   r   r   r   ray.serve._private.constantsr   r   r	   ray.serve.configr
   r   ray.util.annotationsr   	getLoggerloggerr2   floatr%   strr;   default_autoscaling_policy r&   r$   <module>rG      s     - - - - - - - - - - - -         
 C B B B B B B B * * * * * *		,	-	- .2-1E  E )E E  E  $E?	E 
 $E?E  	E  E  E  E P WX/	X/
3S#XX/ X/ X/ X/v E   r&   