
    &`i                         d dl mZ d dlmZmZ d dlmZ d dlmZm	Z	 d dl
mZ  e            \  ZZZ e            \  ZZe G d d                      ZdS )	    )Optional)try_import_tftry_import_torch)PiecewiseSchedule)LearningRateOrSchedule
TensorType)DeveloperAPIc            	           e Zd ZdZddddededee         fdZeded	ed
eddfd            Z	de
fdZdedefdZdede
fdZdS )	SchedulerzClass to manage a scheduled (framework-dependent) tensor variable.

    Uses the PiecewiseSchedule (for maximum configuration flexibility)
    torchN)	frameworkdevicefixed_value_or_scheduler   r   c                   || _         || _        t          |t          t          f          | _        | j        rLt          ||d         d         d          | _        |                     |d         d                   | _	        dS || _	        dS )a  Initializes a Scheduler instance.

        Args:
            fixed_value_or_schedule: A fixed, constant value (in case no schedule should
                be used) or a schedule configuration in the format of
                [[timestep, value], [timestep, value], ...]
                Intermediary timesteps will be assigned to linerarly interpolated
                values. A schedule config's first entry must
                start with timestep 0, i.e.: [[0, initial_value], [...]].
            framework: The framework string, for which to create the tensor variable
                that hold the current value. This is the variable that can be used in
                the graph, e.g. in a loss function.
            device: Optional device (for torch) to place the tensor variable on.
        N)outside_valuer   r      )initial_value)
r   r   
isinstancelisttupleuse_scheduler   	_schedule_create_tensor_variable_curr_value)selfr   r   r   s       w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/utils/schedules/scheduler.py__init__zScheduler.__init__   s    * #&'>uNN 	7 /'5b9"=  DN  $;;5a8;  <    D  7D    setting_namedescriptionreturnc                    t          | t          t          f          s| dS t          | t          t          f          rt          |           dk     rt          d| d|  d| d          | d         d         dk    r7t          d| d	| d
| d         d          d| d| d         d          d          t          d | D                       rt          d| d| d          dS )a  Performs checking of a certain schedule configuration.

        The first entry in `value_or_schedule` (if it's not a fixed value) must have a
        timestep of 0.

        Args:
            fixed_value_or_schedule: A fixed, constant value (in case no schedule should
                be used) or a schedule configuration in the format of
                [[timestep, value], [timestep, value], ...]
                Intermediary timesteps will be assigned to linerarly interpolated
                values. A schedule config's first entry must
                start with timestep 0, i.e.: [[0, initial_value], [...]].
            setting_name: The property name of the schedule setting (within a config),
                e.g. `lr` or `entropy_coeff`.
            description: A full text description of the property that's being scheduled,
                e.g. `learning rate`.

        Raises:
            ValueError: In case, errors are found in the schedule's format.
        N   z	Invalid `z` (zP) specified! Must be a list of 2 or more tuples, each of the form (`timestep`, `zG to reach`), for example `[(0, 0.001), (1e6, 0.0001), (2e6, 0.00005)]`.r   zWhen providing a `zW` schedule, the first timestep must be 0 and the corresponding lr value is the initial z! You provided ts= =r   .c              3   <   K   | ]}t          |          d k    V  dS )r$   N)len).0pairs     r   	<genexpr>z%Scheduler.validate.<locals>.<genexpr>o   s,      DDDTaDDDDDDr   z_` schedule, each tuple in the schedule list must have exctly 2 items of the form (`timestep`, `)r   intfloatr   r   r)   
ValueErrorany)r   r    r!   s      r   validatezScheduler.validate=   s   : .e==	&.F1D%=AA 	'((1,,AL A A-D A A!,A A A   %Q'*a//4\ 4 4FQ4 4#:1#=a#@4 4CN4 4 +1-a04 4 4   DD,CDDDDD 	A\ A A!,A A A  	 	r   c                     | j         S )a  Returns the current value (as a tensor variable).

        This method should be used in loss functions of other (in-graph) places
        where the current value is needed.

        Returns:
            The tensor variable (holding the current value to be used).
        )r   )r   s    r   get_current_valuezScheduler.get_current_valuew   s     r   timestepc                     | j         rf| j                            |          }| j        dk    r%t                              |          | j        _        n"| j                            |           n| j        }|S )af  Updates the underlying (framework specific) tensor variable.

        In case of a fixed value, this method does nothing and only returns the fixed
        value as-is.

        Args:
            timestep: The current timestep that the update might depend on.

        Returns:
            The current value of the tensor variable as a python float.
        )tr   )	r   r   valuer   r   tensorr   dataassign)r   r4   python_values      r   updatezScheduler.update   sv      	,>//(/;;L~(((-\(B(B %% ''5555+Lr   r   c                     | j         dk    r-t                              |dt          j        | j                  S t
                              |dt
          j                  S )zCreates a framework-specific tensor variable to be scheduled.

        Args:
            initial_value: The initial (float) value for the variable to hold.

        Returns:
            The created framework-specific tensor variable.
        r   F)requires_graddtyper   )	trainabler?   )r   r   r8   float32r   tfVariable)r   r   s     r   r   z!Scheduler._create_tensor_variable   sd     >W$$<<#m{	      ;;j    r   )__name__
__module____qualname____doc__r   strr   r   staticmethodr1   r   r3   r-   r.   r<   r    r   r   r   r      s         ! $(7 (7 (7!7(7 	(7
 (7 (7 (7 (7T 7!77 7 	7
 
7 7 7 \7r	 : 	  	  	  	 s u    .U z      r   r   N)typingr   ray.rllib.utils.frameworkr   r   ,ray.rllib.utils.schedules.piecewise_scheduler   ray.rllib.utils.typingr   r   ray.util.annotationsr	   _rB   r   r   rJ   r   r   <module>rQ      s          E E E E E E E E J J J J J J E E E E E E E E - - - - - -=??2qq a a a a a a a a a ar   