
    &`iA                       d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZ d dlZd dlZd dl Z!d dl"Z"d dl#m$Z$ d dl%Z%d dl&m'Z( d dl)m*Z*m+Z+m,Z, d dl-m.Z.m/Z/ d dl0m1Z1 d d	l2m3Z3 d d
l4m5Z6 d dl7m8Z8m9Z9m:Z:m;Z;m<Z< d dl=m>Z> d dl?m@Z@ d dlAmBZB d dlCmDZDmEZEmFZFmGZGmHZHmIZImJZJmKZKmLZL d dlMmNZN d dlOmPZP d dlQmRZRmSZS d dlTmUZUmVZV d dlWmXZX d dlYmZZZ d dl[m\Z\ d dl]m^Z^ d dl_m`Z` d dlambZbmcZc d dldmeZe d dlfmgZg d dlhmiZimjZjmkZkmlZlmmZm d dlnmoZo d dlpmqZqmrZr d dlsmtZtmuZu d d lvmwZwmxZxmyZy d d!lzm{Z{ d d"l|m}Z}m~Z~mZmZmZmZmZ d d#lmZmZmZmZmZ d d$lmZ d d%lmZmZ d d&lmZ d d'lmZ d d(lmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ d d)lmZ d d*lmZ d d+lmZmZmZ d d,lmZmZ d d-lmZ d d.lmZmZ d d/lmZ d d0lmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ d d1lmZ d d2lmZ d d3lmZ d d4lmZ d d5lmZmZ d d6lmZmZmZ d d7lmZ d d8lmZ d d9lmZ d d:lmZ d d;lmZmZ d d<lmZ erd d=lmZ d d>lmZ  e            \  ZZ Z ej        e          Ze G d? d@ee                      Z G dA dB          ZdS )C    N)defaultdict)datetime)TYPE_CHECKINGAnyCallable
CollectionDefaultDictDictListOptionalSetTupleTypeUnion)version)DEPRECATED_VALUE
Deprecateddeprecation_warning)TagKeyrecord_extra_usage_tag)ActorHandleAlgorithmConfig)ALGORITHMS_CLASS_TO_NAME)AggregatorActor_get_env_runner_bundles_get_learner_bundles_get_main_process_bundle _get_offline_eval_runner_bundles)make_callback)ObsPreprocessorConnector)ConnectorPipelineV2)	COMPONENT_ENV_RUNNER!COMPONENT_ENV_TO_MODULE_CONNECTORCOMPONENT_EVAL_ENV_RUNNERCOMPONENT_LEARNERCOMPONENT_LEARNER_GROUPCOMPONENT_METRICS_LOGGER!COMPONENT_MODULE_TO_ENV_CONNECTORCOMPONENT_RL_MODULEDEFAULT_MODULE_ID)Columns)validate_module_id)MultiRLModuleMultiRLModuleSpec)RLModuleRLModuleSpec)INPUT_ENV_SPACES)
EnvContext)	EnvRunner)EnvRunnerGroup)_gym_env_creator)collect_episodessummarize_episodes)synchronous_parallel_sample)get_dataset_and_shards)DirectMethodDoublyRobustImportanceSamplingOffPolicyEstimatorWeightedImportanceSampling)OfflineEvaluator)Policy
PolicySpec)DEFAULT_POLICY_IDSampleBatch)FilterManagerdeep_update
force_list)FaultTolerantActorManager)DeveloperAPIExperimentalAPIOldAPIStackOverrideToImplementCustomLogic5OverrideToImplementCustomLogic_CallToSuperRecommended	PublicAPIoverride)CHECKPOINT_VERSION)CHECKPOINT_VERSION_LEARNER_AND_ENV_RUNNERCheckpointableget_checkpoint_infotry_import_msgpack)update_global_seed_if_necessary)ERR_MSG_INVALID_ENV_DESCRIPTOREnvError)try_import_tf)from_config)&AGGREGATOR_ACTOR_RESULTSALL_MODULESDATASET_NUM_ITERS_EVALUATEDENV_RUNNER_RESULTSENV_RUNNER_SAMPLING_TIMEREPISODE_LEN_MEANEPISODE_RETURN_MEANEVALUATION_ITERATION_TIMEREVALUATION_RESULTSFAULT_TOLERANCE_STATSLEARNER_RESULTSLEARNER_UPDATE_TIMERNUM_AGENT_STEPS_SAMPLED NUM_AGENT_STEPS_SAMPLED_LIFETIME!NUM_AGENT_STEPS_SAMPLED_THIS_ITERNUM_AGENT_STEPS_TRAINED NUM_AGENT_STEPS_TRAINED_LIFETIMENUM_ENV_STEPS_SAMPLED.NUM_ENV_STEPS_SAMPLED_FOR_EVALUATION_THIS_ITERNUM_ENV_STEPS_SAMPLED_LIFETIMENUM_ENV_STEPS_SAMPLED_THIS_ITERNUM_ENV_STEPS_TRAINEDNUM_ENV_STEPS_TRAINED_LIFETIMENUM_EPISODESNUM_EPISODES_LIFETIME%NUM_TRAINING_STEP_CALLS_PER_ITERATIONOFFLINE_EVAL_RUNNER_RESULTS"OFFLINE_EVALUATION_ITERATION_TIMERRESTORE_ENV_RUNNERS_TIMERRESTORE_EVAL_ENV_RUNNERS_TIMER"RESTORE_OFFLINE_EVAL_RUNNERS_TIMERSTEPS_TRAINED_THIS_ITER_COUNTER SYNCH_ENV_CONNECTOR_STATES_TIMER%SYNCH_EVAL_ENV_CONNECTOR_STATES_TIMERSYNCH_WORKER_WEIGHTS_TIMERTIMERSTRAINING_ITERATION_TIMERTRAINING_STEP_TIMER)LEARNER_INFO)MetricsLogger)(DEFAULT_HISTOGRAM_BOUNDARIES_LONG_EVENTS)DEFAULT_HISTOGRAM_BOUNDARIES_SHORT_EVENTSTimerAndPrometheusLogger)MultiAgentReplayBufferReplayBuffer)RunnerGroup)NOT_SERIALIZABLEdeserialize_type)space_utils)AgentConnectorDataTypeAgentIDAgentToModuleMappingFnAlgorithmConfigDict
EnvCreatorEnvInfoDictEnvType	EpisodeIDModuleIDPartialAlgorithmConfigDictPolicyIDPolicyState
ResultDictSampleBatchTypeShouldModuleBeUpdatedFn	StateDictTensorStructType
TensorType)DEFAULT_STORAGE_PATH)
Checkpoint)PlacementGroupFactory)ExportFormat)LoggerUnifiedLogger)ENV_CREATOR_global_registryget_trainable_cls)	Resources)TRAINING_ITERATION)	Trainable)log_once)Counter	Histogram)_Timer)LearnerGroupOfflineDatac            %           e Zd ZU dZdZee         ed<   dZee	         ed<   dZ
ee         ed<   dZee         ed<   dZed         ed<   dZed	         ed
<   dZg dZddgZddgZe de e de de e e e e de fZdZdZe ee          	 dddde dde!e"e#f         ded         dee$e%                  dee&e'e(ge%f                  dee!e$e%         e&e%ee)         ge*f         f                  dd f fd                        Z+e,	 	 	 ddee         dee&g e-f                  f fd            Z.d Z/e0edefd                         Z1e0de2e3         fd!            Z4e5 ee6          deddfd"                        Z7e0ededee8e9                  fd#                        Z: ee6          de;fd$            Z<e,d%             Z=e,	 dd&ee>j?        j@                 de;fd'            ZAdeBe;e3e3f         fd(ZCd) ZDd* ZEd+ ZFdd,ZGd- ZHe0dede2e3         fd.            ZIe0d/eJde2e3         fd0            ZKe0dd1            ZLe,eMfd2eNdeeO         fd3            ZPe,dddd4d4d4d5d2eNd6eQd7eeR         d8eeS         d9eeT         d:e*d;e*d<e*deUfd=            ZVe,ddd4d4d4d>d2eNd8eeS         d9eeT         d?e*d@e*dAe*dee9         fdB            ZWeXeYfdCe%de9fdD            ZZe,ddee2e%                  de[fdE            Z\e,dFeRe%e[f         fdG            Z]eX	 	 dddddddd4d4de e dHdCe%dIee8e9                  dJee9         dKee^j_        j`                 dLee^j_        j`                 dee!eeaf                  dMeeb         dee&e'e(ge%f                  dee!e$e%         e&e%ee)         ge*f         f                  d;e*d<e*d6eeQ         dee9         fdN            ZceXeYfddd4d4e e dOdCe%dee&e'ge%f                  dee!e$e%         e&e%ee)         ge*f         f                  d@e*dAe*ddfdP            ZdeXeedQeRdd fdR                        ZfeXeYdfdSe"dCe%dTee3         ddfdU            ZgeXeYfdSe"dCe%ddfdV            Zh ee6          dWe"ddfdX            Zi ee6          dWe"ddfdY            Zj ee          	 dddZd[ee!e"e$e"         f                  d\ee!e"e$e"         f                  dekfd]            Zl ee          dQekddfd^            Zm ee          de2eBe"d_f                  fd`            Zn ee          deBeBeRe"eof         f         fda            Zp ee           fdb            Zq ee6          dce;ddfdd            Zr ee6          dde            Zse0e ee6          de!eeaf         de!eteuf         fdf                                    Zvewdg             Zxeedhe!e"eydf         dedeBee"         ezf         fdi            Z{dje|dkededdfdlZ}e ee6          de!ee~f         de"fdm                        Z ee6          	 	 	 	 ddnee         doee         dpee3         dqe*de[f
 fdr            Ze	 ddse~dteaduee*         de~fdv            Zeeedweydxeddfdy                        ZdeBe;dzf         fd{Zd| Z	 dd&ee>j?        j@                 de;fd}ZdeBe;e;dzf         fd~Zd Zedd            Zedd            Zd Zd Zd Zed             Zed             Zd ZeXde2e"         dSe"deRe"e"f         fd            ZeXdeRfd            ZeXdd            ZeXeeddddde[dee$e%                  dee&e'e(ge%f                  dee!e$e%         e&e%ee)         ge*f         f                  deRf
d                        ZeXdeadee         fd            ZeXd             ZeXd             ZeX edd          	 	 dddddeYddddddddee         dQee2e                  dee         dee         dee         dee         dCe%de*dee*         dee3         dee*         dee*         de!eeBee2e         eRe"ef         f         f         fd                        ZeX edd          	 ddddeYddddddd
dedQee2e                  dee         dee         dee         dCe%de*dee*         dee3         dee*         dee*         fd                        Z edd4          d             Z edd4          ed                         Z edd4          ed                         Z xZS )	Algorithma   An RLlib algorithm responsible for training one or more neural network models.

    You can write your own Algorithm classes by sub-classing from `Algorithm`
    or any of its built-in subclasses.
    Override the `training_step` method to implement your own algorithm logic.
    Find the various built-in `training_step()` methods for different algorithms in
    their respective [algo name].py files, for example:
    `ray.rllib.algorithms.dqn.dqn.py` or `ray.rllib.algorithms.impala.impala.py`.

    The most important API methods an Algorithm exposes are `train()` for running a
    single training iteration, `evaluate()` for running a single round of evaluation,
    `save_to_path()` for creating a checkpoint, and `restore_from_path()` for loading a
    state from an existing checkpoint.
    Nconfigmetricsenv_runner_groupeval_env_runner_groupr   learner_groupr   offline_dataF)tf_session_argslocal_tf_session_args
env_configmodel	optimizercustom_resources_per_env_runnercustom_resources_per_workerevaluation_configexploration_configreplay_buffer_config extra_python_environs_for_workerinput_configoutput_configr   r   off_policy_estimation_methodspolicies/rllib_checkpoint.jsonalgorithm_state)
policy_idspolicy_mapping_fnpolicies_to_train
checkpointpath
filesystemzpyarrow.fs.FileSystemr   r   r   returnc                   |t           k    rt          ddd           t          ||          }|d         t          j        d          k    rEt          |t                    r|                                } t                      j	        |fd|i|S |d         t          j        d          k    rt          d	          |d         t          j        d
          k     rt          d|d          d          |d         dk    r0|.|d         t          hk    rt          j        }nt          d          t                              ||||          }	t                              |	          S )af  Creates a new algorithm instance from a given checkpoint.

        Args:
            path: The path (str) to the checkpoint directory to use or a Ray Train
                Checkpoint instance to restore from.
            filesystem: PyArrow FileSystem to use to access data at the `path`. If not
                specified, this is inferred from the URI scheme of `path`.
            policy_ids: Optional list of PolicyIDs to recover. This allows users to
                restore an Algorithm with only a subset of the originally present
                Policies.
            policy_mapping_fn: An optional (updated) policy mapping function to use from
                here on.
            policies_to_train: An optional list of policy IDs to be trained or a
                callable taking PolicyID and SampleBatchType and returning a bool
                (trainable or not?). If None, will keep the existing setup in place.
                Policies, whose IDs are not in the list (or for which the callable
                returns False) will not be updated.

        Returns:
            The instantiated Algorithm.
        z)Algorithm.from_checkpoint(checkpoint=...)z#Algorithm.from_checkpoint(path=...)Toldnewerrorcheckpoint_versionz2.0r   0.1a  Cannot restore a v0 checkpoint using `Algorithm.from_checkpoint()`!In this case, do the following:
1) Create a new Algorithm object using your original config.
2) Call the `restore()` method of this algo object passing it your checkpoint dir or AIR Checkpoint object.z1.0z`checkpoint_info['checkpoint_version']` in `Algorithm.from_checkpoint()` must be 1.0 or later! You are using a checkpoint with version v.formatmsgpackNr   a  You are trying to restore a multi-agent algorithm from a `msgpack` formatted checkpoint, which do NOT store the `policy_mapping_fn` or `policies_to_train` functions! Make sure that when using the `Algorithm.from_checkpoint()` utility, you also pass the args: `policy_mapping_fn` and `policies_to_train` with your call. You might leave `policies_to_train=None` in case you would like to train all policies anyways.)checkpoint_infor   r   r   )r   r   rS   r   Version
isinstancer   to_directorysuperfrom_checkpoint
ValueErrorrC   r   DEFAULT_POLICY_MAPPING_FNr   #_checkpoint_info_to_algorithm_state
from_state)clsr   r   r   r   r   r   kwargsr   state	__class__s             r/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/algorithms/algorithm.pyr   zAlgorithm.from_checkpoint+  s   R )))?9   
 .dJ?? /0GOE4J4JJJ$
++ +((***577*4QQJQ&QQQ 12goe6L6LLLA   12W_U5K5KKKE+,@AE E E   8$	11 !( #<05F4GGG(7(Q%% %H	 	 	 ==+!//	 > 
 
 ##E***    logger_creatorc                 @   t          |t                    r|                                 }t          |t                    rDd|v rt          j        |           nt          j        |                     ||d                    }nt          |t                    rd|v r|                    |          }n|                    |          }n|                                 }t          |t          |                    s(|                    |	                                          }n'|                    |
                                          }|j        t          |           |_        |/t          d| dd| dd	
           |                    |           |                                 |                                 |                     |j        |          \  | _        | _        t          | j        t                    r| j        j        n| j        }d| _        d| _        t/          d|j                  | _        |t5          j                                        d          }t;          j        ddt?          |                    }t          |           j         d| d| }	t@          j!        "                    tF                    stA          j$        tF          d           tK          j&        |	tF                    |j'        rd|j'        v rfd}
nfd}
|
}tQ          tR                    | _*        tQ          tV                    | _,        g | _-        g | _.        d| _/        d| _0        d| _1        d| _2        d| _3        d| _4        d| _5        d| _6        d| _7        d| _8        d| _9        d| _:        d| _;        d| _<        d| _=        d| _>        d| _?        d| _@        d| _A        d| _B        d| _C        d| _D        d| _E        d| _F         t                      jH        d||d| dS )aD  Initializes an Algorithm instance.

        Args:
            config: Algorithm-specific configuration object.
            logger_creator: Callable that creates a ray.tune.Logger
                object. If unspecified, a default logger is created.
            **kwargs: Arguments passed to the Trainable base class.
        classT)config_dictNzalgo = Algorithm(env='z', ...)z&algo = AlgorithmConfig().environment('z
').build()Fr   )rootstats_cls_lookupz%Y-%m-%d_%H-%M-%Sz[/\\]-_exist_ok)prefixdirtypec                     | d                                          }|                    d          }|                    d          }t          ||g|          S )z0Creates a custom logger with the default prefix.logger_configr   logdir)r   _argsr   )copypoprY   )r   cfgr   logdir_r   s       r   default_logger_creatorz2Algorithm.__init__.<locals>.default_logger_creator  sT     16688C''&//C "ggh77G&3seGLLLLr   c                 (    t          | d          S )z1Creates a Unified logger with the default prefix.N)loggers)r   )r   r   s    r   r   z2Algorithm.__init__.<locals>.default_logger_creator  s    (FFFFr   )r   r    )Ir   dictget_default_configr   r   	from_dictmerge_algorithm_configsupdate_from_dictr   to_dict	get_state
algo_classr   environmentvalidatefreeze_get_env_id_and_creatorenv_env_idenv_creator__name__local_replay_bufferr   r   r   r   r   todaystrftimeresubstrosr   existsr   makedirstempfilemkdtempr   r   r   _timersint	_counters_episode_history_episodes_to_be_collectedr   r   _metrics_step_time(_metrics_run_one_training_iteration_time _metrics_run_one_evaluation_time'_metrics_compile_iteration_results_time_metrics_training_step_time_metrics_evaluate_time._metrics_evaluate_sync_env_runner_weights_time,_metrics_evaluate_sync_connector_states_time)_metrics_step_sync_env_runner_states_time_metrics_load_checkpoint_time_metrics_save_checkpoint_time&_metrics_callback_on_train_result_time(_metrics_callback_on_evaluate_start_time&_metrics_callback_on_evaluate_end_time"_metrics_impala_training_step_time;_metrics_impala_training_step_aggregator_preprocessing_time5_metrics_impala_training_step_learner_group_loop_time8_metrics_impala_training_step_sync_env_runner_state_time4_metrics_impala_sample_and_get_connector_states_time+_metrics_impala_training_step_input_batches0_metrics_impala_training_step_zero_input_batches/_metrics_impala_training_step_env_steps_droppedr   __init__)selfr   r  r   r   default_config	env_descrtimestrenv_descr_for_dirlogdir_prefixr   r   r   s              @r   r:  zAlgorithm.__init__  s   ( fd## 	G!4466N .$// Ef$$#.v6666,6$($@$@*FD% %  FF fd++ E60A0A+66v>>FF+<<VDDFF!4466N fd>&:&:;; G'889I9IJJ'2263C3C3E3EFF $ $T

F?9S999LSLLL   
 s### 	
 *.)E)EJ*
 *
&d& &0d%C%CUDL!! 	
 $(  8<
 '4(?'
 '
 '

 ! n&&//0CDDG "y#s9~~ F F#Dzz2RR5FRRRRM7>>"677 A 04@@@@%]@TUUUF # G&2F(F(FM M M M M MG G G G G 4N #6**$S)) ")+& =A?C" 8<MQ5EI-LP4@D(;?#SW;QU9NR6BF*BF* LP3MQ5KO3 HL/  	H
  	B
  	E
  	A OS8SW=RV< 	
)	
 	
 	
 	
 	
 	
 	
r   c                 Z   t          ddt          d          | _        | j                            d| j        j        i           t          ddt          d          | _        | j                            d| j        j        i           t          dd	t          d          | _        | j                            d| j        j        i           t          d
dt          d          | _	        | j	                            d| j        j        i           t          ddt          d          | _
        | j
                            d| j        j        i           t          ddt          d          | _        | j                            d| j        j        i           t          ddt          d          | _        | j                            d| j        j        i           t          ddt          d          | _        | j                            d| j        j        i           t          ddt          d          | _        | j                            d| j        j        i           t          ddt          d          | _        | j                            d| j        j        i           t          ddt          d          | _        | j                            d| j        j        i           t          ddt          d          | _        | j                            d| j        j        i           t          ddt          d          | _        | j                            d| j        j        i           t          ddt          d          | _        | j                            d| j        j        i           d S ) Nrllib_algorithm_step_timezTime spent in Algorithm.step())rllib)namedescription
boundariestag_keysrC  /rllib_algorithm_run_one_training_iteration_timez5Time spent in Algorithm._run_one_training_iteration()'rllib_algorithm_run_one_evaluation_timez-Time spent in Algorithm._run_one_evaluation().rllib_algorithm_compile_iteration_results_timez4Time spent in Algorithm._compile_iteration_results()"rllib_algorithm_training_step_timez'Time spent in Algorithm.training_step()rllib_algorithm_evaluate_timez"Time spent in Algorithm.evaluate()5rllib_algorithm_evaluate_sync_env_runner_weights_timezPTime spent on syncing weights to the eval EnvRunners in the Algorithm.evaluate()3rllib_algorithm_evaluate_sync_connector_states_timezBTime spent on syncing connector states in the Algorithm.evaluate()0rllib_algorithm_step_sync_env_runner_states_timezGTime spent in sync_env_runner_states code block of the Algorithm.step()$rllib_algorithm_load_checkpoint_timez)Time spent in Algorithm.load_checkpoint()$rllib_algorithm_save_checkpoint_timez)Time spent in Algorithm.save_checkpoint()-rllib_algorithm_callback_on_train_result_timez*Time spent in callback 'on_train_result()'/rllib_algorithm_callback_on_evaluate_start_timez,Time spent in callback 'on_evaluate_start()'-rllib_algorithm_callback_on_evaluate_end_timez*Time spent in callback 'on_evaluate_end()')r   r   r$  set_default_tagsr   r  r%  r&  r   r'  r(  r)  r*  r+  r,  r-  r.  r/  r0  r1  r;  s    r   _set_up_metricszAlgorithm._set_up_metricsI  sP   "+,8?	#
 #
 #
 	00'4>;R1STTT8ABO?	9
 9
 9
5 	5FFdn-.	
 	
 	
 1::G?	1
 1
 1
- 	->>dn-.	
 	
 	
 8AAN@	8
 8
 8
4 	4EEdn-.	
 	
 	
 ,55A?	,
 ,
 ,
( 	(99dn-.	
 	
 	
 '00<?	'
 '
 '
# 	#44gt~?V5WXXX>GHj@	?
 ?
 ?
; 	;LLdn-.	
 	
 	
 =FF\@	=
 =
 =
9 	9JJdn-.	
 	
 	
 :CCa@	:
 :
 :
6 	6GGdn-.	
 	
 	
 .77C@	.
 .
 .
* 	*;;dn-.	
 	
 	
 .77C@	.
 .
 .
* 	*;;dn-.	
 	
 	

 7@@D@	7
 7
 7
3 	3DDdn-.	
 	
 	
 9BBF@	9
 9
 9
5 	5FFdn-.	
 	
 	
 7@@D@	7
 7
 7
3 	3DDdn-.	
 	
 	
 	
 	
r   c                     t                      S Nr   )r   s    r   r  zAlgorithm.get_default_config  s        r   c                 4    | j                                         S )a  Returns a list of remote worker IDs to fetch metrics from.

        Specific Algorithm implementations can override this method to
        use a subset of the workers for metrics collection.

        Returns:
            List of remote worker IDs to fetch metrics from.
        )r   healthy_worker_idsrV  s    r   _remote_worker_ids_for_metricsz(Algorithm._remote_worker_ids_for_metrics  s     $77999r   c                    !" t          |t                    st          |t                    sJ                                  }t          |t                    s8t          |t                    sJ t                                          |          }|                    |            j        |_        | _        t           j        j
         j        j                                         j                    j        j        r)d t           j        j                  D              _        n j                                         _         j        j        dv r(t$                              d j        j         d            j        j        r1t)          j        d                               j        j                                         j                   _        t3          t4                     _        d  _        d  _        d  _        d  _         j                             d          }|K|tB          urBd |D             }tE          d#                    |          d	| d
dd           | j        _$         j        j%        r*ddl&m'}  j        j(        p|} | j                   _)        nd  _)         j        j*        s j        j        sdtW           j,         j-         .                     j                   j         j        j        sdn j        j/         j0         j1                   _         j        2                                 _3         j3        4                                  j3        5                                  6                     j3                  r 7                     j3        j         j3                  \  }}tW          |d  .                     j                   j3         j0         j1         j3        j        sdn j3        j/         j        j8                   _9         j        r j        :                                 _        n% j9        r j9        :                                 _         j;        Q j        J j        <                     j                   _         j        =                     j                   _        d  _>         j3        j$        r^ j3        j?        sRt$                              d           t           j3        d          \   _>        }t$                              d           i  _A        t          t          t          t          d}	 j        j$        F                                D ]I\  }
}|G                    d          }||	v r.tE          |t          |	|                   d           |	|         }npt          |t                    r[t$          I                    dd|z              |J                    dd          \  }}t          jL        |          }t          ||          }t          |t                    r_t          |t                    rJ Q                                }t          |t                    r j        jS        |d<    ||fi | jA        |
<   nt          d| d          ||d<   K j        jU        rt           j        jW         j        jX        fi} j        r|Y                     j                   n j9        r-|Y                     j9        :                                           n j        j*        r3|Y                    t           j        jW         j        jX        fi           nr j        j%        rf j        [                    |t                   d         |t                   d                   }|Y                    t          |jW        |jX        fi            j        \                    |d !          } j        ]                    |"           _^        i }|j_        }|j`        }|ja        F                                D ]\  }}|j_        r
|j_        ||<   |s|r j^        b                    |||#            j^        c                    t          d$z   t          z   d%          t                   } j        rU j        f                     j         jg        h                    t          t          fd&          | j         j        '           n[ j9        rT j9        f                     j3         jg        h                    t          t          fd&          | j         j        '            j)        r j^        jk        rH j^        l                    d(           }d) |D              j)        _m         j^        jn         j)        _o        n j^        jp        g j)        _o        | j)        _q        | j)        _         r                     j3                  r|dd*lsmt}  j3        ju        s7 j^        c                    t          d$z   t          z   d %          t                   } | j3         j        jv        dk    |t                   ||+           _w        d  _x         j        jU        r j        jy        dk    r j        \                     j        d !          " t          j{        dd,-          t                    !t          !" fd.t           j        j        pd j        jy        z            D              j        j        /           _x        t          t           j^        l                    d0 1                              }t          t           jx                            d2 1                              }i  _        |D ]\  }}|                                 }|D ]I\  }} |                                  d         |d         k    r |dd          |d         gz   }| j        |<    nJ| j        vr)t          d3| d4|d          d5|d          d6| d	          t           fd7t           j        j        pd          D                       st          d8 j                                                     t          d9 j         j        j        t            jg        :          ;           d S )<Nc                 "    g | ]} |            S r  r  ).0r   s     r   
<listcomp>z#Algorithm.setup.<locals>.<listcomp>  s    WWWcceeWWWr   )WARNERRORzCurrent log_level is zV. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.z	ray.rllibinput_evaluationc                 2    i | ]}t          |          d |iS )r   )r  )r_  opes     r   
<dictcomp>z#Algorithm.setup.<locals>.<dictcomp>  s$    LLLCC63-LLLr   zconfig.input_evaluation={}zSconfig.evaluation(evaluation_config=config.overrides(off_policy_estimation_methods=z))Tz/Running OPE during training is not recommended.)r   r   r   helpr   r   )r  validate_envdefault_policy_classr   local_env_runnerr   tune_trial_id)r  rh  ri  r   r   rk  rj  	pg_offset)spaceszCreating evaluation dataset ...)num_workerszEvaluation dataset created)iswisdmdrr   r   zTrying to import from string: r      gammaz$Unknown off_policy_estimation type: ze! Must be either a class path or a sub-class of ray.rllib.offline.offline_evaluator::OfflineEvaluator)input_observation_spaceinput_action_spaceF)rm  inference_onlyrl_module_spec)multi_rl_module_ckpt_dirmodules_to_loadrl_module_ckpt_dirsr   
componentsrw  default)r   env_steps_sampledrl_module_stateenv_to_modulemodule_to_envc                 L    t          j                                                    S rY  )rayget_runtime_contextget_node_id)r   s    r   <lambda>z!Algorithm.setup.<locals>.<lambda>  s    #"9";";"G"G"I"I r   c                 6    g | ]}|                                 S r  )get)r_  node_ids     r   r`  z#Algorithm.setup.<locals>.<listcomp>  s-     8 8 8*18 8 8r   )OfflineEvaluationRunnerGroup)r   local_runnermodule_statemodule_specrm  )num_cpusmax_restartsc                 F    g | ]}                     j                  S r  )remoter   )r_  r   agg_clsry  r;  s     r   r`  z#Algorithm.setup.<locals>.<listcomp>L  s9        NN4;??  r   )'max_remote_requests_in_flight_per_actorc                     | j         | j        fS rY  )nodedevice)_learners    r   r  z!Algorithm.setup.<locals>.<lambda>[  s    x}ho.N r   funcc                     | j         | j        fS rY  )_node_deviceactors    r   r  z!Algorithm.setup.<locals>.<lambda>c  s    EK+G r   z9No Learner worker found that matches aggregation worker #z	's node (z) and device (z&)! The Learner workers' locations are c              3   N   K   | ]}|j                                         v V   d S rY  )_aggregator_actor_to_learnervalues)r_  learner_idxr;  s     r   	<genexpr>z"Algorithm.setup.<locals>.<genexpr>~  sK         t@GGIII     r   zwSome Learner indices are not mapped to from any AggregatorActors! Final AggregatorActor idx -> Learner idx mapping is: on_algorithm_init	algorithmmetrics_logger)r   )r   r   r   r  r  r  r  r  r   rU   framework_strseed_record_usage"enable_env_runner_and_connector_v2rG   callbacks_class	callbacks	log_levelloggerinfologging	getLoggersetLevel(_create_local_replay_buffer_if_necessaryr  r   setremote_requests_in_flightr   rm  env_to_module_connectormodule_to_env_connectorr  r   r   r   r   
is_offlineray.rllib.offline.offline_datar   offline_data_classr   	is_onliner5   r  rh  get_default_policy_classcreate_local_env_runnerr   trial_idget_evaluation_config_objectr   r  r  %_should_create_evaluation_env_runnersr  num_env_runnersr   
get_spaces
env_runnerbuild_env_to_module_connectorbuild_module_to_env_connectorevaluation_datasetope_split_batch_by_episoder:   reward_estimatorsr=   r?   r;   r<   itemsr   r  logrsplit	importlibimport_modulegetattrr   
issubclassr@   
get_policyr>   rt  r   enable_rl_module_and_learnerr2   observation_spaceaction_spaceupdater+   build_learner_connectorget_multi_rl_module_specbuild_learner_groupr   load_state_pathr{  rl_module_specsload_module_stater
  r&   r*   sync_env_runner_statesr   peekr]   rm   	is_remoteforeach_learnerlocality_hints_workerslearner_handlesr  r  )_should_create_offline_evaluation_runners1ray.rllib.offline.offline_evaluation_runner_groupr  %offline_eval_rl_module_inference_onlynum_offline_eval_runnersoffline_eval_runner_group_aggregator_actor_manager!num_aggregator_actors_per_learnerr  r  r   rH   rangenum_learners+max_requests_in_flight_per_aggregator_actorlist	enumerateforeach_actorr  RuntimeErrorallrW  r    callbacks_on_algorithm_initr  )#r;  r   
config_objrc  ope_dictr   r  r   r  	ope_typesrD  method_configmethod_typemodobjpolicyrm  learner_connectorr  r|  rz  r{  	module_idsub_module_specr  learner_node_idsr  learner_locationsaggregator_locationsagg_idxaggregator_locationr  learner_locationr  ry  s#   `                                @@r   setupzAlgorithm.setup  s   
 &/22 	%f&@AAAAA0022Jj/:: E!&*DEEEEE,..88DD
''///!\JN$DK 	((A4;CSTTT4;''' ;9 	;WWz$+:U/V/VWWWDNN![88::DN; $555KK(=     
 ;  	Kk**33DK4IJJJ $(#P#PK$
 $
   	& ;? '+FJ$FJ$  ;??+=>>',<DT,T,TLL;KLLLH0778HII19   F    9ADK5 ;! 		%BBBBBB "&!?!N; 2 24; ? ?D !%D;  	(V 	$2 ,!.%)%B%B4;%O%O{
  ;I=DD<{"m% % %D!" "&!I!I!K!K'')))%%'''
 55d6LMM 	!99&*D,B NA{ :H'!%)%B%B4;%O%O-{"m
  1THDD/G+5: : :D&"   	B/::<<DKK' 	B4??AADK?"t{'>+/;+T+T{ ,U , ,D( ,0;+T+T{ ,U , ,D( #'"@
	6*E
	6 KK9:::)?&A* * *&D#Q KK4555@B$-	
 
	 $(;#L#R#R#T#T 	0 	0D-'++F33Ki''##Ik233   
 (4K-- 0

1>LMMM&--c155S-c22%c3//+t,, -2 2 
 **k+=>> ?-1[->M'*/:{6/S/S]/S/S&t,, B; B B B   %0M&!!;3 O	 K1K,#F $ !dk****+ d8CCEEFFFF ;( MM- $ = $ 80    [+ (,(K(K067G0H0K+12B+CA+F )L ) )% MM- 1 C 1 >0   .2[-Q-Q$ .R . .K "&!@!@* "A " "D
 #%'2'B$)9O.9.I.O.O.Q.Q U U*	?"2 U5D5T'	2' +> "44-E$3(; 5    #0::,s25HH# ;    !O $ %<<;&*l&7&7+-KLVW '8 ' ' %4"&">"&"> =     + 	*AA1&*l&7&7+-KLVW '8 ' ' %4"&">"&"> B      2 %/ V (,'9'I'III( ($8 85E8 8 8D%4
 9=8J8SD%55 :>9K9T8UD%5 1<!- ,2!(==d>TUU       -S )&*&8&B&B#4s#:=P#P', 'C ' ' (')O
 PlOk1!%!E!J!01D!E + "
P 
P 
P. *.&;3 F	K9A==![AA{$ B  Ncj    G .G     "16Q+GH    KK. . .D* !%&66NN 7   ! ! $(2@@GG A   $ $  13D-0D  ,,&9&=&=&?&?#5F 	 	1K!1 (++--a04G4JJJ,=abb,A-a0E -) FQ9'B K $"CCC&4#4 4.A!.D4 4/24 4  14 4 4   D     #()A)FQ#G#G      #;8; ;   	 	NK3#|  		
 	
 	
 	
 	
 	
r   c                     dS )a'  Returns a default Policy class to use, given a config.

        This class will be used by an Algorithm in case
        the policy class is not provided by the user in any single- or
        multi-agent PolicySpec.

        Note: This method is ignored when the RLModule API is enabled.
        Nr  r   r   s     r   r  z"Algorithm.get_default_policy_class  s	     tr   c           
         t          | j                  5  | j        j        o| j        dz   | j        j        z  dk    }| j        j        o| j        dz   | j        j        z  dk    }i }|r1| j        j        s| j        j        r|                                 \  }}}n;| j        j	        r| 
                                \  }}n|                                 \  }}|r"| j        j        s|                     d          }|rD|                                 }|r,|t                                       |t                              n|}| j        j	        r| j        j        s| j        r| j                            t(          t*          f          5  t          | j                  5  | j                            | j        | j                            t2          t4          fd          | j        | j                   ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   |                     ||          }n{|                     | j        j        | j        | j                   tA          | j        | !                                | j        j"        	          }| #                    ||i ||
          }ddd           n# 1 swxY w Y   |S )aA  Implements the main `Algorithm.train()` logic.

        Takes n attempts to perform a single training step. Thereby
        catches RayErrors resulting from worker failures. After n attempts,
        fails gracefully.

        Override this method in your Algorithm sub-classes if you would like to
        handle worker failures yourself.
        Otherwise, override only `training_step()` to implement the core
        algorithm logic.

        Returns:
            The results dict with stats/infos on sampling, training,
            and - if required - evaluation.
        rs  r   Nparallel_train_futurer  )r   r  r  r  )train_resultseval_resultscentral_workerworkersr   )timeout_seconds)episodes_this_iterstep_ctxiteration_results)$r   r$  r   evaluation_interval	iterationoffline_evaluation_intervalevaluation_parallel_to_training'offline_evaluation_parallel_to_training6_run_one_training_iteration_and_evaluation_in_parallelr  _run_one_training_iteration)_run_one_training_iteration_old_api_stack_run_one_evaluation_run_one_offline_evaluationrb   r  !_dont_auto_sync_env_runner_statesr   r   log_timer}   rz   r,  r  r  r]   rm   r  r  _compile_iteration_results_sync_filters_if_neededrj  r7   r\  $metrics_episode_collection_timeout_s(_compile_iteration_results_old_api_stack)	r;  evaluate_this_iterevaluate_offline_this_iterr  r  train_iter_ctxoffline_eval_resultsresultsr  s	            r   stepzAlgorithm.step  s!   $ &d&=>> p	 p	 / P^a'4;+JJaO  7 X^a'4;+RRVWW ' (*L " I;I;FI OOQQ	! "N ;A I484T4T4V4V1M>>
 FFHH%& " T$+*U T#77d7SS) 
8'+'G'G'I'I$   8 !34;;,-?@   
 $8L {= 0E- ..!AB    6 J    !1HH'+{26,2C2C(:(F%& -. 3D 3" 3" /3.J.2.J I                               * 99"/!- :  
 ,,#'#8#I 1; -    &6)7799$(K$T& & &"
 GG'9+&G&G,&G H  Yp	 p	 p	 p	 p	 p	 p	 p	 p	 p	 p	 p	 p	 p	 p	d s\   E.J9HAG8,H8G<<H?G< HJ9H	J9H	BJ99J= J=c           
         | j                             | j        | j        j                   t          d| j        | j        j        t          | | j	                             | j         j
        dk    r|                                  n|                                  | j	                            t          t          fi d          }t          d| j        | j        j        t          | | j	        |	                     t          |iS )
zEvaluates current policy offline under `evaluation_config` settings.

        Returns:
            A ResultDict only containing the offline evaluation results from the current
            iteration.
        from_worker_or_learner_grouprw  on_evaluate_offline_startr  callbacks_objectscallbacks_functionsr   r   Tr  latest_merged_onlyon_evaluate_offline_endr  r  evaluation_metrics)r  sync_weightsr   r   r  r    r  #callbacks_on_evaluate_offline_startr  r   num_healthy_remote_runners%_evaluate_offline_with_fixed_duration!_evaluate_offline_on_local_runnerr  rb   rt   !callbacks_on_evaluate_offline_end)r;  r  s     r   evaluate_offlinezAlgorithm.evaluate_offline,  s%    	&33)-);;L 	4 	
 	
 	
 	'"n $ O$t|DDD		
 	
 	
 	
 )DqHH66888822444|((!<=# ) 
 
 	%"n $ M#|#/  			
 		
 		
 		
 ,\::r   r  c                    t          | j                  5  |                                  | j         |                                 cddd           S | j        j        r]| j        N| j                                        r5| j        j	        j
        | j                                        d                  }n| j        }n| j        }| j        Qt          | j                  5  | j                            |d           ddd           n# 1 swxY w Y   | j        j        r| j        j        r| j                            t(          t*          f          5  t          | j                  5  | j                            | j        | j        | j                            t2          t4          fd          | j        | j                   ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   nU|                     | j        j        | j        | j                   n(| j        j        r| j                            |d           t          | j                  5  tA          d| j!        | j        j"        tG          | | j        	          
           ddd           n# 1 swxY w Y   dx}}g }| j        j$        r?| j        j        r| %                                \  }}}n| j        $                                }n| j        '| j        r | &                    | j                  \  }}}}n| j        '                                dk    r | &                    | j(                  \  }}}}nh| j        '                                dk    rI| j        j)        dk    r|J | *                    |          \  }}}}n| +                                \  }}}}ni }| j        j        rN| j                            tX          i d          }t[          d          r|st\          /                    d           n1t2          |i}||t`          <   ||tb          <   ||d<   || j2        tf          <   | j        j$        sti          tj                    }| j6        7                                D ]F\  }}	|D ]>}
|	8                    |
| j        j9                  }||         :                    |           ?G|r=i |d<   |7                                D ]#\  }}tw          j<        d g|R  }||d         |<   $t          | j=                  5  tA          d| j!        | j        j>        tG          | | j        |          
           ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   |S )ae  Evaluates current policy under `evaluation_config` settings.

        Args:
            parallel_train_future: In case, we are training and avaluating in parallel,
                this arg carries the currently running ThreadPoolExecutor object that
                runs the training iteration. Use `parallel_train_future.done()` to
                check, whether the parallel training job has completed and
                `parallel_train_future.result()` to get its return values.

        Returns:
            A ResultDict only containing the evaluation results from the current
            iteration.
        Nr   Tr.  r  r   from_workerr  r  r  r  on_evaluate_startr  r1  auto)keyr  r5  no_eval_resultszoNo evaluation results found for this iteration. This can happen if the evaluation worker(s) is/are not healthy.timesteps_this_iter)split_batch_by_episodeoff_policy_estimatorc                  .    t          j        | d          S )Nr   )axis)npmean)xs    r   r  z$Algorithm.evaluate.<locals>.<lambda>  s    rwqq'9'9'9 r   on_evaluate_endr7  )?r   r)  _before_evaluater  %_run_offline_evaluation_old_api_stackr   r  r   healthy_env_runner_ids_worker_manager_actorsr   r  r   r*  r9  r   broadcast_env_runner_statesr   r"  r}   r{   r+  r  r  r]   rm   r  r  r$  rj  r0  r    r  callbacks_on_evaluate_startr  custom_evaluation_function#_evaluate_with_custom_eval_function_evaluate_on_local_env_runnernum_healthy_remote_workerseval_env_runnerevaluation_duration_evaluate_with_auto_duration_evaluate_with_fixed_durationrb   r   r  warningrh   rn   r!  rl   r   r  r  r  estimater  appendtreemap_structurer1  callbacks_on_evaluate_end)r;  r  weights_src	env_stepsagent_stepsbatchesr  	estimatesrD  	estimatorbatchestimate_resultestimate_listavg_estimates                 r   evaluatezAlgorithm.evaluatee  s:   $ &d&ABB v	 v	!!###&2AACCv	 v	 v	 v	 v	 v	 v	 v	 {= .)5-DDFF 6 #'"7"G"O-DDFFqI#KK #'"4KK"o )5-G    .;;5@'+ <                  ;A -I "!\22#%JK  " " ": $ Q" " " " !% : Q Q+/+A046:l6G6G,>,J)* 12 7H 7& 7& 372N262N !R !" !" !"" " " " " " " " " " " " " " "" " " " " " " " " " " " " " "( 00'+'<'M $ :#5 1     ? %221<#' 3   
 *=    '&*n(,(O$t|LLL	                  '('IG {5 .";A L
 @@BB	$!# $(;#I#I#K#KLL+33 66tGG G +FFHHAMM 66t7KLL G +FFHH1LL;2f<<0<<< 99:OPP$!# ::<<$!#  "{= #|00*'+  1    
 -.. | NN J   !3LABM>?@I<=6?23  B
 ;9 R'--	'+'='C'C'E'E @ @OD)!( @ @*3*<*<!37;3Y += + + "$..????@  R;=L!78/8/@/@ R R+m'+'999(<I( ( ( FR%;<TBB *$*UVV 
 
%&*n(,(M"&'+|+7  		 	 	 	
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Yv	 v	 v	 v	 v	 v	 v	 v	 v	 v	 v	 v	 v	 v	 v	r s   /VBVD:VD
	
VD
	AVG!$AG
>G!
GG!GG!V!G%	%V(G%	)A-V7JVJ	V J	!JV&8U*V*U.	.V1U.	2VVVc                 |   t                               d|  d| j        j                    | j        j        r?| j                            | | j                  \  }}}|r|st          d| d| d          n| j                                        }|rt          |t                    st          d| d          |||fS )NEvaluating current state of z  using the custom eval function zxCustom eval function must return `Tuple[ResultDict, int, int]` with `int, int` being `env_steps` and `agent_steps`! Got z, r   z6Custom eval function must return dict of metrics! Got )	r  r  r   rW  r  r   r   r   r  )r;  r  rf  rg  s       r   rX  z-Algorithm._evaluate_with_custom_eval_function2  s)   84 8 8{58 8	
 	
 	
 ;9 	D
 66tT=WXX	 K  V:CV VGRV V V    ;AACCL 	:lD#A#A 	8(48 8 8  
 Y33r   c           	         d}| j         j        | j         j        z  }t                              d|  d| d| d           | j        j                                        }| j        	                    |gt          t          f           d S )Nrh  rq   for  r   rE  )r   offline_evaluation_duration!dataset_num_iters_per_eval_runnerr  r  r  r  runr   	aggregaterb   rt   )r;  unitdurationr+  s       r   r=  z+Algorithm._evaluate_offline_on_local_runnerM  s    K3k;< 	
 	P4PPhPPPPPQQQ0=AACCI#%@A 	 	
 	
 	
 	
 	
r   c           	      j   t          |d          r|j        t          d          | j        j        rt          d          | j        j        }| j        j        }| j        }dx}}t          	                    d|  d| d| d           g }| j        j
        r`|                    |d	k    r|nd |d
k    r|nd           }|t          d |D                       z  }|t          d |D                       z  }n|d
k    rqt          |          D ]`}	|                                }
||
                                z  }||
                                z  }| j        r|                    |
           an^|                                }
||
                                z  }||
                                z  }| j        r|                    |
           |                                }| j        j
        st'          |||j                  }n+| j                            |gt.          t0          f           d }||||fS )Ninput_readera;  Can't evaluate on a local worker if this local worker does not have an environment!
Try one of the following:
1) Set `evaluation_interval` > 0 to force creating a separate evaluation EnvRunnerGroup.
2) Set `create_local_env_runner=True` to force the local (non-eval) EnvRunner to have an environment to evaluate on.zpCannot run on local evaluation worker parallel to training! Try setting `evaluation_parallel_to_training=False`.r   rq  rs  rt  r   	timestepsepisodes)num_timestepsnum_episodesc              3   >   K   | ]}|                                 V  d S rY  rg  r_  es     r   r  z:Algorithm._evaluate_on_local_env_runner.<locals>.<genexpr>}  s*      AA1q}}AAAAAAr   c              3   >   K   | ]}|                                 V  d S rY  rf  r  s     r   r  z:Algorithm._evaluate_on_local_env_runner.<locals>.<genexpr>~  s*      ==qQ[[]]======r   keep_custom_metricsru  )hasattrr}  r   r   r  evaluation_duration_unitr\  r   r  r  r  samplesumr  rg  rf  r  ra  get_metricsr8   keep_per_episode_custom_metricsr   ry  rb   r]   )r;  r  rz  r{  eval_cfgrf  rg  all_batchesr  r   rk  env_runner_resultss               r   rY  z'Algorithm._evaluate_on_local_env_runner^  s   :~.. 	:3J3R   [8 	C   {3;2)"##	KP4PPhPPPPPQQQ;9 	*!((*.+*=*=hh4)-););XX )  H 3AAAAAAAAK==H======IIZ8__ . ."))++u00222U__...	) .&&u---. %%''E5,,...K***I% *""5)))'3355{= 	&!3""$,$L" " " L""#$');< #    "&!9k;FFr   c           	      "   t                               d|  d           g }g }| j                                        }| j        j        fd}dx}}| j        j        r)| j                            t          t          fd          }n| j        t                   j        }t          j                    }	| j        d}
|dk    r|
dk    s|                                s|
dz  }
| j        j        r| j                            t           t"          t$          fd	d
di          d
         }t'          | j        j        t+          | j        j        |t          j                    |	z
  z
  |z  |z                      }| j                            |||
dd          }|D ]2\  }}}}|| j        k    r||z  }||z  }|                    |           3t          j        d           n| j                            fdd          }|D ]q\  }}}|| j        k    r||                                z  }||                                z  }|                    |           | j        r|                    |           r| j                                        }|dk    r|
dk    |                                |dk    rt                               d           | j        j        s*t?          ||| j         j!                  }|tD                   }nX| j        #                    |t           t"          f           | j                            t           t"          tD          fd          }d }| j        j        r$|dk    rt                               d| d           ||||fS )Nrq  z; for as long as the parallelly running training step takes.c                     |                      |o|dk              }|                                 }t          d |D                       }t          d |D                       }||||fS )Nr   )r  force_resetc              3   >   K   | ]}|                                 V  d S rY  r  r  s     r   r  zUAlgorithm._evaluate_with_auto_duration.<locals>._env_runner_remote.<locals>.<genexpr>  *      <<aAKKMM<<<<<<r   c              3   >   K   | ]}|                                 V  d S rY  r  r  s     r   r  zUAlgorithm._evaluate_with_auto_duration.<locals>._env_runner_remote.<locals>.<genexpr>  *      @@!ammoo@@@@@@r   )r  r  r  )	workernumrounditerr  r   rf  rg  r  s	           r   _env_runner_remotezBAlgorithm._evaluate_with_auto_duration.<locals>._env_runner_remote  s     }}!{/Iuz %  H ((**G<<8<<<<<I@@x@@@@@Kk7D88r   r           r  r  rs  Tthroughput_since_last_restore)
throughputr  )r  r  r  r  r  r   tag{Gz?c                 V    |                                  |                                 fS rY  r  r  walgo_iterations    r   r  z8Algorithm._evaluate_with_auto_duration.<locals>.<lambda>      

AMMOO^'T r   !env_runner_sample_and_get_metrics)r  r  ]  Calling `sample()` on your remote evaluation worker(s) resulted in all workers crashing! Make sure a) your environment is not too unstable, b) you have enough evaluation workers (`config.evaluation(evaluation_num_env_runners=...)`) to cover for occasional losses, and c) you use the `config.fault_tolerance(restart_failed_env_runners=True)` setting.r  ru  a  This evaluation iteration resulted in an empty set of episode summary results! It's possible that the auto-duration time (roughly the mean time it takes for the training step to finish) is not enough to finish even a single episode. Your current mean training iteration time is aE  sec. Try setting the min iteration time to a higher value via the `config.reporting(min_time_s_per_iteration=...)` OR you can also set `config.evaluation_force_reset_envs_before_iteration` to False. However, keep in mind that then the evaluation results may contain some episode stats generated with earlier weights versions.)$r  r  r   rZ  r   ,evaluation_force_reset_envs_before_iterationr  r   r  r}   r~   r  rM  timer  donerb   r]   rm   min1evaluation_auto_duration_max_env_steps_per_samplemax1evaluation_auto_duration_min_env_steps_per_sample$foreach_env_runner_async_fetch_readyra  sleeprf  rg  extendr  r_  r8   r   r  rq   ry  )r;  r  all_metricsr  num_healthy_workersr  rf  rg  train_mean_timet0_roundthroughput_estimate_numr+  env_sag_sr   r  rk  r  r  r  r  s                        @@r   r]  z&Algorithm._evaluate_with_auto_duration  s   +4 + + +	
 	
 	

  #8SSUU kN		9 		9 		9 		9 		9 #$#	K;9 	J"l//12C 0  OO #l+CDIOY[[  !## 2%:%?%?%A%AaKF{= B2 '+l&7&7**6
  $ =cB '8 
' 
' 2
'3# KQU -	b0@A 22 2	2	
 
 " .SS/'+fnUU0 T    3: 0 0.E4$ t~-- &I4'K&&w////
4    
 .SSTTTT? T    -4 	2 	2(E7Dt~-- !2!22I5#4#4#6#66K&&w///- 2 $**5111 *EEGG  Y  !## 2%:%?%?%A%AZ !##NN=   {= 	&!3*J	" " " .l;LLL""');< #     <,,#%7F -  L "&
 KD	!!NNV #	V V V
 
 
 "9k;FFr   c                 R   | j         j        }| j         j        }d }g }g }d}| j        j        }t          j                    }d}| j        }	|dk    r| j         j        |z  |z
  }
|
dk    rn|dz  }| j                            t          j
        ||	                     | j                            dd	          }t          j                    }|s
||z
  |k    rn|r|}|D ]l\  }\  }}|| j        k    r|                    |           |t          |t                   v r*|t                   t                                                   ndz  }m| j        j        }|dk    |dk    rt                               d
           | j                            |t(          t*          f           d S )Nc                 2    |                                  }||fS rY  )rx  )runnerr  r   s      r   _offline_eval_runner_remotezTAlgorithm._evaluate_offline_with_fixed_duration.<locals>._offline_eval_runner_remoteO  s    jjllGD= r   r   r  rs  )r  r  Fr  )return_obj_refsr  aj  Calling `run()` on your remote offline evaluation runner(s) resulted in all runners crashing! Make sure a) your dataset is not corrupted, b) you have enough offline evaluation runners (`config.evaluation(num_offline_eval_runners=...)`) to cover for occasional losses, and c) you use the `config.fault_tolerance(restart_failed_offline_eval_runners=True)` setting.ru  )r   r  offline_evaluation_timeout_sr  r;  r  r  rv  foreach_runner_async	functoolspartialfetch_ready_async_reqsra  r\   r[   r  r  r_  r   ry  rb   rt   )r;  rn  time_outr  r  num_units_doner  t_last_resultr  r  units_left_to_dor+  time_nowwidmetr  s                   r   r<  z/Algorithm._evaluate_offline_with_fixed_durationJ  s*   k:;;	! 	! 	!  "<W
 	 "A%%7+EV   1$$aKF*??&/'   @    4KK %t L  G
 y{{H )x-7(BB ) ($+ 
 
 [c44>))""3''' 2c+6FFF $%@AFFHHH .I  M "A%%T !##NNF   	#%@A 	 	
 	
 	
 	
 	
r   c                    | j         j        | j        }| j         j        }| j         j        }| j         j        }fd}g }g }d}| j                                        dx}	}
t          j                    }d}| j	        dk    r^| j         j
        |z
  dk    rnG|dz  }| j         j        rd gfdt          d|dz             D             z   }| j                            ||||dd          }t          j                    }|s||z
  |k    rn|r|}|D ]y\  }}}}|| j	        k    r|	|z  }	|
|z  }
|                    |           |d	k    r*t          |v r|t                                                   ndn| j         j        d
k    r|n|z  }zn.d	k    rdn|j        |j        z  fdt)          | j                                                  D             }| j                            fd|d          }t          j                    }|s
||z
  |k    rn|r|}|D ]q\  }}}|| j	        k    r|	|                                z  }	|
|                                z  }
|                    |           | j        r|                    |           rd	k    r|t5          |          z  }n| j         j        d
k    r|	n|
}| j                                        dk    ^dk    rt6                              d           | j         j        s*t;          ||| j        j                  }|t                   }nY| j                             |tB          tD          f           | j                            tB          tD          t          fdd          }d }|dk    r5t6                              d| j         j
         d| j         j         d           ||	|
|fS )Nc                 $   |                      	dk    r|| j                 nd 	dk    r|| j                 nd |o|dk              }|                                 }t          d |D                       }t          d |D                       }||||fS )Nr~  r  r   )r  r  r  c              3   >   K   | ]}|                                 V  d S rY  r  r  s     r   r  zVAlgorithm._evaluate_with_fixed_duration.<locals>._env_runner_remote.<locals>.<genexpr>  r  r   c              3   >   K   | ]}|                                 V  d S rY  r  r  s     r   r  zVAlgorithm._evaluate_with_fixed_duration.<locals>._env_runner_remote.<locals>.<genexpr>  r  r   )r  worker_indexr  r  )
r  r  r  r  _force_resetr  r   rf  rg  rz  s
            r   r  zCAlgorithm._evaluate_with_fixed_duration.<locals>._env_runner_remote  s     }}040C0CC+,,:>*:L:Lc&"566RV(7UaZ %  H ((**G<<8<<<<<I@@x@@@@@Kk7D88r   r   r  rs  c                 H    g | ]}z  t          |z  k              z   S r  )bool)r_  ir  r  s     r   r`  z;Algorithm._evaluate_with_fixed_duration.<locals>.<listcomp>  sN     ! ! !  &)<<1!14G!GHIIJ! ! !r   )r  r  r  r  r  r  r  rf  c                 ,    g | ]\  }}|z  k     |S r  r  )r_  r  	worker_idr  units_per_healthy_remote_workers      r   r`  z;Algorithm._evaluate_with_fixed_duration.<locals>.<listcomp>   s>     , , ,$9 ::=MMM	  NMMr   c                 V    |                                  |                                 fS rY  r  r  s    r   r  z9Algorithm._evaluate_with_fixed_duration.<locals>.<lambda>
  r  r   r  )r  remote_worker_idsr  r  r  ru  Tr4  zThis evaluation iteration resulted in an empty set of episode summary results! It's possible that your configured duration timesteps are not enough to finish even a single episode. You have configured rt  a  . For 'timesteps', try increasing this value via the `config.evaluation(evaluation_duration=...)` OR change the unit to 'episodes' via `config.evaluation(evaluation_duration_unit='episodes')` OR try increasing the timeout threshold via `config.evaluation(evaluation_sample_timeout_s=...)` OR you can also set `config.evaluation_force_reset_envs_before_iteration` to False. However, keep in mind that in the latter case, the evaluation results may contain some episode stats generated with earlier weights versions.)#r   r  r   evaluation_num_env_runnersr  evaluation_sample_timeout_sr   rZ  r  r  r\  r  r  r  ra  rq   r  count_steps_byrollout_fragment_lengthnum_envs_per_env_runnerr  r[  rf  rg  r  r  lenr  r_  r8   r  r   ry  rb   r]   )r;  r  rn  r  r  r  r  r  r  rf  rg  r  r  r  r+  r  r  r  r  r  selected_eval_worker_idsrk  r   r  r  r  r  rz  r  r  s                            @@@@@r   r^  z'Algorithm._evaluate_with_fixed_duration  sB   {3)k<kN;:	9 	9 	9 	9 	9   "8SSUU"##	K	 "A%%#{>O1$$aKF {= [v ! ! ! ! ! #1kAo66! ! !  .SS/#'%+$2,7	    1 T 	 	   9;; -8m#;h#F#F -$,M.5  *E4dt~-- &I4'K&&s+++":-- 6BS5H5H\*//111aa &*[%?;%N%NEETX	NN" z)) A!967 0, , , , ,(12EEGG) ), , ,( .SSTTTT*B? T     9;; -8m#;h#F#F -$,M,3 	2 	2(E7Dt~-- !2!22I5#4#4#6#66K&&w///- 2 $**5111 :%%"c'll2NN
  ;5DD "	( # *EEGG  M "A%%T !##NN=   {= 	&!3*J	" " " .l;LLL""');< #     <,,#%7F#' -  L
 "& 1NN, ;2, , ;7	, , ,    "9k;FFr   c                    d}| j         j        r|                                }|sg S | j        dxx         t	          |          z  cc<   |j        p| j        }||                                }t          j	        |          | j         j
        s5|d                                         D ]}|                    dd           n| j         j        rt          j        |j                  n| j                            d           j        d                                         | j                            t*          dz   t,          z   d	          t*                   }| j                                        |t0          <   | j                                        |t4          <   | j                            t:          t<          fd
          |t<          <   t          j	        |          fd}|                    ||d| j         j                    |S )a  Try bringing back unhealthy EnvRunners and - if successful - sync with local.

        Algorithms that use custom EnvRunners may override this method to
        disable the default, and create custom restoration logics. Note that "restoring"
        does not include the actual restarting process, but merely what should happen
        after such a restart of a (previously failed) worker.

        Args:
            env_runner_group: The EnvRunnerGroup to restore. This may be the training or
                the evaluation EnvRunnerGroup.

        Returns:
            A list of EnvRunner indices that have been restored during the call of
            this method.
        Ntotal_num_restored_workerspolicy_statesconnector_configsc                 4    t          j        | j                  S rY  )r/   from_modulemodule)learners    r   r  z/Algorithm.restore_env_runners.<locals>.<lambda>  s    $5$A'.$Q$Q r   r   r   Tr}  r  c                    | j         j        r| j         j        r| j        j                                                                        D ]*\  }}|j        vr| j                            |d           +j                                        D ]=\  }}|| j        vr/| j        	                    ||
                                d           >|                     t          j                             d S )NT)raise_err_if_not_foundF)rO   )r   r  is_multi_agentr  _rl_modulesr   r  r  remove_module
add_modulebuild	set_stater  r  )err   r  midmod_specmulti_rl_module_spec	state_refs        r   _sync_env_runnerz7Algorithm.restore_env_runners.<locals>._sync_env_runner  s     	<
TI,
T *,)>)C)C)E)E)K)K)M)M X X%Iv (<(LLL	//	RV/WWW%9%I%O%O%Q%Q T TMC")++	,,S(..2B2BU,SSSLL++,,,,,r   F)r  r  rj  r  )!r   r  probe_unhealthy_env_runnersr!  r  rj  r  r
  r  putr  r  r   r  r/   r  r  r   r  result_or_errorsr  r&   r*   r  r$   r  r)   r   r  r]   rm   foreach_env_runnerenv_runner_restore_timeout_s)	r;  r   restoredfrom_env_runnerr   
pol_statesr  r  r  s	          @@r   restore_env_runnerszAlgorithm.restore_env_runnerse  sU   & ;  	F'CCEEH 	I 	3444HE444*;Nt &#--//EI ;A "'"8"?"?"A"A > >JNN#6====> + '8'D#*( ($ "22QQ  "!%  ! &00,s25HH# 1    !E ,6688 1
 ,6688 1 59L4E4E#%CD 5F 5 5E01 I	- 	- 	- 	- 	- 	-$ 	++!& # KD 	, 	
 	
 	
 r   runner_groupc           	         |r|j         sg S |                                }|r| j        dxx         t          |          z  cc<   |                                d         }|                    dd|          d         }t          j        |          fd}|                    ||d| j        j	                   |                    d|d| j        j	        d	|j
        |         i
           |S )Nr  r   r
  F)r  r  c                 V    |                      t          j                             d S rY  r  r  r  )rr  s    r   _sync_runnerz<Algorithm.restore_offline_eval_runners.<locals>._sync_runner  s%    CGI../////r   )r  r  r  r  set_dataset_iteratoriterator)r  r  r  r  r   )r  probe_unhealthy_runnersr!  r  healthy_runner_idsforeach_runnerr  r  r   %offline_eval_runner_restore_timeout_s_offline_data_iterators)r;  r  r  from_runnerr   r
  r  s         @r   restore_offline_eval_runnersz&Algorithm.restore_offline_eval_runners  s>    	<#< 	I7799 $	N7888CMMI888 '99;;A>K //""- 0   	E
 I0 0 0 0 0
 ''!"*" $ 6 \ (    ''+"*" $ 6 \"L$H$RS (    r   c           
      F   | j         j        st          d          | j                            t
          t          f          5  | j         j        dk    r1t          | j	        | j         j
        | j         j        dd          \  }}n0t          | j	        | j         j
        | j         j        dd          \  }}ddd           n# 1 swxY w Y   | j                            |t                     | j                            t
          t          f          5  | j                            |t"          | j                            t"                    i          }| j                            |t&                     ddd           n# 1 swxY w Y   | j                            t
          t(          f          5  | j	                            | j        t-          t/          |                                          t2          hz
            d	           ddd           dS # 1 swxY w Y   dS )
ad  Default single iteration logic of an algorithm.

        - Collect on-policy samples (SampleBatches) in parallel using the
          Algorithm's EnvRunners (@ray.remote).
        - Concatenate collected SampleBatches into one train batch.
        - Note that we may have more than one policy in the multi-agent case:
          Call the different policies' `learn_on_batch` (simple optimizer) OR
          `load_batch_into_buffer` + `learn_on_loaded_batch` (multi-GPU
          optimizer) methods to calculate loss and update the model(s).
        - Return all collected metrics for the iteration.

        Returns:
            For the new API stack, returns None. Results are compiled and extracted
            automatically through a single `self.metrics.reduce()` call at the very end
            of an iteration (which might contain more than one call to
            `training_step()`). This way, we make sure that we account for all
            results generated by each individual `training_step()` call.
            For the old API stack, returns the results dict from executing the training
            step.
        a+  The `Algorithm.training_step()` default implementation no longer supports the old API stack! If you would like to continue using these old APIs with this default `training_step`, simply subclass `Algorithm` and override its `training_step` method (copy/paste the code and delete this error message).rg  T)
worker_setmax_agent_stepssample_timeout_s_uses_new_env_runners_return_metrics)r  max_env_stepsr  r  r  Nru  )r  r~  )r/  r   rw  )r   r  NotImplementedErrorr   r"  r}   r^   r  r9   r   total_train_batch_sizer  ry  r]   re   r   r  rm   r  rd   r|   r9  r  r  keysr[   )r;  r  r  learner_resultss       r   training_stepzAlgorithm.training_step  s   , {= 	%7   \""F,E#FGG 	 	{)]::/J#4$(K$F%)[%A*.$(0 0 0,,, 0K#4"&+"D%)[%A*.$(0 0 0,,	 	 	 	 	 	 	 	 	 	 	 	 	 	 	$ 	17IJJJ\""F,@#ABB 		I 		I"077!2))*HII 8  O L""?"HHH		I 		I 		I 		I 		I 		I 		I 		I 		I 		I 		I 		I 		I 		I 		I \""F,F#GHH 	 	!..-1-?c/"6"6"8"899[MIJJ# /   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s9   A2C  CCA"F  FF1AHHHr   c                     | j         | j         j        }n%| j                            d dgd          d         }t	          |t
                    r|                    |          S |S )a  Returns the (single-agent) RLModule with `model_id` (None if ID not found).

        Args:
            module_id: ID of the (single-agent) RLModule to return from the MARLModule
                used by the local EnvRunner.

        Returns:
            The RLModule found under the ModuleID key inside the local EnvRunner's
            MultiRLModule. None if `module_id` doesn't exist.
        Nc                     | j         S rY  )r  )r  s    r   r  z&Algorithm.get_module.<locals>.<lambda>V	  s    29 r   rs  F)r  rj  r   )r  r  r   r  r   r.   r  )r;  r   r  s      r   
get_modulezAlgorithm.get_moduleF	  sx     ?&_+FF*==$$#$#!& >   	F fm,, 	::i(((Mr   T)config_overridesnew_agent_to_module_mapping_fnnew_should_module_be_updatedadd_to_learnersadd_to_env_runnersadd_to_eval_env_runnersr  r#  r$  r%  r&  r'  r(  c                   t          d           d}	| j        j        st          dt           d          t          |||g          st          d          |r| j                            ||          }	d| j        _	        t                      | j        j        <   || j                            |i	           | j                            
           | j                            |	           | j                                       | j                                         |ffd	}
|r^|	!| j                            |
          d         }	n| j                            |
           | j                            | j        d           |du re| j        ^|	!| j                            |
          d         }	n| j                            |
           | j                            | j        d           |	S )aV  Adds a new (single-agent) RLModule to this Algorithm's MARLModule.

        Note that an Algorithm has up to 3 different components to which to add
        the new module to: The LearnerGroup (with n Learners), the EnvRunnerGroup
        (with m EnvRunners plus a local one) and - if applicable - the eval
        EnvRunnerGroup (with o EnvRunners plus a local one).

        Args:
            module_id: ID of the RLModule to add to the MARLModule.
                IMPORTANT: Must not contain characters that
                are also not allowed in Unix/Win filesystems, such as: `<>:"/|?*`,
                or a dot, space or backslash at the end of the ID.
            module_spec: The SingleAgentRLModuleSpec to use for constructing the new
                RLModule.
            config_overrides: The `AlgorithmConfig` overrides that should apply to
                the new Module, if any.
            new_agent_to_module_mapping_fn: An optional (updated) AgentID to ModuleID
                mapping function to use from here on. Note that already ongoing
                episodes will not change their mapping but will use the old mapping till
                the end of the episode.
            new_should_module_be_updated: An optional sequence of ModuleIDs or a
                callable taking ModuleID and SampleBatchType and returning whether the
                ModuleID should be updated (trained).
                If None, will keep the existing setup in place. RLModules,
                whose IDs are not in the list (or for which the callable
                returns False) will not be updated.
            add_to_learners: Whether to add the new RLModule to the LearnerGroup
                (with its n Learners).
            add_to_env_runners: Whether to add the new RLModule to the EnvRunnerGroup
                (with its m EnvRunners plus the local one).
            add_to_eval_env_runners: Whether to add the new RLModule to the eval
                EnvRunnerGroup (with its o EnvRunners plus the local one).

        Returns:
            The new MultiRLModuleSpec (after the RLModule has been added).
        Tr   NzCan't add a new RLModule to a single-agent setup! Make sure that your setup is already initially multi-agent by either defining >1 RLModules in your `rl_module_spec` or assigning a ModuleID other than z to your (only) RLModule.zjAt least one of `add_to_learners`, `add_to_env_runners`, or `add_to_eval_env_runners` must be set to True!)r   r  r#  r%  F)%algorithm_config_overrides_per_moduler   rx  r   c                    | j                             |                                           | j                                       | j                                       t          j        | j                   S )N)r   r  r,  r-  )r  r  r  r   multi_agentr/   r  )_env_runner_module_specr   r$  r%  s     r   _addz"Algorithm.add_module.<locals>._add	  s    ))#L,>,>,@,@ *    .9"..&D /    ,7"..&B /    %01CDDDr   r   r.  )r-   r   r  r  r+   anyr   r   r  
_is_frozenrB   r   r/  	rl_moduler  r   r  r9  r   )r;  r   r  r#  r$  r%  r&  r'  r(  r  r2  s    `  ``     r   r  zAlgorithm.add_module`	  s   b 	9D1111  ${) 	E *E E E   O%79PQRR 	A    	#'#5#@#@#'!1-I	 $A $ $  "'*4,,Y''K##7@BR6S $    *5K##6T#UUU-ABBB'3K##6R#SSS+6 	E 	E 	E 	E 	E 	E 	E 	E(  	#+'+'<'O'OPT'U'UVW'X$$%88>>>!..-1-?# /   
 #d**t/I/U#+'+'A'T'T( (($$ *==dCCC&33-1-?# 4   
 $#r   )r$  r%  remove_from_learnersremove_from_env_runnersremove_from_eval_env_runnersr6  r7  r8  c                6   d}|r| j                             |          }d| j        _        | j        j        = | j        j                            d           | j                                       | j                            |           || j                            |           | j        	                                 fd}|r^|!| j
                            |          d         }n| j
                            |           | j
                            | j         d	
           |d	u re| j        ^|!| j                            |          d         }n| j                            |           | j                            | j         d	
           |S )a^  Removes a new (single-agent) RLModule from this Algorithm's MARLModule.

        Args:
            module_id: ID of the RLModule to remove from the MARLModule.
                IMPORTANT: Must not contain characters that
                are also not allowed in Unix/Win filesystems, such as: `<>:"/|?*`,
                or a dot, space or backslash at the end of the ID.
            new_agent_to_module_mapping_fn: An optional (updated) AgentID to ModuleID
                mapping function to use from here on. Note that already ongoing
                episodes will not change their mapping but will use the old mapping till
                the end of the episode.
            new_should_module_be_updated: An optional sequence of ModuleIDs or a
                callable taking ModuleID and SampleBatchType and returning whether the
                ModuleID should be updated (trained).
                If None, will keep the existing setup in place. RLModules,
                whose IDs are not in the list (or for which the callable
                returns False) will not be updated.
            remove_from_learners: Whether to remove the RLModule from the LearnerGroup
                (with its n Learners).
            remove_from_env_runners: Whether to remove the RLModule from the
                EnvRunnerGroup (with its m EnvRunners plus the local one).
            remove_from_eval_env_runners: Whether to remove the RLModule from the eval
                EnvRunnerGroup (with its o EnvRunners plus the local one).

        Returns:
            The new MultiRLModuleSpec (after the RLModule has been removed).
        N)r   r%  Fr,  rx  r-  c                     | j                                        | j                                       d| _        t          j        | j                   S )N)r   r,  T)r  r  r   r/  _needs_initial_resetr/   r  )r0  r   r$  s    r   _removez(Algorithm.remove_module.<locals>._remove&
  sd    ,,y,AAA-9"..&D /    04K,$01CDDDr   r   Tr.  )r   r  r   r4  r   r+  r   r/  r5  r  r   r  r9  r   )	r;  r   r$  r%  r6  r7  r8  r  r<  s	    ``      r   r  zAlgorithm.remove_module	  s   N  $   	#'#5#C#C#-I $D $ $  "'K +9==iNNN)5K##6T#UUU-ABBB'3K##6R#SSS	E 	E 	E 	E 	E 	E" # 
	#+'+'<'O'O( (($$ %88AAA!..-1-?# /    )D00*6#+'+'A'T'T( (($$ *==gFFF&33-1-?# 4   
 $#r   	policy_idc                 6    | j                             |          S )zwReturn policy for the specified id, or None.

        Args:
            policy_id: ID of the policy to return.
        )r  r  )r;  r=  s     r   r  zAlgorithm.get_policyU
  s     )))444r   c                 z    | j         | j                             |          S | j                            |          S )zReturn a dict mapping Module/Policy IDs to weights.

        Args:
            policies: Optional list of policies to return weights for,
                or None for all policies.
        N)
module_ids)r   get_weightsr  )r;  r   s     r   rA  zAlgorithm.get_weights^
  s=     )%11X1FFF**8444r   weightsc                     | j         *|                     t          t          t          |iii           | j        j                            |           dS )zSet RLModule/Policy weights by Module/Policy ID.

        Args:
            weights: Dict mapping ModuleID/PolicyID to weights.
        N)r   r  r'   r&   r*   r   rj  set_weights)r;  rB  s     r   rD  zAlgorithm.set_weightsk
  s_     )NN+)/,.   	.::7CCCCCr   )r  r  r   policy_stater   r   r'  r(  r  evaluation_workersr&  
policy_clsr  r  r  rE  c                   | j         j        rt          d          |t          k    rt	          ddd           |t          k    rt	          ddd           t          |d	           |
du r$| j                            |||||||||	|

  
         |du r+| j        $| j                            |||||||||	|

  
         |
r| 	                    |          S |r| j        r| j
        j        |         S dS dS )a{
  Adds a new policy to this Algorithm.

        Args:
            policy_id: ID of the policy to add.
                IMPORTANT: Must not contain characters that
                are also not allowed in Unix/Win filesystems, such as: `<>:"/|?*`,
                or a dot, space or backslash at the end of the ID.
            policy_cls: The Policy class to use for constructing the new Policy.
                Note: Only one of `policy_cls` or `policy` must be provided.
            policy: The Policy instance to add to this algorithm. If not None, the
                given Policy object will be directly inserted into the Algorithm's
                local worker and clones of that Policy will be created on all remote
                workers as well as all evaluation workers.
                Note: Only one of `policy_cls` or `policy` must be provided.
            observation_space: The observation space of the policy to add.
                If None, try to infer this space from the environment.
            action_space: The action space of the policy to add.
                If None, try to infer this space from the environment.
            config: The config object or overrides for the policy to add.
            policy_state: Optional state dict to apply to the new
                policy instance, right after its construction.
            policy_mapping_fn: An optional (updated) policy mapping function
                to use from here on. Note that already ongoing episodes will
                not change their mapping but will use the old mapping till
                the end of the episode.
            policies_to_train: An optional list of policy IDs to be trained
                or a callable taking PolicyID and SampleBatchType and
                returning a bool (trainable or not?).
                If None, will keep the existing setup in place. Policies,
                whose IDs are not in the list (or for which the callable
                returns False) will not be updated.
            add_to_env_runners: Whether to add the new RLModule to the EnvRunnerGroup
                (with its m EnvRunners plus the local one).
            add_to_eval_env_runners: Whether to add the new RLModule to the eval
                EnvRunnerGroup (with its o EnvRunners plus the local one).
            module_spec: In the new RLModule API we need to pass in the module_spec for
                the new module that is supposed to be added. Knowing the policy spec is
                not sufficient.

        Returns:
            The newly added policy (the copy that got added to the local
            worker). If `workers` was provided, None is returned.
        z`Algorithm.add_policy()` is not supported on the new API stack w/ EnvRunners! Use `Algorithm.add_module()` instead. Also see `rllib/examples/self_play_league_based_with_open_spiel.py` for an example.z,Algorithm.add_policy(evaluation_workers=...)z1Algorithm.add_policy(add_to_eval_env_runners=...)Tr   z(Algorithm.add_policy(add_to_learners=..).Hybrid API stack no longer supported by RLlib!r   rg  r   r*  )r  r  r   rE  r   r   r  N)r   r  r   r   r   r-   r   
add_policyr   r  r[  
policy_map)r;  r=  rG  r  r  r  r   rE  r   r   r'  r(  r  rF  r&  s                  r   rK  zAlgorithm.add_policy
  s   H ;9 	   !111BG   
 ...>E    	9D1111%%!,,"3))"3"3' -    #d**t/I/U&11"3))"3"3' 2     	>??9---$ 	>)C 	>'29==	> 	> 	> 	>r   )r   r   r7  r8  rF  r6  c                    |t           k    rt          ddd           |}|t           k    rt          ddd           fd	}|r| j                            |d
           |r%| j         | j                            |d
           dS dS dS )aH  Removes a policy from this Algorithm.

        Args:
            policy_id: ID of the policy to be removed.
            policy_mapping_fn: An optional (updated) policy mapping function
                to use from here on. Note that already ongoing episodes will
                not change their mapping but will use the old mapping till
                the end of the episode.
            policies_to_train: An optional list of policy IDs to be trained
                or a callable taking PolicyID and SampleBatchType and
                returning a bool (trainable or not?).
                If None, will keep the existing setup in place. Policies,
                whose IDs are not in the list (or for which the callable
                returns False) will not be updated.
            remove_from_env_runners: Whether to remove the Policy from the
                EnvRunnerGroup (with its m EnvRunners plus the local one).
            remove_from_eval_env_runners: Whether to remove the RLModule from the eval
                EnvRunnerGroup (with its o EnvRunners plus the local one).
        z/Algorithm.remove_policy(evaluation_workers=...)z9Algorithm.remove_policy(remove_from_eval_env_runners=...)Fr   z0Algorithm.remove_policy(remove_from_learners=..)rI  TrJ  c                 8    |                                 d S )N)r=  r   r   )remove_policy)r  r   r=  r   s    r   fnz#Algorithm.remove_policy.<locals>.fn1  s3      #"3"3 !     r   rj  N)r   r   r   r  r   )	r;  r=  r   r   r7  r8  rF  r6  rP  s	    ```     r   rO  zAlgorithm.remove_policy
  s    J !111EO   
 ,>(#333FE   	 	 	 	 	 	 	 # 	P!44R$4OOO ( 	UD,F,R&99"t9TTTTT	U 	U,R,Rr   r   c                     |                      d          }|t          d          |                      d          }|st          d           ||          }|                    |            |S )a  Recovers an Algorithm from a state object.

        The `state` of an instantiated Algorithm can be retrieved by calling its
        `get_state` method. It contains all information necessary
        to create the Algorithm from scratch. No access to the original code (e.g.
        configs, knowledge of the Algorithm's class, etc..) is needed.

        Args:
            state: The state to recover a new Algorithm instance from.

        Returns:
            A new Algorithm instance.
        algorithm_classNzQNo `algorithm_class` key was found in given `state`! Cannot create new Algorithm.r   z+No `config` found in given Algorithm state!)r   )r  r   __setstate__)r   rS  r   new_algos       r   r   zAlgorithm.from_state@  s      ,1995F+G+G"/   8$$ 	LJKKK"?&111e$$$ r   
export_dironnxc                 X    |                      |                              ||           dS )a  Exports policy model with given policy_id to a local directory.

        Args:
            export_dir: Writable local directory.
            policy_id: Optional policy id to export.
            onnx: If given, will export model in ONNX format. The
                value of this parameter set the ONNX OpSet version to use.
                If None, the output format will be DL framework specific.
        N)r  export_model)r;  rV  r=  rW  s       r   export_policy_modelzAlgorithm.export_policy_modelb  s,      		""//
DAAAAAr   c                     |                      |          }|t          d| d          |                    |           dS )a  Exports Policy checkpoint to a local directory and returns an AIR Checkpoint.

        Args:
            export_dir: Writable local directory to store the AIR Checkpoint
                information into.
            policy_id: Optional policy ID to export. If not provided, will export
                "default_policy". If `policy_id` does not exist in this Algorithm,
                will raise a KeyError.

        Raises:
            KeyError: if `policy_id` cannot be found in this Algorithm.
        NzPolicy with ID z not found in Algorithm!)r  KeyErrorexport_checkpoint)r;  rV  r=  r  s       r   export_policy_checkpointz"Algorithm.export_policy_checkpointt  sN    $ ++>PYPPPQQQ  ,,,,,r   checkpoint_dirc                    t          | j                  5  | j        j        r/|                     || j        j                   	 ddd           dS t          j        |          }|                                 }i }d|v r&d|d         v r|d         	                    di           }| j        j        rt          |d<   n
t          |d<   |dz  }t          |d          5 }t          j        ||           ddd           n# 1 swxY w Y   t          |dz  d	          5 }t          j        d
t!          |d                   dt!          |          t#          |                                          t&          j        t&          j        d|           ddd           n# 1 swxY w Y   |                                D ]`\  }}t/          |d           |dz  |z  }t1          j        |d           |                     |          }	|	                    ||           a| j        j        r:t0          j                            |d          }
| j                            |
           ddd           dS # 1 swxY w Y   dS )ao  Exports checkpoint to a local directory.

        The structure of an Algorithm checkpoint dir will be as follows::

            policies/
                pol_1/
                    policy_state.pkl
                pol_2/
                    policy_state.pkl
            learner/
                learner_state.json
                module_state/
                    module_1/
                        ...
                optimizer_state/
                    optimizers_module_1/
                        ...
            rllib_checkpoint.json
            algorithm_state.pkl

        Note: `rllib_checkpoint.json` contains a "version" key (e.g. with value 0.1)
        helping RLlib to remain backward compatible wrt. restoring from checkpoints from
        Ray 2.0 onwards.

        Args:
            checkpoint_dir: The directory where the checkpoint files will be stored.
        )use_msgpackNr  r  r   zalgorithm_state.pklwbr   r  r   cloudpickle)r   r   r   
state_filer   ray_version
ray_commitTr*  r   r   )rE  r  )r   r.  r   r  save_to_path_use_msgpack_checkpointspathlibPath__getstate__r   rQ   rP   openpickledumpjsonr  r  r  r  __version__
__commit__r  r-   r  r  r  r]  r   joinr   )r;  r_  r   r  rd  fpidrE  
policy_dirr  learner_state_dirs              r   save_checkpointzAlgorithm.save_checkpoint  s   : &d&HII ?	C ?	C {7 !!" $ D "    ?	C ?	C ?	C ?	C ?	C ?	C ?	C ?	C %\.99N%%''E M5  _h%G%G %h 3 3OR H H {7 A.W*++.@*+ (*??Jj$'' &1E1%%%& & & & & & & & & & & & & & & n'>>DD 	 +.1%8L2M.N.N"/&)*oo&*=+=+=+?+?&@&@'*&)n                    &3%8%8%:%: P P!\"3d3333+j83>
J6666--((,(OOOO {7 C$&GLL$K$K!"//0ABBB?	C ?	C ?	C ?	C ?	C ?	C ?	C ?	C ?	C ?	C ?	C ?	C ?	C ?	C ?	C ?	C ?	C ?	Csb   /I+B
I+C=1I+=D	I+D	I+A1FI+F	I+F	 B>I++I/2I/c           	         t          | j                  5  | j        j        r|                     |           n>t          |          }t                              |          }|                     |           t          d| j
        | j        j        t          |                      d d d            d S # 1 swxY w Y   d S )Non_checkpoint_loaded)r  r1  )r   r-  r   r  restore_from_pathrS   r   r   rT  r    r  callbacks_on_checkpoint_loadedr  )r;  r_  r   checkpoint_datas       r   load_checkpointzAlgorithm.load_checkpoint  s   %d&HII 	 	 {7 	3&&~6666 #6n"E"E"+"O"O## # !!/222 &"&.$(K$Nd+++	   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   BB33B7:B7)not_componentsr~  r~  c          
         | j         j        st          d          i }| j         j        r| j        r|                     t          ||          re | j        j        d|                     t          |          t          |                     t          |                    t          gz   d||t          <   nz|                     t          ||          r!| j                                        |t          <   |                     t          ||          r!| j                                        |t          <   | j        r|                     t           ||          re | j        j        d|                     t          |          t          |                     t          |                    t          gz   d||t           <   |                     t"          ||          rO | j        j        d|                     t"          |          |                     t"          |          d||t"          <   | j                                        |t(          <   | j        |t,          <   |S )Nz_Algorithm.get_state() not supported on the old API stack! Use Algorithm.__getstate__() instead.)r~  r~  r  )r   r  r  r  r  _check_componentr#   r
  _get_subcomponentsr*   rG   r$   r  r)   r  r[  r%   r'   r   r   r(   training_iterationr   )r;  r~  r~  r   r   s        r   r
  zAlgorithm.get_state  s    {= 	8  
  ;  	A A(((*n   3L$/2K 3#'#:#:/$ $ (2 334GXX( ( //(0	3 3 !3 3E./ ((5z>  A
 4>>@@ 9 ((5z>  A
 4>>@@ 9  	D$9$9%z>%
 %
 	 0Nt/C/M 	0223F
SS)++,?PP   
 '' (	0 	0 	0 	0E+,   !8*nUU 	-IT-?-I .223JJWW#66+^   . .
 . .E)* +/,*@*@*B*B&' %)$; !r   c                    t           |v r| j        r&| j                            |t                               n`| j                            |t                    t                              | j                            |t                    t                              | j                            | j	        | j        | j
                            t          t          fd          | j        | j                   | j        rt          |v r| j        r%| j                            |t                               | j                            | j        | j        | j
                            t          t          fd          | j        | j                   t$          |v rn| j                            |t$                              | j                            | j        d           | j        r!| j                            | j        d           t*          |v r%| j
                            |t*                              t,          |v r|t,                   | _        d S d S )Nr   r  rA  Tr.  )r#   r  r  r  r$   r  r)   r   r  r   r   r  r]   rm   r   r%   r[  r   r'   r   r9  r(   r   
_iterationr;  r   s     r   r  zAlgorithm.set_stateS  sd     5(( ))%0D*EFFFF,66./0QR   ,66./0QR   !88{ O"&,"3"3')GHRS #4 # # #:": 9    % 	*Cu*L*L# L$..u5I/JKKK&==- O"&,"3"3')GHRS #4 # # #:": >    #e++((/F)GHHH!..-1-?# /    ) *77151C#' 8    $u,,L""5)A#BCCC&&#$67DOOO '&r   rR   c                    t           | j        fg}| j        j        r)| j        r"|                    t          | j        f           nc| j        j        rW| j        sP| j        r!|                    t          | j        f           | j	        r!|                    t          | j	        f           | j        r!|                    t          | j        f           |S rY  )r'   r   r   r  r  ra  r#   r  r$   r  r)   r[  r%   )r;  r~  s     r   get_checkpointable_componentsz'Algorithm.get_checkpointable_components  s    %d&89

 ;  	T_ 	%t7    [" 	4? 	+ !!68TU   + !!68TU    	-(   r   c                 :    | j                                         fi fS rY  )r   r
  rV  s    r   get_ctor_args_and_kwargsz"Algorithm.get_ctor_args_and_kwargs  s%     [""$$&
 	
r   c                     t                      j        |g|R i | t          j        |          }|t          z                                  sd|v r0t          |d         v r!| j                            | j        d           | j        	                                dk    r| j
        s|t          z                                  r | j        j        |t          z  g|R i | |t          z                                  r | j        j        |t          z  g|R i | | j                            | j        d | j                            t&          t(          fd          | j        | j                   d S | j        	                                dk    rV| j
        rQ| j                            | j        | j                            t&          t(          fd          | j
                   d S d S d S )N	componentTr.  r   r  rA  )r   r  rB  )r   rz  ri  rj  r'   is_dirr   r9  r   num_remote_env_runnersr  r$   r  r)   r  r  r   r   r  r]   rm   )r;  r   argsr   r   s       r   rz  zAlgorithm.restore_from_path  sf    	"!$8888888 |D!!**2244 	6!!&=AT&T&T !..-1-?# /     7799A==do=88@@BB >,><<?C  GM   88@@BB >,><<?C  GM   !88{ "&,"3"3')GHRS #4 # # #:": 9 	 	 	 	 	 "99;;a??DO?!88{"&,"3"3')GHRS #4 # # !O 9      @???r   resultc                     t          | j                  5  t          d| j        | j        j        t          | | j        |                     d d d            n# 1 swxY w Y   t          j	        | |           d S )Non_train_result)r  r  r  r1  )
r   r/  r    r  r   callbacks_on_train_resultr  r   r   
log_result)r;  r  s     r   r  zAlgorithm.log_result  s     &d&QRR 
	 
	!"&.$(K$I"#'<!  		 	 	 	
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 	T6*****s   8AA Ac                    t          | d          r | j        | j                                         t          | d          r | j        | j                                         t          | d          r | j        | j                                         t          | d          r"| j        | j                                         d S d S d S )Nr   r  r   r   )r  r   shutdownr  clearr   stopr   rV  s    r   cleanupzAlgorithm.cleanup  s     4)) 	*d.@.L''))) 4455 	3*6*00222 4+,, 	)1F1R!&&(((D122	.*6&++-----		. 	.66r   c                    |                                                      |          }|                                 |                                 |                                }|                                 |                                 |j        rt          |          }n!|j        |j        rdn|j        s|j	        ndd}t          |          }|                     |          rt          |          }ng }|                     |          rt          |          }ng }g }|j        rt          |          }|g|z   |z   |z   |z   }t          ||j                  S )Nr   )CPUGPU)bundlesstrategy)r  r  r  r  r  r  r   num_cpus_for_main_process
_fake_gpusnum_gpusr   r  r  r   r   r   placement_strategy)	r   r   eval_configmain_processenv_runner_bundleseval_env_runner_bundlesoffline_eval_runner_bundleslearner_bundlesr  s	            r   default_resource_requestz"Algorithm.default_resource_request  s    ''))::6BB99;;. 	3F;;LL 7 (AA ">	 	L 5V<<44[AA 	)&=k&J&J##&(#88EE 	-*J;*W*W''*,'. 	;26::O N !%& ** 	 	 %.
 
 
 	
r   c                     dS )zPre-evaluation callback.Nr  rV  s    r   rP  zAlgorithm._before_evaluateF  s	     	r   env_specifierc                     t           t                    r`t          j        t                     r t          j        t                     fS d v r	 fd} |fS  t          j        t                     fS t           t                    rЉ }|d         rt          j                            d          }t          j        |          t          j        d          k    rt          d          t          j        d	           G d
 d                       |fdfS t#           t$          j                  r|t          j        t                     fS | fdfS  dd fS t          d                               dz             )a}  Returns env_id and creator callable given original env id from config.

        Args:
            env_specifier: An env class, an already tune registered env ID, a known
                gym env name, or None (if no env is used).
            config: The AlgorithmConfig object.

        Returns:
            Tuple consisting of a) env ID string and b) env creator callable.
        r   c                     	 t          |           }n/# t          $ r" t          t          j                            w xY w|S rY  )rY   r   rW   rV   r   )env_contextenv_objr  s     r   env_creator_from_classpathzEAlgorithm._get_env_id_and_creator.<locals>.env_creator_from_classpathb  s[    "-m["I"I%   &:A-PP   #Ns	    ,A )env_descriptorremote_worker_envsgymz0.22zCannot specify a gym.Env class via `config.env` while setting `config.remote_worker_env=True` AND your gym version is >= 0.22! Try installing an older version of gym or set `config.remote_worker_env=False`.rs  )r  c                       e Zd Zd Zd ZdS )3Algorithm._get_env_id_and_creator.<locals>._wrapperc                     | j         | j        fS rY  )r  r  rV  s    r   _get_spacesz?Algorithm._get_env_id_and_creator.<locals>._wrapper._get_spaces  s    #5t7HHHr   c                 .    ddl m} t          | |          S )Nr   )MultiAgentEnv)ray.rllib.env.multi_agent_envr  r   )r;  r  s     r   _is_multi_agentzCAlgorithm._get_env_id_and_creator.<locals>._wrapper._is_multi_agent  s$    OOOOOO)$>>>r   N)r  
__module____qualname__r  r  r  r   r   _wrapperr    s5        I I I? ? ? ? ?r   r  c                 .                         |           S rY  )r  )r   r  s    r   r  z3Algorithm._get_env_id_and_creator.<locals>.<lambda>  s    8??3+?+? r   c                      |           S rY  r  )r   r  s    r   r  z3Algorithm._get_env_id_and_creator.<locals>.<lambda>  s    ==+=+= r   Nc                     d S rY  r  )r   s    r   r  z3Algorithm._get_env_id_and_creator.<locals>.<lambda>  s    D r   z {} is an invalid env specifier. zlYou can specify a custom env as either a class (e.g., YourEnvCls) or a registered env id (e.g., "your_env").)r   r  r   containsr   r  r  r  r6   r   r  metadatar   parser   r  r  r  r  Envr   )r  r   r  env_idgym_versionr  s   `    @r   r  z!Algorithm._get_env_id_and_creatorK  s    mS)) H	(mDD $&6&:;&V&VVV %%# # # # # %&@@@ %i&7$]' ' '   t,, 0	"F*+ "> (088??=--v1F1FFF$4   Q'''	? 	? 	? 	? 	?} 	? 	? ('	? ??????M3733 >y0$#0        ====== "0000 299-HHPP  r   r  r  c                    |r:|j         dk    r1t          j        |j        ||j        |j        |j                   dS dS dS )ag  Synchronizes the filter stats from `workers` to `central_worker`.

        .. and broadcasts the central_worker's filter stats back to all `workers`
        (if configured).

        Args:
            central_worker: The worker to sync/aggregate all `workers`' filter stats to
                and from which to (possibly) broadcast the updated filter stats back to
                `workers`.
            workers: The EnvRunnerGroup, whose EnvRunners' filter stats should be used
                for aggregation on `central_worker` and which (possibly) get updated
                from `central_worker` after the sync.
            config: The algorithm config instance. This is used to determine, whether
                syncing from `workers` should happen at all and whether broadcasting
                back to `workers` (after possible syncing) should happen.
        NoFilter)update_remoter  use_remote_data_for_updateN)observation_filterrE   synchronizefiltersupdate_worker_filter_stats)sync_filters_on_rollout_workers_timeout_suse_worker_filter_stats)r;  r  r  r   s       r   r$  z!Algorithm._sync_filters_if_needed  sh    .  	f7:EE%&$? & P+1+I     	 	EEr   c                     d| S )Na  

You can adjust the resource requests of RLlib Algorithms by calling `AlgorithmConfig.env_runners(num_env_runners=.., num_cpus_per_env_runner=.., num_gpus_per_env_runner=.., ..)` and `AgorithmConfig.learners(num_learners=.., num_gpus_per_learner=..)`. See the `ray.rllib.algorithms.algorithm_config.AlgorithmConfig` classes (each Algorithm has its own subclass of this class) for more info.

The config of this Algorithm is: r  r
  s     r   resource_helpzAlgorithm.resource_help  s    9 179 9		
r   nowtime_this_iter	timestampdebug_metrics_onlyc                     t                                          ||||          }d|vrt          d          t          |d         t                    s:t          |d         t
                    sJ |d                                         |d<   |S )Nr   z3`config` key not found in auto-filled results dict!)r   get_auto_filled_metricsr\  r   r  r   r	  )r;  r  r  r  r  auto_filledr   s         r   r  z!Algorithm.get_auto_filled_metrics  s     gg55,>
 
 ;&&PQQQ +h/66 	Dk(3_EEEEE$/$9$A$A$C$CK!r   config1config2_allow_unknown_configsc                     t          j        |          }d|v r.t          |d                   t          u rt	          ddd           || j        }t          |||| j        | j        | j	                  S )a  Merges a complete Algorithm config dict with a partial override dict.

        Respects nested structures within the config dicts. The values in the
        partial override dict take priority.

        Args:
            config1: The complete Algorithm's dict to be merged (overridden)
                with `config2`.
            config2: The partial override config dict to merge on top of
                `config1`.
            _allow_unknown_configs: If True, keys in `config2` that don't exist
                in `config1` are allowed and will be added to the final config.

        Returns:
            The merged full algorithm config dict.
        r  zcallbacks dict interfaceza class extending rllib.callbacks.callbacks.RLlibCallback; see `rllib/examples/metrics/custom_metrics_and_callbacks.py` for an example.Tr*  )
r   deepcopyr   r  r   r  rF   _allow_unknown_subkeys%_override_all_subkeys_if_type_changes_override_all_key_list)r   r  r  r  s       r   r  z!Algorithm.merge_algorithm_configs  s    . -(('!!d7;+?&@&@D&H&H*     ")%(%?""&5&
 
 	
r   r  r  c                     dS )a  Env validator function for this Algorithm class.

        Override this in child classes to define custom validation
        behavior.

        Args:
            env: The (sub-)environment to validate. This is normally a
                single sub-environment (e.g. a gym.Env) within a vectorized
                setup.
            env_context: The EnvContext to configure the environment.

        Raises:
            Exception: in case something is wrong with the given environment.
        Nr  )r  r  s     r   rh  zAlgorithm.validate_env  s	    " 	r   TrainIterCtxc           
      T   t          | j                  5  | j                            t          t
          f          5  | j                            d          dk    r2t          	                                st                                           d}t          |           5 }|                    |          sE| j                            t          t          f          5  |                     | j                  }|r"|                     | j        | j        |           ddd           n# 1 swxY w Y   | j                            t          t$          f          5  t          | j                  5  |                                 }ddd           n# 1 swxY w Y   d}ddd           n# 1 swxY w Y   |t+          d          | j                            t.          d	d
           |                    |          Eddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   | j        j        rA| j                            d dddd          }| j                            |t8                     | j                                        }ddd           n# 1 swxY w Y   ||fS )a@  Runs one training iteration (`self.iteration` will be +1 after this).

        Calls `self.training_step()` repeatedly until the configured minimum time (sec),
        minimum sample- or minimum training steps have been reached.

        Returns:
            The ResultDict from the last call to `training_step()`. Note that even
            though we only return the last ResultDict, the user still has full control
            over the history and reduce behavior of individual metrics at the time these
            metrics are logged with `self.metrics.log_...()`.
        	frameworktf2Falgor   r   restored_env_runner_indicesNTa<  `Algorithm.training_step()` should NOT return a result dict anymore on the new API stack! Instead, log all results, timers, counters through the `self.metrics` (MetricsLogger) instance of the Algorithm and return None. The logged results are compiled automatically into one single result dict per training iteration.rs  r  )reducec                 *    |                                  S rY  )r  r  s    r   r  z7Algorithm._run_one_training_iteration.<locals>.<lambda>n  s    u'8'8':': r   r   r  )r  r  r  r  ignore_ray_errorsru  )r   r%  r   r"  r}   r~   r   r  tfexecuting_eagerlytf1enable_eager_executionr  should_stoprv   r  r   (_make_on_env_runners_recreated_callbacksr   r(  r  r   	log_valuers   r  r  foreach_actor_async_fetch_readyry  rZ   compile)r;  has_run_oncer)  r  training_step_return_valueremote_aggregator_metricscompiled_metricss          r   r  z%Algorithm._run_one_training_iteration(  s`    &d&STT I	6 I	6&&0H'IJJ 5 5 ;??;//588AUAUAWAW8..000$!t,,, , -88FF )!\22F<U3VWW " "'+'?'?@U'V'VH' " $ M M+/;595J@H !N !" !" !"	" " " " " " " " " " " " " " " "\22F<O3PQQ 0 0!9 $ @" " R R >B=O=O=Q=Q :R R R R R R R R R R R R R R R ,0L0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6A",!Q# #  ..A#( /   K -88FF ), , , , , , , , , , , , , , ,5 5 5 5 5 5 5 5 5 5 5 5 5 5 5n {< ,0,J,j,j::!$'$)&+ -k - -) &&-0 '     $|3355SI	6 I	6 I	6 I	6 I	6 I	6 I	6 I	6 I	6 I	6 I	6 I	6 I	6 I	6 I	6V  //s   'JA#H=H?D'	H'D++H.D+/)HF'	-FF'	FF'	FF'	H'F++H.F+/AH;HHHHHJH"	"J%H"	&A)JJ"Jc                 P   | j         | j                            t          t          f          5  |                     | j                   }|r7t          d| j        | j        j	        t          | | j         |                     ddd           n# 1 swxY w Y   | j                            t          t          f          5  |                                 }ddd           n# 1 swxY w Y   | j         -| j         j        |d<   | j         j        |d<   | j         j        |d<   t           |iS )zRuns offline evaluation step via `self.offline_evaluate()` and handling runner
        failures.

        Returns:
            The results dict from the offline evaluation call.
        N!on_offline_eval_runners_recreated)r  r   env_runner_indicesr1   num_healthy_offline_eval_runners8offline_runners_actor_manager_num_outstanding_async_reqs(num_remote_offline_eval_runners_restarts)r  r   r"  r}   rx   r  r    r  r   +callbacks_on_offline_eval_runners_recreatedr  ru   r?  r;  num_in_flight_async_reqsnum_remote_runner_restartsrb   )r;  r  r  s      r   r   z%Algorithm._run_one_offline_evaluation  s    )5&&0R'STT  <<2   !;*.. KS#&*-1-K/7                      ( \""F,N#OPP 	3 	30022L	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3
 )5 .I 2
 .G J
 .I : #L11s$   ABBB?C  C$'C$c                 l   t          | j                  5  | j        | j        j        rY| j                            t          t          f          5  | 	                    | j                  }ddd           n# 1 swxY w Y   n?| j
        d         5  | 	                    | j                  }ddd           n# 1 swxY w Y   |r"|                     | j        | j        |           | j        j        rU| j                            t          t          f          5  |                     |          }ddd           n# 1 swxY w Y   nu| j
        t                   5  |                     |          }ddd           n# 1 swxY w Y   | j
        t                                       | j        t"                              | j        T| j                                        |d<   | j                                        |d<   | j                                        |d<   ddd           n# 1 swxY w Y   t*          |iS )a@  Runs evaluation step via `self.evaluate()` and handling worker failures.

        Args:
            parallel_train_future: In case, we are training and avaluating in parallel,
                this arg carries the currently running ThreadPoolExecutor object that
                runs the training iteration. Use `parallel_train_future.done()` to
                check, whether the parallel training job has completed and
                `parallel_train_future.result()` to get its return values.

        Returns:
            The results dict from the evaluation call.
        Nrestore_eval_workersr  r  r  (actor_manager_num_outstanding_async_reqsnum_remote_worker_restarts)r   r&  r   r   r  r   r"  r}   rw   r  r  r  r   ra   ro  push_units_processedr!  rl   rZ  r  r  rb   )r;  r  r  r  s       r   r  zAlgorithm._run_one_evaluation  s     &d&KLL -	L -	L)5;A X..!?@  X X $(#;#;D<V#W#WX X X X X X X X X X X X X X X
 &<= X X#'#;#;D<V#W#WX X X X X X X X X X X X X X X  AA#5)-)C4< B    {= \**F4N+OPP  #'==.C $1 $ $L              
 \"<=  #'==.C $1 $ $L               78MMN#QR   )5 .IIKK )
 .GGII >
 .IIKK 0W-	L -	L -	L -	L -	L -	L -	L -	L -	L -	L -	L -	L -	L -	L -	L^ #L11s   :H"A6*H"6A:	:H"=A:	>H"B6*H"6B:	:H"=B:	>AH"D:.H":D>	>H"D>	H"E;/H";E?	?H"E?	BH""H&)H&c                 N    t           j                                        5 } j        j        r|                     fd          }n|                     fd          }                     |          }|                                \  }}ddd           n# 1 swxY w Y   |||fS )a8  Runs one training iteration and one evaluation step in parallel.

        First starts the training iteration (via `self._run_one_training_iteration()`)
        within a ThreadPoolExecutor, then runs the evaluation step in parallel.
        In auto-duration mode (config.evaluation_duration=auto), makes sure the
        evaluation step takes roughly the same time as the training iteration.

        Returns:
            A tuple containing the training results, the evaluation results, and
            the `TrainIterCtx` object returned by the training call.
        c                  ,                                      S rY  )r  rV  s   r   r  zRAlgorithm._run_one_training_iteration_and_evaluation_in_parallel.<locals>.<lambda>  s    D<<>> r   c                  ,                                      S rY  )r  rV  s   r   r  zRAlgorithm._run_one_training_iteration_and_evaluation_in_parallel.<locals>.<lambda>  s    DJJLL r   r  N)
concurrentfuturesThreadPoolExecutorr   r  submitr  r  )r;  executorr  evaluation_resultsr  r)  s   `     r   r  z@Algorithm._run_one_training_iteration_and_evaluation_in_parallel  s    2244 	K{= (0>>>>) )%% )1LLLL) )% "&!9!9&; ": " " -B,H,H,J,J)M>%	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K 	K( 0.@@s   A+BBBc                     t          | j        j        j                  dk    sJ | j        j        pd}di i}| j                                        D ]*\  }}|                    | j	        |          |d         |<   +|S )a6  Runs offline evaluation via `OfflineEvaluator.estimate_on_dataset()` API.

        This method will be used when `evaluation_dataset` is provided.
        Note: This will only work if the policy is a single agent policy.

        Returns:
            The results dict from the offline evaluation call.
        rs  rI  )n_parallelism)
r  r   rj  rL  r   r  r  r  estimate_on_datasetr  )r;  parallelismr*  evaluator_nameoffline_evaluators        r   rQ  z/Algorithm._run_offline_evaluation_old_api_stack  s     4(9DEEJJJJ,GL1 6;151G1M1M1O1O 	 	-N- "55') 6   !!78  $#r   r  r   c                 J    |j         o|j         }| o|j        dk    p|j        S )a  Determines whether we need to create evaluation workers.

        Returns False if we need to run offline evaluation
        (with ope.estimate_on_dastaset API) or when local worker is to be used for
        evaluation. Note: We only use estimate_on_dataset API with bandits for now.
        That is when ope_split_batch_by_episode is False.
        TODO: In future we will do the same for episodic RL OPE.
        r   )r   r  r  r  )r   r  run_offline_evaluations      r   r  z/Algorithm._should_create_evaluation_env_runners,  sE     5 ;:: 	 *) 
2Q6 /.	
r   c                 *    |j         dup
|j        dk    S )z@Determines whether we need to create offline evaluation workers.Nr   )r  r  )r   r  s     r   r  z3Algorithm._should_create_offline_evaluation_runnersB  s$    
 34? 83a7	
r   c                ~   t          | j                  5  | j        rt          d          |                                }|                    t          i                               t          d          |t          <   |rHt          |t                    rt          |          dk    r	t          |v sJ |                    |           | j        rY| j                                        | j                                        d|t           <   d| j                                        i|d<   d d d            n# 1 swxY w Y   |S )Na  `Algorithm._timers` is no longer supported on the new API stack! Instead, use `Algorithm.metrics.log_time([some key (str) or nested key sequence (tuple)])`, e.g. inside your custom `training_step()` method, do: `with self.metrics.log_time(('timers', 'my_block_to_be_timed')): ...`r   rs  )r  r  r  r   )r   r'  r  r   r   r  r]   rm   r   r  r  rb   r  r   rZ  r  rc   r  )r;  r  r  r+  s       r   r#  z$Algorithm._compile_iteration_resultsK  s   %d&RSS *	 *	|  \   #0"4"4"6"6G 7>kk"B7 7c0!44 23
  -|T22L))Q..*l:::; |,,, $  -HHJJ -HHJJ2 2-. ?-FFHH/*+M*	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	X s   DD22D69D6c                    t          d| j        |j        t          | |||j                             t          d| j        t          | |||j                             d S )Non_env_runners_recreated)r  r   r  is_evaluationr1  on_workers_recreated)r  r  
worker_idsr  )r2  r   )r    r  "callbacks_on_env_runners_recreatedr  in_evaluation)r;  r   r   r  s       r   r  z2Algorithm._make_on_env_runners_recreated_callbacksz  s     	&"n!'!J!1#>$2	  	
	
 
	
 
	
 
	
 	""n+6$2	  		
 		
 		
 		
 		
 		
r   c           
          | j         j        rKt          |           j         d| j         j         d| j         j         d| j         j         d| j         j         d
S t          |           j        S )Nz(env=z; env-runners=z; learners=z; multi-agent=))r   r  r   r  r  r  r  r  rV  s    r   __repr__zAlgorithm.__repr__  s    ;3 		'::&  { 6:k6Q  K4   ${9   ::&&r   c                 ,    | j         r| j         j        S dS )z>The local EnvRunner instance within the algo's EnvRunnerGroup.N)r   rj  rV  s    r   r  zAlgorithm.env_runner  s        	:(99tr   c                 ,    | j         r| j         j        S dS )zIThe local EnvRunner instance within the algo's evaluation EnvRunnerGroup.N)r   rj  rV  s    r   r[  zAlgorithm.eval_env_runner  s      % 	?->>tr   c                    t          t          j        |d                    t          t          j        t	          |d                              | j        j        }|t          vrd}t          t          j        |           dS )zkRecord the framework and algorithm used.

        Args:
            config: Algorithm config dict.
        r  r  USER_DEFINEDN)	r   r   RLLIB_FRAMEWORKRLLIB_NUM_WORKERSr  r   r  ALL_ALGORITHMSRLLIB_ALGORITHM)r;  r   algs      r   r  zAlgorithm._record_usage  sp     	v5vk7JKKKv7VDU=V9W9WXXXn%n$$ Cv5s;;;;;r   export_formatsc           	         t          j        |           i }t           j        |v rNt          j                            |t           j                  }|                     |           ||t           j        <   t           j        |v rNt          j                            |t           j                  }|                     |           ||t           j        <   t           j	        |v rpt          j                            |t           j	                  }|                     |t          t          j        dd                               ||t           j	        <   |S )N
ONNX_OPSET11)rW  )r   r  
CHECKPOINTr  r   rr  r^  MODELrZ  ONNXr   getenv)r;  r/  rV  exportedr   s        r   _export_modelzAlgorithm._export_model  s    	n---"n447<<
L,CDDD))$///04H\,-//7<<
L,>??D$$T***+/H\'(..7<<
L,=>>D$$TBIlD4Q4Q0R0R$SSS*.H\&'r   c                    | j         j        rt          d          t          |           | j                                         d}t          | d          r!| j        j                                        |d<   t          | d          r| j        | j	        j
        |d<   | j        |d<   | j        6| j                             d	          r| j                                        |d
<   | j        |t          <   |S )a  Returns current state of Algorithm, sufficient to restore it from scratch.

        Returns:
            The current state dict of this Algorithm, which can be used to sufficiently
            restore the algorithm from scratch without any other information.
        zgAlgorithm.__getstate__() not supported anymore on the new API stack! Use Algorithm.get_state() instead.)rS  r   r   r  r   Neval_policy_mapping_fncountersstore_buffer_in_checkpointsr  )r   r  r  r   r
  r  r   rj  r   r[  r   r!  r  r  r  r   r  s     r   rk  zAlgorithm.__getstate__  s    ;9 	5    $Dzzk++--
 

 4+,, 	Q"3DNNPPE(O
 D122	U*6.2.B.TE*+ !Nj #/DKOO)5
 5
/ ,0+C+M+M+O+OE'( %)$; !r   c                    | j         j        rt          d          t          | d          rd|v r|d         r| j                            |d                    t          j        |d                   | j        	                    fdd           | j
        r5|                    d          fd}| j
        	                    |           | j        y| j         j        r@d
|v r!| j                            |d
                    nHt                              d           n-d
|v r)t!          d          rt                              d           d|v r|d         | _        t$          |v r|t$                   | _        d	S d	S )zSets the algorithm to the provided state.

        Args:
            state: The state dict to restore this Algorithm instance to. `state` may
                have been returned by a call to an Algorithm's `__getstate__()` method.
        zgAlgorithm.__setstate__() not supported anymore on the new API stack! Use Algorithm.set_state() instead.r   r  c                 R    |                      t          j                            S rY  r  )r  remote_state_refs    r   r  z(Algorithm.__setstate__.<locals>.<lambda>  s    !++cg.>&?&?@@ r   FrQ  r:  c                     |                      t          j                             |                                d S rY  )r  r  r  set_policy_mapping_fn)r  _eval_policy_mapping_fnr?  s    r   _setup_eval_workerz2Algorithm.__setstate__.<locals>._setup_eval_worker  s=    KK(8 9 9::: ++,CDDDDDr   Nr  zI`store_buffer_in_checkpoints` is True, but no replay data found in state!-no_store_buffer_in_checkpoints_but_data_foundzL`store_buffer_in_checkpoints` is False, but some replay data found in state!r;  )r   r  r  r  r  r  r  r  r   r  r   r  r  r<  r  r_  r   r!  r   r  )r;  r   rC  rB  r?  s      @@r   rT  zAlgorithm.__setstate__  s    ;9 	5   4+,, 	RU1B1BuX1BO%%eHo666"wuX77!44@@@@!& 5    ) R*/))4L*M*M'E E E E E E *==>PQQQ #/ {6 (E11,66u=R7STTTTNN/    '%//H?5 5/ +  
 ":.DN&&#$67DOOO '&r   )r   r   r   r   c          	      f   | d         dk    rt          d| d          d          d}|                     d          dk    rt          d	          }t          | d
         d          5 }|,|                                }|                    |d          }nt          j        |          }ddd           n# 1 swxY w Y   | d         t          j	        d          k    r|                    d          |                    d          rn|d         }t          n|d                   fd|d                                         D             |d<   t          |d         t                    r-t          |d                   pt          |d                   |d<   |d                                         }	t          |	t"                    r|	                    |d                   }
n!t&                              |	|d                   }
|
j        }t          |t          t,          t.          f          rfd|D             }n fd|                                D             } |
j        d&||d|d|ini  |
|d<   i |d<   D ]}t2          j                            | d         d|d| d         dk    rdndz             }t2          j                            |          st          d | d!| d"          t          |d          5 }||                    |          |d         |<   nt          j        |          |d         |<   ddd           n# 1 swxY w Y   |||d<   ||d#         t:          k    r||d#<   |d         j        r)t2          j                            | d         d$          |d%<   |S )'a  Converts a checkpoint info or object to a proper Algorithm state dict.

        The returned state dict can be used inside self.__setstate__().

        Args:
            checkpoint_info: A checkpoint info dict as returned by
                `ray.rllib.utils.checkpoints.get_checkpoint_info(
                [checkpoint dir or AIR Checkpoint])`.
            policy_ids: Optional list/set of PolicyIDs. If not None, only those policies
                listed here will be included in the returned state. Note that
                state items such as filters, the `is_policy_to_train` function, as
                well as the multi-agent `policy_ids` dict will be adjusted as well,
                based on this arg.
            policy_mapping_fn: An optional (updated) policy mapping function
                to include in the returned state.
            policies_to_train: An optional list of policy IDs to be trained
                or a callable taking PolicyID and SampleBatchType and
                returning a bool (trainable or not?) to include in the returned state.

        Returns:
             The state dict usable within the `self.__setstate__()` method.
        r   r   zu`checkpoint` arg passed to `Algorithm._checkpoint_info_to_algorithm_state()` must be an Algorithm checkpoint (but is z)!Nr   r   Tr*  rd  rbF)rawr   r   r  r   c                 $    i | ]\  }}|v 	||S r  r  )r_  rt  filterr   s      r   rf  zAAlgorithm._checkpoint_info_to_algorithm_state.<locals>.<dictcomp>  s4     ' ' 'C*$$ V$$$r   r  rS  r   c                     h | ]}|v |	S r  r  )r_  rt  r   s     r   	<setcomp>z@Algorithm._checkpoint_info_to_algorithm_state.<locals>.<setcomp>  s#    QQQsj?P?P?P?P?Pr   c                 $    i | ]\  }}|v 	||S r  r  )r_  rt  specr   s      r   rf  zAAlgorithm._checkpoint_info_to_algorithm_state.<locals>.<dictcomp>  s0          "+#tsjGXGXCGXGXGXr   )r   r   r   r  r_  r   zpolicy_state.msgpckpklzOGiven checkpoint does not seem to be valid! No policy state file found for PID=z. The file not found is: r   is_policy_to_trainr  rv  r  )r   r  rT   rl  readunpackbrm  loadr   r   r  r  r   r  r   r   r  r   r  r   r  r   r  tupler/  r  r   rr  isfiler   r  )r   r   r   r   r   rs  datar   worker_stater<  
new_confignew_policiesrt  policy_state_files    `            r   r   z-Algorithm._checkpoint_info_to_algorithm_stateA  s   H 6"k11L0?0GL L L   x((I55(t444G/,/66 	'!"vvxx%88A	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 01GOE4J4JJJ		(##/		(## 0 !?L (4

,|:T J' ' ' '#/	#:#@#@#B#B' ' 'L# % 12C88 A ,<+,, , ,A&u->'?@@ '( ##45HHJJN./:: +<<U8_MM

&>>"E(O 

 &.L,dE(:;; QQQQ|QQQ       /;/A/A/C/C      #J" %"3 
 )4 )*;<<   )E(O -/L)! L L$&GLL#$45##28#<	#I#IxxuV	% %! w~~&788 $G47G G2CG G G   +T22 La*=D\\!___5c::=C[^^_5c:	L L L L L L L L L L L L L L L !,4E01!-   459III5F12?7 	)+ 019* *E%& s%   (AB77B;>B;?MM	M	c                     |                     d          r|d                              d          rdS d|d         d         v r| j        j        |d         d<   t          t          |d                   S )a  Create a MultiAgentReplayBuffer instance if necessary.

        Args:
            config: Algorithm-specific configuration data.

        Returns:
            MultiAgentReplayBuffer instance based on algorithm config.
            None, if local replay buffer is not needed.
        r   no_local_replay_bufferNEpisodeReplayBufferr   "metrics_num_episodes_for_smoothing)r  r   r^  rY   r   )r;  r   s     r   r  z2Algorithm._create_local_replay_buffer_if_necessary  s     zz011 	V<R5S5W5W$6
 6
 	 F !F+A$B6$JJJ > )*4 <0F)GHHHr   c                    | j         t                   5  | j                            d          dk    r2t                                          st                                           i }d }t          |           5 }|	                    |          s| j         d         5  | 
                    | j                  }|r"|                     | j        | j        |           d d d            n# 1 swxY w Y   | j         t                   5  |                                 }d d d            n# 1 swxY w Y   |r|}|	                    |          d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   ||fS )Nr  r  r  restore_workersr  )r  r~   r   r  r  r  r  r  r  r  r  r   r  r   r  )r;  r+  training_step_resultsr)  r  s        r   r  z3Algorithm._run_one_training_iteration_old_api_stack  s   \23 	8 	8{{++u44R=Q=Q=S=S4**,,,G$(!4((( 8N(445JKK 8&78  #'#;#;D<Q#R#R#  II'+{151F<D J   	               &9: E E040B0B0D0D-E E E E E E E E E E E E E E E - 8"7 )445JKK 88 8 8 8 8 8 8 8 8 8 8 8 8 8 8	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	80 &&s~   A%E+8#E?C&E&C**E-C*.ED$E$D((E+D(,EE+E	E+E	E++E/2E/c                   i }d|v r?|                     d          }|                     t          d            |x|d<   |t          <   |                     di           |d<   |                     di           |d<   t          |i|d<   |d d          }| j        j        t          |          z
  }|dk    r2| j        | d          |z   }t          |          | j        j        k    sJ | j                            |           | j        | j        j         d          | _        t          ||| j        j	                  |t          <   | j                                        |d<   | j                                        |d<   | j                                        |d<   t          t           t"          t$          fD ]}| j        |         ||<   |                                }	| j        j        d	k    rd|j        |t          d
z   <   |j        |t           d
z   <   |j        |	z  |t          dz   <   |j        |	z  |t           dz   <   | j        t                   |d<   nc|j        |t"          d
z   <   |j        |t$          d
z   <   |j        |	z  |t"          dz   <   |j        |	z  |t$          dz   <   | j        t"                   |d<   |d         |t0          <   | j        t                   |t2          <   |j        |t4          <   | j        t                   |d<   i }
| j                                        D ]r\  }}t;          |j        dz  d          |
d                    |          <   |                                 r+t;          |j!        d          |
d                    |          <   s|
|d<   i }| j                                        D ]
\  }}|||<   ||d<   |d         "                    |           |S )N
evaluationcustom_metricsepisode_mediar  r   r  r  r  rg  
_this_iter_throughput_per_sectimesteps_totalagent_timesteps_totali     z
{}_time_msz{}_throughputtimersr;  )#r   rb   r   r   r^  r  r"  r  r8   r  r]   r   rZ  r  r  rf   ri   rk   ro   r!  get_time_taken_secr  sampledtrainedrm   rg   ry   r  r  r  rM  r   has_units_processedmean_throughputr  )r;  r  r  r  r+  r  episodes_for_metricsmissingctime_taken_secrk  ktimerr;  counters                  r   r&  z2Algorithm._compile_iteration_results_old_api_stack  sc   
 ! ,,,,00>>L!!"4d;;;BNNGL!G,>$? %6$9$9:JB$O$O !#4#8#8"#M#M  ():;  2!!!4+@3D
 D
 
 Q;;#'#8'#CFX#X ());AB B B B 	$$%7888 $ 5[;;==!
 '9 K7'
 '
"# !<<>> 	!	

 !::<< 	6	

 !<<>> 	(	
 $#!!	
 	+ 	+A *GAJJ!4466;%66>F>NG+l:;>F>NG+l:; >1 +.CCD  >1 +.CCD *.8O)PG%&&<D<LG)L89<D<LG)L89 >1 ),AAB  >1 ),AAB *.8M)NG%& 3::K2L./48N#5
01
 4<3C/0+/>:Q+R'( **,, 	T 	THAu-25:3Da-H-HF<&&q))*((** T49%:OQR4S4S--a001" ...00 	" 	"JAw!HQKK&
x(((r   z`Algorithm.compute_single_action` should no longer be used. Get the RLModule instance through `Algorithm.get_module([module ID])`, then compute actions through `RLModule.forward_inference({'obs': [obs batch]})`.)rg  r   )prev_actionprev_rewardr  
input_dictr=  
full_fetchexploretimestepepisodeunsquash_actionclip_actionobservationrx  ry  r  rz  r{  r|  r}  r  r  c          
         || j         j        }n|| j         j        }d}|%||||
J |            |t          j                 }n|
J |            |                     |          }|t          d| d          |j        t                   }t          |t          j        t          t          f          sD	 t          j        |          }n.# t          $ r! t!          dt#          |           d          w xY w|rt%          |          dk    s
J d            |d         }|                                s|                                 |t          j        |i}n0|t          j        |t          j                 i}nt!          d	          t+          d
d
|          }|                    d
            ||g          d         }|j        t          j                 }|-||t          j        <   |                    ||	|
|          \  }}}n!|                    ||||||	|
|          \  }}}|rt3          j        ||j                  }n|rt3          j        ||j                  }|s|r|||fS |S )NzcProvide either `input_dict` OR [`observation`, ...] as args to `Algorithm.compute_single_action()`!z
PolicyID 'z9' not found in PolicyMap of the Algorithm's local worker!zObservation type z# cannot be converted to np.ndarray.rs  z/Only one preprocessor should be in the pipeliner   z2Either observation or input_dict must be provided.0)r  )rz  r|  r}  r~  )obsr   rx  ry  r  r|  r}  r~  )r   normalize_actionsclip_actionsr,   OBSr  r\  agent_connectorsr!   r   rL  ndarrayr  rT  asarray	Exceptionr   r   r  is_identityin_evalr   resetrV  compute_single_actionr   r  action_space_structr  )r;  r  r   rx  ry  r  rz  r=  r{  r|  r}  r~  r  r  err_msgr  pp_input_dictacdac_oactionextras                         r   r  zAlgorithm.compute_single_action  s   6 ""k;OO +2K; 	 !#''MM "M"
 %W[1KK**G***++>-Y - - -   $%=>+
D%'@AA 	 j55    #[(9(9 # # #  
  	5r77a<<<!R<<<AB>>## 5

*#*;"<KK+#*;
7;0G"HKK$L   -S#{CC$$$r3%yy|"i4!&1Jw{##)#?#?%!	 $@ $ $ FE55 $*#?#?''! $@ 	$ 	$ FE5  	Q 09STTFF 	Q ,VV5OPPF 	J 	5%''Ms   7C +C7z`Algorithm.compute_actions` should no longer be used. Get the RLModule instance through `Algorithm.get_module([module ID])`, then compute actions through `RLModule.forward_inference({'obs': [obs batch]})`.)
rx  ry  r  r=  r{  r|  r}  r  unsquash_actionsr  observationsr  r  c       
           ! || j         j        }n|| j         j        }|d u}|                     |          }g g }}|                                D ]\  }}| j        j        }|j                            |          !|j        |         	                    |          }n|} |j
        |         |d          }|                    |           |~||v r|                    ||                    |                    |                                           t          j        |          }|g }n"t          t!          |           }d |D             }t"          j        |i}|||t&          j        <   |||t&          j        <   |r||t"          j        <   t/          |          D ]\  }}||d| <   |                    |||	|
          \  }}}t3          j        |          }i }t!          ||          D ]C\  }}|rt3          j        ||j                  }n|rt3          j        ||j                  }|||<   Di } t/          |          D ]\  !}!fd|D             | |<   |s|r|| |fS |S )NF)r  c                 6    g | ]}t          j        |          S r  )rL  stack)r_  ss     r   r`  z-Algorithm.compute_actions.<locals>.<listcomp>0  s     000QRXa[[000r   	state_in_)rz  r|  r}  r  c                      g | ]
}|         S r  r  )r_  r  idxs     r   r`  z-Algorithm.compute_actions.<locals>.<listcomp>O  s    )A)A)AQ!C&)A)A)Ar   )r   r  r  r  r  r   rj  preprocessorsr  	transformr  ra  get_initial_staterL  r  r  zipr,   r  rD   PREV_ACTIONSPREV_REWARDSINFOSr  compute_actions_from_input_dictr   unbatchr  r  r  )"r;  r  r   rx  ry  r  r=  r{  r|  r}  r  r  r  state_definedr  filtered_obsfiltered_stateagent_idobr  preprocessedfiltered	obs_batchrz  r  r  actionsstatesinfossingle_actionsrE  aunbatched_statesr  s"                                    @r   compute_actionszAlgorithm.compute_actions  s   . ##{<!;3LT)++')2n(..00 	B 	BLHb*;F#''	22>%3I>HHLL!0v~i0eLLLH)))}U""%%eHo6666%%f&>&>&@&@AAAAH\**	=EEn-..E00%000Ek9-
"3>J{/0"3>J{/0 	-(,Jw}%e$$ 	, 	,DAq*+J1''!'!G!G!	 "H "
 "
 %,W55,77 	 	FC K/63MNN K+Av/IJJGCLL&|44 	B 	BMC)A)A)A)A&)A)A)AX&& 	J 	,e33Nr   zAlgorithm.restore_env_runners)r   r   c                     d S rY  r  )r;  r  r   s      r   r`  zAlgorithm.restore_workersV  s    r   zAlgorithm.env_runner_groupc                     | j         S rY  )r   rV  s    r   r  zAlgorithm.workersZ  s     $$r   zAlgorithm.eval_env_runner_groupc                     | j         S rY  )r   rV  s    r   rF  zAlgorithm.evaluation_workersb  s     ))r   rY  )NNN)r   N)NN)NNNF)r  r   )r  r  r  __doc__r   r   r   __annotations__r   r   r   r5   r   r   r   r  r  r  r  r]   r`   rb   rm   rp   rr   r_   _progress_metricsMETADATA_FILE_NAMESTATE_FILE_NAMEclassmethodrO   rR   r   r   r  r   r   r   r   r   r   r   r  r   rN   r   r:  rW  rL   r  r   r   r\  rM   r   r  r   rA   r  r   r,  r?  r  r  r  ro  r   rX  r=  rY  r]  r<  r^  r  r   r  r  r+   r   r0   r"  r1   r
   r   r   r/   r  r  rK   rC   r  r  rA  rD  r  rm  Spacer   r   rK  rO  staticmethodr   rZ  r^  rw  r}  r   r
  r  r  r   r  rz  r  r  r   r   r  rI   rP  r   r   r  r4   r$  r   r  r   floatr  r  rJ   r3   rh  r  r   r  r  rQ  r  r  r#  r  r%  propertyr  r[  r  r8  rk  rT  r   r   r  r  r&  r   r   r   rD   r   r  r  r`  r  rF  __classcell__)r   s   @r   r   r      sh           )-FH_%,,, (,GXm$+++ 26h~.555 7;8N3::: /3M8N+222,0L(=)000 #  & 	-) >zJ 55 355JJ 2JJ5HJJ)+)+ "22 022 1'OXn 9=e+ 6:PT #e+ e+ e+CO$e+ 45e+ Z12e+ $Hgy-A8-K$LMe+ $8$(H_$=>DEG
e+" 
#e+ e+ e+ e+ e+  [e+N  -19=	r
 r
)r
 !"f*!56	r
 r
 r
 r
 r
 Yr
hH
 H
 H
T $!? ! ! ! [ $#! $	:S	 	: 	: 	: $#	: ;Xin
O n
 n
 n
 n
  ;:n
` $ 
$v,	   [ $# XiCj C C C CJ 6; 6; Y6;p  RVJ J'
(:(MNJ 
J J J YJX4U:sC;O5P 4 4 4 46
 
 
">G >G >G@jG jG jGXR
 R
 R
 R
hEG EG EGN $eN etCy e e e $#eN $, ,c , , , $#,\ $G G G $#GR /@  H XhEW    Y2  ,0KOJN $#'(,F$ F$ F$F$ "F$
 #4.F$ )11G(HF$ '//F&GF$ F$ !F$ "&F$ 
F$ F$ F$ YF$P 
 LPJN%)(,-1i$ i$ i$i$ )11G(H	i$
 '//F&Gi$ #i$ "&i$ '+i$ 
&	i$ i$ i$ Yi$V /@ 5 5H 5V 5 5 5 [5 
5 
5HT(^$< 
5 
5 
5 
5 Y
5 D4$#7 D D D YD(  .2#'	{> 9=37OS.2PT #'(,.2+(-{> {> {>{> T&\*{>  	{> $CJ$45{> sz/0{> 0JJKL{> {+{> $Hgy-A8-K$LM{> $8$(H_$=>DEG
{>" !#{>$ "&%{>& l+'{>. 
&	/{> {> {> [{>z  0?U FJ (,-1+-?U ?U ?U?U $HgY-@$AB	?U
 $8$(H_$=>DEG
?U "&?U '+?U  
!?U ?U ?U [?UB $ ;    \ [@  0"	B BB B sm	B
 
B B B [B"  0- -- - 
	- - - [-, Xi[Cc [Cd [C [C [C [Cz Xic d    . Xn =AO AE	O O OU3
3#789O !sJsO';!<=	O 
O O O Ob Xn78y 78T 78 78 78 78r XntE#?O:O4P/Q    4 Xn
%tCH~0E*F 
 
 
 
 Xn1 1 1 1 1f Xi+ + + + + +* Xi. . . .( $Xi3
?,FFG3
	y//	03
 3
 3
  [ $#3
j   \ VS'4/0V:IV	x}j(	)V V V \Vp "  	
   
   @ Xi

5:M)M#N 

SV 

 

 

  [

 Xi #'*.#'#( h ! C=	
 ! 
     , 
 26	)
 )
$)
 ,)
 !)	)

 
)
 )
 )
 [)
V ' 
 t    _ \"W0U:~3M-N W0 W0 W0 W0r.2 .2 .2d RV?2 ?2'
(:(MN?2 
?2 ?2 ?2 ?2B"A	z:~5	6"A "A "A "AH$ $ $, 
 
 
 [
* 
 
 
 [
- - -^
 
 
<
' 
' 
'   X   X< < < "3i58	c3h   [& +d + + + [+Z <8 <8 <8 [<8|  6:PT V V VV Z12V $Hgy-A8-K$LM	V
 $8$(H_$=>DEG
V 
V V V \ [Vp I0I	(	)I I I [I: ' ' ['6 t t [tl ZN 	   3726h
 37'+&*,0/ "&"&*.&*h h h./h -./h
 ./h e_h {#h [)h h h $h 3-h "$h d^h  
Z 0$sJ2GGH	J
!h h h  [hT ZF 	   37P
 3726&*/ "&"&+/'+P P P&P -./P
 ./P ./P {#P P P $P 3-P #4.P tnP P P  [Pd Z34@@@  A@ Z(   % % X	 
% Z-   * * X	 
* * * * *r   r   c                   8    e Zd ZdefdZd Zd ZdefdZd Z	dS )	r  r  c                 0    || _         d | _        d | _        d S rY  )r  
time_start	time_stop)r;  r  s     r   r:  zTrainIterCtx.__init__l  s    	r   c                    d| _         t          j                    | _        d| _        d| _        | j        j        j        r| j        j        	                    t          t          fd          | _        | j        j        	                    t          t          t          fd          | _        t#          | j        j        	                    t          t$          fi                                                     | _        t#          | j        j        	                    t          t*          fi                                                     | _        np| j        j        t0                   | _        | j        j        t2                   | _        | j        j        t4                   | _        | j        j        t6                   | _        | j        j        j        | _        | S )Nr  r   r  )failuresr  r  rm  rn  r  r   r  r   r  r]   rm   init_env_steps_sampledrd   r[   rp   init_env_steps_trainedr  rg   r  init_agent_steps_sampledrj   init_agent_steps_trainedr!  rk   ro   rf   ri   -num_consecutive_env_runner_failures_tolerancefailure_tolerancerV  s    r   	__enter__zTrainIterCtx.__enter__q  s    )++9> 	Y*.)*;*@*@#%CDa +A + +D' +/)*;*@*@ +/MN +A + +D' -0	!&&')IJTV '  &((- -D)
 -0	!&&$&FGQS '  &((- -D)) +/)*=>S*TD'*.)*=>S*TD',0I,?@W,XD),0I,?@W,XD)IJ 	 r   c                 6    t          j                     | _        d S rY  )r  r  )r;  r  s     r   __exit__zTrainIterCtx.__exit__  s    r   r   c                      | j         | j        z
  S )z4Returns the time we spent in the context in seconds.)r  r  rV  s    r   rl  zTrainIterCtx.get_time_taken_sec  s    ~//r   c                 Z   |dv r:| xj         dz  c_         | j         | j        k    rt          d| j         d          dS | j        j        j        rD| j        j        j        dk    rt          | j        j        	                    t          t          fi                                                     | j        z
  | _        t          | j        j        	                    t          t           fi                                                     | j        z
  | _        n!| j        j        	                    t          t&          fd          | j        z
  | _        | j        j        	                    t          t*          t,          fd          | j        z
  | _        n| j        j        j        dk    rI| j        j        t2                   | j        z
  | _        | j        j        t4                   | j        z
  | _        nH| j        j        t6                   | j        z
  | _        | j        j        t8                   | j        z
  | _        | j        j        j        }| j        j        j        }| j        j        j        }|rtA          j                     | j!        z
  |k    r|r| j        |k    r|r| j        |k    rd	S dS )
N)NFrs  z9More than `num_consecutive_env_runner_failures_tolerance=z'` consecutive worker failures! Exiting.Frg  r  r   T)"r  r  r  r  r   r  r  r  r   r  r]   rg   r  r  rm  rd   rj   r  rn  rm   r  r[   rp   r  r!  rf   ri   rk   ro   min_time_s_per_iteration"min_sample_timesteps_per_iteration!min_train_timesteps_per_iterationr  r  )r;  r+  min_tmin_sample_tsmin_train_tss        r   r  zTrainIterCtx.should_stop  s   m##MMQMM}t555"-     5 9> 4	y.-??	)../1QR$& /   !&((	  34  	)..,.NO$& /   !&((	  34  I%**+-KLVW +   12  I%**(+7UV ! +   1	2  y.-??I'(?@34 
 I'(?@34  I'(=>12 
 I'(=>12 
 	 9	(Ky'I 	)++75@@" A&*lm&C&C! 'D%)\\%A%A45r   N)
r  r  r  r   r:  r  r  r  rl  r  r  r   r   r  r  k  s}        Y    
# # #J% % %0E 0 0 0 0T T T T Tr   r  (  r  r   r  r  importlib.metadataro  r  r  ri  r  r  r  collectionsr   r   typingr   r   r   r   r	   r
   r   r   r   r   r   r   	gymnasiumr  numpyrL  
pyarrow.fspyarrowrb  	packagingr   r  ray.cloudpicklerc  rm  ray._common.deprecationr   r   r   ray._common.usage.usage_libr   r   	ray.actorr   %ray.rllib.algorithms.algorithm_configr   ray.rllib.algorithms.registryr   r,  ray.rllib.algorithms.utilsr   r   r   r   r   ray.rllib.callbacks.utilsr    &ray.rllib.connectors.agent.obs_preprocr!   *ray.rllib.connectors.connector_pipeline_v2r"   ray.rllib.corer#   r$   r%   r&   r'   r(   r)   r*   r+   ray.rllib.core.columnsr,   ray.rllib.core.rl_moduler-   (ray.rllib.core.rl_module.multi_rl_moduler.   r/   "ray.rllib.core.rl_module.rl_moduler0   r1   ray.rllib.envr2   ray.rllib.env.env_contextr3   ray.rllib.env.env_runnerr4   ray.rllib.env.env_runner_groupr5   ray.rllib.env.utilsr6   ray.rllib.evaluation.metricsr7   r8   ray.rllib.execution.rollout_opsr9   ray.rllib.offliner:   ray.rllib.offline.estimatorsr;   r<   r=   r>   r?   #ray.rllib.offline.offline_evaluatorr@   ray.rllib.policy.policyrA   rB   ray.rllib.policy.sample_batchrC   rD   ray.rllib.utilsrE   rF   rG   ray.rllib.utils.actor_managerrH   ray.rllib.utils.annotationsrI   rJ   rK   rL   rM   rN   rO   ray.rllib.utils.checkpointsrP   rQ   rR   rS   rT   ray.rllib.utils.debugrU   ray.rllib.utils.errorrV   rW   ray.rllib.utils.frameworkrX   ray.rllib.utils.from_configrY   ray.rllib.utils.metricsrZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   rw   rx   ry   rz   r{   r|   r}   r~   r   $ray.rllib.utils.metrics.learner_infor   &ray.rllib.utils.metrics.metrics_loggerr   #ray.rllib.utils.metrics.ray_metricsr   r   r   ray.rllib.utils.replay_buffersr   r   $ray.rllib.utils.runners.runner_groupr   ray.rllib.utils.serializationr   r   ray.rllib.utils.spacesr   ray.rllib.utils.typingr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   ray.train.constantsr   ray.tuner   #ray.tune.execution.placement_groupsr   ray.tune.experiment.trialr   ray.tune.loggerr   r   ray.tune.registryr   r   r   ray.tune.resourcesr   ray.tune.resultr   ray.tune.trainabler   ray.utilr   ray.util.metricsr   r   ray.util.timerr   $ray.rllib.core.learner.learner_groupr   r  r   r  r  tfvr  r  r  r   r  r  r   r   <module>r      s
                      				  				   # # # # # #                                                     



                     
 G F F F F F F F ! ! ! ! ! ! A A A A A A T T T T T T              4 3 3 3 3 3 K K K K K K J J J J J J
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 + * * * * * 7 7 7 7 7 7        F E E E E E E E * * * * * * 0 0 0 0 0 0 . . . . . . 9 9 9 9 9 9 0 0 0 0 0 0        H G G G G G 4 4 4 4 4 4              A @ @ @ @ @ 6 6 6 6 6 6 6 6 H H H H H H H H B B B B B B B B B B C C C C C C                               B A A A A A J J J J J J J J 3 3 3 3 3 3 3 3 3 3 3 3' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' 'P > = = = = = @ @ @ @ @ @         
 P O O O O O O O < < < < < < L L L L L L L L . . . . . .                                       ( 5 4 4 4 4 4       E E E E E E 2 2 2 2 2 2 1 1 1 1 1 1 1 1 N N N N N N N N N N ( ( ( ( ( ( . . . . . . ( ( ( ( ( (       / / / / / / / / ! ! ! ! ! ! ;AAAAAA::::::}R		8	$	$ TF* TF* TF* TF* TF*	 TF* TF* TF*nLF F F F F F F F F Fr   