
    &`i0                        d dl Z d dlZd dlmZmZmZmZmZ d dlZ	d dl
Z
d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZmZ d dl m!Z!m"Z" d dl#m$Z$ erd dl%m&Z&  ej'        d          Z( e            \  Z)Z*Z+dZ,dZ-dZ. e"d           G d dee j/                              Z0dS )    N)TYPE_CHECKINGAnyDictOptionalTuple)COMPONENT_RL_MODULE)StepFailedRecreateEnvError)FaultAwareApply)update_global_seed_if_necessary)try_import_tf)ENV_RESET_TIMERENV_STEP_TIMER)MetricsLogger)convert_to_torch_tensor)	StateDict
TensorType)DeveloperAPI	PublicAPI)Counter)AlgorithmConfigz	ray.rllibenv_reset_failureenv_step_failurenum_env_step_failuresalpha)	stabilityc            	           e Zd ZdZd fdZej        d             Zd Zd Z	ej        de
fd	            Zde
fd
Zedeej        eef         fd            Zej        deeeej        ej        f         f         fd            ZddZddZddddee         dee         dee
e
f         fdZd ZdefdZ de!deddfdZ" xZ#S )	EnvRunnera  Base class for distributed RL-style data collection from an environment.

    The EnvRunner API's core functionalities can be summarized as:
    - Gets configured via passing a AlgorithmConfig object to the constructor.
    Normally, subclasses of EnvRunner then construct their own environment (possibly
    vectorized) copies and RLModules/Policies and use the latter to step through the
    environment in order to collect training data.
    - Clients of EnvRunner can use the `sample()` method to collect data for training
    from the environment(s).
    - EnvRunner offers parallelism via creating n remote Ray Actors based on this class.
    Use `ray.remote([resources])(EnvRunner)` method to create the corresponding Ray
    remote class. Then instantiate n Actors using the Ray `[ctor].remote(...)` syntax.
    - EnvRunner clients can get information about the server/node on which the
    individual Actors are running.
    configr   c                   |                     d          | _        d| _        |                    d          | _        |                    d| j        j                  | _        d| _        t          |j	        d          | _
        t                                                       t          rI| j        j        dk    s|j        r2t                                          st                                           d| _        | j        j        8t)          | j        j        | j        pdz   d	| j        j        z  z             | _        t-          | j        j        | j        
           t/          ddd          | _        | j                            d| j        j        i           t/          ddd          | _        | j                            d| j        j        i           dS )zInitializes an EnvRunner instance.

        Args:
            config: The AlgorithmConfig to use to setup this EnvRunner.
            **kwargs: Forward compatibility kwargs.
        F)copy_frozenr   worker_indexnum_workersN)stats_cls_lookuproottf2g    .A)	frameworkseed)rllib_env_runner_num_try_env_step_counterz8Number of env.step() calls attempted in this Env Runner.)rllib)namedescriptiontag_keysr)   .rllib_env_runner_num_env_steps_sampled_counterz/Number of env steps sampled in this Env Runner.)copyr   num_env_steps_sampled_lifetimegetr!   num_env_runnersr"   envr   r#   metricssuper__init__tf1framework_strenable_tf1_exec_eagerlyexecuting_eagerlyenable_eager_execution_seedr'   intin_evaluationr   r   _metrics_num_try_env_stepset_default_tags	__class____name___metrics_num_env_steps_sampled)selfr   kwargsr@   s      l/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/env/env_runner.pyr5   zEnvRunner.__init__5   s    (.{{u{'E'E./+
 "(N!;!; &

=$+:U V V&3#4'
 '
 '

 	
 	)*e33v7U3))++ 4 &&((( %)
;' $)+ 224 DJ 	(k/	
 	
 	
 	
 *1<R*
 *
 *
&
 	&77dn-.	
 	
 	
 /6AI/
 /
 /
+
 	+<<dn-.	
 	
 	
 	
 	
    c                     dS )aH  Checks that self.__init__() has been completed properly.

        Useful in case an `EnvRunner` is run as @ray.remote (Actor) and the owner
        would like to make sure the Ray Actor has been properly initialized.

        Raises:
            AssertionError: If the EnvRunner Actor has NOT been properly initialized.
        N rC   s    rE   assert_healthyzEnvRunner.assert_healthy{         rF   c                     dS )a  Creates the RL environment for this EnvRunner and assigns it to `self.env`.

        Note that users should be able to change the EnvRunner's config (e.g. change
        `self.config.env_config`) and then call this method to create new environments
        with the updated configuration.
        It should also be called after a failure of an earlier env in order to clean up
        the existing env (for example `close()` it), re-create a new one, and then
        continue sampling with that new env.
        NrH   rI   s    rE   make_envzEnvRunner.make_env   s	     	rF   c                     dS )a(  Creates the RLModule for this EnvRunner and assigns it to `self.module`.

        Note that users should be able to change the EnvRunner's config (e.g. change
        `self.config.rl_module_spec`) and then call this method to create a new RLModule
        with the updated configuration.
        NrH   rI   s    rE   make_modulezEnvRunner.make_module   s	     	rF   returnc                     dS )a`  Returns experiences (of any form) sampled from this EnvRunner.

        The exact nature and size of collected data are defined via the EnvRunner's
        config and may be overridden by the given arguments.

        Args:
            **kwargs: Forward compatibility kwargs.

        Returns:
            The collected experience in any form.
        NrH   )rC   rD   s     rE   samplezEnvRunner.sample   rK   rF   c                     dS )zReturns metrics (in any form) of the thus far collected, completed episodes.

        Returns:
            Metrics of any form.
        NrH   rI   s    rE   get_metricszEnvRunner.get_metrics   	     	rF   c                     |                                  }|                     t                    }|                                 }t	          j        |          ||fS )a  Convenience method for fast, async algorithms.

        Use this in Algorithms that need to sample Episode lists as ray.ObjectRef, but
        also require (in the same remote call) the metrics and the EnvRunner states,
        except for the module weights.
        )not_components)rR   	get_stater   rT   rayput)rC   	_episodes_connector_states_metricss       rE   sample_get_state_and_metricsz&EnvRunner.sample_get_state_and_metrics   sQ     KKMM	 NN:MNNN##%% wy!!#4h>>rF   c                     dS )zFReturns a dict mapping ModuleIDs to 2-tuples of obs- and action space.NrH   rI   s    rE   
get_spaceszEnvRunner.get_spaces   rK   rF   Nc                     dS )zReleases all resources used by this EnvRunner.

        For example, when using a gym.Env in this EnvRunner, you should make sure
        that its `close()` method is called.
        NrH   rI   s    rE   stopzEnvRunner.stop   rU   rF   c                     dS )z:If this Actor is deleted, clears all resources used by it.NrH   rI   s    rE   __del__zEnvRunner.__del__   s    rF   r'   optionsr'   rf   c                   	 | j                             t                    5  | j                            ||          \  }}ddd           n# 1 swxY w Y   ||fS # t
          $ rk}| j        j        rXt          	                    d|j
        d                     |                                  |                     ||          cY d}~S |d}~ww xY w)a  Tries resetting the env and - if an error occurs - handles it gracefully.

        Args:
            seed: An optional seed (int) to be passed to the Env.reset() call.
            options: An optional options-dict to be passed to the Env.reset() call.

        Returns:
            The results of calling `Env.reset()`, which is a tuple of observations and
            info dicts.

        Raises:
            Exception: In case `config.restart_failed_sub_environments` is False and
                `Env.reset()` resulted in an error.
        re   Nz?Resetting the env resulted in an error! The original error is: r   )r3   log_timer   r2   reset	Exceptionr   restart_failed_sub_environmentslogger	exceptionargsrM   _try_env_reset)rC   r'   rf   obsinfoses         rE   ro   zEnvRunner._try_env_reset   sD   *	&&77 H H!X^^w^GG
UH H H H H H H H H H H H H H H : 	 	 	 {: 	  '6!9' '  
 **g*FFFFFFFF	sG   A  AA AA AA 
C&ACC
CCc           	         	 | j                             t                    5  | j                            |          }ddd           n# 1 swxY w Y   |                     | j        d           |S # t          $ r}| j                             t          dd           | j
        j        r_t          |t                    s*t                              d| j        j         d|            |                                  t&          cY d}~S t                              d| j        j         dt)          |           d	|            t*          |d}~ww xY w)
zHTries stepping the env and - if an error occurs - handles it gracefully.N   )metric	num_stepslifetime_sum)reduceRLlib z]: Environment step failed. Will force reset env(s) in this EnvRunner. The original error is: a&  : Environment step failed and 'config.restart_failed_sub_environments' is False. This env will not be recreated. Consider setting 'fault_tolerance(restart_failed_sub_environments=True)' in your AlgorithmConfig in order to automatically re-create and force-reset an env.The original error type: z. )r3   rh   r   r2   step_log_env_stepsr>   rj   	log_valueNUM_ENV_STEP_FAILURES_LIFETIMEr   rk   
isinstancer	   rl   rm   r@   rA   rM   ENV_STEP_FAILUREtypeRuntimeError)rC   actionsresultsrr   s       rE   _try_env_stepzEnvRunner._try_env_step   s    	*&&~66 1 1(--001 1 1 1 1 1 1 1 1 1 1 1 1 1 1t'EQRSSSN 	* 	* 	*L"".. #    {: *!!%?@@ $$ [!8  [  [  XY  [  [    (''''''  T^4  
 15Q      #)5	*sG   A1 AA1 AA1 A A1 1
E;BE
EAE

Ec                 ~    | j         j        dk    rt          |          S t          j        t
          j        |          S )z0Converts structs to a framework-specific tensor.torch)r   r7   r   treemap_structuretfconvert_to_tensor)rC   structs     rE   _convert_to_tensorzEnvRunner._convert_to_tensor#  s8     ;$//*6222%b&:FCCCrF   ru   rv   c           	          |dk    r|                     |           d S t                              d| j        j         d|j        d          d| d           d S )Nr   )valuery   z*: Skipping Prometheus logging for metric 'r*   z'. Received num_steps=z1, but the number of steps must be greater than 0.)incrl   warningr@   rA   info)rC   ru   rv   s      rE   r{   zEnvRunner._log_env_steps+  s    q==JJYJ'''''NNc0 c c\b\ghn\o c c&/c c c    rF   )r   r   )rP   N)$rA   
__module____qualname____doc__r5   abcabstractmethodrJ   rM   rO   r   rR   rT   r   r   rY   	ObjectRefr   r^   r   strgymSpacer`   rb   rd   r   r<   dictro   r   r   r   r   r{   __classcell__)r@   s   @rE   r   r   #   s%         D
 D
 D
 D
 D
 D
L 	  
 
 
   	#    S     ?	s}i2	3? ? ? \?" 	UDeCIsy,@&A!AB U U U U       #"&	' ' ' sm' $	'
 
sCx' ' ' 'R"* "* "*HDJ D D D DW          rF   r   )	metaclass)1r   loggingtypingr   r   r   r   r   	gymnasiumr   r   rY   ray.rllib.corer   ray.rllib.env.env_errorsr	   ray.rllib.utils.actor_managerr
   ray.rllib.utils.debugr   ray.rllib.utils.frameworkr   ray.rllib.utils.metricsr   r   &ray.rllib.utils.metrics.metrics_loggerr   ray.rllib.utils.torch_utilsr   ray.rllib.utils.typingr   r   ray.util.annotationsr   r   ray.util.metricsr   %ray.rllib.algorithms.algorithm_configr   	getLoggerrl   r6   r   _ENV_RESET_FAILUREr   r}   ABCMetar   rH   rF   rE   <module>r      s   



  < < < < < < < < < < < < < <      



 . . . . . . ? ? ? ? ? ? 9 9 9 9 9 9 A A A A A A 3 3 3 3 3 3 C C C C C C C C @ @ @ @ @ @ ? ? ? ? ? ? 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 $ $ $ $ $ $ FEEEEEE		;	'	']__
R' % !8 
 WN N N N N3; N N N N N NrF   