
    &`i>                     l   d dl Z d dlmZmZmZmZmZmZmZm	Z	m
Z
 d dlZd dlZd dlmZ d dlmZmZmZmZ erd dlmZ dZ e j        e          Ze G d d                      Zd	Zeefd
eeef         dddefd            Ze	 	 	 	 	 	 ddedeegef         dedededed         deddfd            Z dS )    N)	TYPE_CHECKINGAnyCallableDictListOptionalSetTupleUnion)OldAPIStack)AgentIDEnvIDEnvTypeMultiEnvDict)RolloutWorkerasync_reset_returnc                      e Zd ZdZ	 	 	 	 	 ddeeegef                  deded	ed
edd fdZ	de
eeeeeef         fdZdeddfdZ	 dddddee         dee         dee         de
ee         ee         f         fdZddee         ddfdZd dedeee         ef         fdZdee         fdZddee         ddfdZd!dZedej        fd            Zedej        fd            Zde
eeeeef         fdZdS )"BaseEnva3  The lowest-level env interface used by RLlib for sampling.

    BaseEnv models multiple agents executing asynchronously in multiple
    vectorized sub-environments. A call to `poll()` returns observations from
    ready agents keyed by their sub-environment ID and agent IDs, and
    actions for those agents can be sent back via `send_actions()`.

    All other RLlib supported env types can be converted to BaseEnv.
    RLlib handles these conversions internally in RolloutWorker, for example:

    gym.Env => rllib.VectorEnv => rllib.BaseEnv
    rllib.MultiAgentEnv (is-a gym.Env) => rllib.VectorEnv => rllib.BaseEnv
    rllib.ExternalEnv => rllib.BaseEnv

    .. testcode::
        :skipif: True

        MyBaseEnv = ...
        env = MyBaseEnv()
        obs, rewards, terminateds, truncateds, infos, off_policy_actions = (
            env.poll()
        )
        print(obs)

        env.send_actions({
          "env_0": {
            "car_0": 0,
            "car_1": 1,
          }, ...
        })
        obs, rewards, terminateds, truncateds, infos, off_policy_actions = (
            env.poll()
        )
        print(obs)

        print(terminateds)

    .. testoutput::

        {
            "env_0": {
                "car_0": [2.4, 1.6],
                "car_1": [3.4, -3.2],
            },
            "env_1": {
                "car_0": [8.0, 4.1],
            },
            "env_2": {
                "car_0": [2.3, 3.3],
                "car_1": [1.4, -0.2],
                "car_3": [1.2, 0.1],
            },
        }
        {
            "env_0": {
                "car_0": [4.1, 1.7],
                "car_1": [3.2, -4.2],
            }, ...
        }
        {
            "env_0": {
                "__all__": False,
                "car_0": False,
                "car_1": True,
            }, ...
        }

    N   Fr   make_envnum_envsremote_envsremote_env_batch_wait_msrestart_failed_sub_environmentsreturnc                     | S )a  Converts an RLlib-supported env into a BaseEnv object.

        Supported types for the `env` arg are gym.Env, BaseEnv,
        VectorEnv, MultiAgentEnv, ExternalEnv, or ExternalMultiAgentEnv.

        The resulting BaseEnv is always vectorized (contains n
        sub-environments) to support batched forward passes, where n may also
        be 1. BaseEnv also supports async execution via the `poll` and
        `send_actions` methods and thus supports external simulators.

        TODO: Support gym3 environments, which are already vectorized.

        Args:
            env: An already existing environment of any supported env type
                to convert/wrap into a BaseEnv. Supported types are gym.Env,
                BaseEnv, VectorEnv, MultiAgentEnv, ExternalEnv, and
                ExternalMultiAgentEnv.
            make_env: A callable taking an int as input (which indicates the
                number of individual sub-environments within the final
                vectorized BaseEnv) and returning one individual
                sub-environment.
            num_envs: The number of sub-environments to create in the
                resulting (vectorized) BaseEnv. The already existing `env`
                will be one of the `num_envs`.
            remote_envs: Whether each sub-env should be a @ray.remote actor.
                You can set this behavior in your config via the
                `remote_worker_envs=True` option.
            remote_env_batch_wait_ms: The wait time (in ms) to poll remote
                sub-environments for, if applicable. Only used if
                `remote_envs` is True.
            policy_config: Optional policy config dict.

        Returns:
            The resulting BaseEnv object.
         )selfr   r   r   r   r   s         j/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/env/base_env.pyto_base_envzBaseEnv.to_base_envY   s
    V     c                     t           )a	  Returns observations from ready agents.

        All return values are two-level dicts mapping from EnvID to dicts
        mapping from AgentIDs to (observation/reward/etc..) values.
        The number of agents and sub-environments may vary over time.

        Returns:
            Tuple consisting of:
            New observations for each ready agent.
            Reward values for each ready agent. If the episode is just started,
            the value will be None.
            Terminated values for each ready agent. The special key "__all__" is used to
            indicate episode termination.
            Truncated values for each ready agent. The special key "__all__"
            is used to indicate episode truncation.
            Info values for each ready agent.
            Agents may take off-policy actions, in which case, there will be an entry
            in this dict that contains the taken action. There is no need to
            `send_actions()` for agents that have already chosen off-policy actions.
        NotImplementedErrorr   s    r   pollzBaseEnv.poll   s
    < "!r!   action_dictc                     t           )a  Called to send actions back to running agents in this env.

        Actions should be sent for each ready agent that returned observations
        in the previous poll() call.

        Args:
            action_dict: Actions values keyed by env_id and agent_id.
        r#   )r   r'   s     r   send_actionszBaseEnv.send_actions   s
     "!r!   )seedoptionsenv_idr*   r+   c                    dS )a+  Attempt to reset the sub-env with the given id or all sub-envs.

        If the environment does not support synchronous reset, a tuple of
        (ASYNC_RESET_REQUEST, ASYNC_RESET_REQUEST) can be returned here.

        Note: A MultiAgentDict is returned when using the deprecated wrapper
        classes such as `ray.rllib.env.base_env._MultiAgentEnvToBaseEnv`,
        however for consistency with the poll() method, a `MultiEnvDict` is
        returned from the new wrapper classes, such as
        `ray.rllib.env.multi_agent_env.MultiAgentEnvWrapper`.

        Args:
            env_id: The sub-environment's ID if applicable. If None, reset
                the entire Env (i.e. all sub-environments).
            seed: The seed to be passed to the sub-environment(s) when
                resetting it. If None, will not reset any existing PRNG. If you pass an
                integer, the PRNG will be reset even if it already exists.
            options: An options dict to be passed to the sub-environment(s) when
                resetting it.

        Returns:
            A tuple consisting of a) the reset (multi-env/multi-agent) observation
            dict and b) the reset (multi-env/multi-agent) infos dict. Returns the
            (ASYNC_RESET_REQUEST, ASYNC_RESET_REQUEST) tuple, if not supported.
        )NNr   )r   r,   r*   r+   s       r   	try_resetzBaseEnv.try_reset   s
    @ zr!   c                     dS )a<  Attempt to restart the sub-env with the given id or all sub-envs.

        This could result in the sub-env being completely removed (gc'd) and recreated.

        Args:
            env_id: The sub-environment's ID, if applicable. If None, restart
                the entire Env (i.e. all sub-environments).
        Nr   r   r,   s     r   try_restartzBaseEnv.try_restart   s	     tr!   as_dictc                     |ri S g S )zReturn a reference to the underlying sub environments, if any.

        Args:
            as_dict: If True, return a dict mapping from env_id to env.

        Returns:
            List or dictionary of the underlying sub environments or [] / {}.
        r   )r   r2   s     r   get_sub_environmentszBaseEnv.get_sub_environments   s      	I	r!   c                     i S )z|Return the agent ids for the sub_environment.

        Returns:
            All agent ids for each the environment.
        r   r%   s    r   get_agent_idszBaseEnv.get_agent_ids   s	     	r!   c                     dS )zTries to render the sub-environment with the given id or all.

        Args:
            env_id: The sub-environment's ID, if applicable.
                If None, renders the entire Env (i.e. all sub-environments).
        Nr   r0   s     r   
try_renderzBaseEnv.try_render   s	     	r!   c                 |    |                                  D ]&}t          |d          r|                                 'dS )zReleases all resources used.closeN)r4   hasattrr:   )r   envs     r   stopzBaseEnv.stop   sI     ,,.. 	 	CsG$$ 			 	r!   c                     t           )a  Returns the observation space for each agent.

        Note: samples from the observation space need to be preprocessed into a
            `MultiEnvDict` before being used by a policy.

        Returns:
            The observation space for each environment.
        r#   r%   s    r   observation_spacezBaseEnv.observation_space  
     "!r!   c                     t           )a	  Returns the action space for each agent.

        Note: samples from the action space need to be preprocessed into a
            `MultiEnvDict` before being passed to `send_actions`.

        Returns:
            The observation space for each environment.
        r#   r%   s    r   action_spacezBaseEnv.action_space  r@   r!   c                 D    t                               d           i i i i i fS )a  Returns the last observations, rewards, done- truncated flags and infos ...

        that were returned by the environment.

        Returns:
            The last observations, rewards, done- and truncated flags, and infos
            for each sub-environment.
        z3last has not been implemented for this environment.)loggerwarningr%   s    r   lastzBaseEnv.last  s(     	LMMM2r2r!!r!   )Nr   Fr   FN)F)r   N) __name__
__module____qualname____doc__r   r   intr   boolr    r
   r   r&   r)   r   dictr.   r1   r   r   r4   r	   r   r6   r8   r=   propertygymSpacer?   rB   rF   r   r!   r   r   r      s       C CN 8<!()05+ +8SE7N34+ + 	+
 #&+ *.+ 
+ + + +Z"		
" " " "@	" 	" 	" 	" 	" 	" #'  #"&       sm	 
 $  
x%x'==	>       D	 	(5/ 	T 	 	 	 	 D U4=RVCV=W    s7|    	 	% 	D 	 	 	 	    	"39 	" 	" 	" X	" 	"ci 	" 	" 	" X	""	|\<|S	T" " " " " "r!   r   agent0env_id_to_valuesdummy_idr   r   c                 z    i }|                                  D ]#\  }}t          |t                    r|n||i||<   $|S rG   )items
isinstance	Exception)rS   rT   retr,   values        r   with_dummy_agent_idr[   1  sT     C+1133 S S  *%;;Ree(EARFJr!   r   Fr<   r   r   r   r   workerr   r   c           	         ddl m} ddlm} ddlm}	 ddlm}
m} |r|dk    rt          d          t          | t          ||
|f          r|                     |||||          S |rQt          | d	          r+t          j        | j                                                  nd
} |	||||| g||          } n1|
                    || g|| j        | j        |          }  ||           } t          | t                    s
J |             | S )a  Converts an RLlib-supported env into a BaseEnv object.

    Supported types for the `env` arg are gym.Env, BaseEnv,
    VectorEnv, MultiAgentEnv, ExternalEnv, or ExternalMultiAgentEnv.

    The resulting BaseEnv is always vectorized (contains n
    sub-environments) to support batched forward passes, where n may also
    be 1. BaseEnv also supports async execution via the `poll` and
    `send_actions` methods and thus supports external simulators.

    TODO: Support gym3 environments, which are already vectorized.

    Args:
        env: An already existing environment of any supported env type
            to convert/wrap into a BaseEnv. Supported types are gym.Env,
            BaseEnv, VectorEnv, MultiAgentEnv, ExternalEnv, and
            ExternalMultiAgentEnv.
        make_env: A callable taking an int as input (which indicates the
            number of individual sub-environments within the final
            vectorized BaseEnv) and returning one individual
            sub-environment.
        num_envs: The number of sub-environments to create in the
            resulting (vectorized) BaseEnv. The already existing `env`
            will be one of the `num_envs`.
        remote_envs: Whether each sub-env should be a @ray.remote actor.
            You can set this behavior in your config via the
            `remote_worker_envs=True` option.
        remote_env_batch_wait_ms: The wait time (in ms) to poll remote
            sub-environments for, if applicable. Only used if
            `remote_envs` is True.
        worker: An optional RolloutWorker that owns the env. This is only
            used if `remote_worker_envs` is True in your config and the
            `on_sub_environment_created` custom callback needs to be called
            on each created actor.
        restart_failed_sub_environments: If True and any sub-environment (within
            a vectorized env) throws any error during env stepping, the
            Sampler will try to restart the faulty sub-environment. This is done
            without disturbing the other (still intact) sub-environment and without
            the RolloutWorker crashing.

    Returns:
        The resulting BaseEnv object.
    r   )ExternalEnv)MultiAgentEnv)RemoteBaseEnv)	VectorEnvVectorEnvWrapperr   z_Remote envs only make sense to use if num_envs > 1 (i.e. environment vectorization is enabled).)r   r   r   r   r   _is_multi_agentF)
multiagentr   existing_envsr\   r   )r   re   r   rB   r?   r   )ray.rllib.env.external_envr^   ray.rllib.env.multi_agent_envr_   ray.rllib.env.remote_base_envr`   ray.rllib.env.vector_envra   rb   
ValueErrorrW   r   r    r;   raygetrc   remotevectorize_gym_envsrB   r?   )r<   r   r   r   r   r\   r   r^   r_   r`   ra   rb   rd   s                r   convert_to_base_envro   >  s   l 766666;;;;;;;;;;;;DDDDDDDD 
x1}};
 
 	
 #	;GHH ((#%=,K  
 
 	
  	(
 3 122+2244555 
  -%)A"e0O  CC ..!"e! -"%"70O /  C #"3''C c7##((S((#Jr!   )Nr   Fr   NF)!loggingtypingr   r   r   r   r   r   r	   r
   r   	gymnasiumrP   rk   ray.rllib.utils.annotationsr   ray.rllib.utils.typingr   r   r   r   #ray.rllib.evaluation.rollout_workerr   ASYNC_RESET_RETURN	getLoggerrH   rD   r   _DUMMY_AGENT_IDr[   rL   rM   ro   r   r!   r   <module>ry      s	    X X X X X X X X X X X X X X X X X X X X X X     



 3 3 3 3 3 3 H H H H H H H H H H H H BAAAAAA) 		8	$	$ W" W" W" W" W" W" W" W"v  >M	 	5#:&	2;		 	 	 	  *.$%(,,1n n	nug~&n n 	n
 "n _%n &*n n n n n n nr!   