
    &`iB                        d dl Z d dlZd dlZd dlmZmZmZmZ d dlZ	d dl
mZ d dlmZ d dlmZmZ d dlmZmZmZmZmZ erd dlmZ e G d d	ej                              Ze G d
 d                      Ze G d de                      ZdS )    N)TYPE_CHECKINGCallableOptionalTuple)deprecation_warning)BaseEnv)OldAPIStackoverride)EnvActionTypeEnvInfoDict
EnvObsTypeEnvTypeMultiEnvDict)Preprocessorc                   *   e Zd ZdZ	 d!dej        dej        defdZd Z	 d"d	e	e
         d
ede
fdZd	e
dedefdZd	e
dededdfdZ	 d!d	e
dede	e         ddfdZd	e
deddfdZd	e
ddfdZ	 	 	 	 	 d#de	eegef                  dededededdfd ZdS )$ExternalEnvah  An environment that interfaces with external agents.

    Unlike simulator envs, control is inverted: The environment queries the
    policy to obtain actions and in return logs observations and rewards for
    training. This is in contrast to gym.Env, where the algorithm drives the
    simulation through env.step() calls.

    You can use ExternalEnv as the backend for policy serving (by serving HTTP
    requests in the run loop), for ingesting offline logs data (by reading
    offline transitions in the run loop), or other custom use cases not easily
    expressed through gym.Env.

    ExternalEnv supports both on-policy actions (through self.get_action()),
    and off-policy actions (through self.log_action()).

    This env is thread-safe, but individual episodes must be executed serially.

    .. testcode::
        :skipif: True

        from ray.tune import register_env
        from ray.rllib.algorithms.dqn import DQN
        YourExternalEnv = ...
        register_env("my_env", lambda config: YourExternalEnv(config))
        algo = DQN(env="my_env")
        while True:
            print(algo.train())
    Naction_spaceobservation_spacemax_concurrentc                     t           j                            |            d| _        || _        || _        i | _        t                      | _        t          j	                    | _
        |t          dd           dS dS )zInitializes an ExternalEnv instance.

        Args:
            action_space: Action space of the env.
            observation_space: Observation space of the env.
        TNa	  The `max_concurrent` argument has been deprecated. Please configurethe number of episodes using the `rollout_fragment_length` and`batch_mode` arguments. Please raise an issue on the Ray Github if these arguments do not support your expected use case for ExternalEnv)error)	threadingThread__init__daemonr   r   	_episodesset	_finished	Condition_results_avail_conditionr   )selfr   r   r   s       n/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/env/external_env.pyr   zExternalEnv.__init__6   s     	!!$'''(!2(1(;(=(=%%X       &%    c                     t           )a  Override this to implement the run loop.

        Your loop should continuously:
            1. Call self.start_episode(episode_id)
            2. Call self.[get|log]_action(episode_id, obs, [action]?)
            3. Call self.log_returns(episode_id, reward)
            4. Call self.end_episode(episode_id, obs)
            5. Wait if nothing to do.

        Multiple episodes may be started at the same time.
        )NotImplementedErrorr!   s    r"   runzExternalEnv.runT   s
     "!r#   T
episode_idtraining_enabledreturnc                 "   |t          j                    j        }|| j        v r"t	          d                    |                    || j        v r"t	          d                    |                    t          || j        |          | j        |<   |S )ab  Record the start of an episode.

        Args:
            episode_id: Unique string id for the episode or
                None for it to be auto-assigned and returned.
            training_enabled: Whether to use experiences for this
                episode to improve the policy.

        Returns:
            Unique string id for the episode.
        N!Episode {} has already completed.zEpisode {} is already started)	uuiduuid4hexr   
ValueErrorformatr   _ExternalEnvEpisoder    )r!   r(   r)   s      r"   start_episodezExternalEnv.start_episodeb   s     )J''@GG
SSTTT''<CCJOOPPP%857G&
 &
z" r#   observationc                 V    |                      |          }|                    |          S )a  Record an observation and get the on-policy action.

        Args:
            episode_id: Episode id returned from start_episode().
            observation: Current environment observation.

        Returns:
            Action from the env action space.
        )_getwait_for_actionr!   r(   r4   episodes       r"   
get_actionzExternalEnv.get_action   s)     ))J''&&{333r#   actionc                 \    |                      |          }|                    ||           dS )zRecord an observation and (off-policy) action taken.

        Args:
            episode_id: Episode id returned from start_episode().
            observation: Current environment observation.
            action: Action for the observation.
        N)r6   
log_action)r!   r(   r4   r;   r9   s        r"   r=   zExternalEnv.log_action   s1     ))J'';/////r#   rewardinfoc                 j    |                      |          }|xj        |z  c_        |r|pi |_        dS dS )a  Records returns (rewards and infos) from the environment.

        The reward will be attributed to the previous action taken by the
        episode. Rewards accumulate until the next action. If no reward is
        logged before the next action, a reward of 0.0 is assumed.

        Args:
            episode_id: Episode id returned from start_episode().
            reward: Reward from the environment.
            info: Optional info dict.
        N)r6   
cur_rewardcur_info)r!   r(   r>   r?   r9   s        r"   log_returnszExternalEnv.log_returns   sM     ))J''f$ 	*#zrG	* 	*r#   c                     |                      |          }| j                            |j                   |                    |           dS )zRecords the end of an episode.

        Args:
            episode_id: Episode id returned from start_episode().
            observation: Current environment observation.
        N)r6   r   addr(   doner8   s       r"   end_episodezExternalEnv.end_episode   sF     ))J''7-...[!!!!!r#   r2   c                     || j         v r"t          d                    |                    || j        vr"t          d                    |                    | j        |         S )z2Get a started episode by its ID or raise an error.r,   zEpisode {} not found.)r   r0   r1   r   )r!   r(   s     r"   r6   zExternalEnv._get   se     ''@GG
SSTTTT^++4;;JGGHHH~j))r#      Fr   make_envnum_envsremote_envsremote_env_batch_wait_msrestart_failed_sub_environmentsr   c                 N    |dk    rt          d          t          |           }|S )a  Converts an RLlib MultiAgentEnv into a BaseEnv object.

        The resulting BaseEnv is always vectorized (contains n
        sub-environments) to support batched forward passes, where n may
        also be 1. BaseEnv also supports async execution via the `poll` and
        `send_actions` methods and thus supports external simulators.

        Args:
            make_env: A callable taking an int as input (which indicates
                the number of individual sub-environments within the final
                vectorized BaseEnv) and returning one individual
                sub-environment.
            num_envs: The number of sub-environments to create in the
                resulting (vectorized) BaseEnv. The already existing `env`
                will be one of the `num_envs`.
            remote_envs: Whether each sub-env should be a @ray.remote
                actor. You can set this behavior in your config via the
                `remote_worker_envs=True` option.
            remote_env_batch_wait_ms: The wait time (in ms) to poll remote
                sub-environments for, if applicable. Only used if
                `remote_envs` is True.

        Returns:
            The resulting BaseEnv object.
        rI   zExternal(MultiAgent)Env does not currently support num_envs > 1. One way of solving this would be to treat your Env as a MultiAgentEnv hosting only one type of agent but with several copies.)r0   ExternalEnvWrapper)r!   rJ   rK   rL   rM   rN   envs          r"   to_base_envzExternalEnv.to_base_env   s9    B q==9   !&&
r#   N)NT)NrI   Fr   F)__name__
__module____qualname____doc__gymSpaceintr   r'   r   strboolr3   r   r   r:   r=   floatr   rC   rG   r6   r   r   rR    r#   r"   r   r      s
        B #	 i 9 	   <" " " JN "3-BF	   <4S 4z 4m 4 4 4 400,60@M0	0 0 0 0 MQ* **',*4<[4I*	* * * **
"c 
"
 
"t 
" 
" 
" 
"	*s 	*'< 	* 	* 	* 	* 8<!()05* *8SE7N34* * 	*
 #&* *.* 
* * * * * *r#   r   c            	       V    e Zd ZdZ	 ddedej        dedefdZd Z	d	 Z
d
 Zd Zd ZdS )r2   z&Tracked state for each active episode.Fr(   results_avail_conditionr)   
multiagentc                 V   || _         || _        || _        || _        t	          j                    | _        t	          j                    | _        |r0d | _        d | _	        i | _
        ddi| _        ddi| _        i | _        d S d | _        d | _        d| _        d| _        d| _        i | _        d S )N__all__F        )r(   r`   r)   ra   queueQueue
data_queueaction_queuenew_observation_dictnew_action_dictcur_reward_dictcur_terminated_dictcur_truncated_dictcur_info_dictnew_observation
new_actionrA   cur_terminatedcur_truncatedrB   )r!   r(   r`   r)   ra   s        r"   r   z_ExternalEnvEpisode.__init__   s     %'>$ 0$+--!KMM 	(,D%#'D #%D (15'9D$'0%&8D#!#D#'D "DO!DO"'D!&DDMMMr#   c                 j    | j                                         rd S | j                                         S rS   )rg   empty
get_nowaitr&   s    r"   get_dataz_ExternalEnvEpisode.get_data  s1    ?  "" 	4))+++r#   c                     | j         r|| _        || _        n|| _        || _        |                                  | j                            dd           d S )NTg      N@timeout)ra   ri   rj   ro   rp   _sendrh   get)r!   r4   r;   s      r"   r=   z_ExternalEnvEpisode.log_action  s]    ? 	%(3D%#)D  #.D $DO

dD11111r#   c                     | j         r|| _        n|| _        |                                  | j                            dd          S )NTg     r@rx   )ra   ri   ro   rz   rh   r{   r!   r4   s     r"   r7   z#_ExternalEnvEpisode.wait_for_action$  sH    ? 	/(3D%%#.D 

 $$T5$999r#   c                     | j         r|| _        ddi| _        ddi| _        n|| _        d| _        d| _        |                                  d S )Nrc   TF)ra   ri   rl   rm   ro   rq   rr   rz   r}   s     r"   rF   z_ExternalEnvEpisode.done,  s\    ? 
	'(3D%(14'8D$ (1%&8D###.D "&D!&D

r#   c                 >   | j         ri| j        s| j        D ]}d| j        |         d<   | j        | j        | j        | j        | j        d}| j        
| j        |d<   d | _        d | _        i | _        nY| j        | j	        | j
        | j        | j        d}| j        
| j        |d<   d | _        d | _        d| _	        | j        sd|d         d<   | j        5  | j                            |           | j                                         d d d            d S # 1 swxY w Y   d S )NFr)   )obsr>   
terminated	truncatedr?   off_policy_actionrd   r?   )ra   r)   rn   ri   rk   rl   rm   rj   ro   rA   rq   rr   rB   rp   r`   rg   
put_nowaitnotify)r!   agent_iditems      r"   rz   z_ExternalEnvEpisode._send:  s   ? 	9( M $ 2 M MHGLD&x01CDD0."6!4* D #/,0,@()(,D%#'D #%D   +/"1!/ D *,0O()#'D "DO!DO( 938V/0) 	2 	2O&&t,,,(//111	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2s   4DDDN)F)rT   rU   rV   rW   r[   r   r   r\   r   rv   r=   r7   rF   rz   r^   r#   r"   r2   r2      s        00 !  "+!4 	
    8, , ,
2 2 2: : :  #2 #2 #2 #2 #2r#   r2   c                   X   e Zd ZdZ	 d	 	 	 ddZ ee          deeeeeef         fd	            Z	 ee          d
eddfd            Z
deeeeeeef         fdZe ee          dej        j        fd                        Ze ee          dej        fd                        ZdS )rP   z+Internal adapter of ExternalEnv to BaseEnv.Nexternal_envr   preprocessorr   c                     ddl m} || _        || _        t	          t          |          |          | _        |j        | _        |r|j	        | _
        n|j	        | _
        |                                 d S )Nr   )ExternalMultiAgentEnv)&ray.rllib.env.external_multi_agent_envr   r   prep
issubclasstypera   r   _action_spacer   _observation_spacestart)r!   r   r   r   s       r"   r   zExternalEnvWrapper.__init__d  s     	QPPPPP( 	$T,%7%79NOO)6 	E&2&DD##&2&DD#r#   r*   c                    | j         j        5  |                                 }t          |d                   dk    rs| j         j                                         |                                 }| j                                         st          d          t          |d                   dk    sd d d            n# 1 swxY w Y   |S )Nr   zServing thread has stopped.)r   r    _polllenwaitis_alive	Exception)r!   resultss     r"   pollzExternalEnvWrapper.polls  s    7 	C 	CjjllGgaj//Q&&!:??AAA**,,(1133 C#$ABBB	 gaj//Q&&	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C s   B!B::B>B>action_dictc                 @   ddl m} | j        rF|                                D ]/\  }}| j        j        |         j                            |           0d S |                                D ]5\  }}| j        j        |         j                            ||                    6d S )Nr   )_DUMMY_AGENT_ID)ray.rllib.env.base_envr   ra   itemsr   r   rh   put)r!   r   r   env_idactionsr;   s         r"   send_actionszExternalEnvWrapper.send_actions  s    ::::::? 	#.#4#4#6#6 N N!+F3@DDWMMMMN N #."3"3"5"5  !+F3@DD?+    r#   c                    ddl m} i i i i i f\  }}}}}i }| j        j                                                                        D ]\  }|                                }	| j        r|j        d         n|j	        }
| j        r|j
        d         n|j        }|
s|r| j        j        = |	rq| j        r$| j                            |	d                   |<   n|	d         |<   |	d         |<   |	d         |<   |	d         |<   |	d         |<   d	|	v r|	d	         |<   | j        ro|                                D ]R\  }|                                D ]8fd
} ||d            ||d            ||d            ||i            9S||||||fS  ||           ||           ||d           ||d           ||           ||          fS )Nr   )with_dummy_agent_idrc   r   r>   r   r   r?   r   c                 6    |          vr||          <   d S d S rS   r^   )dzero_valr   eids     r"   fixz%ExternalEnvWrapper._poll.<locals>.fix  s.    #1S611/7AcF8,,, 21r#   rd   F)r   r   r   r   copyr   rv   ra   rl   rq   rm   rr   r   	transformkeys)r!   r   all_obsall_rewardsall_terminatedsall_truncateds	all_infosoff_policy_actionsr9   datarq   rr   eid_dictr   r   r   s                 @@r"   r   zExternalEnvWrapper._poll  s    	?>>>>> L
Ho~y   -7<<>>DDFF 	H 	HLC##%%D ?,+I66+  ?+*955* 
  5 5%/4 
H9 /#'9#6#6tE{#C#CGCLL#';GCL#'>C '+L'9$&*;&7s#!%f	#&$...23F.G&s+? 	 ") 
' 
'X ( 	' 	'H8 8 8 8 8 8 CS)))C///C...C	2&&&&	' "  $#G,,##K00##OY??##NI>>##I..##$677 r#   c                     | j         S rS   )r   r&   s    r"   r   z$ExternalEnvWrapper.observation_space  s     &&r#   c                     | j         S rS   )r   r&   s    r"   r   zExternalEnvWrapper.action_space  s     !!r#   rS   )r   r   r   r   )rT   rU   rV   rW   r   r
   r   r   r   r   r   r   propertyrX   spacesDictr   rY   r   r^   r#   r"   rP   rP   `  s}       55 KO)9G    Xg
	|\<|S	T
 
 
 
 Xg
 
 
 
 
 
K		
K K K KZ Xg'3:? ' ' '  X' Xg"ci " " "  X" " "r#   rP   )re   r   r-   typingr   r   r   r   	gymnasiumrX   ray._common.deprecationr   r   r   ray.rllib.utils.annotationsr	   r
   ray.rllib.utils.typingr   r   r   r   r   ray.rllib.models.preprocessorsr   r   r   r2   rP   r^   r#   r"   <module>r      s         ; ; ; ; ; ; ; ; ; ; ; ;     7 7 7 7 7 7 * * * * * * = = = = = = = =               <;;;;;; Z Z Z Z Z)" Z Z Zz g2 g2 g2 g2 g2 g2 g2 g2T A" A" A" A" A" A" A" A" A" A"r#   