
    &`i1K                     v   d dl Z d dlmZmZmZmZmZmZmZ d dl	Z
d dlZd dlmZmZmZ d dlmZmZ d dlmZmZmZmZ d dlmZ erd dlmZ  e j        e          Ze G d d	e                      Z ej         d 
           G d d                      Z! ej         d 
           G d d                      Z"dS )    N)TYPE_CHECKINGCallableDictListOptionalSetTuple)_DUMMY_AGENT_IDASYNC_RESET_RETURNBaseEnv)OldAPIStackoverride)AgentIDEnvIDEnvTypeMultiEnvDict)log_once)RolloutWorkerc                      e Zd ZdZ	 	 	 ddeegef         dedededee	e
j        j                          d	ed
         defdZ ee          deeeeeeef         fd            Z ee          deddfd            Z ee          	 d ddddee         dee         dee         deeef         fd            Z ee          d dee         ddfd            Z ee          d!d            Z ee          d"dede	e         fd            Ze ee          dej        j        fd                        Ze ee          dej        fd                        Z d dee         fdZ! ee          de"e#         fd            Z$dS )#RemoteBaseEnva'  BaseEnv that executes its sub environments as @ray.remote actors.

    This provides dynamic batching of inference as observations are returned
    from the remote simulator actors. Both single and multi-agent child envs
    are supported, and envs can be stepped synchronously or asynchronously.

    NOTE: This class implicitly assumes that the remote envs are gym.Env's

    You shouldn't need to instantiate this class directly. It's automatically
    inserted when you use the `remote_worker_envs=True` option in your
    Algorithm's config.
    NFmake_envnum_envs
multiagentremote_env_batch_wait_msexisting_envsworkerr   restart_failed_sub_environmentsc                     | _         | _        | _        |dz   _        | _        | _        |pg }d _        d _        d _        d _	        t          |          dk    rt          |d         t          j        j                  rd _        | _	        t           j	                   j        k     r\ j	                                                 t           j	                                       t           j	                   j        k     \nĈ fdt#           j                  D              _	        t          |          dk    r%|d         j         _        |d         j         _        ngt          j         j	        d         j                                         j	        d         j                                        g          \   _         _        d  j	        D              _        dS )ae  Initializes a RemoteVectorEnv instance.

        Args:
            make_env: Callable that produces a single (non-vectorized) env,
                given the vector env index as only arg.
            num_envs: The number of sub-environments to create for the
                vectorization.
            multiagent: Whether this is a multiagent env or not.
            remote_env_batch_wait_ms: Time to wait for (ray.remote)
                sub-environments to have new observations available when
                polled. Only when none of the sub-environments is ready,
                repeat the `ray.wait()` call until at least one sub-env
                is ready. Then return only the observations of the ready
                sub-environment(s).
            existing_envs: Optional list of already created sub-environments.
                These will be used as-is and only as many new sub-envs as
                necessary (`num_envs - len(existing_envs)`) will be created.
            worker: An optional RolloutWorker that owns the env. This is only
                used if `remote_worker_envs` is True in your config and the
                `on_sub_environment_created` custom callback needs to be
                called on each created actor.
            restart_failed_sub_environments: If True and any sub-environment (within
                a vectorized env) throws any error during env stepping, the
                Sampler will try to restart the faulty sub-environment. This is done
                without disturbing the other (still intact) sub-environment and without
                the RolloutWorker crashing.
        i  FNr   Tc                 :    g | ]}                     |          S  )_make_sub_env).0iselfs     q/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/env/remote_base_env.py
<listcomp>z*RemoteBaseEnv.__init__.<locals>.<listcomp>p   s'    OOOQ4--a00OOO    c                 B    i | ]}|j                                         |S r    )resetremote)r"   as     r%   
<dictcomp>z*RemoteBaseEnv.__init__.<locals>.<dictcomp>   s3     5
 5
 5
$%AGNNa5
 5
 5
r'   )r   r   r   poll_timeoutr   r   make_env_creates_actors_observation_space_action_spaceactorslen
isinstancerayactorActorHandleappendr!   rangeobservation_spaceaction_spacegetr*   pending)r$   r   r   r   r   r   r   r   s   `       r%   __init__zRemoteBaseEnv.__init__!   s   N ! $4t;/N, &+ (-$"&! >B }!!j!ci3'
 '
! ,0D('DKdk""T]22""4#5#5c$+6F6F#G#GHHH dk""T]22 POOO%:N:NOOODK=!!A%%*7*:*L'%21%5%B"" ?BgA8??AAA3::<<? ?;');5
 5
)-5
 5
 5
r'   returnc                    i i i i i f\  }}}}}g }|sEt          j        t          | j                  t	          | j                  | j                  \  }}|Et                      }|D ]p}	| j                            |	          }
| j        	                    |
          }|
                    |           	 t          j        |	          }nf# t          $ rY}| j        rFt                              |j        d                    |                     |           |i ddiddii f}n|Y d }~nd }~ww xY w| j        rd\  }}}}| j        rtt)          |t*                    rPt	          |          dk    r
|\  }}}}}nt	          |          dk    r|d         }|d	         }nt-          d
          t-          d          t)          |t*                    rt	          |          dk    r\t.          |d         i}t.          |d	         i}t.          |d         d|d         i}t.          |d         d|d         i}t.          |d         i}nPt	          |          dk    rt.          |d         i}t.          |d	         i}nt-          d
          t-          d          |&d |                                D             }ddi}ddi}n|\  }}}}}|||<   |||<   |||<   |||<   |||<   rt                              d|            |||||i fS )N)num_returnstimeoutr   __all__TF)NNNN         zYour gymnasium.Env seems to NOT return the correct number of return values for `step()` (needs to return 5 values: obs, reward, terminated, truncated and info) or `reset()` (needs to return 2 values: obs and info)!zeYour gymnasium.Env seems to only return a single value upon `reset()`! Must return 2 (obs AND infos).      zeYour gymnasium.Env seems to only return a single value upon `reset()`! Must return 2 (obs and infos).c                     i | ]}|d S )r   r    )r"   agent_ids     r%   r,   z&RemoteBaseEnv.poll.<locals>.<dictcomp>   s    AAA88QAAAr'   zGot obs batch for actors )r4   waitlistr<   r2   r-   setpopr1   indexaddr;   	Exceptionr   logger	exceptionargstry_restartr.   r   r3   tupleAssertionErrorr
   keysdebug)r$   obsrewardsterminateds
truncatedsinfosready_env_idsobj_refr5   env_idreterew
terminated	truncatedinfoobs                      r%   pollzRemoteBaseEnv.poll   s    8:2r2r7I4Wk:u  	xT\""--)  HE1  	 %% j	! j	!G L$$W--E[&&u--FKKgg&&    7 $$QVAY///$$V,,, "D)"E*CC G CCCC, + D;3I0ZD? 4!#u--  s88q==CF@BZDD XX]]!$QB#&q6DD #1!)# #  -M  
 "#u-- s88q=="13q6!:B#2CF";C*93q69cRSf)UJ)8#a&)SQRV(TI$3SV#<DD XX]]"13q6!:B$3SV#<DD #1!)# #  -M   ;AArwwyyAAAC"+U!3J!*E 2I 8;4CYCK!GFO",K!*Jv E&MM:::;;;G[*eR??s   4C		
D,AD''D,action_dictc                    |                                 D ]j\  }}| j        |         }| j        s-| j        r&|j                            |t                             }n|j                            |          }|| j        |<   kd S N)itemsr1   r   r.   stepr*   r
   r<   )r$   rk   rb   actionsr5   ra   s         r%   send_actionszRemoteBaseEnv.send_actions  s    *0022 	* 	*OFGK'E ? 5t'C 5*++GO,DEE
  *++G44$)DL!!	* 	*r'   seedoptionsrb   rs   rt   c                    | j         |         }|j                            ||          }|| j        |<   t          t          fS )Nrr   )r1   r)   r*   r<   r   )r$   rb   rs   rt   r5   ra   s         r%   	try_resetzRemoteBaseEnv.try_reset  sD     F#+$$$$@@ %W "#555r'   c                 `   	 | j         |         j                                         nF# t          $ r9}t	          d          r t
                              d| d|            Y d }~nd }~ww xY w| j         |         j                                         |                     |          | j         |<   d S )Nclose_sub_envzBTrying to close old and replaced sub-environment (at vector index=z"), but closing resulted in error:
)	r1   closer*   rP   r   rQ   warning__ray_terminate__r!   )r$   rb   rd   s      r%   rT   zRemoteBaseEnv.try_restart0  s    	K%,,.... 	 	 	(( L#L LHIL L  	 	F-44666 #0088Fs   $' 
A*/A%%A*c                 ^    | j         #| j         D ]}|j                                         d S d S rm   )r1   r{   r*   )r$   r5   s     r%   stopzRemoteBaseEnv.stopC  sE    ;" 1 1'..0000 #"1 1r'   as_dictc                 V    |r!t          t          | j                            S | j        S rm   )dict	enumerater1   )r$   r~   s     r%   get_sub_environmentsz"RemoteBaseEnv.get_sub_environmentsI  s*     	0	$+..///{r'   c                     | j         S rm   )r/   r$   s    r%   r9   zRemoteBaseEnv.observation_spaceO  s     &&r'   c                     | j         S rm   )r0   r   s    r%   r:   zRemoteBaseEnv.action_spaceT  s     !!r'   idxc                      j         rm                     |          } j        P j        j                             j         j        |          j        j                            |                     n fd} ||          }|S )z.Re-creates a sub-environment at the new index.Nvector_indexr   sub_environmentenv_contextc                    t                               d                    |                      j        r!t                              j        |           }n t                              j        |           }j        Ej        j	        
                    j        |j        j                            |                      |S )Nz Launching env {} in remote actorr   r   )rQ   rh   formatr   _RemoteMultiAgentEnvr*   r   _RemoteSingleAgentEnvr   	callbackson_sub_environment_createdr   copy_with_overrides)r#   sub_envr$   s     r%   make_remote_envz4RemoteBaseEnv._make_sub_env.<locals>.make_remote_envl  s    >EEaHHIII? M299$-KKGG3::4=!LLG;*K)DD#{(/$(K$;$O$O)* %P % % E    r'   )r.   r   r   r   r   r1   r   r   )r$   r   r   r   s   `   r%   r!   zRemoteBaseEnv._make_sub_envY  s     ' !	+mmC((G{&%@@;$(K$4 $ 7 K K%( !L ! ! A       $ &oc**Gr'   c                     | j         r6t          j        | j        d         j                                                  S t          hS )Nr   )r   r4   r;   r1   get_agent_idsr*   r
   r   s    r%   r   zRemoteBaseEnv.get_agent_ids  s:    ? 	%74;q>7>>@@AAA#$$r'   )NNFrm   )r>   N)F)%__name__
__module____qualname____doc__r   intr   boolr   r   r4   r5   r6   r=   r   r   r	   r   rj   rq   r   r   rv   rT   r}   r   propertygymspacesr   r9   Spacer:   r!   r   r   r   r    r'   r%   r   r      sR        & @D,005d
 d
C5'>*d
 d
 	d

 #&d
  SY%: ;<d
 )d
 *.d
 d
 d
 d
L XgF@		
F@ F@ F@ F@P Xg* * * * * * Xg #'6 #"&6 6 66 sm	6
 $6 
|\)	*6 6 6 6  Xg9 9(5/ 9T 9 9 9 9$ Xg1 1 1 1
 Xg D T']    
 Xg'3:? ' ' '  X' Xg"ci " " "  X"' '# ' ' ' 'R Xg%s7| % % % % % %r'   r   )num_cpusc                   r    e Zd ZdZd Zddddee         dee         fdZd Z	d	 Z
d
 Zdee         fdZdS )r   z:Wrapper class for making a multi-agent env a remote actor.c                 L     ||          | _         t                      | _        d S rm   )envrL   	agent_idsr$   r   r#   s      r%   r=   z_RemoteMultiAgentEnv.__init__  s     8A;;r'   Nrr   rs   rt   c                    | j                             ||          \  }}i }|                                D ]!}| j                            |           d||<   "ddi}ddi}|||||fS )Nrr           rB   F)r   r)   rW   r   rO   )	r$   rs   rt   rY   rh   re   rI   rf   rg   s	            r%   r)   z_RemoteMultiAgentEnv.reset  s    HNNgN>>	T 

 	  	 HNx(((CMM'
&	CY44r'   c                 6    | j                             |          S rm   )r   ro   )r$   rk   s     r%   ro   z_RemoteMultiAgentEnv.step  s    x}}[)))r'   c                     | j         j        S rm   r   r9   r   s    r%   r9   z&_RemoteMultiAgentEnv.observation_space      x))r'   c                     | j         j        S rm   r   r:   r   s    r%   r:   z!_RemoteMultiAgentEnv.action_space      x$$r'   r>   c                     | j         S rm   )r   r   s    r%   r   z"_RemoteMultiAgentEnv.get_agent_ids  s
    ~r'   )r   r   r   r   r=   r   r   r   r)   ro   r9   r:   r   r   r   r    r'   r%   r   r     s        DD   .2T 
5 
5 
5Xc] 
5HTN 
5 
5 
5 
5* * *
* * *% % %s7|      r'   r   c                   Z    e Zd ZdZd Zddddee         dee         fdZd Z	d	 Z
d
 ZdS )r   z2Wrapper class for making a gym env a remote actor.c                 &     ||          | _         d S rm   )r   r   s      r%   r=   z_RemoteSingleAgentEnv.__init__  s    8A;;r'   Nrr   rs   rt   c                    | j                             ||          }t          |d         i}t          |d         i}t          di}ddi}ddi}|||||fS )Nrr   r   rE   r   rB   F)r   r)   r
   )	r$   rs   rt   obs_and_inforY   rh   re   rf   rg   s	            r%   r)   z_RemoteSingleAgentEnv.reset  sd    x~~4~AAQ0a1$'
&	CY44r'   c                     | j                             |t                             }d |D             \  }}}}}|t                   |d<   |t                   |d<   |||||fS )Nc                      g | ]}t           |iS r    )r
   )r"   xs     r%   r&   z._RemoteSingleAgentEnv.step.<locals>.<listcomp>  s    0W0W0W!/11E0W0W0Wr'   rB   )r   ro   r
   )r$   actionresultsrY   re   rf   rg   rh   s           r%   ro   z_RemoteSingleAgentEnv.step  sg    (-- 7880W0Ww0W0W0W-S*i *? ;
9(9	)CY44r'   c                     | j         j        S rm   r   r   s    r%   r9   z'_RemoteSingleAgentEnv.observation_space  r   r'   c                     | j         j        S rm   r   r   s    r%   r:   z"_RemoteSingleAgentEnv.action_space  r   r'   )r   r   r   r   r=   r   r   r   r)   ro   r9   r:   r    r'   r%   r   r     s        <<   .2T 	5 	5 	5Xc] 	5HTN 	5 	5 	5 	55 5 5* * *% % % % %r'   r   )#loggingtypingr   r   r   r   r   r   r	   	gymnasiumr   r4   ray.rllib.env.base_envr
   r   r   ray.rllib.utils.annotationsr   r   ray.rllib.utils.typingr   r   r   r   ray.utilr   #ray.rllib.evaluation.rollout_workerr   	getLoggerr   rQ   r   r*   r   r   r    r'   r%   <module>r      s    L L L L L L L L L L L L L L L L L L     



 O O O O O O O O O O = = = = = = = = H H H H H H H H H H H H       BAAAAAA		8	$	$ t% t% t% t% t%G t% t% t%n Q       D Q!% !% !% !% !% !% !% !% !% !%r'   