
    &`i                     <   d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlZd dlmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZmZmZ d d
lmZ d dl m!Z!m"Z" d dl#m$Z$m%Z% d dl&m'Z' d dl(m)Z)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4m5Z5 d dl6m7Z7 d dl8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZImJZJmKZKmLZLmMZM d dlNmOZO d dlPmQZQmRZRmSZS d dlTmUZUmVZV d dlWmXZX  e jY        d          ZZ eXd           G d de*e2                      Z[dS )    N)defaultdict)
CollectionDefaultDictListOptionalUnion)DictInfoToList)
Deprecated)AlgorithmConfig)RLlibCallback)make_callback)!COMPONENT_ENV_TO_MODULE_CONNECTOR!COMPONENT_MODULE_TO_ENV_CONNECTORCOMPONENT_RL_MODULEDEFAULT_AGENT_IDDEFAULT_MODULE_ID)Columns)RLModuleRLModuleSpec)INPUT_ENV_SINGLE_SPACESINPUT_ENV_SPACES)
EnvContext)ENV_STEP_FAILURE	EnvRunner)SingleAgentEpisode)
force_list)override)Checkpointable)ERR_MSG_INVALID_ENV_DESCRIPTOREnvError)
get_device)ENV_TO_MODULE_CONNECTOREPISODE_DURATION_SEC_MEANEPISODE_LEN_MAXEPISODE_LEN_MEANEPISODE_LEN_MINEPISODE_RETURN_MAXEPISODE_RETURN_MEANEPISODE_RETURN_MINMODULE_TO_ENV_CONNECTORNUM_AGENT_STEPS_SAMPLED NUM_AGENT_STEPS_SAMPLED_LIFETIMENUM_ENV_STEPS_SAMPLEDNUM_ENV_STEPS_SAMPLED_LIFETIMENUM_EPISODESNUM_EPISODES_LIFETIMENUM_MODULE_STEPS_SAMPLED!NUM_MODULE_STEPS_SAMPLED_LIFETIMERLMODULE_INFERENCE_TIMERSAMPLE_TIMERTIME_BETWEEN_SAMPLINGWEIGHTS_SEQ_NO)unbatch)	EpisodeID
ResultDict	StateDict)ENV_CREATOR_global_registry)	PublicAPIz	ray.rllibalpha)	stabilityc                       e Zd ZdZ ee          def fd            Z ee          dddddddeded	e	d
e	de	de
e         fd            Zddddddee         dee         d	e	d
e	de	de
e         fdZ ee          d             Z ee          defd            Z ee          	 d-dddeeeee         f                  deeeee         f                  defd            Z ee          deddfd            Z ee          d             Z ee          d             Z ee          d             Z ee          d             Z ee          d.d            Z ee          d             Z ee          d             Zd Z d-d Z!d!ed"ed#e
e         fd$Z"d% Z#d& Z$ e%d'd()          d*             Z& e%d+d()          d,             Z' xZ(S )/SingleAgentEnvRunnerz9The generic environment runner for the single agent case.configc                    t                      j        dd|i| |                    d          | _        |                    di           | _        d t          | j        j                  D             | _        t          | j        | j
        sdn| j        j                  | _        d| _        d| _        | j
        '| j
        dk    s| j        j        s| j        j        dk    r|                                  | j                            | j        | j        | j                  | _        d| _        d| _        |                                  | j                            | j        | j                  | _        d	| _        d
 t5          | j                  D             | _        d| _        g | _        t=          t>                    | _         d| _!        d| _"        | j        j#        | _#        dS )zInitializes a SingleAgentEnvRunner instance.

        Args:
            config: An `AlgorithmConfig` object containing all settings needed to
                build this `EnvRunner` class.
        rB   tune_trial_idspacesc                 "    g | ]} |            S  rG   ).0clss     y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/env/single_agent_env_runner.py
<listcomp>z1SingleAgentEnvRunner.__init__.<locals>.<listcomp>T   s+     0
 0
 0
CCEE0
 0
 0
    r   N)envrE   device)rM   rE   Tc                     g | ]}d S NrG   rH   _s     rJ   rK   z1SingleAgentEnvRunner.__init__.<locals>.<listcomp>   s%     >
 >
 >
D>
 >
 >
rL   rG   )$super__init__getrD   rE   r   rB   callbacks_class
_callbacksr!   worker_indexnum_gpus_per_env_runner_devicerM   num_envscreate_env_on_local_workernum_env_runnersmake_envbuild_env_to_module_connector_env_to_module_cached_to_modulemodulemake_modulebuild_module_to_env_connector_module_to_env_needs_initial_resetrange	_episodes_shared_data_done_episodes_for_metricsr   list_ongoing_episodes_for_metrics_weights_seq_no_time_after_samplingepisodes_to_numpy)selfrB   kwargs	__class__s      rJ   rT   zSingleAgentEnvRunner.__init__F   s    	111&111"(**_"="=jj2..0
 0
'(CDD0
 0
 0

 "K&OAADK,O
 
 48% 1$${5 %{*a//MMOOO #kGGT\ H 
 
 "& +/ #kGG H 
 

 +/!>
 >
..>
 >
 >
 !DF'  	* %& %)!
 "&!>rL   NFnum_timestepsnum_episodesexplorerandom_actionsforce_resetrt   ru   rv   rw   rx   returnc                z   | j         t          |  d          ||J | j        :| j                            t
          t          j                    | j        z
             | j                            t          | j	        d           | j        
                    t                    5  || j        j        }|9|7| j        j        dk    r'| j                            | j                  | j        z  }|"|dk    sJ |                     ||||          }n@|!|dk    sJ |                     |||	          }n|                     | j        ||	          }t'          d
| j        | j        j        t-          | | j        |                     ddd           n# 1 swxY w Y   t          j                    | _        |S )a  Runs and returns a sample (n timesteps or m episodes) on the env(s).

        If neither `num_timesteps` nor `num_episodes` are provided and the config
        `batch_mode` is "truncate_episodes" then
        `config.get_rollout_fragment_length(self.worker_index) * self.num_envs`
        timesteps will be sampled.

        Args:
            num_timesteps: The number of timesteps to sample during this call.
                The episodes returned will contain the total timesteps greater than or
                equal to num_timesteps and less than num_timesteps + num_envs_per_env_runner.
                Note that only one of `num_timesteps` or `num_episodes` may be provided.
            num_episodes: The number of episodes to sample during this call.
                Note that only one of `num_timesteps` or `num_episodes` may be provided.
            explore: If True, will use the RLModule's `forward_exploration()`
                method to compute actions. If False, will use the RLModule's
                `forward_inference()` method. If None (default), will use the `explore`
                boolean setting from `self.config` passed into this EnvRunner's
                constructor. You can change this setting in your config via
                `config.env_runners(explore=True|False)`.
            random_actions: If True, actions will be sampled randomly (from the action
                space of the environment). If False (default), actions or action
                distribution parameters are computed by the RLModule.
            force_reset: Whether to force-reset all vectorized environments before
                sampling. Useful if you would like to collect a clean slate of new
                episodes via this call. Note that when sampling n episodes
                (`num_episodes != None`), this is fixed to True.

        Returns:
            A list of `SingleAgentEpisode` instances, carrying the sampled data.
        Nz2 doesn't have an env! Can't call `sample()` on it.)keyvalue   )r{   r|   windowtruncate_episodesr   )rt   rv   rw   rx   )ru   rv   rw   on_sample_end)
env_runnermetrics_loggersamplescallbacks_objectscallbacks_functionsrq   )rM   
ValueErrorrn   metrics	log_valuer5   timeperf_counterr6   rm   log_timer4   rB   rv   
batch_modeget_rollout_fragment_lengthrX   r[   _sampler   rW   callbacks_on_sample_enddict)rp   rt   ru   rv   rw   rx   r   s          rJ   samplezSingleAgentEnvRunner.sample   sp   R 8KKK   "-,2J2JK $0L"")'))D,EE #    	& 	 	
 	
 	
 \""<00 4	 4	 +- % (K*.AAA K;;D<MNNm$  ($)))),,"/##1 +	 '   )#q((((,,!-##1 '   ,,!%##1 '   "&/$(K$G##'<#  		 	 	 	W4	 4	 4	 4	 4	 4	 4	 4	 4	 4	 4	 4	 4	 4	 4	l %)$5$7$7!s   'C%FFF)rt   ru   rw   rx   c          
          g }|s	| j         rMd t           j                  D             x} _        i x} _                             |||           d _         n j        } j        }|d _         d}	d}
||	|k     rn|
|k     r|r,t          j         j        j	        
                                i}n j        }|J d _        |r| j         j        j        pdz  |	z    j        j        pdz  } j                            t"                    5   j                            ||          }ddd           n# 1 swxY w Y   nQ j                            t"                    5   j                            |          }ddd           n# 1 swxY w Y                         j        |||| j        t,          f          }|                    t          j                  }|                    t          j        |          }                     |          }|t4          k    r                     ||||d	          S |\  }}}}}t9          |          t9          |          }}t;                      }t           j                  D ]͊fd
|                                D             } j        |t@          <    j                 j!        s?|         "                    |         |                    |#                               |	dz  }	|         $                    |         |         |         |         |         |         |           Ή j        1 %                    i || j        | j        tL          f           _        t           j                  D ]̊|v r '                    d|           n '                    d|           |         j(        r|
dz  }
 '                    d|            j)        r.|*                    |         +                                           n|*                    |                    |
|k    r n ,                    |           ||	|k     n|
|k      j-        .                    |           g }| fd j        D             } j        D ]}
|
j/        dk    r|
0                                  j1        |
j2                 *                    |
            j)        r(|*                    |
+                                           v|*                    |
           | _         3                     j4        |	            5                    |	tm          |                     ||z   S )z2Helper method to sample n timesteps or m episodes.Nc                     g | ]}d S rP   rG   rQ   s     rJ   rK   z0SingleAgentEnvRunner._sample.<locals>.<listcomp>  s    (L(L(L!(L(L(LrL   FTr   r}   )t)	rl_modulebatchepisodesrv   shared_datar   metrics_prefix_keyrs   c                 (    i | ]\  }}||         S rG   rG   )rH   kv	env_indexs      rJ   
<dictcomp>z0SingleAgentEnvRunner._sample.<locals>.<dictcomp>n  s#    %Q%Q%Q$!Qa9%Q%Q%QrL   observationinfos)r   actionrewardr   
terminated	truncatedextra_model_outputs)r   r   rv   r   r   r   r   on_episode_starton_episode_stepon_episode_endc                 P    g | ]"}|                     j        j                   #S ))len_lookback_buffer)cutrB   episode_lookback_horizon)rH   epsrp   s     rJ   rK   z0SingleAgentEnvRunner._sample.<locals>.<listcomp>  s<     . . . DK,PQQ. . .rL   )metric	num_steps)7rf   rg   r[   rh   ri   _reset_envsr   ACTIONSrM   action_spacer   ra   num_env_steps_sampled_lifetimerB   r]   r   r   r3   rb   forward_explorationforward_inferencere   r*   popACTIONS_FOR_ENV_try_env_stepr   r   r7   setitemsrm   r6   is_resetadd_env_resetaddadd_env_stepr`   r"   _make_on_episode_callbackis_donero   appendto_numpy_new_episoderj   extendr   validaterl   id__log_env_steps_metrics_num_env_steps_sampled_increase_sampled_metricslen)rp   rt   ru   rv   rw   rx   done_episodes_to_returnr   r   tsr   to_env	to_moduleglobal_env_steps_lifetimeactionsactions_for_envresultsobservationsrewardsterminateds
truncatedsr   call_on_episode_startextra_model_outputongoing_episodes_to_returnongoing_episodes_continuationsr   s   `                         @rJ   r   zSingleAgentEnvRunner._sample  s'    =?  		,,2d6O2(L(LuT]7K7K(L(L(LLHt~.00K$+X{G<<< ).D%%~H+K#(,D% $1$=R-C,DVDV  %OTX%:%A%A%C%C !2	 ,,,)-&  J ; K7<1> 49	1;-
 ../GHH  !%!@!@%)B "A " "              
 ../GHH J J!%!>!>y!I!IJ J J J J J J J J J J J J J J ,,"k %# + L(?'A -   jj11G$jj)@'JJO((99G***||"/!-##1 $ $    ELAL';
E$+L$9$977;K;K'L$'EE!"4=11  	%Q%Q%Q%Q&,,..%Q%Q%Q"595I">2 ~i09 Y'55$0$;#I. 6    *--i8888 !GBY'44$0$;&y1&y1#I.#.y#9",Y"7,> 5     {&)-)<)<%#"k + L(?'A *= * *& #4=11 %; %;	 55522*Ix   
 22)9h  
 I&. ;1HC 22()X  
 - L/66x	7J7S7S7U7UVVVV 066x	7JKKK l** %%i:::[ %2$=R-C,DVDV` 	'../FGGG
 &(" $. . . .>. . .*
 ~ ; ; 5A::237;BB3GGG ) ;.55cllnnEEEE /55c:::: <DN 	4#FRTUUU&&r3/F+G+GHHH ')CCCs$   D;;D?D?&FFFc                     | j         | j        S t          | j         j        | j         j        ft
          | j         j        | j         j        ft          | j	        j        | j         j        fiS rP   )
rM   rE   r   observation_spacer   r   single_observation_spacesingle_action_spacer   r`   rp   s    rJ   
get_spaceszSingleAgentEnvRunner.get_spaces  sc    8;tx948;PQ#1,& #5, 

 
	
rL   c                    | j         D ]}|j        sJ t          |          }|                                }|                                }|j        | j        v rb| j        |j                 D ]B}|t          |          z  }||                                z  }||                                z  }C| j        |j        = |                     |||           | j                                          | j	        
                                S rP   )rj   r   r   
get_returnget_duration_sr   rl   _log_episode_metricsclearr   reduce)rp   r   episode_lengthepisode_returnepisode_duration_seps2s         rJ   get_metricsz SingleAgentEnvRunner.get_metrics  s    2 	 	C;; XXN ^^--N!$!3!3!5!5w$<<< >swG @ @D"c$ii/N"doo&7&77N&$*=*=*?*??&&6sw?%%0B   
 	'--/// |""$$$rL   )not_components
componentsr   c                
   t           | j        i}|                     t          ||          r^ | j        j        d|                     t          |          |                     t          |          d||t          <   | j        |t          <   |                     t          ||          r!| j
                                        |t          <   |                     t          ||          r!| j                                        |t          <   |S )N)r   r   rG   )r.   r   _check_componentr   rb   	get_state_get_subcomponentsrm   r6   r   r`   r   re   )rp   r   r   rq   states        rJ   r   zSingleAgentEnvRunner.get_state  s    01TU  !4j.QQ 	9)>)> *223F
SS#66'   * *
 * *E%& %)$8E.!  -z>
 
 	W 8<7J7T7T7V7VE34  -z>
 
 	W 8<7J7T7T7V7VE34rL   r   c                 |   t           |v r%| j                            |t                               t          |v r%| j                            |t                              t
          |v r|                    t          d          }|dk    s| j        |k     r|t
                   }t          |t          j                  rt          j        |          }t          |t                    rt          |v r|t                   }| j                            |           |dk    r|| _        t          |v r|t                   | _        d S d S )Nr   )r   r`   	set_stater   re   r   rU   r6   rm   
isinstanceray	ObjectRefr   r   rb   r.   r   )rp   r   weights_seq_norl_module_states       rJ   r   zSingleAgentEnvRunner.set_state/  s9   ,55))%0Q*RSSS,55))%0Q*RSSS %'' #YY~q99N ""d&:^&K&K"'(;"<os}== ?&)go&>&>O55I)_<<&56G&HO%%o666 !!'5$ *U22278V2WD/// 32rL   c                     dd| j         ifS )NrG   rB   )rB   r   s    rJ   get_ctor_args_and_kwargsz-SingleAgentEnvRunner.get_ctor_args_and_kwargsQ  s     t{#
 	
rL   c                 X    t          j        |           }|                    i            |S rP   )r   get_metadataupdate)rp   metadatas     rJ   r   z!SingleAgentEnvRunner.get_metadataX  s4    !.t44	
 	
 	

 rL   c                 T    t           | j        ft          | j        ft          | j        fgS rP   )r   rb   r   r`   r   re   r   s    rJ   get_checkpointable_componentsz2SingleAgentEnvRunner.get_checkpointable_componentsb  s.     !$+..0CD.0CD
 	
rL   c                 8    | j         rt          | d          sJ dS )a  Checks that self.__init__() has been completed properly.

        Ensures that the instance has a `MultiRLModule` and an
        environment defined.

        Raises:
            AssertionError: If the EnvRunner Actor has NOT been properly initialized.
        rb   N)rM   hasattrr   s    rJ   assert_healthyz#SingleAgentEnvRunner.assert_healthyj  s'     x3GD(3333333rL   c                 V     j         Z	  j                                          n?# t          $ r2}t                              d|j        d                     Y d}~nd}~ww xY w j        j        }t          |t                    s(t          | j
         j         j        j                  n| j        j         st          d          t           j        j         t                    rlt          j        t"           j        j                   rHd}t          j        t"           j        j                   t'          j        |fdfd	           i }nIt+           j        j                   r$d}t'          j        | fd
 fd	           i }n j        j         }t'          j         j        j                  }	 t1          t'          j        |f j        j        |d|           _         nH# t&          j        j        $ r1}t;          t=          j         j        j                             |d}~ww xY w j         j          _          j          j        j        k    sJ d _!        tE          d j#         j        j$        tK            j&         j         j'                             dS )a  Creates a vectorized gymnasium env and stores it in `self.env`.

        Note that users can change the EnvRunner's config (e.g. change
        `self.config.env_config`) and then call this method to create new environments
        with the updated configuration.
        Nz7Tried closing the existing env, but failed with error: r   )rX   num_workersremotez`config.env` is not provided! You should provide a valid environment to your config through `config.environment([env descriptor e.g. 'CartPole-v1'])`.zrllib-single-agent-env-v0c                                  S rP   rG   )entry_pointenv_ctxs   rJ   <lambda>z/SingleAgentEnvRunner.make_env.<locals>.<lambda>  s    KK$8$8 rL   c                 $     d| iz            S Nr[   rG   )r[   r  r  s    rJ   r  z/SingleAgentEnvRunner.make_env.<locals>.<lambda>  s     KKz8445 5 rL   )r  vector_entry_pointc                  8    j                                        S rP   rB   rM   )r  rp   s   rJ   r  z/SingleAgentEnvRunner.make_env.<locals>.<lambda>  s    DKOOG$<$< rL   c                 B    j                             d| iz            S r  r  )r[   r  rp   s    rJ   r  z/SingleAgentEnvRunner.make_env.<locals>.<lambda>  s$    DKOOz8445 5 rL   )r[   vectorization_modeTon_environment_created)r   r   rM   env_contextr   )(rM   close	ExceptionloggerwarningargsrB   
env_configr   r   rX   r  remote_worker_envsr   strr<   containsr;   rU   gymregistercallableVectorizeModegym_env_vectorize_moder	   make_vecnum_envs_per_env_runnererrorErrorr    r   formatr[   rf   r   rW    callbacks_on_environment_createdr   r   	unwrapped)rp   er  env_namevectorize_moder  r  s   `    @@rJ   r^   zSingleAgentEnvRunner.make_envw  sH    8       #vay# #        [+
*j11 	! !. ,{5	  GG !G {  	'M   -- 	'2B2K3
 3
 	' 3H*.{DKOLLKL88888$ $ $ $ $    JJdko&& 	'2HL<<<<<$ $ $ $ $    JJ{H*4;+MNN	%![@'5  !	  DHH y 	 	 	.5dkoFF 	
 "X.} CCCCC %)! 	$"o $ L#|H&#	  	
	
 
	
 
	
 
	
 
	
 
	
s,   & 
A"(AA">1G0 0H5,H00H5c                 0   | j         | j         j        nd }	 | j                            ||                                 d          }|                                | _        | j                            | j                   d S # t          $ r d | _        Y d S w xY w)NT)rM   rE   inference_only)
rM   r+  rB   get_rl_module_specr   buildrb   torZ   NotImplementedError)rp   rM   module_specs      rJ   rc   z SingleAgentEnvRunner.make_module  s    $(H$8dh  d	(,(F(F 1 1$ )G ) )K &++--DK KNN4<((((( # 	 	 	DKKKK	s   A'B   BBc                 J    | j         | j                                          d S d S rP   )rM   r  r   s    rJ   stopzSingleAgentEnvRunner.stop  s,     8HNN  rL   c                 X   t          | j                  D ]}|                     ||           | j                                         |                     | j        r| j        nd d           \  }}t          |          }t          | j                  D ]+}||         	                    ||         ||                    ,d | _
        | j        r0|                     | j        |||| j        t          f          | _
        t          | j                  D ]}|                     d||           d S )N)seedoptionsr   )r   r   rv   r   r   r   r   )rg   r[   r   rl   r   _try_env_resetrf   _seedr7   r   ra   rb   r`   r   r"   r   )rp   r   r   rv   r   r   r   s          rJ   r   z SingleAgentEnvRunner._reset_envs  sp   t}-- 	3 	3Ii2222
 	*00222 #11#8Bd	 2 
 
e |,, t}-- 	 	IY--(3I& .     "&; 	%)%8%8+!'$;#= &9 & &D" t}-- 	T 	TI**+=y(SSSS	T 	TrL   c                     ||n| j         }t          | j        j        | j        j                  ||<   |                     d||           d S )N)r   r   on_episode_created)rh   r   rM   r   r   r   )rp   r   r   s      rJ   r   z!SingleAgentEnvRunner._new_episode   s\    '3880"h?5
 
 
 	&&';YQQQQQrL   whichidxr   c           	         t          ||         | | j        | j        j        | j        |          }|dk    r| j        ||         j                 |d<   t          || j        t          | j
        d|           |           d S )N)episoder   r   rM   r   r   r   prev_episode_chunks
callbacks_r   )r   r   rM   r+  rb   rl   r   r   rW   getattrrB   )rp   r?  r@  r   rq   s        rJ   r   z.SingleAgentEnvRunner._make_on_episode_callback(  s     SM<"k
 
 
 $$$,0,N!-F() 	"o '5I%5I5I J J		
 	
 	
 	
 	
 	
rL   c                 `   | j                             t          |d           | j                             t          t          f|d           | j                             t
          t          f|d           | j                             t          |d           | j                             t          |dd           | j                             t          t          f|d           | j                             t          t          f|d           | j                             t          |d           |S )Nsum)r   lifetime_sumT)r   with_throughput)r   r   r-   r+   r   r1   r   r/   r.   r,   r2   r0   )rp   r   num_episodes_completeds      rJ   r   z.SingleAgentEnvRunner._increase_sampled_metrics?  sg   4iNNN$&67 	 	
 	
 	

 	%'89 	 	
 	
 	

 	" 	 	
 	
 	
 	*! 	 	 	
 	
 	
 	-/?@! 	 	
 	
 	

 	.0AB! 	 	
 	
 	

 	!"! 	 	
 	
 	

 rL   c           	      
   t          dt          t          j        | j        j        | j        j        pdz                                }| j                            t          ||           | j                            t          ||           | j                            t          ||           | j                            dt          f||           | j                            dt          f||           | j                            t          |d|           | j                            t          |d|           | j                            t           |d|           | j                            t"          |d|           d S )Nr}   )r~   agent_episode_return_meanmodule_episode_return_meanmin)r   r~   max)rO  intmathceilrB   "metrics_num_episodes_for_smoothingr]   r   r   r%   r(   r#   r   r   r&   r)   r$   r'   )rp   lengthretsecwins        rJ   r   z)SingleAgentEnvRunner._log_episode_metricsi  s   
 	KB{27a9  
 
 	/DDD2CDDD8#cJJJ(*:;S 	 	
 	
 	
 	)+<=s3 	 	
 	
 	

 	uSQQQ13uSQQQuSQQQ13uSQQQQQrL   z6SingleAgentEnvRunner.get_state(components='rl_module')T)newr'  c                     d S rP   rG   rp   r  rq   s      rJ   get_weightsz SingleAgentEnvRunner.get_weights  s	    
 	rL   z SingleAgentEnvRunner.set_state()c                     d S rP   rG   rZ  s      rJ   set_weightsz SingleAgentEnvRunner.set_weights  s    rL   rP   )ry   N))__name__
__module____qualname____doc__r   r   r   rT   rP  boolr   r   r   r   r   r   r9   r   r   r   r  r   r:   r   r   r   r   r  r  r^   rc   r7  r   r   r   r   r   r
   r[  r]  __classcell__)rr   s   @rJ   rA   rA   B   s       CCXiM?/ M? M? M? M? M? M?^ Xi " $!u u u u 	u
 u u u 
 	!u u u ut (,&*$!WD WD WD  }WD sm	WD
 WD WD WD 
 	!WD WD WD WDr Xi
 
 
 Xi%Z % % % %2 Xn =A AE	  U3
3#789 !sJsO';!<=	 
   : XnXy XT X X X XB Xn
 
 
 Xn   Xn
 
 
 Xi
4 
4 
4 Xi`
 `
 `
 `
D Xi  ( Xi  
)T )T )TVR R R R

"
.23E.F
 
 
 
.( ( (TR R R@ ZD   	  Z6dCCC  DC    rL   rA   )\loggingrQ  r   collectionsr   typingr   r   r   r   r   	gymnasiumr   gymnasium.wrappers.vectorr	   r   ray._common.deprecationr
   %ray.rllib.algorithms.algorithm_configr   ray.rllib.callbacks.callbacksr   ray.rllib.callbacks.utilsr   ray.rllib.corer   r   r   r   r   ray.rllib.core.columnsr   "ray.rllib.core.rl_module.rl_moduler   r   ray.rllib.envr   r   ray.rllib.env.env_contextr   ray.rllib.env.env_runnerr   r   "ray.rllib.env.single_agent_episoder   ray.rllib.utilsr   ray.rllib.utils.annotationsr   ray.rllib.utils.checkpointsr   ray.rllib.utils.errorr   r    ray.rllib.utils.frameworkr!   ray.rllib.utils.metricsr"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   "ray.rllib.utils.spaces.space_utilsr7   ray.rllib.utils.typingr8   r9   r:   ray.tune.registryr;   r<   ray.util.annotationsr=   	getLoggerr  rA   rG   rL   rJ   <module>r     s      # # # # # # A A A A A A A A A A A A A A     4 4 4 4 4 4 



 . . . . . . A A A A A A 7 7 7 7 7 7 3 3 3 3 3 3              + * * * * * E E E E E E E E C C C C C C C C 0 0 0 0 0 0 @ @ @ @ @ @ @ @ A A A A A A & & & & & & 0 0 0 0 0 0 6 6 6 6 6 6 J J J J J J J J 0 0 0 0 0 0                                             . 7 6 6 6 6 6 C C C C C C C C C C ; ; ; ; ; ; ; ; * * * * * *		;	'	'
 WO O O O O9n O O O O OrL   