
    &`ij                     n   d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZmZ d dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZmZmZ d d	lmZ d d
lmZmZ d dl m!Z!m"Z" d dl#m$Z$ d dl%m&Z&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7 d dl8m9Z9m:Z: d dl;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZImJZJmKZKmLZLmMZMmNZNmOZOmPZP d dlQmRZR d dlSmTZTmUZUmVZVmWZW d dlXmYZYmZZZ d dl[m\Z\  e:            \  Z]Z^ e j_        d          Z` e\d           G d d e'e7                      ZadS )!    N)defaultdict)partial)
CollectionDefaultDictDictListOptionalUnion)
Deprecated)AlgorithmConfig)make_callback)!COMPONENT_ENV_TO_MODULE_CONNECTOR!COMPONENT_MODULE_TO_ENV_CONNECTORCOMPONENT_RL_MODULE)Columns)MultiRLModuleMultiRLModuleSpec)INPUT_ENV_SINGLE_SPACESINPUT_ENV_SPACES)
EnvContext)ENV_STEP_FAILURE	EnvRunner)MultiAgentEnv)MultiAgentEpisode)_gym_env_creator)make_vec)VectorMultiAgentEnv)
force_list)override)Checkpointable)
get_devicetry_import_torch)ENV_TO_MODULE_CONNECTOREPISODE_DURATION_SEC_MEANEPISODE_LEN_MAXEPISODE_LEN_MEANEPISODE_LEN_MINEPISODE_RETURN_MAXEPISODE_RETURN_MEANEPISODE_RETURN_MINMODULE_TO_ENV_CONNECTORNUM_AGENT_STEPS_SAMPLED NUM_AGENT_STEPS_SAMPLED_LIFETIMENUM_ENV_STEPS_SAMPLEDNUM_ENV_STEPS_SAMPLED_LIFETIMENUM_EPISODESNUM_EPISODES_LIFETIMENUM_MODULE_STEPS_SAMPLED!NUM_MODULE_STEPS_SAMPLED_LIFETIMERLMODULE_INFERENCE_TIMERSAMPLE_TIMERTIME_BETWEEN_SAMPLINGWEIGHTS_SEQ_NO)check_multiagent_environments)	EpisodeIDModelWeights
ResultDict	StateDict)ENV_CREATOR_global_registry)	PublicAPIz	ray.rllibalpha)	stabilityc                       e Zd ZdZ ee          def fd            Z ee          dddddddeded	e	d
e	de	de
e         fd            Zddddddee         dee         d	e	d
e	de	de
e         fdZd Z ee          d             Z ee          defd            Z ee          	 d4dddeeeee         f                  deeeee         f                  defd            Z ee          deddfd            Z ee          d             Z ee          d             Z ee          d             Z ee          d             Z ee          d             Z ee          d             Z ee          d             Z d  Z!d5d"Z"d#ed$ed%e
e         fd&Z#d' Z$	 	 	 d6d(Z%e&d)efd*            Z' e(d+d,          d4d-            Z) e(d.d,          	 	 d7d0e*d1ee+         d2eddfd3            Z, xZ-S )8MultiAgentEnvRunnerz8The genetic environment runner for the multi-agent case.configc                     t                      j        dd|i| | j        j        s%t	          dt          |           j         d          |                    di           | _        | 	                                 d t          | j        j                  D             | _        t          | j        | j        sdn| j        j                  | _        d| _        d| _        | j        '| j        dk    s| j        j        s| j        j        dk    r|                                  | j                            | j        | j        | j                  | _        d| _        d| _        |                                  | j                            | j        r| j        j        nd| j        	          | _        d
| _        d| _        d| _         d| _!        d| _"        dS )zInitializes a MultiAgentEnvRunner instance.

        Args:
            config: An `AlgorithmConfig` object containing all settings needed to
                build this `EnvRunner` class.
        rD   z!Cannot use this EnvRunner class (z), if your setup is not multi-agent! Try adding multi-agent information to your AlgorithmConfig via calling the `config.multi_agent(policies=..., policy_mapping_fn=...)`.spacesc                 "    g | ]} |            S  rH   ).0clss     x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/env/multi_agent_env_runner.py
<listcomp>z0MultiAgentEnvRunner.__init__.<locals>.<listcomp>_   s    TTTS3355TTT    r   N)envrF   device)rN   rF   TrH   )#super__init__rD   is_multi_agent
ValueErrortype__name__getrF   _setup_metricsr   callbacks_class
_callbacksr!   worker_indexnum_gpus_per_env_runner_devicerN   num_envscreate_env_on_local_workernum_env_runnersmake_envbuild_env_to_module_connector_env_to_module_cached_to_modulemodulemake_modulebuild_module_to_env_connector	unwrapped_module_to_env_needs_initial_reset_episode_shared_data_weights_seq_no_time_after_sampling)selfrD   kwargs	__class__s      rK   rQ   zMultiAgentEnvRunner.__init__G   s    	111&111 {) 	+DJJ4G + + +   jj2.. UTJt{7R,S,STTT "K&OAADK,O
 
 37% 1$${5 %{*a//MMOOO #kGGT\ H 
 
 "& 04 #kGG&*h8""D H 
 
 +/!59 $% %)!!!rM   NFnum_timestepsnum_episodesexplorerandom_actionsforce_resetrr   rs   rt   ru   rv   returnc                v   | j         t          |  d          ||J d|d|            | j        :| j                            t
          t          j                    | j        z
             | j                            t          | j	        d           | j        
                    t                    5  || j        j        }|9|7| j        j        dk    r'| j                            | j                  | j        z  }||                     ||||	          }n8||                     |||
          }n|                     | j        ||
          }t'          d| j        | j        j        t-          | | j        |                     ddd           n# 1 swxY w Y   t          j                    | _        |S )a  Runs and returns a sample (n timesteps or m episodes) on the env(s).

        Args:
            num_timesteps: The number of timesteps to sample during this call.
                Note that only one of `num_timesteps` or `num_episodes` may be provided.
            num_episodes: The number of episodes to sample during this call.
                Note that only one of `num_timesteps` or `num_episodes` may be provided.
            explore: If True, will use the RLModule's `forward_exploration()`
                method to compute actions. If False, will use the RLModule's
                `forward_inference()` method. If None (default), will use the `explore`
                boolean setting from `self.config` passed into this EnvRunner's
                constructor. You can change this setting in your config via
                `config.env_runners(explore=True|False)`.
            random_actions: If True, actions will be sampled randomly (from the action
                space of the environment). If False (default), actions or action
                distribution parameters are computed by the RLModule.
            force_reset: Whether to force-reset all (vector) environments before
                sampling. Useful if you would like to collect a clean slate of new
                episodes via this call. Note that when sampling n episodes
                (`num_episodes != None`), this is fixed to True.

        Returns:
            A list of `MultiAgentEpisode` instances, carrying the sampled data.
        Nz2 doesn't have an env! Can't call `sample()` on it.zSProvide either `num_timesteps` or `num_episodes`. Both provided here:num_timesteps=z, num_episodes=)keyvalue   )ry   rz   windowtruncate_episodes)rr   rt   ru   rv   )rs   rt   ru   on_sample_end)
env_runnermetrics_loggersamplescallbacks_objectscallbacks_functionsro   )rN   rS   rm   metrics	log_valuer6   timeperf_counterr7   rl   log_timer5   rD   rt   
batch_modeget_rollout_fragment_lengthrZ   r]   _sampler   rY   callbacks_on_sample_enddict)rn   rr   rs   rt   ru   rv   r   s          rK   samplezMultiAgentEnvRunner.sample   st   D 8KKK   "-,2J2J11 1!-1 1 3K2JK $0L"")'))D,EE #    	& 	 	
 	
 	
 \""<00 0	 0	 +-% (K*.AAA K;;D<MNNm$  (,,"/##1 +	 '   ),,!-##1 '   ,,!%##1 '   "&/$(K$G##'<#  		 	 	 	O0	 0	 0	 0	 0	 0	 0	 0	 0	 0	 0	 0	 0	 0	 0	d %)$5$7$7!s   5CFFF)rr   rs   ru   rv   c          
         # g }|s	| j         rMd t           j                  D             x# _        i x} _                             #||           d _         n j        # j        }|d _         d}d}	d}
||
|k     rTn j        j        dk    r||k     r<n|	|k     r4|r/t          j	        # fdt           j                  D             i}n j
        }|J d  _
        |r|r| j         j        j        pdz  |z    j        j        pdz  } j                            t                    5   j                            ||          }d d d            n# 1 swxY w Y   nQ j                            t                    5   j                            |          }d d d            n# 1 swxY w Y                         j        |#|| j        t(          f	          }ni }i |d
<   |                    t          j	        d #D                       }|                    t          j        |          }                     |          }|t0          k    r                     ||||d          S |\  }}}}}t5                      }t           j                  D ]s}t7          t8                    }|                                D ]G\  }}||         }|                                D ]%\  }}|||         |<    j        ||         t>          <   &Ht9          |          } j        |         j         s?#|         !                    ||         ||                    |"                    |           Ӊ#|         #                    ||         ||         ||         ||         ||         ||         |            $                     j%        d           | &                    d||         #|                   z  }|	tO          ||                   z  }	ug }t           j                  D ]	}||v r (                    d|#           n (                    d|#           #|         j)        r|
dz  }
 (                    d|#            *                    #|                    |+                    #|                    |+                    #|                    #|         j,        } -                    |#|
|k               |d
         .                    |#|         j,        i           |
|k    r n j        S|r  /                    i || j        |d             /                    i #| j        | j        t`          f           _
         j        j1        r|D ]} | 2                                 ||
|k     n j        j        dk    r||k     ,n|	|k     4 j3        4                    |           g }!|ǈ fd j        D             }" j        D ]}
|
j5        dk    r|
6                                  j7        |
j,                 +                    |
            *                    |
            j        j1        r(|!+                    |
2                                           |!+                    |
           |" _        ||!z   S )Nc                     g | ]}d S NrH   rI   _s     rK   rL   z/MultiAgentEnvRunner._sample.<locals>.<listcomp>  s    (L(L(L!(L(L(LrM   FTr   	env_stepsc                 `    g | ])fd                                           D             *S )c                     i | ]?}|j         j                 j                            |                                          @S rH   )rN   envsrg   get_action_spacer   )rI   aidirn   s     rK   
<dictcomp>z:MultiAgentEnvRunner._sample.<locals>.<listcomp>.<dictcomp>1  sP        !$  q!1&'7'7'<'<#VXX  rM   )get_agents_to_act)rI   r   episodesrn   s    @rK   rL   z/MultiAgentEnvRunner._sample.<locals>.<listcomp>0  sd     
& 
& 
&      (0{'D'D'F'F  
& 
& 
&rM   r{   )t)	rl_modulebatchr   rt   shared_datar   metrics_prefix_keyvector_env_episodes_mapc                     g | ]}i S rH   rH   r   s     rK   rL   z/MultiAgentEnvRunner._sample.<locals>.<listcomp>j  s    2H2H2H!22H2H2HrM   rq   observationsinfos)r   actionsrewardsr   terminateds
truncatedsextra_model_outputs)metric	num_stepson_episode_starton_episode_stepon_episode_end)call_on_episode_created)r   r   rt   r   r   r   )r   r   rt   r   r   r   r   c                 P    g | ]"}|                     j        j                   #S ))len_lookback_buffer)cutrD   episode_lookback_horizon)rI   epsrn   s     rK   rL   z/MultiAgentEnvRunner._sample.<locals>.<listcomp>  s<     . . . DK,PQQ. . .rM   )8ri   ranger]   	_episodesrk   _reset_envsrD   count_steps_byr   ACTIONSrc   num_env_steps_sampled_lifetimer_   r   r   r4   rd   forward_explorationforward_inferencerh   r+   popACTIONS_FOR_ENV_try_env_stepr   r   setr   r   itemsrl   r7   is_resetadd_env_resetaddadd_env_step_log_env_steps_metrics_num_env_steps_sampled_increase_sampled_metricslen_make_on_episode_callbackis_done_prune_zero_len_sa_episodesappendid__new_episodeupdaterb   r#   episodes_to_numpyto_numpy_done_episodes_for_metricsextendenv_tvalidate_ongoing_episodes_for_metrics)$rn   rr   rs   rt   ru   rv   done_episodes_to_returnr   env_tsagent_tsr   to_env	to_moduleglobal_env_steps_lifetimer   actions_for_envresultsr   r   r   r   r   call_on_episode_start	env_indexr   colma_dict_listma_dictagent_idval"done_episodes_to_run_env_to_moduleold_episode_idepisodeongoing_episodes_to_returnongoing_episodes_continuationsr   s$   `                                  @rK   r   zMultiAgentEnvRunner._sample  s	    <>  	,,2d6O2(L(LuT]7K7K(L(L(LLHt~.00K$+X{G<<< ).D%%~H+K#(,D%  $ < ;-<< &&&--  4 O 
& 
& 
& 
& 
& "'t}!5!5
& 
& 
&  !2	 ,,,)-&   N !? $ ; @qB$% "[8=A	5?1
 "\223KLL  %)[%D%D )-F &E & &F              
 "\223KLL N N%)[%B%B9%M%MFN N N N N N N N N N N N N N N "00"&+$!) '$/ $,C+E 1  FF  F57K12 jj2H2Hx2H2H2HIIG$jj)@'JJO((99G***||"/!-##1 $ $    ELAL';
E$'EE! #4=11 .= .=	&1$&7&7# *0 1 1%C*95G)0 1 1#=@+H5c: !0 ,H5* 1
 '++>&?&?# ~i09 =Y'55%1)%<#I. 6   
 *--i8888 Y'44%1)%< '	 2 '	 2#I.$/	$:#-i#8,? 5    ''#Ba (    d<<<	2HY4G  F L$; < <<HH13."4=11 2 2	 55522*Ix   
 22)9h  
 I&. %1HC 22()X   44Xi5HIII+228I3FGGG 7==hy>QRRR%-i%8%<N
 %%! 141D &      9:AA'))<)@A   l** {&5  '' !C '"&+$/ $ (    *.)<)<%#"k + L(?'A *= * *& {, '6 ' 'G$$&&&&G $ < ;-<< &&&--D 	'../FGGG
 &(" $. . . .>. . .*
 ~ ; ; 9>>237;BB3GGG00555 ;0 ;.55cllnnEEEE /55c:::: <DN ')CCCs$   1EE!EF,,F03F0c                 :   t          | j                  D ]}|                     ||           | j                                         |                     | j        r| j        nd d           \  }}t          | j                  D ]+}||                             ||         ||                    ,d | _	        | j
        r0|                     | j
        |||| j        t          f          | _	        t          | j                  D ]}|                     d||           d S )N)seedoptionsr   )r   r   rt   r   r   metrics_key_prefixr   )r   r]   r   r   clear_try_env_resetri   _seedr   rc   rd   rb   r   r#   r   )rn   r   r   rt   r   r   r   s          rK   r   zMultiAgentEnvRunner._reset_envs2  sa   t}-- 	3 	3Ii2222 	*00222 #11#8Bd	 2 
 
e t}-- 	 	IY--))4I& .     "&; 	%)%8%8+!'$;#= &9 & &D" t}-- 	T 	TI**+=y(SSSS	T 	TrM   c                       j          j        S t           j         j         j         j        ft
           j         j         j         j        fi fd j        j        j        	                                D             S )Nc                 @    i | ]\  }}||j         j        |         fS rH   )rb   action_space)rI   midorn   s      rK   r   z2MultiAgentEnvRunner.get_spaces.<locals>.<dictcomp>f  s?       C a,9#>?  rM   )
rN   rF   r   observation_spacer   r   single_observation_spacesingle_action_spacerb   r   rn   s   `rK   
get_spaceszMultiAgentEnvRunner.get_spacesZ  s    8; tx948;PQ#1,&

   "1CJPPRR  

 
	
rM   c                    | j         D ]}|j        sJ t          |          }t          t          d |j                                        D                       }|                                }|                                }t          t          d |j        
                                D                       }t          t          d |j        
                                D                       }|j        | j        v r| j        |j                 D ]}|                                }	|t          |          z  }||	z  }||                                z  }|j        
                                D ]|}
|
                                }|t          |
j                  xx         t          |
          z  cc<   |t          |
j                  xx         |z  cc<   ||
j        xx         |z  cc<   }| j        |j        = |                     |||||t#          |                     | j                                          | j                                        S )Nc                 N    i | ]"\  }}t          |          t          |          #S rH   )strr   )rI   r   sa_epss      rK   r   z3MultiAgentEnvRunner.get_metrics.<locals>.<dictcomp>t  s*    UUU;3S3v;;UUUrM   c                 \    i | ])}t          |j                  |                                *S rH   )r  r   
get_returnrI   r  s     rK   r   z3MultiAgentEnvRunner.get_metrics.<locals>.<dictcomp>{  s@        ((&*;*;*=*=  rM   c                 B    i | ]}|j         |                                S rH   )	module_idr  r  s     rK   r   z3MultiAgentEnvRunner.get_metrics.<locals>.<dictcomp>  s9        $f&7&7&9&9  rM   )r   r   r   r   intagent_episodesr   r  get_duration_sfloatvaluesr   r   r  r   r	  _log_episode_metricsr   r   r   reduce)rn   r   episode_lengthagent_stepsepisode_returnepisode_duration_sagent_episode_returnsmodule_episode_returnseps2return_eps2r  	return_sas               rK   get_metricszMultiAgentEnvRunner.get_metricsl  s    2 0	 0	C;; XXN%UU#:L:R:R:T:TUUU K !^^--N!$!3!3!5!5$/ "%"4";";"="=  % %! &1 "%"4";";"="=  & &" w$<<< >swG 
N 
ND"&//"3"3K"c$ii/N"k1N&$*=*=*?*??&"&"5"<"<">"> N N$*$5$5$7$7	#C$8$8999S[[H999-c&/.B.BCCCyPCCC.v/?@@@IM@@@@	N 6sw?%%"%&[!!    	'--/// |""$$$rM   )not_components
componentsr  c                
   t           | j        i}|                     t          ||          r^ | j        j        d|                     t          |          |                     t          |          d||t          <   | j        |t          <   |                     t          ||          r!| j
                                        |t          <   |                     t          ||          r!| j                                        |t          <   |S )N)r  r  rH   )r/   r   _check_componentr   rd   	get_state_get_subcomponentsrl   r7   r   rb   r   rh   )rn   r  r  ro   states        rK   r  zMultiAgentEnvRunner.get_state  s    01TU   !4j.QQ 	9)>)> *223F
SS#66'   * *
 * *E%& %)$8E.!   -z>
 
 	W 8<7J7T7T7V7VE34  -z>
 
 	W 8<7J7T7T7V7VE34rM   r!  c                 &   t           |v r%| j                            |t                               t          |v r%| j                            |t                              t
          |v r|                    t          d          }|dk    s| j        |k     rU|t
                   }t          |t          j                  rt          j        |          }| j                            |           |dk    r|| _        t          |v r|t                   | _        d S d S Nr   )r   rb   	set_stater   rh   r   rV   r7   rl   
isinstanceray	ObjectRefrd   r/   r   )rn   r!  weights_seq_norl_module_states       rK   r$  zMultiAgentEnvRunner.set_state  s   ,55))%0Q*RSSS,55))%0Q*RSSS %'' #YY~q99N ""d&:^&K&K"'(;"<os}== ?&)go&>&>O%%o666 !!'5$ *U22278V2WD/// 32rM   c                     dd| j         ifS )NrH   rD   )rD   r   s    rK   get_ctor_args_and_kwargsz,MultiAgentEnvRunner.get_ctor_args_and_kwargs  s     t{#
 	
rM   c                 X    t          j        |           }|                    i            |S r   )r    get_metadatar   )rn   metadatas     rK   r-  z MultiAgentEnvRunner.get_metadata  s4    !.t44	
 	
 	

 rM   c                 T    t           | j        ft          | j        ft          | j        fgS r   )r   rd   r   rb   r   rh   r   s    rK   get_checkpointable_componentsz1MultiAgentEnvRunner.get_checkpointable_components  s.     !$+..0CD.0CD
 	
rM   c                 &    | j         r| j        sJ dS )a  Checks that self.__init__() has been completed properly.

        Ensures that the instances has a `MultiRLModule` and an
        environment defined.

        Raises:
            AssertionError: If the EnvRunner Actor has NOT been properly initialized.
        N)rN   rd   r   s    rK   assert_healthyz"MultiAgentEnvRunner.assert_healthy   s     x'DK'''''rM   c                 6   | j         \	 | j                                          n?# t          $ r2}t                              d|j        d                     Y d }~nd }~ww xY w| ` | j        j        }t          |t                    s,t          || j
        | j        j        | j        j                  }| j        j         st          d          t          | j        j         t                    rWt          j        t"          | j        j                   r3t%          t          j        t"          | j        j                   |          }n!t%          t(          | j        j         |          }t+          j        d|d           | j        j        }t1          d| j        j        t          |t*          j        j        j                  r|n5t*          j        j                            |                                          	          | _         | j         j        | _        | j        | j        j        k    sJ | j        j        sa	 | j         j        D ]}tA          |j!                   n# t          $ r/}t          "                    |j        d                    Y d }~nd }~ww xY w	 t          | j         tF                    sJ t          | j         j        d         j!        tH                    sJ np# tJ          $ r> t          "                    d
| j          d| j         j        d         j!         d           Y n)tL          $ r t          "                    d           Y nw xY wd| _'        tQ          d| j)        | j        j*        tW          | | j,        | j         j!        |                     d S )NzETried closing the existing env (multi-agent), but failed with error: r   )rZ   num_workersremotez`config.env` is not provided! You should provide a valid environment to your config through `config.environment([env descriptor e.g. 'CartPole-v1'])`.)env_descriptorenv_contextzrllib-multi-agent-env-v0T)entry_pointdisable_env_checker)r]   vectorization_modezWhen using the `MultiAgentEnvRunner`, the environment must inherit from `ray.rllib.env.vector.vector_multi_agent_env.VectorMultiAgentEnv` (but yours is zJ) and the individual envs must inherit from `MultiAgentEnv` (but yours is z)!z^When using the `MultiAgentEnvRunner`, the env must have a subscriptable `self.envs` attribute!on_environment_created)r   r   rN   r7  r   )-rN   close	ExceptionloggerwarningargsrD   
env_configr%  r   rZ   r_   remote_worker_envsrS   r  r>   containsr=   r   rV   r   gymregistergym_env_vectorize_moder   num_envs_per_env_runnerr   registrationVectorizeModelowerr]   disable_env_checkingr8   rg   	exceptionr   r   AssertionError	TypeErrorri   r   rY    callbacks_on_environment_createdr   r   )rn   eenv_ctxr8  vectorize_moderN   s         rK   r`   zMultiAgentEnvRunner.make_env  s    8       *fQi* *       
 +(':.. 	 !. K7{5	  G { 	$   -- 	2B2K3
 3
 	 " $[$+/BB KK
 " #{#  K
 	&# $	
 	
 	
 	

 ; &[8 nch.C.QRRQX*889M9M9O9OPP
 
 
 "X.} CCCCC {/ 	,8= A AC1#-@@@@A , , ,  ++++++++,!$(,?@@@@@!$(-"2"<mLLLLLL!     6:>(6 6 x}Q'1	6 6 6          B     %)! 	$"o $ L#|H&#	  	
	
 
	
 
	
 
	
 
	
 
	
sF   # 
A(AA6#I 
J$%JJAK   AM'#MMc                 @     j          j         j        nd }	  j                            |                                 d          }|                                 _        t          r j                             fd           d S d S # t          $ r d  _        Y d S w xY w)NT)rN   rF   inference_onlyc                 z    t          |t          j        j                  r|                    j                  n|S r   )r%  torchnnModuletor\   )r   modrn   s     rK   <lambda>z1MultiAgentEnvRunner.make_module.<locals>.<lambda>  s4    %c58?;;!t|,,,  rM   )
rN   rg   rD   get_multi_rl_module_specr   buildrd   rV  foreach_moduleNotImplementedError)rn   rN   module_specs   `  rK   re   zMultiAgentEnvRunner.make_moduley  s    $(H$8dh  d	-1[-Q-Q 1 1$ .R . .K &++--DK
  **         # 	 	 	DKKKK	s   A,B BBc                 J    | j         | j                                          d S d S r   )rN   r<  r   s    rK   stopzMultiAgentEnvRunner.stop  s,     8HNN  rM   c                 F    g | _         t          t                    | _        d S r   )r   r   listr   r   s    rK   rW   z"MultiAgentEnvRunner._setup_metrics  s%    CE'  	***rM   Tc                 0    ||n j         }t           fd j        j                 j        j        D              fd j        j                 j        j        D              j        j                  |<   |r                     d|           d S d S )Nc                 f    i | ]-}|j         j                 j                            |          .S rH   )rN   r   rg   get_observation_spacerI   r   r   rn   s     rK   r   z4MultiAgentEnvRunner._new_episode.<locals>.<dictcomp>  sE        TX]9-7MMcRR  rM   c                 f    i | ]-}|j         j                 j                            |          .S rH   )rN   r   rg   r   rh  s     rK   r   z4MultiAgentEnvRunner._new_episode.<locals>.<dictcomp>  sE        TX]9-7HHMM  rM   )r   r   agent_to_module_mapping_fnon_episode_created)	r   r   rN   r   rg   possible_agentsrD   policy_mapping_fnr   )rn   r   r   r   s   ``  rK   r   z MultiAgentEnvRunner._new_episode  s    '388/    8=3=M      8=3=M   (,{'D

 

 

 # 	V**+?HUUUUU	V 	VrM   whichidxr   c           	         t          ||         | | j        | j        j        | j        |          }|dk    r| j        ||         j                 |d<   t          || j        t          | j
        d|           |           d S )N)r   r   r   rN   r   r   r   prev_episode_chunks
callbacks_r   )r   r   rN   rg   rd   r   r   r   rY   getattrrD   )rn   rn  ro  r   ro   s        rK   r   z-MultiAgentEnvRunner._make_on_episode_callback  s     SM<"k
 
 
 $$$,0,N!-F() 	"o '5I%5I5I J J		
 	
 	
 	
 	
 	
rM   c                    | j                             t          |d           | j                             t          |dd           |j        rD| j                             t
          dd           | j                             t          dd           |D ]}| j                             t          t          |          fdd           | j                             t          t          |          fdd           | j                             t          |                    |          fdd           | j                             t          |                    |          fdd           |S )Nsum)r  lifetime_sumT)r  with_throughputr{   )r   r   r.   r/   r   r0   r1   r,   r  r-   r2   
module_forr3   )rn   r   next_obsr   r   s        rK   r   z-MultiAgentEnvRunner._increase_sampled_metrics  s   4iNNN*! 	 	 	
 	
 	
 ? 	TL""<5"AAAL""#8!N"SSS  	 	CL""(#c((3 #   
 L""13s88<% #   
 L"")7+=+=c+B+BC #   
 L""2G4F4Fs4K4KL% #    
 rM   c           	         t          dt          t          j        | j        j        | j        j        pdz                                }| j                            t          |t          |t          |i||||dni |           | j                            t          |t          |id|           | j                            t          |t          |id|           d S )Nr{   )agent_episode_returns_meanmodule_episode_returns_meanr  )r|   min)r  r|   max)r~  r
  mathceilrD   "metrics_num_episodes_for_smoothingr_   r   log_dictr&   r)   r$   r'   r*   r%   r(   )rn   lengthretsecagentsmodulesr  wins           rK   r  z(MultiAgentEnvRunner._log_episode_metrics  s,    	KB{27a9  
 
 	 &#S)3 ) 7=7>'2     # 	 	
 	
 	
( 	"C  	 	
 	
 	
 	"C  	 	
 	
 	
 	
 	
rM   r   c                 |    | j                                                                         D ]\  }}|s| j         |= d S r   )r  copyr   )r   r   	agent_epss      rK   r   z/MultiAgentEnvRunner._prune_zero_len_sa_episodes*  sO    #*#9#>#>#@#@#F#F#H#H 	5 	5Hi 5*84	5 	5rM   z5MultiAgentEnvRunner.get_state(components='rl_module'))newerrorc                 R    |                      t                    t                   }|S )N)r  )r  r   )rn   r  r)  s      rK   get_weightszMultiAgentEnvRunner.get_weights0  s(    
 ..4G.HH
 rM   zMultiAgentEnvRunner.set_state()r   weightsglobal_varsr(  c                 P    |J |                      t          |t          |i          S r   )r$  r   r7   )rn   r  r  r(  s       rK   set_weightszMultiAgentEnvRunner.set_weights:  s5     """~~#W
 
 	
rM   r   )NT)NNNr#  ).rU   
__module____qualname____doc__r   r   r   rQ   r
  boolr   r   r   r	   r   r   r   r;   r  r    r
   r  r   r<   r  r$  r+  r-  r0  r2  r`   re   rb  rW   r   r   r   r  staticmethodr   r   r  r:   r   r  __classcell__)rp   s   @rK   rC   rC   C   s"       BBXiI) I) I) I) I) I) I)V Xi " $!m m m m 	m
 m m m 
	 m m m md (,&*$!mD mD mD  }mD sm	mD
 mD mD mD 
	 mD mD mD mD^	&T &T &TP Xi
 
 
" Xi8%Z 8% 8% 8% 8%t Xn =A  AE	     U3
3#789  !sJsO';!<=	  
       D XnXy XT X X X X8 Xn
 
 
 Xn   Xn
 
 
 Xi
( 
( 
( Xii
 i
 i
V Xi  4 Xi  
  V V V V 

"
.23D.E
 
 
 
.$ $ $V ;
 ;
 ;
 ;
z 5-> 5 5 5 \5
 ZC    	  Z5UCCC '+	
 

 d^
 	

 

 
 
 DC
 
 
 
 
rM   rC   )bloggingr  r   collectionsr   	functoolsr   typingr   r   r   r   r	   r
   	gymnasiumrD  r&  ray._common.deprecationr   %ray.rllib.algorithms.algorithm_configr   ray.rllib.callbacks.utilsr   ray.rllib.corer   r   r   ray.rllib.core.columnsr   (ray.rllib.core.rl_module.multi_rl_moduler   r   ray.rllib.envr   r   ray.rllib.env.env_contextr   ray.rllib.env.env_runnerr   r   ray.rllib.env.multi_agent_envr   !ray.rllib.env.multi_agent_episoder   ray.rllib.env.utilsr   !ray.rllib.env.vector.registrationr   +ray.rllib.env.vector.vector_multi_agent_envr   ray.rllib.utilsr   ray.rllib.utils.annotationsr   ray.rllib.utils.checkpointsr    ray.rllib.utils.frameworkr!   r"   ray.rllib.utils.metricsr#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   ray.rllib.utils.pre_checks.envr8   ray.rllib.utils.typingr9   r:   r;   r<   ray.tune.registryr=   r>   ray.util.annotationsr?   rV  r   	getLoggerr>  rC   rH   rM   rK   <module>r     s      # # # # # #       G G G G G G G G G G G G G G G G     



 . . . . . . A A A A A A 3 3 3 3 3 3         
 + * * * * * U U U U U U U U C C C C C C C C 0 0 0 0 0 0 @ @ @ @ @ @ @ @ 7 7 7 7 7 7 ? ? ? ? ? ? 0 0 0 0 0 0 6 6 6 6 6 6 K K K K K K & & & & & & 0 0 0 0 0 0 6 6 6 6 6 6 B B B B B B B B                                             . I H H H H H Q Q Q Q Q Q Q Q Q Q Q Q ; ; ; ; ; ; ; ; * * * * * *q		;	'	'
 WC
 C
 C
 C
 C
)^ C
 C
 C
 C
 C
rM   