
    &`i                       d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
mZmZmZmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlm Z m!Z!m"Z" d dl#m$Z$  e$d           G d d                      Z%dS )    N)defaultdict)	AnyCallable
CollectionDefaultDictDictListOptionalSetUnion)
Deprecated)SingleAgentEpisode)InfiniteLookbackBuffer)MultiAgentBatch)
force_list)MultiAgentEnvError)batch)AgentIDModuleIDMultiAgentDict)	PublicAPIalpha)	stabilityc            %          e Zd ZdZdZdZ	 drdddddddddddddddddee         d	eee	                  d
ee
j                 deee	                  deee	                  dee
j                 deee	                  dee	ef         dee	ef         deee	                  dee         deeeef                  deeef         deeeef                  deeeef                  deeed gef                  f dZddd	e	dee	         ddfdZ	 drddddd	e	de	de	dee	         dee	         dee	         dee	         ddfdZdsdZed             Zedefd             Zedefd!            Zed"             Zdtd#Zdud%Zdvdedd fd'Zedee         fd(            Z ede	fd)            Z!d*edee         fd+Z"	 	 dwd,ddddd-d.eeeee         e#f                  d/eee$e         ef                  d0ed1ed2ee%         d3ed4edee	ee	         f         fd5Z&	 	 dwd,dddd6d.eeeee         e#f                  d/eee$e         ef                  d0ed1ed2ee%         d4edee	ee	         f         fd7Z'	 	 dwd,ddddd-d.eeeee         e#f                  d/eee$e         ef                  d0ed1ed2ee%         d3ed4edee	ee	         f         fd8Z(	 	 dwd,dddd6d.eeeee         e#f                  d/eee$e         ef                  d0ed1ed2ee)         d4edee	ee	         f         fd9Z*	 	 	 dxd,dddd6d:ee         d.eeeee         e#f                  d/eee$e         ef                  d0ed1ed2ee%         d4edee	ee	         f         fd;Z+de	fd<Z,de	fd=Z-dd>d?e#dee         dd fd@Z#dA Z.dB Z/dsdCZ0deee%f         fdDZ1e2dEeee%f         dd fdF            Z3de4fdGZ5	 dydHede)fdIZ6dee         fdJZ7dee         fdKZ8de)fdLZ9dddMdNe	dOeeeee         e#f                  d1eddfdPZ:dddMdNe	dOeeeee         e#f                  d1eddfdQZ;dddMdNe	dOeeeee         e#f                  d1eddfdRZ<dddMdNe	dOeeeee         e#f                  d1eddfdSZ=defdTZ>defdUZ?dVe#dd fdWZ@ddddddddddX	deeeef                  deeeef                  d	eee	                  deee	                  deee	                  deee	                  dee	ef         dee	ef         deee	                  fdYZAdd,ddddddZd[ZBd\ ZCd]ed.eee#ee         f         d/e$e         d1ed2e%d^edee	         fd_ZDd]ed.eee#ee         f         d/e$e         d1ed2e%d3ed^ede	fd`ZEd]edaeFd*edbeeef         d2e%d3eGd^edce%de%fddZHddddded]ed*edfeeef         d2ee%         d3ed^ee         dcee%         de%fdgZId]ed*ede%fdhZJd*ed$d ddfdiZKd*eddfdjZLd*eddfdkZM	 	 	 dxd*ed]ed^ee         dcee%         fdlZN eOdmd,n          do             ZP eOdpd,n          dq             ZQdS )zMultiAgentEpisodea  Stores multi-agent episode data.

    The central attribute of the class is the timestep mapping
    `self.env_t_to_agent_t` that maps AgentIDs to their specific environment steps to
    the agent's own scale/timesteps.

    Each AgentID in the `MultiAgentEpisode` has its own `SingleAgentEpisode` object
    in which this agent's data is stored. Together with the env_t_to_agent_t mapping,
    we can extract information either on any individual agent's time scale or from
    the (global) multi-agent environment time scale.

    Extraction of data from a MultiAgentEpisode happens via the getter APIs, e.g.
    `get_observations()`, which work analogous to the ones implemented in the
    `SingleAgentEpisode` class.

    Note that recorded `terminateds`/`truncateds` come as simple
    `MultiAgentDict`s mapping AgentID to bools and thus have no assignment to a
    certain timestep (analogous to a SingleAgentEpisode's single `terminated/truncated`
    boolean flag). Instead we assign it to the last observation recorded.
    Theoretically, there could occur edge cases in some environments
    where an agent receives partial rewards and then terminates without
    a last observation. In these cases, we duplicate the last observation.

    Also, if no initial observation has been received yet for an agent, but
    some  rewards for this same agent already occurred, we delete the agent's data
    up to here, b/c there is nothing to learn from these "premature" rewards.
    )id_agent_to_module_mapping_fn_agent_to_module_mappingobservation_spaceaction_spaceenv_t_startedenv_tagent_t_startedenv_t_to_agent_t_hanging_actions_end _hanging_extra_model_outputs_end_hanging_rewards_end_hanging_rewards_beginis_terminatedis_truncatedagent_episodes_last_step_time_len_lookback_buffers_start_time_custom_dataSNFauto)observationsr   infosactionsr    rewardsterminateds
truncatedsextra_model_outputsr!   r#   len_lookback_bufferagent_episode_idsagent_module_idsr   r   r2   r   r3   r4   r    r5   r6   r7   r8   r!   r#   r9   r:   r;   r   c                   |pt          j                    j        | _        |ddlm} |j        }|| _        |pi | _        |dk    rt          |pg           }|| _
        |pi | _        |pi | _        |pi }|	pi }	|pd| _        |t          |          nd| j
        z
  | j        z   | _        t          t           |pi           | _        t          t$                    | _        i | _        t          t*                    | _        t          t.                    | _        t          t.                    | _        t5          |t6                    r|n|                    dd          | _        t5          |	t6                    r|	n|	                    dd          | _        i | _        |                      ||||||||	|
	  	         i | _!        d| _"        d| _#        | $                                 dS )a  Initializes a `MultiAgentEpisode`.

        Args:
            id_: Optional. Either a string to identify an episode or None.
                If None, a hexadecimal id is created. In case of providing
                a string, make sure that it is unique, as episodes get
                concatenated via this string.
            observations: A list of dictionaries mapping agent IDs to observations.
                Can be None. If provided, should match all other episode data
                (actions, rewards, etc.) in terms of list lengths and agent IDs.
            observation_space: An optional gym.spaces.Dict mapping agent IDs to
                individual agents' spaces, which all (individual agents') observations
                should abide to. If not None and this MultiAgentEpisode is numpy'ized
                (via the `self.to_numpy()` method), and data is appended or set, the new
                data will be checked for correctness.
            infos: A list of dictionaries mapping agent IDs to info dicts.
                Can be None. If provided, should match all other episode data
                (observations, rewards, etc.) in terms of list lengths and agent IDs.
            actions: A list of dictionaries mapping agent IDs to actions.
                Can be None. If provided, should match all other episode data
                (observations, rewards, etc.) in terms of list lengths and agent IDs.
            action_space: An optional gym.spaces.Dict mapping agent IDs to
                individual agents' spaces, which all (individual agents') actions
                should abide to. If not None and this MultiAgentEpisode is numpy'ized
                (via the `self.to_numpy()` method), and data is appended or set, the new
                data will be checked for correctness.
            rewards: A list of dictionaries mapping agent IDs to rewards.
                Can be None. If provided, should match all other episode data
                (actions, rewards, etc.) in terms of list lengths and agent IDs.
            terminateds: A boolean defining if an environment has
                terminated OR a MultiAgentDict mapping individual agent ids
                to boolean flags indicating whether individual agents have terminated.
                A special __all__ key in these dicts indicates, whether the episode
                is terminated for all agents.
                The default is `False`, i.e. the episode has not been terminated.
            truncateds: A boolean defining if the environment has been
                truncated OR a MultiAgentDict mapping individual agent ids
                to boolean flags indicating whether individual agents have been
                truncated. A special __all__ key in these dicts indicates, whether the
                episode is truncated for all agents.
                The default is `False`, i.e. the episode has not been truncated.
            extra_model_outputs: A list of dictionaries mapping agent IDs to their
                corresponding extra model outputs. Each of these "outputs" is a dict
                mapping keys (str) to model output values, for example for
                `key=STATE_OUT`, the values would be the internal state outputs for
                that agent.
            env_t_started: The env timestep (int) that defines the starting point
                of the episode. This is only larger zero, if an already ongoing episode
                chunk is being created, for example by slicing an ongoing episode or
                by calling the `cut()` method on an ongoing episode.
            agent_t_started: A dict mapping AgentIDs to the respective agent's (local)
                timestep at which its SingleAgentEpisode chunk started.
            len_lookback_buffer: The size of the lookback buffers to keep in
                front of this Episode for each type of data (observations, actions,
                etc..). If larger 0, will interpret the first `len_lookback_buffer`
                items in each type of data as NOT part of this actual
                episode chunk, but instead serve as "historical" record that may be
                viewed and used to derive new data from. For example, it might be
                necessary to have a lookback buffer of four if you would like to do
                observation frame stacking and your episode has been cut and you are now
                operating on a new chunk (continuing from the cut one). Then, for the
                first 3 items, you would have to be able to look back into the old
                chunk's data.
                If `len_lookback_buffer` is "auto" (default), will interpret all
                provided data in the constructor as part of the lookback buffers.
            agent_episode_ids: An optional dict mapping AgentIDs
                to their corresponding `SingleAgentEpisode`. If None, each
                `SingleAgentEpisode` in `MultiAgentEpisode.agent_episodes`
                will generate a hexadecimal code. If a dictionary is provided,
                make sure that IDs are unique, because the agents' `SingleAgentEpisode`
                instances are concatenated or recreated by it.
            agent_module_ids: An optional dict mapping AgentIDs to their respective
                ModuleIDs (these mapping are always valid for an entire episode and
                thus won't change during the course of this episode). If a mapping from
                agent to module has already been provided via this dict, the (optional)
                `agent_to_module_mapping_fn` will NOT be used again to map the same
                agent (agents do not change their assigned module in the course of
                one episode).
            agent_to_module_mapping_fn: A callable taking an AgentID and a
                MultiAgentEpisode as args and returning a ModuleID. Used to map agents
                that have not been mapped yet (because they just entered this episode)
                to a ModuleID. The resulting ModuleID is only stored inside the agent's
                SingleAgentEpisode object.
        Nr   )AlgorithmConfigr1   __all__F)	r;   r:   r2   r3   r4   r5   r6   r7   r8   )%uuiduuid4hexr   %ray.rllib.algorithms.algorithm_configr=   "DEFAULT_AGENT_TO_MODULE_MAPPING_FNr   r   lenr-   r   r    r!   r"   r   intr#   r   r$   r%   dictr&   floatr'   r(   
isinstanceboolgetr)   r*   r+   _init_single_agent_episodesr/   r.   r,   validate)selfr   r2   r   r3   r4   r    r5   r6   r7   r8   r!   r#   r9   r:   r;   r   r=   s                     u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/env/multi_agent_episode.py__init__zMultiAgentEpisode.__init__W   s/   T /tz||/%-MMMMMM  B ' +E'
 BRAWUW% &(("%gm"4"4%8"!2!8b(.B!'R%2
 +/a$0S\\\a() ! 	

  +30E2FF" .// 	 %'!0;D0A0A-$/$6$6!
 '2%&8&8#
 +t,,3KKE22 	 *d++2JJ	511 	 BD((-/%#! 3 	) 
	
 
	
 
	
   # 	    )r3   returnc          
      f   | j         rJ | j        | j        cxk    rdk    sn J |pi }|                                D ]\  }}| j        |                             d           || j        vrdt          ||                     |          | j	        | j
                            |          | j                            |                    | j        |<   | j        |                             ||                    |                     |                                  t          j                    | _        dS )aE  Stores initial observation.

        Args:
            observations: A dictionary mapping agent IDs to initial observations.
                Note that some agents may not have an initial observation.
            infos: A dictionary mapping agent IDs to initial info dicts.
                Note that some agents may not have an initial info dict. If not None,
                the agent IDs in `infos` must be a subset of those in `observations`
                meaning it would not be allowed to have an agent with an info dict,
                but not with an observation.
        r   agent_id	module_idmulti_agent_episode_idr   r    observationr3   N)is_doner"   r!   itemsr$   appendr+   r   
module_forr   r   rJ   r    add_env_resetrL   timeperf_counterr.   )rM   r2   r3   rT   	agent_obss        rN   r]   zMultiAgentEpisode.add_env_reset+  sV   " < zT/44441444444 $0#5#5#7#7 	 	Hi !(+221555t2220B%"ooh77+/8&*&<&@&@&J&J!%!2!6!6x!@!@1 1 1#H- )77%ii)) 8     	  ,..rP   )r6   r7   r8   c                X   | j         rt          d          |pi }pi pi |pi }| xj        dz  c_                            dd          | _                            dd          | _        t          d | j                                        D             fdD             z   fdD             z             t          fd| j
        D                       rd	| _        t          |                                          }| j                                        D ]#\  }	}
|	|vr|
                    | j                   $t          |                                          t          |                                          z  t          |                                          z  t                                                    z  t                                                    z  t                              d          s                    d          r| j                                        nt                                z  dhz
  }|D ]}	|	| j        vr]t          |	|                     |	          | j        | j                            |	          | j                            |	          
          }n| j        |	         }|                    |	          }|                    |	          }|                    |	          }|                    |	          }                    |	d          p| j        }                    |	d          p| j        }|                    |	          }|j         r@t)          d |||||fD                       r"t          d|	 d| d| d| d| d| d          |pd}|||	|vrt          d|	 d          n$||| j                            |	d          }|;| j                            |	d          }| j                            |	d          |z   }n|s|r|                     |	           |	| j        vsJ | j        |	                             | j        g| j        z  dgz              | j        | j        |	         _        |                    ||           | j        |	xx         |z  cc<   || j        |	<   |	|vrT|	|v rP|rt          d|	 d          |r|j        }|j         }n|	| j        vsJ || j        |	<   || j        |	<   || j        |	<   n| j                            |	          }|s|rv||                     |	           |rt          d|	 d          |j        }|j         }| j                            |	d          }| j                            |	d          |z   }n4|	| j        vr| j        |	xx         |z  cc<   n| j        |	xx         |z  cc<   |V|!                    |||||||           | j        |	                             tE          |          |j#        j        z              |s|r|                     |	           | $                                 tK          j&                    | _'        | j(        | j'        | _(        dS dS )a  Adds a timestep to the episode.

        Args:
            observations: A dictionary mapping agent IDs to their corresponding
                next observations. Note that some agents may not have stepped at this
                timestep.
            actions: Mandatory. A dictionary mapping agent IDs to their
                corresponding actions. Note that some agents may not have stepped at
                this timestep.
            rewards: Mandatory. A dictionary mapping agent IDs to their
                corresponding observations. Note that some agents may not have stepped
                at this timestep.
            infos: A dictionary mapping agent IDs to their
                corresponding info. Note that some agents may not have stepped at this
                timestep.
            terminateds: A dictionary mapping agent IDs to their `terminated` flags,
                indicating, whether the environment has been terminated for them.
                A special `__all__` key indicates that the episode is terminated for
                all agent IDs.
            terminateds: A dictionary mapping agent IDs to their `truncated` flags,
                indicating, whether the environment has been truncated for them.
                A special `__all__` key indicates that the episode is `truncated` for
                all agent IDs.
            extra_model_outputs: A dictionary mapping agent IDs to their
                corresponding specific model outputs (also in a dictionary; e.g.
                `vf_preds` for PPO).
        zGCannot call `add_env_step` on a MultiAgentEpisode that is already done!   r>   Fc                 &    g | ]\  }}|j         |S  )rY   .0aidsa_epss      rN   
<listcomp>z2MultiAgentEpisode.add_env_step.<locals>.<listcomp>  s#    RRR[S&6>RSRRRrP   c                 $    g | ]}|         
|S rd   rd   rf   rg   r6   s     rN   ri   z2MultiAgentEpisode.add_env_step.<locals>.<listcomp>  s#    >>>s[-=>s>>>rP   c                 $    g | ]}|         
|S rd   rd   rf   rg   r7   s     rN   ri   z2MultiAgentEpisode.add_env_step.<locals>.<listcomp>  s"    <<<sJsO<s<<<rP   c              3   :   K   | ]}|t                    v V  d S N)set)rf   rg   agents_dones     rN   	<genexpr>z1MultiAgentEpisode.add_env_step.<locals>.<genexpr>  s0      AA3sc+&&&AAAAAArP   TrS   c              3      K   | ]}|d uV  	d S ro   rd   )rf   vs     rN   rr   z1MultiAgentEpisode.add_env_step.<locals>.<genexpr>  s:       * * * * * * * *rP   zAgent zl already had its `SingleAgentEpisode.is_done` set to True, but still received data in a following step! obs=z act=z rew=z info=z extra_model_outputs=.        NzL acted (and received next obs), but did NOT receive any reward from the env!r   rW   z acted and then got truncated, but did NOT receive a last (truncation) observation, required for e.g. value function bootstrapping!)rX   actionrewardr3   
terminated	truncatedr8   ))rY   r   r"   rJ   r)   r*   rp   r+   rZ   all	agent_idskeysr$   r[   SKIP_ENV_TS_TAGr   r\   r   r   r    anyr%   popr&   r'   _del_hangingextendr-   lookbackr]   r(   _last_added_observation_last_added_infosadd_env_steprD   r2   rL   r^   r_   r,   r.   )rM   r2   r4   r5   r3   r6   r7   r8   stepped_agent_idsrT   r$   agent_ids_with_data
sa_episode_observation_action_reward_infos_terminated
_truncated_extra_model_outputsrq   s        ``             @rN   r   zMultiAgentEpisode.add_env_step]  s   N < 	$  
 !'R%2
17R 	

a

 )__Y>>&NN9e<< RRD$7$=$=$?$?RRR>>>>k>>>?<<<<j<<<=
 
 AAAA$.AAAAA 	&!%D   1 1 3 344*.*?*E*E*G*G 	> 	>&H&000 ''(<===
 !!##$$',,..!!"',,..!!" +""$$%%& *//##$$	%
 ??9--1;	1J1J#((***UU 	 K , F	, F	,Ht222/%"ooh77+/8&*&<&@&@&J&J!%!2!6!6x!@!@  

 "0:
 (++H55Lkk(++Gkk(++GYYx((F%//(E::Pd>PK#%88MD<MJ#6#:#:8#D#D  ! 	c * *&&BVW* * * ' ' 	 )CX C C'C C.5C C<CC CKQC C ,@C C C   nG 'G,?7**,< < < <   + )go377$GG &+/+P+T+T $, ,( #7;;HcJJWTGG # !j !))(333   (t/DDDDD-h7>>!12TZ?1#E  
 &*%? -$""00\QW0XXX3H===H===8B+H5  --(g2E2E  -,8 8 8 8   ! -
 $.#EL'9FF
 $4+DDDDD:AD-h7:AD-h7 - 9   377AA  'G* 'G ))(333 
 " 0EX E E E   $.#EL'9F ,0+P+T+T $, ,( #7;;HcJJWTGG  t':::3H===H==== 1(;;;wF;;; ''' ,"" *((< (    %h/66
OOj&=&FF    ,j ,!!(+++ 	  $022##3D $#rP   c                 f    | j                                         D ]}|                                 dS )zValidates the episode's data.

        This function ensures that the data stored to a `MultiAgentEpisode` is
        in order (e.g. that the correct number of observations, actions, rewards
        are there).
        N)r+   valuesrL   )rM   epss     rN   rL   zMultiAgentEpisode.validate  s;     &--// 	 	CLLNNNN	 	rP   c                     | j         S ro   )r/   rM   s    rN   custom_datazMultiAgentEpisode.custom_data  s      rP   c                 b    t          d | j                                        D                       S )z?Returns True if `self.add_env_reset()` has already been called.c              3   F   K   | ]}t          |j                  d k    V  dS )r   N)rD   r2   )rf   r   s     rN   rr   z-MultiAgentEpisode.is_reset.<locals>.<genexpr>  sF       
 
 
'((1,
 
 
 
 
 
rP   )r   r+   r   r   s    rN   is_resetzMultiAgentEpisode.is_reset  sA      
 
"188::
 
 
 
 
 	
rP   c                    t          t          | j                                                            j        t          fd| j                                        D                       st          d|  d          S )zDTrue, if the data in this episode is already stored as numpy arrays.c              3   *   K   | ]}|j         u V  d S ro   )is_numpy)rf   r   r   s     rN   rr   z-MultiAgentEpisode.is_numpy.<locals>.<genexpr>  s*      TT3<8+TTTTTTrP   z(Only some SingleAgentEpisode objects in z( are converted to numpy, others are not!)nextiterr+   r   r   r{   RuntimeError)rM   r   s    @rN   r   zMultiAgentEpisode.is_numpy  s     T07799::;;DTTTTt7J7Q7Q7S7STTTTT 	*4 * * *   rP   c                     | j         p| j        S )a  Whether the episode is actually done (terminated or truncated).

        A done episode cannot be continued via `self.add_env_step()` or being
        concatenated on its right-side with another episode chunk or being
        succeeded via `self.cut()`.

        Note that in a multi-agent environment this does not necessarily
        correspond to single agents having terminated or being truncated.

        `self.is_terminated` should be `True`, if all agents are terminated and
        `self.is_truncated` should be `True`, if all agents are truncated. If
        only one or more (but not all!) agents are `terminated/truncated the
        `MultiAgentEpisode.is_terminated/is_truncated` should be `False`. This
        information about single agent's terminated/truncated states can always
        be retrieved from the `SingleAgentEpisode`s inside the 'MultiAgentEpisode`
        one.

        If all agents are either terminated or truncated, but in a mixed fashion,
        i.e. some are terminated and others are truncated: This is currently
        undefined and could potentially be a problem (if a user really implemented
        such a multi-agent env that behaves this way).

        Returns:
            Boolean defining if an episode has either terminated or truncated.
        )r)   r*   r   s    rN   rY   zMultiAgentEpisode.is_done  s    6 !6T%66rP   c                     | j                                                                         D ]\  }}|                                 | S )ah  Converts this Episode's list attributes to numpy arrays.

        This means in particular that this episodes' lists (per single agent) of
        (possibly complex) data (e.g. an agent having a dict obs space) will be
        converted to (possibly complex) structs, whose leafs are now numpy arrays.
        Each of these leaf numpy arrays will have the same length (batch dimension)
        as the length of the original lists.

        Note that Columns.INFOS are NEVER numpy'ized and will remain a list
        (normally, a list of the original, env-returned dicts). This is due to the
        heterogeneous nature of INFOS returned by envs, which would make it unwieldy to
        convert this information to numpy arrays.

        After calling this method, no further data may be added to this episode via
        the `self.add_env_step()` method.

        Examples:

        .. testcode::

            import numpy as np

            from ray.rllib.env.multi_agent_episode import MultiAgentEpisode
            from ray.rllib.env.tests.test_multi_agent_episode import (
                TestMultiAgentEpisode
            )

            # Create some multi-agent episode data.
            (
                observations,
                actions,
                rewards,
                terminateds,
                truncateds,
                infos,
            ) = TestMultiAgentEpisode._mock_multi_agent_records()
            # Define the agent ids.
            agent_ids = ["agent_1", "agent_2", "agent_3", "agent_4", "agent_5"]

            episode = MultiAgentEpisode(
                observations=observations,
                infos=infos,
                actions=actions,
                rewards=rewards,
                # Note: terminated/truncated have nothing to do with an episode
                # being converted `to_numpy` or not (via the `self.to_numpy()` method)!
                terminateds=terminateds,
                truncateds=truncateds,
                len_lookback_buffer=0,  # no lookback; all data is actually "in" episode
            )

            # Episode has not been numpy'ized yet.
            assert not episode.is_numpy
            # We are still operating on lists.
            assert (
                episode.get_observations(
                    indices=[1],
                    agent_ids="agent_1",
                ) == {"agent_1": [1]}
            )

            # Numpy'ized the episode.
            episode.to_numpy()
            assert episode.is_numpy

            # Everything is now numpy arrays (with 0-axis of size
            # B=[len of requested slice]).
            assert (
                isinstance(episode.get_observations(
                    indices=[1],
                    agent_ids="agent_1",
                )["agent_1"], np.ndarray)
            )

        Returns:
             This `MultiAgentEpisode` object with the converted numpy data.
        )r+   copyrZ   to_numpy)rM   rT   	agent_epss      rN   r   zMultiAgentEpisode.to_numpy  sN    ^ $(#6#;#;#=#=#C#C#E#E 	! 	!Hi    rP   otherc           	         |j         | j         k    sJ | j        rJ | j        |j        k    sJ |                                 t          | j                  t          |j                  z  }|D ]2}| j                            |          }|V|j        |         | j        |<   |j	        |         | j	        |<   |j
        |         | j
        |<   |                     ||           u|j        r}||j        v r|| j        v r|| j        v sJ |                    |j        |                             d          |j        |                             d          | j        |         | j        |         |j        |         z   | j        |                    |                    |j        |                    ||j        v rct)          j        |j        |                   | j        |<   |j        |         | j        |<   t)          j        |j        |                   | j        |<   | j        }t-          |j	        |         dd                   D ]\\  }}|| j        k    r&| j	        |                             | j                   6| j	        |                             |dz   |z              ]4|j        | _        |j        rd| _        n|j        rd| _        | j                            |j                   |                                  dS )a  Adds the given `other` MultiAgentEpisode to the right side of `self`.

        In order for this to work, both chunks (`self` and `other`) must fit
        together. This is checked by the IDs (must be identical), the time step counters
        (`self.env_t` must be the same as `episode_chunk.env_t_started`), as well as the
        observations/infos of the individual agents at the concatenation boundaries.
        Also, `self.is_done` must not be True, meaning `self.is_terminated` and
        `self.is_truncated` are both False.

        Args:
            other: The other `MultiAgentEpisode` to be concatenated to this one.

        Returns:
            A `MultiAgentEpisode` instance containing the concatenated data
            from both episodes (`self` and `other`).
        Nr   )rX   r3   rw   rx   r8   rb   T)r   rY   r"   r!   rL   rp   r|   r+   rJ   r$   r#   _copy_hangingr%   r&   r   get_observations	get_infosr'   r(   concat_episoder   deepcopy	enumerater~   r[   r)   r*   r   update)rM   r   all_agent_idsrT   r   jivals           rN   r   z MultiAgentEpisode.concat_episode  sF   $ yDH$$$$ <zU00000 DN++c%/.B.BB% 7	J 7	JH,00::J !050DX0N#H-272H2R%h/161Fx1P$X.""8U3333 # *J U111 t888#t'LLLLL++$)$8$B$S$STU$V$V#28<FFqII#8B 5h?#:8DE !A(K ,    ))%*>x*HIIIu999:>-28<; ;D-h7 ;@:T ;D-h7 GKm>xHG GD9(C
 J'(>x(H(LMM J JFAsd222-h7>>t?STTTT-h7>>q1uqyIIII [
 	%!%D 	% $D
 	 1222 	rP   r   c                     |dk    sJ  j         rt          d          |} j                                        D ]\  }} j        |                             d           j        k    sJ t          |t           j        |         j	                  dz             D ]A} j        |                             |            j        k    rt          ||dz
            } nBt          | dz
  d          }|dk    rt          | d          nt          dd          }                     |d          }                     |d          }	                     |d          }
                     |d          }                     d|d          }t#          di d	 j        d
|d j        d|	d|
d j        d|d|d                                 d                                 d j        d fd j        D             d j        d fd j        D             d j        dd} j                                        |_        t9          j         j                  |_         |S )a  Returns a successor episode chunk (of len=0) continuing from this Episode.

        The successor will have the same ID as `self`.
        If no lookback buffer is requested (len_lookback_buffer=0), the successor's
        observations will be the last observation(s) of `self` and its length will
        therefore be 0 (no further steps taken yet). If `len_lookback_buffer` > 0,
        the returned successor will have `len_lookback_buffer` observations (and
        actions, rewards, etc..) taken from the right side (end) of `self`. For example
        if `len_lookback_buffer=2`, the returned successor's lookback buffer actions
        will be identical to the results of `self.get_actions([-2, -1])`.

        This method is useful if you would like to discontinue building an episode
        chunk (b/c you have to return it from somewhere), but would like to have a new
        episode instance to continue building the actual gym.Env episode at a later
        time. Vie the `len_lookback_buffer` argument, the continuing chunk (successor)
        will still be able to "look back" into this predecessor episode's data (at
        least to some extend, depending on the value of `len_lookback_buffer`).

        Args:
            len_lookback_buffer: The number of environment timesteps to take along into
                the new chunk as "lookback buffer". A lookback buffer is additional data
                on the left side of the actual episode data for visibility purposes
                (but without actually being part of the new chunk). For example, if
                `self` ends in actions: agent_1=5,6,7 and agent_2=6,7, and we call
                `self.cut(len_lookback_buffer=2)`, the returned chunk will have
                actions 6 and 7 for both agents already in it, but still
                `t_started`==t==8 (not 7!) and a length of 0. If there is not enough
                data in `self` yet to fulfil the `len_lookback_buffer` request, the
                value of `len_lookback_buffer` is automatically adjusted (lowered).

        Returns:
            The successor Episode chunk of this one with the same ID and state and the
            only observation being the last observation in self.
        r   zFCan't call `MultiAgentEpisode.cut()` when the episode is already done!rb   NT)indicesreturn_list)keyr   r   r   r2   r   r3   r4   r    r5   r8   r6   r7   r!   r#   c                 Z    i | ]'}j         |         j        |j         |         j        (S rd   )r+   rY   trf   rg   rM   s     rN   
<dictcomp>z)MultiAgentEpisode.cut.<locals>.<dictcomp>  sG       *3/7T(-/  rP   r:   r;   c                 6    i | ]}|j         |         j        S rd   )r+   rU   r   s     rN   r   z)MultiAgentEpisode.cut.<locals>.<dictcomp>  s3       <?T(-7  rP   r   r9   r1   rd   )!rY   r   r%   rZ   r$   rJ   r~   rangerD   datamaxslicer   r   get_actionsget_rewardsget_extra_model_outputsr   r   r   r    get_terminatedsget_truncatedsr"   r|   r:   r   r'   r   r(   r   r   r/   )rM   r9   orig_len_lbrT   agent_actionsr   indices_obs_and_infosindices_restr2   r3   r4   r5   r8   	successors   `             rN   cutzMultiAgentEpisode.cut  sd   F #a''''< 	X   *'+'@'F'F'H'H 	 	#Hm(266r::d>RRRRR;D,A(,K,P(Q(QTU(UVV  (266r::d>RRR*-.A1q5*I*I'E S !&':&:Q&> E E #Q&& &&---tQ 	 ,,)t - 
 
 '<$OO""<T"JJ""<T"JJ"::  ; 
 
 & 
 
 

 &
 #44	

 %
 G
 **
 G
 !4 3
 ,,...
 **,,,
 **
    >   !
, #44-
.   CG>   /
4 (,'F'F5
8 !'9
	B ,0+D+I+I+K+K	( "&t/?!@!@	rP   c                 N    t          | j                                                  S )zReturns the agent ids.)rp   r+   r}   r   s    rN   r|   zMultiAgentEpisode.agent_ids  s!     4&++--...rP   c                 H    d | j                                         D             S )z3Returns ids from each agent's `SingleAgentEpisode`.c                 $    i | ]\  }}||j         S rd   r   rf   rT   r   s      rN   r   z7MultiAgentEpisode.agent_episode_ids.<locals>.<dictcomp>  s0     
 
 
#) im
 
 
rP   )r+   rZ   r   s    rN   r:   z#MultiAgentEpisode.agent_episode_ids  s2    
 
'+':'@'@'B'B
 
 
 	
rP   rT   c                 r    || j         vr"|                     ||           x}| j         |<   |S | j         |         S )a  Returns the ModuleID for a given AgentID.

        Forces the agent-to-module mapping to be performed (via
        `self.agent_to_module_mapping_fn`), if this has not been done yet.
        Note that all such mappings are stored in the `self._agent_to_module_mapping`
        property.

        Args:
            agent_id: The AgentID to get a mapped ModuleID for.

        Returns:
            The ModuleID mapped to from the given `agent_id`.
        )r   r   )rM   rT   rU   s      rN   r\   zMultiAgentEpisode.module_for  sR     4888 //$??@I5 0::rP   T)	env_stepsneg_index_as_lookbackfillone_hot_discreter   r   r|   r   r   r   r   r   c          
      <    |                      d|||||||          S )a  Returns agents' observations or batched ranges thereof from this episode.

        Args:
            indices: A single int is interpreted as an index, from which to return the
                individual observation stored at this index.
                A list of ints is interpreted as a list of indices from which to gather
                individual observations in a batch of size len(indices).
                A slice object is interpreted as a range of observations to be returned.
                Thereby, negative indices by default are interpreted as "before the end"
                unless the `neg_index_as_lookback=True` option is used, in which case
                negative indices are interpreted as "before ts=0", meaning going back
                into the lookback buffer.
                If None, will return all observations (from ts=0 to the end).
            agent_ids: An optional collection of AgentIDs or a single AgentID to get
                observations for. If None, will return observations for all agents in
                this episode.
            env_steps: Whether `indices` should be interpreted as environment time steps
                (True) or per-agent timesteps (False).
            neg_index_as_lookback: If True, negative values in `indices` are
                interpreted as "before ts=0", meaning going back into the lookback
                buffer. For example, an episode with agent A's observations
                [4, 5, 6,  7, 8, 9], where [4, 5, 6] is the lookback buffer range
                (ts=0 item is 7), will respond to `get_observations(-1, agent_ids=[A],
                neg_index_as_lookback=True)` with {A: `6`} and to
                `get_observations(slice(-2, 1), agent_ids=[A],
                neg_index_as_lookback=True)` with {A: `[5, 6,  7]`}.
            fill: An optional value to use for filling up the returned results at
                the boundaries. This filling only happens if the requested index range's
                start/stop boundaries exceed the episode's boundaries (including the
                lookback buffer on the left side). This comes in very handy, if users
                don't want to worry about reaching such boundaries and want to zero-pad.
                For example, an episode with agent A' observations [10, 11,  12, 13, 14]
                and lookback buffer size of 2 (meaning observations `10` and `11` are
                part of the lookback buffer) will respond to
                `get_observations(slice(-7, -2), agent_ids=[A], fill=0.0)` with
                `{A: [0.0, 0.0, 10, 11, 12]}`.
            one_hot_discrete: If True, will return one-hot vectors (instead of
                int-values) for those sub-components of a (possibly complex) observation
                space that are Discrete or MultiDiscrete.  Note that if `fill=0` and the
                requested `indices` are out of the range of our data, the returned
                one-hot vectors will actually be zero-hot (all slots zero).
            return_list: Whether to return a list of multi-agent dicts (instead of
                a single multi-agent dict of lists/structs). False by default. This
                option can only be used when `env_steps` is True due to the fact the
                such a list can only be interpreted as one env step per list item
                (would not work with agent steps).

        Returns:
            A dictionary mapping agent IDs to observations (at the given
            `indices`). If `env_steps` is True, only agents that have stepped
            (were ready) at the given env step `indices` are returned (i.e. not all
            agent IDs are necessarily in the keys).
            If `return_list` is True, returns a list of MultiAgentDicts (mapping agent
            IDs to observations) instead.
        r2   whatr   r|   r   r   r   r   r   _getrM   r   r|   r   r   r   r   r   s           rN   r   z"MultiAgentEpisode.get_observations  s8    F yy"7-#  	
 	
 		
rP   )r   r   r   r   c          	      :    |                      d||||||          S )a  Returns agents' info dicts or list (ranges) thereof from this episode.

        Args:
            indices: A single int is interpreted as an index, from which to return the
                individual info dict stored at this index.
                A list of ints is interpreted as a list of indices from which to gather
                individual info dicts in a list of size len(indices).
                A slice object is interpreted as a range of info dicts to be returned.
                Thereby, negative indices by default are interpreted as "before the end"
                unless the `neg_index_as_lookback=True` option is used, in which case
                negative indices are interpreted as "before ts=0", meaning going back
                into the lookback buffer.
                If None, will return all infos (from ts=0 to the end).
            agent_ids: An optional collection of AgentIDs or a single AgentID to get
                info dicts for. If None, will return info dicts for all agents in
                this episode.
            env_steps: Whether `indices` should be interpreted as environment time steps
                (True) or per-agent timesteps (False).
            neg_index_as_lookback: If True, negative values in `indices` are
                interpreted as "before ts=0", meaning going back into the lookback
                buffer. For example, an episode with agent A's info dicts
                [{"l":4}, {"l":5}, {"l":6},  {"a":7}, {"b":8}, {"c":9}], where the
                first 3 items are the lookback buffer (ts=0 item is {"a": 7}), will
                respond to `get_infos(-1, agent_ids=A, neg_index_as_lookback=True)`
                with `{A: {"l":6}}` and to
                `get_infos(slice(-2, 1), agent_ids=A, neg_index_as_lookback=True)`
                with `{A: [{"l":5}, {"l":6},  {"a":7}]}`.
            fill: An optional value to use for filling up the returned results at
                the boundaries. This filling only happens if the requested index range's
                start/stop boundaries exceed the episode's boundaries (including the
                lookback buffer on the left side). This comes in very handy, if users
                don't want to worry about reaching such boundaries and want to
                auto-fill. For example, an episode with agent A's infos being
                [{"l":10}, {"l":11},  {"a":12}, {"b":13}, {"c":14}] and lookback buffer
                size of 2 (meaning infos {"l":10}, {"l":11} are part of the lookback
                buffer) will respond to `get_infos(slice(-7, -2), agent_ids=A,
                fill={"o": 0.0})` with
                `{A: [{"o":0.0}, {"o":0.0}, {"l":10}, {"l":11}, {"a":12}]}`.
            return_list: Whether to return a list of multi-agent dicts (instead of
                a single multi-agent dict of lists/structs). False by default. This
                option can only be used when `env_steps` is True due to the fact the
                such a list can only be interpreted as one env step per list item
                (would not work with agent steps).

        Returns:
            A dictionary mapping agent IDs to observations (at the given
            `indices`). If `env_steps` is True, only agents that have stepped
            (were ready) at the given env step `indices` are returned (i.e. not all
            agent IDs are necessarily in the keys).
            If `return_list` is True, returns a list of MultiAgentDicts (mapping agent
            IDs to infos) instead.
        r3   r   r   r|   r   r   r   r   r   rM   r   r|   r   r   r   r   s          rN   r   zMultiAgentEpisode.get_infosk  s5    | yy"7#  
 
 	
rP   c          
      <    |                      d|||||||          S )a  Returns agents' actions or batched ranges thereof from this episode.

        Args:
            indices: A single int is interpreted as an index, from which to return the
                individual actions stored at this index.
                A list of ints is interpreted as a list of indices from which to gather
                individual actions in a batch of size len(indices).
                A slice object is interpreted as a range of actions to be returned.
                Thereby, negative indices by default are interpreted as "before the end"
                unless the `neg_index_as_lookback=True` option is used, in which case
                negative indices are interpreted as "before ts=0", meaning going back
                into the lookback buffer.
                If None, will return all actions (from ts=0 to the end).
            agent_ids: An optional collection of AgentIDs or a single AgentID to get
                actions for. If None, will return actions for all agents in
                this episode.
            env_steps: Whether `indices` should be interpreted as environment time steps
                (True) or per-agent timesteps (False).
            neg_index_as_lookback: If True, negative values in `indices` are
                interpreted as "before ts=0", meaning going back into the lookback
                buffer. For example, an episode with agent A's actions
                [4, 5, 6,  7, 8, 9], where [4, 5, 6] is the lookback buffer range
                (ts=0 item is 7), will respond to `get_actions(-1, agent_ids=[A],
                neg_index_as_lookback=True)` with {A: `6`} and to
                `get_actions(slice(-2, 1), agent_ids=[A],
                neg_index_as_lookback=True)` with {A: `[5, 6,  7]`}.
            fill: An optional value to use for filling up the returned results at
                the boundaries. This filling only happens if the requested index range's
                start/stop boundaries exceed the episode's boundaries (including the
                lookback buffer on the left side). This comes in very handy, if users
                don't want to worry about reaching such boundaries and want to zero-pad.
                For example, an episode with agent A' actions [10, 11,  12, 13, 14]
                and lookback buffer size of 2 (meaning actions `10` and `11` are
                part of the lookback buffer) will respond to
                `get_actions(slice(-7, -2), agent_ids=[A], fill=0.0)` with
                `{A: [0.0, 0.0, 10, 11, 12]}`.
            one_hot_discrete: If True, will return one-hot vectors (instead of
                int-values) for those sub-components of a (possibly complex) observation
                space that are Discrete or MultiDiscrete.  Note that if `fill=0` and the
                requested `indices` are out of the range of our data, the returned
                one-hot vectors will actually be zero-hot (all slots zero).
            return_list: Whether to return a list of multi-agent dicts (instead of
                a single multi-agent dict of lists/structs). False by default. This
                option can only be used when `env_steps` is True due to the fact the
                such a list can only be interpreted as one env step per list item
                (would not work with agent steps).

        Returns:
            A dictionary mapping agent IDs to actions (at the given
            `indices`). If `env_steps` is True, only agents that have stepped
            (were ready) at the given env step `indices` are returned (i.e. not all
            agent IDs are necessarily in the keys).
            If `return_list` is True, returns a list of MultiAgentDicts (mapping agent
            IDs to actions) instead.
        r4   r   r   r   s           rN   r   zMultiAgentEpisode.get_actions  s8    D yy"7-#  	
 	
 		
rP   c          	      :    |                      d||||||          S )a)  Returns agents' rewards or batched ranges thereof from this episode.

        Args:
            indices: A single int is interpreted as an index, from which to return the
                individual rewards stored at this index.
                A list of ints is interpreted as a list of indices from which to gather
                individual rewards in a batch of size len(indices).
                A slice object is interpreted as a range of rewards to be returned.
                Thereby, negative indices by default are interpreted as "before the end"
                unless the `neg_index_as_lookback=True` option is used, in which case
                negative indices are interpreted as "before ts=0", meaning going back
                into the lookback buffer.
                If None, will return all rewards (from ts=0 to the end).
            agent_ids: An optional collection of AgentIDs or a single AgentID to get
                rewards for. If None, will return rewards for all agents in
                this episode.
            env_steps: Whether `indices` should be interpreted as environment time steps
                (True) or per-agent timesteps (False).
            neg_index_as_lookback: If True, negative values in `indices` are
                interpreted as "before ts=0", meaning going back into the lookback
                buffer. For example, an episode with agent A's rewards
                [4, 5, 6,  7, 8, 9], where [4, 5, 6] is the lookback buffer range
                (ts=0 item is 7), will respond to `get_rewards(-1, agent_ids=[A],
                neg_index_as_lookback=True)` with {A: `6`} and to
                `get_rewards(slice(-2, 1), agent_ids=[A],
                neg_index_as_lookback=True)` with {A: `[5, 6,  7]`}.
            fill: An optional float value to use for filling up the returned results at
                the boundaries. This filling only happens if the requested index range's
                start/stop boundaries exceed the episode's boundaries (including the
                lookback buffer on the left side). This comes in very handy, if users
                don't want to worry about reaching such boundaries and want to zero-pad.
                For example, an episode with agent A' rewards [10, 11,  12, 13, 14]
                and lookback buffer size of 2 (meaning rewards `10` and `11` are
                part of the lookback buffer) will respond to
                `get_rewards(slice(-7, -2), agent_ids=[A], fill=0.0)` with
                `{A: [0.0, 0.0, 10, 11, 12]}`.
            return_list: Whether to return a list of multi-agent dicts (instead of
                a single multi-agent dict of lists/structs). False by default. This
                option can only be used when `env_steps` is True due to the fact the
                such a list can only be interpreted as one env step per list item
                (would not work with agent steps).

        Returns:
            A dictionary mapping agent IDs to rewards (at the given
            `indices`). If `env_steps` is True, only agents that have stepped
            (were ready) at the given env step `indices` are returned (i.e. not all
            agent IDs are necessarily in the keys).
            If `return_list` is True, returns a list of MultiAgentDicts (mapping agent
            IDs to rewards) instead.
        r5   r   r   r   s          rN   r   zMultiAgentEpisode.get_rewards   s5    x yy"7#  
 
 	
rP   r   c          
      <    |                      d|||||||          S )a  Returns agents' actions or batched ranges thereof from this episode.

        Args:
            key: The `key` within each agents' extra_model_outputs dict to extract
                data for. If None, return data of all extra model output keys.
            indices: A single int is interpreted as an index, from which to return the
                individual extra model outputs stored at this index.
                A list of ints is interpreted as a list of indices from which to gather
                individual extra model outputs in a batch of size len(indices).
                A slice object is interpreted as a range of extra model outputs to be
                returned.
                Thereby, negative indices by default are interpreted as "before the end"
                unless the `neg_index_as_lookback=True` option is used, in which case
                negative indices are interpreted as "before ts=0", meaning going back
                into the lookback buffer.
                If None, will return all extra model outputs (from ts=0 to the end).
            agent_ids: An optional collection of AgentIDs or a single AgentID to get
                extra model outputs for. If None, will return extra model outputs for
                all agents in this episode.
            env_steps: Whether `indices` should be interpreted as environment time steps
                (True) or per-agent timesteps (False).
            neg_index_as_lookback: If True, negative values in `indices` are
                interpreted as "before ts=0", meaning going back into the lookback
                buffer. For example, an episode with agent A's actions
                [4, 5, 6,  7, 8, 9], where [4, 5, 6] is the lookback buffer range
                (ts=0 item is 7), will respond to `get_actions(-1, agent_ids=[A],
                neg_index_as_lookback=True)` with {A: `6`} and to
                `get_actions(slice(-2, 1), agent_ids=[A],
                neg_index_as_lookback=True)` with {A: `[5, 6,  7]`}.
            fill: An optional value to use for filling up the returned results at
                the boundaries. This filling only happens if the requested index range's
                start/stop boundaries exceed the episode's boundaries (including the
                lookback buffer on the left side). This comes in very handy, if users
                don't want to worry about reaching such boundaries and want to zero-pad.
                For example, an episode with agent A' actions [10, 11,  12, 13, 14]
                and lookback buffer size of 2 (meaning actions `10` and `11` are
                part of the lookback buffer) will respond to
                `get_actions(slice(-7, -2), agent_ids=[A], fill=0.0)` with
                `{A: [0.0, 0.0, 10, 11, 12]}`.
            one_hot_discrete: If True, will return one-hot vectors (instead of
                int-values) for those sub-components of a (possibly complex) observation
                space that are Discrete or MultiDiscrete.  Note that if `fill=0` and the
                requested `indices` are out of the range of our data, the returned
                one-hot vectors will actually be zero-hot (all slots zero).
            return_list: Whether to return a list of multi-agent dicts (instead of
                a single multi-agent dict of lists/structs). False by default. This
                option can only be used when `env_steps` is True due to the fact the
                such a list can only be interpreted as one env step per list item
                (would not work with agent steps).

        Returns:
            A dictionary mapping agent IDs to actions (at the given
            `indices`). If `env_steps` is True, only agents that have stepped
            (were ready) at the given env step `indices` are returned (i.e. not all
            agent IDs are necessarily in the keys).
            If `return_list` is True, returns a list of MultiAgentDicts (mapping agent
            IDs to extra_model_outputs) instead.
        r8   )r   extra_model_outputs_keyr   r|   r   r   r   r   r   )rM   r   r   r|   r   r   r   r   s           rN   r   z)MultiAgentEpisode.get_extra_model_outputsF  s8    J yy&$'"7#  	
 	
 		
rP   c                 f      fd j         D             }|                    d j        i           |S )z&Gets the terminateds at given indices.c                 6    i | ]}|j         |         j        S rd   )r+   r)   rf   rT   rM   s     rN   r   z5MultiAgentEpisode.get_terminateds.<locals>.<dictcomp>  s6     
 
 
 d)(3A
 
 
rP   r>   r|   r   r)   )rM   r6   s   ` rN   r   z!MultiAgentEpisode.get_terminateds  sQ    
 
 
 
 N
 
 
 	It'9:;;;rP   c                 f      fd j         D             }|                    d j        i           |S )Nc                 6    i | ]}|j         |         j        S rd   )r+   r*   r   s     rN   r   z4MultiAgentEpisode.get_truncateds.<locals>.<dictcomp>  s6     
 
 
 d)(3@
 
 
rP   r>   r   )rM   r7   s   ` rN   r   z MultiAgentEpisode.get_truncateds  sQ    
 
 
 
 N
 
 

 	9d&89:::rP   )r9   slice_c                   |j         dvrt          d|j          d          |j        }|j        }|d}nI|dk     r!t	          t          |           |z   d          }n"|t          |           k    rt          |           }|t          |           }nI|dk     r!t	          t          |           |z   d          }n"|t          |           k    rt          |           }d	 | j                                        D ]}\  }}|j        j	        |j        j	        k    sJ |j
        j	        k    sJ |j        j	        k    sJ t          fd|j                                        D                       sJ ~n# t          $ r t!          d          w xY wi i i }| j                                        D ]\  }}| j        |         }|t          |          k     r>||         | j        k    r-t          |          ||         k    r|j        |<   |j        |<   |t          |          k     rFt+          |t          |                    D ](}	||	         | j        k    r|j        ||	         z   ||<    n)t          fd| j        D                       d	<   t          fd
| j        D                       d	<   ||n}
||
z
  dk     r|
|z
  k     r|z   }
|                     t1          ||
z
  |dz             dd          }|                     t1          ||
z
  |          dd          }|                     t1          ||
z
  |          dd          }|                     t1          ||
z
  |          dd          }t9          | j        || j        || j        |||
| j         |z   d | j                                        D             || j!        | j"                  }| j#        r|$                                 |S )a7  Returns a slice of this episode with the given slice object.

        Works analogous to
        :py:meth:`~ray.rllib.env.single_agent_episode.SingleAgentEpisode.slice`

        However, the important differences are:
        - `slice_` is provided in (global) env steps, not agent steps.
        - In case `slice_` ends - for a certain agent - in an env step, where that
        particular agent does not have an observation, the previous observation will
        be included, but the next action and sum of rewards until this point will
        be stored in the agent's hanging values caches for the returned
        MultiAgentEpisode slice.

        .. testcode::

            from ray.rllib.env.multi_agent_episode import MultiAgentEpisode
            from ray.rllib.utils.test_utils import check

            # Generate a simple multi-agent episode.
            observations = [
                {"a0": 0, "a1": 0},  # 0
                {         "a1": 1},  # 1
                {         "a1": 2},  # 2
                {"a0": 3, "a1": 3},  # 3
                {"a0": 4},           # 4
            ]
            # Actions are the same as observations (except for last obs, which doesn't
            # have an action).
            actions = observations[:-1]
            # Make up a reward for each action.
            rewards = [
                {aid: r / 10 + 0.1 for aid, r in o.items()}
                for o in observations
            ]
            episode = MultiAgentEpisode(
                observations=observations,
                actions=actions,
                rewards=rewards,
                len_lookback_buffer=0,
            )

            # Slice the episode and check results.
            slice = episode[1:3]
            a0 = slice.agent_episodes["a0"]
            a1 = slice.agent_episodes["a1"]
            check((a0.observations, a1.observations), ([3], [1, 2, 3]))
            check((a0.actions, a1.actions), ([], [1, 2]))
            check((a0.rewards, a1.rewards), ([], [0.2, 0.3]))
            check((a0.is_done, a1.is_done), (False, False))

            # If a slice ends in a "gap" for an agent, expect actions and rewards to be
            # cached for this agent.
            slice = episode[:2]
            a0 = slice.agent_episodes["a0"]
            check(a0.observations, [0])
            check(a0.actions, [])
            check(a0.rewards, [])
            check(slice._hanging_actions_end["a0"], 0)
            check(slice._hanging_rewards_end["a0"], 0.1)

        Args:
            slice_: The slice object to use for slicing. This should exclude the
                lookback buffer, which will be prepended automatically to the returned
                slice.
            len_lookback_buffer: If not None, forces the returned slice to try to have
                this number of timesteps in its lookback buffer (if available). If None
                (default), tries to make the returned slice's lookback as large as the
                current lookback buffer of this episode (`self`).

        Returns:
            The new MultiAgentEpisode representing the requested slice.
        )rb   Nz9Slicing MultiAgentEnv with a step other than 1 (you used z) is not supported!Nr   c              3   .   K   | ]}|j         k    V  d S ro   )r   )rf   ilbref_lookbacks     rN   rr   z*MultiAgentEpisode.slice.<locals>.<genexpr>  s>         LL0     rP   zdCan only slice a MultiAgentEpisode if all lookback buffers in this episode have the exact same size!c              3   B   K   | ]}                     |          V  d S ro   rJ   rk   s     rN   rr   z*MultiAgentEpisode.slice.<locals>.<genexpr>A  s@       %
 %
%(KOOC  %
 %
 %
 %
 %
 %
rP   r>   c              3   B   K   | ]}                     |          V  d S ro   r   rm   s     rN   rr   z*MultiAgentEpisode.slice.<locals>.<genexpr>D  s/      #W#WCJNN3$7$7#W#W#W#W#W#WrP   rb   T)r   r   )r   r   r   c                 $    i | ]\  }}||j         S rd   r   )rf   rg   eids      rN   r   z+MultiAgentEpisode.slice.<locals>.<dictcomp>m  s-       !)cSW  rP   )r   r2   r   r4   r    r5   r8   r6   r7   r9   r!   r:   r#   r;   r   )%stepNotImplementedErrorstartstopr   rD   r+   rZ   r2   r   r4   r5   r{   r8   r   AssertionError
ValueErrorr$   r~   r)   r*   r   	t_startedr   r   r   r   r   r   r   r   r    r!   r   r   r   r   )rM   r   r9   r   r   rg   r   r#   mappingr   _lbr2   r4   r5   r8   
ma_episoder   r6   r7   s                   @@@rN   r   zMultiAgentEpisode.slice  s   \ ;i''%5K5 5 5   { =EEQYYD		E)1--EESYYIIE <t99DDAXXs4yy4'++DDCIIt99D	#'#6#<#<#>#> 	 	Z'#-#:#CL!.7<GGGG!)2lBBBB!)2lBBBB    )=DDFF        	  	 	 	4  	 
#288:: 	 	OC+C0G s7||##DMT%999
OOwt}44#-#;C ","9
3s7||##uc'll33  AqzT%999/9/Cgaj/P, : "% %
 %
 %
 %
,0,?%
 %
 %
 "
 "
I !$#W#W#W#W4CV#W#W#W W W
9 &9%D!!,3;??|sU{;;&C,,%#+tax(("& - 
 

 ""%#+t$$"& # 
 

 ""%#+t$$"& # 
 

 #::%#+t,,"& ; 
 
 '%"4* 3#! #,u4 -1-@-F-F-H-H   ,!:'+'F%
 
 

, = 	"!!!s   BE7 7Fc                      | j         | j        z
  S )a7  Returns the length of an `MultiAgentEpisode`.

        Note that the length of an episode is defined by the difference
        between its actual timestep and the starting point.

        Returns: An integer defining the length of the episode or an
            error if the episode has not yet started.
        )r"   r!   r   s    rN   __len__zMultiAgentEpisode.__len__{  s     zD...rP   c           	          d | j                                         D             }dt          |            d| j         d| d| j         d	S )Nc                 >    i | ]\  }}||                                 S rd   
get_returnre   s      rN   r   z.MultiAgentEpisode.__repr__.<locals>.<dictcomp>  s7     
 
 
)4fC""$$
 
 
rP   z
MAEps(len=z done=z Rs=z id_=))r+   rZ   rD   rY   r   )rM   sa_eps_returnss     rN   __repr__zMultiAgentEpisode.__repr__  sz    
 
8<8K8Q8Q8S8S
 
 
3T 3 3$, 3 3 3 3'+x3 3 3	
rP   c           	         t          d | j                                        D                       }t          t	          | j                                                            j        }t          d | j        D                       }dd|z  z   d                    d t          | ||z
            D                       z   dz   }g }| j        	                                D ]b\  }}| dd|t          |          z
  z  z   }|j        D ]}	|	d	k    r|d
z  }|dz  }|                    |                                           ct          |d                    |          z              dS )zHPrints this MultiAgentEpisode as a table of observations for the agents.c              3   >   K   | ]}|                                 V  d S ro   )len_incl_lookback)rf   tss     rN   rr   z*MultiAgentEpisode.print.<locals>.<genexpr>  s.      UUR))++UUUUUUrP   c              3   4   K   | ]}t          |          V  d S ro   rD   )rf   rg   s     rN   rr   z*MultiAgentEpisode.print.<locals>.<genexpr>  s(      ??CHH??????rP   r   z   c              3   4   K   | ]}t          |          V  d S ro   )str)rf   r   s     rN   rr   z*MultiAgentEpisode.print.<locals>.<genexpr>  s(      MMAQMMMMMMrP   
z  r0   z    z x  N)r   r$   r   r   r   r   r|   joinr   rZ   rD   r   r[   rstripprint)
rM   max_tsr   longest_agentheaderrowsagent
inf_bufferrowr   s
             rN   r  zMultiAgentEpisode.print  s    UUd6K6R6R6T6TUUUUUT299;;<<==F??????? ]"$jjMMy&8:K)L)LMMMMMN  	 !%!6!<!<!>!> 
	& 
	&E:,,,#U)C"DEC_ " "886MCC 6MCCKK

%%%% 	ftyy&'''''rP   c                    i d| j         d| j        d| j        d| j        d| j        d| j        d| j        d| j        d	| j        d
| j	        d| j
        d| j        d| j        d| j        d| j        dt          d | j                                        D                                                       d| j        | j        | j        dS )zReturns the state of a multi-agent episode.

        Note that from an episode's state the episode itself can
        be recreated.

        Returns: A dicitonary containing pickable data for a
            `MultiAgentEpisode`.
        r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   c                 >    i | ]\  }}||                                 S rd   )	get_stater   s      rN   r   z/MultiAgentEpisode.get_state.<locals>.<dictcomp>  s:       +) i1133  rP   r.   )r,   r   )r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   listr+   rZ   r.   r,   r   r   s    rN   r  zMultiAgentEpisode.get_state  s_   
48
($*I
 '(E
  !7	

 D-
 T/
 TZ
 t3
  5
 #D$=
 /0U
 #D$=
 %d&A
 T/
  D-!
" d /3/B/H/H/J/J   %''	 #
. 4+/
0  $3+3
 
 
 	
rP   statec                 ^   t          | d                   }| d         |_        | d         |_        | d         |_        | d         |_        | d         |_        | d         |_        | d	         |_        | d
         |_        | d         |_	        | d         |_
        | d         |_        | d         |_        | d         |_        | d         |_        d | d         D             |_        | d         |_        | d         |_        |                     di           |_        |                                 |S )a>  Creates a multi-agent episode from a state dictionary.

        See `MultiAgentEpisode.get_state()` for creating a state for
        a `MultiAgentEpisode` pickable state. For recreating a
        `MultiAgentEpisode` from a state, this state has to be complete,
        i.e. all data must have been stored in the state.

        Args:
            state: A dict containing all data required to recreate a MultiAgentEpisode`.
                See `MultiAgentEpisode.get_state()`.

        Returns:
            A `MultiAgentEpisode` instance created from the state data.
        r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   c                 >    i | ]\  }}|t          j        |          S rd   )r   
from_state)rf   rT   agent_states      rN   r   z0MultiAgentEpisode.from_state.<locals>.<dictcomp>  s:     "
 "
 "
%+ (3K@@"
 "
 "
rP   r+   r.   r,   r   )r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r.   r,   rJ   r/   rL   )r  episodes     rN   r   zMultiAgentEpisode.from_state  sL   " $e555-23O-P*+01K+L($)*=$>!$^4 %o 6g"'(9":#();#< ',-C'D$38.4
0 (--C'D$)./G)H& %o 6$^4"
 "
)./?)@"
 "
 "
 $M2"'(9":$yy;; 	rP   c                     t          d | j                                        D             | j        | j        z
            S )a  Converts this `MultiAgentEpisode` into a `MultiAgentBatch`.

        Each `SingleAgentEpisode` instances in `MultiAgentEpisode.agent_epiosdes`
        will be converted into a `SampleBatch` and the environment timestep will be
        passed as the returned MultiAgentBatch's `env_steps`.

        Returns:
            A MultiAgentBatch containing all of this episode's data.
        c                 d    i | ]-\  }}|j         |j        z
  d k    ||                                .S r   )r   r   get_sample_batchr   s      rN   r   z6MultiAgentEpisode.get_sample_batch.<locals>.<dictcomp>  sI       'Hi;!44q88 )4466888rP   )policy_batchesr   )r   r+   rZ   r"   r!   r   s    rN   r&  z"MultiAgentEpisode.get_sample_batch  sU      +/+>+D+D+F+F  
 j4#55
 
 
 	
rP   include_hanging_rewardsc                     t          d | j                                        D                       }|rB| j                                        D ]}||z  }| j                                        D ]}||z  }|S )aX  Returns all-agent return.

        Args:
            include_hanging_rewards: Whether we should also consider
                hanging rewards wehn calculating the overall return. Agents might
                have received partial rewards, i.e. rewards without an
                observation. These are stored in the "hanging" caches (begin and end)
                for each agent and added up until the next observation is received by
                that agent.

        Returns:
            The sum of all single-agents' returns (maybe including the hanging
            rewards per agent).
        c              3   >   K   | ]}|                                 V  d S ro   r   )rf   r   s     rN   rr   z/MultiAgentEpisode.get_return.<locals>.<genexpr>+  s?       
 
'0I  ""
 
 
 
 
 
rP   )sumr+   r   r(   r'   )rM   r(  
env_return	hanging_rs       rN   r  zMultiAgentEpisode.get_return  s    $  
 
484G4N4N4P4P
 
 
 
 

 # 	(!8??AA ( (	i'

!6==?? ( (	i'

rP   c                 j      fd                      d                                          D             S )aD  Returns a set of agent IDs required to send an action to `env.step()` next.

        Those are generally the agents that received an observation in the most recent
        `env.step()` call.

        Returns:
            A set of AgentIDs that are supposed to send actions to the next `env.step()`
            call.
        c                 8    h | ]}j         |         j        |S rd   )r+   rY   r   s     rN   	<setcomp>z6MultiAgentEpisode.get_agents_to_act.<locals>.<setcomp>@  s:     
 
 
&s+3

 
 
rP   r   )r   r}   r   s   `rN   get_agents_to_actz#MultiAgentEpisode.get_agents_to_act6  sH    
 
 
 
,,R005577
 
 
 	
rP   c                 j    t          |                     d                                                    S )a  Returns a set of agent IDs of those agents that just finished stepping.

        These are all the agents that have an observation logged at the last env
        timestep, which may include agents, whose single agent episode just terminated
        or truncated.

        Returns:
            A set of AgentIDs of those agents that just finished stepping (that have a
            most recent observation on the env timestep scale), regardless of whether
            their single agent episodes are done or not.
        r   )rp   r   r}   r   s    rN   get_agents_that_steppedz)MultiAgentEpisode.get_agents_that_steppedF  s,     4((,,1133444rP   c                 2    | j         dS | j         | j        z
  S )z8Returns the duration of this Episode (chunk) in seconds.Nrv   )r,   r.   r   s    rN   get_duration_sz MultiAgentEpisode.get_duration_sT  s!    '3#d&666rP   )
at_indicesr   new_datar6  c                    |                                 D ]D\  }}|| j        vrt          d| d          | j        |                             |||           EdS )a	  Overwrites all or some single-agent Episode's observations with the provided data.

        This is a helper method to batch `SingleAgentEpisode.set_observations`.
        For more detail, see `SingleAgentEpisode.set_observations`.

        Args:
            new_data: A dict mapping agent IDs to new observation data.
                Each value in the dict is the new observation data to overwrite existing data with.
                This may be a list of individual observation(s) in case this episode
                is still not numpy'ized yet. In case this episode has already been
                numpy'ized, this should be (possibly complex) struct matching the
                observation space and with a batch size of its leafs exactly the size
                of the to-be-overwritten slice or segment (provided by `at_indices`).
            at_indices: A single int is interpreted as one index, which to overwrite
                with `new_data` (which is expected to be a single observation).
                A list of ints is interpreted as a list of indices, all of which to
                overwrite with `new_data` (which is expected to be of the same size
                as `len(at_indices)`).
                A slice object is interpreted as a range of indices to be overwritten
                with `new_data` (which is expected to be of the same size as the
                provided slice).
                Thereby, negative indices by default are interpreted as "before the end"
                unless the `neg_index_as_lookback=True` option is used, in which case
                negative indices are interpreted as "before ts=0", meaning going back
                into the lookback buffer.
            neg_index_as_lookback: If True, negative values in `at_indices` are
                interpreted as "before ts=0", meaning going back into the lookback
                buffer. For example, an episode with
                observations = [4, 5, 6,  7, 8, 9], where [4, 5, 6] is the
                lookback buffer range (ts=0 item is 7), will handle a call to
                `set_observations(individual_observation, -1,
                neg_index_as_lookback=True)` by overwriting the value of 6 in our
                observations buffer with the provided "individual_observation".

        Raises:
            IndexError: If the provided `at_indices` do not match the size of
                `new_data`.
        	AgentID '' not found in this episode.r7  r6  r   N)rZ   r+   KeyErrorset_observationsrM   r7  r6  r   rT   new_agent_datas         rN   r=  z"MultiAgentEpisode.set_observationsZ  s    Z )1(8(8 	 	$Hnt222Q8QQQRRR)::'%&; ;    	 	rP   c                    |                                 D ]D\  }}|| j        vrt          d| d          | j        |                             |||           EdS )aG	  Overwrites all or some of this Episode's actions with the provided data.

        This is a helper method to batch `SingleAgentEpisode.set_actions`.
        For more detail, see `SingleAgentEpisode.set_actions`.

        Args:
            new_data: A dict mapping agent IDs to new action data.
                Each value in the dict is the new action data to overwrite existing data with.
                This may be a list of individual action(s) in case this episode
                is still not numpy'ized yet. In case this episode has already been
                numpy'ized, this should be (possibly complex) struct matching the
                action space and with a batch size of its leafs exactly the size
                of the to-be-overwritten slice or segment (provided by `at_indices`).
            at_indices: A single int is interpreted as one index, which to overwrite
                with `new_data` (which is expected to be a single observation).
                A list of ints is interpreted as a list of indices, all of which to
                overwrite with `new_data` (which is expected to be of the same size
                as `len(at_indices)`).
                A slice object is interpreted as a range of indices to be overwritten
                with `new_data` (which is expected to be of the same size as the
                provided slice).
                Thereby, negative indices by default are interpreted as "before the end"
                unless the `neg_index_as_lookback=True` option is used, in which case
                negative indices are interpreted as "before ts=0", meaning going back
                into the lookback buffer.
            neg_index_as_lookback: If True, negative values in `at_indices` are
                interpreted as "before ts=0", meaning going back into the lookback
                buffer. For example, an episode with
                actions = [4, 5, 6,  7, 8, 9], where [4, 5, 6] is the
                lookback buffer range (ts=0 item is 7), will handle a call to
                `set_actions(individual_action, -1,
                neg_index_as_lookback=True)` by overwriting the value of 6 in our
                actions buffer with the provided "individual_action".

        Raises:
            IndexError: If the provided `at_indices` do not match the size of
                `new_data`.
        r9  r:  r;  N)rZ   r+   r<  set_actionsr>  s         rN   rA  zMultiAgentEpisode.set_actions      Z )1(8(8 	 	$Hnt222Q8QQQRRR)55'%&; 6    	 	rP   c                    |                                 D ]D\  }}|| j        vrt          d| d          | j        |                             |||           EdS )a	  Overwrites all or some of this Episode's rewards with the provided data.

        This is a helper method to batch `SingleAgentEpisode.set_rewards`.
        For more detail, see `SingleAgentEpisode.set_rewards`.

        Args:
            new_data: A dict mapping agent IDs to new reward data.
                Each value in the dict is the new reward data to overwrite existing data with.
                This may be a list of individual reward(s) in case this episode
                is still not numpy'ized yet. In case this episode has already been
                numpy'ized, this should be a np.ndarray with a length exactly
                the size of the to-be-overwritten slice or segment (provided by
                `at_indices`).
            at_indices: A single int is interpreted as one index, which to overwrite
                with `new_data` (which is expected to be a single reward).
                A list of ints is interpreted as a list of indices, all of which to
                overwrite with `new_data` (which is expected to be of the same size
                as `len(at_indices)`).
                A slice object is interpreted as a range of indices to be overwritten
                with `new_data` (which is expected to be of the same size as the
                provided slice).
                Thereby, negative indices by default are interpreted as "before the end"
                unless the `neg_index_as_lookback=True` option is used, in which case
                negative indices are interpreted as "before ts=0", meaning going back
                into the lookback buffer.
            neg_index_as_lookback: If True, negative values in `at_indices` are
                interpreted as "before ts=0", meaning going back into the lookback
                buffer. For example, an episode with
                rewards = [4, 5, 6,  7, 8, 9], where [4, 5, 6] is the
                lookback buffer range (ts=0 item is 7), will handle a call to
                `set_rewards(individual_reward, -1,
                neg_index_as_lookback=True)` by overwriting the value of 6 in our
                rewards buffer with the provided "individual_reward".

        Raises:
            IndexError: If the provided `at_indices` do not match the size of
                `new_data`.
        r9  r:  r;  N)rZ   r+   r<  set_rewardsr>  s         rN   rD  zMultiAgentEpisode.set_rewards  rB  rP   c                    |                                 D ]E\  }}|| j        vrt          d| d          | j        |                             ||||           FdS )a+
  Overwrites all or some of this Episode's extra model outputs with `new_data`.

        This is a helper method to batch `SingleAgentEpisode.set_extra_model_outputs`.
        For more detail, see `SingleAgentEpisode.set_extra_model_outputs`.

        Args:
            key: The `key` within `self.extra_model_outputs` to override data on or
                to insert as a new key into `self.extra_model_outputs`.
            new_data: A dict mapping agent IDs to new extra model outputs data.
                Each value in the dict is the new extra model outputs data to overwrite existing data with.
                This may be a list of individual reward(s) in case this episode
                is still not numpy'ized yet. In case this episode has already been
                numpy'ized, this should be a np.ndarray with a length exactly
                the size of the to-be-overwritten slice or segment (provided by
                `at_indices`).
            at_indices: A single int is interpreted as one index, which to overwrite
                with `new_data` (which is expected to be a single extra model output).
                A list of ints is interpreted as a list of indices, all of which to
                overwrite with `new_data` (which is expected to be of the same size
                as `len(at_indices)`).
                A slice object is interpreted as a range of indices to be overwritten
                with `new_data` (which is expected to be of the same size as the
                provided slice).
                Thereby, negative indices by default are interpreted as "before the end"
                unless the `neg_index_as_lookback=True` option is used, in which case
                negative indices are interpreted as "before ts=0", meaning going back
                into the lookback buffer.
            neg_index_as_lookback: If True, negative values in `at_indices` are
                interpreted as "before ts=0", meaning going back into the lookback
                buffer. For example, an episode with
                extra_model_outputs[key][agent_id] = [4, 5, 6, 7, 8, 9], where [4, 5, 6] is the
                lookback buffer range (ts=0 item is 7), will handle a call to
                `set_extra_model_outputs(key, individual_output, -1,
                neg_index_as_lookback=True)` by overwriting the value of 6 in our
                extra_model_outputs[key][agent_id] buffer with the provided "individual_output".

        Raises:
            IndexError: If the provided `at_indices` do not match the size of
                `new_data`.
        r9  r:  )r   r7  r6  r   N)rZ   r+   r<  set_extra_model_outputs)rM   r   r7  r6  r   rT   r?  s          rN   rF  z)MultiAgentEpisode.set_extra_model_outputs  s    ` )1(8(8 	 	$Hnt222Q8QQQRRR)AA'%&;	 B    	 	rP   c                      t          |           S )zReturns the number of environment steps.

        Note, this episode instance could be a chunk of an actual episode.

        Returns:
            An integer that counts the number of environment steps this episode instance
            has seen.
        r
  r   s    rN   r   zMultiAgentEpisode.env_steps6  s     4yyrP   c                 b    t          d | j                                        D                       S )zNumber of agent steps.

        Note, there are >= 1 agent steps per environment step.

        Returns:
            An integer counting the number of agent steps executed during the time this
            episode instance records.
        c              3   4   K   | ]}t          |          V  d S ro   r
  )rf   r   s     rN   rr   z0MultiAgentEpisode.agent_steps.<locals>.<genexpr>J  s(      DD3s88DDDDDDrP   )r+  r+   r   r   s    rN   agent_stepszMultiAgentEpisode.agent_stepsA  s0     DDt':'A'A'C'CDDDDDDrP   itemc                 ~    t          |t                    r|                     |          S t          d| d          )zGEnable squared bracket indexing- and slicing syntax, e.g. episode[-4:].)r   z1MultiAgentEpisode does not support getting item 'z>'! Only slice objects allowed with the syntax: `episode[a:b]`.)rH   r   r   )rM   rK  s     rN   __getitem__zMultiAgentEpisode.__getitem__L  sT    dE"" 	::T:***%ND N N N  rP   )	r;   r:   r2   r4   r5   r3   r6   r7   r8   c       	             |d S ||rJ |	rJ g }g }g }	|&d t          t          |                    D             }|	&d t          t          |                    D             }	t          t                    }
t          t                    }t          t                    }t          t                    }t          t                    t          t                    }t           fd          }t          ||                                ng           }|pi }t          t          ||                    D ]C\  }\  }}t          |          |k    r||         ni }t          |	          |k    r|	|         ni }t          |          |k    r||         ni }|	                                D ]<\  }|
                               |
                             |           |                             |                    i                      t          |
                   dk    r|                              j                                                                               j                                                 |                              j                                                 nB j        vr9 j                                      j        g|z             |xx         |z  cc<   |v rX|          j        <   |                    i            j        <    j        xx         |                    d          z  cc<   nP|                              s|                              rd|<   n |t          |          dz
  k     r
dx|<   |<    j                                     t          |
                   dz
             >|D ]|vr|vr j                                      j                   t           j                            j        z
  dk    r|xx         dz  cc<    j        xx         |                    d          z  cc<   Et           j                                                  D ]|
vr	 j        = t          |
                   t          |                   cxk    rVt          |                   dz   cxk    r9t                             dz   cxk    rt          |                   dz   k    sn J  j         j                 _        |
	                                D ]p\  }t          |          dk    r+|                              r                                D|                                                              }t1          ||                              nd | j        | j                                      |         |          j                                      |                  r-fd         d                                         D             nd |                    d	          |                    d	           j                 t;          |         d          
          }| j        <   rd S )Nc                     g | ]}i S rd   rd   rf   _s     rN   ri   zAMultiAgentEpisode._init_single_agent_episodes.<locals>.<listcomp>o  s    :::AR:::rP   c                     g | ]}i S rd   rd   rP  s     rN   ri   zAMultiAgentEpisode._init_single_agent_episodes.<locals>.<listcomp>q  s    "C"C"C!2"C"C"CrP   c                       j         S ro   )r-   r   s   rN   <lambda>z?MultiAgentEpisode._init_single_agent_episodes.<locals>.<lambda>y  s
    D<V rP   rb   rv   Tr   c                 <    i | ]fd          D             S )c                      g | ]
}|         S rd   rd   )rf   r   ks     rN   ri   zLMultiAgentEpisode._init_single_agent_episodes.<locals>.<dictcomp>.<listcomp> 	  s    RRRQAaDRRRrP   rd   )rf   rW  rT   extra_model_outputs_per_agents    @rN   r   zAMultiAgentEpisode._init_single_agent_episodes.<locals>.<dictcomp>  sG        RRRR*G*QRRR  rP   F)r   rT   rU   rV   r2   r   r3   r4   r    r5   r8   ry   rz   r   r9   )r   rD   r   r  rI   rp   r}   r   ziprZ   addr[   rJ   r%   r   r&   r'   r$   r   r~   r-   r   
_del_agentr   r   r   r   r    r#   r   r+   )rM   r;   r:   r2   r4   r5   r3   r6   r7   r8   observations_per_agentinfos_per_agentactions_per_agentrewards_per_agentdone_per_agentlen_lookback_buffer_per_agentr   data_idxobsinfact
extra_outsrewr`   rU   r   rT   rX  s   `                         @@rN   rK   z-MultiAgentEpisode._init_single_agent_episodesV  s    F?;****GG"$ =::s<'8'8!9!9:::E&"C"CuS\\/B/B"C"C"C!,T!2!2%d++'--'--(3D(9(9%$T**(34V4V4V4V(W(W%(9(E""$$$2
 
 ,1r %.c,.F.F$G$G M	R M	R HjsC
 (+7||h'>'>'(##BC *++h66 $H-- 
 (+7||h'>'>'(##BC'*yy{{ / /#)!!(+++&x077	BBB)0021F1FGGG -h7881<<%h/66155h??   2(;BB=AA(KK   &h/66155h??     t'<<<-h7>>!12X=   6h???8K??? s?? ;>h-D-h7FPnn "G GD9(C -h7773778S;Q;QQ7777 __X.. L*..2J2J L/3N8,, L 1 1A 555GKKN8,{8/D %h/66.x899A=    * R R3&&8>+I+I)(3::4;OPPP
 D1(;<<45  6h???1D???-h7773778S;Q;QQ777R  T27799:: 	R 	RH555)(3*8455x0118 8 8 8(233a78 8 8 8 4X>??!C8 8 8 8 (233a7	8 8 8 8 8 8 8<7QD!(+44 $:#?#?#A#A +	7 +	7Hi 9~~""~'9'9('C'C")))
 ),,$99(DII I , )4 &))(333!#'+x&"&"8"<"<X"F"F%h/)(3!.228<<)(3 5X>	     !>x!H!K!P!P!R!R   
 &??8U;;$..599.x8$'(Eh(OQR$S$S5  J: -7D))W+	7 +	7rP   )r|   r   r   r   r   r   r   c       	            t          t          |                    p| j        }t          |||||t          |si nd|i          |	          }
|du r"|rt	          d| d           | j        di |
S |r | j        di |
S  | j        di |
S )Nr   )r   r   r|   r   r   r   r   Fz`MultiAgentEpisode.get_zG()` can't be called with both `env_steps=False` and `return_list=True`!rd   )rp   r   r|   rF   r   _get_data_by_agent_steps_get_data_by_env_steps_as_list_get_data_by_env_steps)rM   r   r   r|   r   r   r   r   r   r   kwargss              rN   r   zMultiAgentEpisode._get	  s    
9--..@$.	"7!*V1CEU0V  %<
 
 
   @d @ @ @   140::6:::  	9646@@@@@ /4.88888rP   c          	         i }| j                                         D ]b\  }	}
|	|vr
t          |
|          }|                     ||	          }|||         }||         } |j        d||||d|}||g k    r]|||	<   c|S )Nr   r   r   _add_last_ts_valuerd   )r+   rZ   getattr_get_hanging_valuerJ   )rM   r   r   r|   r   r   r   r   retrT   r   inf_lookback_bufferhanging_valagent_values                 rN   ri  z*MultiAgentEpisode._get_data_by_agent_steps<	  s      %)$7$=$=$?$? 	( 	( Hjy((")*d";";11$AAK&2&9:Q&R#)*AB1-1 &;#.	 
 # K "kR&7&7'CMM
rP   r   r   c                |   i }| j                                         D ]5}	|	|vr| j        |	                             ||| j        |dv          ||	<   6|sg S g }
t          t          t          t          |	                                                                        D ]}i }|
                                D ]m\  }	}|                     ||	          }|                     |	|||||                   \  }}|dk    r|s|sI|                     |||	|||||          }||||	<   n|
                    |           |
S )Nr2   r3   r   r   _ignore_last_tsfilter_for_skip_indicesr8   r   rs  rT   index_incl_lookbackr   r   r   rt  )r+   r}   r$   rJ   r~   r   rD   r   r   r   rZ   rq   _get_inf_lookback_buffer_or_dict_get_single_agent_data_by_indexr[   )rM   r   r   r|   r   r   r   r   agent_indicesrT   rr  r   ret2idxesrt  rs  indices_to_useru  s                     rN   rj  z0MultiAgentEpisode._get_data_by_env_steps_as_list`	  s    +0022 	 	Hy((&*&;H&E&I&I&;) !%,E E 'J ' 'M(##  	Is4]%9%9%;%; < <==>>??  	  	AD#0#6#6#8#8 1 1%"55dHEE 99+,1!H :  '" 111/ 2' 2 "BB(;%(6%5,C + C 	 	 *%0DNJJt
rP   c                   |dv}i }	| j                                         D ]\  }
}|
|vr
|                     ||
          }| j        |
                             |||| j        nd |          }|                     |
||||          \  }}t          |t                    r5| 	                    ||
|||||          }t          |          dk    r||	|
<   |                     |||
|||||          }|||	|
<   |	S )Nrw  rx  rz  )r   rT   indices_incl_lookbackr   r   rt  r   r   r|  )r+   rZ   rq  r$   rJ   r~   r~  rH   r  *_get_single_agent_data_by_env_step_indicesrD   r  )rM   r   r   r|   r   r   r   r   ignore_last_tsrr  rT   r   rt  r  rs  agent_valuess                   rN   rk  z(MultiAgentEpisode._get_data_by_env_steps	  sw    %>>$($7$=$=$?$? ,	1 ,	1 Hjy((11$AAK 1(;??&;-1-=T))4 !/ @  M 261V1V'(5 2W 2 2. -.. 1#NN%*7%5 +,C  O     |$$q(($0CM#CC(;%(5%5,C +  D 	  	   +$0CM
rP   rs  r}  rt  c          	          | j         |         }	|| j        k    r6|d S  t          |	d|           d	dd|dt          |i nd|i          |S |dk    r||t	          |t                    sJ i }
|rB|                                D ],\  }} |j        d	||j        z
  d||d n||         d||
|<   -n>|                                D ])} t                      j        d	|d|||         d||
|<   *|
S  |j        d	||j        z
  d||d|S )
Nget_    J)Fr   r   r   r   r8   Trn  rd   )
r+   r~   rp  rF   rH   rZ   rJ   r   r}   r   )rM   r   rs  rT   r}  r   r   r   rt  r   rr  r   
sub_buffers                rN   r  z1MultiAgentEpisode._get_single_agent_data_by_index	  s    (2
$"666|57:}d}}55 
%&+
 
 .6 B!89 	
 #
 
 
$ ***/F/N&*[$*G*G&&GC" ':'@'@'B'B 	 	OC-z~   3j6I I.2!$/$7DD[=M    +   CHH	 '++--  C;577;   3.2!+6s+;	   
 +   CHH J +&* +.A.JJ&*#.	 
 #  rP   )r   r   r   rt  r  c                     j         |         }t          ||          ||          j        |v r| j        ddd|d|}	g }
|D ]^}| j        k    r|
                    |	           #|
                     j        d|t          ||          j        z
  d||d|           _ j        rt          |
          }
n! fd|D             } j        d|d||d|}
|
S )	a}  Returns single data item from the episode based on given (env step) indices.

        The returned data item will have a batch size that matches the env timesteps
        defined via `indices_incl_lookback`.

        Args:
            what: A (str) descriptor of what data to collect. Must be one of
                "observations", "infos", "actions", "rewards", or "extra_model_outputs".
            indices_incl_lookback: A list of ints specifying, which indices
                to pull from the InfiniteLookbackBuffer defined by `agent_id` and `what`
                (and maybe `extra_model_outputs_key`). Note that these indices
                disregard the special logic of the lookback buffer. Meaning if one
                index in `indices_incl_lookback` is 0, then the first value in the
                lookback buffer should be returned, not the first value after the
                lookback buffer (which would be normal behavior for pulling items from
                an `InfiniteLookbackBuffer` object).
            agent_id: The individual agent ID to pull data for. Used to lookup the
                `SingleAgentEpisode` object for this agent in `self`.
            fill: An optional float value to use for filling up the returned results at
                the boundaries. This filling only happens if the requested index range's
                start/stop boundaries exceed the buffer's boundaries (including the
                lookback buffer on the left side). This comes in very handy, if users
                don't want to worry about reaching such boundaries and want to zero-pad.
                For example, a buffer with data [10, 11,  12, 13, 14] and lookback
                buffer size of 2 (meaning `10` and `11` are part of the lookback buffer)
                will respond to `indices_incl_lookback=[-1, -2, 0]` and `fill=0.0`
                with `[0.0, 0.0, 10]`.
            one_hot_discrete: If True, will return one-hot vectors (instead of
                int-values) for those sub-components of a (possibly complex) space
                that are Discrete or MultiDiscrete. Note that if `fill=0` and the
                requested `indices_incl_lookback` are out of the range of our data, the
                returned one-hot vectors will actually be zero-hot (all slots zero).
            extra_model_outputs_key: Only if what is "extra_model_outputs", this
                specifies the sub-key (str) inside the extra_model_outputs dict, e.g.
                STATE_OUT or ACTION_DIST_INPUTS.
            hanging_val: In case we are pulling actions, rewards, or extra_model_outputs
                data, there might be information "hanging" (cached). For example,
                if an agent receives an observation o0 and then immediately sends an
                action a0 back, but then does NOT immediately reveive a next
                observation, a0 is now cached (not fully logged yet with this
                episode). The currently cached value must be provided here to be able
                to return it in case the index is -1 (most recent timestep).

        Returns:
            A data item corresponding to the provided args.
        Nr  Fr  Trn  c                 :    g | ]}|j         k    |j        z
  S rd   )r~   r   )rf   r   rs  rM   s     rN   ri   zPMultiAgentEpisode._get_single_agent_data_by_env_step_indices.<locals>.<listcomp>
  s:       ,,, '00,,,rP   rd   )r+   rp  r~   rJ   r[   r   r   r   )rM   r   rT   r  r   r   r   rt  r   single_fill_valuerr  r   r   rs  s   `            @rN   r  z<MultiAgentEpisode._get_single_agent_data_by_env_step_indices$
  s   r (2
%j$77"."56M"N
 #888T=M 7 3 7 !%&+! ! #	! ! C*  ,,,JJ01111JJ/+/ $%
D(A(A(J$J26!%/:	 
 /     } !Cjj    .  G
 *%) &*#.	 
 # C 
rP   c                     |dk    r| j                             |          S |dk    r| j                            |          S |dk    r| j                            |          S dS )zFReturns the hanging action/reward/extra_model_outputs for given agent.r4   r8   r5   N)r%   rJ   r&   r'   )rM   r   rT   s      rN   rq  z$MultiAgentEpisode._get_hanging_value
  sq    9,00:::***8<<XFFFY,00::: rP   c                    ||j         v r|j         |         | j         |<   ||j        v ret          j        |j        |                   | j        |<   |j        |         | j        |<   t          j        |j        |                   | j        |<   dS dS )zICopies hanging action, reward, extra_model_outputs from `other` to `self.N)r(   r'   r   r   r%   r&   )rM   rT   r   s      rN   r   zMultiAgentEpisode._copy_hanging
  s    u333494P5D'1 u11126-*843 3D%h/ 382LX2VD%h/>Bm6x@? ?D1(;;; 21rP   c                     | j                             |d           | j                            |d           | j                            |d           | j                            |d           dS )zGDeletes all hanging action, reward, extra_model_outputs of given agent.N)r(   r   r%   r&   r'   rM   rT   s     rN   r   zMultiAgentEpisode._del_hanging
  sl    #''$777!%%h555-11(DAAA!%%h55555rP   c                 <   |                      |           | j                            |d           | j                            |           | j                            |d           | j                            |d           | j                            |d           dS )z2Deletes all data of given agent from this episode.N)r   r+   r   r|   discardr$   r   r#   r  s     rN   r[  zMultiAgentEpisode._del_agent
  s    (###$///x(((!!(D111%))(D999  400000rP   c                   	 t          | j        |         |          x}}|dk    rD|	||         }n9|r/t          t          |                                                    }n|||fS |S |St          |          |j        z   |duz   	|dvt          |t                    r	fd|D             }n
r|	k    rd}||fS |S )a   Returns a single InfiniteLookbackBuffer or a dict of such.

        In case `what` is "extra_model_outputs" AND `extra_model_outputs_key` is None,
        a dict is returned. In all other cases, a single InfiniteLookbackBuffer is
        returned.
        r8   Nrw  c                 (    g | ]}r|k    rd n|S )r0   rd   )rf   r   r  inf_lookback_buffer_lens     rN   ri   zFMultiAgentEpisode._get_inf_lookback_buffer_or_dict.<locals>.<listcomp>
  s>     + + + *Qa3J.J.JCCPQ+ + +rP   r0   )	rp  r+   r   r   r   rD   r   rH   r  )
rM   rT   r   r   rt  r{  inf_lookback_buffer_or_dictrs  r  r  s
           @@rN   r~  z2MultiAgentEpisode._get_inf_lookback_buffer_or_dict
  sG    =D)4=
 =
 	
#&9 (((&2&A+'## - 3&*40K0R0R0T0T+U+U&V&V##(424KKK22".'((%./d*, $
 ")BBN1488 .+ + + + +4+ + +''   .$;?V$V$V*-'.0GGG..rP   z-MultiAgentEpisode.custom_data[some-key] = ...)newerrorc                     d S ro   rd   r   s    rN   add_temporary_timestep_dataz-MultiAgentEpisode.add_temporary_timestep_data
      rP   z'MultiAgentEpisode.custom_data[some-key]c                     d S ro   rd   r   s    rN   get_temporary_timestep_dataz-MultiAgentEpisode.get_temporary_timestep_data
  r  rP   ro   )rQ   N)rQ   r   )r   r   rQ   Nr%  )NN)NNN)F)R__name__
__module____qualname____doc__	__slots__r~   r
   r  r	   r   gymSpacer   rI   rE   r   r   r   r   rO   r]   r   rL   propertyr   r   r   rY   r   r   r   r   r|   r:   r\   r   r   r   r   r   r   rG   r   r   r   r   r   r  r  r  staticmethodr   r   r&  r  r1  r3  r5  r=  rA  rD  rF  r   rJ  rM  rK   r   ri  rj  rk  r   rF   r  r  rq  r   r   r[  r~  r   r  r  rd   rP   rN   r   r       s        8I. O "R 8<150426,0263827>B'+8</5:>>B )R R Rc]R tN34	R
 $CI.R ^,-R $~./R sy)R $~./R >4/0R .$./R &d>&:;R  }R "$w|"45R #38_R  $D#$67!R" #4(9#:;#R$ %-g23X=>%
%R R R Rp +/	0/ 0/ 0/ %0/ '	0/
 
0/ 0/ 0/ 0/n +/h4 15/38<h4 h4 h4$h4  h4  	h4
 'h4 n-h4 ^,h4 &n5h4 
h4 h4 h4 h4T	    ! ! X! 
$ 
 
 
 X
 	$ 	 	 	 X	 7 7 X78R R R Rhh h h hTp ps p3F p p p pd /3w< / / / X/ 
> 
 
 
 X
;7 ;x/A ; ; ; ;0 ;?CGL

 &+"!&!L
 L
 L
%T#Y 567L
 E*W"5w">?@L

 L
  $L
 smL
 L
 L
 
~tN33	4L
 L
 L
 L
` ;?CGF

 &+"!F
 F
 F
%T#Y 567F
 E*W"5w">?@F

 F
  $F
 smF
 F
 
~tN33	4F
 F
 F
 F
T ;?CGK

 &+"!&!K
 K
 K
%T#Y 567K
 E*W"5w">?@K

 K
  $K
 smK
 K
 K
 
~tN33	4K
 K
 K
 K
^ ;?CGD

 &+ $!D
 D
 D
%T#Y 567D
 E*W"5w">?@D

 D
  $D
 uoD
 D
 
~tN33	4D
 D
 D
 D
P ":>CG	N
 &+"!N
 N
 N
c]N
 %T#Y 567N
 E*W"5w">?@	N
 N
  $N
 smN
 N
 
~tN33	4N
 N
 N
 N
`         .2	R R RR &c]	R
 
R R R Rh	/ 	/ 	/
 
 
( ( ( (>#
4S> #
 #
 #
 #
J -$sCx. --@ - - - \-^
/ 
 
 
 
0 ). !% 
   :
3w< 
 
 
 
 5W 5 5 5 57 7 7 7 7 >B&+4 4 4 !4 U3S	5#89:	4
  $4 
4 4 4 4t >B&+4 4 4 !4 U3S	5#89:	4
  $4 
4 4 4 4t >B&+4 4 4 !4 U3S	5#89:	4
  $4 
4 4 4 4v >B&+8 8 8 !	8
 U3S	5#89:8  $8 
8 8 8 8t	3 	 	 	 		ES 	E 	E 	E 	E *=     ?C:>7;2626043827>Bv7 v7 v7 #4(9#:;v7 $D#$67	v7
 tN34v7 $~./v7 $~./v7 ^,-v7 >4/0v7 .$./v7 &d>&:;v7 v7 v7 v7z # $,9 ,9 ,9 ,9 ,9\" " "H> > sE49,-	>
 g&>  $> > "%> 
n	> > > >@: : sE49,-	:
 g&:  $: : : "%: 
: : : :xF F 4	F
 F #38_F F F "%F F 
F F F F\ #!&15%)g g g g 	g
  %S#Xg smg g "*#g c]g 
g g g gR;s ;g ;# ; ; ; ;g 6I d    6W 6 6 6 6 617 1t 1 1 1 1 26%) $-/ -/-/ -/ "*#	-/
 c]-/ -/ -/ -/^ ZC4PPP  QP Z=TJJJ  KJ  rP   r   )&r   r^   r?   collectionsr   typingr   r   r   r   r   r	   r
   r   r   	gymnasiumr  ray._common.deprecationr   "ray.rllib.env.single_agent_episoder   ,ray.rllib.env.utils.infinite_lookback_bufferr   ray.rllib.policy.sample_batchr   ray.rllib.utilsr   ray.rllib.utils.errorr   "ray.rllib.utils.spaces.space_utilsr   ray.rllib.utils.typingr   r   r   ray.util.annotationsr   r   rd   rP   rN   <module>r     s      # # # # # #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
     . . . . . . A A A A A A O O O O O O 9 9 9 9 9 9 & & & & & & 4 4 4 4 4 4 4 4 4 4 4 4 D D D D D D D D D D * * * * * *
 WJ+ J+ J+ J+ J+ J+ J+ J+ J+ J+rP   