
    &`iq                    $   d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZmZ d dlZd dlZd dlmZmZ d dlmZ d dlmZmZmZ d dlmZmZmZ d d	lmZm Z  d d
l!m"Z" d dl#m$Z$m%Z%m&Z&m'Z'm(Z( d dl)m*Z*  e            \  Z+Z,Z- e             \  Z.Z/dZ0ede1fd            Z2e G d de1                      Z3e G d d                      Z4ede
e&         de&fd            Z5ede
e&         ddfd            Z6ddde'fdZ7ede&de3fd            Z8dS )    N)partial)Number)DictIteratorListOptionalSetUnion)
Deprecateddeprecation_warning)Columns)DeveloperAPIExperimentalAPI	PublicAPI)is_compressedpackunpack)try_import_tftry_import_torch)convert_to_torch_tensor)ModuleIDPolicyIDSampleBatchType
TensorTypeViewRequirementsDict)log_oncedefault_policytensor_dictc                    |                      t          j                  }|t          r*t                              |          rt          |d          rt          |          dk    rpt          rMt                              |          r3t          |	                                
                                          S t          t          |                    S |                                 D ]\  }}|t          j        k    rt          |t                    s
J |             |t          j        k    s*|                    d          s|                    d          rpt          |t           t"          f          rt%          j        |          n|g}d |D             }	 t          |d                   }|r|c S # t(          $ r Y w xY wdS )a  Attempt to count timesteps based on dimensions of individual elements.

    Returns the first successfully counted number of timesteps.
    We do not attempt to count on INFOS or any state_in_* and state_out_* keys. The
    number of timesteps we count in cases where we are unable to count is zero.

    Args:
        tensor_dict: A SampleBatch or another dict.

    Returns:
        count: The inferred number of timesteps >= 0.
    Nnumpyr   	state_in_
state_out_c                 r    g | ]4}t          |t          t          f          rt          j        |          n|5S  )
isinstancer   listnparray).0_vs     q/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/policy/sample_batch.py
<listcomp>z+attempt_count_timesteps.<locals>.<listcomp>T   sD     
 
 
GIJrFD>::BBHRLLL
 
 
    )getSampleBatchSEQ_LENStf	is_tensorhasattrlentorchintsumitemitemsr%   strINFOS
startswithdicttupletreeflatten	Exception)r   seq_lenskvv_list_lens         r+   attempt_count_timestepsrG   !   s   " {344H 	X.. 	7>x7Q7Q 	MMA 	&U__X.. 	&x||~~**,,---s8}}%%%!!##    1$$$!S!!..;..! """||K(( #||L)) # 
 %/q4-$@$@Iaqc
 
MS
 
 
	vay>>D  	 	 	D	 1s   F66
GGc                   <   e Zd ZdZej        Zej        Zej        Zej        Zej	        Z	ej
        Z
ej        Zej        Zej        Zej        Zej        Zej        Zej        Zej        Zej        ZdZdZdZdZdZdZdZd	Zd
ZdZdZed             Z ede!fd            Z"ede!fd            Z#ede!fd            Z$e%d             Z&e%d             Z'e(de)fd            Z*e(de)fd            Z+e,e e-dd          d                                     Z.edUd            Z/edVde)dd fd            Z0ede1e2e3e4f                  fd            Z5ed e6e3         de6e7         fd!            Z8edWd"            Z9edXd$e:e3         de6d          fd%            Z;	 dYd&e!d'e!dd fd(Z<d)e<dd fd*Z=e	 	 	 dZd+e:e!         d,e:e!         d-e:e!         de6d          fd.            Z> e-d/d          d[d0            Z?d[d1e!d2e)fd3Z@e(	 	 	 	 d\d5e3d6e)d7e)d8e:eAd9                  fd:            ZBede!fd;            ZCdXd<ZDedXd=e:eE         dd>fd?            ZFed$eAe3e<f         de4fd@            ZGed]dA            ZHeIdB             ZJd[dCeAe)dDf         fdEZKedF             ZLe%d eMddGg          fdHe)dIeNe3         dd fdJ            ZOe% eMddGg          fdIeNe3         dd fdK            ZPe%dL             ZQdM ZRd)e<dd fdNZS e-dO          dP             ZTe(	 d^dReUdSeAe3e!f         dd fdT            ZVd#S )_r/   zWrapper around a dictionary with string keys and array-like values.

    For example, {"obs": [1, 2, 3], "reward": [0, -1, 1]} is a batch of three
    samples, each with an "obs" and "reward" attribute.
    action_distprev_actionsprev_rewardsenv_idagent_index	unroll_id
obs_embedsreturns_to_goattention_masksdonesobsc                    t           j        |v rt          d          |                    dd          | _        |                    dd          | _        |                    dd          | _        |                    dd          | _        |                    dd          | _        t          j
        | g|R i | d| _        t                      | _        t                      | _        t                      | _        i | _        d| _        |                     t           j                  }|(t)          |t*                    r4t-          |          d	k    r!|                     t           j        d           nt)          |t*                    r0t/          j        |t.          j        
          x| t           j        <   }nQt4          rt4                              |          s!t8          r)t8                              |          r|| t           j        <   | j        |t8          rt8                              |          stt-          |          d	k    rat4          rFt4                              |          r,|                                                                | _        nt;          |          | _        | j        |                     dd          | _        |                                 D ]H\  }}t)          |t@          t*          f          r'|t           j!        k    st/          j        |          | |<   ItE          |           | _#        g | _$        dS )aP  Constructs a sample batch (same params as dict constructor).

        Note: All args and those kwargs not listed below will be passed
        as-is to the parent dict constructor.

        Args:
            _time_major: Whether data in this sample batch
                is time-major. This is False by default and only relevant
                if the data contains sequences.
            _max_seq_len: The max sequence chunk length
                if the data contains sequences.
            _zero_padded: Whether the data in this batch
                contains sequences AND these sequences are right-zero-padded
                according to the `_max_seq_len` setting.
            _is_training: Whether this batch is used for
                training. If False, batch may be used for e.g. action
                computations (inference).
        zSampleBatch cannot be constructed anymore with a `DONES` key! Instead, set the new TERMINATEDS and TRUNCATEDS keys. The values under DONES will then be automatically computed using terminated|truncated._time_majorN_max_seq_len_zero_paddedF_is_training_num_grad_updatesr   dtypeis_training)%r/   DONESKeyErrorpop
time_majormax_seq_lenzero_paddedrX   num_grad_updatesr=   __init___slice_seq_lens_in_Bsetaccessed_keys
added_keysdeleted_keysintercepted_valuesget_interceptorr.   r0   r%   r&   r4   r'   r(   int32r5   r2   r1   maxr8   r9   r   r;   rG   count
_slice_map)selfargskwargs	seq_lens_rC   rD   s         r+   rd   zSampleBatch.__init__   s   * &&Y   !**]D99!::nd;;!::ne<<"JJ~t<< 28<OQU1V1V 	d,T,,,V,,, %*! UU%%EE"$# HH[122	It!<!<YSTATATHH[)40000	4(( 	357Xirx5X5X5XXD%& 	3	22 	3 	3r||I?V?V 	3)2D%& $% &LL33 &I"" 233 2#,==??#7#7#9#9  #&y>> $ $ > >DJJLL 	& 	&DAq !fd^,, &Q+:K5K5K(1++Q,T22
 r-   returnc                     | j         S )z2Returns the amount of samples in the sample batch.rn   rp   s    r+   __len__zSampleBatch.__len__       zr-   c                      t          |           S )zReturns the same as len(self) (number of steps in this batch).

        To make this compatible with `MultiAgentBatch.agent_steps()`.
        r4   rw   s    r+   agent_stepszSampleBatch.agent_steps       4yyr-   c                      t          |           S )zReturns the same as len(self) (number of steps in this batch).

        To make this compatible with `MultiAgentBatch.env_steps()`.
        r{   rw   s    r+   	env_stepszSampleBatch.env_steps  r}   r-   c                     d| _         d S )NTre   rw   s    r+   enable_slicing_by_batch_idz&SampleBatch.enable_slicing_by_batch_id   s    $(!!!r-   c                     d| _         d S )NFr   rw   s    r+   disable_slicing_by_batch_idz'SampleBatch.disable_slicing_by_batch_id$  s    $)!!!r-   c                 ~    | t           j                 d         p%t           j        | v o| t           j                 d         S )zCReturns True if `self` is either terminated or truncated at idx -1.)r/   TERMINATEDS
TRUNCATEDSrw   s    r+   is_terminated_or_truncatedz&SampleBatch.is_terminated_or_truncated(  s;     K+,R0 
"d*OtK4J/KB/O	
r-   c                     t          | t          j                 dd                    o5t          j        | vp't          | t          j                 dd                    S )zReturns True if this SampleBatch only contains one trajectory.

        This is determined by checking all timesteps (except for the last) for being
        not terminated AND (if applicable) not truncated.
        Nr   )anyr/   r   r   rw   s    r+   is_single_trajectoryz SampleBatch.is_single_trajectory/  s\     tK34SbS9::: 
"$. :tK23CRC8999	
r-   /concat_samples() from rllib.policy.sample_batchTnewerrorc                     d S Nr$   sampless    r+   concat_sampleszSampleBatch.concat_samples;  s	     	r-   otherc                 $    t          | |g          S )ag  Concatenates `other` to this one and returns a new SampleBatch.

        Args:
            other: The other SampleBatch object to concat to this one.

        Returns:
            The new SampleBatch, resulting from concating `other` to `self`.

        .. testcode::
            :skipif: True

            import numpy as np
            from ray.rllib.policy.sample_batch import SampleBatch
            b1 = SampleBatch({"a": np.array([1, 2])})
            b2 = SampleBatch({"a": np.array([3, 4, 5])})
            print(b1.concat(b2))

        .. testoutput::

            {"a": np.array([1, 2, 3, 4, 5])}
        )r   )rp   r   s     r+   concatzSampleBatch.concatA  s    . tUm,,,r-   Fshallowc                 "   t          |           }t          j        fd|          }t          || j        | j        | j        | j                  }|                    | j	                   | j
        |_
        | j        |_        | j        |_        |S )zCreates a deep or shallow copy of this SampleBatch and returns it.

        Args:
            shallow: Whether the copying should be done shallowly.

        Returns:
            A deep or shallow copy of this SampleBatch object.
        c                 j    t          | t          j                  rt          j        |            n| S )Ncopy)r%   r'   ndarrayr(   )rD   r   s    r+   <lambda>z"SampleBatch.copy.<locals>.<lambda>f  s3    1;Arz1J1JQW----PQ r-   )rU   rW   rV   rY   )r=   r?   map_structurer/   r`   rb   ra   rc   set_get_interceptorrk   rh   ri   rg   )rp   r   copy_datas    `  r+   r   zSampleBatch.copyZ  s     T

!    	
 
 ))"3
 
 
 	!!$"6777?!."0r-   c              #       K                         t          j        d          dndt                     }t	           j                  D ]}t          j        |f fd	|          V   dS )a  Returns an iterator over data rows, i.e. dicts with column values.

        Note that if `seq_lens` is set in self, we set it to 1 in the rows.

        Yields:
            The column values of the row in this iteration.

        .. testcode::
            :skipif: True

            from ray.rllib.policy.sample_batch import SampleBatch
            batch = SampleBatch({
               "a": [1, 2, 3],
               "b": [4, 5, 6],
               "seq_lens": [1, 2]
            })
            for row in batch.rows():
                print(row)

        .. testoutput::

            {"a": 1, "b": 4, "seq_lens": 1}
            {"a": 2, "b": 5, "seq_lens": 1}
            {"a": 3, "b": 6, "seq_lens": 1}
           Nc                 :    | d         j         k    r||         nS Nr   )r0   )prD   irp   rB   s      r+   r   z"SampleBatch.rows.<locals>.<lambda>  s    !A$$-*?*?!A$$X r-   )r.   r/   r0   r=   rangern   r?   map_structure_with_path)rp   self_as_dictr   rB   s   `  @r+   rowszSampleBatch.rowsx  s      8  88K$8!<<D44!Dzztz"" 	 	A. MMMMMM     	 	r-   keysc                 J    g }|D ]}|                     | |                    |S )a  Returns a list of the batch-data in the specified columns.

        Args:
            keys: List of column names fo which to return the data.

        Returns:
            The list of data items ordered by the order of column
            names in `keys`.

        .. testcode::
            :skipif: True

            from ray.rllib.policy.sample_batch import SampleBatch
            batch = SampleBatch({"a": [1], "b": [2], "c": [3]})
            print(batch.columns(["a", "b"]))

        .. testoutput::

            [[1], [2]]
        )append)rp   r   outrC   s       r+   columnszSampleBatch.columns  s6    0  	  	 AJJtAw
r-   c                 B   |                      t          j                  du}|r| j        st	          d          |s%t
          j                            | j                  n<t
          j                            t          | t          j                                     t          |           }|                    t          j        d          t          j        fd|          }fdD             |t          j        <   |                     |           i | _        | S )a  Shuffles the rows of this batch in-place.

        Returns:
            This very (now shuffled) SampleBatch.

        Raises:
            ValueError: If self[SampleBatch.SEQ_LENS] is defined.

        .. testcode::
            :skipif: True

            from ray.rllib.policy.sample_batch import SampleBatch
            batch = SampleBatch({"a": [1, 2, 3, 4]})
            print(batch.shuffle())

        .. testoutput::

            {"a": [4, 1, 3, 2]}
        NzbSampleBatch.shuffle not possible when your data has `seq_lens` defined AND is not zero-padded yet!c                     |          S r   r$   )rD   permutations    r+   r   z%SampleBatch.shuffle.<locals>.<lambda>  s    + r-   c                      g | ]
}|         S r$   r$   )r)   r   infoss     r+   r,   z'SampleBatch.shuffle.<locals>.<listcomp>  s    *I*I*I58*I*I*Ir-   )r.   r/   r0   rb   
ValueErrorr'   randomr   rn   r4   r=   r_   r   r;   r?   r   updaterj   )rp   has_time_rankr   shuffledr   r   s       @@r+   shufflezSampleBatch.shuffle  s   * !566dB  	!1 	A    	Q)//
;;KK )//D9M4N0O0OPPKDzz  55%&>&>&>&>MM*I*I*I*I[*I*I*IL'H #%r-   Nkeyc                 N    |(|t           j        t           j        fv sJ d| d             fd} fd}t           j        |t           j        |i}t           j        t           j        g}d}|:|t           j        k    r| vrt            d| d           ||                     }nB|D ](}|t           j        k    s| v r ||                     } n)|t            d| d	          t	          d
 |D                        j        k    sJ d  d|             d j         d |S )a  Splits by `eps_id` column and returns list of new batches.
        If `eps_id` is not present, splits by `dones` instead.

        Args:
            key: If specified, overwrite default and use key to split.

        Returns:
            List of batches, one per distinct episode.

        Raises:
            KeyError: If the `eps_id` AND `dones` columns are not present.

        .. testcode::
            :skipif: True

            from ray.rllib.policy.sample_batch import SampleBatch
            # "eps_id" is present
            batch = SampleBatch(
                {"a": [1, 2, 3], "eps_id": [0, 0, 1]})
            print(batch.split_by_episode())

            # "eps_id" not present, split by "dones" instead
            batch = SampleBatch(
                {"a": [1, 2, 3, 4, 5], "dones": [0, 0, 1, 0, 1]})
            print(batch.split_by_episode())

            # The last episode is appended even if it does not end with done
            batch = SampleBatch(
                {"a": [1, 2, 3, 4, 5], "dones": [0, 0, 1, 0, 0]})
            print(batch.split_by_episode())

            batch = SampleBatch(
                {"a": [1, 2, 3, 4, 5], "dones": [0, 0, 0, 0, 0]})
            print(batch.split_by_episode())


        .. testoutput::

            [{"a": [1, 2], "eps_id": [0, 0]}, {"a": [3], "eps_id": [1]}]
            [{"a": [1, 2, 3], "dones": [0, 0, 1]}, {"a": [4, 5], "dones": [0, 1]}]
            [{"a": [1, 2, 3], "dones": [0, 0, 1]}, {"a": [4, 5], "dones": [0, 0]}]
            [{"a": [1, 2, 3, 4, 5], "dones": [0, 0, 0, 0, 0]}]


        Nz"`SampleBatch.split_by_episode(key=z,)` invalid! Must be [None|'dones'|'eps_id'].c                  0   g } t           j                 d         }d}t          j                  D ]A}t           j                 |         }||k    r!|                     ||                    |}|}B|                     |j                            | S r   )r/   EPS_IDr   rn   r   )slices
cur_eps_idoffsetr   next_eps_idrp   s        r+   slice_by_eps_idz5SampleBatch.split_by_episode.<locals>.slice_by_eps_id&  s    Fk01!4JF4:&& - -";#56q9*,,MM$vax.111F!,JMM$v
23444Mr-   c                  T   g } d}t          j                  D ]e}t          j                 |         s&t          j        v r=t          j                 |         r%|                     ||dz                       |dz   }f|j        k    r|                     |d                     | S )Nr   r   )r   rn   r/   r   r   r   )r   r   r   rp   s      r+   "slice_by_terminateds_or_truncatedszHSampleBatch.split_by_episode.<locals>.slice_by_terminateds_or_truncateds5  s    FF4:&& # #/03 #*d22tK<R7STU7V2
 MM$vA~"6777UF##d677m,,,Mr-   z does not have key `z`!z does not have keys !c              3   $   K   | ]}|j         V  d S r   rv   r)   ss     r+   	<genexpr>z/SampleBatch.split_by_episode.<locals>.<genexpr>^  s$      ((A((((((r-   zCalling split_by_episode on z	 returns zwhich should in total have z timesteps!)r/   r   r]   r^   r7   rn   )rp   r   r   r   key_to_methodkey_resolve_orderr   s   `      r+   split_by_episodezSampleBatch.split_by_episode  s   ` {ck&8+:K%LLLL0 0 0 0 MLL
	 	 	 	 		 	 	 	 	$ A
 )/1BC?k(((S__$CCCCCCDDD']3'))FF )  ++++sd{{/]3/11FE 0; ~$PP<MPPPQQQ (((((((DJ666A$AAAA 766=dj====r-   startendc                    |                      t          j                  t          | t          j                           dk    r{dk     r"fd|                                 D             }n!fd|                                 D             }||J d}d                    |          }|| v r1| |         ||         ||<   |dz  }d                    |          }|| v 1t          | t          j                 ||                   }t          |t          t          |                                       }	t          |          |	k    r2t          |          |	k    sJ |	t          |dd                   z
  |d<   n d}
d}d}t          | t          j                           D ]\  }}|
|z  }
|
k    rd}d                    |          }||}|| v r4| |         ||dz            ||<   |dz  }d                    |          }|| v 4t          | t          j                 ||                   ||
z
  z
  gz   }dk     r|dxx          z  cc<   t          |          z
  z
  }|dk    r|dxx         |z  cc<   t          |          z
  k    sJ  n||
k    r|}t          ||| j        | j        | j                  S t          t          j        fd	|           | j        | j        | j        
          S )a  Returns a slice of the row data of this batch (w/o copying).

        Args:
            start: Starting index. If < 0, will left-zero-pad.
            end: Ending index.

        Returns:
            A new SampleBatch, which has a slice of this batch's data.
        Nr   c                     i | ]t\  }}|t           j        k    |                    d           *|t          j        t          j         f|j        dd         z   |j                  |d         g          uS )r!   r   N)shaper[   r   )r/   r0   r<   r'   concatenatezerosr   r[   r)   rC   rD   r   r   s      r+   
<dictcomp>z%SampleBatch.slice.<locals>.<dictcomp>t  s     	 	 	 1K000k9R9R0 r~HUF9qwqrr{+B!'RRRaeH  100r-   c                     i | ]D\  }}|t           j        k    |                    d           *|t          j        fd|          ES )r!   c                     |          S r   r$   )r   r   r   s    r+   r   z.SampleBatch.slice.<locals>.<dictcomp>.<lambda>  s    AeCiL r-   )r/   r0   r<   r?   r   r   s      r+   r   z%SampleBatch.slice.<locals>.<dictcomp>  sb       1K000k9R9R0 t)*@*@*@*@*@!DD000r-   zstate_in_{}r   r   )rB   rX   rU   rY   c                     |          S r   r$   )valuer   r   s    r+   r   z#SampleBatch.slice.<locals>.<lambda>  s    uSy1A r-   rX   rU   rY   )r.   r/   r0   r4   r9   formatr&   nextiterr7   	enumerater\   r`   rc   r?   r   )rp   r   r   state_start	state_endr   	state_idx	state_keyrB   data_lenrn   r   seq_lendiffs    ``           r+   slicezSampleBatch.slicec  s    HH[)**6D-.//!33qyy	 	 	 	 	 !%

	 	 	     $

  
 & ,,,	)00;;	4''&*9ok)6K&LDONI - 4 4Y ? ?I  4''  [%9 :;y;P QRRtDd$4$4566x==H,,x==83333#+c(3B3-.@.@#@HRL ""+D1E,F"G"G ( (JAwW$E||$%	$1$8$8$C$C	&.*+K'4//.29okAPQE>Q.RDO%NI(5(<(<Y(G(GI (4// $([-A(B;q=(Q#R#R#us{3V $ !199$QKKKE61KKK"8}}e<!88$QKKK4/KKK"8}}u====$,&'!!- O"&"7    "#A#A#A#A#A4HH!- O"&"7	   r-   slice_c                    |j         pd|j        pt          | t          j                           t          |           k    rt          |           dk    rdk    r	|j        dv sJ |                     t          j        d          }t          j	        fd|           }|t          t          | t          j                 d                             }t          t          | t          j                                              }|||         |t          j        <   || t          j        <   t          || j        | j        | j                  S )2  Helper method to handle SampleBatch slicing using a slice object.

        The returned SampleBatch uses the same underlying data object as
        `self`, so changing the slice will also change `self`.

        Note that only zero or positive bounds are allowed for both start
        and stop values. The slice step must be 1 (or None, which is the
        same).

        Args:
            slice_: The python slice object to slice by.

        Returns:
            A new SampleBatch, however "linking" into the same data
            (sliced) as self.
        r   )r   NNc                     |          S r   r$   )r   r   stops    r+   r   z*SampleBatch._batch_slice.<locals>.<lambda>  s    eDj0A r-   r   )r   r   r4   r/   r0   stepr_   r;   r?   r   r6   r7   r   r\   r`   rc   )rp   r   r   r   info_slice_startinfo_slice_stopr   r   s         @@r+   _batch_slicezSampleBatch._batch_slice  sH   " !{=c${';"<== #d))t99DzzdaiiFK9,D,D,DD *D11!"A"A"A"A"A4HH"3tK,@'A&5&'I#J#JKK!#d;+?&@t&L"M"MNNO&+,<_,L&MD"#"'D)"3	
 
 
 	
r-   size
num_slicesrC   c                    ||t          dd           |J |}|nt          |t                    sJ g }t          |           }d}|r@||t          |          z
  z  }||z   }|                    | ||                    ||z  }|}|@|S t          |t                    sJ g }t          |           }d}|r+||z   }|                    | ||                    ||z  }|}|+|S )a(  Returns SampleBatches, each one representing a k-slice of this one.

        Will start from timestep 0 and produce slices of size=k.

        Args:
            size: The size (in timesteps) of each returned SampleBatch.
            num_slices: The number of slices to produce.
            k: Deprecated: Use size or num_slices instead. The size
                (in timesteps) of each returned SampleBatch.

        Returns:
            The list of `num_slices` (new) SampleBatches or n (new)
            SampleBatches each one of size `size`.
        NrC   zsize or num_slicesr   )r   r%   r6   r4   r   )	rp   r   r   rC   r   leftr   len_r   s	            r+   
timesliceszSampleBatch.timeslices  s5   * <J.%9:::===D<j#.....Ft99DE 
S[[ 89t|d5:.///   M dC(((((Ft99DE t|d5:.///	   Mr-   zSampleBatch.right_zero_padc                     d S r   r$   )rp   ra   exclude_statess      r+   zero_padzSampleBatch.zero_pad$  s    r-   ra   r   c                                           t          j                  }|t          d            t	          |          z   fd}t                     }t          j        ||           d _         _	         S )a*  Right (adding zeros at end) zero-pads this SampleBatch in-place.

        This will set the `self.zero_padded` flag to True and
        `self.max_seq_len` to the given `max_seq_len` value.

        Args:
            max_seq_len: The max (total) length to zero pad to.
            exclude_states: If False, also right-zero-pad all
                `state_in_x` data. If True, leave `state_in_x` keys
                as-is.

        Returns:
            This very (now right-zero-padded) SampleBatch.

        Raises:
            ValueError: If self[SampleBatch.SEQ_LENS] is None (not defined).

        .. testcode::
            :skipif: True

            from ray.rllib.policy.sample_batch import SampleBatch
            batch = SampleBatch(
                {"a": [1, 2, 3], "seq_lens": [1, 2]})
            print(batch.right_zero_pad(max_seq_len=4))

            batch = SampleBatch({"a": [1, 2, 3],
                                 "state_in_0": [1.0, 3.0],
                                 "seq_lens": [1, 2]})
            print(batch.right_zero_pad(max_seq_len=5))

        .. testoutput::

            {"a": [1, 0, 0, 0, 2, 3, 0, 0], "seq_lens": [1, 2]}
            {"a": [1, 0, 0, 0, 0, 2, 3, 0, 0, 0],
             "state_in_0": [1.0, 3.0],  # <- all state-ins remain as-is
             "seq_lens": [1, 2]}

        NzNCannot right-zero-pad SampleBatch if no `seq_lens` field present! SampleBatch=c                 d   	du r| d                              d          s| d         t          j        k    rd S |j        t          k    s|j        j        t          j        u rd g
z  }n9t          j        
ft          j	        |          dd          z   |j                  }dx}}t          j                 D ]!}||||z            ||||z   <   |z  }||z  }"|t          |          k    s
J |            }t          |           D ](\  }}|t          |           dz
  k    r|||<   ||         })d S )NTr   r!   r   rZ   )r<   r/   r0   r[   objecttyper'   str_r   r   r4   r   )pathr   f_pad
f_pad_basef_baser   currr   r   r   lengthra   rp   s            r+   _zero_pad_in_placez6SampleBatch.right_zero_pad.<locals>._zero_pad_in_placeX  so   $&&47+=+=k+J+J&tP%P& P& {f$$(8BG(C(C &RXe__QRR-@!@TTT"##J[12  8=fvPT}>T8Uj:#445k)
$SZZ'''''' D!$  1D		A%%#DGAw r-   T)
r.   r/   r0   r   r4   r=   r?   r   rb   ra   )rp   ra   r   rB   r  r   r  s   ```   @r+   right_zero_padzSampleBatch.right_zero_pad(  s    N 88K011/(,/ /  
 X,	 	 	 	 	 	 	 	6 Dzz$%7FFF  &r-   r5   	framework
pin_memory
use_streamstreamtorch.cuda.Streamr  c                     |dk    r;t           J |                                 D ]\  }}t          |||||          | |<   nt          | S )9TODO: transfer batch to given device as framework tensor.r5   N)r  r  r  )r5   r9   r   NotImplementedError)rp   devicer
  r  r  r  rC   rD   s           r+   	to_devicezSampleBatch.to_device|  sq     $$$

  11))!  Q &%r-   c                 X    t          d t          j        |           D                       S )a  Returns sum over number of bytes of all data buffers.

        For numpy arrays, we use ``.nbytes``. For all other value types, we use
        sys.getsizeof(...).

        Returns:
            The overall size in bytes of the data buffer (all columns).
        c              3      K   | ]9}t          |t          j                  r|j        nt	          j        |          V  :d S r   )r%   r'   r   nbytessys	getsizeof)r)   rD   s     r+   r   z)SampleBatch.size_bytes.<locals>.<genexpr>  sW       
 
 #1bj11GAHHs}Q7G7G
 
 
 
 
 
r-   )r7   r?   r@   rw   s    r+   
size_byteszSampleBatch.size_bytes  s<      
 
\$''
 
 
 
 
 	
r-   c                 R    	 |                      |          S # t          $ r |cY S w xY w)z=Returns one column (by key) from the data or a default value.)__getitem__r^   )rp   r   defaults      r+   r.   zSampleBatch.get  s?    	##C((( 	 	 	NNN	s    &&	module_idMultiAgentBatchc                 >    t          |pt          | i| j                  S )ak  Returns the respective MultiAgentBatch

        Note, if `module_id` is not provided uses `DEFAULT_POLICY`_ID`.

        Args;
            module_id: An optional module ID. If `None` the `DEFAULT_POLICY_ID`
                is used.

        Returns:
            The MultiAgentBatch (using DEFAULT_POLICY_ID) corresponding
            to this SampleBatch.
        )r  DEFAULT_POLICY_IDrn   )rp   r  s     r+   as_multi_agentzSampleBatch.as_multi_agent  s!     	 >->EtzRRRr-   c                     t          |t                    r|                     |          S |t          j        k    r| t          j                 S |dk    r(t          d          rt          ddd           | j        S t          | |          s|| v r| j
                            |           t                              | |          }| j        3|| j        vr|                     |          | j        |<   | j        |         }|S )a'  Returns one column (by key) from the data or a sliced new batch.

        Args:
            key: The key (column name) to return or
                a slice object for slicing this SampleBatch.

        Returns:
            The data under the given key or a sliced version of this batch.
        r\   SampleBatch['is_training']SampleBatch.is_trainingFoldr   r   )r%   r   _slicer/   r]   r   r   r   r\   r3   rg   addr=   r  rk   rj   )rp   r   r   s      r+   r  zSampleBatch.__getitem__  s    c5!! 	$;;s### +###/00M!!455 #41   
 ##tS!! 	(cTkk""3'''  s+++$111/3/C/CE/J/J',+C0Er-   c                    |t           j        k    rt          d          t          | d          st                              | ||           dS |dk    r*t          d          rt          ddd           || _        dS || vr| j	        
                    |           t                              | ||           || j        v r|| j        |<   dS dS )	zInserts (overrides) an entire column (by key) in the data buffer.

        Args:
            key: The column name to set a value for.
            item: The data to insert.
        zCannot set `DONES` anymore in a SampleBatch! Instead, set the new TERMINATEDS and TRUNCATEDS keys. The values under DONES will then be automatically computed using terminated|truncated.rh   Nr\   r$  r%  Fr&  )r/   r]   r^   r3   r=   __setitem__r   r   rX   rh   r)  rj   )rp   r   r8   s      r+   r+  zSampleBatch.__setitem__  s    +###Y   |,, 	T3---F -455 #41   
 !%DFd??O$$$sD)))$)))+/D#C((( *)r-   c                     | j         Rt          | j        t                    r8d| j        vr"|                      | j                  | j        d<   | j        d         S | j        S )NrX   )rk   r%   rX   boolrj   rw   s    r+   r\   zSampleBatch.is_training  sg    +
4;Ld0S0S+T%<<<:>:N:N%; ;'7 *>::  r-   trainingztf1.placeholderc                 J    || _         | j                            dd           dS )z1Sets the `is_training` flag for this SampleBatch.rX   N)rX   rj   r_   )rp   r.  s     r+   set_trainingzSampleBatch.set_training  s)    $##ND99999r-   c                 p    | j                             |           t                              | |           d S r   )ri   r)  r=   __delitem__rp   r   s     r+   r2  zSampleBatch.__delitem__  s5    c"""s#####r-   new_obsbulkr   c                 D      fd}t          j        |             S )a  Compresses the data buffers (by column) in place.

        Args:
            bulk: Whether to compress across the batch dimension (0)
                as well. If False will compress n separate list items, where n
                is the batch size.
            columns: The columns to compress. Default: Only
                compress the obs and new_obs columns.

        Returns:
            This very (now compressed) SampleBatch.
        c                     | d         vrd S }t          |           D ]Y\  }}|t          |           dz
  k    r6rt          |          ||<   n!t          j        d |D                       ||<   ||         }Zd S )Nr   r   c                 ,    g | ]}t          |          S r$   )r   r)   os     r+   r,   zDSampleBatch.compress.<locals>._compress_in_place.<locals>.<listcomp>5  s    +C+C+CDGG+C+C+Cr-   )r   r4   r   r'   r(   )r  r   r  r   r   r5  r   rp   s        r+   _compress_in_placez0SampleBatch.compress.<locals>._compress_in_place,  s    Awg%%D!$  1D		A%% E"&u++Q"$(+C+CU+C+C+C"D"DQAw r-   r?   r   )rp   r5  r   r;  s   ``` r+   compresszSampleBatch.compress  sC    "
	 
	 
	 
	 
	 
	 
	 	$%7>>>r-   c                 @      fd}t          j        |             S )a  Decompresses data buffers (per column if not compressed) in place.

        Args:
            columns: The columns to decompress. Default: Only
                decompress the obs and new_obs columns.

        Returns:
            This very (now uncompressed) SampleBatch.
        c                 F   | d         vrd S }| d d         D ]
}||         }t          |          rt          |          || d         <   d S t          |          dk    r>t          |d                   r+t          j        d |D                       || d         <   d S d S d S )Nr   r   c                 ,    g | ]}t          |          S r$   )r   r9  s     r+   r,   zRSampleBatch.decompress_if_needed.<locals>._decompress_in_place.<locals>.<listcomp>U  s    *D*D*D6!99*D*D*Dr-   )r   r   r4   r'   r(   )r  r   r  r   r   rp   s       r+   _decompress_in_placez>SampleBatch.decompress_if_needed.<locals>._decompress_in_placeJ  s    Awg%%D#2#Y  AwU## F!'T"XUaM%($;$;!#*D*De*D*D*D!E!ET"X  r-   r<  )rp   r   rA  s   `` r+   decompress_if_neededz SampleBatch.decompress_if_needed<  sC    	F 	F 	F 	F 	F 	F 	$%94@@@r-   c                 4    || j         uri | _        || _         dS )z.Sets a function to be called on every getitem.N)rk   rj   )rp   fns     r+   r   zSampleBatch.set_get_interceptor[  s(     T)))&(D#!r-   c                 $   t          |                                           }|                     t          j                  d| j         d| dS |                    t          j                   d| j         dt          | d                    d| dS )NzSampleBatch(z: )z (seqs=rB   z): )r&   r   r.   r/   r0   rn   remover4   )rp   r   s     r+   __repr__zSampleBatch.__repr__c  s    DIIKK  88K())17$*777777KK,---WtzWWc$z:J6K6KWWPTWWWr-   c                   
 | j         r|                     |          S |j        pd
|j        pt	          |           t	          |           k    rt	          |           |                     t          j                  t	          | t          j                           dk    r| j        sd}t          t          t          | t          j                                     D ]*\  }}| j                            ||fg|z             ||z   }+| j                            t	          | t          j                           |f           | j        
         \  }| j                 \  }||| j        r| j        z  | j        z  fd}|                     t          j        d          }t%          j        ||           }	|Gt)          |t*          t,          j        f          r&|| t          j        <   |||         |	t          j        <   t          |	| j        | j        | j        | j        r| j        nd| j                  S |                     t          j        d          }t%          j        
fd|           }	|Gt)          |t*          t,          j        f          r&|| t          j        <   |
         |	t          j        <   t          |	| j        | j        | j                  S )r   r   Nc                     | d         t           j        k    r%| d                             d          s
|         S |         S )Nr   r!   )r/   r0   r<   )r  r   start_paddedstart_seq_lenstop_paddedstop_seq_lens     r+   map_z SampleBatch._slice.<locals>.map_  sS    7k22247;M;M< <2 !k!9:: |!;<<r-   )rX   rU   rW   rV   rY   c                     |          S r   r$   )r   r   r   s    r+   r   z$SampleBatch._slice.<locals>.<lambda>  s    %* r-   r   )re   r   r   r   r4   r.   r/   r0   ro   r   mapr6   extendr   rb   ra   r_   r;   r?   r   r%   r&   r'   r   r\   r`   rc   r   )rp   r   sum_r   lstart_unpaddedstop_unpaddedrO  r   r   r   rK  rL  r   rM  rN  s             @@@@@@r+   r(  zSampleBatch._slicem  s   " $ 	-$$V,,,!{'c$ii #d))t99D HH[)**6D-.//!33 ? P%c#tK4H/I&J&JKK $ $DAqO**QI;?;;;!8DD &&D1E,F(G(G'NOOO,0OE,B)M>*./$*?'L-)L'K >,t/??*T-=== = = = = = = = HH[.55E/d;;D Zbj7I%J%J */[&'*/}0L*M[&'!- O!-151AKT--t"&"7    HH[.55E%&=&=&=&=&=tDDD Zbj7I%J%J */[&'*/d
*;[&'!- O"&"7	   r-   )r   c                 z   g }g }|                      t          j                  ]t          | t          j                           dk    r9t	          j        | t          j                 |k               s
J d            d}d}d}d}d}|t          | t          j                           k     r| t          j                 |         }	||	z  }|| j        s|	n| j        z  }||k    r{|dz   }
| j        s8|                    |||z   f           ||z  }||k    r||z
  }||	|z
  z  }|dz  }n|                    ||f           |}|                    ||
f           d}|dz   }|dz  }|t          | t          j                           k     n7d}|| j	        k     r*|                    |||z   f           ||z  }|| j	        k     *||fS )Nr   zFERROR: `slice_size` must be larger than the max. seq-len in the batch!r   )
r.   r/   r0   r4   r'   allrb   ra   r   rn   )rp   
slice_sizedata_slicesdata_slices_states	start_poscurrent_slize_sizeactual_slice_idx	start_idxidxr   end_idxoverheadr   s                r+   _get_slice_indiceszSampleBatch._get_slice_indices  s!   HH[)**6D-.//!336${34zABB    B I!" ICD!567777{34S9"g-" #'#3IGG9I  &33!AgG  + 
5#**Iy:7M+NOOO!Z/	-
::'9J'FH%8);;I1HC $**I7G+HIII$4	&--y'.BCCC)*& #aIq5 D!5677778 Adj..""Aq:~#6777Z dj.. ...r-   lastview_requirementsindexc           	      l   t           j        t           j        t           j        t           j        t           j        t           j        i}i }|                                D ])\  }}|j        du r|j	        p|}|dk    r|
                    ||          }|j        | |         d         }t          | |                   }	|	|j        z  }
|t           j        t           j        fv rdnd}|j        |z   }|j        |z   dz   }|dk    rd}t          j        t          j        || |         |
 d         g          ||         g          ||<   t%          j        d | |                   ||<   | |         ||dk    r|dz   nd         ||<   +t          |t          j        dgt          j                  	          S )
a  Creates single ts SampleBatch at given index from `self`.

        For usage as input-dict for model (action or value function) calls.

        Args:
            view_requirements: A view requirements dict from the model for
                which to produce the input_dict.
            index: An integer index value indicating the
                position in the trajectory for which to generate the
                compute_actions input dict. Set to "last" to generate the dict
                at the very end of the trajectory (e.g. for value estimation).
                Note that "last" is different from -1, as "last" will use the
                final NEXT_OBS as observation input.

        Returns:
            The (single-timestep) input dict for ModelV2 calls.
        Frd  Nr   r   r   c                     | dd          S )Nr   r$   )rD   s    r+   r   z8SampleBatch.get_single_step_input_dict.<locals>.<lambda>8  s    !BCC& r-   rZ   )rB   )r/   OBSNEXT_OBSPREV_ACTIONSACTIONSPREV_REWARDSREWARDSr9   used_for_compute_actionsdata_colr.   
shift_fromr4   batch_repeat_valueshift_tor'   r(   r   r?   r   rl   )rp   re  rf  last_mappings
input_dictview_colview_reqrp  r   traj_lenmissing_at_end	obs_shiftfrom_to_s                 r+   get_single_step_input_dictz&SampleBatch.get_single_step_input_dict  s   0 O[1$k&9$k&9
 
"3"9"9";"; *	 *	Hh0E99  (4HH(,,Xx@@&2  >"-D"4>22H%-0K%KN
 ';?K<P*QQQWX  %/);E"+i7!;Caxx"+-8ND$x..AQAQ2R+STT %c	, ,Jx(( ,0+=((X, ,Jx(( (,H~"EAII$>(
8$$ :!BH0M0M0MNNNNr-   )r   r/   rt   r/   )F)rt   r/   r   )NN)NNN)Tr5   FFN)rt   N)rd  )W__name__
__module____qualname____doc__r   ri  rl  rn  r   r   r;   r0   TACTION_DIST_INPUTSACTION_PROBACTION_LOGPVF_PREDSVALUES_BOOTSTRAPPEDr   rj  ACTION_DISTrk  rm  ENV_IDAGENT_INDEX	UNROLL_ID
OBS_EMBEDSRETURNS_TO_GOATTENTION_MASKSr]   CUR_OBSr   rd   r6   rx   r|   r   r   r   r   r   r-  r   r   staticmethodr   r   r   r   r   r   r:   r   r   r   r   r   r   r   r   r   r   r   r   r	  r
   r  r  r.   r   r"  r  r+  propertyr\   r0  r2  	frozensetr	   r=  rB  r   rH  r(  rc  r   r}  r$   r-   r+   r/   r/   c   s         +CoGoG%K#JMEH	A 3%K%KH!5^FH  K!L!LFK I J#M'O EGk k YkZ     Y S    Y 3    Y ) ) \) * * \* 
D 
 
 
 _
 	
d 	
 	
 	
 _	
 ZETRRR  SR Y \ - - - Y-0  D ]    Y: #htCO45 # # # Y#J DI $s)    Y8 3 3 3 Y3j o oHSM oT-=P o o o Yod AEY YY"Y	Y Y Y Yv-
5 -
] -
 -
 -
 -
^  #$(	4 4sm4 SM4 C=	4
 
m	4 4 4 Y4l Z0===   >=R R# Rt R R R Rh  !  LP   	
  GHI   _. 
C 
 
 
 Y
    S S(: SFW S S S YS #uS%Z0 #Z # # # Y#J $0 $0 $0 Y$0L ! ! X!: :U41B+B%C : : : :
 $ $ Y$  ii	@R6S6S +.s8	   \@ "+)UI,>"?"? 3x	   \< " " \"  UU U} U U U Un Ze0/ 0/ 0/d  "(IO IO/IO S#XIO 
	IO IO IO _IO IO IOr-   r/   c            
          e Zd ZdZedeeef         defd            Z	edefd            Z
edefd            Zedefd            Zed	eded          fd
            Zeedeeef         dedeed f         fd                        Zee edd          ded          dd fd                                    Zed&d            Ze	 	 	 	 d'dededeed                  fd            Zedefd            Zed eddg          fdedee         ddfd            Ze eddg          fdee         dd fd             Zed&d!            Z d"edefd#Z!d$ Z"d% Z#dS )(r  a  A batch of experiences from multiple agents in the environment.

    Attributes:
        policy_batches (Dict[PolicyID, SampleBatch]): Dict mapping policy IDs to
            SampleBatches of experiences.
        count: The number of env steps in this batch.
    policy_batchesr   c                 ~    |                                 D ]}t          |t                    sJ || _        || _        dS )au  Initialize a MultiAgentBatch instance.

        Args:
            policy_batches: Dict mapping policy IDs to SampleBatches of experiences.
            env_steps: The number of environment steps in the environment
                this batch contains. This will be less than the number of
                transitions this batch contains across all policies in total.
        N)valuesr%   r/   r  rn   )rp   r  r   rD   s       r+   rd   zMultiAgentBatch.__init__N  sM      &&(( 	. 	.Aa------, 


r-   rt   c                     | j         S )zThe number of env steps (there are >= 1 agent steps per env step).

        Returns:
            The number of environment steps contained in this batch.
        rv   rw   s    r+   r   zMultiAgentBatch.env_stepsa  s     zr-   c                     | j         S )zSame as `self.env_steps()`.rv   rw   s    r+   rx   zMultiAgentBatch.__len__j  ry   r-   c                 V    d}| j                                         D ]}||j        z  }|S )zThe number of agent steps (there are >= 1 agent steps per env step).

        Returns:
            The number of agent steps total in this batch.
        r   )r  r  rn   )rp   ctbatchs      r+   r|   zMultiAgentBatch.agent_stepso  s:     (//11 	 	E%+BB	r-   rC   c           	        
 ddl m} g }| j                                        D ]f\  }}|                                D ]L}|                    |t          j                 |t          j                 |t          j	                 ||f           Mg|
                                 g t          j        |          
d
fd}t          j        |d           D ]@\  }}	|	D ]\  }}}}} 
|         j        di | dz  |k    r |             dk    sJ Adk    r
 |             t!                    dk    s
J             S )a  Returns k-step batches holding data for each agent at those steps.

        For examples, suppose we have agent1 observations [a1t1, a1t2, a1t3],
        for agent2, [a2t1, a2t3], and for agent3, [a3t3] only.

        Calling timeslices(1) would return three MultiAgentBatches containing
        [a1t1, a2t1], [a1t2], and [a1t3, a2t3, a3t3].

        Calling timeslices(2) would return two MultiAgentBatches containing
        [a1t1, a1t2, a2t1], and [a1t3, a2t3, a3t3].

        This method is used to implement "lockstep" replay mode. Note that this
        method does not guarantee each batch contains only data from a single
        unroll. Batches might contain data from multiple different envs.
        r   )SampleBatchBuilderc                      dk    sJ t          d                                 D                       } d                                                     |            d S )Nr   c                 >    i | ]\  }}||                                 S r$   )build_and_resetr)   rC   rD   s      r+   r   zDMultiAgentBatch.timeslices.<locals>.finish_slice.<locals>.<dictcomp>  s*    FFFDAqA%%''FFFr-   )r  r9   clearr   )r  	cur_slicecur_slice_sizefinished_slicess    r+   finish_slicez0MultiAgentBatch.timeslices.<locals>.finish_slice  sq    !A%%%%#FFIOO4E4EFFF E NOO""5)))))r-   c                     | d d         S )N   r$   )xs    r+   r   z,MultiAgentBatch.timeslices.<locals>.<lambda>  s    1RaR5 r-   r   r$   ))ray.rllib.evaluation.sample_batch_builderr  r  r9   r   r   r/   r   r  r  sortcollectionsdefaultdict	itertoolsgroupby
add_valuesr4   )rp   rC   r  steps	policy_idr  rowr  _groupr  r  r  s             @@@r+   r   zMultiAgentBatch.timeslices{  s   " 	QPPPPP  $ 3 9 9 ; ; 
	 
	Iuzz|| 	 	K./KM*K34!   	 	

+,>??		* 	* 	* 	* 	* 	* 	* ")%AA 	+ 	+HAu+0 7 7'1aC/	)$/66#6666aN""%****ALNNN?##a''''''r-   c                 v    t          |           dk    rt          | v r| t                   S t          | |          S )a  Returns SampleBatch or MultiAgentBatch, depending on given policies.
        If policy_batches is empty (i.e. {}) it returns an empty MultiAgentBatch.

        Args:
            policy_batches: Mapping from policy ids to SampleBatch.
            env_steps: Number of env steps in the batch.

        Returns:
            The single default policy's SampleBatch or a MultiAgentBatch
            (more than one policy).
        r   r  r   )r4   r!  r  r  s     r+   wrap_as_neededzMultiAgentBatch.wrap_as_needed  sA      ~!##(9^(K(K!"344n	RRRRr-   r   Tr   r   c                      t          |           S r   )concat_samples_into_ma_batchr   s    r+   r   zMultiAgentBatch.concat_samples  s     ,G444r-   c                 n    t          d | j                                        D             | j                  S )z{Deep-copies self into a new MultiAgentBatch.

        Returns:
            The copy of self with deep-copied data.
        c                 >    i | ]\  }}||                                 S r$   r   r  s      r+   r   z(MultiAgentBatch.copy.<locals>.<dictcomp>  s&    CCCVaQCCCr-   )r  r  r9   rn   rw   s    r+   r   zMultiAgentBatch.copy  s:     CCt':'@'@'B'BCCCTZ
 
 	
r-   r5   FNr  r  r  r  c                     |dk    rKt           J | j                                        D ]'\  }}|                    |||||          | j        |<   (nt          | S )r  r5   N)r
  r  r  r  )r5   r  r9   r  r  )rp   r  r
  r  r  r  pidpolicy_batchs           r+   r  zMultiAgentBatch.to_device  s     $$$%)%8%>%>%@%@  !\+7+A+A'))! ,B , ,#C(( &%r-   c                 b    t          d | j                                        D                       S )ze
        Returns:
            The overall size in bytes of all policy batches (all columns).
        c              3   >   K   | ]}|                                 V  d S r   )r  )r)   bs     r+   r   z-MultiAgentBatch.size_bytes.<locals>.<genexpr>  s*      HHa1<<>>HHHHHHr-   )r7   r  r  rw   s    r+   r  zMultiAgentBatch.size_bytes  s0     HH4+>+E+E+G+GHHHHHHr-   rS   r4  r5  r   c                 l    | j                                         D ]}|                    ||           dS )a8  Compresses each policy batch (per column) in place.

        Args:
            bulk: Whether to compress across the batch dimension (0)
                as well. If False will compress n separate list items, where n
                is the batch size.
            columns: Set of column names to compress.
        )r5  r   N)r  r  r=  )rp   r5  r   r  s       r+   r=  zMultiAgentBatch.compress  sE     (//11 	7 	7ENNgN6666	7 	7r-   c                 h    | j                                         D ]}|                    |           | S )zDecompresses each policy batch (per column), if already compressed.

        Args:
            columns: Set of column names to decompress.

        Returns:
            Self.
        )r  r  rB  )rp   r   r  s      r+   rB  z$MultiAgentBatch.decompress_if_needed  s>     (//11 	0 	0E&&w////r-   c                     | S )zSimply returns `self` (already a MultiAgentBatch).

        Returns:
            This very instance of MultiAgentBatch.
        r$   rw   s    r+   r"  zMultiAgentBatch.as_multi_agent   s	     r-   r   c                     | j         |         S )z0Returns the SampleBatch for the given policy id.)r  r3  s     r+   r  zMultiAgentBatch.__getitem__)  s    "3''r-   c                 \    d                     t          | j                  | j                  S Nz!MultiAgentBatch({}, env_steps={})r   r:   r  rn   rw   s    r+   __str__zMultiAgentBatch.__str__-  ,    299#$$dj
 
 	
r-   c                 \    d                     t          | j                  | j                  S r  r  rw   s    r+   rH  zMultiAgentBatch.__repr__2  r  r-   )rt   r  r~  )$r  r  r  r  r   r   r   r/   r6   rd   r   rx   r|   r   r   r  r
   r  r   r   r   r   r-  r   r  r  r   r  r	   r:   r=  rB  r"  r  r  rH  r$   r-   r+   r  r  D  sa         tHk,A'B s    Y$ 3    Y     Y 	S 	 	 	 Y	 >C >D):$; > > > Y>@ SX{23S@CS	{--	.S S S Y \S$ ZETRRR5%6 7 5<M 5 5 5 SR Y \5 
 
 
 Y
    LP  	
  GHI   _. IC I I I YI  ii	@R6S6S7 77+.s87	7 7 7 \7 "+)UI,>"?"? 3x	   \    \(s ({ ( ( ( (
 
 


 
 
 
 
r-   r  r   rt   c           
         t          d | D                       rt          |           S g }ddg}g }dx}x}}| D ]3}|j        dk    r||j        }|j        }|j        }|j        |k    s|j        |k    rt          d          |j        ||j        |k    rt          d          |r|j        |k    rt          d          |t          ||j                  }|                    t          j
                  %|                    |t          j
                            |j        2|dxx         |j        z  cc<   |dxx         |j        |j        z  z  cc<   |                    |           5t          |          dk    rt                      S i }|d                                         D ]gt          j        k    rt#          fd	|D             d
|i|<   .fd|D             }	t%          t"          |          }
t'          j        |
g|	R  |<   h|g k    rBt*          r;t*                              |d                   rt*                              |          }nG|g k    rAt0          r:t0                              |d                   rt0                              |          }t          ||||||d         |d         pdz            S )aX  Concatenates a list of  SampleBatches or MultiAgentBatches.

    If all items in the list are or SampleBatch typ4, the output will be
    a SampleBatch type. Otherwise, the output will be a MultiAgentBatch type.
    If input is a mixture of SampleBatch and MultiAgentBatch types, it will treat
    SampleBatch objects as MultiAgentBatch types with 'default_policy' key and
    concatenate it with th rest of MultiAgentBatch objects.
    Empty samples are simply ignored.

    Args:
        samples: List of SampleBatches or MultiAgentBatches to be
            concatenated.

    Returns:
        A new (concatenated) SampleBatch or MultiAgentBatch.

    .. testcode::
        :skipif: True

        import numpy as np
        from ray.rllib.policy.sample_batch import SampleBatch
        b1 = SampleBatch({"a": np.array([1, 2]),
                          "b": np.array([10, 11])})
        b2 = SampleBatch({"a": np.array([3]),
                          "b": np.array([12])})
        print(concat_samples([b1, b2]))


        c1 = MultiAgentBatch({'default_policy': {
                                        "a": np.array([1, 2]),
                                        "b": np.array([10, 11])
                                        }}, env_steps=2)
        c2 = SampleBatch({"a": np.array([3]),
                          "b": np.array([12])})
        print(concat_samples([b1, b2]))

    .. testoutput::

        {"a": np.array([1, 2, 3]), "b": np.array([10, 11, 12])}
        MultiAgentBatch = {'default_policy': {"a": np.array([1, 2, 3]),
                                              "b": np.array([10, 11, 12])}}

    c              3   @   K   | ]}t          |t                    V  d S r   )r%   r  r   s     r+   r   z!concat_samples.<locals>.<genexpr>f  s,      
;
;a:a))
;
;
;
;
;
;r-   r   g        NzNAll SampleBatches' `zero_padded` and `time_major` settings must be consistent!z?Samples must consistently either provide or omit `max_seq_len`!zPFor `zero_padded` SampleBatches, the values of `max_seq_len` must be consistent!r   c                      g | ]
}|         S r$   r$   )r)   r   rC   s     r+   r,   z"concat_samples.<locals>.<listcomp>  s    1111!A$111r-   r`   c                      g | ]
}|         S r$   r$   )r)   crC   s     r+   r,   z"concat_samples.<locals>.<listcomp>  s    ???!???r-   r`   g      ?)rB   rU   rW   rV   rY   )r   r  rn   rb   ra   r`   r   rm   r.   r/   r0   rR  rc   r   r4   r   r;   _concat_valuesr   r?   r   r5   r2   Tensorr1   convert_to_tensor)r   concatd_seq_lensconcatd_num_grad_updatesconcated_samplesrb   ra   r`   r   concatd_datavalues_to_concat_concat_values_w_timerC   s              @r+   r   r   8  sp   \ 
;
;7
;
;
;;; 5+G444  !3x-11K1+
 ## ##7a<<-K-KJ =K''1<:+E+E&  
 M![%8m{**Q    	Q]k99&  
 "k1=99K55%&&2##Ak&:$;<<<)$Q'''172'''$Q'''1+=+GG'''"""" !!}} La %%''  !!!,1111 0111% LOO
  @???.>???$+Nz$R$R$R!"0%(8  LOO 2%EOO<LQ<O,P,P <<(899	R		B	2<<8H8K+L+L	//0@AA !   %Q'+CA+F+M#N   r-   c                 n   t          j        t                    }d}| D ]}t          |t                    r)t          |          dk    r+|                                }nIt          |t                    s4t          d	                    t          |          j                            |j                                        D ] \  }}||                             |           !||                                z  }i }|                                D ]\  }}t!          |          ||<   t          ||          S )a  Concatenates a list of SampleBatchTypes to a single MultiAgentBatch type.

    This function, as opposed to concat_samples() forces the output to always be
    MultiAgentBatch which is more generic than SampleBatch.

    Args:
        samples: List of SampleBatches or MultiAgentBatches to be
            concatenated.

    Returns:
        A new (concatenated) MultiAgentBatch.

    .. testcode::
        :skipif: True

        import numpy as np
        from ray.rllib.policy.sample_batch import SampleBatch
        b1 = MultiAgentBatch({'default_policy': {
                                        "a": np.array([1, 2]),
                                        "b": np.array([10, 11])
                                        }}, env_steps=2)
        b2 = SampleBatch({"a": np.array([3]),
                          "b": np.array([12])})
        print(concat_samples([b1, b2]))

    .. testoutput::

        {'default_policy': {"a": np.array([1, 2, 3]),
                            "b": np.array([10, 11, 12])}}

    r   z[`concat_samples_into_ma_batch` can only concat SampleBatch|MultiAgentBatch objects, not {}!)r  r  r&   r%   r/   r4   r"  r  r   r   r   r  r  r9   r   r   r   )r   r  r   r   r   r  r   batchess           r+   r  r    s<   D !,T22NI # #a%% 	1vv{{ $$&&A// 	??Evd1ggFV?W?W  
 *0022 	. 	.JC3&&u----Q[[]]"		
C&,,.. + +W!'**C3	***r-   r  c                 `   t           r@t                               |d                   r t                               || rdnd          S t          |d         t          j                  rt	          j        || rdnd          S t          r@t                              |d                   r t                              || rdnd          S t          |d         t                    rg }|D ]}|
                    |           |S t          dt          |d                    d|d                    )zConcatenates a list of values.

    Args:
        values: The values to concatenate.
        time_major: Whether to concatenate along the first axis
            (time_major=False) or the second axis (time_major=True).
    r   r   )dim)axisz$Unsupported type for concatenation: z first element: )r5   r2   catr%   r'   r   r   r1   r   r&   rR  r   r   )r`   r  concatenated_listsublists       r+   r  r    s<     
++ 
yy*%;QQ!y<<<	F1Irz	*	* 
~f
+A11BBBB	 
VAY'' 
yy:&<aa1y===	F1It	$	$ 	
 	. 	.G$$W----  *4q	?? * *$Qi* *
 
 	
r-   r  c                     t          | t                    rW| j                                        }t	          |          dk    rt
          |v r| j        t
                   } nt          d          | S )a:  Converts a MultiAgentBatch to a SampleBatch if neccessary.

    Args:
        batch: The SampleBatchType to convert.

    Returns:
        batch: the converted SampleBatch

    Raises:
        ValueError if the MultiAgentBatch has more than one policy_id
        or if the policy_id is not `DEFAULT_POLICY_ID`
    r   a  RLlib tried to convert a multi agent-batch with data from more than one policy to a single-agent batch. This is not supported and may be due to a number of issues. Here are two possible ones:1) Off-Policy Estimation is not implemented for multi-agent batches. You can set `off_policy_estimation_methods: {}` to resolve this.2) Loading multi-agent data for offline training is not implemented.Load single-agent data instead to resolve this.)r%   r  r  r   r4   r!  r   )r  policy_keyss     r+    convert_ma_batch_to_sample_batchr    sw     %)) *//11{q  %6+%E%E():;EEB	 	 	 Lr-   )9r  r  r  	functoolsr   numbersr   typingr   r   r   r   r	   r
   r    r'   r?   ray._common.deprecationr   r   ray.rllib.core.columnsr   ray.rllib.utils.annotationsr   r   r   ray.rllib.utils.compressionr   r   r   ray.rllib.utils.frameworkr   r   ray.rllib.utils.torch_utilsr   ray.rllib.utils.typingr   r   r   r   r   ray.utilr   tf1r1   tfvr5   r  r!  r=   rG   r/   r  r   r  r  r  r$   r-   r+   <module>r     s!           



             = = = = = = = = = = = = = = = =      C C C C C C C C * * * * * * P P P P P P P P P P C C C C C C C C C C E E E E E E E E ? ? ? ? ? ?                   }Rq %  > > > > >B ]O ]O ]O ]O ]O$ ]O ]O ]O@' p
 p
 p
 p
 p
 p
 p
 p
f AD1 Ao A A A AH ;+$*? ;+DU ;+ ;+ ;+ ;+| (, 
 
 

 
 
 
 
4 O       r-   