
    &`ic%              
          d dl mZ d dlmZmZmZ d dlZd dlZd dl	m
Z
mZ d dlmZ edededeeef         fd	            Zed
eee
ej        j        ef                  dedeej        j                 fd            Zedej        j        dee         dedeej        j                 fd            Zedee         dej        j        dej        j        fd            ZdS )    )deque)ListTupleUnionN)BatchedNdArraybatch)DeveloperAPIepisode_lenTreturnc                    g }g }t          | |          }|                    |           t          j        dg|z  dg||z
  z  z   t          j                  }|                    |           | |z
  }|dk    rrt          ||          }|                    |           t          j        dg|z  dg||z
  z  z   t          j                  }|                    |           ||z  }|dk    r||fS )a7  Creates loss mask and a seq_lens array, given an episode length and T.

    Args:
        episode_lens: A list of episode lengths to infer the loss mask and seq_lens
            array from.
        T: The maximum number of timesteps in each "row", also known as the maximum
            sequence length (max_seq_len). Episodes are split into chunks that are at
            most `T` long and remaining timesteps will be zero-padded (and masked out).

    Returns:
         Tuple consisting of a) list of the loss masks to use (masking out areas that
         are past the end of an episode (or rollout), but had to be zero-added due to
         the added extra time rank (of length T) and b) the list of sequence lengths
         resulting from splitting the given episodes into chunks of at most `T`
         timesteps.
       r   )minappendnparraybool_)r
   r   maskseq_lenslen_rowoverflow	extra_rows           /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/utils/postprocessing/zero_padding.pycreate_mask_and_seq_lensr      s    $ DH{ADOOD
(A3:q4x 00"(
;
;CKK QH
Q,,8QHaS4Z1#T*::BHEE	IA Q,, >    	item_listmax_seq_lenc                    t          j        d | d                   }g }g }d}t          |           } t          |           dk    r]|                                 }||z
  }t          j        |          }g }	g }
d}|D ]}t          |t                    r`|
                    |d|                    t          |          |k    rt          |          }W|}|	                    ||d                    w|
                    |           d}||z  }|                    t          j	        ||
                     |	r(| 
                    t          j	        ||	                     ||k    r(|                    t          |d                     g }d}t          |           dk    ]|dk    rF||k     r@|                    |g||z
  z             |                    t          |d                     |S )a  Splits the contents of `item_list` into a new list of ndarrays and returns it.

    In the returned list, each item is one ndarray of len (axis=0) `max_seq_len`.
    The last item in the returned list may be (right) zero-padded, if necessary, to
    reach `max_seq_len`.

    If `item_list` contains one or more `BatchedNdArray` (instead of individual
    items), these will be split accordingly along their axis=0 to yield the returned
    structure described above.

    .. testcode::

        from ray.rllib.utils.postprocessing.zero_padding import (
            BatchedNdArray,
            split_and_zero_pad,
        )
        from ray.rllib.utils.test_utils import check

        # Simple case: `item_list` contains individual floats.
        check(
            split_and_zero_pad([0, 1, 2, 3, 4, 5, 6, 7], 5),
            [[0, 1, 2, 3, 4], [5, 6, 7, 0, 0]],
        )

        # `item_list` contains BatchedNdArray (ndarrays that explicitly declare they
        # have a batch axis=0).
        check(
            split_and_zero_pad([
                BatchedNdArray([0, 1]),
                BatchedNdArray([2, 3, 4, 5]),
                BatchedNdArray([6, 7, 8]),
            ], 5),
            [[0, 1, 2, 3, 4], [5, 6, 7, 8, 0]],
        )

    Args:
        item_list: A list of individual items or BatchedNdArrays to be split into
            `max_seq_len` long pieces (the last of which may be zero-padded).
        max_seq_len: The maximum length of each item in the returned list.

    Returns:
        A list of np.ndarrays (all of length `max_seq_len`), which contains the same
        data as `item_list`, but split into sub-chunks of size `max_seq_len`.
        The last item in the returned list may be zero-padded, if necessary.
    c                 f    t          j        t          | t                    r	| d         gn|           S )Nr   )r   
zeros_like
isinstancer   )ss    r   <lambda>z$split_and_zero_pad.<locals>.<lambda>d   s)    "-*Q*G*G N1QOO r   r   Nr   auto)'individual_items_already_have_batch_dim)treemap_structurer   lenpopleftflattenr"   r   r   unflatten_as
appendleftr   extend)r   r   zero_elementretcurrent_time_row	current_titemt	item_flatitem_list_appendcurrent_time_row_flat_itemsadd_to_current_titms                r   split_and_zero_padr:   1   sK   d %OO! L C Ii  I
i..1

  "")# L&&	&(# 	% 	%C#~.. %+223rr7;;;s88q=='*3xx$$'($$++CG4444 ,223777#$  %%	 1$8S T TUUU 	L  !249I!J!JKKK ##JJ$<B      "IQ i..1

V 1}}[00+	2I JKKK

"FSSS	
 	
 	
 Jr   nd_arrayepisode_lensc                     g }d}|D ]E}t          | |||z                      }|                    t          |g|                     ||z  }F|S )a  Splits and zero-pads a single np.ndarray based on episode lens and a maxlen.

    Args:
        nd_array: The single np.ndarray to be split into n chunks, based on the given
            `episode_lens` and the `max_seq_len` argument. For example, if `nd_array`
            has a batch dimension (axis 0) of 21, `episode_lens` is [15, 3, 3], and
            `max_seq_len` is 6, then the returned list would have np.ndarrays in it of
            batch dimensions (axis 0): [6, 6, 6 (zero-padded), 6 (zero-padded),
            6 (zero-padded)].
            Note that this function doesn't work on nested data, such as dicts of
            ndarrays.
        episode_lens: A list of episode lengths along which to split and zero-pad the
            given `nd_array`.
        max_seq_len: The maximum sequence length to split at (and zero-pad).

    Returns: A list of n np.ndarrays, resulting from splitting and zero-padding the
        given `nd_array`.
    r   )r   r.   r:   )r;   r<   r   r0   cursorr
   itemss          r   split_and_zero_pad_n_episodesr@      si    0 CF#  x+1E(EFGG

%ug{;;<<<+Jr   datac                    t          |j                  dk    r|S t          |j                  dk    sJ g }d}|j        d         }| D ]q}t          ||          \  }}t          |          D ]"}|                    ||                    |dz  }#|dk    r$|                    ||d|f                    |dz  }rt          j        |          S )a  Removes right-side zero-padding from data based on `episode_lens`.

    ..testcode::

        from ray.rllib.utils.postprocessing.zero_padding import unpad_data_if_necessary
        import numpy as np

        unpadded = unpad_data_if_necessary(
            episode_lens=[4, 2],
            data=np.array([
                [2, 4, 5, 3, 0, 0, 0, 0],
                [-1, 3, 0, 0, 0, 0, 0, 0],
            ]),
        )
        assert (unpadded == [2, 4, 5, 3, -1, 3]).all()

        unpadded = unpad_data_if_necessary(
            episode_lens=[1, 5],
            data=np.array([
                [2, 0, 0, 0, 0],
                [-1, -2, -3, -4, -5],
            ]),
        )
        assert (unpadded == [2, -1, -2, -3, -4, -5]).all()

    Args:
        episode_lens: A list of actual episode lengths.
        data: A 2D np.ndarray with right-side zero-padded rows.

    Returns:
        A 1D np.ndarray resulting from concatenation of the un-padded
        input data along the 0-axis.
    r      r   N)r)   shapedivmodranger   r   concatenate)	r<   rA   new_datarow_idxr   r   num_rowscol_idxis	            r   unpad_data_if_necessaryrM      s    N 4:! tz??aHG
1A   #4OO' x 	 	AOODM***qLGG Q;;OOD(7(!23444 qLG>(###r   )collectionsr   typingr   r   r   numpyr   r'   "ray.rllib.utils.spaces.space_utilsr   r   ray.util.annotationsr	   intr   _typingNDArrayfloatr:   r@   rM    r   r   <module>rX      s         % % % % % % % % % %      D D D D D D D D - - - - - - "# "# "%d
:K " " " "J rE."**<eCDErr 
"*
r r r rj j s)  
"*
	   D C$s)C$
*
C$ ZC$ C$ C$ C$ C$ C$r   