
    &`i                     
   d dl mZmZ d dlZd dlmZ d dlmZ edee         fd            Z	edee
         deej        j                 d	eej        j                 fd
            Zedee         dee         d	dfd            ZdS )    )ListTupleN)SingleAgentEpisode)DeveloperAPIepisodesc                 0   g }| D ]}|                     |j                   |xj        dz  c_        |j                             |j        d                    |j                             |j        d                    |j                             |j        d                    |j                             d           |j                                        D ]}|                     |d                    |j	        sd|_        |
                                 |S )a  Adds an artificial timestep to an episode at the end.

    In detail: The last observations, infos, actions, and all `extra_model_outputs`
    will be duplicated and appended to each episode's data. An extra 0.0 reward
    will be appended to the episode's rewards. The episode's timestep will be
    increased by 1. Also, adds the truncated=True flag to each episode if the
    episode is not already done (terminated or truncated).

    Useful for value function bootstrapping, where it is required to compute a
    forward pass for the very last timestep within the episode,
    i.e. using the following input dict: {
      obs=[final obs],
      state=[final state output],
      prev. reward=[final reward],
      etc..
    }

    Args:
        episodes: The list of SingleAgentEpisode objects to extend by one timestep
            and add a truncation flag if necessary.

    Returns:
        A list of the original episodes' truncated values (so the episodes can be
        properly restored later into their original states).
       g        T)appendis_truncatedtobservationsinfosactionsrewardsextra_model_outputsvaluesis_donevalidate)r   orig_truncatedsepisodevs       {/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/utils/postprocessing/episodes.py#add_one_ts_to_episodes_and_truncater   	   s   6 O  w3444 			Q		 	##G$8$<===W]2.///wr2333s###,3355 	 	AHHQrUOOOO  	(#'G     episode_lensdatareturnc                 :   g }d}| D ]0}|                     t          |||z   dz
                       ||z  }1g }|D ]5|                     t          j        fd|D                                  6t	          |          dk    rt          |          n|d         S )a  Removes the last timesteps from each given data item.

    Each item in data is a concatenated sequence of episodes data.
    For example if `episode_lens` is [2, 4], then data is a shape=(6,)
    ndarray. The returned corresponding value will have shape (4,), meaning
    both episodes have been shortened by exactly one timestep to 1 and 3.

    ..testcode::

        from ray.rllib.algorithms.ppo.ppo_learner import PPOLearner
        import numpy as np

        unpadded = PPOLearner._remove_last_ts_from_data(
            [5, 3],
            np.array([0, 1, 2, 3, 4,  0, 1, 2]),
        )
        assert (unpadded[0] == [0, 1, 2, 3, 0, 1]).all()

        unpadded = PPOLearner._remove_last_ts_from_data(
            [4, 2, 3],
            np.array([0, 1, 2, 3,  0, 1,  0, 1, 2]),
            np.array([4, 5, 6, 7,  2, 3,  3, 4, 5]),
        )
        assert (unpadded[0] == [0, 1, 2,  0,  0, 1]).all()
        assert (unpadded[1] == [4, 5, 6,  2,  3, 4]).all()

    Args:
        episode_lens: A list of current episode lengths. The returned
            data will have the same lengths minus 1 timestep.
        data: A tuple of data items (np.ndarrays) representing concatenated episodes
            to be shortened by one timestep per episode.
            Note that only arrays with `shape=(n,)` are supported! The
            returned data will have `shape=(n-len(episode_lens),)` (each
            episode gets shortened by one timestep).

    Returns:
        A tuple of new data items shortened by one timestep.
    r   r	   c                      g | ]
}|         S  r!   ).0sds     r   
<listcomp>z,remove_last_ts_from_data.<locals>.<listcomp>r   s    "8"8"8A1Q4"8"8"8r   )r   slicenpconcatenatelentuple)r   r   slicessumlen_retr$   s         @r   remove_last_ts_from_datar/   <   s    Z F
C  eCta00111t C ; ;

2>"8"8"8"8"8"8"899::::SA5:::3q61r   r   c                    t          | |          D ]\  }}|xj        dz  c_        |j                                         |j                                         |j                                         |j                                         |j                                        D ]}|                                 ||_	        dS )aI  Reverts the effects of `_add_ts_to_episodes_and_truncate`.

    Args:
        episodes: The list of SingleAgentEpisode objects to extend by one timestep
            and add a truncation flag if necessary.
        orig_truncateds: A list of the original episodes' truncated values to be
            applied to the `episodes`.
    r	   N)
zipr   r   popr   r   r   r   r   r   )r   r   r   orig_truncatedr   s        r   3remove_last_ts_from_episodes_and_restore_truncatedsr4   v   s     $'x#A#A . .		Q		  """,3355 	 	AEEGGGG-. .r   )typingr   r   numpyr'   "ray.rllib.env.single_agent_episoder   ray.util.annotationsr   r   int_typingNDArrayr/   boolr4   r!   r   r   <module>r=      s               A A A A A A - - - - - - /$7I2J / / / /d 62s)62#$62 2:62 62 62 62r .%&.$Z. 
. . . . . .r   