
    &`iC                         d dl mZmZmZmZmZ d dlZd dlZ	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ  ed           G d de                      ZdS )    )Any
CollectionDictListOptionalN)Box)ConnectorV2)Columns)RLModule)override)flatten_inputs_to_1d_tensor)get_base_struct_from_space)AgentIDEpisodeType)	PublicAPIalpha)	stabilityc                   f    e Zd ZdZ ee          dej        fd            Z	 	 ddddddde	ej                 de	ej                 d	e
d
e	ee                  de
de	ee                  f fdZ ee          ddddedeeef         dee         de	e
         de	e         defd            Zd Z xZS )FlattenObservationsa  A connector piece that flattens all observation components into a 1D array.

    - Can be used either in env-to-module or learner pipelines.
    - When used in env-to-module pipelines:
        - Works directly on the incoming episodes list and changes the last observation
        in-place (write the flattened observation back into the episode).
        - This connector does NOT alter the incoming batch (`data`) when called.
    - When used in learner pipelines:
        Works directly on the incoming episodes list and changes all observations
        before stacking them into the batch.


    .. testcode::

        import gymnasium as gym
        import numpy as np

        from ray.rllib.connectors.env_to_module import FlattenObservations
        from ray.rllib.env.single_agent_episode import SingleAgentEpisode
        from ray.rllib.utils.test_utils import check

        # Some arbitrarily nested, complex observation space.
        obs_space = gym.spaces.Dict({
            "a": gym.spaces.Box(-10.0, 10.0, (), np.float32),
            "b": gym.spaces.Tuple([
                gym.spaces.Discrete(2),
                gym.spaces.Box(-1.0, 1.0, (2, 1), np.float32),
            ]),
            "c": gym.spaces.MultiDiscrete([2, 3]),
        })
        act_space = gym.spaces.Discrete(2)

        # Two example episodes, both with initial (reset) observations coming from the
        # above defined observation space.
        episode_1 = SingleAgentEpisode(
            observations=[
                {
                    "a": np.array(-10.0, np.float32),
                    "b": (1, np.array([[-1.0], [-1.0]], np.float32)),
                    "c": np.array([0, 2]),
                },
            ],
        )
        episode_2 = SingleAgentEpisode(
            observations=[
                {
                    "a": np.array(10.0, np.float32),
                    "b": (0, np.array([[1.0], [1.0]], np.float32)),
                    "c": np.array([1, 1]),
                },
            ],
        )

        # Construct our connector piece.
        connector = FlattenObservations(obs_space, act_space)

        # Call our connector piece with the example data.
        output_batch = connector(
            rl_module=None,  # This connector works without an RLModule.
            batch={},  # This connector does not alter the input batch.
            episodes=[episode_1, episode_2],
            explore=True,
            shared_data={},
        )

        # The connector does not alter the data and acts as pure pass-through.
        check(output_batch, {})

        # The connector has flattened each item in the episodes to a 1D tensor.
        check(
            episode_1.get_observations(0),
            #         box()  disc(2).  box(2, 1).  multidisc(2, 3)........
            np.array([-10.0, 0.0, 1.0, -1.0, -1.0, 1.0, 0.0, 0.0, 0.0, 1.0]),
        )
        check(
            episode_2.get_observations(0),
            #         box()  disc(2).  box(2, 1).  multidisc(2, 3)........
            np.array([10.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]),
        )

        # Two example episodes, both with initial (reset) observations coming from the
        # above defined observation space.
        episode_1 = SingleAgentEpisode(
            observations=[
                {
                    "a": np.array(-10.0, np.float32),
                    "b": (1, np.array([[-1.0], [-1.0]], np.float32)),
                    "c": np.array([0, 2]),
                },
            ],
        )
        episode_2 = SingleAgentEpisode(
            observations=[
                {
                    "a": np.array(10.0, np.float32),
                    "b": (0, np.array([[1.0], [1.0]], np.float32)),
                    "c": np.array([1, 1]),
                },
            ],
        )

        # Construct our connector piece and remove in it the "a" (Box()) key-value
        # pair from the dictionary observations.
        connector = FlattenObservations(obs_space, act_space, keys_to_remove=["a"])

        # Call our connector piece with the example data.
        output_batch = connector(
            rl_module=None,  # This connector works without an RLModule.
            batch={},  # This connector does not alter the input batch.
            episodes=[episode_1, episode_2],
            explore=True,
            shared_data={},
        )

        # The connector has flattened each item in the episodes to a 1D tensor
        # and removed the "a" (Box()) key-value pair.
        check(
            episode_1.get_observations(0),
            #         disc(2).  box(2, 1).  multidisc(2, 3)........
            np.array([0.0, 1.0, -1.0, -1.0, 1.0, 0.0, 0.0, 0.0, 1.0]),
        )
        check(
            episode_2.get_observations(0),
            #         disc(2).  box(2, 1).  multidisc(2, 3)........
            np.array([1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]),
        )

        # Use the connector in a learner pipeline. Note, we need here two
        # observations in the episode because the agent has to have stepped
        # at least once.
        episode_1 = SingleAgentEpisode(
            observations=[
                {
                    "a": np.array(-10.0, np.float32),
                    "b": (1, np.array([[-1.0], [-1.0]], np.float32)),
                    "c": np.array([0, 2]),
                },
                {
                    "a": np.array(-10.0, np.float32),
                    "b": (1, np.array([[-1.0], [-1.0]], np.float32)),
                    "c": np.array([0, 2]),
                },
            ],
            actions=[1],
            rewards=[0],
            # Set the length of the lookback buffer to 0 to read the data as
            # from an actual step.
            len_lookback_buffer=0,
        )
        episode_2 = SingleAgentEpisode(
            observations=[
                {
                    "a": np.array(10.0, np.float32),
                    "b": (0, np.array([[1.0], [1.0]], np.float32)),
                    "c": np.array([1, 1]),
                },
                {
                    "a": np.array(10.0, np.float32),
                    "b": (0, np.array([[1.0], [1.0]], np.float32)),
                    "c": np.array([1, 1]),
                },

            ],
            actions=[1],
            rewards=[0],
            # Set the length of the lookback buffer to 0 to read the data as
            # from an actual step.
            len_lookback_buffer=0,
        )

        # Construct our connector piece for a learner pipeline and remove the
        # "a" (Box()) key-value pair.
        connector = FlattenObservations(
            obs_space,
            act_space,
            as_learner_connector=True,
            keys_to_remove=["a"]
        )

        # Call our connector piece with the example data.
        output_batch = connector(
            rl_module=None,  # This connector works without an RLModule.
            batch={},  # This connector does not alter the input batch.
            episodes=[episode_1, episode_2],
            explore=True,
            shared_data={},
        )

        check(list(output_batch.keys()), ["obs"])
        check(list(output_batch["obs"].keys()), [(episode_1.id_,), (episode_2.id_,)])

        check(
            output_batch["obs"][(episode_1.id_,)][0][0],
            np.array([0.0, 1.0, -1.0, -1.0, 1.0, 0.0, 0.0, 0.0, 1.0]),
        )
        check(
            output_batch["obs"][(episode_2.id_,)][0][0],
            np.array([1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]),
        )
    returnc                     t           j                   _         j        ri } j                                        D ]\  }} j        r3 fd j        |                                         D              j        |<    j        r| j        vr j        |         ||<   `t          t          j	        d  j        |                    j        |         d          }t          t          d          t          d          t          |          ft          j                  ||<   t          j                            |          S  j        r* fd j                                        D              _        t          t          j	        d  j                   j        d          }t          t          d          t          d          t          |          ft          j                  S )	Nc                 .    i | ]\  }}|j         v||S  _keys_to_remove.0kvselfs      /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/connectors/common/flatten_observations.py
<dictcomp>zJFlattenObservations.recompute_output_observation_space.<locals>.<dictcomp>   s7     = = = AqD$888 1888    c                 *    |                                  S Nsampless    r!   <lambda>zHFlattenObservations.recompute_output_observation_space.<locals>.<lambda>   s    ahhjj r#   F)
batch_axisz-infinfc                 .    i | ]\  }}|j         v||S r   r   r   s      r!   r"   zJFlattenObservations.recompute_output_observation_space.<locals>.<dictcomp>  s7     / / /1 444 q444r#   c                 *    |                                  S r%   r&   r(   s    r!   r*   zHFlattenObservations.recompute_output_observation_space.<locals>.<lambda>  s    ahhjj r#   )r   input_observation_space_input_obs_base_struct_multi_agentitemsr   
_agent_idsr   treemap_structurer   floatlennpfloat32gymspacesr   )r    r/   input_action_spacer;   agent_idspacer'   s   `      r!   "recompute_output_observation_spacez6FlattenObservations.recompute_output_observation_space   s    'A('
 '
#  *	PF#'#>#D#D#F#F  % ' = = = =$($?$I$O$O$Q$Q= = =D/9
 ? xt'F'F'+'B8'LF8$$8*00 7A  3H=#(  F (+fuU||c&kk^RZ( (F8$$ :??6*** # / / / / $ ; A A C C/ / /+
 1"((/  +   F uV}}eEllS[[NBJOOOr#   NF)multi_agent	agent_idsas_learner_connectorkeys_to_remover/   r<   r@   rA   rB   rC   c                "   d| _         || _        || _        || _        |K|rt	          |t
          j        j                  s*|rt          d | j	        D                       s
J d            |pg | _
         t                      j        ||fi | dS )a  Initializes a FlattenObservations instance.

        Args:
            multi_agent: Whether this connector operates on multi-agent observations,
                in which case, the top-level of the Dict space (where agent IDs are
                mapped to individual agents' observation spaces) is left as-is.
            agent_ids: If multi_agent is True, this argument defines a collection of
                AgentIDs for which to flatten. AgentIDs not in this collection are
                ignored.
                If None, flatten observations for all AgentIDs. None is the default.
            as_learner_connector: Whether this connector is part of a Learner connector
                pipeline, as opposed to an env-to-module pipeline.
                Note, this is usually only used for offline rl where the data comes
                from an offline dataset instead of a simulator. With a simulator the
                data is simply rewritten.
            keys_to_remove: Optional keys to remove from the observations.

        Nc              3   T   K   | ]#}t          |t          j        j                  V  $d S r%   )
isinstancer:   r;   r   )r   agent_spaces     r!   	<genexpr>z/FlattenObservations.__init__.<locals>.<genexpr>:  sD        # {CJO<<     r#   zTWhen using `keys_to_remove` the observation space must be of type `gym.spaces.Dict`.)r0   r1   r3   _as_learner_connectorrF   r:   r;   r   anyr/   r   super__init__)	r    r/   r<   r@   rA   rB   rC   kwargs	__class__s	           r!   rL   zFlattenObservations.__init__  s    : '+#'#%9"% &2CJODD & 	 &
   '+'C     &% b &% 

  .302DOOOOOOOr#   )exploreshared_data	rl_modulebatchepisodesrO   rP   c                   
  j         r                     |d          D ]t

f
 fd	}                     |t          j         |
                    t          dt          
                              
          t          
          
           un                     |d          D ]

                    d          }                     |
          } j	        r; j
        
j         j
        vr|}	n:t          | j        
j                 d	          }	nt          | j        d	          }	
                    d|	
            j        
_        |S )NT)agents_that_stepped_onlyc                     fd| D             } t                    }t          | j        d          }|                    |d                                          S )Nc                 <    g | ]}                     |          S r   )_remove_keys_from_dict)r   o
sa_episoder    s     r!   
<listcomp>zAFlattenObservations.__call__.<locals>._map_fn.<locals>.<listcomp>V  s)    SSS!466q*EESSSr#   Finputsspaces_structr+   )r7   r   r0   reshapecopy)obs_sa_episode
batch_sizeflattened_obsrZ   r    s       r!   _map_fnz-FlattenObservations.__call__.<locals>._map_fnT  sp    SSSSSsSSSC!$ZJ$?" '+&A#(% % %M )00R@@EEGGGr#   r   )indices)rR   columnitems_to_add	num_itemssingle_agent_episoder_   Fr\   )
at_indicesnew_data)rI   single_agent_episode_iteratoradd_n_batch_itemsr
   OBSget_observationsslicer7   rX   r1   r3   r=   r   r0   set_observationsobservation_space)r    rQ   rR   rS   rO   rP   rM   rf   last_obsre   rZ   s   `         @r!   __call__zFlattenObservations.__call__D  s    % E	F"@@4 A    
 .8 H H H H H H H &&";!("33E!S__<U<U3VV"" " "*oo)3 ' 	 	 	 	'> #@@4 A   %F %F
 &66r::66xLL$ 3&/tFF(0(C#+ +/*E * 3+ (-	) 	) 	) %@'&*&A#(	% % %M ++rM+RRR 04/E
,,r#   c                     d} j         r0t           j        |j                 t          j        j                  }n$t           j        t          j        j                  }|r' j        r  fd|                                D             }|S )a:  Removes keys from dictionary spaces.

        Args:
            obs: Observation sample from space.
            sa_episode: Single-agent episode. Needs `agent_id` set in multi-agent
                setups.

        Returns:
            Observation sample `obs` with keys in `self._keys_to_remove` removed.
        Fc                 .    i | ]\  }}|j         v||S r   r   r   s      r!   r"   z>FlattenObservations._remove_keys_from_dict.<locals>.<dictcomp>  s,    QQQDAq1D<P3P3P1a3P3P3Pr#   )	r1   rF   r/   r=   r:   r;   r   r   r2   )r    rb   rZ   is_dict_spaces   `   r!   rX   z*FlattenObservations._remove_keys_from_dict  s      	V&,Z-@A3:? MM 't'CSZ_UUM  	RT1 	RQQQQCIIKKQQQC
r#   )NN)__name__
__module____qualname____doc__r   r	   r:   Spacer?   r   boolr   r   r   strrL   r   r   r   r   dictrv   rX   __classcell__)rN   s   @r!   r   r      s       G GR Xk3P 
	3P 3P 3P 3Pn 8<26/P
 "37%*.2/P /P /P!)#)!4/P %SY//P
 /P Jw/0/P #/P !c+/P /P /P /P /P /Pb Xk #'&*Q Q Q Q CH~	Q
 {#Q $Q d^Q 
Q Q Q Qf      r#   r   )typingr   r   r   r   r   	gymnasiumr:   numpyr8   r4   gymnasium.spacesr   !ray.rllib.connectors.connector_v2r	   ray.rllib.core.columnsr
   "ray.rllib.core.rl_module.rl_moduler   ray.rllib.utils.annotationsr   ray.rllib.utils.numpyr   "ray.rllib.utils.spaces.space_utilsr   ray.rllib.utils.typingr   r   ray.util.annotationsr   r   r   r#   r!   <module>r      sP   8 8 8 8 8 8 8 8 8 8 8 8 8 8                      9 9 9 9 9 9 * * * * * * 7 7 7 7 7 7 0 0 0 0 0 0 = = = = = = I I I I I I 7 7 7 7 7 7 7 7 * * * * * * W^ ^ ^ ^ ^+ ^ ^ ^ ^ ^r#   