§
    &`ƒio  ã                   óz   — d dl Z d dlmZ d dlZd dlmZmZ d dlm	Z	m
Z
 d dlmZ e	 G d„ de¦  «        ¦   «         ZdS )é    N)ÚOptional)ÚExternalEnvÚ_ExternalEnvEpisode)ÚOldAPIStackÚoverride)ÚMultiAgentDictc                   ól  — e Zd ZdZdej        dej        fd„Zd„ Z ee	¦  «        	 dde
e         d	ed
efd„¦   «         Z ee	¦  «        deded
efd„¦   «         Z ee	¦  «        dededed
dfd„¦   «         Z ee	¦  «        	 	 ddedededed
df
d„¦   «         Z ee	¦  «        deded
dfd„¦   «         ZdS )ÚExternalMultiAgentEnvz/This is the multi-agent version of ExternalEnv.Úaction_spaceÚobservation_spacec                 ó¨  — t          j        | ||¦  «         t          | j        t          ¦  «        st          | j        t          ¦  «        r‡| j                             ¦   «         | j                             ¦   «         k    sQt          d                     | j                             ¦   «         | j                             ¦   «         ¦  «        ¦  «        ‚dS dS )z¶Initializes an ExternalMultiAgentEnv instance.

        Args:
            action_space: Action space of the env.
            observation_space: Observation space of the env.
        z=Agent ids disagree for action space and obs space dict: {} {}N)	r   Ú__init__Ú
isinstancer   Údictr   ÚkeysÚ
ValueErrorÚformat)Úselfr   r   s      úz/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/env/external_multi_agent_env.pyr   zExternalMultiAgentEnv.__init__   sÑ   € õ 	Ô˜T <Ð1BÑCÔCÐCõ dÔ'­Ñ.Ô.ð 		µ*ØÔ"¥Dñ3
ô 3
ð 		ð Ô%×*Ò*Ñ,Ô,°Ô0F×0KÒ0KÑ0MÔ0MÒMÐMÝ ð(ß(.ªØÔ)×.Ò.Ñ0Ô0°$Ô2H×2MÒ2MÑ2OÔ2Oñ)ô )ñô ð ð NÐMð		ð 		ó    c                 ó   — t           ‚)a  Override this to implement the multi-agent run loop.

        Your loop should continuously:
            1. Call self.start_episode(episode_id)
            2. Call self.get_action(episode_id, obs_dict)
                    -or-
                    self.log_action(episode_id, obs_dict, action_dict)
            3. Call self.log_returns(episode_id, reward_dict)
            4. Call self.end_episode(episode_id, obs_dict)
            5. Wait if nothing to do.

        Multiple episodes may be started at the same time.
        )ÚNotImplementedError)r   s    r   ÚrunzExternalMultiAgentEnv.run(   s
   € õ "Ð!r   NTÚ
episode_idÚtraining_enabledÚreturnc                 ó&  — |€t          j        ¦   «         j        }|| j        v r"t	          d                     |¦  «        ¦  «        ‚|| j        v r"t	          d                     |¦  «        ¦  «        ‚t          || j        |d¬¦  «        | j        |<   |S )Nz!Episode {} has already completed.zEpisode {} is already startedT)Ú
multiagent)	ÚuuidÚuuid4ÚhexÚ	_finishedr   r   Ú	_episodesr   Ú_results_avail_condition)r   r   r   s      r   Ústart_episodez#ExternalMultiAgentEnv.start_episode8   s   € ð ÐÝœ™œÔ)ˆJà˜œÐ'Ð'ÝÐ@×GÒGÈ
ÑSÔSÑTÔTÐTà˜œÐ'Ð'ÝÐ<×CÒCÀJÑOÔOÑPÔPÐPå%8Ø˜Ô5Ð7GÐTXð&
ñ &
ô &
ˆŒzÑ"ð Ðr   Úobservation_dictc                 óV   — |                       |¦  «        }|                     |¦  «        S )a‘  Record an observation and get the on-policy action.

        Thereby, observation_dict is expected to contain the observation
        of all agents acting in this episode step.

        Args:
            episode_id: Episode id returned from start_episode().
            observation_dict: Current environment observation.

        Returns:
            action: Action from the env action space.
        )Ú_getÚwait_for_action©r   r   r&   Úepisodes       r   Ú
get_actionz ExternalMultiAgentEnv.get_actionK   s*   € ð" —)’)˜JÑ'Ô'ˆØ×&Ò&Ð'7Ñ8Ô8Ð8r   Úaction_dictc                 ó\   — |                       |¦  «        }|                     ||¦  «         dS )a  Record an observation and (off-policy) action taken.

        Args:
            episode_id: Episode id returned from start_episode().
            observation_dict: Current environment observation.
            action_dict: Action for the observation.
        N)r(   Ú
log_action)r   r   r&   r-   r+   s        r   r/   z ExternalMultiAgentEnv.log_action_   s2   € ð —)’)˜JÑ'Ô'ˆØ×ÒÐ+¨[Ñ9Ô9Ð9Ð9Ð9r   Úreward_dictÚ	info_dictÚmultiagent_done_dictc                 ó  — |                       |¦  «        }|                     ¦   «         D ].\  }}||j        v r|j        |xx         |z  cc<   Œ$||j        |<   Œ/|r$|                     ¦   «         D ]\  }}||j        |<   Œ|r|pi |_        dS dS )a   Record returns from the environment.

        The reward will be attributed to the previous action taken by the
        episode. Rewards accumulate until the next action. If no reward is
        logged before the next action, a reward of 0.0 is assumed.

        Args:
            episode_id: Episode id returned from start_episode().
            reward_dict: Reward from the environment agents.
            info_dict: Optional info dict.
            multiagent_done_dict: Optional done dict for agents.
        N)r(   ÚitemsÚcur_reward_dictÚcur_done_dictÚcur_info_dict)	r   r   r0   r1   r2   r+   ÚagentÚrewÚdones	            r   Úlog_returnsz!ExternalMultiAgentEnv.log_returnsq   sÑ   € ð* —)’)˜JÑ'Ô'ˆð &×+Ò+Ñ-Ô-ð 	5ð 	5‰JˆE3Ø˜Ô/Ð/Ð/ØÔ'¨Ð.Ð.Ô.°#Ñ5Ð.Ð.Ñ.Ð.à14Ô'¨Ñ.Ð.àð 	4Ø3×9Ò9Ñ;Ô;ð 4ð 4‘tØ/3Ô% eÑ,Ð,àð 	4Ø$- O°ˆGÔ!Ð!Ð!ð	4ð 	4r   c                 ó˜   — |                       |¦  «        }| j                             |j        ¦  «         |                     |¦  «         dS )z¶Record the end of an episode.

        Args:
            episode_id: Episode id returned from start_episode().
            observation_dict: Current environment observation.
        N)r(   r"   Úaddr   r:   r*   s       r   Úend_episodez!ExternalMultiAgentEnv.end_episode—   sG   € ð —)’)˜JÑ'Ô'ˆØŒ×Ò˜7Ô-Ñ.Ô.Ð.ØŠÐ%Ñ&Ô&Ð&Ð&Ð&r   )NT)NN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__ÚgymÚSpacer   r   r   r   r   ÚstrÚboolr%   r   r,   r/   r;   r>   © r   r   r
   r
      sà  € € € € € à9Ð9ðà”iðð œ9ðð ð ð ð2"ð "ð "ð  €XˆkÑÔàIMðð Ø" 3œ-ðØBFðà	ðð ð ñ Ôðð$ €XˆkÑÔð9Øð9Ø1?ð9à	ð9ð 9ð 9ñ Ôð9ð& €XˆkÑÔð:àð:ð )ð:ð $ð	:ð
 
ð:ð :ð :ñ Ôð:ð" €XˆkÑÔð
 %)Ø/3ð#4ð #4àð#4ð $ð#4ð "ð	#4ð
 -ð#4ð 
ð#4ð #4ð #4ñ Ôð#4ðJ €XˆkÑÔð
' cð 
'¸^ð 
'ÐPTð 
'ð 
'ð 
'ñ Ôð
'ð 
'ð 
'r   r
   )r   Útypingr   Ú	gymnasiumrC   Úray.rllib.env.external_envr   r   Úray.rllib.utils.annotationsr   r   Úray.rllib.utils.typingr   r
   rG   r   r   ú<module>rM      s¸   ðØ €€€Ø Ð Ð Ð Ð Ð à Ð Ð Ð à GÐ GÐ GÐ GÐ GÐ GÐ GÐ GØ =Ð =Ð =Ð =Ð =Ð =Ð =Ð =Ø 1Ð 1Ð 1Ð 1Ð 1Ð 1ð ðV'ð V'ð V'ð V'ð V'˜Kñ V'ô V'ñ „ðV'ð V'ð V'r   