§
    &`ƒi¤  ã                   ó
  — d Z ddlmZ ddlmZ ddlmZmZ ddlm	Z	m
Z
  eddd¬	¦  «        Ze                     d
¬¦  «         e                     ddd¬¦  «         edk    r¢e                     ¦   «         Zej        d
k    s
J d¦   «         ‚ e	ej        ¦  «                             ¦   «                              edej        i¬¦  «                             d„ ¬¦  «                             ddhd„ ¬¦  «        Z eee¦  «         dS dS )a¡  Example of running a multi-agent experiment w/ agents always acting simultaneously.

This example:
    - demonstrates how to write your own (multi-agent) environment using RLlib's
    MultiAgentEnv API.
    - shows how to implement the `reset()` and `step()` methods of the env such that
    the agents act simultaneously.
    - shows how to configure and setup this environment class within an RLlib
    Algorithm config.
    - runs the experiment with the configured algo, trying to solve the environment.


How to run this script
----------------------
`python [script file name].py --sheldon-cooper-mode`

For debugging, use the following additional command line options
`--no-tune --num-env-runners=0`
which should allow you to set breakpoints anywhere in the RLlib code and
have the execution stop there for inspection and debugging.

For logging to your WandB account, use:
`--wandb-key=[your WandB API key] --wandb-project=[some project name]
--wandb-run-name=[optional: WandB run name (within the defined project)]`


Results to expect
-----------------
You should see results similar to the following in your console output:

+-----------------------------------+----------+--------+------------------+-------+
| Trial name                        | status   |   iter |   total time (s) |    ts |
|-----------------------------------+----------+--------+------------------+-------+
| PPO_RockPaperScissors_8cef7_00000 | RUNNING  |      3 |          16.5348 | 12000 |
+-----------------------------------+----------+--------+------------------+-------+
+-------------------+------------------+------------------+
|   combined return |   return player2 |   return player1 |
|-------------------+------------------+------------------|
|                 0 |            -0.15 |             0.15 |
+-------------------+------------------+------------------+

Note that b/c we are playing a zero-sum game, the overall return remains 0.0 at
all times.
é    ©ÚFlattenObservations)ÚRockPaperScissors)Úadd_rllib_example_script_argsÚ#run_rllib_example_script_experiment)Úget_trainable_clsÚregister_envgÍÌÌÌÌÌì?é2   i † )Údefault_rewardÚdefault_itersÚdefault_timestepsé   )Ú
num_agentsz--sheldon-cooper-modeÚ
store_truez‰Whether to add two more actions to the game: Lizard and Spock. Watch here for more details :) https://www.youtube.com/watch?v=x5Q6-wMx-K8)ÚactionÚhelpÚ__main__z1Must set --num-agents=2 when running this script!Úsheldon_cooper_mode)Ú
env_configc                 ó"   — t          d¬¦  «        S )NT)Úmulti_agentr   )ÚenvÚspacesÚdevices      ú…/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/examples/envs/agents_act_simultaneously.pyú<lambda>r   `   s   € Õ,?ÈDÐ,QÑ,QÔ,Q€ ó    )Úenv_to_module_connectorÚplayer1Úplayer2c                 ó   — | S )N© )Úagent_idÚepisodeÚkws      r   r   r   h   s   € ¸h€ r   )ÚpoliciesÚpolicy_mapping_fnN)Ú__doc__Ú7ray.rllib.connectors.env_to_module.flatten_observationsr   Ú?ray.rllib.examples.envs.classes.multi_agent.rock_paper_scissorsr   Úray.rllib.examples.utilsr   r   Úray.tune.registryr   r	   ÚparserÚset_defaultsÚadd_argumentÚ__name__Ú
parse_argsÚargsr   ÚalgoÚget_default_configÚenvironmentr   Úenv_runnersr   Úbase_configr"   r   r   ú<module>r8      sÖ  ðð+ð +ðX XÐ WÐ WÐ WÐ WÐ Wðð ð ð ð ð ðð ð ð ð ð ð ð ð >Ð =Ð =Ð =Ð =Ð =Ð =Ð =à	&Ð	&Ø b¸Fð
ñ 
ô 
€ð × Ò Øð ñ ô ð ð × Ò ØØð
Qð ñ ô ð ð ˆzÒÐØ×ÒÑÔ€DàŒ?˜aÒÐÐÐ!TÑÔÐð  	Ð˜$œ)Ñ$Ô$ß	Ò	Ñ	Ô	ß	ŠØØ-¨tÔ/GÐHð 
ñ 

ô 

÷ 
ŠàQÐQð 
ñ 

ô 

÷
 
Šà Ð+ð GÐFð 
ñ 

ô 

ð ð* (Ð'¨°TÑ:Ô:Ð:Ð:Ð:ðO Ðr   