
    &`i                        d Z ddlZddlmZ ddlZddlZddlm	Z	m
Z
 ddlmZmZ ddlmZmZ  eddd	
          Ze                    dedd            G d dej                  Zedk    r`e                                Z eej                                                                      edej        i          Z eee           dS dS )a)  Example of defining a custom gymnasium Env to be learned by an RLlib Algorithm.

This example:
    - demonstrates how to write your own (single-agent) gymnasium Env class, define its
    physics and mechanics, the reward function used, the allowed actions (action space),
    and the type of observations (observation space), etc..
    - shows how to configure and setup this environment class within an RLlib
    Algorithm config.
    - runs the experiment with the configured algo, trying to solve the environment.

To see more details on which env we are building for this example, take a look at the
`SimpleCorridor` class defined below.


How to run this script
----------------------
`python [script file name].py`

Use the `--corridor-length` option to set a custom length for the corridor. Note that
for extremely long corridors, the algorithm should take longer to learn.

For debugging, use the following additional command line options
`--no-tune --num-env-runners=0`
which should allow you to set breakpoints anywhere in the RLlib code and
have the execution stop there for inspection and debugging.

For logging to your WandB account, use:
`--wandb-key=[your WandB API key] --wandb-project=[some project name]
--wandb-run-name=[optional: WandB run name (within the defined project)]`


Results to expect
-----------------
You should see results similar to the following in your console output:

+--------------------------------+------------+-----------------+--------+
| Trial name                     | status     | loc             |   iter |
|--------------------------------+------------+-----------------+--------+
| PPO_SimpleCorridor_78714_00000 | TERMINATED | 127.0.0.1:85794 |      7 |
+--------------------------------+------------+-----------------+--------+

+------------------+-------+----------+--------------------+
|   total time (s) |    ts |   reward |   episode_len_mean |
|------------------+-------+----------+--------------------|
|          18.3034 | 28000 | 0.908918 |            12.9676 |
+------------------+-------+----------+--------------------+
    N)Optional)BoxDiscrete)add_rllib_example_script_args#run_rllib_example_script_experiment)get_trainable_clsregister_envg?2   i )default_rewarddefault_itersdefault_timestepsz--corridor-length
   zcThe length of the corridor in fields. Note that this number includes the starting- and goal states.)typedefaulthelpc                   @    e Zd ZdZddee         fdZddddZd ZdS )	SimpleCorridora  Example of a custom env in which the agent has to walk down a corridor.

    ------------
    |S........G|
    ------------
    , where S is the starting position, G is the goal position, and fields with '.'
    mark free spaces, over which the agent may step. The length of the above example
    corridor is 10.
    Allowed actions are left (0) and right (1).
    The reward function is -0.01 per step taken and a uniform random value between
    0.5 and 1.5 when reaching the goal state.

    You can configure the length of the corridor via the env's config. Thus, in your
    AlgorithmConfig, you can do:
    `config.environment(env_config={"corridor_length": ..})`.
    Nconfigc                     |pi }|                     dd          | _        d| _        t          d          | _        t          d| j        dt          j                  | _        d S )Ncorridor_length   r      g        )   )shapedtype)	getend_poscur_posr   action_spacer   npfloat32observation_space)selfr   s     z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/examples/envs/custom_gym_env.py__init__zSimpleCorridor.__init__b   sY    2zz"3Q77$QKK!$S$,d"*!U!U!U    )seedoptionsc                    t          j        |           d| _        t          j        | j        gt          j                  ddifS )Nr   	env_statereset)randomr'   r   r    arrayr!   )r#   r'   r(   s      r$   r+   zSimpleCorridor.reseti   s;    Dx
33k75KKKr&   c                 <   |dv s
J |            |dk    r| j         dk    r| xj         dz  c_         n|dk    r| xj         dz  c_         | j         | j        k    }d}|rt          j        dd          nd}i }t	          j        | j         gt          j                  ||||fS )N)r   r   r   r   Fg      ?g      ?g{Gz)r   r   r,   uniformr    r-   r!   )r#   action
terminated	truncatedrewardinfoss         r$   stepzSimpleCorridor.stepo   s    Q;;4<!++LLALLLq[[LLALL \T\1
	-7BS)))UHdl^RZ00
 	
r&   )N)	__name__
__module____qualname____doc__r   dictr%   r+   r5    r&   r$   r   r   P   s|         "V Vx~ V V V V !$ L L L L L
 
 
 
 
r&   r   __main__r   )
env_config)r9   r,   typingr   	gymnasiumgymnumpyr    gymnasium.spacesr   r   ray.rllib.examples.utilsr   r   ray.tune.registryr   r	   parseradd_argumentintEnvr   r6   
parse_argsargsalgoget_default_configenvironmentr   base_configr;   r&   r$   <module>rO      s  . .b                * * * * * * * *        > = = = = = = =	&	&
 
 

   	
!	    4
 4
 4
 4
 4
SW 4
 4
 4
r zD  	$)$$					)4+?@ 
 

 

  ('T:::::3 r&   