
    &`i7                     <   d dl mZ d dlmZmZ d dlZd dlZd dlm	Z	 d dl
mZ d dlmZmZ edeej        ef         defd	            Zed
             Ze G d dej                              Ze G d dej                              Ze G d dej                              Ze G d dej                              Ze G d dej                              Ze G d dej                              Ze G d dej                              Ze G d dej                              Ze G d dej                              Ze G d dej                              Z e Z!e	 	 	 	 d*dej        d"e"d#e"d$ee"         d%edej        fd&            Z#ed+d)            Z$dS ),    )deque)OptionalUnionN)spaces)	PublicAPI)resizergb2grayenvreturnc                 "   t          | t                    sQt          | j        d          r+| j        j        t          | j        j                  dk    rdS dt          |           v S |                     d          p|                     d          S )a  Returns, whether a given env object or env descriptor (str) is an Atari env.

    Args:
        env: The gym.Env object or a string descriptor of the env (for example,
        "ale_py:ALE/Pong-v5").

    Returns:
        Whether `env` is an Atari environment.
    shapeN   FzAtariEnv<ALEzALE/zale_py:)
isinstancestrhasattrobservation_spacer   len
startswith)r
   s    y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/env/wrappers/atari_wrappers.pyis_atarir      s     c3 
CC)733	%+7C)/00A555S)) ~~f%%B	)B)BB    c                 v    | }	 t          ||          r|S t          |t          j                  r|j        }ndS 7)z8Returns the gym env wrapper of the given class, or None.TN)r   gymWrapperr
   )r
   cls
currentenvs      r   get_wrapper_by_clsr   &   sM     Jj#&& 	
CK00 	#JJ4r   c                       e Zd Zd Zd ZdS )ClipRewardEnvc                 F    t           j                            | |           d S N)r   RewardWrapper__init__selfr
   s     r   r#   zClipRewardEnv.__init__5   s!    ""4-----r   c                 *    t          j        |          S )z&Bin reward to {+1, 0, -1} by its sign.)npsign)r%   rewards     r   r)   zClipRewardEnv.reward8   s    wvr   N)__name__
__module____qualname__r#   r)    r   r   r   r   3   s2        . . .    r   r   c                        e Zd Zd Zd Zd ZdS )EpisodicLifeEnvc                 b    t           j                            | |           d| _        d| _        dS )zMake end-of-life == end-of-episode, but only reset on true game over.
        Done by DeepMind for the DQN and co. since it helps value estimation.
        r   TN)r   r   r#   liveswas_real_terminatedr$   s     r   r#   zEpisodicLifeEnv.__init__?   s1     	T3'''
#'   r   c                     | j                             |          \  }}}}}|| _        | j         j        j                                        }|| j        k     r|dk    rd}|| _        |||||fS )Nr   T)r
   stepr2   	unwrappedaler1   )r%   actionobsr)   
terminated	truncatedinfor1   s           r   r4   zEpisodicLifeEnv.stepG   sw    378==3H3H0VZD#-  "&,,..4:%!)) J
FJ	477r   c                     | j         r | j        j        di |\  }}n | j                            d          \  }}}}}| j        j        j                                        | _        ||fS )zReset only when lives are exhausted.
        This way all states are still reachable even though lives are episodic,
        and the learner need not know about any of this behind-the-scenes.
        r   r-   )r2   r
   resetr4   r5   r6   r1   )r%   kwargsr8   r;   _s        r   r=   zEpisodicLifeEnv.resetU   ss    
 # 	2&0000IC "&q!1!1CAq$X'+1133
Dyr   Nr*   r+   r,   r#   r4   r=   r-   r   r   r/   r/   =   sA        ( ( (8 8 8    r   r/   c                        e Zd Zd Zd Zd ZdS )FireResetEnvc                     t           j                            | |           |j                                        d         dk    sJ t          |j                                                  dk    sJ dS )zLTake action on reset.

        For environments that are fixed until firing.   FIRE   N)r   r   r#   r5   get_action_meaningsr   r$   s     r   r#   zFireResetEnv.__init__e   sm     	T3'''}002215????3=4466771<<<<<<r   c                     | j         j        di | | j                             d          \  }}}}}|s|r | j         j        di | | j                             d          \  }}}}}|s|r | j         j        di | ||fS )NrD   r   r-   )r
   r=   r4   )r%   r>   r8   r?   r9   r:   r;   s          r   r=   zFireResetEnv.resetm   s         +/8==+;+;(Q
Iq 	% 	%DHN$$V$$$.2hmmA.>.>+Q
It 	% 	%DHN$$V$$$Dyr   c                 6    | j                             |          S r!   r
   r4   r%   acs     r   r4   zFireResetEnv.stepw       x}}R   r   Nr*   r+   r,   r#   r=   r4   r-   r   r   rB   rB   c   sA        = = =  ! ! ! ! !r   rB   c                   .    e Zd Zd ZddddZd Zd ZdS )
FrameStackc                    t           j                            | |           || _        t	          g |          | _        |j        j        }t          j	        t          j        |j        j        |d          t          j        |j        j        |d          |d         |d         |d         |z  f|j        j                  | _        dS )	zStack k last frames.)maxlen)repeatsaxisr   rD   r   lowhighr   dtypeN)r   r   r#   kr   framesr   r   r   Boxr'   repeatrW   rX   rY   )r%   r
   rZ   shps       r   r#   zFrameStack.__init__}   s    T3'''Bq)))#)!'	#/3QRHHH305qrJJJq63q63q6A:.'-	"
 "
 "
r   Nseedoptionsc                    | j                             ||          \  }}t          | j                  D ]}| j                            |           |                                 |fS )Nr_   )r
   r=   rangerZ   r[   append_get_ob)r%   r`   ra   obinfosr?   s         r   r=   zFrameStack.reset   s`    HNNgN>>	Etv 	# 	#AKr""""||~~u$$r   c                     | j                             |          \  }}}}}| j                            |           |                                 ||||fS r!   )r
   r4   r[   rd   re   )r%   r7   rf   r)   r9   r:   r;   s          r   r4   zFrameStack.step   sO    26(--2G2G/FJ	42||~~vz9dBBr   c                 v    t          | j                  | j        k    sJ t          j        | j        d          S )Nr   rU   )r   r[   rZ   r'   concatenater%   s    r   re   zFrameStack._get_ob   s6    4;46))))~dk2222r   )r*   r+   r,   r#   r=   r4   re   r-   r   r   rP   rP   {   sd        
 
 
 !$ % % % % %C C C
3 3 3 3 3r   rP   c                       e Zd Zd Zd ZdS )FrameStackTrajectoryViewc                     t           j                            | |           |j        j        }|d         dk    sJ t          j        dd|d         |d         f|j        j                  | _        dS )z4No stacking. Trajectory View API takes care of this.r   rD   r      rV   N)r   r   r#   r   r   r   r\   rY   )r%   r
   r^   s      r   r#   z!FrameStackTrajectoryView.__init__   sq    T3'''#)1v{{{{!'CFCF#33;P;V"
 "
 "
r   c                 .    t          j        |d          S )NrS   rj   )r'   squeezer%   observations     r   rt   z$FrameStackTrajectoryView.observation   s    z+B////r   N)r*   r+   r,   r#   rt   r-   r   r   rn   rn      s2        
 
 
0 0 0 0 0r   rn   c                   "    e Zd ZddZd Zd ZdS )MaxAndSkipEnv   c                     t           j                            | |           t          j        d|j        j        z   |j        j                  | _        || _	        dS )z!Return only every `skip`-th frame)r   )rY   N)
r   r   r#   r'   zerosr   r   rY   _obs_buffer_skip)r%   r
   skips      r   r#   zMaxAndSkipEnv.__init__   sV    T3'''83(..c6K6Q
 
 
 


r   c                 :   d}dx}x}}t          | j                  D ]]}| j                            |          \  }}}}}|| j        dz
  k    r
|| j        d<   || j        dz
  k    r
|| j        d<   ||z  }|s|r n^| j                            d          }	|	||||fS )z:Repeat action, sum reward, and max over last observations.g        Nr   r   rD   rj   )rc   r{   r
   r4   rz   max)
r%   r7   total_rewardr9   r:   r;   ir8   r)   	max_frames
             r   r4   zMaxAndSkipEnv.step   s    (,,
,Ytz"" 	 	A7;x}}V7L7L4CYDJN""&) #DJN""&) #F"L Y  $((a(00	,
ItCCr   c                 &     | j         j        di |S )Nr-   )r
   r=   )r%   r>   s     r   r=   zMaxAndSkipEnv.reset   s    tx~'''''r   N)rw   r@   r-   r   r   rv   rv      sI           D D D&( ( ( ( (r   rv   c                   :    e Zd Zd	dZd Zd Zd Zd Zd Zd Z	dS )

MonitorEnvNc                     t           j                            | |           d| _        d| _        d| _        g | _        g | _        d| _        d| _	        dS )z4Record episodes stats prior to EpisodicLifeEnv, etc.Nr   )
r   r   r#   _current_reward
_num_steps_total_steps_episode_rewards_episode_lengths_num_episodes_num_returnedr$   s     r   r#   zMonitorEnv.__init__   sW    T3'''#  " "r   c                 :    | j         j        di |\  }}| j        t          | j                  | _        | j        N| j                            | j                   | j                            | j                   | xj	        dz  c_	        d| _        d| _        ||fS )NrD   r   r-   )
r
   r=   r   sumr   r   r   rd   r   r   )r%   r>   r8   r;   s       r   r=   zMonitorEnv.reset   s    "DHN,,V,,	T$ #D$9 : :D+!(()=>>>!((999!# Dyr   c                     | j                             |          \  }}}}}| xj        |z  c_        | xj        dz  c_        | xj        dz  c_        |||||fS )NrD   )r
   r4   r   r   r   )r%   r7   r8   rewr9   r:   r;   s          r   r4   zMonitorEnv.step   si    04f0E0E-S*i#1QCY44r   c                     | j         S r!   )r   rl   s    r   get_episode_rewardszMonitorEnv.get_episode_rewards       $$r   c                     | j         S r!   )r   rl   s    r   get_episode_lengthszMonitorEnv.get_episode_lengths   r   r   c                     | j         S r!   )r   rl   s    r   get_total_stepszMonitorEnv.get_total_steps   s      r   c              #      K   t          | j        t          | j                            D ]}| j        |         | j        |         fV  t          | j                  | _        d S r!   )rc   r   r   r   r   )r%   r   s     r   next_episode_resultszMonitorEnv.next_episode_results   sl      t)3t/D+E+EFF 	G 	GA(+T-B1-EFFFFF !677r   r!   )
r*   r+   r,   r#   r=   r4   r   r   r   r   r-   r   r   r   r      s        	 	 	 	   5 5 5% % %% % %! ! !8 8 8 8 8r   r   c                   "    e Zd ZddZd Zd ZdS )NoopResetEnv   c                     t           j                            | |           || _        d| _        d| _        |j                                        d         dk    sJ dS )zsSample initial states by taking random number of no-ops on reset.
        No-op is assumed to be action 0.
        Nr   NOOP)r   r   r#   noop_maxoverride_num_noopsnoop_actionr5   rG   )r%   r
   r   s      r   r#   zNoopResetEnv.__init__   s]     	T3''' "&}002215??????r   c                     | j         j        di | | j        | j        }nb	 | j        j                            d| j        dz             }n8# t          $ r+ | j        j                            d| j        dz             }Y nw xY w|dk    sJ d}t          |          D ]@}| j         
                    | j                  \  }}}}}|s|r | j         j        di |\  }}A||fS )z7Do no-op action for a number of steps in [1, noop_max].NrD   r   r-   )r
   r=   r   r5   	np_randomintegersr   AttributeErrorrandintrc   r4   r   )r%   r>   noopsr8   r?   r9   r:   r;   s           r   r=   zNoopResetEnv.reset
  s        ".+EEO099!T]Q=NOO! O O O088DMA<MNNOqyyyyu 	5 	5A26(--@P2Q2Q/CJ	4 5Y 5*DHN44V44	TDys   (A 2B Bc                 6    | j                             |          S r!   rJ   rK   s     r   r4   zNoopResetEnv.step  rM   r   N)r   rN   r-   r   r   r   r      sJ        @ @ @ @  *! ! ! ! !r   r   c                   $     e Zd Z fdZd Z xZS )NormalizedImageEnvc                      t                      j        |i | t          j                            dd| j        j        t          j                  | _        d S )Ng            ?)r   rY   )	superr#   r   r   r\   r   r   r'   float32)r%   argsr>   	__class__s      r   r#   zNormalizedImageEnv.__init__%  sV    $)&)))!$(.*	 "0 "
 "
r   c                 L    |                     t          j                  dz  dz
  S )Ng      `@r   )astyper'   r   rs   s     r   rt   zNormalizedImageEnv.observation0  s"    ""2:..6#==r   )r*   r+   r,   r#   rt   __classcell__)r   s   @r   r   r   #  sG        
 
 
 
 
> > > > > > >r   r   c                   "    e Zd ZddefdZd ZdS )GrayScaleAndResizeT	grayscalec                     t           j                            | |           || _        || _        || _        t          j        dd| j        | j        |rdndft          j	                  | _
        dS )z.Warp frames to the specified size (dim x dim).r   rp   rD   rF   rV   N)r   ObservationWrapperr#   widthheightr   r   r\   r'   uint8r   )r%   r
   dimr   s       r   r#   zGrayScaleAndResize.__init__6  sq    ''c222
"!';
,AAAB(	"
 "
 "
r   c                     | j         r:t          |          }t          || j        | j                  }|d d d d d f         S t          || j        | j                  S )N)r   r   )r   r	   r   r   r   )r%   frames     r   rt   zGrayScaleAndResize.observationC  sb    > 	GUOOE5DJGGGEAAAt$$%4:FFFFr   N)T)r*   r+   r,   boolr#   rt   r-   r   r   r   r   4  sH        
 
D 
 
 
 
G G G G Gr   r   @   rw   Tr   	frameskip
framestackr   c                    t           j                            | d          } t          | ||          } t	          |           } |dk    r| j        J t          | |          } t          | d          } t          |           } d	| j	        
                                v rt          |           } |rt          | |
          } | S )ae  Wraps `env` for new-API-stack-friendly RLlib Atari experiments.

    Note that we assume reward clipping is done outside the wrapper.

    Args:
        env: The env object to wrap.
        dim: Dimension to resize observations to (dim x dim).
        frameskip: Whether to skip n frames and max over them (keep brightest pixels).
        framestack: Whether to stack the last n (grayscaled) frames. Note that this
            step happens after(!) a possible frameskip step, meaning that if
            frameskip=4 and framestack=2, we would perform the following over this
            trajectory:
            actual env timesteps: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 -> ...
            frameskip:            ( max ) ( max ) ( max   ) ( max     )
            framestack:           ( stack       ) (stack              )

    Returns:
        The wrapped gym.Env.
    i )max_episode_steps)r   r   rD   Nr|   r   r   rE   )rZ   )r   wrappers	TimeLimit	WarpFramer   specrv   r   r/   r5   rG   rB   rP   )r
   r   r   r   r   s        r   wrap_atari_for_new_api_stackr   O  s    < ,
 
 
 
?
?C
CSI
6
6
6C
S
!
!C1}}x###Ci000 sR
(
(
(C
#

C2244443 ,
+++Jr   T   Fc                 8   t          |           } t          | d          } | j        |du rt          | d          } t	          |           } d| j                                        v rt          |           } t          | |          } |du rt          | d          } | S )a   Configure environment for DeepMind-style Atari.

    Note that we assume reward clipping is done outside the wrapper.

    Args:
        env: The env object to wrap.
        dim: Dimension to resize observations to (dim x dim).
        framestack: Whether to framestack observations.
    r   r   NTrw   r   rE   )
r   r   r   rv   r/   r5   rG   rB   r   rP   )r
   r   r   noframeskips       r   wrap_deepmindr     s     S//C
sR
(
(
(C
xt 3 3Ca(((
#

C2244443
C

C Ta  Jr   )r   rw   NT)r   TF)%collectionsr   typingr   r   	gymnasiumr   numpyr'   r   ray.rllib.utils.annotationsr   ray.rllib.utils.imagesr   r	   Envr   r   r   r   r"   r   r   r/   rB   rP   r   rn   rv   r   r   r   r   r   intr   r   r-   r   r   <module>r      s         " " " " " " " "               1 1 1 1 1 1 3 3 3 3 3 3 3 3 C%% C$ C C C C2 	 	 	     C%    " " " " "ck " " "J ! ! ! ! !3; ! ! !. 3 3 3 3 3 3 3 3< 0 0 0 0 0s5 0 0 0 ( ( ( ( (CK ( ( (B /8 /8 /8 /8 /8 /8 /8 /8d !! !! !! !! !!3; !! !! !!H > > > > >/ > > >  G G G G G/ G G G. 	   $2 2	2	2 2 	2
 2 	W2 2 2 2j      r   