
    &`i#                        d dl mZmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZ d dlmZ d d	lmZmZmZmZ d d
lmZ d dlmZ d dlmZ d dlmZ  e            \  ZZ Z! e            \  Z"Z#e G d de                      Z$dS )    )OptionalUnionN)Space)ActionDistribution)ModelV2)OldAPIStackoverride)Exploration)Random)
TensorTypeget_variabletry_import_tftry_import_torch)convert_to_numpy)Schedule)PiecewiseSchedule)zero_logps_from_actionsc                   x    e Zd ZdZdddddddd	ed
ededededededede	e
         f fdZ ee          dddedeeef         defd            Zdededeeef         fdZdededeeef         fdZ ee          d"de	d         fd            Z ee          d"dede	d         d dfd!            Z xZS )#GaussianNoisea  An exploration that adds white noise to continuous actions.

    If explore=True, returns actions plus scale (annealed over time) x
    Gaussian noise. Also, some completely random period is possible at the
    beginning.

    If explore=False, returns the deterministic action.
    i  g?g      ?g{Gz?i'  N)random_timestepsstddevinitial_scalefinal_scalescale_timestepsscale_scheduleaction_space	frameworkmodelr   r   r   r   r   r   c                   |J  t                      j        |f||d|
 || _        t          |f| j        | j        d|
| _        || _        |	pt          ||f||z   |fg|| j                  | _	        t          t          j        dt          j                  | j        dt          j                  | _        | j        dk    r|                                 | _        dS dS )a  Initializes a GaussianNoise instance.

        Args:
            random_timesteps: The number of timesteps for which to act
                completely randomly. Only after this number of timesteps, the
                `self.scale` annealing process will start (see below).
            stddev: The stddev (sigma) to use for the
                Gaussian noise to be added to the actions.
            initial_scale: The initial scaling weight to multiply
                the noise with.
            final_scale: The final scaling weight to multiply
                the noise with.
            scale_timesteps: The timesteps over which to linearly anneal
                the scaling factor (after(!) having used random actions for
                `random_timesteps` steps).
            scale_schedule: An optional Schedule object
                to use (instead of constructing one from the given parameters).
        N)r   r   )	endpointsoutside_valuer   r   timestep)r   tf_namedtypetf)super__init__r   r   r   r   random_explorationr   r   r   r   nparrayint64last_timestep	get_state_tf_state_op)selfr   r   r   r   r   r   r   r   r   kwargs	__class__s              ~/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/utils/exploration/gaussian_noise.pyr'   zGaussianNoise.__init__%   s$   @ $$$RUiRR6RRR !1"(#
 $
dn#
 #
HN#
 #
 , 
0A!=1!O3[A &n1
 1
 1
 *HQ!!n(	
 
 
 >T!! $ 0 0D "!    Texploreaction_distributionr"   r5   c                t    | j         dk    r|                     |||          S |                     |||          S )Ntorch)r   _get_torch_exploration_action_get_tf_exploration_action_op)r/   r6   r"   r5   s       r2   get_exploration_actionz$GaussianNoise.get_exploration_actionf   sQ     >W$$55#Wh   55#Wh  r3   action_distc                 \   	
 ||n j         }|                                	                     |          t          j                            t                              	           j                  z  
 j        	                    ||          \  }t          
                    t                              | j        k               fd	
 fd          t          
                    t          |t                    r&t                              |t          j                  n|fd	fd          }t!          	          } j        dk    r^| j                             d	           n= j                             t                              |t          j                             ||fS | t,                               j         d	          nt,                               j         |          }t,                              |g          5  ||fcd d d            S # 1 swxY w Y   d S )
N)r   c                       S N )random_actionss   r2   <lambda>z=GaussianNoise._get_tf_exploration_action_op.<locals>.<lambda>   s    N r3   c                      t                                z   j        j        t                                          z  j        j        t                                          z            S r?   )r%   clip_by_valuer   low	ones_likehigh)deterministic_actionsgaussian_sampler/   s   r2   rB   z=GaussianNoise._get_tf_exploration_action_op.<locals>.<lambda>   sU    R--%7!%5J(K(KK!&6K)L)LL  r3   )predtrue_fnfalse_fn)r$   c                       S r?   r@   )stochastic_actionss   r2   rB   z=GaussianNoise._get_tf_exploration_action_op.<locals>.<lambda>   s    . r3   c                       S r?   r@   )rH   s   r2   rB   z=GaussianNoise._get_tf_exploration_action_op.<locals>.<lambda>   s    2 r3   tf2   )r,   deterministic_sampler   r%   randomnormalshaper   r(   get_tf_exploration_action_opcondconvert_to_tensorr   
isinstanceboolconstantr   r   
assign_addassigncastr+   tf1control_dependencies)r/   r<   r5   r"   ts_actionlogp	assign_oprH   rI   rA   rN   s   `        @@@@r2   r:   z+GaussianNoise._get_tf_exploration_action_opx   s    "-XX43E !, @ @ B B --b11BI4D4DHH*++DK 5E 5
 5
 

 !3PP
 
  WW%%b4+@&@AA****      % 
 
 '4((WBG444....2222  
 
 ''<== >U"""--a0000"))"''(BH*E*EFFF4< # t11555ZZ 2H== 
 ))9+66 $ $t|$ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $s   H!!H%(H%c           
         ||n	| j         dz   | _         |re| j         | j        k     r!| j                            |d          \  }}nH|                                }|                     | j                   }|t                              t                              |	                                          | j
                                      | j                  z  }t                              t                              ||z   t                              | j        j        t          j        | j                            t                              | j        j        t          j        | j                            }n|                                }t                              |	                                d         ft          j        | j                  }	||	fS )NrQ   Tr4   )meanstd)r$   devicer   )r,   r   r(   get_torch_exploration_actionrR   r   r8   rT   zerossizer   tori   minmaxtensorr   rE   float32rG   )
r/   r<   r5   r"   rc   rb   det_actionsscalerI   rd   s
             r2   r9   z+GaussianNoise._get_torch_exploration_action   s    !,HH$2Dq2H 	
  	8!D$999 3PP Q  	
 *>>@@++D,>??"'%,,[%5%5%7%788dk +7 + +"T[//#" II#o5 -1"'-#'; %    LL).emDK !    !5577F {{FKKMM!,.emDK{XXt|r3   sessz
tf.Sessionc                     |r|                     | j                  S |                     | j                  }| j        dk    rt          |          n|| j        dk    rt          | j                  n| j        dS )zReturns the current scale value.

        Returns:
            Union[float,tf.Tensor[float]]: The current scale value.
        r%   )	cur_scaler,   )runr.   r   r,   r   r   )r/   rt   rs   s      r2   r-   zGaussianNoise.get_state   s      	/88D-...##D$67748Nd4J4J)%000PU~%% .d.@AAA#	
 
 	
r3   statereturnc                     | j         dk    r$| j                            |d         |           d S t          | j        t                    r|d         | _        d S | j                            |d                    d S )Nr%   r,   )session)r   r,   loadrY   intr]   )r/   rx   rt   s      r2   	set_statezGaussianNoise.set_state   s    >T!!##E/$:D#IIIII*C00 	>!&!7D%%eO&<=====r3   r?   )__name__
__module____qualname____doc__r   strr   r}   floatr   r   r'   r	   r
   r   r   r   rZ   r;   r:   r9   r-   dictr~   __classcell__)r1   s   @r2   r   r      s         !%"!$-1?1 ?1 ?1?1 	?1
 ?1 ?1 ?1 ?1 ?1 ?1 !*?1 ?1 ?1 ?1 ?1 ?1B Xk    0 Z(	
    "7$'7$ 7$ Z(	7$ 7$ 7$ 7$r-'- - Z(	- - - -^ Xk
 
h|4 
 
 
 
  Xk> >t >8L+A >T > > > > > > > >r3   r   )%typingr   r   numpyr)   gymnasium.spacesr   ray.rllib.models.action_distr   ray.rllib.models.modelv2r   ray.rllib.utils.annotationsr   r	   'ray.rllib.utils.exploration.explorationr
   "ray.rllib.utils.exploration.randomr   ray.rllib.utils.frameworkr   r   r   r   ray.rllib.utils.numpyr   ray.rllib.utils.schedulesr   ,ray.rllib.utils.schedules.piecewise_scheduler   ray.rllib.utils.tf_utilsr   r_   r%   tfvr8   rb   r   r@   r3   r2   <module>r      s   " " " " " " " "     " " " " " " ; ; ; ; ; ; , , , , , , = = = = = = = = ? ? ? ? ? ? 5 5 5 5 5 5            3 2 2 2 2 2 . . . . . . J J J J J J < < < < < <}Rq ]> ]> ]> ]> ]>K ]> ]> ]> ]> ]>r3   