
    &`i$                        d dl Z d dlmZmZ d dlZd dlZd dlZd dl	m
Z
 d dlmZ d dlmZmZ d dlmZmZ d dlmZmZmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ  e            \  Z Z!Z" e            \  Z#Z$e G d de                      Z%dS )    N)OptionalUnion)ActionDistribution)TorchMultiActionDistribution)OldAPIStackoverride)Exploration
TensorType)get_variabletry_import_tftry_import_torch)from_config)convert_to_numpy)PiecewiseScheduleSchedule)	FLOAT_MINc                       e Zd ZdZddd ed          dddej        j        d	ed
e	de	dedede
e         f fdZ ee          dddedeeef         de
eeef                  fd            Zdedeeef         deeef         ddfdZdededeeef         ddfdZ ee          d de
d         fd            Z ee          d dede
d         ddfd            Z xZS )!EpsilonGreedya
  Epsilon-greedy Exploration class that produces exploration actions.

    When given a Model's output and a current epsilon value (based on some
    Schedule), it produces a random action (if rand(1) < eps) or
    uses the model-computed one (if rand(1) >= eps).
    g      ?g?r   g     j@N)initial_epsilonfinal_epsilonwarmup_timestepsepsilon_timestepsepsilon_scheduleaction_space	frameworkr   r   r   r   r   c                   |J  t                      j        d	||d| t          t          ||          p"t	          d|f||f||z   |fg|| j                  | _        t          t          j	        dt          j
                  |dt          j
                  | _        | j        dk    r|                                 | _        dS dS )
aT  Create an EpsilonGreedy exploration class.

        Args:
            action_space: The action space the exploration should occur in.
            framework: The framework specifier.
            initial_epsilon: The initial epsilon value to use.
            final_epsilon: The final epsilon value to use.
            warmup_timesteps: The timesteps over which to not change epsilon in the
                beginning.
            epsilon_timesteps: The timesteps (additional to `warmup_timesteps`)
                after which epsilon should always be `final_epsilon`.
                E.g.: warmup_timesteps=20k epsilon_timesteps=50k -> After 70k timesteps,
                epsilon will reach its final value.
            epsilon_schedule: An optional Schedule object
                to use (instead of constructing one from the given parameters).
        N)r   r   )r   r   )	endpointsoutside_valuer   timestep)r   tf_namedtypetf )super__init__r   r   r   r   r   r   nparrayint64last_timestep	get_state_tf_state_op)
selfr   r   r   r   r   r   r   kwargs	__class__s
            ~/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/utils/exploration/epsilon_greedy.pyr%   zEpsilonGreedy.__init__   s    8 $$$RliRR6RRR +&)!
 !
 !
 
!
O$!?3!$55}E
 (n
 
 
 	 *HQ!!(	
 
 
 >T!! $ 0 0D "!    T)exploreaction_distributionr   r1   c                p    | j         dv r|                     |||          S |                     |||          S )N)tf2r"   )r   _get_tf_exploration_action_op_get_torch_exploration_action)r,   r2   r   r1   s       r/   get_exploration_actionz$EpsilonGreedy.get_exploration_actionV   sQ     >]**55#Wh   55#Wh  r0   returnz	tf.Tensorc                 l  
 |j         }|                     ||n| j                  }t                              |d          t                              |          d         }t                              t                              |t          j        j	                  t          
                    |          t          j        j	        z  t          
                    |                    }t                              t          j                            |d          d          t          j                            t                              |g          ddt          j                  |k     
t                              t#          |t$                    r&t                              |t          j                  n|
fdfd	          }| j        d
k    r<| j        d         s/|| _        |t                              |t          j                  fS t.                              | j        t                              |t          j                            }	t.                              |	g          5  |t                              |t          j                  fcddd           S # 1 swxY w Y   dS )a!  TF method to produce the tf op for an epsilon exploration action.

        Args:
            action_distribution: The instantiated ActionDistribution object
                to work with when creating exploration actions.

        Returns:
            The tf exploration-action op.
        N   axisr   )minvalmaxvalr!   r!   c                  <    t                                          S N)r"   where)chose_randomexploit_actionrandom_actionss   r/   <lambda>z=EpsilonGreedy._get_tf_exploration_action_op.<locals>.<lambda>   s    RXXlNNSS r0   c                       S rA   r#   )rD   s   r/   rF   z=EpsilonGreedy._get_tf_exploration_action_op.<locals>.<lambda>   s    ^ r0   )predtrue_fnfalse_fnr4   eager_tracing)inputsr   r)   r"   argmaxshaperB   equalfloat32min	ones_likesqueezerandomcategoricaluniformstackcond
isinstanceboolconstantr   policy_config
zeros_liketf1assigncastr(   control_dependencies)r,   r2   r1   r   q_valuesepsilon
batch_sizerandom_valid_action_logitsaction	assign_oprC   rD   rE   s             @@@r/   r5   z+EpsilonGreedy._get_tf_exploration_action_oph   s     '-'' ,HH$2D
 

 8!44XXh''*
 &(XXHHXrz~..LL""RZ^3LL""&
 &
"
 I!!"<a@@q $ 
 

 I*&&q"*     	 '4((WBG444SSSSSS++++  
 
 >U""4+=o+N"!)D2==rz=BBBB

4#5rwwx7R7RSSI))9+66 G Gr}}V2:}FFFG G G G G G G G G G G G G G G G G Gs   4(J))J-0J-ztorch.Tensorc                    |j         }|| _        |                                }|                                d         }t                              |t          j                  }|r|                     | j                  }t          |t                    rt          j        |          }t          |          D ]}	t          j                    |k     rst          j        | j                                                  }
t          t!          |                    D ]+}t                              |
|                   ||         |	<   ,t          j        |j        |          }||fS t                              |t*          k    t                              |          dz  t                              |                    }t                              t                              |d          d          }t                              t                              |f                                                              | j                  |k     ||          }||fS ||fS )a   Torch method to produce an epsilon exploration action.

        Args:
            action_distribution: The instantiated
                ActionDistribution object to work with when creating
                exploration actions.

        Returns:
            The exploration-action.
        r   r?   g        r:   r;   )rL   r)   deterministic_samplesizetorchzerosfloatr   rY   r   treeflattenrangerT   r   samplelentensorunflatten_asaction_space_structrB   r   rR   rS   multinomialemptyuniform_todevice)r,   r2   r1   r   rb   rD   rd   action_logprc   irandom_actionjre   rE   rf   s                  r/   r6   z+EpsilonGreedy._get_torch_exploration_action   s     '-%,AACC]]__Q'
kk*EKk@@  (	/++D,>??G-/KLL "+!%n!=!=z** R RA}00(,T5F5M5M5O5O(P(P!&s>':':!;!; R RA38<<a@P3Q3QN1-a00!%!2';^" " &{22
 .3[[	)OOH--3OOH--. .* "'%%&@!DD1 "/ " "
 KK..7799<<T[IIGS""  {** ";..r0   sessz
tf.Sessionc                     |r|                     | j                  S |                     | j                  }| j        dk    rt          |          n|| j        dk    rt          | j                  n| j        dS )Nr"   )cur_epsilonr)   )runr+   r   r)   r   r   )r,   r   epss      r/   r*   zEpsilonGreedy.get_state   s     	/88D-...##D$67748Nd4J4J+C000PS~%% .d.@AAA#	
 
 	
r0   statec                     | j         dk    r$| j                            |d         |           d S t          | j        t                    r|d         | _        d S | j                            |d                    d S )Nr"   r)   )session)r   r)   loadrY   intr_   )r,   r   r   s      r/   	set_statezEpsilonGreedy.set_state   s    >T!!##E/$:D#IIIII*C00 	>!&!7D%%eO&<=====r0   rA   )__name__
__module____qualname____doc__r   gymspacesSpacestrrm   r   r   r%   r   r	   r   r   r
   rZ   r7   r5   r6   r*   dictr   __classcell__)r.   s   @r/   r   r      sI         "%# !!$S/351 51 51j&51 	51
 51 51 51 51 #8,51 51 51 51 51 51n Xk 6:   0 Z(	
 %j 012   "9G/9G tZ'(9G Z(	9G
 
9G 9G 9G 9Gv?//?/ ?/ Z(	?/
 
?/ ?/ ?/ ?/B Xk	
 	
h|4 	
 	
 	
 	
 Xk> >t >8L+A >T > > > > > > > >r0   r   )&rT   typingr   r   	gymnasiumr   numpyr&   rn   ray.rllib.models.action_distr   (ray.rllib.models.torch.torch_action_distr   ray.rllib.utils.annotationsr   r   'ray.rllib.utils.exploration.explorationr	   r
   ray.rllib.utils.frameworkr   r   r   ray.rllib.utils.from_configr   ray.rllib.utils.numpyr   ray.rllib.utils.schedulesr   r   ray.rllib.utils.torch_utilsr   r^   r"   tfvrk   _r   r#   r0   r/   <module>r      st    " " " " " " " "          ; ; ; ; ; ; Q Q Q Q Q Q = = = = = = = = K K K K K K K K S S S S S S S S S S 3 3 3 3 3 3 2 2 2 2 2 2 A A A A A A A A 1 1 1 1 1 1}Rq `> `> `> `> `>K `> `> `> `> `>r0   