
    &`i3                         d dl Z d dlmZmZ d dlZd dlZd dlm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlmZ d dlmZmZmZmZ d d	lmZ  e            \  ZZZ e            \  ZZe G d
 de                      ZdS )    N)OptionalUnion)ActionDistribution)ModelV2)OldAPIStackoverride)Exploration)Random)
TensorTypeget_variabletry_import_tftry_import_torch)zero_logps_from_actionsc            	            e Zd ZdZdddej        j        dedede	f fdZ
 ee          d	d
ddedeee	ef                  defd            Zd Zdedeee	f         deeef         fdZ xZS )StochasticSamplinga+  An exploration that simply samples from a distribution.

    The sampling can be made deterministic by passing explore=False into
    the call to `get_exploration_action`.
    Also allows for scheduled parameters for the distributions, such as
    lowering stddev, temperature, etc.. over time.
    r   )random_timestepsaction_space	frameworkmodelr   c                   |J  t                      j        |f||d| || _        t          |f| j        | j        d|| _        t          t          j	        dt          j
                  | j        dt          j
                  | _        dS )a  Initializes a StochasticSampling Exploration object.

        Args:
            action_space: The gym action space used by the environment.
            framework: One of None, "tf", "torch".
            model: The ModelV2 used by the owning Policy.
            random_timesteps: The number of timesteps for which to act
                completely randomly. Only after this number of timesteps,
                actual samples will be drawn to get exploration actions.
        N)r   r   r   timestep)r   tf_namedtype)super__init__r   r
   r   r   random_explorationr   nparrayint64last_timestep)selfr   r   r   r   kwargs	__class__s         /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/utils/exploration/stochastic_sampling.pyr   zStochasticSampling.__init__"   s    & $$$RUiRR6RRR !1"(#
 $
dn#
 #
HN#
 #

 *HQ!!n(	
 
 
    NT)r   exploreaction_distributionr   r&   c                t    | j         dk    r|                     |||          S |                     |||          S )Ntorch)r   _get_torch_exploration_action_get_tf_exploration_action_op)r!   r'   r   r&   s       r$   get_exploration_actionz)StochasticSampling.get_exploration_actionG   sQ     >W$$55#Xw   55#Xw  r%   c           	         	  j         dz   }t                              t                              | j        k                fdfd          	                                t                              t          |t                    rt                              |          n|	fdfd          }t                              t          j	        
                    |t                              | j        k                        fdt          j        t                              } j        d	k    r j                             d           ||fS | t                                j         d          nt                                j         |          }t                               |g          5  ||fcd d d            S # 1 swxY w Y   d S )
N   c                  H    j                              d          d         S )NTr&   r   )r   get_tf_exploration_action_op)action_distr!   s   r$   <lambda>zBStochasticSampling._get_tf_exploration_action_op.<locals>.<lambda>]   s1    'DD E   r%   c                  ,                                      S N)sampler2   s   r$   r3   zBStochasticSampling._get_tf_exploration_action_op.<locals>.<lambda>b   s    [//11 r%   )predtrue_fnfalse_fnc                       S r5    )stochastic_actionss   r$   r3   zBStochasticSampling._get_tf_exploration_action_op.<locals>.<lambda>h   s    . r%   c                       S r5   r<   )deterministic_actionss   r$   r3   zBStochasticSampling._get_tf_exploration_action_op.<locals>.<lambda>i   s    2 r%   )r9   r:   c                  ,                                      S r5   )sampled_action_logpr7   s   r$   r3   zBStochasticSampling._get_tf_exploration_action_op.<locals>.<lambda>p   s    K;;== r%   tf2)r    tfcondconvert_to_tensorr   deterministic_sample
isinstanceboolconstantmathlogical_and	functoolspartialr   r   
assign_addtf1assigncontrol_dependencies)
r!   r2   r   r&   tsactionlogp	assign_opr?   r=   s
   ``      @@r$   r+   z0StochasticSampling._get_tf_exploration_action_opX   s*   !#WW%%b4+@&@AA    
 2111 % 
 
 !, @ @ B B$.w$=$=JBKK   7....2222  
 
 wwG--bD4I.IJJ  >===&'>@UVV  
 
 >U""))!,,,4< # t11555ZZ 2H== 
 ))9+66 $ $t|$ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $s   7GGGr2   c                 f   ||n	| j         dz   | _         |rY| j         | j        k     r | j                            |d          \  }}ni|                                }|                                }n@|                                }t                              |                                          }||fS )Nr.   Tr0   )	r    r   r   get_torch_exploration_actionr6   rA   rF   r)   
zeros_like)r!   r2   r   r&   rS   rT   s         r$   r*   z0StochasticSampling._get_torch_exploration_action   s     !,HH$2Dq2H 	
  	G!D$999#6SS  T    
 %++--"6688 !5577F##K$C$C$E$EFFDt|r%   )__name__
__module____qualname____doc__gymspacesSpacestrr   intr   r   r	   r   r   r   r   rH   r,   r+   r*   __classcell__)r#   s   @r$   r   r      s>         !"#
 #
 #
j&#
 	#

 #
 #
 #
 #
 #
 #
 #
J Xk
 6:   0 5j12	
     '$ '$ '$R' 
C( z4'(	       r%   r   ) rL   typingr   r   	gymnasiumr]   numpyr   ray.rllib.models.action_distr   ray.rllib.models.modelv2r   ray.rllib.utils.annotationsr   r   'ray.rllib.utils.exploration.explorationr	   "ray.rllib.utils.exploration.randomr
   ray.rllib.utils.frameworkr   r   r   r   ray.rllib.utils.tf_utilsr   rO   rC   tfvr)   _r   r<   r%   r$   <module>ro      s^       " " " " " " " "         ; ; ; ; ; ; , , , , , , = = = = = = = = ? ? ? ? ? ? 5 5 5 5 5 5            = < < < < <}Rq D D D D D D D D D Dr%   