
    &`iC                         d dl mZmZ d dlmZmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZ e G d	 d
e                      ZdS )    )OptionalUnion)DiscreteMultiDiscreteSpace)ActionDistribution)Categorical)TorchCategorical)OldAPIStackoverride)StochasticSampling)
TensorTypec                        e Zd ZdZdddedee         def fdZ e	e
          	 dd	ed
eeef         def fd            Z xZS )SoftQzSpecial case of StochasticSampling w/ Categorical and temperature param.

    Returns a stochastic sample from a Categorical parameterized by the model
    output divided by the temperature. Returns the argmax iff explore=False.
    g      ?temperatureaction_space	frameworkr   c                    t          |t          t          f          sJ  t                      j        |fd|i| || _        dS )aK  Initializes a SoftQ Exploration object.

        Args:
            action_space: The gym action space used by the environment.
            temperature: The temperature to divide model outputs by
                before creating the Categorical distribution to sample from.
            framework: One of None, "tf", "torch".
        r   N)
isinstancer   r   super__init__r   )selfr   r   r   kwargs	__class__s        v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/utils/exploration/soft_q.pyr   zSoftQ.__init__   sR      ,=(ABBBBBEEEfEEE&    Taction_distributiontimestepexplorec                     t          |          }t          |t          t          f          sJ  ||j        | j        | j                  }t                                          |||          S )Nr   )r   r   r    )	type
issubclassr	   r
   inputsmodelr   r   get_exploration_action)r   r   r   r    clsdistr   s         r   r&   zSoftQ.get_exploration_action)   sx     &''#-=>????? s&-tztGWXXXww-- $x . 
 
 	
r   )T)__name__
__module____qualname____doc__r   r   strfloatr   r   r   r   r   intr   boolr&   __classcell__)r   s   @r   r   r      s          !' ' '' C=	'
 ' ' ' ' ' '( X !!
 	
 
/
 Z(
 	
 
 
 
 
 "!
 
 
 
 
r   r   N)typingr   r   gymnasium.spacesr   r   r   ray.rllib.models.action_distr   "ray.rllib.models.tf.tf_action_distr	   (ray.rllib.models.torch.torch_action_distr
   ray.rllib.utils.annotationsr   r   /ray.rllib.utils.exploration.stochastic_samplingr   ray.rllib.utils.frameworkr   r    r   r   <module>r;      s    " " " " " " " " ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; : : : : : : E E E E E E = = = = = = = = N N N N N N 0 0 0 0 0 0 *
 *
 *
 *
 *
 *
 *
 *
 *
 *
r   