
    &`ia                     p    d dl Zd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZ e G d d                      ZdS )    N)ModelV2)OldAPIStack)ListModelConfigDict
TensorTypeUnionc            
           e Zd ZdZdee         defdZdefdZdefdZ	defdZ
d	edefd
Zdd defdZdefdZdd defdZdefdZeedej        dedeeej        f         fd                        ZdS )ActionDistributionzThe policy action distribution of an agent.

    Attributes:
        inputs: input vector to compute samples from.
        model (ModelV2): reference to model producing the inputs.
    inputsmodelc                 "    || _         || _        dS )a  Initializes an ActionDist object.

        Args:
            inputs: input vector to compute samples from.
            model (ModelV2): reference to model producing the inputs. This
                is mainly useful if you want to use model variables to compute
                action outputs (i.e., for autoregressive action distributions,
                see examples/autoregressive_action_dist.py).
        N)r   r   )selfr   r   s      p/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/models/action_dist.py__init__zActionDistribution.__init__   s     


    returnc                     t           )z+Draw a sample from the action distribution.NotImplementedErrorr   s    r   samplezActionDistribution.sample       !!r   c                     t           )z
        Get the deterministic "sampling" output from the distribution.
        This is usually the max likelihood output, i.e. mean for Normal, argmax
        for Categorical, etc..
        r   r   s    r   deterministic_samplez'ActionDistribution.deterministic_sample#   s
     "!r   c                     t           )z7Returns the log probability of the last sampled action.r   r   s    r   sampled_action_logpz&ActionDistribution.sampled_action_logp+   r   r   xc                     t           )z.The log-likelihood of the action distribution.r   )r   r   s     r   logpzActionDistribution.logp/   r   r   otherc                     t           )z3The KL-divergence between two action distributions.r   r   r    s     r   klzActionDistribution.kl3   r   r   c                     t           )z'The entropy of the action distribution.r   r   s    r   entropyzActionDistribution.entropy7   r   r   c                 ,    |                      |          S )zThe KL-divergence between two action distributions.

        This differs from kl() in that it can return an array for
        MultiDiscrete. TODO(ekl) consider removing this.
        )r#   r"   s     r   multi_klzActionDistribution.multi_kl;   s     wwu~~r   c                 *    |                                  S )zThe entropy of the action distribution.

        This differs from entropy() in that it can return an array for
        MultiDiscrete. TODO(ekl) consider removing this.
        )r%   r   s    r   multi_entropyz ActionDistribution.multi_entropyC   s     ||~~r   action_spacemodel_configc                     t           )a|  Returns the required shape of an input parameter tensor for a
        particular action space and an optional dict of distribution-specific
        options.

        Args:
            action_space (gym.Space): The action space this distribution will
                be used for, whose shape attributes will be used to determine
                the required shape of the input parameter tensor.
            model_config: Model's config dict (as defined in catalog.py)

        Returns:
            model_output_shape (int or np.ndarray of ints): size of the
                required input vector (minus leading batch dimension).
        r   )r*   r+   s     r   required_model_output_shapez.ActionDistribution.required_model_output_shapeK   s
    & "!r   N)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r#   r%   r'   r)   staticmethodr   gymSpacer   r   intnpndarrayr-    r   r   r
   r
   	   s        tJ/     "
 " " " ""j " " " ""Z " " " ""j "Z " " " "", " " " " "" " " " "2 z    z     "i"/>"	sBJ	" " " [ \" " "r   r
   )	gymnasiumr3   numpyr6   ray.rllib.models.modelv2r   ray.rllib.utils.annotationsr   ray.rllib.utils.typingr   r   r   r   r
   r8   r   r   <module>r>      s            , , , , , , 3 3 3 3 3 3 K K K K K K K K K K K K T" T" T" T" T" T" T" T" T" T"r   