
    &`ij                        d dl Z d dlmZ d dlmZ d dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZmZmZ d dlmZmZ d dlmZmZ d d	lmZ d d
lmZmZmZmZmZ  e            \  Z Z!Z" e            Z#e G d de                      Z$e G d de$                      Z%ede&fd            Z'e G d de$                      Z(e G d de%                      Z)e G d de$                      Z*e G d de$                      Z+e G d de$                      Z,e G d de$                      Z-e G d de$                      Z.e G d d e$                      Z/e G d! d"e$                      Z0dS )#    N)log)Optional)ActionDistribution)ModelV2)MAX_LOG_NN_OUTPUTMIN_LOG_NN_OUTPUTSMALL_NUMBER)OldAPIStackoverride)try_import_tftry_import_tfp)get_base_struct_from_space)ListModelConfigDict
TensorTypeTupleUnionc                        e Zd ZdZ ee          dee         def fd            Z	defdZ
 ee          defd            Z ee          defd            Z xZS )	TFActionDistributionz9TF-specific extensions for building action distributions.inputsmodelc                     t                                          ||           |                                 | _        |                     | j                  | _        d S N)super__init___build_sample_op	sample_oplogpsampled_action_logp_op)selfr   r   	__class__s      v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/models/tf/tf_action_dist.pyr   zTFActionDistribution.__init__   sL    '''..00&*ii&?&?###    returnc                     t           )zImplement this instead of sample(), to enable op reuse.

        This is needed since the sample op is non-deterministic and is shared
        between sample() and sampled_action_logp().
        )NotImplementedErrorr    s    r"   r   z%TFActionDistribution._build_sample_op   s
     "!r#   c                     | j         S )z+Draw a sample from the action distribution.)r   r'   s    r"   samplezTFActionDistribution.sample'   s     ~r#   c                     | j         S )z2Returns the log probability of the sampled action.)r   r'   s    r"   sampled_action_logpz(TFActionDistribution.sampled_action_logp,   s     **r#   )__name__
__module____qualname____doc__r   r   r   r   r   r   r   r)   r+   __classcell__r!   s   @r"   r   r      s        CCX !!@tJ/ @ @ @ @ @ @ "!@
"* " " " " X !!
    "! X !!+Z + + + "!+ + + + +r#   r   c                   f    e Zd ZdZ	 ddee         dedef fdZ e	e
          defd	            Z e	e
          d
edefd            Z e	e
          defd            Z e	e
          de
defd            Z e	e          defd            Ze e	e
          d                         Z xZS )Categoricalz4Categorical distribution for discrete action spaces.N      ?r   r   temperaturec                 r    |dk    s
J d            t                                          ||z  |           d S )N        (Categorical `temperature` must be > 0.0!r   r   r    r   r   r5   r!   s       r"   r   zCategorical.__init__6   sF     S   "L    	+-u55555r#   r$   c                 N    t           j                            | j        d          S N   axis)tfmathargmaxr   r'   s    r"   deterministic_samplez Categorical.deterministic_sample>   s    w~~dk~222r#   xc                     t           j                            | j        t                               |t           j                             S )N)logitslabels)r@   nn(sparse_softmax_cross_entropy_with_logitsr   castint32r    rD   s     r"   r   zCategorical.logpB   s=    >>;rwwq"(';'; ? 
 
 
 	
r#   c                 L   | j         t                              | j         dd          z
  }t                              |          }t                              |dd          }||z  }t                              |t          j                            |          |z
  z  d          S Nr=   Tr?   keepdimsr>   r   r@   
reduce_maxexp
reduce_sumrA   r   )r    a0ea0z0p0s        r"   entropyzCategorical.entropyH   s}    [2==1t=LLLffRjj]]3Q]662X}}R27;;r??R#78q}AAAr#   otherc                 T   | j         t                              | j         dd          z
  }|j         t                              |j         dd          z
  }t                              |          }t                              |          }t                              |dd          }t                              |dd          }||z  }t                              ||t          j                            |          z
  |z
  t          j                            |          z   z  d          S rN   rQ   )	r    rZ   rU   a1rV   ea1rW   z1rX   s	            r"   klzCategorical.klP   s    [2==1t=LLL\BMM%,QMNNNffRjjffRjj]]3Q]66]]3Q]662X}}R2B#7"#<rw{{2#NOVW}XXXr#   c                     t                               t           j                            | j        d          d          S r<   )r@   squeezerandomcategoricalr   r'   s    r"   r   zCategorical._build_sample_op[   s,    zz")//Q??azHHHr#   c                     | j         S r   naction_spacemodel_configs     r"   required_model_output_shapez'Categorical.required_model_output_shape_   s     ~r#   Nr4   )r,   r-   r.   r/   r   r   r   floatr   r   r   rC   r   rY   r_   r   r   staticmethodrj   r0   r1   s   @r"   r3   r3   2   s       >> UX6 6:&6/66LQ6 6 6 6 6 6 X !!3j 3 3 3 "!3 X !!
j 
Z 
 
 
 "!

 X !!B B B B "!B X !!Y* Yz Y Y Y "!Y X"##I* I I I $#I X !!  "! \    r#   r3   tc                 2      G  fddt                     }|S )zGCategorical distribution class that has customized default temperature.c                   &     e Zd Zdf fd	Z xZS )Jget_categorical_class_with_temperature.<locals>.CategoricalWithTemperatureNc                 N    t                                          |||           d S r   r9   r:   s       r"   r   zSget_categorical_class_with_temperature.<locals>.CategoricalWithTemperature.__init__j   s%    GGVUK88888r#   )r,   r-   r.   r   r0   )r!   rn   s   @r"   CategoricalWithTemperaturerq   i   sC        )-1 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9r#   rs   r3   )rn   rs   s   ` r"   &get_categorical_class_with_temperatureru   e   s?    9 9 9 9 9 9 9[ 9 9 9 &%r#   c                   &   e Zd ZdZ	 ddee         dedeee         e	j
        eedf         f         fdZ ee          defd	            Z ee          d
edefd            Z ee          defd            Z ee          defd            Z ee          dedefd            Z ee          dedefd            Z ee          defd            Ze ee          dej        dedeee	j
        f         fd                        ZdS )MultiCategoricalz>MultiCategorical distribution for MultiDiscrete action spaces.Nr   r   
input_lens.c                    t          j        | |           fdt                              ||d          D             | _        || _        | j        3t          j                            d | j        D                       | _        | 	                                | _
        |                     | j
                  | _        d S )Nc                 0    g | ]}t          |          S  rt   ).0input_r   s     r"   
<listcomp>z-MultiCategorical.__init__.<locals>.<listcomp>}   s3     
 
 
 &&
 
 
r#   r=   r>   c                 2    g | ]}|j         j        d          S )r=   )r   shape)r|   cs     r"   r~   z-MultiCategorical.__init__.<locals>.<listcomp>   s!    666q"666r#   )r   r   r@   splitcatsrh   gymspacesMultiDiscreter   r   r   r   )r    r   r   rx   rh   s     `  r"   r   zMultiCategorical.__init__t   s     	#D&%888
 
 
 
((6:A(>>
 
 
	 )$ #
 8 866DI666! !D ..00&*ii&?&?###r#   r$   c           	      V   t                               d | j        D             d          }t          | j        t
          j        j                  rYt                               t           	                    |dgt          | j        j                  z             | j        j                  S |S )Nc                 6    g | ]}|                                 S r{   rC   r|   cats     r"   r~   z9MultiCategorical.deterministic_sample.<locals>.<listcomp>   s$    LLL3C4466LLLr#   r=   r>   r@   stackr   
isinstancerh   r   r   BoxrJ   reshapelistr   dtype)r    sample_s     r"   rC   z%MultiCategorical.deterministic_sample   s    ((LL$)LLLST(UUd'88 	77

7RD40A0G+H+H$HII!'   r#   actionsc           	         t          |t          j                  rt          | j        t          j        j                  rGt                              |dt          t          j
        | j        j                            g          }nMt          | j        t          j        j                  r)|                    d t          | j                  f           t                              t                              |t          j                  d          }t                              d t)          | j        |          D                       }t                              |d          S )Nr   r=   r>   c                 >    g | ]\  }}|                     |          S r{   )r   )r|   r   acts      r"   r~   z)MultiCategorical.logp.<locals>.<listcomp>   s&    PPPHC#((3--PPPr#   r   )r   r@   Tensorrh   r   r   r   r   intnpprodr   r   	set_shapelenr   unstackrJ   rK   r   ziprT   )r    r   logpss      r"   r   zMultiCategorical.logp   s    gry)) 	E$+SZ^<< :**b#bgd.?.E&F&F"G"GH  D-sz/GHH :!!4TY"8999jj"(!;!;!jDDGPPDIw8O8OPPPQQ}}U}+++r#   c                 X    t                               d | j        D             d          S )Nc                 6    g | ]}|                                 S r{   rY   r   s     r"   r~   z2MultiCategorical.multi_entropy.<locals>.<listcomp>   s     <<<3<<<r#   r=   r>   )r@   r   r   r'   s    r"   multi_entropyzMultiCategorical.multi_entropy   s)    xx<<$)<<<1xEEEr#   c                 ^    t                               |                                 d          S r<   )r@   rT   r   r'   s    r"   rY   zMultiCategorical.entropy   s$    }}T//11}:::r#   rZ   c                 ~    t                               d t          | j        |j                  D             d          S )Nc                 >    g | ]\  }}|                     |          S r{   r_   )r|   r   oth_cats      r"   r~   z-MultiCategorical.multi_kl.<locals>.<listcomp>   s&    LLLgSVVG__LLLr#   r=   r>   )r@   r   r   r   r    rZ   s     r"   multi_klzMultiCategorical.multi_kl   s?    xxLLTY
1K1KLLLST  
 
 	
r#   c                 `    t                               |                     |          d          S r<   )r@   rT   r   r   s     r"   r_   zMultiCategorical.kl   s$    }}T]]511}:::r#   c           	      X   t                               d | j        D             d          }t          | j        t
          j        j                  rZt                               t           	                    |dgt          | j        j                  z             | j        j                  S |S )Nc                 6    g | ]}|                                 S r{   r)   r   s     r"   r~   z5MultiCategorical._build_sample_op.<locals>.<listcomp>   s     @@@scjjll@@@r#   r=   r>   r   r   r   )r    r   s     r"   r   z!MultiCategorical._build_sample_op   s    HH@@di@@@qHII	d'88 	77

9rdT$2C2I-J-J&JKK'-     r#   rh   ri   c                    t          | t          j        j                  r| j        j                            d          sJ t          j        | j	                  }t          j
        | j                  }t          j        | j	        |k              sJ t          j        | j        |k              sJ t          j        | j        t          j                  ||z
  dz   z  S t          j        | j                  S )Nr   r   r=   )r   r   r   r   r   name
startswithr   minlowmaxhighallr   r   rK   sumnvec)rh   ri   low_high_s       r"   rj   z,MultiCategorical.required_model_output_shape   s     lCJN33 
	-%*55e<<<<<6,*++DF<,--E6,*d2333336,+u4555557<-RX>>>%$,QRBRSS 6,+,,,r#   r   )r,   r-   r.   r/   r   r   r   r   r   r   ndarrayr   r   r   r   rC   r   r   rY   r   r_   r   r   rm   r   Spacer   rj   r{   r#   r"   rw   rw   p   sW       HH @ @Z @ @ $s)RZsCx@A	@ @ @ @* X !!j    "! X !!,J ,: , , , "!, X !!Fz F F F "!F X !!; ; ; ; "!; X !!
0 
Z 
 
 
 "!

 X !!;* ;z ; ; ; "!; X"##*    $# X !!-i-/>-	sBJ	- - - "! \- - -r#   rw   c            
            e Zd ZdZ	 	 	 	 ddee         dededee	j
        j                 f fdZ ee          d	ef fd
            Z ee          ded	efd            Z xZS )SlateMultiCategoricalaf  MultiCategorical distribution for MultiDiscrete action spaces.

    The action space must be uniform, meaning all nvec items have the same size, e.g.
    MultiDiscrete([10, 10, 10]), where 10 is the number of candidates to pick from
    and 3 is the slate size (pick 3 out of 10). When picking candidates, no candidate
    must be picked more than once.
    Nr4   r   r   r5   rh   c                 &    |dk    s
J d            t                                          ||z  |           | _        t           j        t          j        j                  r%t           fd j        j        D                       sJ | _	        d S )Nr7   r8   c              3   D   K   | ]}|j         j        d          k    V  dS )r   N)rh   r   )r|   rf   r    s     r"   	<genexpr>z1SlateMultiCategorical.__init__.<locals>.<genexpr>   sL       O
 O
/0A"'**O
 O
 O
 O
 O
 O
r#   )
r   r   rh   r   r   r   r   r   r   
all_slates)r    r   r   r5   rh   r   r!   s   `     r"   r   zSlateMultiCategorical.__init__   s     S   "L    	+-u555( $+SZ-EFF 	
3 O
 O
 O
 O
484E4JO
 O
 O
 L
 L
 	
 	
 
 %r#   r$   c                     t                                                      }t                              | j        |          S r   )r   rC   r@   gatherr   )r    r)   r!   s     r"   rC   z*SlateMultiCategorical.deterministic_sample   s0     --//yy&111r#   rD   c                 T    t                               | j        d d df                   S )Nr   )r@   	ones_liker   rL   s     r"   r   zSlateMultiCategorical.logp   s%     ||DK1-...r#   )Nr4   NN)r,   r-   r.   r/   r   r   r   rl   r   r   r   r   r   r   r   rC   r   r0   r1   s   @r"   r   r      s          ;?% %Z % % 	%
 sz78% % % % % %( X !!2j 2 2 2 2 2 "!2 X !!/j /Z / / / "!/ / / / /r#   r   c            
       8    e Zd ZdZ	 ddee         dedef fdZ e	e
          defd	            Z e	e
          d
edefd            Z e	e          defd            Ze e	e
          dej        dedeeej        f         fd                        Z xZS )GumbelSoftmaxa  GumbelSoftmax distr. (for differentiable sampling in discr. actions

    The Gumbel Softmax distribution [1] (also known as the Concrete [2]
    distribution) is a close cousin of the relaxed one-hot categorical
    distribution, whose tfp implementation we will use here plus
    adjusted `sample_...` and `log_prob` methods. See discussion at [0].

    [0] https://stackoverflow.com/questions/56226133/
    soft-actor-critic-with-discrete-action-space

    [1] Categorical Reparametrization with Gumbel-Softmax (Jang et al, 2017):
    https://arxiv.org/abs/1611.01144
    [2] The Concrete Distribution: A Continuous Relaxation of Discrete Random
    Variables (Maddison et al, 2017) https://arxiv.org/abs/1611.00712
    Nr4   r   r   r5   c                    |dk    sJ t           j                            ||          | _        t          j                            | j        j        j                  | _	        t                                          ||           dS )aA  Initializes a GumbelSoftmax distribution.

        Args:
            temperature: Temperature parameter. For low temperatures,
                the expected value approaches a categorical random variable.
                For high temperatures, the expected value approaches a uniform
                distribution.
        r7   )r5   rF   N)tfpdistributionsRelaxedOneHotCategoricaldistr@   rH   softmax_distributionrF   probsr   r   r:   s       r"   r   zGumbelSoftmax.__init__  sy     c!!!!%>>#F ? 
 
	 U]]49#:#ABB
'''''r#   r$   c                     | j         S r   )r   r'   s    r"   rC   z"GumbelSoftmax.deterministic_sample  s     zr#   rD   c                    |j         | j        j        j         k    rt                              || j        j        j                                         d         t          j                  }|j         | j        j        j         k    s J |j         | j        j        j         f            t                              | t          j        	                    | j        j        d          z  d           S )Nr   r   r>   )
r   r   rF   r@   one_hotas_listfloat32rT   rH   log_softmax)r    rD   valuess      r"   r   zGumbelSoftmax.logp$  s     7di&,,,ZZ49#)1133B7rz    F <49#3#9999	 &<999 B""49#3""===B  
 
 
 	
r#   c                 4    | j                                         S r   r   r)   r'   s    r"   r   zGumbelSoftmax._build_sample_op7      y!!!r#   rh   ri   c                     | j         S r   re   rg   s     r"   rj   z)GumbelSoftmax.required_model_output_shape;  s    
 ~r#   rk   )r,   r-   r.   r/   r   r   r   rl   r   r   r   rC   r   r   r   rm   r   r   r   r   r   r   r   rj   r0   r1   s   @r"   r   r      sk        " UX( (:&(/6(LQ( ( ( ( ( ($ X !!j    "! X !!
j 
Z 
 
 
 "!
$ X"##"* " " " $#" X !!i/>	sBJ	   "! \    r#   r   c            
           e Zd ZdZdddee         dedeej	        j
                 f fdZ ee          defd	            Z ee          d
edefd            Z ee          dedefd            Z ee          defd            Z ee          defd            Ze ee          dej
        dedeeej        f         fd                        Z xZS )DiagGaussianzAction distribution where each vector element is a gaussian.

    The first half of the input vector defines the gaussian means, and the
    second half the gaussian standard deviations.
    N)rh   r   r   rh   c                
   t                               |dd          \  }}|| _        || _        t                               |          | _        |o
|j        dk    | _        t                      	                    ||           d S )N   r=   r>   r{   )
r@   r   meanlog_stdrS   stdr   zero_action_dimr   r   )r    r   r   rh   r   r   r!   s         r"   r   zDiagGaussian.__init__K  st     33g	66'??+H0Bb0H'''''r#   r$   c                     | j         S r   )r   r'   s    r"   rC   z!DiagGaussian.deterministic_sampleZ  s
    yr#   rD   c           	         t          t                              |          j        d                   dk    rt                              |d          }dt                              t          j                            t                              |t          j                  | j	        z
  | j
        z            d          z  dt          j        dt          j        z            z  t                              t                              |          d         t          j                  z  z
  t                              | j        d          z
  S )Nr   r=   r>   g            ?       @)r   r@   r   expand_dimsrT   rA   squarerJ   r   r   r   r   r   pir   rL   s     r"   r   zDiagGaussian.logp^  s     rxx{{ #$$))qq))Amm2: 6 6 BdhNOOVW    BF3;'''"''"((1++a."**M*MM	N
 mmDLqm112	
r#   rZ   c                 |   t          |t                    sJ t                              |j        | j        z
  t          j                            | j                  t          j                            | j        |j        z
            z   dt          j                            |j                  z  z  z   dz
  d          S )Nr   r   r=   r>   )	r   r   r@   rT   r   rA   r   r   r   r   s     r"   r_   zDiagGaussian.kll  s    %.....}}Mlw~~dh''"'..UZ9O*P*PPRW^^EI...00 	
   
 
 	
r#   c                     t                               | j        dt          j        dt          j        z  t          j        z            z  z   d          S )Nr   r   r=   r>   )r@   rT   r   r   r   r   er'   s    r"   rY   zDiagGaussian.entropyx  s<    }}T\C"&rurt9K2L2L,LLST}UUUr#   c                     | j         | j        t          j                            t                              | j                             z  z   }| j        rt                              |d          S |S Nr   r>   )r   r   r@   rb   normalr   r   ra   )r    r)   s     r"   r   zDiagGaussian._build_sample_op|  sY    TX	(8(8$)9L9L(M(MMM 	/::f2:...r#   ri   c                 R    t          j        | j        t           j                  dz  S Nr   r   r   r   r   rK   rg   s     r"   rj   z(DiagGaussian.required_model_output_shape  #    
 w|):::Q>>r#   )r,   r-   r.   r/   r   r   r   r   r   r   r   r   r   r   rC   r   r_   rY   r   r   rm   r   r   r   r   r   rj   r0   r1   s   @r"   r   r   C  s         48( ( (Z ( (
 sz/0( ( ( ( ( ( X !!j    "! X !!
j 
Z 
 
 
 "!
 X !!	
* 	
z 	
 	
 	
 "!	
 X !!V V V V "!V X"##*    $# X !!?i?/>?	sBJ	? ? ? "! \? ? ? ? ?r#   r   c            
           e Zd ZdZ	 	 ddee         dededef fdZ e	e
          d	efd
            Z e	e          d	efd            Z e	e
          ded	efd            Zd Z e	e
          d	efd            Z e	e
          de
d	efd            Zded	efdZded	efdZe e	e
          dej        ded	eeej        f         fd                        Z xZS )SquashedGaussianzA tanh-squashed Gaussian distribution defined by: mean, std, low, high.

    The distribution will never return low or high exactly, but
    `low`+SMALL_NUMBER or `high`-SMALL_NUMBER respectively.
          r4   r   r   r   r   c                    t           J t                              |dd          \  }}t                              |t          t
                    }t                              |          }t           j                            ||          | _	        t          j        t          j        ||                    sJ || _        || _        t                                          ||           dS )zParameterizes the distribution via `inputs`.

        Args:
            low: The lowest possible sampling value
                (excluding this value).
            high: The highest possible sampling value
                (excluding this value).
        Nr   r   r>   )locscale)r   r@   r   clip_by_valuer   r   rS   r   Normaldistrr   r   lessr   r   r   r   )	r    r   r   r   r   r   r   r   r!   s	           r"   r   zSquashedGaussian.__init__  s     44g""7,=?PQQffWoo&--$c-BB
vbgc4(()))))	'''''r#   r$   c                 ^    | j                                         }|                     |          S r   )r   r   _squashr    r   s     r"   rC   z%SquashedGaussian.deterministic_sample  s%    z  ||D!!!r#   c                 Z    |                      | j                                                  S r   )r  r   r)   r'   s    r"   r   z!SquashedGaussian._build_sample_op  s"    ||DJ--//000r#   rD   c                    t                               |                     |          | j        j                  }| j                            |          }t                               |dd          }t                               |d          }t           j	        
                    |          }|t                               t           j	                            d|dz  z
  t          z             d          z
  }|S )Nid   r   r>   r=   r   )r@   rJ   	_unsquashr   r   r   log_probr   rT   rA   tanhr   r	   )r    rD   unsquashed_valueslog_prob_gaussianunsquashed_values_tanhdr	  s         r"   r   zSquashedGaussian.logp  s     GGDNN1$5$5t{7HII J//0ABB,,->cJJMM*;"MEE"$',,/@"A"A$r}}GKK3Q66EFFR (5 (
 (
 
 r#   c           	      &   | j                                         }|                     |          }|t                              | j                             |          t          j                            d||z  z
  t          z             z
  d          fS Nr=   r   r>   )	r   r)   r  r@   rT   r	  rA   r   r	   )r    zr   s      r"   sample_logpzSquashedGaussian.sample_logp  s~    J,,q//J""RW[[Ww5F1F1U%V%VV & 
 
 
 	
r#   c                      t          d          )Nz)Entropy not defined for SquashedGaussian!
ValueErrorr'   s    r"   rY   zSquashedGaussian.entropy  s    DEEEr#   rZ   c                      t          d          )Nz$KL not defined for SquashedGaussian!r  r   s     r"   r_   zSquashedGaussian.kl  s    ?@@@r#   
raw_valuesc                     t           j                            |          dz   dz  | j        | j        z
  z  | j        z   }t                               || j        | j                  S )Nr4   r   )r@   rA   r
  r   r   r   )r    r  squasheds      r"   r  zSquashedGaussian._squash  sX    W\\*--3s:I 
H $(DI>>>r#   r   c                     || j         z
  | j        | j         z
  z  dz  dz
  }t                              |dt          z   dt          z
            }t          j                            |          }|S )Nr   r4   r   )r   r   r@   r   r	   rA   atanh)r    r   normed_valuessave_normed_values
unsquasheds        r"   r  zSquashedGaussian._unsquash  si    $(*ty48/CDsJSP--4,.l0B
 
 W]]#566
r#   rh   ri   c                 R    t          j        | j        t           j                  dz  S r   r   rg   s     r"   rj   z,SquashedGaussian.required_model_output_shape  r   r#   )r   r4   )r,   r-   r.   r/   r   r   r   rl   r   r   r   rC   r   r   r   r  rY   r_   r  r  rm   r   r   r   r   r   r   r   rj   r0   r1   s   @r"   r   r     sF         ( (Z ( ( 	(
 ( ( ( ( ( (4 X !!"j " " " "!" X"##1* 1 1 1 $#1 X !!j Z    "!
 
 
 X !!F F F F "!F X !!A* Az A A A "!A?* ? ? ? ? ?
 z     X !!?i?/>?	sBJ	? ? ? "! \? ? ? ? ?r#   r   c            
       ^    e Zd ZdZ	 	 ddee         dededef fdZ e	e
          d	efd
            Z e	e          d	efd            Z e	e
          ded	efd            Zded	efdZded	efdZe e	e
          dej        ded	eeej        f         fd                        Z xZS )BetaaB  
    A Beta distribution is defined on the interval [0, 1] and parameterized by
    shape parameters alpha and beta (also called concentration parameters).

    PDF(x; alpha, beta) = x**(alpha - 1) (1 - x)**(beta - 1) / Z
        with Z = Gamma(alpha) Gamma(beta) / Gamma(alpha + beta)
        and Gamma(n) = (n - 1)!
    r7   r4   r   r   r   r   c                    t                               |t          t                    t          t                               }t           j                            t           j                            |          dz             dz   }|| _        || _        t                               |dd          \  }}t          j
                            ||          | _        t                                          ||           d S )Nr4   r   r   r>   )concentration1concentration0)r@   r   r   r	   rA   rS   r   r   r   r   r   r   r   r   r   )r    r   r   r   r   alphabetar!   s          r"   r   zBeta.__init__  s     !!&#l*;*;c,>O>O=OPPRW[[003677#=	hhvqrh22t%**%PT*UU	'''''r#   r$   c                 ^    | j                                         }|                     |          S r   )r   r   r  r  s     r"   rC   zBeta.deterministic_sample
  s%    y~~||D!!!r#   c                 Z    |                      | j                                                  S r   )r  r   r)   r'   s    r"   r   zBeta._build_sample_op  s"    ||DI,,..///r#   rD   c                     |                      |          }t          j                            | j                            |          d          S r   )r  r@   rA   rT   r   r	  )r    rD   r  s      r"   r   z	Beta.logp  s@     NN1--w!!$)"4"45F"G"Gb!QQQr#   r  c                 6    || j         | j        z
  z  | j        z   S r   )r   r   )r    r  s     r"   r  zBeta._squash  s    TY12TX==r#   r   c                 6    || j         z
  | j        | j         z
  z  S r   )r   r   )r    r   s     r"   r  zBeta._unsquash  s    !di$(&:;;r#   rh   ri   c                 R    t          j        | j        t           j                  dz  S r   r   rg   s     r"   rj   z Beta.required_model_output_shape  r   r#   )r7   r4   )r,   r-   r.   r/   r   r   r   rl   r   r   r   rC   r   r   r   r  r  rm   r   r   r   r   r   r   r   rj   r0   r1   s   @r"   r   r     s         ( (Z ( ( 	(
 ( ( ( ( ( (" X !!"j " " " "!" X"##0* 0 0 0 $#0 X !!Rj RZ R R R "!R>* > > > > ><
 <z < < < < X !!?i?/>?	sBJ	? ? ? "! \? ? ? ? ?r#   r   c            
       
   e Zd ZdZ ee          defd            Z ee          dedefd            Z	 ee          defd            Z
e ee          dej        dedeeej        f         fd	                        Zd
S )DeterministiczAction distribution that returns the input values directly.

    This is similar to DiagGaussian with standard deviation zero (thus only
    requiring the "mean" values as NN output).
    r$   c                     | j         S r   r   r'   s    r"   rC   z"Deterministic.deterministic_sample.  
    {r#   rD   c                 @    t                               | j                  S r   )r@   
zeros_liker   rL   s     r"   r   zDeterministic.logp2  s    }}T[)))r#   c                     | j         S r   r/  r'   s    r"   r   zDeterministic._build_sample_op6  r0  r#   rh   ri   c                 L    t          j        | j        t           j                  S Nr   r   rg   s     r"   rj   z)Deterministic.required_model_output_shape:      
 w|)::::r#   N)r,   r-   r.   r/   r   r   r   rC   r   r   r   rm   r   r   r   r   r   r   r   rj   r{   r#   r"   r-  r-  &  s         X !!j    "! X"##*j *Z * * * $#* X"##*    $# X !!;i;/>;	sBJ	; ; ; "! \; ; ;r#   r-  c                   0   e Zd ZdZd Z ee          d             Z ee          d             Z ee          d             Z	 ee          d             Z
 ee          d             Z ee          d             Z ee          d	             Zd
S )MultiActionDistributionzAction distribution that operates on a set of actions.

    Args:
        inputs (Tensor list): A list of tensors from which to compute samples.
    c                *   t          j        | |           t          |          | _        t	          j        |t          j                  | _        t          	                    || j        d          }t          j        fd||          | _        d S )Nr   r=   r>   c                      | |fi S r   r{   )r   r}   kwargsr   s     r"   <lambda>z2MultiActionDistribution.__init__.<locals>.<lambda>T  s    fe!>!>v!>!> r#   )r   r   r   action_space_structr   arrayrK   rx   r@   r   treemap_structureflat_child_distributions)r    r   r   child_distributionsrx   rh   r;  split_inputss     `   ` r"   r   z MultiActionDistribution.__init__J  s     	#D&%888#=l#K#K (:RX>>>xxax@@(,(:>>>>>)
 )
%%%r#   c                    t          |t          j        t          j        f          rg }| j        D ]}t          |t                    r|                    d           -t          |t                    r9|j	        2|                    t          j
        |j	        j                             {|                                }t          |j                  dk    r|                    d           |                    t                              |          d                    t                              ||d          }nt          j        |          }d }t          j        ||| j                  }t%          j        d |          S )Nr=   r>   c                    t          |t                    rYt                              t	          | j                  dk    rt                              | d          n| t          j                  } |                    |           S r  )	r   r3   r@   rJ   r   r   ra   rK   r   )valr   s     r"   map_z*MultiActionDistribution.logp.<locals>.map_q  sg    $,, gg03CI0B0BBJJsJ,,,RX  99S>>!r#   c                     | |z   S r   r{   abs     r"   r<  z.MultiActionDistribution.logp.<locals>.<lambda>}  
    QU r#   )r   r@   r   r   r   rA  r3   appendrw   rh   r   r   r)   r   r   r?  flattenr@  	functoolsreduce)r    rD   split_indicesr   r)   split_xrG  
flat_logpss           r"   r   zMultiActionDistribution.logpY  sn    a")RZ011 	&M5 B BdK00 B!((++++t%566
B;?;L;X!((1B1H)I)IJJJJ![[]]F6<((A--%,,Q////%,,RXXf-=-=a-@AAAAhhq-ah88GG l1ooG	" 	" 	" 'gt7TUU
 2 2J???r#   c                 v    d t          | j        |j                  D             }t          j        d |          S )Nc                 >    g | ]\  }}|                     |          S r{   r   )r|   dos      r"   r~   z.MultiActionDistribution.kl.<locals>.<listcomp>  s6     
 
 
1 DDGG
 
 
r#   c                     | |z   S r   r{   rI  s     r"   r<  z,MultiActionDistribution.kl.<locals>.<lambda>  rL  r#   )r   rA  rO  rP  )r    rZ   kl_lists      r"   r_   zMultiActionDistribution.kl  sN    
 
-u/M 
 
 
  2 2G<<<r#   c                 P    d | j         D             }t          j        d |          S )Nc                 6    g | ]}|                                 S r{   r   )r|   rV  s     r"   r~   z3MultiActionDistribution.entropy.<locals>.<listcomp>  s     KKK		KKKr#   c                     | |z   S r   r{   rI  s     r"   r<  z1MultiActionDistribution.entropy.<locals>.<lambda>  rL  r#   )rA  rO  rP  )r    entropy_lists     r"   rY   zMultiActionDistribution.entropy  s/    KKT-JKKK 2 2LAAAr#   c                 l    t          j        | j        | j                  }t          j        d |          S )Nc                 *    |                                  S r   r   ss    r"   r<  z0MultiActionDistribution.sample.<locals>.<lambda>  s    AHHJJ r#   r?  unflatten_asr=  rA  r@  r    rB  s     r"   r)   zMultiActionDistribution.sample  s:    "/$d&C
 
 !"6"68KLLLr#   c                 l    t          j        | j        | j                  }t          j        d |          S )Nc                 *    |                                  S r   r   r`  s    r"   r<  z>MultiActionDistribution.deterministic_sample.<locals>.<lambda>  s    a,,.. r#   rb  rd  s     r"   rC   z,MultiActionDistribution.deterministic_sample  s@    "/$d&C
 
 !..0C
 
 	
r#   c                     | j         d                                         }| j         dd          D ]}||                                z  }|S )Nr   r=   )rA  r+   )r    pr   s      r"   r+   z+MultiActionDistribution.sampled_action_logp  sS    )!,@@BB.qrr2 	) 	)A&&(((AAr#   c                 L    t          j        | j        t           j                  S r5  )r   r   rx   rK   )r    rh   ri   s      r"   rj   z3MultiActionDistribution.required_model_output_shape  s    vdoRX6666r#   N)r,   r-   r.   r/   r   r   r   r   r_   rY   r)   rC   r   r+   rj   r{   r#   r"   r8  r8  B  sH        
 
 
 X !!#@ #@ "!#@J X !!= = "!= X !!B B "!B X !!M M "!M X !!
 
 "!
 X"##  $# X !!7 7 "!7 7 7r#   r8  c            
           e Zd ZdZdee         def fdZ ee	          defd            Z
 ee	          dedefd            Z ee	          defd	            Z ee	          d
e	defd            Z ee          defd            Ze ee	          dej        dedeeej        f         fd                        Z xZS )	DirichletzDirichlet distribution for continuous actions that are between
    [0,1] and sum to 1.

    e.g. actions that represent resource allocation.r   r   c                     d| _         t                              |          | j         z   }t          j                            |dd          | _        t                                          ||           dS )aB  Input is a tensor of logits. The exponential of logits is used to
        parametrize the Dirichlet distribution as all parameters need to be
        positive. An arbitrary small epsilon is added to the concentration
        parameters to be zero due to numerical error.

        See issue #4440 for more details.
        gHz>TF)concentrationvalidate_argsallow_nan_statsN)	epsilonr@   rS   tf1r   rk  r   r   r   )r    r   r   rm  r!   s       r"   r   zDirichlet.__init__  sl     v5%//'! 0 
 
	
 	.....r#   r$   c                 T    t           j                            | j        j                  S r   )r@   rH   r   r   rm  r'   s    r"   rC   zDirichlet.deterministic_sample  s    u}}TY4555r#   rD   c                     t                               || j                  }|t                               |dd          z  }| j                            |          S )Nr   TrO   )r@   maximumrp  rT   r   r	  rL   s     r"   r   zDirichlet.logp  sK    
 JJq$,''ab4888y!!!$$$r#   c                 4    | j                                         S r   )r   rY   r'   s    r"   rY   zDirichlet.entropy  s    y  """r#   rZ   c                 @    | j                             |j                   S r   )r   kl_divergencer   s     r"   r_   zDirichlet.kl  s    y&&uz222r#   c                 4    | j                                         S r   r   r'   s    r"   r   zDirichlet._build_sample_op  r   r#   rh   ri   c                 L    t          j        | j        t           j                  S r5  r   rg   s     r"   rj   z%Dirichlet.required_model_output_shape  r6  r#   )r,   r-   r.   r/   r   r   r   r   r   r   rC   r   rY   r_   r   r   rm   r   r   r   r   r   r   r   rj   r0   r1   s   @r"   rk  rk    s       8 8
/tJ/ / / / / / / /" X !!6j 6 6 6 "!6 X !!%j %Z % % % "!% X !!# # # # "!# X !!3* 3z 3 3 3 "!3 X"##"* " " " $#" X !!;i;/>;	sBJ	; ; ; "! \; ; ; ; ;r#   rk  )1rO  rA   r   typingr   	gymnasiumr   numpyr   r?  ray.rllib.models.action_distr   ray.rllib.models.modelv2r   ray.rllib.utilsr   r   r	   ray.rllib.utils.annotationsr
   r   ray.rllib.utils.frameworkr   r   "ray.rllib.utils.spaces.space_utilsr   ray.rllib.utils.typingr   r   r   r   r   rq  r@   tfvr   r   r3   rl   ru   rw   r   r   r   r   r   r-  r8  rk  r{   r#   r"   <module>r     s                            ; ; ; ; ; ; , , , , , , N N N N N N N N N N = = = = = = = = C C C C C C C C I I I I I I R R R R R R R R R R R R R R}Rn + + + + +- + + +8 / / / / /& / / /d &e & & & & \- \- \- \- \-+ \- \- \-~ '/ '/ '/ '/ '/K '/ '/ '/T D D D D D( D D DN D? D? D? D? D?' D? D? D?N _? _? _? _? _?+ _? _? _?D 4? 4? 4? 4? 4? 4? 4? 4?n ; ; ; ; ;( ; ; ;6 d7 d7 d7 d7 d72 d7 d7 d7N 5; 5; 5; 5; 5;$ 5; 5; 5; 5; 5;r#   