
    &`i                        d dl mZmZ d dlZd dlZd dlmZmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZmZmZ d d
lmZ d dlmZ d dlmZ  e            \  Z Z!Z" e            \  Z#Z$e G d de                      Z%dS )    )OptionalUnionN)BoxDiscreteMultiDiscreteSpace)ActionDistribution)ModelV2)force_tuple)OldAPIStackoverride)Exploration)
TensorTypetry_import_tftry_import_torch)Simplex)get_base_struct_from_space)zero_logps_from_actionsc                        e Zd ZdZdededee         f fdZ e	e
          ddded	eeef         d
efd            Zded
eeeef                  fdZded
efdZ xZS )RandomzA random action selector (deterministic/greedy for explore=False).

    If explore=True, returns actions randomly from `self.action_space` (via
    Space.sample()).
    If explore=False, returns the greedy/max-likelihood action.
    action_spacemodel	frameworkc                t     t                      j        d|||d| t          | j                  | _        dS )zInitialize a Random Exploration object.

        Args:
            action_space: The gym action space used by the environment.
            framework: One of None, "tf", "torch".
        )r   r   r   N )super__init__r   r   action_space_struct)selfr   r   r   kwargs	__class__s        v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/utils/exploration/random.pyr   zRandom.__init__   sT     	 	
%Ui	
 	
KQ	
 	
 	
 $>d>O#P#P       T)exploreaction_distributiontimestepr$   c                l    | j         dv r|                     ||          S |                     ||          S )N)tf2tf)r   get_tf_exploration_action_opget_torch_exploration_action)r   r%   r&   r$   s       r"   get_exploration_actionzRandom.get_exploration_action-   s?     >]**445H'RRR445H'RRRr#   action_distc                       fd}fd}t                               t          |t                    r&t                               |t           j                  n|||          }t          |          }||fS )Nc            
      h   dt                              j        t          j        dd                               } t          j        j                  t          |           dz   k    r%t                              j                  d         fd}t          j
        |j                  }|S )N   model_configr   c                 L     j         pd}t           t                    r5t          j                            f j         z    j         j                  S t           t                    r.t          	                     fd j
        D             d          S t           t                    r j                                        r j                                        r j        j                            d          rLt          j                            f|z    j        j        d          j        j        d          j                  S t          j                            f|z    j         j         j                  S t          j                            f|z    j        	          S t           t*                    sJ d
                                           t          j                            t          j                            f|z   dd j                            S )N)r0   shapemaxvaldtypec                 b    g | ]+}t           j                            d f|j                  ,S )r0   r3   )r)   randomuniformr6   ).0n
batch_size	components     r"   
<listcomp>zbRandom.get_tf_exploration_action_op.<locals>.true_fn.<locals>.random_component.<locals>.<listcomp>Z   sO        !" I--'11oay .    r#   r0   )axisintr   )r4   minvalr5   r6   )r4   r6   z<Unsupported distribution component '{}' for random sampling!g        g      ?)r4   
isinstancer   r)   r8   r9   r;   r6   r   concatnvecr   bounded_aboveallbounded_belowname
startswithlowflathighnormalr   formatnnsoftmax)r=   r4   r<   s   ` r"   random_componentzNRandom.get_tf_exploration_action_op.<locals>.true_fn.<locals>.random_componentM   sJ    "/4i22 09,,)mio=({'o -   
  	=99 *99     &/^	    %     	3//   .2244 9P9T9T9V9V $?/::5AA #%9#4#4'1me&;'0}'9!'<'0~':1'=&/o	 $5 $ $  $&9#4#4'1me&;'0}'0~&/o	 $5 $ $   "y//#--%"7y  0      &i99  $$*F9$5$5 9 5==	))#--%"7#&#&"+/	 *    r#   )r   required_model_output_shaper   getattrr   leninputsr4   r)   treemap_structurer   )reqrQ   actionsr<   r-   r   s      @r"   true_fnz4Random.get_tf_exploration_action_op.<locals>.true_fn@   s    J77%wtz>4'P'P  C ;%+,,C1<<XXk&899!<
5 5 5 5 5n ()94;STTGNr#   c                  ,                                      S )N)deterministic_sample)r-   s   r"   false_fnz5Random.get_tf_exploration_action_op.<locals>.false_fn   s    33555r#   )r6   )predrZ   r]   )r)   condrB   boolconstantr   )r   r-   r$   rZ   r]   actionlogps   ``     r"   r*   z#Random.get_tf_exploration_action_op;   s    
E	 E	 E	 E	 E	 E	N	6 	6 	6 	6 	6 '4((WBG444  
 
 'v..t|r#   c           
          |rt          |                     j        t           j        dd                               }t          |j        j                  t          |          dz   k    r@|j        j        d         }t          j	         fdt          |          D                       }n j                                        }t                              |                               j                  }n|                                }t                              |                                d         ft          j         j                  }||fS )Nr1   r0   r   c                 B    g | ]}j                                         S r   )r   sample)r:   _r   s     r"   r>   z7Random.get_torch_exploration_action.<locals>.<listcomp>   s(    TTTQd/6688TTTr#   )r6   device)r   rR   r   rS   r   rT   rU   r4   npstackrangerf   torch
from_numpytorh   r\   zerossizefloat32)r   r-   r$   rX   r<   arb   rc   s   `       r"   r+   z#Random.get_torch_exploration_action   s'     	877%wtz>4'P'P  C ;%+,,C1<<(/5a8
HTTTT%
BSBSTTTUU%,,..%%a((++DK88FF 5577F{{FKKMM!,.emDK{XXt|r#   )__name__
__module____qualname____doc__r   r
   r   strr   r   r   r	   r   r@   r   r`   r,   r*   r+   __classcell__)r!   s   @r"   r   r      s6        Q!Q-4QAI#Q Q Q Q Q Q Xk S S S 0S Z(	S
 S S S SX'X %j 012X X X Xt-8<       r#   r   )&typingr   r   numpyri   rV   gymnasium.spacesr   r   r   r   ray.rllib.models.action_distr	   ray.rllib.models.modelv2r
   ray.rllib.utilsr   ray.rllib.utils.annotationsr   r   'ray.rllib.utils.exploration.explorationr   ray.rllib.utils.frameworkr   r   r   ray.rllib.utils.spaces.simplexr   "ray.rllib.utils.spaces.space_utilsr   ray.rllib.utils.tf_utilsr   tf1r)   tfvrl   rg   r   r   r#   r"   <module>r      sw   " " " " " " " "      @ @ @ @ @ @ @ @ @ @ @ @ ; ; ; ; ; ; , , , , , , ' ' ' ' ' ' = = = = = = = = ? ? ? ? ? ? Q Q Q Q Q Q Q Q Q Q 2 2 2 2 2 2 I I I I I I < < < < < <}Rq S S S S S[ S S S S Sr#   