
    &`i                         d dl Z d dlmZ d dlmZ d dlmZmZ d dlm	Z	 d dl
mZ d dlmZmZmZmZmZmZmZ  G d d	e          Z G d
 de          ZdS )    N)Self)	Algorithm)AlgorithmConfigNotProvided)RLModuleSpec)override)ENV_RUNNER_RESULTSENV_RUNNER_SAMPLING_TIMERLEARNER_RESULTSLEARNER_UPDATE_TIMERNUM_ENV_STEPS_SAMPLED_LIFETIMESYNCH_WORKER_WEIGHTS_TIMERTIMERSc                        e Zd ZU dZdZeed<   d fd	Z ee	          e
ddef fd            Z ee	          d	             Z ee	          d
             Z xZS )	VPGConfigzA simple VPG (vanilla policy gradient) algorithm w/o value function support.

    Use for testing purposes only!

    This Algorithm should use the VPGTorchLearner and VPGTorchRLModule
    Treport_mean_weightsNc                     t                                          |pt                     d| _        d| _        d| _        d S )N)
algo_class
   complete_episodes   )super__init__VPGnum_episodes_per_train_batch
batch_modenum_env_runners)selfr   	__class__s     }/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/examples/algorithms/classes/vpg.pyr   zVPGConfig.__init__   sF    J$5#666 -/) .  !    )r   returnc                Z     t                      j        di | |t          ur|| _        | S )am  Sets the training related configuration.

        Args:
            num_episodes_per_train_batch: The number of complete episodes per train
                batch. VPG requires entire episodes to be sampled from the EnvRunners.
                For environments with varying episode lengths, this leads to varying
                batch sizes (in timesteps) as well possibly causing slight learning
                instabilities. However, for simplicity reasons, we stick to collecting
                always exactly n episodes per training update.

        Returns:
            This updated AlgorithmConfig object.
         )r   trainingr   r   )r   r   kwargsr   s      r    r%   zVPGConfig.training+   s:      	""6"""'{::0LD-r!   c                 ~    | j         dk    rddlm} t          |ddi          }nt	          d| j                    |S )Ntorchr   )VPGTorchRLModule
hidden_dim@   )module_classmodel_configUnsupported framework: )framework_str3ray.rllib.examples.rl_modules.classes.vpg_torch_rlmr)   r   
ValueError)r   r)   specs      r    get_default_rl_module_specz$VPGConfig.get_default_rl_module_specB   sr    ((       -*B/  DD
 Kt7IKKLLLr!   c                 V    | j         dk    rddlm} |S t          d| j                    )Nr(   r   )VPGTorchLearnerr.   )r/   5ray.rllib.examples.learners.classes.vpg_torch_learnerr5   r1   )r   r5   s     r    get_default_learner_classz#VPGConfig.get_default_learner_classR   sP    ((      #"Kt7IKKLLLr!   N)__name__
__module____qualname____doc__r   bool__annotations__r   r   r   r   r   r%   r3   r7   __classcell__)r   s   @r    r   r      s           !%$$$! ! ! ! ! ! Xo7B   QU      , Xo   XoM M M M M M Mr!   r   c                   |    e Zd Ze ee          defd                        Z ee          dd            Zd Z	dS )r   r"   c                     t                      S r8   )r   )clss    r    get_default_configzVPG.get_default_config_   s     {{r!   Nc           	         | j                             t          t          f          5  |                                 \  }}ddd           n# 1 swxY w Y   | j                             |t                     | j                             dt          t          t          |                    dd           | j                             dt          t          t          |                    d	           | j                             t          t          f          5  | j                            |t          | j                             t          t          f          i
          }ddd           n# 1 swxY w Y   | j                             |t                      | j                             t          t"          f          5  | j                            | j        d           ddd           dS # 1 swxY w Y   dS )a  Override of the training_step method of `Algorithm`.

        Runs the following steps per call:
        - Sample B timesteps (B=train batch size). Note that we don't sample complete
        episodes due to simplicity. For an actual VPG algo, due to the loss computation,
        you should always sample only completed episodes.
        - Send the collected episodes to the VPG LearnerGroup for model updating.
        - Sync the weights from LearnerGroup to all EnvRunners.
        N)key%episode_timesteps_sampled_mean_win100meand   )reducewindowepisode_timesteps_sampled_emag?)	ema_coeff)episodes	timestepsT)from_worker_or_learner_groupinference_only)metricslog_timer   r
   _sample_episodes	aggregater	   	log_valuesummaplenr   learner_groupupdater   peekr   r   env_runner_groupsync_weights)r   rM   env_runner_resultslearner_resultss       r    training_stepzVPG.training_stepd   s    \""F,E#FGG 	C 	C+/+@+@+B+B(H(	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	C 	17IJJJ
 	3C""##	 	 	
 	
 	
 	+C""## 	 	
 	
 	
 \""F,@#ABB 
	 
	"077!2))/1OP  8 	 	O
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 	ODDD \""F,F#GHH 	 	!..-1-?# /   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s6   AAAAE,,E03E0>"G--G14G1c                 
   | j         j        | j         j        pdz  | j                            fd| j                                        dk              }t          j        d |D                       }d |D             }||fS )Nr   c                 X    |                                |                                 fS )N)num_episodes)sampleget_metrics)
env_runnernum_episodes_per_env_runners    r    <lambda>z&VPG._sample_episodes.<locals>.<lambda>   s.    !!/J!KK&&((  r!   r   )local_env_runnerc                     g | ]
}|d          S )r   r$   .0ss     r    
<listcomp>z(VPG._sample_episodes.<locals>.<listcomp>   s     < < <!1 < < <r!   c                     g | ]
}|d          S )r   r$   rk   s     r    rn   z(VPG._sample_episodes.<locals>.<listcomp>   s    222qt222r!   )configr   r   r\   foreach_env_runnernum_remote_workerstreeflatten)r   sampled_datarM   stats_dictsrg   s       @r    rS   zVPG._sample_episodes   s    &*k&NK',1'
# ,??    "2EEGG1L @ 	
 	
 < < <| < < <==22\222$$r!   )r"   N)
r9   r:   r;   classmethodr   r   r   rC   r`   rS   r$   r!   r    r   r   ^   s        Xi9     [ Xi4 4 4 4l% % % % %r!   r   )rs   typing_extensionsr   ray.rllib.algorithmsr   %ray.rllib.algorithms.algorithm_configr   r   "ray.rllib.core.rl_module.rl_moduler   ray.rllib.utils.annotationsr   ray.rllib.utils.metricsr	   r
   r   r   r   r   r   r   r   r$   r!   r    <module>r~      sA    " " " " " " * * * * * * N N N N N N N N ; ; ; ; ; ; 0 0 0 0 0 0                 HM HM HM HM HM HM HM HMVQ% Q% Q% Q% Q%) Q% Q% Q% Q% Q%r!   