
    &`i9                         d Z ddlZddlmZmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZmZ ddlmZ  e            \  ZZ	 dd
Zdddd	ddZddddedefdZddd	ddededdfdZd Zd Zd Zd ZdS )a  
[1] Mastering Diverse Domains through World Models - 2023
D. Hafner, J. Pasukonis, J. Ba, T. Lillicrap
https://arxiv.org/pdf/2301.04104v1.pdf

[2] Mastering Atari with Discrete World Models - 2021
D. Hafner, T. Lillicrap, M. Norouzi, J. Ba
https://arxiv.org/pdf/2010.02193.pdf
    N)create_cartpole_dream_imagecreate_frozenlake_dream_image)DEFAULT_MODULE_ID)Columns)try_import_torch)LEARNER_RESULTSREPLAY_BUFFER_RESULTS)inverse_symlogtorchc           	      v   | j         }|d         }|d         }|dk    r%t          t          |j        j                                                            j        }|j                            t                              |           	                    ||z  df          
                    |          t                              |          	                    ||z  f|j         dd         z             
                    |                                                                                                                    }	n[|j                            | 	                    ||z  df          |	                    ||z  f|j         dd         z                       }	t          j	        |	||f|z             }
|
S )Returnsr      r      N)hz)shapenextiterworld_modeldecoder
parametersdevicer   
from_numpyreshapetodetachcpunumpynp)	h_t0_to_H	z_t0_to_Hdreamer_modelobs_dims_shape	frameworkr   TBr   !reconstructed_obs_distr_means_TxBreconstructed_obs_T_Bs              /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/algorithms/dreamerv3/utils/summaries.pyreconstruct_obs_from_h_and_zr+      s    OEaAaA
 Gd=4<GGIIJJKKR%--""9--55q1ubkBBEEfMM""9--!a%IOABB$7788F .   VXXSUUUWW 	*) -:,E,M,MQ,,Q9?122+> >?? -N -
 -
) J)Aq6N+B  !     )r   T)batch_indicesdescinclude_imagesr%   c                    |sd S | d         }t          |d         |d         |||          }	|                    d          rt          nt          }
|D ].}g }t	          t          |	          dz
            D ]}|                     |
|	|         |         |d         |         |         |d         |         |         |d	         |dz            |         d
| v r| d
         |         |         nd |d         |dz            |         | d         |         |         |d         |         |         d	  	                                                   |                     d|rd|z   nd d| t          j
        |d          i           0d S )N
dream_datah_states_t0_to_H_BxTz_states_prior_t0_to_H_BxTr!   r"   r#   r$   r%   CartPoler   values_dreamed_t0_to_H_BxT actions_ints_dreamed_t0_to_H_BxTrewards_dreamed_t0_to_H_BxT DISAGREE_intrinsic_rewards_H_BxTcontinues_dreamed_t0_to_H_BxTVALUE_TARGETS_H_BxTT)	dreamed_obs	dreamed_V	dreamed_adreamed_r_tp1dreamed_ri_tp1dreamed_c_tp1value_target	initial_h	as_tensordreamed_trajectories_ _Baxis)r+   
startswithr   r   rangelenappendr   updater    concatenate)resultsenvr#   r$   r-   r.   r/   r%   r1   dreamed_obs_H_Bfuncbimagests                 r*   report_dreamed_trajectoryrX   H   s     &J2349:#%  O >>*%%	+##* 	   
  
s?++a/00 	 	AMM / 21 5()EFqI!L)*LMaPQRS#-.K#LQQRU#STU#V
 >HH   BCAFqII! ##BCAEJ1M!()>!?!B1!E()?@CAF"#  $ %'''   . 	HT'ADrHHQHHN6222	
 	
 	
 	
5 
  
r,   )
symlog_obs	do_reportrY   rZ   c           
         t           t          df}|                     |dg          d         }|                     |d           d| d}|s|                     |d           dS t	          | t          j        |d	|f|t          j                 j	        d
d         z             |t          j                 dd	         ||           dS )a  Summarizes sampled data (from the replay buffer) vs world-model predictions.

    World model predictions are based on the posterior states (z computed from actual
    observation encoder input + the current h-states).

    Observations: Computes MSE (sampled vs predicted/recreated) over all features.
    For image observations, also creates direct image comparisons (sampled images
    vs predicted (posterior) ones).
    Rewards: Compute MSE (sampled vs predicted).
    Continues: Compute MSE (sampled vs predicted).

    Args:
        metrics: The MetricsLogger object of the DreamerV3 algo.
        sample: The sampled data (dict) from the replay buffer. Already torch-tensor
            converted.
        batch_size_B: The batch size (B). This is the number of trajectories sampled
            from the buffer.
        batch_length_T: The batch length (T). This is the length of an individual
            trajectory sampled from the buffer.
        do_report: Whether to actually log the report (default). If this is set to
            False, this function serves as a clean-up on the given metrics, making sure
            they do NOT contain anymore any (spacious) data relevant for producing
            the report/videos.
    /WORLD_MODEL_fwd_out_obs_distribution_means_b0xTNdefaultr   F	key_error.WORLD_MODEL_sampled_vs_predicted_posterior_b0x_videosr   r   r   metricscomputed_float_obs_B_T_dimssampled_obs_B_T_dimsmetrics_keyrY   )
r   r   peekdelete_report_obsr    r   r   OBSr   )	rd   samplebatch_size_Bbatch_length_TrY   rZ   fwd_output_key*predicted_observation_means_single_examplefinal_result_keys	            r*   report_predicted_vs_sampled_obsrr      s   D 	9N 29 2> 2 22
. NN>UN333 	QPPP   '5999$&J6&"5";ABB"??%
 %

 $GK015$
 
 
 
 
 
r,   )rY   rZ   r%   returnc                 `   |                      t          t          dfi           }|                     t          t          dd           d| d}	d| d}
d| d}|sG|                     |	d           |                     |
d           |                     |d           d	S t	          |d
         d         |d         d         ||t
          j                 j        dd	         |          }|}||z   }t          | t          j
        |dd          dd         |t
          j                 dd||f         |	|           t          | |d         d         |t
          j                 d	d	||f         |
           t          | |d         d         d|d         z
  d	d	||f         |           d	S )a  Logs dreamed observations, rewards, continues and compares them vs sampled data.

    For obs, we'll try to create videos (side-by-side comparison) of the dreamed,
    recreated-from-prior obs vs the sampled ones (over dreamed_T timesteps).

    Args:
        metrics: The MetricsLogger object of the DreamerV3 algo.
        sample: The sampled data (dict) from the replay buffer. Already torch-tensor
            converted.
        burn_in_T: The number of burn-in timesteps (these will be skipped over in the
            reported video comparisons and MSEs).
        dreamed_T: The number of timesteps to produce dreamed data for.
        dreamer_model: The DreamerModel to use to create observation vectors/images
            from dreamed h- and (prior) z-states.
        symlog_obs: Whether to inverse-symlog the computed observations or not. Set this
            to True for environments, in which we should symlog the observations.
        do_report: Whether to actually log the report (default). If this is set to
            False, this function serves as a clean-up on the given metrics, making sure
            they do NOT contain anymore any (spacious) data relevant for producing
            the report/videos.
    r1   r]   Fr_   %EVALUATION_sampled_vs_dreamed_prior_H_obs_rewards_MSE_continues_MSENh_states_t0_to_H_Bx1r   z_states_prior_t0_to_H_Bx1r   r4   r   rc   rewards_dreamed_t0_to_H_Bx1)rd   computed_rewardssampled_rewardsrg   continues_dreamed_t0_to_H_Bx1      ?is_terminated)rd   computed_continuessampled_continuesrg   )rh   r   r   ri   r+   r   rk   r   rj   r    swapaxes_report_rewardsREWARDS_report_continues)rd   rl   	burn_in_T	dreamed_Tr#   rY   rZ   r%   r1   final_result_key_obsfinal_result_key_rewfinal_result_key_contrS   t0tHs                  r*   )report_dreamed_eval_trajectory_vs_samplesr      s+   @ 	+\:   J NN?$5|uNUUUR9RRRG	GGG  	J	III   +u===+u===,>>> 334Q79:1=#gk*04  O 
B	iB$&KA$F$FaC%
 $GK01be<(	 	 	 	 #$AB1Ew/2b59(	    %&EFqI!88!!!RU(C)	     r,   c                     |                                 }|                                }|                                }|                                }|                     |j        ||||dt          d           d S )N)capacitysize_num_episodessize_timestepsreplayed_stepsadded_stepsr   )keywindow)get_num_episodesget_num_timestepsget_sampled_timestepsget_added_timestepslog_dictr   r	   )rd   replay_bufferepisodes_in_bufferts_in_bufferr   r   s         r*   !report_sampling_and_replay_bufferr   !  s    &7799 2244L"88::N3355K %.!3*,&	
 	
 "  
 
 
 
 
r,   c                    t          |j                  dv r/t          |j                  dk    rdnd}|rt          |          }|sX|dz   dz  }|dz   dz  }t          j        |dd                              t          j                  }t          j        ||          }t          j        |dd                              t          j                  }t          j        ||          }t          j        ||gd	
          }t          |j                  dk    rt          j	        |d	          }| 
                    ||dd           dS dS )a  Summarizes computed- vs sampled observations: MSE and (if applicable) images.

    Args:
        metrics: The MetricsLogger object of the DreamerV3 algo.
        computed_float_obs_B_T_dims: Computed float observations
            (not clipped, not cast'd). Shape=(B, T, [dims ...]).
        sampled_obs_B_T_dims: Sampled observations (as-is from the environment, meaning
            this could be uint8, 0-255 clipped images). Shape=(B, T, [dims ...]).
        metrics_key: The metrics key (or key sequence) under which to log ths resulting
            video sequence.
        symlog_obs: Whether to inverse-symlog the computed observations or not. Set this
            to True for environments, in which we should symlog the observations.
    )      r   )r   r   r   r      )r   r   r   r   r      g        g     o@r   rI   r   item_seriesr   )reducer   N)rM   r   r
   r    clipastypeuint8	transposerP   expand_dims	log_value)rd   re   rf   rg   rY   transpose_axescomputed_imagessampled_vs_computed_imagess           r*   rj   rj   5  s   . %&&&00  ##7#=>>!CCOO 	  	V*89T*U*U'  	V+F+LPS*S'$83$>##E #%7+?e#L#L#S#S$ $  $&<0Dn#U#U '"=sEJJQQH
 
 ,GG &(^23&
 &
 &
"
 #)**e33)+8RTV)W)W&& 	 	 	
 	
 	
 	
 	
A 10r,   c                     t          j        t          j        ||z
                      }t          j        |          }|                     ||d           d S Nr   )r   )r    meansquarer   )rd   r|   r}   rg   mse_sampled_vs_computed_rewardss        r*   r   r   t  sl     ')g
	"_455' '# ')g.M&N&N#'      r,   c                     t          j        t          j        ||                    |j                  z
                      }|                     ||d           d S r   )r    r   r   r   dtyper   )rd   r   r   rg   !mse_sampled_vs_computed_continuess        r*   r   r     sr     )+
	!2!9!9:L:R!S!SS	
 	
) )%
 )      r,   )r   )__doc__r   r    .ray.rllib.algorithms.dreamerv3.utils.debuggingr   r   ray.rllib.corer   ray.rllib.core.columnsr   ray.rllib.utils.frameworkr   ray.rllib.utils.metricsr   r	   ray.rllib.utils.torch_utilsr
   r   rF   r+   rX   boolrr   r   r   rj   r   r    r,   r*   <module>r      s               - , , , , , * * * * * * 6 6 6 6 6 6        7 6 6 6 6 6q )! )! )! )!d 	<
 <
 <
 <
 <
J = = = = = = = =N W W W W W 
W W W Wt  (<
 <
 <
~  $    r,   