
    &`izl                     \   d dl Z d dlZd dlZd dlmZmZmZmZmZm	Z	m
Z
mZ d dlZd dlZd dlZd dlmZ d dlmZmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lm Z  d d
l!m"Z"m#Z# d dl$m%Z% erd dl&m'Z' ej(        ej(        ej)        ej)        ej*        ej*        ej+        ej+        ej,        ej,        ej-        ej-        ej.        ej.        ej/        ej/        ej0        ej0        ej1        ej1        ej2        ej2        ddddddiZ3 ej4        e5          Z6 e%d           G d d                      Z7dS )    N)TYPE_CHECKINGAnyDictListOptionalSetTupleUnion)Columns)MultiRLModuleMultiRLModuleSpec)SingleAgentEpisode)flatten_dict)OverrideToImplementCustomLogic5OverrideToImplementCustomLogic_CallToSuperRecommended)unpack_if_needed)ReplayBuffer)EpisodeTypeModuleID)	PublicAPI)AlgorithmConfigagent_indexdones	unroll_idalpha)	stabilityc                   4   e Zd ZdZedddddddeeej        ej        f                  dee	         dee
eef                  d	e
eef         f
d
            Zede
eej        f         de
eej        f         fd            Zedefd            Zede
eef         fd            Zdee         dee         fdZddefdZeeedddddfdede
eee ej        f         f         de
eef         dedeee                  dee         dej        dej        d	e
eef         de
eee!         f         fd                        Z"eeeddfdede
eee ej        f         f         de
eef         dedeee                  de
eee!         f         fd                        Z#dS )OfflinePreLearnera?  Class that coordinates data transformation from dataset to learner.

    This class is an essential part of the new `Offline RL API` of `RLlib`.
    It is a callable class that is run in `ray.data.Dataset.map_batches`
    when iterating over batches for training. It's basic function is to
    convert data in batch from rows to episodes (`SingleAGentEpisode`s
    for now) and to then run the learner connector pipeline to convert
    further to trainable batches. These batches are used directly in the
    `Learner`'s `update` method.

    The main reason to run these transformations inside of `map_batches`
    is for better performance. Batches can be pre-fetched in `ray.data`
    and therefore batch trransformation can be run highly parallelized to
    the `Learner''s `update`.

    This class can be overridden to implement custom logic for transforming
    batches and make them 'Learner'-ready. When deriving from this class
    the `__call__` method and `_map_to_episodes` can be overridden to induce
    custom logic for the complete transformation pipeline (`__call__`) or
    for converting to episodes only ('_map_to_episodes`).

    Custom `OfflinePreLearner` classes can be passed into
    `AlgorithmConfig.offline`'s `prelearner_class`. The `OfflineData` class
    will then use the custom class in its data pipeline.
    N)spacesmodule_specmodule_stateconfigr   r   r    r!   kwargsc                8   || _         | j         j        | _        | j         j        | _        |                                | _        | j                            |           |pd\  | _        | _        | j                             | j        | j                  | _	        | j         j
        | _        |j        | _        d| _        | j        s | j                                        s| j        r6| j         j        p| j        }| j        | j         j        z  } |di || _        d S d S )N)NN)input_observation_spaceinput_action_spacer    )r"   input_read_episodesinput_read_sample_batchesbuild_module	set_stateobservation_spaceaction_spacebuild_learner_connector_learner_connectorpolicies_to_train_policies_to_trainis_multi_agent_is_multi_agentiter_since_last_module_updateis_statefulprelearner_buffer_classdefault_prelearner_buffer_class default_prelearner_buffer_kwargsprelearner_buffer_kwargsepisode_buffer)selfr"   r   r    r!   r#   r7   r:   s           x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/offline/offline_prelearner.py__init__zOfflinePreLearner.__init__Q   sM    (.)-)H /3{/T&&1&7&7&9&9|,,, 5;4Jl1 1 #'+"E"E$($:#0 #F #
 #

 #'+"?%+%:23* *	|''))	 '	 3 87 $
 5+67 %
 #:"9 # #*# #D	 	    batchreturnc           	         | j         rddlddlfd|d         D             }|                     |          }| j                            |           | j                            | j        j        | j	        
                                r | j        j                            dd          nd| j                            dd          pddd	          }nG| j        rt                              | j        |dt"          | j        j        z  | j        j        
          d         }|                     |          }| j                            |           | j                            | j        j        | j	        
                                r | j        j                            dd          nd| j                            dd          pddd	          }nM|                     | j        |t"          | j        j        z  d| j        j        | j        | j                  d         }|                     | j	        i |i d          }|D ]}|                     ||          s||= t3          |          S )aq  Prepares plain data batches for training with `Learner`'s.

        Args:
            batch: A dictionary of numpy arrays containing either column data
                with `self.config.input_read_schema`, `EpisodeType` data, or
                `BatchType` data.

        Returns:
            A `MultiAgentBatch` that can be passed to `Learner.update` methods.
        r   Nc                 l    g | ]0}t          j                            |j                             1S ))object_hook)r   
from_stateunpackbdecode).0statemnpmsgpacks     r=   
<listcomp>z.OfflinePreLearner.__call__.<locals>.<listcomp>   sM     2 2 2  #-OOEszOBB 2 2 2r?   itemmax_seq_lenn_step   T)	num_itemsbatch_length_TrO   sample_episodesto_numpy)rT   schemainput_compress_columnsepisodesF)rU   rT   rV   r-   r.   )	rl_moduler@   rW   shared_datametrics)r(   rK   msgpack_numpy_validate_episodesr;   addsampler"   train_batch_size_per_learnerr+   r6   model_configgetr)   r   _map_sample_batch_to_episoder4   SCHEMAinput_read_schemarV   _map_to_episodesr-   r.   r0   _should_module_be_updatedr   )r<   r@   rW   	module_idrJ   rK   s       @@r=   __call__zOfflinePreLearner.__call__   s    # C	NNN''''2 2 2 2 2 #6]	2 2 2H ..x88H##H---*11+B<++-- t{7;;M1MMM{x338q !% 2 
 
HH + &	 ">>$ =='+{'I ?     ..x88H##H---*11+B<++-- t{7;;M1MMM{x338q !% 2 
 
HH 261F1F$ =='+{'I"&"8!. 2G 2 2 2HL ''l  ( 
 
  	% 	%I11)UCC %)$ E"""r?   c                     ddl m} |S )zSets the default replay buffer.r   )EpisodeReplayBuffer)4ray.rllib.utils.replay_buffers.episode_replay_bufferrj   )r<   rj   s     r=   r8   z1OfflinePreLearner.default_prelearner_buffer_class  s(    	
 	
 	
 	
 	
 	

 #"r?   c                 :    | j         j        dz  | j         j        dS )zSets the default arguments for the replay buffer.

        Note, the `capacity` might vary with the size of the episodes or
        sample batches in the offline dataset.
        
   )capacitybatch_size_B)r"   r_   )r<   s    r=   r9   z2OfflinePreLearner.default_prelearner_buffer_kwargs  s(     @2E KD
 
 	
r?   rW   c                 .   t          d |D                       st          d          t                      }t                      }|D ]M}|j        |vrB|j        | j        j        vr/|                    |j                   |                    |           N|S )a  Validate episodes sampled from the dataset.

        Note, our episode buffers cannot handle either duplicates nor
        non-ordered fragmentations, i.e. fragments from episodes that do
        not arrive in timestep order.

        Args:
            episodes: A list of `SingleAgentEpisode` instances sampled
                from a dataset.

        Returns:
            A set of `SingleAgentEpisode` instances.

        Raises:
            ValueError: If not all episodes are `done`.
        c              3   $   K   | ]}|j         V  d S N)is_done)rH   epss     r=   	<genexpr>z7OfflinePreLearner._validate_episodes.<locals>.<genexpr>6  s$      3333;333333r?   zWhen sampling from episodes (`input_read_episodes=True`) all recorded episodes must be done (i.e. either `terminated=True`) or `truncated=True`).)all
ValueErrorsetid_r;   episode_id_to_indexr]   )r<   rW   unique_episode_idscleaned_episodesrt   s        r=   r\   z$OfflinePreLearner._validate_episodes"  s    ( 33(33333 	(   !UU55 	* 	*C111G4#6#JJJ"&&sw/// $$S)))r?   c                     | j         sdS t          | j                   s|t          | j                   v S |                      ||          S )z:Checks which modules in a MultiRLModule should be updated.T)r2   callablerx   )r<   rg   multi_agent_batchs      r=   rf   z+OfflinePreLearner._should_module_be_updatedI  sT    & 	I4$122 	ID$; < <<<**96GHHHr?   Fr3   rU   rT   rV   ignore_final_observationr-   r.   c                    pg g }	t          |t          j                                    D ]\  }
| rMt          j        |v r|t          j                                   nd         |v r|d                           nd}nd}| rZt          j        v rt	          |
          n|
}|r)t          j        d t          j        |                    }nWt          j        v r+t	          |t          j	                                             n|t          j	                                   }t          t          j                 |v r+t          |t          j                                             nt          j                    j        |||gi t          j                 |v r|t          j                                   ni gt          j        v r+t	          |t          j                                             n|t          j                                   g|t          j                                   g|t          j                 |v rt          j                 nd                  t          j                 |v r|t          j                                   ndfd|                                D             d
  
        }|r|                                 |	                    |           d	|	iS )
z!Maps a batch of data to episodes.r   Nc                     d| z  S )Nr   r'   )xs    r=   <lambda>z4OfflinePreLearner._map_to_episodes.<locals>.<lambda>  s
    !a% r?   r   Fc                     i | ]F\  }}|vr=|                                 vr'|d v#||v rt          |                   n|         gGS )r   r   typevaluesr   rH   kvirV   rU   s      r=   
<dictcomp>z6OfflinePreLearner._map_to_episodes.<locals>.<dictcomp>  s     ) ) ) !AqVOO ! 8 8 !)I I I  $::: -QqT222!"1 !J I Ir?   r   
ry   agent_idobservationsinfosactionsrewards
terminated	truncatedextra_model_outputslen_lookback_bufferrW   )	enumerater   OBSAGENT_IDr   treemap_structurecopydeepcopyNEXT_OBSr   EPS_IDstruuiduuid4hexINFOSACTIONSREWARDSTERMINATEDS
TRUNCATEDSitemsrT   append)r3   r@   rU   rT   rV   r   r-   r.   r#   rW   obsr   unpacked_obsunpacked_next_obsepisoder   s     ` `          @r=   re   z"OfflinePreLearner._map_to_episodesS  sB     "8!=2fW[&9 :;; c	) c	)FAs    '500 &!123A66 "-0E99 f]34Q77!    P)
 {&<<< %S)))  , 	(,(:'|)D)D) )%% #;*@@@ )vg6F/G)H)KLLL"6'*:#;<Q? & -gn-66 E&"89!<===)% #/0A!B!'-0E99 fW]34Q77	 #?.DDD )vgo/F)G)JKKK"6'/#:;A>
 #6'/#:;A>?$!'"56%?? w233$  	  g01U:: $F7+=$>?BB) ) ) ) ) ) %*KKMM) ) ) )*k6 6 6p  '$$&&&((((H%%r?   c                 	  
 pg g }t          |t          j                                    D ]P\  
| r'd         |v r|d                  
         d         nd}nd}| r4t          t                    r6t                    fdt          j        d                   D             nt          t          j	                  rJt          j                 v r!t          
                                          n
                                n t          dt                     d          t          j                 |v r}                    t          j                 v r1t          |t          j                          
         d                   n#|t          j                          
         d                    n                    d                    t          j                 |v r]t          j                 |v rI|t          j                          
         d         }|t          j                          
         d         }nt          j                 |v r;t          j                 |vr'|t          j                          
         d         }d}nnt          j                 |vr;t          j                 |v r'|t          j                          
         d         }d}nd	|v r|d	         
         d         }d}nd
}d}t#          t          j                 |v r1t	          |t          j                          
         d                   nt'          j                    j        |t          j                 |v r|t          j                          
         ni gt/                    z  t          j        v r+t          |t          j                          
                   n|t          j                          
         |t          j                          
         ||
fd|                                D             d
  
        }	|r|	                                 |                    |	           Rd|iS )z6Maps an old stack `SampleBatch` to new stack episodes.r   r   Nc                 $    g | ]}|d f         S ).r'   )rH   r   r   s     r=   rL   zBOfflinePreLearner._map_sample_batch_to_episode.<locals>.<listcomp>  s!    DDD13q#v;DDDr?   zUnknown observation type: z. When mapping from old recorded `SampleBatches` batched observations should be either of type `np.array` or - if the column is compressed - of `str` type.FdoneTc                     i | ]E\  }}|vr<|                                 vr&|d v#||v rt          |                   n|         FS r   r   r   s      r=   r   zBOfflinePreLearner._map_sample_batch_to_episode.<locals>.<dictcomp>Z  s{     
) 
) 
) !AqVOO ! 8 8 !)I I I  666 ,AaD111qT
 !J I Ir?   r   rW   )r   r   r   
isinstancer   r   rangeshapenpndarraytolist	TypeErrorr   r   r   r   r   r   r   r   r   r   r   lenr   r   r   rT   )r3   r@   rU   rT   rV   rW   r   r   r   r   r   r   s     ` `     @@r=   rb   z.OfflinePreLearner._map_sample_batch_to_episode  s    "8!=2 fW[&9 :;; I	) I	)FAs    m,55 &/03A66	    {)
 c3''  +3//C EDDDcil0C0CDDDCC  RZ00  "'+.2HHH )666 ZZ\\ C $LT#YY L L L   '*+u44 JJ!'+.2HHH )vg6F/G)H)KB)OPPP"6'*:#;<Q?C    JJs2w'''
 7-.%77w23u<< %fW-?&@ A! DR HI!&vg.A'B!CA!Fr!JJJ7-.%77w235@@ %fW-?&@ A! DR HI!&JJ7-.e;;w23u<<!&vg.A'B!CA!Fr!JJ %IIu__!&vq!1"!5J %II "&J %I - gn-66 E&"89!<Q?@@@)%!$ "'-0E99 fW]34Q77 TCHH_
 #?.DDD )vgo/F)G)JKKK"6'/#:;A>!&"9:1=)'
) 
) 
) 
) 
) 
) %*KKMM	
) 
) 
) )*S* * *\  '$$&&&((((H%%r?   rr   )$__name__
__module____qualname____doc__r   r   r	   gymSpacer   r   r   r   r   r>   r   r   r   rh   propertyr   r8   r9   r   r   r   r\   boolrf   staticmethodrc   r
   listr   re   rb   r'   r?   r=   r   r   5   s_        4 ;
 9=376:8 8 8 "8 sy#)345	8
 /08 tHcM238 sCx.8 8 8 ;:8t $}#d3
?3 }#S"*_8M }# }# }# $#}#~ # # # # X# 	
$sCx. 	
 	
 	
 X	
% /0% 		 %  %  %  % NI Id I I I I $ "(6:38'+"&v& v&v&CtRZ/001v& S#Xv& 	v&
 !)c 3v& #+4.v& 9v& iv& sCx.v& 
c4$$	%v& v& v& \ $#v&p $ "(6:_& _&_&CtRZ/001_& S#X_& 	_&
 !)c 3_& 
c4$$	%_& _& _& \ $#_& _& _&r?   r   )8r   loggingr   typingr   r   r   r   r   r   r	   r
   	gymnasiumr   numpyr   r   ray.rllib.core.columnsr   (ray.rllib.core.rl_module.multi_rl_moduler   r   "ray.rllib.env.single_agent_episoder   ray.rllib.utilsr   ray.rllib.utils.annotationsr   r   ray.rllib.utils.compressionr   ,ray.rllib.utils.replay_buffers.replay_bufferr   ray.rllib.utils.typingr   r   ray.util.annotationsr   %ray.rllib.algorithms.algorithm_configr   r   r   	MODULE_IDr   r   r   r   r   r   r   Trc   	getLoggerr   loggerr   r'   r?   r=   <module>r      sM      N N N N N N N N N N N N N N N N N N N N          * * * * * * U U U U U U U U A A A A A A ( ( ( ( ( (        9 8 8 8 8 8 E E E E E E 8 8 8 8 8 8 8 8 * * * * * * FEEEEEE NGNg&w(KOW_OW_M7=g&,*Iwy=W
$ 
	8	$	$ Wx& x& x& x& x& x& x& x& x& x&r?   