
    &`i<                     b   d dl Z d dlmZmZmZmZmZmZmZ d dl	Z	d dl
mZ d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0m1Z1m2Z2 erd dl3m4Z4  e            \  Z5Z6dZ7 G d de+e          Z8dS )    N)TYPE_CHECKINGAny
CollectionDictIterableOptionalUnion)DataIterator)ALL_MODULESCOMPONENT_RL_MODULE)SelfSupervisedLossAPI)MultiRLModuleSpec)MultiAgentBatch)override)Checkpointable)
get_devicetry_import_torch)	DATASET_NUM_ITERS_EVALUATED$DATASET_NUM_ITERS_EVALUATED_LIFETIMEMODULE_SAMPLE_BATCH_SIZE_MEANNUM_ENV_STEPS_SAMPLEDNUM_ENV_STEPS_SAMPLED_LIFETIMENUM_MODULE_STEPS_SAMPLED!NUM_MODULE_STEPS_SAMPLED_LIFETIMEOFFLINE_SAMPLING_TIMERWEIGHTS_SEQ_NO)MiniBatchRayDataIterator)convert_to_numpy)Runnerconvert_to_torch_tensor)
DeviceTypeModuleID	StateDict
TensorType)AlgorithmConfigtotal_eval_lossc                      e Zd Z	 d,dddee         fdZ ee          	 	 d-ded	ed
dfd            Z	d
e
fdZded	ed
dfdZ ee          d             Z ee          	 d,dddeeeee         f                  deeeee         f                  d
efd            Zd
efdZ ee          d.d            Z ee          d.d            Z ee          d             Z ee          d             Z	 	 	 d/dedededed
ef
dZdeeef         deeef         d
eeef         fdZdedddeeef         deeef         d
ef
d Z  ee          d!ed
dfd"            Z!ded
dfd#Z" ee          d$             Z# ee          d%             Z$d& Z%e&d
e'fd'            Z(d( Z)e&d
e*fd)            Z+e&d
ee,df         fd*            Z-e&d
efd+            Z.dS )0OfflineEvaluationRunnerNconfigr&   module_specc                     || _         d | _        d | _        t          j        | fd|i| t          j        |            t          j        |                                 |           | _	        d S )Nr*   )
%_OfflineEvaluationRunner__module_spec*_OfflineEvaluationRunner__dataset_iterator(_OfflineEvaluationRunner__batch_iteratorr   __init__r   types
MethodTypeget_loss_for_module_fn_loss_for_module_fn)selfr*   r+   kwargss       /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/offline/offline_evaluation_runner.pyr0   z OfflineEvaluationRunner.__init__*   st     1<"& $66V6v666%%% $)#3D4O4O4Q4QSW#X#X       FTexploretrainreturnc                    | j         t          |  d          | j        s | j        di | j        j        | _        | j                            t          | j
        d           | j                            t                    5  || j        j        }|                     ||          cd d d            S # 1 swxY w Y   d S )NzM doesn't have a data iterator. Can't call `run` on `OfflineEvaluationRunner`.   keyvaluewindow)r9   r:    )r.   
ValueError_batch_iterator_create_batch_iteratorr*   iter_batches_kwargsr/   metrics	log_valuer   _weights_seq_nolog_timer   r9   	_evaluate)r5   r9   r:   r6   s       r7   runzOfflineEvaluationRunner.run>   s>    "* - - -  
 # 	$?D$? % %+1% %D!
 	& 	 	
 	
 	
 \""#9:: 	 	+- >> "  	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   %B55B9<B9c                 `    t          d| j        | j        | j        j        | j        j        d|S )N)iteratordeviceminibatch_size	num_itersrB   )r   _dataset_iterator_devicer*   "offline_eval_batch_size_per_runner!dataset_num_iters_per_eval_runner)r5   r6   s     r7   rE   z.OfflineEvaluationRunner._create_batch_iteratorb   sE     ( 
+<;IkC	
 

 
 
 	
r8   c                    t          | j                  D ]\  }}t          |j                                                  t          | j                                                  z
  }|rt          d| d          |r | j                            |j                  }nA|r | j                            |j                  }n| j        	                    |j                  }| 
                    ||j                  }|                     |           | j                            t          t          f|dz   d           | j                            t          t           f|dz   d           t#          |                                          D ])\  }}	| j                            |t&          f|	d           *| j                                        S )	Nz&Batch contains one or more ModuleIDs (z) that are not in this Learner!)fwd_outbatchr=   sumreducelifetime_sumr>   )	enumeraterD   setpolicy_batcheskeysmodulerC   forward_explorationforward_trainforward_inferencecompute_eval_losses_log_steps_evaluated_metricsrG   rH   r   r   r   r   itemsTOTAL_EVAL_LOSS_KEYr[   )
r5   r9   r:   	iterationtensor_minibatchunknown_module_idsrW   eval_loss_per_modulemidlosss
             r7   rK   z!OfflineEvaluationRunner._evaluatem   s    ,5T5I+J+J 	@ 	@'I' "%%5%D%I%I%K%K!L!Ls  ""P P " "  0=O 0 0 0  
  Y+99$3   Y+334D4STT+778H8WXX#'#;#;'7'F $< $ $  --.>???? 	56M	 	 	
 	
 	
 	>?M! 	 	
 	
 	
 **>??EEGG 	 	ICL""-. #     |""$$$r8   c                     dd| j         ifS )NrB   r*   )r*   r5   s    r7   get_ctor_args_and_kwargsz0OfflineEvaluationRunner.get_ctor_args_and_kwargs   s     t{#
 	
r8   )not_components
componentsrr   c                    i }|                      t          ||          r^ | j        j        d|                     t          |          |                     t          |          d||t          <   | j        |t          <   |S )N)rs   rr   rB   )_check_componentr   ra   	get_state_get_subcomponentsrI   r   )r5   rs   rr   r6   states        r7   rv   z!OfflineEvaluationRunner.get_state   s       !4j.QQ 	9)>)> *223F
SS#66'   * *
 * *E%& %)$8E.!r8   c                      t          |          S )z0Converts structs to a framework-specific tensor.r    )r5   structs     r7   _convert_to_tensorz*OfflineEvaluationRunner._convert_to_tensor   s    &v...r8   c                     dS )zReleases all resources used by this EnvRunner.

        For example, when using a gym.Env in this EnvRunner, you should make sure
        that its `close()` method is called.
        NrB   rp   s    r7   stopzOfflineEvaluationRunner.stop   s	     	r8   c                     dS )z:If this Actor is deleted, clears all resources used by it.NrB   rp   s    r7   __del__zOfflineEvaluationRunner.__del__   s	     	r8   c                 8    | j         rt          | d          sJ dS )a  Checks that self.__init__() has been completed properly.

        Ensures that the instances has a `MultiRLModule` and an
        environment defined.

        Raises:
            AssertionError: If the EnvRunner Actor has NOT been properly initialized.
        ra   N)rR   hasattrrp   s    r7   assert_healthyz&OfflineEvaluationRunner.assert_healthy   s(     %A'$*A*AAAAAAr8   c                 4    | j                                         S N)rG   r[   rp   s    r7   get_metricsz#OfflineEvaluationRunner.get_metrics   s    |""$$$r8   rX   	to_device
pin_memory
use_streamc                     t          |j        |r| j        nd ||          }t          d |                                D                       }t          ||          }|S )N)rO   r   r   c              3   4   K   | ]}t          |          V  d S r   )len).0bs     r7   	<genexpr>z>OfflineEvaluationRunner._convert_batch_type.<locals>.<genexpr>   s(      44SVV444444r8   )	env_steps)r!   r_   rS   maxvaluesr   )r5   rX   r   r   r   lengths         r7   _convert_batch_typez+OfflineEvaluationRunner._convert_batch_type   so     ( #,64<<$!!	
 
 
 44U\\^^44444888r8   rW   c                n   i }|D ]}||         }||         }| j         |                                         }t          |t                    r3|                    | || j                            |          ||          }n1|                     || j                            |          ||          }|||<   |S )N)learner	module_idr*   rX   rW   r   r*   rX   rW   )ra   	unwrapped
isinstancer   compute_self_supervised_lossr*   get_config_for_modulecompute_eval_loss_for_module)	r5   rW   rX   loss_per_moduler   module_batchmodule_fwd_outra   rn   s	            r7   re   z+OfflineEvaluationRunner.compute_eval_losses   s       	. 	.I +L$Y/N[+5577F&"788 :: ';<<YGG&* ;   88';<<YGG&*	 9   *.OI&&r8   r   c                4    |                      ||||          S )Nr   )r4   )r5   r   r*   rX   rW   s        r7   r   z4OfflineEvaluationRunner.compute_eval_loss_for_module  s-     ''	 ( 
 
 	
r8   rx   c                 <   t           |v r|                    t          d          }|dk    s| j        |k     rU|t                    }t	          |t
          j                  rt          j        |          }| j                            |           |dk    r|| _        d S d S d S Nr   )	r   getr   rI   r   ray	ObjectRefra   	set_state)r5   rx   weights_seq_norl_module_states       r7   r   z!OfflineEvaluationRunner.set_state%  s     %'' #YY~q99N ""d&:^&K&K"'(;"<os}== ?&)go&>&>O%%o666 !!'5$$$ (' "!r8   c                    |j                                         D ]\  }}| j                            |t          f| j        d           t          |          }| j                            |t          f|           | j                            |t          f|d           | j                            |t          f|d           | j                            t          t          f|d           | j                            t          t          f|d           | j                            t          t          f|                                d           | j                            t          t          f|                                dd	           d S )
Nr=   )rA   )r?   r@   rY   )r?   r@   r[   r\   rZ   T)r[   with_throughput)r_   rg   rG   rH   r   rI   r   r   r   r   r   r   r   r   )r5   rX   rm   r   module_batch_sizes        r7   rf   z4OfflineEvaluationRunner._log_steps_evaluated_metrics:  s   !&!5!;!;!=!= #	 #	CL""n%$ #    !$L 1 1L""78' #   
 L""23' #   
 L"";<'% #    L"" ":;' #   
 L"" "CD'% #     	/0OO 	 	
 	
 	

 	89OO! 	 	 	
 	
 	
 	
 	
r8   c                     	 t          | j        | j        sdn| j        j                  | _        d S # t
          $ r d | _        Y d S w xY wr   )r   r*   worker_index num_gpus_per_offline_eval_runner _OfflineEvaluationRunner__deviceNotImplementedErrorrp   s    r7   
set_devicez"OfflineEvaluationRunner.set_devicel  se    
	!&  ,FAAE DMMM # 	! 	! 	! DMMMM	!s   -1 AAc                 f    	 ddl m}  j        sN j                             j        j        | j        j         j        j        fi j        j                   _	         j        
                                 _         j                             fd           d S # t          $ r d  _        Y d S w xY w)Nr   )INPUT_ENV_SPACES)envspacesinference_onlyc                 z    t          |t          j        j                  r|                    j                  n|S r   )r   torchnnModuletorS   )rm   modr5   s     r7   <lambda>z5OfflineEvaluationRunner.make_module.<locals>.<lambda>  s2    ,6sEHO,L,LUCFF4<(((RU r8   )ray.rllib.envr   _module_specr*   get_multi_rl_module_specr   observation_spaceaction_space%offline_eval_rl_module_inference_onlyr-   buildra   foreach_moduler   )r5   r   s   ` r7   make_modulez#OfflineEvaluationRunner.make_modulez  s     	666666$ %)[%I%I( K9 K4+ $(;#T &J 
& 
&" +1133DK K&&        # 	 	 	DKKKK	s   BB B0/B0c                 b    | j         j        p#| j                                         j        d         S )Ncompute_loss_for_module)r*   offline_loss_for_module_fnget_default_learner_class__dict__rp   s    r7   r3   z.OfflineEvaluationRunner.get_loss_for_module_fn  s2     K2 {4466?)	
r8   c                     | j         S )zReturns the dataset iterator.r.   rp   s    r7   rR   z)OfflineEvaluationRunner._dataset_iterator  s     &&r8   c                     || _         dS )zSets the dataset iterator.Nr   )r5   rN   s     r7   set_dataset_iteratorz,OfflineEvaluationRunner.set_dataset_iterator  s    "*r8   c                     | j         S r   )r/   rp   s    r7   rD   z'OfflineEvaluationRunner._batch_iterator  s    $$r8   c                     | j         S r   )r   rp   s    r7   rS   zOfflineEvaluationRunner._device  s
    }r8   c                     | j         S )z1Returns the `MultiRLModuleSpec` of this `Runner`.)r-   rp   s    r7   r   z$OfflineEvaluationRunner._module_spec  s     !!r8   r   )FT)r;   N)TFF)/__name__
__module____qualname__r   r   r0   r   r   boolrL   r   rE   rK   r   rq   r	   strr   r$   rv   r%   r{   r}   r   r   r   r   r   r   r   re   r#   r   r   rf   r   r   r3   propertyr
   rR   r   r   rD   r"   rS   r   rB   r8   r7   r)   r)   )   s        48Y Y!Y /0Y Y Y Y( Xf ! !! !
 
! ! ! !F	
( 	
 	
 	
 	
7%7% 7% 
	7% 7% 7% 7%r Xn
 
 
 Xn =A AE	  U3
3#789 !sJsO';!<=	 
   */J / / / / Xf    Xf    Xf
B 
B 
B Xf% % %      	
  
   $sCx.15c3h	c3h   :
 
 "	

 CH~
 c:o&
 

 
 
 
  Xn6y 6T 6 6 6 6(0
/ 0
d 0
 0
 0
 0
d Xf! ! ! Xf! ! !F
 
 
 '< ' ' ' X'+ + + %!9 % % % X% z4/0    X "/ " " " X" " "r8   r)   )9r1   typingr   r   r   r   r   r   r	   r   ray.data.iteratorr
   ray.rllib.corer   r   ray.rllib.core.rl_module.apisr   (ray.rllib.core.rl_module.multi_rl_moduler   ray.rllib.policy.sample_batchr   ray.rllib.utils.annotationsr   ray.rllib.utils.checkpointsr   ray.rllib.utils.frameworkr   r   ray.rllib.utils.metricsr   r   r   r   r   r   r   r   r   ray.rllib.utils.minibatch_utilsr   ray.rllib.utils.numpyr   ray.rllib.utils.runners.runnerr   ray.rllib.utils.torch_utilsr!   ray.rllib.utils.typingr"   r#   r$   r%   %ray.rllib.algorithms.algorithm_configr&   r   _rh   r)   rB   r8   r7   <module>r      sL    R R R R R R R R R R R R R R R R R R 



 * * * * * *        @ ? ? ? ? ? F F F F F F 9 9 9 9 9 9 0 0 0 0 0 0 6 6 6 6 6 6 B B B B B B B B
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 E D D D D D 2 2 2 2 2 2 1 1 1 1 1 1 ? ? ? ? ? ? N N N N N N N N N N N N FEEEEEEq' S" S" S" S" S"fn S" S" S" S" S"r8   