
    &`iT                        d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZm	Z	 d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZ d dlmZm Z  d dl!m"Z" d dl#m$Z$m%Z% d dl&m'Z' d dl(m)Z)m*Z* d dl+m,Z,m-Z-  e j.        e/          Z0 e            \  Z1Z2dZ3dZ4dZ5dZ6dZ7dZ8dZ9dZ:dZ;da< G d de          Z=e=Z> G d dej?                  Z@ G d  d!ej?                  ZAdS )"    N)deque)AnyDictUnion)CircularBuffer)&LEARNER_RESULTS_CURR_ENTROPY_COEFF_KEY)COMPONENT_RL_MODULE)Learner)TrainingDataValueFunctionAPI)5OverrideToImplementCustomLogic_CallToSuperRecommendedoverride)try_import_torch)LambdaDefaultDict)ALL_MODULESNUM_ENV_STEPS_SAMPLED_LIFETIME)MetricsLogger))DEFAULT_HISTOGRAM_BOUNDARIES_SHORT_EVENTSTimerAndPrometheusLogger)	Scheduler)ModuleID
ResultDict)Gauge	Histogramgpu_loader_queue_wait_timergpu_loader_load_to_gpu_timer"learner_thread_in_queue_wait_timer learner_thread_env_steps_droppedlearner_thread_update_timerray_get_episodes_timerqueue_size_gpu_loader_queuequeue_size_learner_thread_queuequeue_size_results_queuec            
       n    e Zd Z ee           fd            Z ee          d fd            Z ee          dddedee	e
f         d	edefd
            Zedee	e
f         ddf fd            Z ee          de	f fd            Ze ee          dee         fd                        Z xZS )IMPALALearnerc                 R    t                      j        |i | t          ddt          d          | _        | j                            d| j        j        i           t          ddt          d          | _        | j                            d| j        j        i           t          dd	t          d          | _	        | j	                            d| j        j        i           t          d
dt          d          | _
        | j
                            d| j        j        i           d S )N rllib_learner_impala_update_timez2Time spent in the 'IMPALALearner.update()' method.rllibnamedescription
boundariestag_keysr*   +rllib_learner_impala_update_solve_refs_timez6Time spent on resolving refs in the 'Learner.update()'8rllib_learner_impala_update_make_batch_if_necessary_timez7Time spent on making a batch in the 'Learner.update()'.+rllib_learner_impala_get_learner_state_timez4Time spent on get_state() in IMPALALearner.update().)super__init__r   r   _metrics_learner_impala_updateset_default_tags	__class____name__)_metrics_learner_impala_update_solve_refs6_metrics_learner_impala_update_make_batch_if_necessary._metrics_learner_impala_get_learner_state_time)selfargskwargsr7   s      ~/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/algorithms/impala/impala_learner.pyr4   zIMPALALearner.__init__5   sl   $)&))) /83L@	/
 /
 /
+ 	+<<dn-.	
 	
 	
 :C>P@	:
 :
 :
6 	6GGdn-.	
 	
 	
 GPKQ@	G
 G
 G
C 	CTTdn-.	
 	
 	
 ?H>N@	?
 ?
 ?
; 	;LLdn-.	
 	
 	
 	
 	
    returnNc                     t                                                       t          j                     j        _        d _        t          j                     _        t           fd           _
        t          j                     _        t           d          st           j        j                   _         j        j        dk    rH fdt)           j        j                  D              _         j        D ]}|                                 t1          t2          j         j                    _         j                                         d S )Nr   c                 v    t          j                            |           j        j        j                  S )N)fixed_value_or_schedule	frameworkdevice)r   configget_config_for_moduleentropy_coeffrE   _device)	module_idr<   s    r?   <lambda>z%IMPALALearner.build.<locals>.<lambda>t   s6    iK55i@@N.|   r@   _learner_thread_in_queue)maxlenc                 ^    g | ])}t          j        j        j        j                   *S ))in_queue	out_queuerF   metrics_logger)_GPULoaderThread_gpu_loader_in_queuerM   rJ   metrics).0_r<   s     r?   
<listcomp>z'IMPALALearner.build.<locals>.<listcomp>   sP     ( ( (  !!6";<#'<	  ( ( (r@   )update_methodrP   learner)r3   build	threadingRLockrU   _threading_lock_num_updatesLock_num_updates_lockr   #entropy_coeff_schedulers_per_modulequeueQueuerT   hasattrr   rG   learner_queue_sizerM   num_gpus_per_learnerrangenum_gpu_loader_threads_gpu_loader_threadsstart_LearnerThreadr
   update_learner_thread)r<   tr7   s   ` r?   r[   zIMPALALearner.buildb   s_    (1'8'8$!*!1!1
    
 
 	0  %*KMM! t788 	Y,19W,X,X,XD) ;+a//( ( ( ( t{ABB( ( (D$ -  				  .!.2  
  
  
 	""$$$$$r@   F)return_statetraining_data	timestepsrp   c                R    |pi a t           j                  5  t           j                  5  |                                 ddd           n# 1 swxY w Y   t           j                  5                       |          }|J 	 ddd           n# 1 swxY w Y    j        j        dk    rY j	        
                    |            j                            t          t          f j	                                                   nt!           j        t$                    rD j                            |          } j                            t          t(          f|d           n&t*                               j        | j                    j        5   j        }ddd           n# 1 swxY w Y   i }|dk    r? j        5  d _        ddd           n# 1 swxY w Y    j                                        }|rt           j                  5                        fd j                                        D             d	          }	t=          j
        |	t>                             |	t>          <   |	|d
<   ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   |S )aF  

        Args:
            batch:
            timesteps:
            return_state: Whether to include one of the Learner worker's state from
                after the update step in the returned results dict (under the
                `_rl_module_state_after_update` key). Note that after an update, all
                Learner workers' states should be identical, so we use the first
                Learner's state here. Useful for avoiding an extra `get_weights()` call,
                e.g. for synchronizing EnvRunner weights.
            **kwargs:

        Returns:

        N)rq   r   sumreduce   c                 T    g | ]$}                     |          t          d z   |z   %S )/)should_module_be_updatedr	   )rV   midr<   s     r?   rX   z(IMPALALearner.update.<locals>.<listcomp>   sG     $ $ $ ##<<SAA$/#5;$ $ $r@   T)
componentsinference_only_rl_module_state_after_update) _CURRENT_GLOBAL_TIMESTEPSr   r5   r9   
solve_refsr:   _make_batch_if_necessaryrG   rg   rT   putrU   	log_valuer   QUEUE_SIZE_GPU_LOADER_QUEUEqsize
isinstancerM   r   add LEARNER_THREAD_ENV_STEPS_DROPPEDrl   enqueuera   r_   rv   r;   	get_statemodulekeysrayr	   )
r<   rq   rr   rp   r>   batch
ts_droppedcountresultlearner_states
   `         r?   rm   zIMPALALearner.update   s   4 %.O!%d&IJJ <	L <	L)>  + + ((***+ + + + + + + + + + + + + + +
 *K  ) ) 55M5RR((((	) ) ) ) ) ) ) ) ) ) ) ) ) ) ) {/!33)--e444&& "=>-3355   
 d;^LL !%!>!B!B5!I!IJL**$&FG"$ +     #**5udl   ' * *)* * * * * * * * * * * * * * *F{{+ * *()D%* * * * * * * * * * * * * * *,,.. L-G  L L %)NN$ $ $ $'+{'7'7'9'9$ $ $
 (, %3 % %M :=%&9:: :M"56 ?LF:;L L L L L L L L L L L L L L L[<	L <	L <	L <	L <	L <	L <	L <	L <	L <	L <	L <	L <	L <	L <	L| s   JAJA	JA	J/BJB	JB	C7JF*J*F.	.J1F.	2JGJG	JG	 2JA'J9JJ			JJ		JJ #J c                @   t                                          |           | j                                        D ]`}| j        |                             |                    t          d                    }| j        	                    |t          f|d           ad S )N)rr   r   )timestep   )window)r3   before_gradient_based_updater   r   rb   rm   getr   rU   r   r   )r<   rr   rK   new_entropy_coeffr7   s       r?   r   z*IMPALALearner.before_gradient_based_update  s    ,,y,AAA))++ 		 		I $ H!fimm,JANNfOO  L""BC! #    		 		r@   rK   c                 ~    t                                          |           | j                            |           d S N)r3   remove_modulerb   pop)r<   rK   r7   s     r?   r   zIMPALALearner.remove_module(  s8    i(((044Y?????r@   c                     t           gS r   r   )clss    r?   rl_module_required_apisz%IMPALALearner.rl_module_required_apis-  s    
 !!!r@   rA   N)r8   
__module____qualname__r   r
   r4   r[   r   r   strr   boolr   rm   r   r   r   classmethodlisttyper   __classcell__r7   s   @r?   r&   r&   4   s       Xg*
 *
 *
 *
 *
X Xg@% @% @% @% @% @%D Xg #Y Y Y#Y S>	Y
 Y 
Y Y Y Yf ;c3h D      ;: Xg@s @ @ @ @ @ @ Xg"T
 " " "  [" " " " "r@   r&   c                   T     e Zd Zdej        dedej        def fdZ	d
dZ
d
d	Z xZS )rS   rP   rQ   rF   rR   c                \   t                                          d           d| _        || _        || _        d| _        || _        || _        t          ddt          d          | _
        | j
                            d	d
i           t          ddt          d          | _        | j                            d	d
i           t          ddt          d          | _        | j                            d	d
i           t          ddd          | _        | j                            d	d
i           d S )NrS   r,   Tr   0rllib_learner_impala_gpu_loader_thread_step_timez2Time taken in seconds for gpu loader thread _step.r)   r+   r*   zIMPALA/GPULoaderThread4rllib_learner_impala_gpu_loader_thread_step_get_timezBTime taken in seconds for gpu loader thread _step _in_queue.get().<rllib_learner_impala_gpu_loader_thread_step_load_to_gpu_timezGTime taken in seconds for GPU loader thread _step to load batch to GPU.9rllib_impala_gpu_loader_thread_in_qsize_beginning_of_stepzISize of the _GPULoaderThread in-queue size, at the beginning of the step.)r,   r-   r/   )r3   r4   daemon	_in_queue
_out_queue_ts_droppedrJ   rU   r   r   +_metrics_impala_gpu_loader_thread_step_timer6   8_metrics_impala_gpu_loader_thread_step_in_queue_get_time7_metrics_impala_gpu_loader_thread_step_load_to_gpu_timer   <_metrics_impala_gpu_loader_thread_in_qsize_beginning_of_step)r<   rP   rQ   rF   rR   r7   s        r?   r4   z_GPULoaderThread.__init__9  s    	0111!#%;DCL@	<
 <
 <
8 	8II./	
 	
 	
 IRG\@	I
 I
 I
E 	EVV./	
 	
 	
 HQOa@	H
 H
 H
D 	DUU./	
 	
 	
 MRLcM
 M
 M
I
 	IZZ./	
 	
 	
 	
 	
r@   rA   Nc                     	 t          | j                  5  |                                  d d d            n# 1 swxY w Y   Ar   )r   r   _stepr<   s    r?   runz_GPULoaderThread.runq  s    	)@    

              	s   7;;c                    | j                             | j                                                   | j                            t          t          f          5  t          | j	                  5  | j        
                                }d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   | j                            t          t          f          5  t          | j                  5  |                    | j        d          }d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   t          | j        t"                    rE| j                            |          }| j                            t          t(          f|d           d S t*                              | j        || j                   d S )N)valueF)
pin_memoryrt   ru   )r   setr   r   rU   log_timer   GPU_LOADER_QUEUE_WAIT_TIMERr   r   r   GPU_LOADER_LOAD_TO_GPU_TIMERr   	to_devicerJ   r   r   r   r   r   r   rl   r   )r<   ma_batch_on_cpuma_batch_on_gpur   s       r?   r   z_GPULoaderThread._stepx  s   IMM.&&(( 	N 	
 	
 	
 \""K1L#MNN 	7 	7)M  7 7 #'."4"4"6"67 7 7 7 7 7 7 7 7 7 7 7 7 7 7	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 \""K1M#NOO 	 	)L    #2";";LU #< # #              	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 do~66 		S,,_==JL"">? #      ""4?OT\RRRRRsl   B+.BB+B	B+B	B++B/2B/D11DD1D	D1!D	"D11D58D5r   )r8   r   r   rc   rd   r   torchrF   r   r4   r   r   r   r   s   @r?   rS   rS   8  s        6
 +6
 	6

 6
 &6
 6
 6
 6
 6
 6
p   S S S S S S S Sr@   rS   c                   ^     e Zd Zdeeef         f fdZd	dZd Ze	defd            Z
 xZS )
rl   rP   c                v   t                                          d           d| _        || _        d| _        || _        || _        t          ddt          d          | _	        | j	        
                    d	d
i           t          ddt          d          | _        | j        
                    d	d
i           d S )Nrl   r   TF-rllib_learner_impala_learner_thread_step_timez/Time taken in seconds for learner thread _step.r)   r+   r*   zIMPALA/LearnerThread4rllib_learner_impala_learner_thread_step_update_timez6Time taken in seconds for learner thread _step update.)r3   r4   r   rZ   stopped_update_methodr   r   r   #_metrics_learner_impala_thread_stepr6   *_metrics_learner_impala_thread_step_update)r<   rY   rP   rZ   r7   s       r?   r4   z_LearnerThread.__init__  s     	.///+7? 4=@I@	4
 4
 4
0 	0AA,-	
 	
 	
 ;DGP@	;
 ;
 ;
7 	7HH,-	
 	
 	
 	
 	
r@   rA   Nc                     | j         sIt          | j                  5  |                                  d d d            n# 1 swxY w Y   | j         Gd S d S r   )r   r   r   stepr   s    r?   r   z_LearnerThread.run  s    , 	)$*RSS  		               , 	 	 	 	 	s   =AAc                 0   | j         j                            t          t          f          5  t          | j        t                    r| j                                        }n;| j        st          j
        d           | j        | j                                        }d d d            n# 1 swxY w Y   | j        rd S | j         j                            t          t          f          5  t          | j                  5  |                     | j         t#          |          t$          d           d d d            n# 1 swxY w Y   | j         j        5  | j         xj        dz  c_        d d d            n# 1 swxY w Y   d d d            d S # 1 swxY w Y   d S )Ng-C6?)r   T)r<   rq   rr   _no_metrics_reducer   )rZ   rU   r   r   "LEARNER_THREAD_IN_QUEUE_WAIT_TIMERr   r   r   sampletimesleeppopleftr   LEARNER_THREAD_UPDATE_TIMERr   r   r   r   r   ra   r_   )r<   r   s     r?   r   z_LearnerThread.step  s    \!**<=
 
 	; 	; $..99 
;"&."7"7"9"9 . 'Jv&&& . ' #'."8"8":":	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	;$ < 	F \!**K9T+UVV 	/ 	/
 *?    ##"._"E"E"E7'+	 $                  / / /))Q.))/ / / / / / / / / / / / / / /#	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/sl   A0B((B,/B,'F<2D:.F:D>	>FD>	FE3'F3E7	7F:E7	;FFFlearner_queuec                 N   t          |           | j        k    rH|                    t          t          f|                                                                 d           |                     |           |                    t          t          ft          |                      d S )Nrt   ru   )	lenrN   r   r   r   r   	env_stepsappendQUEUE_SIZE_LEARNER_THREAD_QUEUE)r   r   rU   s      r?   r   z_LearnerThread.enqueue  s     }!555>?%%''1133    
 	U### 	9:	
 	
 	
 	
 	
r@   r   )r8   r   r   r   r   r   r4   r   r   staticmethodr   r   r   s   @r?   rl   rl     s        (
 ~-.	(
 (
 (
 (
 (
 (
T   
,/ ,/ ,/\ 
u 
 
 
 \
 
 
 
 
r@   rl   )Bloggingrc   r\   r   collectionsr   typingr   r   r   r   ray.rllib.algorithms.appo.utilsr   "ray.rllib.algorithms.impala.impalar   ray.rllib.corer	   ray.rllib.core.learner.learnerr
   $ray.rllib.core.learner.training_datar   ray.rllib.core.rl_module.apisr   ray.rllib.utils.annotationsr   r   ray.rllib.utils.frameworkr   "ray.rllib.utils.lambda_defaultdictr   ray.rllib.utils.metricsr   r   &ray.rllib.utils.metrics.metrics_loggerr   #ray.rllib.utils.metrics.ray_metricsr   r   #ray.rllib.utils.schedules.schedulerr   ray.rllib.utils.typingr   r   ray.util.metricsr   r   	getLoggerr8   loggerr   rW   r   r   r   r   r   RAY_GET_EPISODES_TIMERr   r   QUEUE_SIZE_RESULTS_QUEUEr   r&   ImpalaLearnerThreadrS   rl    r@   r?   <module>r      s                # # # # # # # # # # 



 : : : : : : U U U U U U . . . . . . 2 2 2 2 2 2 = = = = = = : : : : : :        7 6 6 6 6 6 @ @ @ @ @ @        A @ @ @ @ @        : 9 9 9 9 9 7 7 7 7 7 7 7 7 - - - - - - - -		8	$	$q; = %I "#E  ; 1 ; "C 5   ~" ~" ~" ~" ~"G ~" ~" ~"B ]S ]S ]S ]S ]Sy' ]S ]S ]S@t
 t
 t
 t
 t
Y% t
 t
 t
 t
 t
r@   