
    &`ie                         d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ  e            \  ZZZe G d dej                              ZdS )    N)DictOptional)RolloutWorker)MinibatchBuffer)OldAPIStack)try_import_tf)LEARNER_INFOLearnerInfoBuilder)
WindowStat)_NextValueNotReady)_Timerc            
       `    e Zd ZdZdededededef
dZdd
Zdee	         fdZ
ddedefdZd	S )LearnerThreadaV  Background thread that updates the local model from sample trajectories.

    The learner thread communicates with the main thread through Queues. This
    is needed since Ray operations can only be run on the main thread. In
    addition, moving heavyweight gradient ops session runs off the main thread
    improves overall throughput.
    local_workerminibatch_buffer_sizenum_sgd_iterlearner_queue_sizelearner_queue_timeoutc                    t           j                            |            t          dd          | _        || _        t          j        |          | _        t          j                    | _	        t          | j        ||||          | _        t                      | _        t                      | _        t                      | _        t                      | _        d| _        g | _        i | _        d| _        d| _        dS )	aW  Initialize the learner thread.

        Args:
            local_worker: process local rollout worker holding
                policies this thread will call learn_on_batch() on
            minibatch_buffer_size: max number of train batches to store
                in the minibatching buffer
            num_sgd_iter: number of passes to learn on per train batch
            learner_queue_size: max size of queue of inbound
                train batches to this thread
            learner_queue_timeout: raise an exception if the queue has
                been empty for this long in seconds
        size2   )maxsize)inqueuer   timeout
num_passesinit_num_passesTFr   N)	threadingThread__init__r   r   r   queueQueuer   outqueuer   minibatch_bufferr   queue_timer
grad_timer
load_timerload_wait_timerdaemonpolicy_ids_updatedlearner_infostopped	num_steps)selfr   r   r   r   r   s         v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/execution/learner_thread.pyr   zLearnerThread.__init__   s    * 	!!$'''",VR"8"8({+=>>> /L&)#(!
 !
 !
 "88 (( ((%xx"$    returnNc                     | j         j        j        dk    rt                                           | j        s|                                  | j        d S d S )Ntf2)r   configframework_strtf1enable_eager_executionr+   step)r-   s    r.   runzLearnerThread.runG   s_    #1U::&&(((, 	IIKKK , 	 	 	 	 	r/   c                    | j         5  	 | j                                        \  }}n/# t          j        $ r t                      cY cd d d            S w xY w	 d d d            n# 1 swxY w Y   | j        5  t          d          }| j        j	        j
        r| j                                         | j                            |          }| j        j	        j
        r| j                                         | j                            t!          |                                                     |                                D ]\  }}|                    ||           |                                | _        d d d            n# 1 swxY w Y   | xj        dz  c_        | j                            |j        |                                | j        f           | j                            | j                                                   d S )N   )num_devices)r$   r#   getr    Emptyr   r%   r
   r   r3   policy_states_are_swappablelocklearn_on_batchunlockr)   extendlistkeysitemsadd_learn_on_batch_resultsfinalizer*   r,   r"   putcountagent_stepsr   pushr   qsize)r-   batch_learner_info_buildermulti_agent_resultspidresultss          r.   r7   zLearnerThread.stepN   s    	, 	,,04466qq; , , ,)++++		, 	, 	, 	, 	, 	, 	, 	,, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	,
 _ 	@ 	@ $6!#D#D#D  'C )!&&((("&"3"B"B5"I"I 'C +!((***#**40C0H0H0J0J+K+KLLL 3 9 9 ; ; N NW$??MMMM 4 = = ? ?D	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@" 	!5;(9(9(;(;T=NOPPP$$T\%7%7%9%9:::::sD   A#'A#AA#AA##A'*A'5DFF	FTresultc                    d }|r|d                              d| j                                        t          t	          j        | j                  d || j                   || j                   || j	                   || j
                  di           nr|d                              | j                                         || j                   || j                   || j	                   || j
                  dd           |S )z&Add internal metrics to a result dict.c                 2    t          d| j        z  d          S )Ni     )roundmean)timers    r.   timer_to_msz6LearnerThread.add_learner_metrics.<locals>.timer_to_msm   s    
*A...r/   infolearner_queuetiming_breakdown)learner_grad_time_mslearner_load_time_mslearner_load_wait_time_mslearner_dequeue_time_ms)r\   r]   )updater   statsr	   copydeepcopyr*   r%   r&   r'   r$   )r-   rS   overwrite_learner_inforZ   s       r.   add_learner_metricsz!LearnerThread.add_learner_metricsj   s;   	/ 	/ 	/ " 	6N!!#T%<%B%B%D%D $-0A"B"B&0;DO0L0L0;DO0L0L5@[AU5V5V3>;t?O3P3P	) )	    6N!!%)%<%B%B%D%D0;DO0L0L0;DO0L0L5@[AU5V5V3>;t?O3P3P	) ) 
 
 
 r/   )r0   N)T)__name__
__module____qualname____doc__r   intr   r8   r   r   r7   r   rg    r/   r.   r   r      s         )#)  #) 	)
  )  #) ) ) )V   ;h12 ; ; ; ;8 $ PT      r/   r   )rd   r    r   typingr   r   #ray.rllib.evaluation.rollout_workerr   $ray.rllib.execution.minibatch_bufferr   ray.rllib.utils.annotationsr   ray.rllib.utils.frameworkr   $ray.rllib.utils.metrics.learner_infor	   r
   #ray.rllib.utils.metrics.window_statr   ray.util.iterr   ray.util.timerr   r5   tftfvr   r   rm   r/   r.   <module>ry      s/         ! ! ! ! ! ! ! ! = = = = = = @ @ @ @ @ @ 3 3 3 3 3 3 3 3 3 3 3 3 Q Q Q Q Q Q Q Q : : : : : : , , , , , , ! ! ! ! ! !}R v v v v vI$ v v v v vr/   