
    &`i>                         d Z ddlZddlZddlmZ ddlZddlmZ ddl	m
Z
 ddlmZ ddlmZmZ ddlmZmZ d	Zd
Z G d d          Zede
fd            ZdS )z
[1] IMPACT: Importance Weighted Asynchronous Architectures with Clipped Target Networks.
Luo et al. 2020
https://arxiv.org/pdf/1912.00167
    N)deque)ModelCatalog)ModelV2)OldAPIStack))DEFAULT_HISTOGRAM_BOUNDARIES_SHORT_EVENTSTimerAndPrometheusLogger)Counter	Histogramfunctarget_funcc                   P    e Zd ZdZdedefdZd Zd Zed             Z	defd	Z
d
S )CircularBufferaJ  A circular batch-wise buffer as described in [1] for APPO.

    The buffer holds at most N batches, which are sampled at random (uniformly).
    If full and a new batch is added, the oldest batch is discarded. Also, each batch
    currently in the buffer can be sampled at most K times (after which it is also
    discarded).
    num_batchesiterations_per_batchc                     || _         || _        | j         | j        z  | _        d| _        t	          d t          | j                  D             | j                  | _        t                      | _        | j        | _	        t          j                    | _        t          j                                        | _        t#          ddt$          d          | _        | j                            d| j        j        i           t/          d	d
d          | _        | j                            d| j        j        i           t#          ddt$          d          | _        | j                            d| j        j        i           d S )Nr   c                     g | ]}d S N ).0_s     s/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/algorithms/appo/utils.py
<listcomp>z+CircularBuffer.__init__.<locals>.<listcomp>+   s    ===qd===    )maxlen$rllib_utils_circular_buffer_add_timez"Time spent in CircularBuffer.add())rllib)namedescription
boundariestag_keysr   2rllib_utils_circular_buffer_add_ts_dropped_counterz8Total number of env steps dropped by the CircularBuffer.)r   r   r    'rllib_utils_circular_buffer_sample_timez%Time spent in CircularBuffer.sample())r   r   _NxK
_num_addedr   range_bufferset_indices_offset	threadingLock_locknprandomdefault_rng_rngr
   r   !_metrics_circular_buffer_add_timeset_default_tags	__class____name__r	   '_metrics_circular_buffer_add_ts_dropped$_metrics_circular_buffer_sample_time)selfr   r   s      r   __init__zCircularBuffer.__init__"   s   &$8!$t'@@	==E$),<,<===diPPPy^%%
I))++	 2;7<@	2
 2
 2
. 	.??dn-.	
 	
 	
 8?ER8
 8
 8
4
 	4EEdn-.	
 	
 	
 5>:?@	5
 5
 5
1 	1BBdn-.	
 	
 	
 	
 	
r   c                 V   t          | j                  5  | j        5  | j        d         }t	          | j                  D ]r}| j                            |           | j                            | j	                   | j        
                    | j	        | j        z
             | xj	        dz  c_	        s| xj        dz  c_        d d d            n# 1 swxY w Y   d}|5|                                }|dk    r| j                            |           d d d            n# 1 swxY w Y   |S )Nr      )value)r   r1   r,   r&   r%   r   appendr(   addr)   discardr#   r$   	env_stepsr5   inc)r7   batchdropped_entryr   
dropped_tss        r   r=   zCircularBuffer.addP   s   %d&LMM 	W 	W % % $Qt899 & &AL''...M%%dl333M))$,*BCCCLLA%LLL1$% % % % % % % % % % % % % % % J(*4466
>>@DD:DVVV	W 	W 	W 	W 	W 	W 	W 	W 	W 	W 	W 	W 	W 	W 	W" s5   DB%CDC	DC	<DD"%D"c           	      B   t          | j                  5  t          |           dk    r't          j        d           t          |           dk    '| j        5  | j                            t          | j	                            }|| j
        z
  | j        z   }| j        |         }|(J ||| j
        | j	        d | j        D             f            d | j        |<   | j	                            |           d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   |S )Nr   g-C6?c                     g | ]}|d u S r   r   )r   bs     r   r   z)CircularBuffer.sample.<locals>.<listcomp>u   s    5551Q$Y555r   )r   r6   lentimesleepr,   r0   choicelistr(   r)   r#   r&   r>   )r7   idxactual_buffer_idxrA   s       r   samplezCircularBuffer.samplee   s   %d&OPP 	+ 	+d))q..
6""" d))q..  + +i&&tDM':':;;$'$,$6$B!%67((%LM55555+((( 37./%%c***+ + + + + + + + + + + + + + +	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+( s7   ADBC=1D=D	DD	DDDc                 d    | j         5  | j        | j        k    cddd           S # 1 swxY w Y   dS )zIWhether the buffer has been filled once with at least `self.num_batches`.N)r,   r$   r   r7   s    r   filledzCircularBuffer.filled}   s}     Z 	7 	7?d&66	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7s   %))returnc                 l    | j         5  t          | j                  cddd           S # 1 swxY w Y   dS )zIReturns the number of actually valid (non-expired) batches in the buffer.N)r,   rG   r(   rP   s    r   __len__zCircularBuffer.__len__   s{    Z 	& 	&t}%%	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&s   )--N)r4   
__module____qualname____doc__intr8   r=   rN   propertyrQ   rT   r   r   r   r   r      s         ,
C ,
s ,
 ,
 ,
 ,
\  *  0 7 7 X7
& & & & & & &r   r   rR   c                    t          j        | j        | j        d                   \  }}t          j        | j        | j        || j        d         t          | j                  | _        | j        	                                | _
        t          j        | j        | j        || j        d         t          | j                  | _        | j        	                                | _        | j        S )zBuilds model and target model for APPO.

    Returns:
        ModelV2: The Model for the Policy to use.
            Note: The target model will not be returned, just assigned to
            `policy.target_model`.
    model)r   	framework)r   get_action_distaction_spaceconfigget_model_v2observation_spacePOLICY_SCOPEr\   r[   	variablesmodel_variablesTARGET_POLICY_SCOPEtarget_modeltarget_model_variables)policyr   	logit_dims      r   make_appo_modelsrj      s      /V]73 LAy
  , g"  FL $\3355F '3 g "  F %+$7$A$A$C$CF! <r   )rW   r*   rH   collectionsr   numpyr-   ray.rllib.models.catalogr   ray.rllib.models.modelv2r   ray.rllib.utils.annotationsr   #ray.rllib.utils.metrics.ray_metricsr   r   ray.util.metricsr	   r
   rb   re   r   rj   r   r   r   <module>rr      s)   
                1 1 1 1 1 1 , , , , , , 3 3 3 3 3 3        0 / / / / / / /# m& m& m& m& m& m& m& m&` $ $ $ $ $ $ $r   