
    &`i68                     \   d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	 d dl
Zd dlZd dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlZdZ e j         e!          Z"e G d de                      Z#edede$ddfd            Z%e G d dee                      Z&dS )    N)Enum)AnyDictListOptionalUnion)SampleBatchconcat_samples)FaultAwareApply)override)
WindowStat)ReplayBufferInterface)SampleBatchType)DeveloperAPI)log_once__all__c                   "    e Zd ZdZdZdZdZdZdS )StorageUnitzSpecifies how batches are structured in a ReplayBuffer.

    timesteps: One buffer slot per timestep.
    sequences: One buffer slot per sequence.
    episodes: One buffer slot per episode.
    fragemts: One buffer slot per incoming batch.
    	timesteps	sequencesepisodes	fragmentsN)__name__
__module____qualname____doc__	TIMESTEPS	SEQUENCESEPISODES	FRAGMENTS     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/utils/replay_buffers/replay_buffer.pyr   r      s.          IIHIIIr"   r   item	num_itemsreturnc                 ~   t          d          r|                                 }t          j                    }|j        dz  }||z  dz  }d                    ||| j        ||          }||k    rt          |          |d|z  k    rt          	                    |           dS t          
                    |           dS dS )z;Warn if the configured replay buffer capacity is too large.replay_capacityg    eAz~Estimated max memory usage for replay buffer is {} GB ({} batches of size {}, {} bytes each), available system memory is {} GBg?N)r   
size_bytespsutilvirtual_memorytotalformatcount
ValueErrorloggerwarninginfo)r$   r%   	item_size
psutil_memtotal_gbmem_sizemsgs          r#   warn_replay_capacityr8   +   s     !"" OO%%	*,,
#c)y(3.//5v)TZH0 0 	 hS//!h&&NN3KK# r"   c                      e Zd ZdZ	 	 ddedeeef         fdZ e	e
          defd            Z e	e
          d	edd
fd            Zededd
fd            Z e	e
          	 ddee         dee         fd            Zeddedefd            Z e	e
          deeef         fd            Z e	e
          deeef         dd
fd            Zedee         defd            Zd
S )ReplayBufferaQ  The lowest-level replay buffer interface used by RLlib.

    This class implements a basic ring-type of buffer with random sampling.
    ReplayBuffer is the base class for advanced types that add functionality while
    retaining compatibility through inheritance.

    The following examples show how buffers behave with different storage_units
    and capacities. This behaviour is generally similar for other buffers, although
    they might not implement all storage_units.

    Examples:

    .. testcode::

        from ray.rllib.utils.replay_buffers.replay_buffer import ReplayBuffer
        from ray.rllib.utils.replay_buffers.replay_buffer import StorageUnit
        from ray.rllib.policy.sample_batch import SampleBatch

        # Store any batch as a whole
        buffer = ReplayBuffer(capacity=10, storage_unit=StorageUnit.FRAGMENTS)
        buffer.add(SampleBatch({"a": [1], "b": [2, 3, 4]}))
        buffer.sample(1)

        # Store only complete episodes
        buffer = ReplayBuffer(capacity=10,
                                storage_unit=StorageUnit.EPISODES)
        buffer.add(SampleBatch({"c": [1, 2, 3, 4],
                                SampleBatch.T: [0, 1, 0, 1],
                                SampleBatch.TERMINATEDS: [False, True, False, True],
                                SampleBatch.EPS_ID: [0, 0, 1, 1]}))
        buffer.sample(1)

        # Store single timesteps
        buffer = ReplayBuffer(capacity=2, storage_unit=StorageUnit.TIMESTEPS)
        buffer.add(SampleBatch({"a": [1, 2], SampleBatch.T: [0, 1]}))
        buffer.sample(1)

        buffer.add(SampleBatch({"a": [3], SampleBatch.T: [2]}))
        print(buffer._eviction_started)
        buffer.sample(1)

        buffer = ReplayBuffer(capacity=10, storage_unit=StorageUnit.SEQUENCES)
        buffer.add(SampleBatch({"c": [1, 2, 3], SampleBatch.SEQ_LENS: [1, 2]}))
        buffer.sample(1)

    .. testoutput::

        True

    `True` is not the output of the above testcode, but an artifact of unexpected
    behaviour of sphinx doctests.
    (see https://github.com/ray-project/ray/pull/32477#discussion_r1106776101)
    '  r   capacitystorage_unitc                    |dt           j        fv rt           j        | _        n|dt           j        fv rt           j        | _        n|dt           j        fv rt           j        | _        nh|dt           j        fv rt           j        | _        nFt          dt           j         dt           j         dt           j         dt           j         d| 
          g | _        |d	k    r"t          d
                    |                    || _	        d	| _
        t          j        | j	                  | _        d| _        d	| _        d	| _        d	| _        t%          dd          | _        d	| _        d| _        dS )a  Initializes a (FIFO) ReplayBuffer instance.

        Args:
            capacity: Max number of timesteps to store in this FIFO
                buffer. After reaching this number, older samples will be
                dropped to make space for new ones.
            storage_unit: If not a StorageUnit, either 'timesteps', 'sequences' or
                'episodes'. Specifies how experiences are stored.
            ``**kwargs``: Forward compatibility kwargs.
        r   r   r   r   zstorage_unit must be either 'z', 'z' or 'z
', but is r   zHCapacity of replay buffer has to be greater than zero but was set to {}.Fevicted_hiti  N)r   r   r=   r   r   r    r/   _storager-   r<   	_next_idxnpzeros
_hit_count_eviction_started_num_timesteps_added_num_timesteps_added_wrap_num_timesteps_sampledr   _evicted_hit_stats_est_size_bytes
batch_size)selfr<   r=   kwargss       r#   __init__zReplayBuffer.__init__z   s   " K)>??? + 5Dk;+@AAA + 5Dj+*>??? + 4Dk;+@AAA + 5DG0E G G)G G/:/CG G",G G8DG G    q==%%+VH%5%5   !(4=11 "'
 %&!)*& '(#",]D"A"A r"   r&   c                 *    t          | j                  S N)lenr@   )rL   s    r#   __len__zReplayBuffer.__len__   s    4=!!!r"   batchNc                    |j         dk    sdS t          || j        |j         z             | j        t          j        k    r*|                    d          }|D ]} | j        |fi | dS | j        t          j        k    rEd}|	                    t          j                  D ]!}|}||z   } | j        |||         fi | |}"dS | j        t          j        k    r|                                D ]}	|		                    t          j        dg          d         dk    r]|		                    t          j        dg          d         s'|		                    t          j        dg          d         r | j        |	fi | t#          d          rt$                              d	           dS | j        t          j        k    r | j        |fi | dS dS )
av  Adds a batch of experiences or other data to this buffer.

        Splits batch into chunks of timesteps, sequences or episodes, depending on
        `self._storage_unit`. Calls `self._add_single_batch` to add resulting slices
        to the buffer storage.

        Args:
            batch: The batch to add.
            ``**kwargs``: Forward compatibility kwargs.
        r   N)r$   r%      TFonly_full_episodeszThis buffer uses episodes as a storage unit and thus allows only full episodes to be added to it (starting from T=0 and ending in `terminateds=True` or `truncateds=True`. Some samples may be dropped.)r.   r8   r<   r=   r   r   
timeslices_add_single_batchr   getr	   SEQ_LENSr   split_by_episodeTTERMINATEDS
TRUNCATEDSr   r0   r2   r    )
rL   rS   rM   rX   ttimestep_countseq_len	start_seqend_seqepss
             r#   addzReplayBuffer.add   s)    {QF%4=5;3NOOOO 555))!,,J 4 4&&q33F33334 4 +"777N 99[%9:: ) )*	(72&&uYw->'?JJ6JJJ!(	) ) +"666--//  77;=1#..q1Q66GGK3dV<<R@ 7ww{5w??C 7 +D*399&9999 455 ;   $ +"777"D"533F33333 87r"   r$   c                    | xj         |j        z  c_         | xj        |j        z  c_        | j        t	          | j                  k    r=| j                            |           | xj        |                                z  c_        ne| j        | j                 }| xj        |                                z  c_        || j        | j        <   | xj        |                                z  c_        | j	        r9| j
                            | j        | j                            d| j        | j        <   | j        | j        k    rd| _	        d| _        d| _        dS | xj        dz  c_        dS )ax  Add a SampleBatch of experiences to self._storage.

        An item consists of either one or more timesteps, a sequence or an
        episode. Differs from add() in that it does not consider the storage
        unit or type of batch and simply stores it.

        Args:
            item: The batch to be added.
            ``**kwargs``: Forward compatibility kwargs.
        r   TrU   N)rF   r.   rG   rA   rQ   r@   appendrJ   r)   rE   rI   pushrD   r<   )rL   r$   rM   item_to_be_removeds       r#   rY   zReplayBuffer._add_single_batch   sV    	!!TZ/!!&&$*4&&>S////M  &&&  DOO$5$55   !%t~!>  $6$A$A$C$CC  ,0DM$.)  DOO$5$55   ! 	0#(()HIII./DODN+ )T]::%)D"-.D*DNNNNNaNNNNr"   r%   c                      t                     dk    rt          d           fdt          |          D             }                     |          } xj        |j        z  c_        |S )a6  Samples `num_items` items from this buffer.

        The items depend on the buffer's storage_unit.
        Samples in the results may be repeated.

        Examples for sampling results:

        1) If storage unit 'timesteps' has been chosen and batches of
        size 5 have been added, sample(5) will yield a concatenated batch of
        15 timesteps.

        2) If storage unit 'sequences' has been chosen and sequences of
        different lengths have been added, sample(5) will yield a concatenated
        batch with a number of timesteps equal to the sum of timesteps in
        the 5 sampled sequences.

        3) If storage unit 'episodes' has been chosen and episodes of
        different lengths have been added, sample(5) will yield a concatenated
        batch with a number of timesteps equal to the sum of timesteps in
        the 5 sampled episodes.

        Args:
            num_items: Number of items to sample from this buffer.
            ``**kwargs``: Forward compatibility kwargs.

        Returns:
            Concatenated batch of items.
        r   z&Trying to sample from an empty buffer.c                 Z    g | ]'}t          j        d t                    dz
            (S )r   rU   )randomrandintrQ   ).0_rL   s     r#   
<listcomp>z'ReplayBuffer.sample.<locals>.<listcomp>;  s/    LLLa3t99q=11LLLr"   )rQ   r/   range_encode_samplerH   r.   )rL   r%   rM   idxessamples   `    r#   ru   zReplayBuffer.sample  ss    @ t99>>EFFFLLLL5;K;KLLL$$U++##v|3##r"   Fdebugc                     | j         | j        | j        | j        | j        t          | j                  d}|r,|                    | j        	                                           |S )zReturns the stats of this buffer.

        Args:
            debug: If True, adds sample eviction statistics to the returned
                stats dict.

        Returns:
            A dictionary of stats about this buffer.
        )added_countadded_count_wrappedeviction_startedsampled_countest_size_bytesnum_entries)
rF   rG   rE   rH   rJ   rQ   r@   updaterI   stats)rL   rv   datas      r#   r   zReplayBuffer.stats@  sk      4#'#A $ 6!8"2t}--
 
  	9KK/5577888r"   c                 v    | j         | j        d}|                    |                     d                     |S )N)r@   rA   F)rv   )r@   rA   r~   r   rL   states     r#   	get_statezReplayBuffer.get_stateW  s8    !]HHTZZeZ,,---r"   r   c                     |d         | _         |d         | _        |d         | _        |d         | _        |d         | _        |d         | _        |d         | _        d S )Nr@   rA   rx   ry   rz   r{   r|   )r@   rA   rF   rG   rE   rH   rJ   r   s     r#   	set_statezReplayBuffer.set_state]  sf     j){+$)-$8!)./D)E&!&'9!:&+O&<#$%56r"   rt   c                     g }|D ]7}| j         |xx         dz  cc<   |                    | j        |                    8|rt          |          }nt	                      }|                                 |S )z?Fetches concatenated samples at given indices from the storage.rU   )rD   rh   r@   r
   r	   decompress_if_needed)rL   rt   samplesiouts        r#   rs   zReplayBuffer._encode_samplei  s      	- 	-AOA!#NN4=+,,,, 	  ))CC--C  """
r"   )r;   r   rP   )F)r   r   r   r   intr   strr   rN   r   r   rR   r   rf   r   rY   r   ru   booldictr   r   r   r   r   r   rs   r!   r"   r#   r:   r:   B   s1       4 4p 0;@ @@ C,-@ @ @ @D X#$$" " " " %$" X#$$14 14t 14 14 14 %$14f " o " D "  "  "  \" H X#$$)-$ $!#$	/	"$ $ $ %$$L  4 D    \, X#$$4S>    %$
 X#$$	7tCH~ 	7$ 	7 	7 	7 %$	7 DI /    \  r"   r:   )'loggingrm   enumr   typingr   r   r   r   r   numpyrB   rayray.rllib.policy.sample_batchr	   r
   ray.rllib.utils.actor_managerr   ray.rllib.utils.annotationsr   #ray.rllib.utils.metrics.window_statr   #ray.rllib.utils.replay_buffers.baser   ray.rllib.utils.typingr   ray.util.annotationsr   ray.util.debugr   r*   _ALL_POLICIES	getLoggerr   r0   r   r   r8   r:   r!   r"   r#   <module>r      s           3 3 3 3 3 3 3 3 3 3 3 3 3 3     


 E E E E E E E E 9 9 9 9 9 9 0 0 0 0 0 0 : : : : : : E E E E E E 2 2 2 2 2 2 - - - - - - # # # # # #  		8	$	$     $    / c d    , t t t t t(/ t t t t tr"   