§
    &`ƒiº  ã                   ó°   — d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZmZ d d	lmZ e G d
„ d¦  «        ¦   «         ZdS )é    N)ÚOptional©ÚSimpleReplayBuffer)ÚDEFAULT_POLICY_IDÚconcat_samples)ÚOldAPIStack)Ú
ReplayMode)Ú_ALL_POLICIES)ÚPolicyIDÚSampleBatchType)Ú_Timerc                   óp   — e Zd ZdZej        fdededefd„Zde	ddfd	„Z
efd
edee	         fd„Zdefd„ZdS )ÚMixInMultiAgentReplayBufferaž  This buffer adds replayed samples to a stream of new experiences.

    - Any newly added batch (`add()`) is immediately returned upon
    the next `replay` call (close to on-policy) as well as being moved
    into the buffer.
    - Additionally, a certain number of old samples is mixed into the
    returned sample according to a given "replay ratio".
    - If >1 calls to `add()` are made without any `replay()` calls
    in between, all newly added batches are returned (plus some older samples
    according to the "replay ratio").

    .. testcode::

        from ray.rllib.execution.buffers.mixin_replay_buffer import (
            MixInMultiAgentReplayBuffer)
        from ray.rllib.policy.sample_batch import SampleBatch
        # replay ratio 0.66 (2/3 replayed, 1/3 new samples):
        buffer = MixInMultiAgentReplayBuffer(capacity=100,
                                             replay_ratio=0.66)
        A, B, C = (SampleBatch({"obs": [1]}), SampleBatch({"obs": [2]}),
            SampleBatch({"obs": [3]}))
        buffer.add(A)
        buffer.add(B)
        buffer.add(B)
        print(buffer.replay()["obs"])

    .. testoutput::
        :hide:

        ...
    ÚcapacityÚreplay_ratioÚreplay_modec                 ó:  ‡— ‰| _         || _        d| _        | j        dk    r| j        d| j        z
  z  | _        |dt          j        fv rt          j        | _        nD|dt          j        fv rt          j        | _        n"t          d                     |¦  «        ¦  «        ‚ˆfd„}t          j
        |¦  «        | _        t          ¦   «         | _        t          ¦   «         | _        t          ¦   «         | _        d| _        t          j
        t"          ¦  «        | _        dS )aè  Initializes MixInReplay instance.

        Args:
            capacity: Number of batches to store in total.
            replay_ratio: Ratio of replayed samples in the returned
                batches. E.g. a ratio of 0.0 means only return new samples
                (no replay), a ratio of 0.5 means always return newest sample
                plus one old one (1:1), a ratio of 0.66 means always return
                the newest sample plus 2 old (replayed) ones (1:2), etc...
        Nç      ð?ÚlockstepÚindependentzUnsupported replay mode: {}c                  ó$   •— t          ‰ ¬¦  «        S )N)Ú	num_slotsr   )r   s   €úƒ/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/execution/buffers/mixin_replay_buffer.pyÚ
new_bufferz8MixInMultiAgentReplayBuffer.__init__.<locals>.new_bufferN   s   ø€ Ý%°Ð9Ñ9Ô9Ð9ó    r   )r   r   Úreplay_proportionr	   ÚLOCKSTEPr   ÚINDEPENDENTÚ
ValueErrorÚformatÚcollectionsÚdefaultdictÚreplay_buffersr   Úadd_batch_timerÚreplay_timerÚupdate_priorities_timerÚ	num_addedÚlistÚlast_added_batches)Úselfr   r   r   r   s    `   r   Ú__init__z$MixInMultiAgentReplayBuffer.__init__1   s  ø€ ð  !ˆŒØ(ˆÔØ!%ˆÔØÔ Ò#Ð#Ø%)Ô%6¸#ÀÔ@QÑ:QÑ%RˆDÔ"à˜:¥zÔ':Ð;Ð;Ð;Ý)Ô2ˆDÔÐØ˜]­JÔ,BÐCÐCÐCÝ)Ô5ˆDÔÐåÐ:×AÒAÀ+ÑNÔNÑOÔOÐOð	:ð 	:ð 	:ð 	:ð 	:õ *Ô5°jÑAÔAˆÔõ  &™xœxˆÔÝ"™HœHˆÔÝ'-¡x¤xˆÔ$ð ˆŒõ #.Ô"9½$Ñ"?Ô"?ˆÔÐÐr   ÚbatchÚreturnNc                 ó<  — |                      ¦   «         }|                     ¦   «         }| j        5  | j        t          j        k    rK| j        t                                        |¦  «         | j	        t                    
                    |¦  «         n_|j                             ¦   «         D ]E\  }}| j        |                              |¦  «         | j	        |          
                    |¦  «         ŒFddd¦  «         n# 1 swxY w Y   | xj        |j        z  c_        dS )a2  Adds a batch to the appropriate policy's replay buffer.

        Turns the batch into a MultiAgentBatch of the DEFAULT_POLICY_ID if
        it is not a MultiAgentBatch. Subsequently adds the individual policy
        batches to the storage.

        Args:
            batch: The batch to be added.
        N)ÚcopyÚas_multi_agentr$   r   r	   r   r#   r
   Ú	add_batchr)   ÚappendÚpolicy_batchesÚitemsr'   Úcount)r*   r,   Ú	policy_idÚsample_batchs       r   ÚaddzMixInMultiAgentReplayBuffer.add^   sY  € ð —
’
‘”ˆØ×$Ò$Ñ&Ô&ˆàÔ!ð 	Lð 	LØÔ¥:Ô#6Ò6Ð6ð Ô#¥MÔ2×<Ò<¸UÑCÔCÐCØÔ'­Ô6×=Ò=¸eÑDÔDÐDÐDð 05Ô/C×/IÒ/IÑ/KÔ/Kð Lð LÑ+I˜|ØÔ'¨	Ô2×<Ò<¸\ÑJÔJÐJØÔ+¨IÔ6×=Ò=¸lÑKÔKÐKÐKð	Lð 	Lð 	Lñ 	Lô 	Lð 	Lð 	Lð 	Lð 	Lð 	Lð 	Løøøð 	Lð 	Lð 	Lð 	Lð 	ˆŒ˜%œ+Ñ%ˆŒˆˆs   °C C<Ã<D ÄD r6   c                 óB  — | j         t          j        k    r2|t          k    r't	          d                     t          ¦  «        ¦  «        ‚| j        |         }t          |¦  «        dk    s)t          | j        |         ¦  «        dk    r| j	        dk     rd S | j
        5  | j        |         }g | j        |<   | j	        dk    rt          |¦  «        cd d d ¦  «         S | j	        dk    r |                     ¦   «         cd d d ¦  «         S t          |¦  «        }| j        }t          j        ¦   «         ||z  k     rF|dz  }|                     |                     ¦   «         ¦  «         t          j        ¦   «         ||z  k     °Ft          |¦  «        cd d d ¦  «         S # 1 swxY w Y   d S )Nz¼Trying to sample from single policy's buffer in lockstep mode. In lockstep mode, all policies' experiences are sampled from a single replay buffer which is accessed with the policy id `{}`r   r   g        é   )r   r	   r   r
   r   r    r#   Úlenr)   r   r%   r   Úreplayr   Úrandomr2   )r*   r6   ÚbufferÚoutput_batchesÚnum_newr   s         r   r<   z"MixInMultiAgentReplayBuffer.replay{   s   € ð ÔzÔ2Ò2Ð2°yÅMÒ7QÐ7QÝð*÷ +1ª&µÑ*?Ô*?ñ	ô ð ð Ô$ YÔ/ˆõ
 ˆv‰;Œ;˜!ÒÐÝÔ'¨	Ô2Ñ3Ô3°qÒ8Ð8¸TÔ=NÐQTÒ=TÐ=Tà4ð Ôð 	2ð 	2Ø!Ô4°YÔ?ˆNØ13ˆDÔ# IÑ.ð Ô  CÒ'Ð'Ý% nÑ5Ô5ð	2ð 	2ð 	2ð 	2ñ 	2ô 	2ð 	2ð 	2ð Ô" cÒ)Ð)Ø—}’}‘”ð	2ð 	2ð 	2ð 	2ñ 	2ô 	2ð 	2ð 	2õ ˜.Ñ)Ô)ˆGØ $Ô 6ÐÝ”-‘/”/ GÐ.?Ñ$?Ò?Ð?Ø! QÑ&Ð!Ø×%Ò% f§m¢m¡o¤oÑ6Ô6Ð6õ ”-‘/”/ GÐ.?Ñ$?Ò?Ð?õ " .Ñ1Ô1ð'	2ð 	2ð 	2ð 	2ñ 	2ô 	2ð 	2ð 	2ð 	2ð 	2ð 	2ð 	2øøøð 	2ð 	2ð 	2ð 	2ð 	2ð 	2s   Â1FÃFÄBFÆFÆFc                 ó(   — t          j        ¦   «         S )zµReturns the computer's network name.

        Returns:
            The computer's networks name or an empty string, if the network
            name could not be determined.
        )ÚplatformÚnode)r*   s    r   Úget_hostz$MixInMultiAgentReplayBuffer.get_host¦   s   € õ Œ}‰ŒÐr   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r	   r   ÚintÚfloatr+   r   r8   r   r   r   r<   ÚstrrD   © r   r   r   r      sÖ   € € € € € ðð ðH #-Ô"8ð	+@ð +@àð+@ð ð+@ð  ð	+@ð +@ð +@ð +@ðZ&˜ð &¨Tð &ð &ð &ð &ð< %6ð)2ð )2Ø!ð)2à	/Ô	"ð)2ð )2ð )2ð )2ðV˜#ð ð ð ð ð ð r   r   )r!   rB   r=   Útypingr   Úray.rllib.execution.replay_opsr   Úray.rllib.policy.sample_batchr   r   Úray.rllib.utils.annotationsr   Ú8ray.rllib.utils.replay_buffers.multi_agent_replay_bufferr	   Ú,ray.rllib.utils.replay_buffers.replay_bufferr
   Úray.rllib.utils.typingr   r   Úray.util.timerr   r   rL   r   r   ú<module>rU      s  ðØ Ð Ð Ð Ø €€€Ø €€€Ø Ð Ð Ð Ð Ð à =Ð =Ð =Ð =Ð =Ð =Ø KÐ KÐ KÐ KÐ KÐ KÐ KÐ KØ 3Ð 3Ð 3Ð 3Ð 3Ð 3Ø OÐ OÐ OÐ OÐ OÐ OØ FÐ FÐ FÐ FÐ FÐ FØ <Ð <Ð <Ð <Ð <Ð <Ð <Ð <Ø !Ð !Ð !Ð !Ð !Ð !ð ð]ð ]ð ]ð ]ð ]ñ ]ô ]ñ „ð]ð ]ð ]r   