
    &`i7'                     H   d dl Z d dlZd dlmZmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ erd dlmZ  ej        e          Zdee         dej         fdZ!e G d d                      Z"e G d d                      Z#dS )    N)TYPE_CHECKINGAnyDictList)deprecation_warning)_DUMMY_AGENT_ID)Policy)MultiAgentBatchSampleBatch)OldAPIStack)	summarize)AgentIDPolicyID)log_once)RLlibCallbackvreturnc                     t          j        |           }|j        t           j        k    r|                    t           j                  S |S N)nparraydtypefloat64astypefloat32)r   arrs     }/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/evaluation/sample_batch_builder.py_to_float_arrayr      s7    
(1++C
yBJzz"*%%%J    c                   H    e Zd ZdZdZd ZdeddfdZdeddfd	Z	defd
Z
dS )SampleBatchBuilderzUtil to build a SampleBatch incrementally.

    For efficiency, SampleBatches hold values in column form (as arrays).
    However, it is useful to add data one row (dict) at a time.
    r   c                 P    t          j        t                    | _        d| _        d S )Nr   )collectionsdefaultdictlistbufferscountselfs    r   __init__zSampleBatchBuilder.__init__'   s    (3(?(E(E


r   valuesr   Nc                     |                                 D ]%\  }}| j        |                             |           &| xj        dz  c_        dS )z7Add the given dictionary (row) of values to this batch.   N)itemsr&   appendr'   )r)   r+   kr   s       r   
add_valueszSampleBatchBuilder.add_values+   sO     LLNN 	& 	&DAqLO""1%%%%

a



r   batchc                     |                                 D ]%\  }}| j        |                             |           &| xj        |j        z  c_        dS )z,Add the given batch of values to this batch.N)r.   r&   extendr'   )r)   r2   r0   columns       r   	add_batchzSampleBatchBuilder.add_batch2   sR      	+ 	+IAvLO""6****

ek!



r   c                 N   t          d | j                                        D                       }t           j        |vrFt	          j        t          j        |j                  |t           j        <   t          xj        dz  c_        | j        	                                 d| _        |S )z=Returns a sample batch including all previously added values.c                 4    i | ]\  }}|t          |          S  )r   ).0r0   r   s      r   
<dictcomp>z6SampleBatchBuilder.build_and_reset.<locals>.<dictcomp><   s&    TTTtq!Q 2 2TTTr   r-   r   )
r   r&   r.   	UNROLL_IDr   repeatr!   _next_unroll_idr'   clear)r)   r2   s     r   build_and_resetz"SampleBatchBuilder.build_and_reset9   s     TTt|?Q?Q?S?STTTUU --+-9"2EK, ,E+'( ..!3..
r   )__name__
__module____qualname____doc__r>   r*   r   r1   r   r6   r@   r9   r   r   r!   r!      s          O  3 4    "{ "t " " " "      r   r!   c                       e Zd ZdZdeeef         deddfdZde	fdZ
defd	Zd
edededdfdZdddZddZddefdZdS )MultiAgentSampleBatchBuilderag  Util to build SampleBatches for each policy in a multi-agent env.

    Input data is per-agent, while output data is per-policy. There is an M:N
    mapping between agents and policies. We retain one local batch builder
    per agent. When an agent is done, then its local batch is appended into the
    corresponding policy batch for the agent's policy.
    
policy_mapclip_rewards	callbacksr   c                     t          d          rt          dd           || _        || _        d |                                D             | _        i | _        i | _        || _        d| _	        dS )aS  Initialize a MultiAgentSampleBatchBuilder.

        Args:
            policy_map (Dict[str,Policy]): Maps policy ids to policy instances.
            clip_rewards (Union[bool,float]): Whether to clip rewards before
                postprocessing (at +/-1.0) or the actual value to +/- clip.
            callbacks: RLlib callbacks.
        rF   F)olderrorc                 ,    i | ]}|t                      S r9   )r!   )r:   r0   s     r   r;   z9MultiAgentSampleBatchBuilder.__init__.<locals>.<dictcomp>d   s!    SSSA#5#7#7SSSr   r   N)
r   r   rG   rH   keyspolicy_buildersagent_buildersagent_to_policyrI   r'   )r)   rG   rH   rI   s       r   r*   z%MultiAgentSampleBatchBuilder.__init__Q   s     233 	Q$B%PPPP$(SSARARSSS !!" 


r   r   c                 b    t          d | j                                        D                       S )zReturns the total number of steps taken in the env (all agents).

        Returns:
            int: The number of steps taken in total in the environment over all
                agents.
        c              3   $   K   | ]}|j         V  d S r   )r'   )r:   as     r   	<genexpr>z5MultiAgentSampleBatchBuilder.total.<locals>.<genexpr>w   s$      AAq17AAAAAAr   )sumrP   r+   r(   s    r   totalz"MultiAgentSampleBatchBuilder.totalo   s0     AAD$7$>$>$@$@AAAAAAr   c                 2    t          | j                  dk    S )zReturns whether there is pending unprocessed data.

        Returns:
            bool: True if there is at least one per-agent builder (with data
                in it).
        r   )lenrP   r(   s    r   has_pending_agent_dataz3MultiAgentSampleBatchBuilder.has_pending_agent_datay   s     4&''!++r   agent_id	policy_idr+   Nc                     || j         vr t                      | j         |<   || j        |<   |t          k    r||d<    | j         |         j        di | dS )a  Add the given dictionary (row) of values to this batch.

        Args:
            agent_id: Unique id for the agent we are adding values for.
            policy_id: Unique id for policy controlling the agent.
            values: Row of values to add for this agent.
        r[   Nr9   )rP   r!   rQ   r   r1   )r)   r[   r\   r+   s       r   r1   z'MultiAgentSampleBatchBuilder.add_values   sp     4...,>,@,@D)-6D * &&!)F:0H%0::6:::::r   c           
         i }| j                                         D ]4\  }}| j        | j        |                  |                                f||<   5i }| j        du r;|                                D ]%\  }\  }}t          j        |d                   |d<   &nO| j        rH|                                D ]3\  }\  }}t          j        |d         | j         | j                  |d<   4|                                D ]\  }\  }}|	                                }||= | j        | j        |                  }	|
                                r0t          t          |t          j                                     dk    rt          d|          |||<   t!          |	dd          4|	j                            |	||         |	                                           |	                    ||         ||          ||<   t)          d          r:t*                              d	                    t1          |                               d
dlm}
 t7          |                                          D ]i\  }}| j                             |
            ||| j        |         | j        ||           | j        | j        |                                      |           j| j                                           | j                                          dS )a@  Apply policy postprocessors to any unprocessed rows.

        This pushes the postprocessed per-agent batches onto the per-policy
        builders, clearing per-agent state.

        Args:
            episode (Optional[Episode]): The Episode object that
                holds this MultiAgentBatchBuilder object.
        Trewards)a_mina_maxr-   zPBatches sent to postprocessing must only contain steps from a single trajectory.explorationN
after_postz8Trajectory fragment after postprocess_trajectory():

{}
r   )get_global_worker)workerepisoder[   r\   policiespostprocessed_batchoriginal_batches)!rP   r.   rG   rQ   r@   rH   r   signclipcopyis_single_trajectoryrY   setr   EPS_ID
ValueErrorgetattrrb   postprocess_trajectoryget_sessionr   loggerinfoformatr   #ray.rllib.evaluation.rollout_workerrd   sortedrI   on_postprocess_trajectoryrO   r6   r?   )r)   rf   pre_batchesr[   builderpost_batches_	pre_batchother_batchespolicyrd   
post_batchs               r   postprocess_batch_so_farz5MultiAgentSampleBatchBuilder.postprocess_batch_so_far   sb    !%!4!:!:!<!< 	 	Hg 4X >?''))%K!! $$%0%6%6%8%8 E E!>Ay')wy/C'D'D	)$$E 	%0%6%6%8%8  !>Ay')wi(,,+( ( (	)$$
 )4(9(9(;(; 	 	$Hnq)',,..Mh'_T%9(%CDF2244s9[%7899::Q>> 0   &/L"v}d33?"99L2F4F4F4H4H   &,%B%BX&w& &L"" L!! 	KKMTTl++    	JIIIII$*<+=+=+?+?$@$@ 
	W 
	W HjN44((**!.x8$.!, 5     !5h!?@JJ:VVVV!!###""$$$$$r   c                     | j                                         D ]O\  }}|j                                        s1t	          d                    || j        |                   dz             Pd S )NzpThe environment terminated for all agents, but we still don't have a last observation for agent {} (policy {}). z}Please ensure that you include the last observations of all live agents when setting '__all__' terminated|truncated to True. )rP   r.   r&   is_terminated_or_truncatedrp   rv   rQ   )r)   r[   r{   s      r   check_missing_donesz0MultiAgentSampleBatchBuilder.check_missing_dones   s    !%!4!:!:!<!< 	 	Hg?==?? 
 --3V $"6x"@. .  	 	 	
	 	r   c                     |                      |           i }| j                                        D ]'\  }}|j        dk    r|                                ||<   (| j        }d| _        t          j        ||          S )a  Returns the accumulated sample batches for each policy.

        Any unprocessed rows will be first postprocessed with a policy
        postprocessor. The internal state of this builder will be reset.

        Args:
            episode (Optional[Episode]): The Episode object that
                holds this MultiAgentBatchBuilder object or None.

        Returns:
            MultiAgentBatch: Returns the accumulated sample batches for each
                policy.
        r   )r   rO   r.   r'   r@   r
   wrap_as_needed)r)   rf   policy_batchesr\   r{   	old_counts         r   r@   z,MultiAgentSampleBatchBuilder.build_and_reset   s     	%%g..."&"6"<"<">"> 	F 	FIw}q  ,3,C,C,E,Ey)J	
-niHHHr   r   )r   N)rA   rB   rC   rD   r   r   r	   boolr*   intrW   rZ   r   r   r1   r   r   r
   r@   r9   r   r   rF   rF   G   s        6)*  #	   <Bs B B B B, , , , ,;7 ;w ;# ;RV ; ; ; ;&M% M% M% M% M%^   I I I I I I I Ir   rF   )$r#   loggingtypingr   r   r   r   numpyr   ray._common.deprecationr   ray.rllib.env.base_envr   ray.rllib.policy.policyr	   ray.rllib.policy.sample_batchr
   r   ray.rllib.utils.annotationsr   ray.rllib.utils.debugr   ray.rllib.utils.typingr   r   ray.util.debugr   ray.rllib.callbacks.callbacksr   	getLoggerrA   rt   ndarrayr   r!   rF   r9   r   r   <module>r      s        1 1 1 1 1 1 1 1 1 1 1 1     7 7 7 7 7 7 2 2 2 2 2 2 * * * * * * F F F F F F F F 3 3 3 3 3 3 + + + + + + 4 4 4 4 4 4 4 4 # # # # # # <;;;;;;		8	$	$tCy RZ     & & & & & & & &R AI AI AI AI AI AI AI AI AI AIr   