
    &`ie:                         d dl Z d dlmZ d dlmZmZmZmZmZm	Z	m
Z
 d dlZd dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZmZ erd dl m!Z! d dl"m#Z# e G d d                      Z$dS )    N)defaultdict)TYPE_CHECKINGAnyCallableDictListOptionalTuple)_DUMMY_AGENT_ID)AgentCollector)_PolicyCollector_PolicyCollectorGroup)	PolicyMap)SampleBatch)OldAPIStack)AgentIDEnvIDEnvInfoDictPolicyID
TensorType)RLlibCallback)RolloutWorkerc                      e Zd ZdZddddededeed dgef         de	d         d	e	d
         f
dZ
edfdededefdZdee         fdZdedefdZd(dZdddededeeef         deddf
dZdedeeef         ddfdZ	 	 d)dedededdfdZd*dedefd Zdedefd!Zdedefd"Zdedefd#Zded$efd%Zefdede	e         fd&Z e!d'             Z"dS )+	EpisodeV2z=Tracks the current state of a (possibly multi-agent) episode.N)worker	callbacksenv_idpoliciespolicy_mapping_fnr   r   r   r   c                   t          j        t          d                    | _        || _        d| _        d| _        d| _        d| _        d| _	        i | _
        i | _        i | _        i | _        || _        || _        || _        || _        i | _        i | _        d| _        i | _        t-          t.                    | _        t-          t2                    | _        i | _        i | _        i | _        i | _        dS )a  Initializes an Episode instance.

        Args:
            env_id: The environment's ID in which this episode runs.
            policies: The PolicyMap object (mapping PolicyIDs to Policy
                objects) to use for determining, which policy is used for
                which agent.
            policy_mapping_fn: The mapping function mapping AgentIDs to
                PolicyIDs.
            worker: The RolloutWorker instance, in which this episode runs.
        g NgmCg        r   N)random	randrangeint
episode_idr   total_rewardactive_env_stepstotal_env_stepsactive_agent_stepstotal_agent_stepscustom_metrics	user_data	hist_datamediar   r   
policy_mapr   _agent_to_policy_agent_collectors_next_agent_index_agent_to_indexr   floatagent_rewardslist_agent_reward_history_has_init_obs_last_terminateds_last_truncateds_last_infos)selfr   r   r   r   r   s         s/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/evaluation/episode_v2.py__init__zEpisodeV2.__init__   s   *  &/D		::#& &( %''(&'
 13 *, 24%'
"%-  	 :<@B&'35 EPPUDVDV?J4?P?P"246857 13    Fagent_idrefreshreturnc                     || j         vs|r(|                     || | j                  x}| j         |<   n| j         |         }|| j        vrt	          d| d          |S )a  Returns and stores the policy ID for the specified agent.

        If the agent is new, the policy mapping fn will be called to bind the
        agent to a policy for the duration of the entire episode (even if the
        policy_mapping_fn is changed in the meantime!).

        Args:
            agent_id: The agent ID to lookup the policy ID for.

        Returns:
            The policy ID for the specified agent.
        )r   z.policy_mapping_fn returned invalid policy id 'z'!)r0   r   r   r/   KeyError)r<   r@   rA   	policy_ids       r=   
policy_forzEpisodeV2.policy_fore   s    $ 4000G0:>:P:P{ ;Q ; ; I-h77 -h7I DO++QIQQQ   r?   c                 N    t          | j                                                  S )zReturns list of agent IDs that have appeared in this episode.

        Returns:
            The list of all agent IDs that have appeared so far in this
            episode.
        )r6   r3   keysr<   s    r=   
get_agentszEpisodeV2.get_agents   s!     D(--//000r?   c                 l    || j         vr| j        | j         |<   | xj        dz  c_        | j         |         S )zGet the index of an agent among its environment.

        A new index will be created if an agent is seen for the first time.

        Args:
            agent_id: ID of an agent.

        Returns:
            The index of this agent.
           )r3   r2   r<   r@   s     r=   agent_indexzEpisodeV2.agent_index   sD     4///-1-CD *""a'""#H--r?   c                 F    | xj         dz  c_         | xj        dz  c_        dS )z(Advance the episode forward by one step.rL   N)r'   r(   rI   s    r=   stepzEpisodeV2.step   s0    "!r?   r!   )tinit_obs
init_infosrQ   c                   | j         |                     |                   }|| j        vsJ t          |j        |j        d         d         |j                            dd          |                                |                                d          | j        |<   | j        |         	                    | j
        |                     |          | j        |||           d| j        |<   dS )	zAdd initial env obs at the start of a new episode

        Args:
            agent_id: Agent ID.
            init_obs: Initial observations.
            init_infos: Initial infos dicts.
            t: timestamp.
        modelmax_seq_len_disable_action_flatteningF)rV   disable_action_flatteningis_policy_recurrentintial_states_enable_new_api_stack)r%   rN   r   rR   rS   rQ   TN)r/   rF   r1   r   view_requirementsconfiggetis_recurrentget_initial_stateadd_init_obsr%   rN   r   r8   )r<   r@   rR   rS   rQ   policys         r=   ra   zEpisodeV2.add_init_obs   s      !:!:; t55555+9$g.}=&,m&7&7,e' ' !' 3 3 5 5 2244"'	,
 	,
 	,
x( 	x(55((22;! 	6 	
 	
 	
 (,8$$$r?   valuesc                    || j         v sJ | xj        dz  c_        | xj        dz  c_        |t          k    r||d<   | j         |                             |           |t
          j                 }| xj        |z  c_        | j        || 	                    |          fxx         |z  cc<   | j
        |                             |           t
          j        |v r|t
          j                 | j        |<   t
          j        |v r|t
          j                 | j        |<   t
          j        |v r(|                     ||t
          j                            dS dS )zAdd action, reward, info, and next_obs as a new step.

        Args:
            agent_id: Agent ID.
            values: Dict of action, reward, info, and next_obs.
        rL   r@   N)r1   r)   r*   r   add_action_reward_next_obsr   REWARDSr&   r5   rF   r7   appendTERMINATEDSr9   
TRUNCATEDSr:   INFOSset_last_info)r<   r@   rc   rewards       r=   add_action_reward_done_next_obsz)EpisodeV2.add_action_reward_done_next_obs   sh    4111111$!# &&!)F: 	x(CCFKKK +,V#Hdooh&?&?@AAAVKAAA"8,33F;;; "f,,/5k6M/ND"8,!V++.4[5K.LD!(+ &&x0A)BCCCCC '&r?   batch_builderis_donecheck_donesc           
         i }| j                                         D ]U\  }}|j        dk    r|                     |          }| j        |         }|                    |j                  }	|||	f||<   V|                                D ]\  }\  }}}	|rU|rS|	                                s?t          d	                    | j
        ||                     |                    dz             | j                            |i                               dd          s|	                                r5t          t          j        |	t"          j                                     dk    rt          d|	          t          |          dk    r|                                }
|
|= ni }
|	}t)          |dd	          .|j                            |||                                           |                    d	           |                    ||
|           }dd
lm} | j                             |            | ||| j        ||           ||j        vrt=          |          |j        |<   |j        |                             ||j                    |xj        | j         z  c_        |xj!        | j"        z  c_!        d| _         d| _"        d	S )a  Build and return currently collected training samples by policies.

        Clear agent collector states if this episode is done.

        Args:
            batch_builder: _PolicyCollectorGroup for saving the collected per-agent
                sample batches.
            is_done: If this episode is done (terminated or truncated).
            check_dones: Whether to make sure per-agent trajectories are actually done.
        r   zkEpisode {} terminated for all agents, but we still don't have a last observation for agent {} (policy {}). zkPlease ensure that you include the last observations of all live agents when setting done[__all__] to True.training_enabledTrL   zPBatches sent to postprocessing must only contain steps from a single trajectory.explorationN)get_global_worker)r   episoder@   rE   r   postprocessed_batchoriginal_batches)#r1   itemsagent_stepsrF   r/   build_for_trainingr\   is_terminated_or_truncated
ValueErrorformatr%   r;   r^   is_single_trajectorylennpuniquer   EPS_IDcopygetattrrs   postprocess_trajectoryget_sessionset_get_interceptor#ray.rllib.evaluation.rollout_workerrt   r   on_postprocess_trajectorypolicy_collectorsr   $add_postprocessed_batch_for_trainingr)   	env_stepsr'   )r<   rn   ro   rp   pre_batchesr@   	collectorpidrb   	pre_batchother_batches
post_batchrt   s                r=   postprocess_episodezEpisodeV2.postprocess_episode   s+   ( #'#9#?#?#A#A 	= 	=Hi$))//(++C_S)F!44V5MNNI%(&)$<K!!2=2C2C2E2E =	 =	.H.sFI  ; y/S/S/U/U  "F4?Hdooh>W>WXX   #''"5599:LdSS  2244ry;+=!>??@@1DD 0   ;!## + 0 0 2 2!(++ " #Jv}d33?"99J(:(:(<(<   **400066z=RVWWJMMMMMMN44((**!$.!, 5    -9997G7O7O/4+C0UUF4    	!!T%<<!!4#88 #$ !r?   c                     ||| j         v o| j         |         S t          t          | j                                                             S )zReturns whether this episode has initial obs for an agent.

        If agent_id is None, return whether we have received any initial obs,
        in other words, whether this episode is completely fresh.
        )r8   anyr6   rc   rM   s     r=   has_init_obszEpisodeV2.has_init_obs]  sJ     t11Rd6H6RRtD.557788999r?   c                 V    |                      |          p|                     |          S N)is_terminatedis_truncatedrM   s     r=   ro   zEpisodeV2.is_doneh  s)    !!(++Jt/@/@/J/JJr?   c                 8    | j                             |d          S NF)r9   r^   rM   s     r=   r   zEpisodeV2.is_terminatedk  s    %))(E:::r?   c                 8    | j                             |d          S r   )r:   r^   rM   s     r=   r   zEpisodeV2.is_truncatedn  s    $((5999r?   infoc                     || j         |<   d S r   )r;   )r<   r@   r   s      r=   rk   zEpisodeV2.set_last_infoq  s    %)"""r?   c                 6    | j                             |          S r   )r;   r^   rM   s     r=   last_info_forzEpisodeV2.last_info_fort  s     ##H---r?   c                     | j         S r   )r(   rI   s    r=   lengthzEpisodeV2.lengthy  s    ##r?   )rB   N)FFr   )#__name__
__module____qualname____doc__r   r   r   r   r   r	   r>   r   boolrF   r   rJ   r$   rN   rP   r   r   strra   rm   r   r   r   ro   r   r   rk   r   r   propertyr    r?   r=   r   r      s       GG -1/3H3 H3 H3H3 H3 $Wk?$KX$UV	H3 )H3 O,H3 H3 H3 H3V #25! !!<@!	! ! ! !F1DM 1 1 1 1.G . . . . . " " " " (, (, (, (, 	(,
 j)(, (, 
(, (, (, (,T'D'D S*_%'D 
	'D 'D 'D 'DX !	b" b",b" b" 	b"
 
b" b" b" b"H	: 	:W 	: 	: 	: 	: 	:K KD K K K K;g ;$ ; ; ; ;:W : : : : :*g *T * * * * #2. ..	+	. . . .
 $ $ X$ $ $r?   r   )%r"   collectionsr   typingr   r   r   r   r   r	   r
   numpyr   ray.rllib.env.base_envr   /ray.rllib.evaluation.collectors.agent_collectorr   5ray.rllib.evaluation.collectors.simple_list_collectorr   r   ray.rllib.policy.policy_mapr   ray.rllib.policy.sample_batchr   ray.rllib.utils.annotationsr   ray.rllib.utils.typingr   r   r   r   r   ray.rllib.callbacks.callbacksr   r   r   r   r   r?   r=   <module>r      s    # # # # # # L L L L L L L L L L L L L L L L L L     2 2 2 2 2 2 J J J J J J        2 1 1 1 1 1 5 5 5 5 5 5 3 3 3 3 3 3 T T T T T T T T T T T T T T B;;;;;;AAAAAA c$ c$ c$ c$ c$ c$ c$ c$ c$ c$r?   