
    &`i%                     
   d dl Z d dlZd dlmZmZ d dlmZmZ d dlm	Z	m
Z
mZmZmZmZ d dlmZmZ d dlmZmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ d dl m!Z! d dl"m#Z#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* e	rd dl+m,Z, d dl-m.Z. d dl/m0Z0  e&            \  Z1Z2Z3 e j4        e5          Z6 edg d          Z7eee
                  Z8 G d de          Z9e# G d dee                      Z:e# G d de:                      Z;dS )    N)ABCMetaabstractmethod)defaultdict
namedtuple)TYPE_CHECKINGAnyListOptionalTypeUnion)DEPRECATED_VALUEdeprecation_warning)BaseEnvconvert_to_base_env)SampleCollector)SimpleListCollector)EnvRunnerV2
_PerfStats)RolloutMetrics)InputReader)concat_samples)OldAPIStackoverride)try_import_tf)SampleBatchType)log_once)RLlibCallback)ObservationFunction)RolloutWorker_PolicyEvalData)env_idagent_idobsinfo	rnn_stateprev_actionprev_rewardc                       e Zd Zd ZdS )_NewEpisodeDefaultDictc                 f    | j         t          |          |                      |          x}| |<   |S N)default_factoryKeyError)selfr!   rets      p/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/evaluation/sampler.py__missing__z"_NewEpisodeDefaultDict.__missing__-   s:    '6"""!%!5!5f!=!==C$v,J    N)__name__
__module____qualname__r1    r2   r0   r)   r)   ,   s#            r2   r)   c                       e Zd ZdZ ee          defd            Zedefd            Z	ede
e         fd            Zede
e         fd            ZdS )SamplerInputz1Reads input experiences from an existing sampler.returnc                     |                                  g}|                    |                                            t          |          dk    rt	          d          t          |          S )Nr   zNo data available from sampler.)get_dataextendget_extra_batcheslenRuntimeErrorr   )r.   batchess     r0   nextzSamplerInput.next9   s^    ==??#t--//000w<<1@AAAg&&&r2   c                     t           )zCalled by `self.next()` to return the next batch of data.

        Override this in child classes.

        Returns:
            The next batch of data.
        NotImplementedErrorr.   s    r0   r;   zSamplerInput.get_dataA   s
     "!r2   c                     t           )a$  Returns list of episode metrics since the last call to this method.

        The list will contain one RolloutMetrics object per completed episode.

        Returns:
            List of RolloutMetrics objects, one per completed episode since
            the last call to this method.
        rC   rE   s    r0   get_metricszSamplerInput.get_metricsL   s
     "!r2   c                     t           )a  Returns list of extra batches since the last call to this method.

        The list will contain all SampleBatches or
        MultiAgentBatches that the user has provided thus-far. Users can
        add these "extra batches" to an episode by calling the episode's
        `add_extra_batch([SampleBatchType])` method. This can be done from
        inside an overridden `Policy.compute_actions_from_input_dict(...,
        episodes)` or from a custom callback's `on_episode_[start|step|end]()`
        methods.

        Returns:
            List of SamplesBatches or MultiAgentBatches provided thus-far by
            the user since the last call to this method.
        rC   rE   s    r0   r=   zSamplerInput.get_extra_batchesX   s
      "!r2   N)r3   r4   r5   __doc__r   r   r   rA   r   r;   r	   r   rG   r=   r6   r2   r0   r8   r8   5   s        ;;Xk'o ' ' ' ' "/ " " " ^" 	"T.1 	" 	" 	" ^	" "4#8 " " " ^" " "r2   r8   )	metaclassc                   B   e Zd ZdZddddddddddddeeedddd	ed
eeef         de	de
dddedededed         deee                  defdZ ee          defd            Z ee          dee         fd            Z ee          dee         fd            ZdS )SyncSamplerzHSync SamplerInput that collects experiences when `get_data()` is called.	env_stepsFTN)count_steps_bymultiple_episodes_in_batchnormalize_actionsclip_actionsobservation_fnsample_collector_classrenderpoliciespolicy_mapping_fnpreprocessorsobs_filterstf_sesshorizonsoft_horizonno_done_at_endworkerr   envclip_rewardsrollout_fragment_lengthrN   	callbacksr   rO   rP   rQ   rR   r   rS   rT   c          
      <   t          d          r|t          d           |t          d           |t          d           |t          d           |t          d           |t          k    rt          d	d
           |t          k    rt          dd
           |t          k    rt          dd
           t          |          | _        || _        t          j                    | _        t          |j
        j                  | _        |st          } ||j        |||||          | _        || _        t#          || j        ||| j        ||| j                  | _        | j                                        | _        t          j                    | _        dS )aN  Initializes a SyncSampler instance.

        Args:
            worker: The RolloutWorker that will use this Sampler for sampling.
            env: Any Env object. Will be converted into an RLlib BaseEnv.
            clip_rewards: True for +/-1.0 clipping,
                actual float value for +/- value clipping. False for no
                clipping.
            rollout_fragment_length: The length of a fragment to collect
                before building a SampleBatch from the data and resetting
                the SampleBatchBuilder object.
            count_steps_by: One of "env_steps" (default) or "agent_steps".
                Use "agent_steps", if you want rollout lengths to be counted
                by individual agent steps. In a multi-agent env,
                a single env_step contains one or more agent_steps, depending
                on how many agents are present at any given time in the
                ongoing episode.
            callbacks: The RLlibCallback object to use when episode
                events happen during rollout.
            multiple_episodes_in_batch: Whether to pack multiple
                episodes into each batch. This guarantees batches will be
                exactly `rollout_fragment_length` in size.
            normalize_actions: Whether to normalize actions to the
                action space's bounds.
            clip_actions: Whether to clip actions according to the
                given action_space's bounds.
            observation_fn: Optional multi-agent observation func to use for
                preprocessing observations.
            sample_collector_class: An optional SampleCollector sub-class to
                use to collect, store, and retrieve environment-, model-,
                and sampler data.
            render: Whether to try to render the environment after each step.
        deprecated_sync_sampler_argsNrU   )oldrV   rW   rX   rY   rZ   T)rd   errorr[   r\   )ema_coef)rN   )r]   base_envrO   ra   
perf_statsr`   rN   rT   )r   r   r   r   rg   r`   queueQueueextra_batchesr   configsampler_perf_stats_ema_coefrh   r   
policy_mapsample_collectorrT   r   _env_runner_objrun_env_runnermetrics_queue)r.   r]   r^   r_   r`   rN   ra   rO   rP   rQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   s                        r0   __init__zSyncSampler.__init__o   s   x 233 	F##
3333 ,#(;<<<<(#8888&#6666"#	2222***#	>>>>///#dCCCC!111#(8EEEE+C00'>$"[]]$]>
 
 
 & 	9%8" 6 6&#)!
 !
 !
   +]'A$;);	 
 	 
 	 
  /3355"[]]r2   r9   c                     	 t          | j                  }t          |t                    r| j                            |           n|S Gr+   )rA   rr   
isinstancer   rs   put)r.   items     r0   r;   zSyncSampler.get_data   sM    	())D$// "&&t,,,,	r2   c                     g }	 	 |                     | j                                                            | j                                                             n# t          j        $ r Y nw xY wo|S )NT)rh   )appendrs   
get_nowait_replacerh   getri   Empty)r.   	completeds     r0   rG   zSyncSampler.get_metrics   s    		  &1133<<#'?#6#6#8#8 =     
 ;   	 s   AA A/.A/c                     g }	 	 |                     | j                                                   n# t          j        $ r Y nw xY wD|S r+   )rz   rk   r{   ri   r~   )r.   extras     r0   r=   zSyncSampler.get_extra_batches   sc    	T/::<<====;   	
 s   ,2 AA)r3   r4   r5   rI   r   r   r   boolfloatintstrr
   r   r   rt   r   r8   r   r;   r	   r   rG   r=   r6   r2   r0   rL   rL   k   s       RR *+0"&":>BF %'/m+ m+ m+  m+ 	m+
 D%K(m+ "%m+ m+ #m+ %)m+  m+ m+ !!67m+ !)o)> ?m+ m+ m+ m+ m+^ Xl/     XlT.1     Xl4#8      r2   rL   )<loggingri   abcr   r   collectionsr   r   typingr   r   r	   r
   r   r   ray._common.deprecationr   r   ray.rllib.env.base_envr   r   0ray.rllib.evaluation.collectors.sample_collectorr   5ray.rllib.evaluation.collectors.simple_list_collectorr   "ray.rllib.evaluation.env_runner_v2r   r   ray.rllib.evaluation.metricsr   ray.rllib.offliner   ray.rllib.policy.sample_batchr   ray.rllib.utils.annotationsr   r   ray.rllib.utils.frameworkr   ray.rllib.utils.typingr   ray.util.debugr   ray.rllib.callbacks.callbacksr   )ray.rllib.evaluation.observation_functionr   #ray.rllib.evaluation.rollout_workerr   tf1tf_	getLoggerr3   loggerr    
StateBatchr)   r8   rL   r6   r2   r0   <module>r      s     ' ' ' ' ' ' ' ' / / / / / / / /                J I I I I I I I ? ? ? ? ? ? ? ? L L L L L L U U U U U U F F F F F F F F 7 7 7 7 7 7 ) ) ) ) ) ) 8 8 8 8 8 8 = = = = = = = = 3 3 3 3 3 3 2 2 2 2 2 2 # # # # # # B;;;;;;MMMMMMAAAAAA]__
R		8	$	$*TTT  $s)_
    [    2" 2" 2" 2" 2";' 2" 2" 2" 2"j Q Q Q Q Q, Q Q Q Q Qr2   