
    &`i                         d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
mZmZ d dlmZ  ej        e          Ze G d de j                              ZdS )	    N)AnyDict)Dataset)Policy)DeveloperAPIExperimentalAPI)SampleBatchTypec            
           e Zd ZdZedefd            Zej        ede	de
eef         fd                        Zede	de
eef         fd            Ze ej                    dd	ed
ede
eef         fd            ZdS )OfflineEvaluatorz.Interface for an offline evaluator of a policypolicyc                     || _         dS )zInitializes an OffPolicyEstimator instance.

        Args:
            policy: Policy to evaluate.
            kwargs: forward compatibility placeholder.
        N)r   )selfr   kwargss      w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/offline/offline_evaluator.py__init__zOfflineEvaluator.__init__   s         batchreturnc                     t           )aP  Returns the evaluation results for the given batch of episodes.

        Args:
            batch: The batch to evaluate.
            kwargs: forward compatibility placeholder.

        Returns:
            The evaluation done on the given batch. The returned
            dict can be any arbitrary mapping of strings to metrics.
        )NotImplementedErrorr   r   r   s      r   estimatezOfflineEvaluator.estimate   s
     "!r   c                     i S )a6  Sometimes you need to train a model inside an evaluator. This method
        abstracts the training process.

        Args:
            batch: SampleBatch to train on
            kwargs: forward compatibility placeholder.

        Returns:
            Any optional metrics to return from the evaluator
         r   s      r   trainzOfflineEvaluator.train+   s	     	r   )n_parallelismdatasetr   c                    dS )a  Calculates the estimate of the metrics based on the given offline dataset.

        Typically, the dataset is passed through only once via n_parallel tasks in
        mini-batches to improve the run-time of metric estimation.

        Args:
            dataset: The ray dataset object to do offline evaluation on.
            n_parallelism: The number of parallelism to use for the computation.

        Returns:
            Dict[str, Any]: A dictionary of the estimated values.
        Nr   )r   r   r   s      r   estimate_on_datasetz$OfflineEvaluator.estimate_on_dataset9   s      r   N)__name__
__module____qualname____doc__r   r   r   abcabstractmethodr	   r   strr   r   r   r   os	cpu_countr   intr   r   r   r   r   r      s       88v    \ 	"o "DcN " " " \ " ? c3h    \ 
 *R\^^	   	
 
c3h   _  r   r   )r$   loggingr'   typingr   r   ray.datar   ray.rllib.policyr   ray.rllib.utils.annotationsr   r   ray.rllib.utils.typingr	   	getLoggerr    loggerABCr   r   r   r   <module>r3      s    



  				               # # # # # # E E E E E E E E 2 2 2 2 2 2		8	$	$ = = = = =sw = = = = =r   