
    &`i#                         d dl Z d dlZd dlmZmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZ erd dlmZ  ej        e          Z e e j        dg d	                    Zd d i i i i i d
i f	ej        _        ededefd            Ze	 	 	 ddddeee                  dededef
d            Ze	 	 ddddeee                  dedee         fd            Ze	 	 ddee         dee         dedefd            Z dS )    N)TYPE_CHECKINGListOptional)DEFAULT_POLICY_ID)OldAPIStack)LEARNER_STATS_KEY)GradInfoDictLearnerStatsDict
ResultDict)EnvRunnerGroupRolloutMetrics)	episode_lengthepisode_rewardagent_rewardscustom_metrics
perf_stats	hist_datamediaepisode_faultyconnector_metricsF	grad_inforeturnc                     t           | v r| t                    S i }|                                 D ]4\  }}t          |          t          u rt           |v r|t                    ||<   5|S )aD  Return optimization stats reported from the policy.

    .. testcode::
        :skipif: True

        grad_info = worker.learn_on_batch(samples)

        # {"td_error": [...], "learner_stats": {"vf_loss": ..., ...}}

        print(get_stats(grad_info))

    .. testoutput::

        {"vf_loss": ..., "policy_loss": ...}
    )r   itemstypedict)r   multiagent_statskvs       p/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/evaluation/metrics.pyget_learner_statsr!   $   sp    " I%%*++!! ; ;177d?? A%%&'(9&: #       workersr   remote_worker_idstimeout_secondskeep_custom_metricsc                 N    t          | ||          }t          |||          }|S )a  Gathers episode metrics from rollout worker set.

    Args:
        workers: EnvRunnerGroup.
        remote_worker_ids: Optional list of IDs of remote workers to collect
            metrics from.
        timeout_seconds: Timeout in seconds for collecting metrics from remote workers.
        keep_custom_metrics: Whether to keep custom metrics in the result dict as
            they are (True) or to aggregate them (False).

    Returns:
        A result dict of metrics.
    )r&   )r'   )collect_episodessummarize_episodes)r$   r%   r&   r'   episodesmetricss         r    collect_metricsr-   A   sE    (  "O  H !(0C  G Nr"   c                     |                      d d||          }t          |          dk    rt                              d           g }|D ]}|                    |           |S )a`  Gathers new episodes metrics tuples from the given RolloutWorkers.

    Args:
        workers: EnvRunnerGroup.
        remote_worker_ids: Optional list of IDs of remote workers to collect
            metrics from.
        timeout_seconds: Timeout in seconds for collecting metrics from remote workers.

    Returns:
        List of RolloutMetrics.
    c                 *    |                                  S )N)get_metrics)ws    r    <lambda>z"collect_episodes.<locals>.<lambda>s   s    !--// r"   T)local_env_runnerr%   r&   r   zWARNING: collected no metrics.)foreach_env_runnerlenloggerwarningextend)r$   r%   r&   metric_listsr+   r,   s         r    r)   r)   ^   s    ( --!!+'	 .  L <A7888H ! !    Or"   r+   new_episodesc                 D   || }g }g }t          j        t                    }t          j        t                    }t          j        t                    }t          j        t                    }t          j        t                    }	t          j        t                    }
d}| D ]}|j                                        D ] \  }}||                             |           !|j        r|dz  }J|                    |j                   |                    |j                   |j	                                        D ] \  }}||                             |           !t          |j                  dk    pt          |j        v}|r=|j                                        D ]#\  \  }}}||                             |           $|j                                        D ]\  }}||xx         |z  cc<   |j                                        D ] \  }}|	|                             |           !t          |d          rh|j                                        D ]N}|                                D ]7}|                                D ] \  }}|
|                             |           !8O |r3t%          |          }t'          |          }t)          j        |          }n-t-          d          }t-          d          }t-          d          }|rt)          j        |          }nt-          d          }||d<   ||d<   i }i }i }|                                                                D ]b\  }}t)          j        |          ||<   t)          j        |          ||<   t)          j        |          ||<   ||d                    |          <   c|                                                                D ]\  }}d	 |D             } |r| ||<   t)          j        |           ||d
z   <   | r5t)          j        |           ||dz   <   t)          j        |           ||dz   <   n*t-          d          ||dz   <   t-          d          ||dz   <   ||= |                                                                D ]\  }}t)          j        |          ||<   t3                      }!|
                                D ]\  }}t)          j        |          |!|<   t3          di d|d|d|d|dt3          |	          dt5          |          d|d|d|dt3          |          dt3          |          dt3          |          d|d|!dt          |          d|d|d|dt          |          S ) a  Summarizes a set of episode metrics tuples.

    Args:
        episodes: List of most recent n episodes. This may include historical ones
            (not newly collected in this iteration) in order to achieve the size of
            the smoothing window.
        new_episodes: All the episodes that were completed in this iteration.
        keep_custom_metrics: Whether to keep custom metrics in the result dict as
            they are (True) or to aggregate them (False).

    Returns:
        A result dict of metrics.
    Nr      r   nanr   episode_lengthszpolicy_{}_rewardc                 ^    g | ]*}t          j        t          j        |                    (|+S  )npanyisnan).0r   s     r    
<listcomp>z&summarize_episodes.<locals>.<listcomp>   s/    ===a)<)<====r"   _mean_min_maxepisode_reward_maxepisode_reward_minepisode_reward_meanepisode_len_meanepisode_mediaepisodes_timesteps_totalpolicy_reward_minpolicy_reward_maxpolicy_reward_meanr   
hist_statssampler_perfnum_faulty_episodesnum_episodesepisode_return_maxepisode_return_minepisode_return_meanepisodes_this_iterr@   )collectionsdefaultdictlistr   r   appendr   r   r   r   r5   r   r   r   r   hasattrr   valuesminmaxrA   meanfloatcopyformatr   sum)"r+   r:   r'   episode_rewardsr>   policy_rewardsr   r   rR   rM   r   rT   episoder   r   is_multi_agent_	policy_idrewardper_pipeline_metricsper_connector_metricsconnector_metric_nameval
min_reward
max_reward
avg_reward
avg_lengthrO   rQ   rP   rewardsv_listfiltmean_connector_metricss"                                     r    r*   r*      s   ( OO ,T22N ,T22N(..J(..J+D11M#/55 M M&,,.. 	$ 	$DAqqM  #### ! 	1$w5666w5666*0022 	( 	(DAq1$$Q''''%&&* > (== 	  	9*1*?*E*E*G*G 9 9&Iy)008888%++-- 	 	DAqqMMMQMMMMM'')) 	' 	'DAq!##A&&&&7/00 	M(/(A(H(H(J(J M M$-A-H-H-J-J M M)6K6Q6Q6S6S M M2-s)*?@GGLLLLMM  "))
))
W_--

5\\
5\\
5\\
 "W_--

5\\
 $3J $3J !,113399;; C C	7')vg)$(*(8(89%')vg)$ <C
%,,Y7788#((**0022 " "	6==6=== 
	" $N1*,'$--N1w;' :-/VD\\q6z*-/VD\\q6z**-25\\q6z*-25\\q6z*q!!__&&,,.. ( (	6
1!VV&,,.. 4 4	6$&GFOOq!!   %:%: 'J $	
 =))) "%_!5!5!5 ,+ ,+ .- N+++ 
### *%%% 0/ 10$ &&&%& &:'( &:)* 'J+, |,,,- r"   )Nr#   F)Nr#   )NF)!rZ   loggingtypingr   r   r   numpyrA   ray.rllib.policy.sample_batchr   ray.rllib.utils.annotationsr   $ray.rllib.utils.metrics.learner_infor   ray.rllib.utils.typingr	   r
   r   ray.rllib.env.env_runner_groupr   	getLogger__name__r6   
namedtupler   __new____defaults__r!   intboolr-   r)   r*   r@   r"   r    <module>r      s        0 0 0 0 0 0 0 0 0 0     ; ; ; ; ; ; 3 3 3 3 3 3 B B B B B B M M M M M M M M M M >======		8	$	$K
	
 
	
 
	
    ()!RRR&K  #  2B    8  .2 %	 S	*  	
    8  .2    S	*    
.	       F  *. %H H>"H~&H H 	H H H H H Hr"   