
    &`iY                         d dl mZ d dlmZ d dlZd dlZd dlmZ d dl	m
Z
 d dlmZ dZdZe
 G d	 d
                      Ze
d             ZdS )    )defaultdict)DictN)DEFAULT_POLICY_ID)OldAPIStack)PolicyIDlearnerlearner_statsc                   J    e Zd ZddefdZefdededdfdZd	eddfd
Z	d Z
dS )LearnerInfoBuilder   num_devicesc                 T    || _         t          t                    | _        d| _        d S )NF)r   r   listresults_all_towersis_finalized)selfr   s     x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/utils/metrics/learner_info.py__init__zLearnerInfoBuilder.__init__   s(    &"-d"3"3!    results	policy_idreturnNc           	         | j         r
J d            dvr"| j        |                                        dS | j        |                             t          j        d gfdt          | j                  D             R                                             D ]h\  }}|t          k    rB|                                         D ]&\  }}|| j        |         d         t                   |<   'R|| j        |         d         |<   idS )a=  Adds a policy.learn_on_(loaded)?_batch() result to this builder.

        Args:
            results: The results returned by Policy.learn_on_batch or
                Policy.learn_on_loaded_batch.
            policy_id: The policy's ID, whose learn_on_(loaded)_batch method
                returned `results`.
        z7LearnerInfo already finalized! Cannot add more results.tower_0c                     t          | g|R  S )N)_all_tower_reduce)pss     r   <lambda>z?LearnerInfoBuilder.add_learn_on_batch_results.<locals>.<lambda>1   s    "3A":":":": r   c              3   h   K   | ],}                     d                     |                    V  -dS )ztower_{}N)popformat).0	tower_numr   s     r   	<genexpr>z@LearnerInfoBuilder.add_learn_on_batch_results.<locals>.<genexpr>2   sQ        %  J$5$5i$@$@AA     r   N)	r   r   appendtreemap_structure_with_pathranger   itemsLEARNER_STATS_KEY)r   r   r   kvk1v1s    `     r   add_learn_on_batch_resultsz-LearnerInfoBuilder.add_learn_on_batch_results   sr    !	E 	ED	E 	E! G###I.55g>>>>> #I.55,::   ).t/?)@)@         B B1)))")!*"2"2"4"4  B  /	:2>?PQ 
 ABD+I6r:1==B Br   all_policies_resultsc                 t    |                                 D ]"\  }}|dk    r|                     ||           #dS )a$  Adds multiple policy.learn_on_(loaded)?_batch() results to this builder.

        Args:
            all_policies_results: The results returned by all Policy.learn_on_batch or
                Policy.learn_on_loaded_batch wrapped as a dict mapping policy ID to
                results.
        batch_count)r   N)r+   r1   )r   r2   pidresults       r   &add_learn_on_batch_results_multi_agentz9LearnerInfoBuilder.add_learn_on_batch_results_multi_agentA   sV     05577 	G 	GKCm##//#/FFF	G 	Gr   c                     d| _         i }| j                                        D ]\  }}t          j        t
          g|R  ||<    |S )NT)r   r   r+   r(   r)   r   )r   infor   r   s       r   finalizezLearnerInfoBuilder.finalizeP   sd     -1-D-J-J-L-L 	 	)I) #:!$6  DOO r   )r   )__name__
__module____qualname__intr   r   r   r   r1   r7   r:    r   r   r   r      s        " "C " " " " 0&B &B&B &B 
	&B &B &B &BPG"G 
G G G G    r   r   c                    t          |           dk    r"| d         dk    rt          j        |d          S |d         dS t          | d         t                    r^| d                             d          rt          j        |          S | d                             d          rt          j        |          S t          j        |          	                                rt          j
        S t          j        |          S )	z<Reduces stats across towers based on their stats-dict paths.r   r   td_error)axisNr&   min_max_)lennpconcatenate
isinstancestr
startswithnanminnanmaxisnanallnannanmean)path
tower_datas     r   r   r   ^   s    
 4yyA~~$q'Z//~jq1111	A	t$r(C   ) 8v&& 	)9Z((("X  (( 	)9Z(((	x
!! v:j!!!r   )collectionsr   typingr   numpyrF   r(   ray.rllib.policy.sample_batchr   ray.rllib.utils.annotationsr   ray.rllib.utils.typingr   LEARNER_INFOr,   r   r   r?   r   r   <module>rZ      s    # # # # # #            ; ; ; ; ; ; 3 3 3 3 3 3 + + + + + +  $  H H H H H H H HV " " " " "r   