
    &`i(                         d dl Z d dlZd dlmZ d dlmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlmZ d d	lmZ  e            \  ZZZ e j        e          Ze G d
 de                      ZdS )    N)deque)DictSet)deprecation_warning)Policy)OldAPIStackoverride)try_import_tf)	with_lock)PolicyIDc                       e Zd ZdZdddddddddedef fdZe ee	          d	e
fd
                        Ze ee	          de
defd                        Ze ee	          de
fd                        Z ee	          d             Z ee	          d             Z ee	          d             Z ee	          d             Ze ee	          d                         Ze ee	          de
fd                        Ze ee	          defd                        Ze ee	          d	e
fd                        Z ee	          defd            ZdefdZedefd            Z xZS )	PolicyMapa  Maps policy IDs to Policy objects.

    Thereby, keeps n policies in memory and - when capacity is reached -
    writes the least recently used to disk. This allows adding 100s of
    policies to a Algorithm for league-based setups w/o running out of memory.
    d   FN)capacitypolicy_states_are_swappableworker_indexnum_workerspolicy_configsession_creatorseedr   r   c                   |t          dd           t                                                       || _        t	          d |||||fD                       rt          ddd	           || _        i | _        t                      | _        t                      | _
        i | _        t          j                    | _        dS )
aw  Initializes a PolicyMap instance.

        Args:
            capacity: The size of the Policy object cache. This is the maximum number
                of policies that are held in RAM memory. When reaching this capacity,
                the least recently used Policy's state will be stored in the Ray object
                store and recovered from there when being accessed again.
            policy_states_are_swappable: Whether all Policy objects in this map can be
                "swapped out" via a simple `state = A.get_state(); B.set_state(state)`,
                where `A` and `B` are policy instances in this map. You should set
                this to True for significantly speeding up the PolicyMap's cache lookup
                times, iff your policies all share the same neural network
                architecture and optimizer types. If True, the PolicyMap will not
                have to garbage collect old, least recently used policies, but instead
                keep them in memory and simply override their state with the state of
                the most recently accessed one.
                For example, in a league-based training setup, you might have 100s of
                the same policies in your map (playing against each other in various
                combinations), but all of them share the same state structure
                (are "swappable").
        NzPolicyMap(policy_config=..)T)olderrorc              3      K   | ]}|d uV  	d S N ).0is     o/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/policy/policy_map.py	<genexpr>z%PolicyMap.__init__.<locals>.<genexpr>F   s:       
 
 TM
 
 
 
 
 
    zPolicyMap([deprecated args]...)z8PolicyMap(capacity=..., policy_states_are_swappable=...)F)r   newr   )r   super__init__r   anyr   cacheset_valid_keysr   _deque_policy_state_refs	threadingRLock_lock)	selfr   r   r   r   r   r   r   	__class__s	           r   r$   zPolicyMap.__init__   s    B $1   
 	  
 
#\;QUV
 
 
 
 
 	  5N    ,G( )+
 &)UUgg #%
 _&&


r!   itemc                    || j         vrt          d| d| j          d          || j        v rA| j                            |           | j                            |           | j        |         S || j        vrt          d| d          t          j	        | j        |                   }d }t          | j                  | j        k    r|                                 }|:| j        r3t                              d|            |                    |           n1t                              d|            t#          j        |          }|| j        |<   | j                            |           |S )Nz
PolicyID 'z7' not found in this PolicyMap! IDs stored in this map: .z	PolicyID z. not found in internal Ray object store cache!zrestoring policy: zcreating new policy: )r(   KeyErrorr&   r)   removeappendr*   AssertionErrorraygetlenr   _stash_least_used_policyr   loggerdebug	set_stater   
from_state)r.   r0   policy_statepolicys       r   __getitem__zPolicyMap.__getitem__b   s    t'''?T ? ?+/+;? ? ?   4:Kt$$$Kt$$$:d## t... PDPPP   wt6t<== t{t},,2244F
 $"BLL4d44555\****LL777888&|44F!
44   r!   keyvaluec                 ,   || j         v r| j                            |           n1t          | j                  | j        k    r|                                  | j                            |           || j         |<   | j                            |           d S r   )	r&   r)   r4   r9   r   r:   r5   r(   add)r.   rB   rC   s      r   __setitem__zPolicyMap.__setitem__   s     $*Ks#### 4;4=00--/// 	3  
3S!!!!!r!   c                    | j                             |           || j        v r| j                            |           || j        v r*| j        |         }|                     |           | j        |= || j        v r
| j        |= d S d S r   )r(   r4   r)   r&   _close_sessionr*   )r.   rB   r@   s      r   __delitem__zPolicyMap.__delitem__   s     	$$$$+Ks###$*Z_F'''
3 $)))',,, *)r!   c                 D    t          |                                           S r   )iterkeysr.   s    r   __iter__zPolicyMap.__iter__   s    DIIKK   r!   c                 "      fd} |            S )z2Iterates over all policies, even the stashed ones.c               3   8   K   j         D ]} | |          fV  d S r   r(   )rB   r.   s    r   genzPolicyMap.items.<locals>.gen   s;      ' ' 'DI&&&&&' 'r!   r   )r.   rR   s   ` r   itemszPolicyMap.items   s)    	' 	' 	' 	' 	' suur!   c                     | j                                          t          | j                  | j                                          fd} |            S )z.Returns all valid keys, even the stashed ones.c               3      K   D ]} | V  d S r   r   )rB   kss    r   rR   zPolicyMap.keys.<locals>.gen   s+        				 r!   )r-   acquirelistr(   release)r.   rR   rV   s     @r   rL   zPolicyMap.keys   sc     	
$"##
	 	 	 	 	 suur!   c                       j                                           fd j        D              j                                          fd} |            S )z0Returns all valid values, even the stashed ones.c                      g | ]
}|         S r   r   )r   kr.   s     r   
<listcomp>z$PolicyMap.values.<locals>.<listcomp>   s    000!d1g000r!   c               3      K   D ]} | V  d S r   r   )rC   vss    r   rR   zPolicyMap.values.<locals>.gen   s+         r!   )r-   rW   r(   rY   )r.   rR   r_   s   ` @r   valueszPolicyMap.values   sp     	
0000t/000
	 	 	 	 	 suur!   c                     |                                 D ]
\  }}|| |<   |                                 D ]
\  }}|| |<   dS )z2Updates the map with the given dict and/or kwargs.N)rS   )r.   _PolicyMap__mkwargsr\   vs        r   updatezPolicyMap.update   sZ     IIKK 	 	DAqDGGLLNN 	 	DAqDGG	 	r!   c                 (    || j         vrdS | |         S )z9Returns the value for the given key or None if not found.NrQ   )r.   rB   s     r   r8   zPolicyMap.get   s!     d&&&4Cyr!   returnc                 *    t          | j                  S )zCReturns number of all policies, including the stashed-to-disk ones.)r9   r(   rM   s    r   __len__zPolicyMap.__len__   s     4#$$$r!   c                     || j         v S r   rQ   )r.   r0   s     r   __contains__zPolicyMap.__contains__   s     t'''r!   c                 \    d| j          dt          |                                            dS )Nz <PolicyMap lru-caching-capacity=z policy-IDs=>)r   rX   rL   rM   s    r   __str__zPolicyMap.__str__   s<    
$t} $ $DIIKK  $ $ $	
r!   c                    | j                                         }|| j        v sJ | j        |         }|                                }| j        s|                     |           | j        |= t          j        |          | j        |<   |S )zWrites the least-recently used policy's state to the Ray object store.

        Also closes the session - if applicable - of the stashed policy.

        Returns:
            The least-recently used policy, that just got removed from the cache.
        )	r)   popleftr&   	get_stater   rH   r7   putr*   )r.   dropped_policy_idr@   r?   s       r   r:   z"PolicyMap._stash_least_used_policy  s     !K//11 DJ....-.'')) / 	(''' J() 69W\5J5J 12 r!   r@   c                 ^    |                                  }||                                 d S d S r   )get_sessionclose)r@   sesss     r   rH   zPolicyMap._close_session!  s2    !!##JJLLLLL r!   )__name__
__module____qualname____doc__intboolr$   r   r	   dictr   rA   r   rF   rI   rN   rS   rL   r`   re   r8   ri   rk   strrn   r:   staticmethodrH   __classcell__)r/   s   @r   r   r      s         ,1E' E' E' E' &*	E' E' E' E' E' E'N Xd^^+ + + + ^ Y+Z Xd^^"x " " " " ^ Y"& Xd^^-x - - - ^ Y-  Xd^^! ! ^! Xd^^  ^ Xd^^
 
 ^
 Xd^^
 
 ^
 Xd^^  ^ Y Xd^^x    ^ Y Xd^^% % % % ^ Y% Xd^^( ( ( ( ^ Y( Xd^^
 
 
 
 ^
&    8 v    \    r!   r   )loggingr+   collectionsr   typingr   r   r7   ray._common.deprecationr   ray.rllib.policy.policyr   ray.rllib.utils.annotationsr   r	   ray.rllib.utils.frameworkr
   ray.rllib.utils.threadingr   ray.rllib.utils.typingr   tf1tftfv	getLoggerrx   r;   r~   r   r   r!   r   <module>r      s,                      



 7 7 7 7 7 7 * * * * * * = = = = = = = = 3 3 3 3 3 3 / / / / / / + + + + + +}R		8	$	$ S S S S S S S S S Sr!   