
    &`ivF                        d dl Z d dlmZmZmZ d dlZd dlmZm	Z	 d dl
mZmZmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZmZmZmZmZ d d
lmZmZm Z m!Z!m"Z" d dl#m$Z$ d dl%m&Z& d dl'Z' e j(        e)          Z*e&dedeee"f         ddfd            Z+ededede!de ddf
d            Z,e&dede-de.dee!         fd            Z/e&de0ddfd            Z1e&de!de-ddfd            Z2dede!ddfdZ3dS )    N)AnyDictOptional)DEPRECATED_VALUEdeprecation_warning)MultiAgentBatchSampleBatchconcat_samples)OldAPIStack)from_config)ALL_MODULESTD_ERROR_KEY)LEARNER_STATS_KEY)EpisodeReplayBuffer!MultiAgentPrioritizedReplayBufferMultiAgentReplayBufferPrioritizedEpisodeReplayBufferReplayBuffer)AlgorithmConfigDictModuleID
ResultDictSampleBatchType
TensorType)log_once)DeveloperAPIreplay_buffer	td_errorsreturnc                 p   t          | t                    r|                                D ]\  }}|dt          fv rt          |vs|t                   At          d                    |                    rt                              d| d           h| 	                    |t                   |           d S d S )N__all__z+no_td_error_in_train_results_from_module_{}z0Trying to update priorities for module with ID `z` in prioritized episode replay buffer without providing `td_errors` in train_results. Priority update for this policy is being skipped.)

isinstancer   itemsr   r   r   formatloggerwarningupdate_priorities)r   r   	module_idtd_errors       x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/utils/replay_buffers/utils.py*update_priorities_in_episode_replay_bufferr*   "   s     -!?@@ O $-??#4#4 	O 	OIxY444 8++x/E/MAHHSS   NN8%8 8 8    ++H\,BINNNN5O O	O 	O    configtrain_batchtrain_resultsc                 4   t          | t                    ri }|                                D ]R\  }}|                    d|t                                       d                    }|j        |         }|                    d           |                    d          }	t          j        |v rg }
|j	        rt          |          |j        gz  }n'|t          j                 dt          |                   }d}|D ]"}|
                    |	|                    ||z  }#t          j        |
          }	|Qt          d                    |                    r-t"                              d                    |                     V|	Qt          d                    |                    r-t"                              d                    |                     t          |	          t          |          k    r| j        }t          |	          t          |          k    rt          |	          |z  dk    sJ |	                    d	|g          dddf         }	t          |	          t          |          k    sJ |	|f||<   T|                     |           dS dS )
a  Updates the priorities in a prioritized replay buffer, given training results.

    The `abs(TD-error)` from the loss (inside `train_results`) is used as new
    priorities for the row-indices that were sampled for the train batch.

    Don't do anything if the given buffer does not support prioritized replay.

    Args:
        replay_buffer: The replay buffer, whose priority values to update. This may also
            be a buffer that does not support priorities.
        config: The Algorithm's config dict.
        train_batch: The batch used for the training update.
        train_results: A train results dict, generated by e.g. the `train_one_step()`
            utility.
    r(   Nbatch_indexesr   z+no_td_error_in_train_results_from_policy_{}zTrying to update priorities for policy with id `{}` in prioritized replay buffer without providing td_errors in train_results. Priority update for this policy is being skipped.z.no_batch_indices_in_train_result_for_policy_{}zTrying to update priorities for policy with id `{}` in prioritized replay buffer without providing batch_indices in train_batch. Priority update for this policy is being skipped.)r!   r   r"   getr   policy_batchesset_get_interceptorr	   SEQ_LENSzero_paddedlenmax_seq_lenappendnparrayr   r#   r$   r%   replay_sequence_lengthreshaper&   )r   r,   r-   r.   	prio_dict	policy_idinfor(   policy_batchbatch_indices_batch_indicesseq_lenssequence_sumseq_lenTs                  r)   "update_priorities_in_replay_bufferrH   F   s   . -!BCC J3	,2244 C	= C	=OIt
 xx
D1B,C,G,G
,S,STTH&5i@L ,,T222(,,_==M#|33 "$
  + S"8}}0H/IIHH+K,@A/CMM/RH  !' , ,G"))-*EFFF G+LL " 8 8AHHSS   NN# $*6)#4#4	   $DKKIVV   NN# $*6)#4#4	    =!!S]]22!8&&X663};M;MPQ;QUV;V;V;VV - 5 5r1g > >qqq!t D=))S]]::::$18#<Ii   	''	22222UJ3 J3r+   	min_stepscount_by_agent_stepsc                    d}g }||k     rh|                      d          }|r|                                n|                                }|dk    r|S |                    |           ||z  }||k     ht	          |          }|S )a  Samples a minimum of n timesteps from a given replay buffer.

    This utility method is primarily used by the QMIX algorithm and helps with
    sampling a given number of time steps which has stored samples in units
    of sequences or complete episodes. Samples n batches from replay buffer
    until the total number of timesteps reaches `train_batch_size`.

    Args:
        replay_buffer: The replay buffer to sample from
        num_timesteps: The number of timesteps to sample
        count_by_agent_steps: Whether to count agent steps or env steps

    Returns:
        A concatenated SampleBatch or MultiAgentBatch with samples from the
        buffer.
    r      )	num_items)sampleagent_steps	env_stepsr9   r
   )r   rI   rJ   train_batch_sizetrain_batchesbatch	batch_lenr-   s           r)   sample_min_n_steps_from_bufferrU      s    ( M
Y
&
&$$q$11+?VE%%'''U__EVEV	>>LU###I% Y
&
& !//Kr+   c                    |                      dd          i | d<   |                      dt                    t          k    rt          ddd           |                      dt                    }|t          k    rt          d	d
d           |                      dt                    }|t          k    r!| d                              dt                    }|t          k    rt          ddd           |                      dt                    }|t          k    r|| d         d<   t          dd           |                      dt                    }|t          k    r!| d                              dt                    }|t          k    rt          ddd           g d}|D ]}|                      |t                    t          k    r^t          d                    |          d                    |          d           |                      d          | |         | d         |<   |                      d|                      di                                dt                              }|t          k    rt          ddd           || d<   |                      dd          }||| d         d<   t          d d!d           | d         }	d"|	v s
J d#            | d         d"         }
t	          |
t
                    r'|
                    d$          d%k    rd&|
z   | d         d"<   t          |
| d                   }t          |          | d         d"<   t          |d'          r`| d                              d(d)          d*k    rt          d+          | d                              dd,          d-k    rt          d.          dS | d                              d          rt          d/          dS )0a  Checks and fixes values in the replay buffer config.

    Checks the replay buffer config for common misconfigurations, warns or raises
    error in case validation fails. The type "key" is changed into the inferred
    replay buffer class.

    Args:
        config: The replay buffer config to be validated.

    Raises:
        ValueError: When detecting severe misconfiguration.
    replay_buffer_configNworker_side_prioritizationz$config['worker_side_prioritization']z<config['replay_buffer_config']['worker_side_prioritization']T)oldnewerrorprioritized_replayzTconfig['prioritized_replay'] or config['replay_buffer_config']['prioritized_replay']zReplay prioritization specified by config key. RLlib's new replay buffer API requires setting `config['replay_buffer_config']['type']`, e.g. `config['replay_buffer_config']['type'] = 'MultiAgentPrioritizedReplayBuffer'` to change the default behaviour.)rY   helpr[   buffer_sizezFconfig['buffer_size'] or config['replay_buffer_config']['buffer_size']z*config['replay_buffer_config']['capacity']burn_inreplay_burn_inzconfig['burn_in']z0config['replay_buffer_config']['replay_burn_in'])rY   r]   replay_batch_sizezRconfig['replay_batch_size'] or config['replay_buffer_config']['replay_batch_size']zSpecification of replay_batch_size is not supported anymore but is derived from `train_batch_size`. Specify the number of items you want to replay upon calling the sample() method of replay buffers if this does not work for you.)prioritized_replay_alphaprioritized_replay_betaprioritized_replay_epsno_local_replay_bufferreplay_zero_init_states*replay_buffer_shards_colocated_with_driverzconfig['{}']z$config['replay_buffer_config']['{}']Flearning_startszMconfig['learning_starts'] orconfig['replay_buffer_config']['learning_starts']z2config['num_steps_sampled_before_learning_starts'](num_steps_sampled_before_learning_startsr<   z config['replay_sequence_length']z~Replay sequence length specified at new location config['replay_buffer_config']['replay_sequence_length'] will be overwritten.typez@Can not instantiate ReplayBuffer from config without 'type' key..r1   zray.rllib.utils.replay_buffers.r&   replay_modeindependentlockstepz>Prioritized replay is not supported when replay_mode=lockstep.r   rL   zDPrioritized replay is not supported when replay_sequence_length > 1.zJWorker side prioritization is not supported when prioritized_replay=False.)r2   r   r   r#   r!   strfindr   rj   hasattr
ValueError)r,   r\   capacityr`   ra   keys_with_deprecated_positionskrh   r<   rW   buffer_typedummy_buffers               r)   validate_buffer_configrx      s    zz($//7)+%&zz.0@AAEUUU6N	
 	
 	
 	
  $8:JKK---$ 
	
 
	
 
	
 
	
 zz-)9::H###0155mEUVV###<		
 	
 	
 	
 ZZ	+;<<N)));I%&'78#C	
 	
 	
 	

 

#68HII,,,"#9:>>!1
 
 ,,,#5 	
 	
 	
 	
& & &" , 
> 
>::a)**.>>>"))!,,>EEaHH    zz011=4:1I-.q1jj

)2..223DFVWW O ***@E		
 	
 	
 	
 >M9:
 $ZZ(@$GG) # 	%&$	
 	2= 	
 	
 	
 	
 ""89&&&&I 	'&& /08K+s## 
(8(8(=(=(C(C .; 	%&v. {F3I,JKKL-1,-?-?F!"6*|011 )*..}mLL  P   *+//0H!LLqPP.   QP ()--.JKK 	,  	 	r+   itemrs   c                 ~   t          d          r|                                 }t          j                    }|j        dz  }||z  dz  }d                    ||| j        ||          }||k    rt          |          |d|z  k    rt          	                    |           dS t          
                    |           dS dS )a@  Warn if the configured replay buffer capacity is too large for machine's memory.

    Args:
        item: A (example) item that's supposed to be added to the buffer.
            This is used to compute the overall memory footprint estimate for the
            buffer.
        capacity: The capacity value of the buffer. This is interpreted as the
            number of items (such as given `item`) that will eventually be stored in
            the buffer.

    Raises:
        ValueError: If computed memory footprint for the buffer exceeds the machine's
            RAM.
    warn_replay_buffer_capacityg    eAz~Estimated max memory usage for replay buffer is {} GB ({} batches of size {}, {} bytes each), available system memory is {} GBg?N)r   
size_bytespsutilvirtual_memorytotalr#   countrr   r$   r%   r@   )ry   rs   	item_size
psutil_memtotal_gbmem_sizemsgs          r)   r{   r{   s  s      -.. OO%%	*,,
#c)i'#-//5v(DJ	80 0 	 hS//!h&&NN3KK# r+   bufferfake_sample_outputc                     t          | t                    r6t          t                    s!t                                                    ddt
          dt          t                   ffd}|| _        dS )a  Patch a ReplayBuffer such that we always sample fake_sample_output.

    Transforms fake_sample_output into a MultiAgentBatch if it is not a
    MultiAgentBatch and the buffer is a MultiAgentBuffer. This is useful for testing
    purposes if we need deterministic sampling.

    Args:
        buffer: The buffer to be patched
        fake_sample_output: The output to be sampled

    N_r   c                     S )aP  Always returns a predefined batch.

        Args:
            _: dummy arg to match signature of sample() method
            __: dummy arg to match signature of sample() method
            ``**kwargs``: dummy args to match signature of sample() method

        Returns:
            Predefined MultiAgentBatch fake_sample_output
         )r   kwargsr   s     r)   fake_samplez;patch_buffer_with_fake_sampling_method.<locals>.fake_sample  s     "!r+   )N)	r!   r   r   r	   as_multi_agentr   r   r   rN   )r   r   r   s    ` r)   &patch_buffer_with_fake_sampling_methodr     s     &011 N*O; ; N ));<<KKMM" "s "0I " " " " " "  FMMMr+   )4loggingtypingr   r   r   numpyr:   ray._common.deprecationr   r   ray.rllib.policy.sample_batchr   r	   r
   ray.rllib.utils.annotationsr   ray.rllib.utils.from_configr   ray.rllib.utils.metricsr   r   $ray.rllib.utils.metrics.learner_infor   ray.rllib.utils.replay_buffersr   r   r   r   r   ray.rllib.utils.typingr   r   r   r   r   ray.utilr   ray.util.annotationsr   r}   	getLogger__name__r$   r*   rH   intboolrU   dictrx   r{   r   r   r+   r)   <module>r      s    & & & & & & & & & &     I I I I I I I I V V V V V V V V V V 3 3 3 3 3 3 3 3 3 3 3 3 = = = = = = = = B B B B B B                                 - - - - - - 		8	$	$  O& O Hj() O 
	 O  O  O  OF `3`3`3 !`3 	`3
 
`3 `3 `3 `3F ,/GKo   D b4 bD b b b bJ    C  D        F! ! .=! 	!  !  !  !  !  ! r+   