
    &`i^8                        d dl Z d dlmZmZmZ d dlmZ d dlmZm	Z	 d dl
mZmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZm Z  d dl!m"Z" d dl#m$Z$m%Z% d dl&m'Z'm(Z( d dl)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6 d dl7m8Z8m9Z9  e'            \  Z:Z;Z< e(            Z= e j>        e?          Z@ G d de          ZA G d de          ZBdS )    N)OptionalTypeUnion)Self)DEPRECATED_VALUEdeprecation_warning)AlgorithmConfigNotProvided)CQLTFPolicy)CQLTorchPolicy)SAC	SACConfig)"AddObservationsFromEpisodesToBatch)+AddNextObservationsFromEpisodesToTrainBatch)Learner)RLModuleSpec)synchronous_parallel_sample)multi_gpu_train_one_steptrain_one_step)Policy)OldAPIStackoverride)try_import_tftry_import_tfp)LAST_TARGET_UPDATE_TSLEARNER_RESULTSLEARNER_UPDATE_TIMERNUM_AGENT_STEPS_SAMPLEDNUM_AGENT_STEPS_TRAINEDNUM_ENV_STEPS_SAMPLEDNUM_ENV_STEPS_TRAINEDNUM_TARGET_UPDATESOFFLINE_SAMPLING_TIMERSAMPLE_TIMERSYNCH_WORKER_WEIGHTS_TIMERTARGET_NET_UPDATE_TIMERTIMERS)
ResultDictRLModuleSpecTypec                       e Zd ZdZd fd	Z ee          eeeeeeeddee	         dee
         dee	         dee         d	ee
         d
ee
         dee         def fd            Z ee          def fd            Z ee          deed         ef         fd            Z ee          	 d fd	            Z ee          d fd            Z ee          defd            Ze fd            Z xZS )	CQLConfiga  Defines a configuration class from which a CQL can be built.

    .. testcode::
        :skipif: True

        from ray.rllib.algorithms.cql import CQLConfig
        config = CQLConfig().training(gamma=0.9, lr=0.01)
        config = config.resources(num_gpus=0)
        config = config.env_runners(num_env_runners=4)
        print(config.to_dict())
        # Build a Algorithm object from the config and run 1 training iteration.
        algo = config.build(env="CartPole-v1")
        algo.train()
    Nc           	      b   t                                          |pt                     d| _        d| _        d| _        d| _        d| _        d| _        d| _	        d| _
        d	| _        d
| _        d
| _        ddt          d          dddddd| _        d| _        d| _        t$          | _        d S )N)
algo_classi N  g      ?
   Fg      @Tga2U0*3?g-C6?gMbP?!MultiAgentPrioritizedReplayBufferg    .Ag333333?g?gư>)_enable_replay_buffer_apitypecapacityprioritized_replayprioritized_replay_alphaprioritized_replay_betaprioritized_replay_epsworker_side_prioritizationr   d   )super__init__CQLbc_iterstemperaturenum_actions
lagrangianlagrangian_threshmin_q_weightdeterministic_backuplractor_lr	critic_lralpha_lrintreplay_buffer_config"min_sample_timesteps_per_iteration!min_train_timesteps_per_iterationr   timesteps_per_iteration)selfr-   	__class__s     p/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/algorithms/cql/cql.pyr:   zCQLConfig.__init__I   s    J$5#666
 !$$(!  *.7C"'(+'*&**/%
 %
!  34/14. (8$$$    )r<   r=   r>   r?   r@   rA   rB   r<   r=   r>   r?   r@   rA   rB   returnc                    t                      j        di | |t          ur|| _        |t          ur|| _        |t          ur|| _        |t          ur|| _        |t          ur|| _        |t          ur|| _        |t          ur|| _	        | S )a  Sets the training-related configuration.

        Args:
            bc_iters: Number of iterations with Behavior Cloning pretraining.
            temperature: CQL loss temperature.
            num_actions: Number of actions to sample for CQL loss
            lagrangian: Whether to use the Lagrangian for Alpha Prime (in CQL loss).
            lagrangian_thresh: Lagrangian threshold.
            min_q_weight: in Q weight multiplier.
            deterministic_backup: If the target in the Bellman update should have an
                entropy backup. Defaults to `True`.

        Returns:
            This updated AlgorithmConfig object.
         )
r9   trainingr
   r<   r=   r>   r?   r@   rA   rB   )
rL   r<   r=   r>   r?   r@   rA   rB   kwargsrM   s
            rN   rS   zCQLConfig.trainingu   s    : 	""6""";&&$DMk))*Dk))*D[(((DOK//%6D"{** ,D{22(<D%rO   c                      t                      j        di | d|v rOddlm} t	          |                    d          |          s&t          d|                    d           d          | S )Nprelearner_classr   )OfflinePreLearnerz`prelearner_class` z is not a subclass of `OfflinePreLearner`. Any class passed to `prelearner_class` needs to implement the interface given by `OfflinePreLearner`.rR   )r9   offline_dataray.rllib.offline.offline_datarW   
issubclassget
ValueError)rL   rT   rW   rM   s      rN   rX   zCQLConfig.offline_data   s     	&&v&&& ''HHHHHHfjj);<<>OPP  +&**5G*H*H + + +   rO   r   c                 X    | j         dk    rddlm} |S t          d| j          d          )Ntorchr   )CQLTorchLearnerThe framework z) is not supported. Use `'torch'` instead.)framework_str0ray.rllib.algorithms.cql.torch.cql_torch_learnerr_   r\   )rL   r_   s     rN   get_default_learner_classz#CQLConfig.get_default_learner_class   sS    ((XXXXXX"")!3 ) ) )  rO   c                     t                                          |||          }|                    t          t	                                 |S )N)input_observation_spaceinput_action_spacedevice)r9   build_learner_connectorinsert_afterr   r   )rL   re   rf   rg   pipelinerM   s        rN   rh   z!CQLConfig.build_learner_connector   sY     7722$;1 3 
 
 	.799	
 	
 	

 rO   c                    | j         t          k    rt          ddd           t                                                       | j        dur| j        dk    rd| _        | j        dv rHt          At          	                    dt          rt          j        nd  d           t          d	           | j        d
k    r%| j        s | j        r|                     d           d S d S d S d S )NrK   rJ   T)oldnewerrorr^   )tftf2zYou need `tensorflow_probability` in order to run CQL! Install it via `pip install tensorflow_probability`. Your tf.__version__=z5.Trying to import tfp results in the following error:)rn   r   zWhen using a single local learner the number of iterations per learner, `dataset_num_iters_per_learner` has to be defined. Set this hyperparameter in the `AlgorithmConfig.offline_data`.)rK   r   r   r9   validatesimple_optimizerra   tfploggerwarningro   __version__r   num_learnersdataset_num_iters_per_learnerenable_rl_module_and_learner_value_errorrL   rM   s    rN   rq   zCQLConfig.validate   s6    '+;;;-7    	
  ,,1Cw1N1N$(D!..3;NNG46"@"..DG G G   &&&& ""6 #1 # Q    	 #"""""rO   c                 t    | j         dk    rddlm} t          |          S t	          d| j          d          )Nr^   r   )DefaultCQLTorchRLModule)module_classr`   z is not supported. Use `torch`.)ra   :ray.rllib.algorithms.cql.torch.default_cql_torch_rl_moduler}   r   r\   )rL   r}   s     rN   get_default_rl_module_specz$CQLConfig.get_default_rl_module_spec  se    ((       -DEEEET!3TTT  rO   c                 >    t                      j        d| j        iz  S )Nr>   )r9   _model_config_auto_includesr>   r{   s    rN   r   z%CQLConfig._model_config_auto_includes  s$    ww24+6
 
 	
rO   NrP   N)__name__
__module____qualname____doc__r:   r   r   r
   r   rG   floatboolr   rS   r	   rX   r   r   strrc   rh   rq   r)   r   propertyr   __classcell__)rM   s   @rN   r+   r+   9   sO        *8 *8 *8 *8 *8 *8X Xi #.'2%0%0-8(3/:- - - 3-- e_	-
 c]- TN- $E?- uo- 'tn- 
- - - - - -^ Xo      & Xi	5i#1E+F 	 	 	 	 Xo
 	     * Xi' ' ' ' ' 'R Xi
,< 
 
 
 
 
 
 
 
 X
 
 
 
 
rO   r+   c                       e Zd ZdZe ee          defd                        Ze ee          de	de
ee                  fd                        Z ee          d	d            Zedefd            ZdS )
r;   zCQL (derived from SAC).rP   c                     t                      S r   )r+   )clss    rN   get_default_configzCQL.get_default_config  s     {{rO   configc                 6    |d         dk    rt           S t          S )N	frameworkr^   )r   r   )r   r   s     rN   get_default_policy_classzCQL.get_default_policy_class!  s     
 +'))!!rO   Nc                    | j         j        s|                                 S | j                            t
          t          f          5  | j         j        dk    p| j         j        dk    x}}| j	        
                    | j         j        | j         j        |          }d d d            n# 1 swxY w Y   | j                            t
          t          f          5  | j                            || j         j        | j         j                  }| j                            |t                      d d d            d S # 1 swxY w Y   d S )Nr      )num_samples
num_shardsreturn_iterator)data_iteratorsminibatch_size	num_iters)key)r   "enable_env_runner_and_connector_v2_training_step_old_api_stackmetricslog_timer'   r#   rw   rx   rX   sampletrain_batch_size_per_learnerr   learner_groupupdate	aggregater   )rL   r   batch_or_iteratorlearner_resultss       rN   training_stepzCQL.training_step+  s    {= 	744666 \""F,B#CDD 	 	 (1, B;<AOo !% 1 8 8 KD;3 !0 !9 ! !	 	 	 	 	 	 	 	 	 	 	 	 	 	 	$ \""F,@#ABB 	I 	I"0770#{G+C 8  O L""?"HHH	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	Is&   AB''B+.B+AD88D<?D<c                    | j         t                   5  t          | j                  }d d d            n# 1 swxY w Y   |                                }| j        t          xx         |                                z  cc<   | j        t          xx         |	                                z  cc<   | j
                            d          pd } ||| j        | j
                  }| j
                            d          du rt          | |          }nt          | |          }| j        | j
        j        dk    rt          nt                    }| j        t"                   }||z
  | j
        j        k    r| j         t&                   5  | j                                        | j                            fd           d d d            n# 1 swxY w Y   | j        t.          xx         dz  cc<   || j        t"          <   | j                                        d	k    rd| j         t2                   5  | j                            t7          |                                          
           d d d            n# 1 swxY w Y   |S )N)
worker_setbefore_learn_on_batchc                     | S r   rR   )bas     rN   <lambda>z2CQL._training_step_old_api_stack.<locals>.<lambda>X  s    Q rO   rr   Tagent_stepsc                 4    |v o|                                  S r   )update_target)ppid	to_updates     rN   r   z2CQL._training_step_old_api_stack.<locals>.<lambda>n  s    3)#3#I8I8I rO   r   r   )policies)_timersr$   r   env_runner_groupas_multi_agent	_countersr   r   r    	env_stepsr   r[   r   r   count_steps_byr   r!   r   target_network_update_freqr&   
env_runnerget_policies_to_trainforeach_policy_to_trainr"   num_remote_workersr%   sync_weightslistkeys)rL   train_batchpost_fntrain_resultscur_tslast_updater   s         @rN   r   z CQL._training_step_old_api_stackN  s@    \,' 	X 	X5AVWWWK	X 	X 	X 	X 	X 	X 	X 	X 	X 	X 	X 	X 	X 	X 	X!0022.///;3J3J3L3LL///,---1F1F1H1HH--- +//"9::Ogk4+@$+NN
 ;??-..$66*4==MM4T;GGM {)]:: $#&

 n%:;K4;#III56   OAACC	77IIII                
 N-...!3...4:DN01  33559989 X X%22DASASAUAU<V<V2WWWX X X X X X X X X X X X X X X s0   6::>7GGG$;I++I/2I/r   )r   r   r   r   classmethodr   r   r+   r   r	   r   r   r   r   r   r   r(   r   rR   rO   rN   r;   r;     s        !!Xc]]9    ] [ Xc]]$	$v,	   ] [ Xc]] I  I  I ] ID +j + + + [+ + +rO   r;   )Cloggingtypingr   r   r   typing_extensionsr   ray._common.deprecationr   r   %ray.rllib.algorithms.algorithm_configr	   r
   &ray.rllib.algorithms.cql.cql_tf_policyr   )ray.rllib.algorithms.cql.cql_torch_policyr   ray.rllib.algorithms.sac.sacr   r   Cray.rllib.connectors.common.add_observations_from_episodes_to_batchr   Oray.rllib.connectors.learner.add_next_observations_from_episodes_to_train_batchr   ray.rllib.core.learner.learnerr   "ray.rllib.core.rl_module.rl_moduler   ray.rllib.execution.rollout_opsr   ray.rllib.execution.train_opsr   r   ray.rllib.policy.policyr   ray.rllib.utils.annotationsr   r   ray.rllib.utils.frameworkr   r   ray.rllib.utils.metricsr   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   ray.rllib.utils.typingr(   r)   tf1ro   tfvrs   	getLoggerr   rt   r+   r;   rR   rO   rN   <module>r      s    ( ( ( ( ( ( ( ( ( ( " " " " " "        O N N N N N N N > > > > > > D D D D D D                  3 2 2 2 2 2 ; ; ; ; ; ;             + * * * * * = = = = = = = = C C C C C C C C                              @ ? ? ? ? ? ? ?}Rn		8	$	$]
 ]
 ]
 ]
 ]
	 ]
 ]
 ]
@a a a a a# a a a a arO   