
    &`i                        d Z ddlZddlZddlZddlmZmZmZmZm	Z	m
Z
 ddlZddlZddlmZ ddlmZ ddlmZ ddlmZmZmZmZ ddlmZmZ dd	lmZ dd
lmZ ddl m!Z! ddl"m#Z#m$Z$m%Z%m&Z&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z-m.Z.m/Z/ ddl0m1Z1 ddl2m3Z3 ddl4m5Z5 ddl6m7Z7 ddl8m9Z9 ddl:m;Z;m<Z<m=Z=m>Z> ddl?m@Z@  e+            \  ZAZBZC ejD        eE          ZFe# G d de                      ZGdS )zsEager mode TF policy built using build_tf_policy().

It supports both traced and non-traced eager execution modes.
    N)DictListOptionalTupleTypeUnion)ModelCatalog)ModelV2)TFActionDistribution)_convert_to_tf_disallow_var_creation_OptimizerWrapper_traced_eager_policy)PolicyPolicyState)#pad_batch_to_sequences_of_same_size)SampleBatch)
force_list)OldAPIStackOverrideToImplementCustomLogic5OverrideToImplementCustomLogic_CallToSuperRecommendedis_overriddenoverride))ERR_MSG_TF_POLICY_CANNOT_SAVE_KERAS_MODEL)try_import_tf)'DIFF_NUM_GRAD_UPDATES_VS_SAMPLER_POLICYNUM_AGENT_STEPS_TRAINEDNUM_GRAD_UPDATES_LIFETIME)LEARNER_STATS_KEY)convert_to_numpy)normalize_action)get_gpu_devices)	with_lock)AlgorithmConfigDictLocalOptimizerModelGradients
TensorType)log_oncec                   \    e Zd ZdZdej        j        dej        j        defdZe	d             Z
edej        j        dej        j        defd            Ze ee          d	eed
f         dee         dedeeee         f         fd                        Zededeeef         fd            Zedededeeef         fd            Zedefd            Zededededefd            Zedddeddfd            Zed	ededede eeeee         f         fd            Z!ed	ededede ee"ee         f         fd            Z#ede$fd            Z%e&deeef         fd            Z'e&deeef         fd             Z( ee          e&	 	 dPd"ed#e)e         fd$                        Z*ededed         f         fd%            Z+d& Z,d' Z-d( Z. ee          	 	 	 dQd)eeef         d*e/d+e)e$         de eee         eeef         f         fd,            Z0 ee          	 	 	 	 	 	 	 dRd-            Z1e2 ee          	 	 	 	 	 dSd/eee         ef         deee         ef         de)ee                  d0e)eee         ef                  d1e)eee         ef                  d2e/d3e/defd4                        Z3e2 ee          d5                         Z4 ee          d6ede eeeef         f         fd7            Z5 ee          d8edd!fd9            Z6 ee          dTd;            Z7 ee          d<             Z8 ee          d=             Z9 ee          d>             Z: ee          d?             Z; ee          d@             Z< ee          e&de=f fdA                        Z> ee          e&dBe=dd!f fdC                        Z? ee          dUdDe)e$         dd!fdE            Z@dF ZAdG ZBe2	 dUdH            ZCdUdIZDdJ ZEe2dK             ZFdL ZGdM ZHd6efdNZIeJdO             ZK xZLS )VEagerTFPolicyV2zsA TF-eager / TF2 based tensorflow policy.

    This class is intended to be used and extended by sub-classing.
    observation_spaceaction_spaceconfigc                    |                     dd          | _        t                              d                    t                      rdnd                     t          j        | |||           d| _        t          
                    ddt          j                  | _        t          
                    | j        d	         dt          j                  | _        |                                 }|dk    r9t                      }t                              d
t#          |           d           d| _        d| _        d | _        |                                 | _        | j        d         d         | _        |                     ||| j                   |                                 | _        |                                 | _        |                                  |                                 | _        | j                                        | _         t#          | j                   dk    | _!        | j        "                    d           tG          j$                    | _%        d| _&        d S )N	frameworktf2z'Creating TF-eager policy running on {}.GPUCPUFr   )	trainabledtypeexplorezFound z visible cuda devices.modelmax_seq_len)'getr/   loggerinfoformatr"   r   __init___is_trainingtfVariableint64global_timestepr-   boolr5   _get_num_gpus_for_policylen_loss_initialized_lossget_batch_divisibility_reqbatch_divisibility_req_max_seq_lenvalidate_spaces_init_dist_class
dist_class
make_modelr6   _init_view_requirements_create_explorationexplorationget_initial_state_state_inputs_is_recurrentassign	threadingRLock_lock_re_trace_counter)selfr+   r,   r-   kwargsnum_gpusgpu_idss          w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/policy/eager_tf_policy_v2.pyr<   zEagerTFPolicyV2.__init__A   s     K77 	5<<(**5 	
 	
 	
 	/vFFF!!{{1RX{NN{{K	"e27 # 
 

 0022a<<%''GKKEWEEEFFF!!& 
&*&E&E&G&G# K0?.dkJJJ //11__&&
$$&&&3355!Z99;; !344q8 	##A&&& _&&
 "#    c                      t           r4t                                           st                                            d S d S d S N)tf1executing_eagerlyenable_eager_execution r^   r]   #enable_eager_execution_if_necessaryz3EagerTFPolicyV2.enable_eager_execution_if_necessary   sK      	)s,,.. 	)&&(((((	) 	) 	) 	)r^   	obs_spacec                     i S r`   rd   )rY   rf   r,   r-   s       r]   rJ   zEagerTFPolicyV2.validate_spaces   	     	r^   r6   ztf.keras.ModelrL   train_batchreturnc                     t           )aB  Compute loss for this policy using model, dist_class and a train_batch.

        Args:
            model: The Model to calculate the loss for.
            dist_class: The action distr. class.
            train_batch: The training data.

        Returns:
            A single loss tensor or a list of loss tensors.
        )NotImplementedError)rY   r6   rL   ri   s       r]   losszEagerTFPolicyV2.loss   s
    $ "!r^   c                     i S )zStats function. Returns a dict of statistics.

        Args:
            train_batch: The SampleBatch (already) used for training.

        Returns:
            The stats dict.
        rd   )rY   ri   s     r]   stats_fnzEagerTFPolicyV2.stats_fn   s	     	r^   gradsc                     i S )zGradient stats function. Returns a dict of statistics.

        Args:
            train_batch: The SampleBatch (already) used for training.

        Returns:
            The stats dict.
        rd   )rY   ri   rp   s      r]   grad_stats_fnzEagerTFPolicyV2.grad_stats_fn   s	     	r^   c                     t          j        | j        | j        d                   \  }}t          j        | j        | j        || j        d         | j                  S )zoBuild underlying model for this Policy.

        Returns:
            The Model for the Policy to use.
        r6   )r/   )r	   get_action_distr,   r-   get_model_v2r+   r/   )rY   _	logit_dims      r]   rM   zEagerTFPolicyV2.make_model   sc     $3t{73
 
9 ("K n
 
 
 	
r^   policy	optimizerrm   c                     dS )a  Gradients computing function (from loss tensor, using local optimizer).

        Args:
            policy: The Policy object that generated the loss tensor and
                that holds the given local optimizer.
            optimizer: The tf (local) optimizer object to
                calculate the gradients with.
            loss: The loss tensor for which gradients should be
                calculated.

        Returns:
            ModelGradients: List of the possibly clipped gradients- and variable
                tuples.
        Nrd   )rY   rx   ry   rm   s       r]   compute_gradients_fnz$EagerTFPolicyV2.compute_gradients_fn   s	    $ tr^   ztf.keras.optimizers.Optimizerztf.Operationc                     dS )aY  Gradients computing function (from loss tensor, using local optimizer).

        Args:
            optimizer: The tf (local) optimizer object to
                calculate the gradients with.
            grads: The gradient tensor to be applied.

        Returns:
            "tf.Operation": TF operation that applies supplied gradients.
        Nrd   )rY   ry   rp   s      r]   apply_gradients_fnz"EagerTFPolicyV2.apply_gradients_fn   s	      tr^   	obs_batchstate_batchesc                    dS )ae  Custom function for sampling new actions given policy.

        Args:
            model: Underlying model.
            obs_batch: Observation tensor batch.
            state_batches: Action sampling state batch.

        Returns:
            Sampled action
            Log-likelihood
            Action distribution inputs
            Updated state
        )NNNNrd   rY   r6   r~   r   rZ   s        r]   action_sampler_fnz!EagerTFPolicyV2.action_sampler_fn  s
    , &%r^   c                    dS )aC  Action distribution function for this Policy.

        Args:
            model: Underlying model.
            obs_batch: Observation tensor batch.
            state_batches: Action sampling state batch.

        Returns:
            Distribution input.
            ActionDistribution class.
            State outs.
        NNNrd   r   s        r]   action_distribution_fnz&EagerTFPolicyV2.action_distribution_fn  s
    *  r^   c                     dS )zrGet batch divisibility request.

        Returns:
            Size N. A sample batch must be of size K*N.
           rd   rY   s    r]   rG   z*EagerTFPolicyV2.get_batch_divisibility_req2  s	     qr^   c                     i S )zExtra values to fetch and return from compute_actions().

        Returns:
             Dict[str, TensorType]: An extra fetch-dict to be passed to and
                returned from the compute_actions() call.
        rd   r   s    r]   extra_action_out_fnz#EagerTFPolicyV2.extra_action_out_fn<  s	     	r^   c                     i S )zExtra stats to be reported after gradient computation.

        Returns:
             Dict[str, TensorType]: An extra fetch-dict.
        rd   r   s    r]   extra_learn_fetches_fnz&EagerTFPolicyV2.extra_learn_fetches_fnF  rh   r^   Nsample_batchother_agent_batchesc                 b    t                                           sJ t          j        | |          S )a  Post process trajectory in the format of a SampleBatch.

        Args:
            sample_batch: sample_batch: batch of experiences for the policy,
                which will contain at most one episode trajectory.
            other_agent_batches: In a multi-agent env, this contains a
                mapping of agent ids to (policy, agent_batch) tuples
                containing the policy and experiences of the other agents.
            episode: An optional multi-agent episode object to provide
                access to all of the internal episode state, which may
                be useful for model-based or multi-agent algorithms.

        Returns:
            The postprocessed sample batch.
        )r>   rb   r   postprocess_trajectory)rY   r   r   episodes       r]   r   z&EagerTFPolicyV2.postprocess_trajectoryO  s/    . ##%%%%%,T<@@@r^   c                 `    t           j        j                            | j        d                   S )zTF optimizer to use for policy optimization.

        Returns:
            A local optimizer or a list of local optimizers to use for this
                Policy's Model.
        lr)r>   keras
optimizersAdamr-   r   s    r]   ry   zEagerTFPolicyV2.optimizeri  s$     x"''D(9:::r^   c                     t          | j                  st          | j                  r%t          | j                  st	          d          d S t          j        | j        | j        d                   \  }}|S )NzT`make_model` is required if `action_sampler_fn` OR `action_distribution_fn` is givenr6   )	r   r   r   rM   
ValueErrorr	   rt   r,   r-   )rY   rL   rv   s      r]   rK   z EagerTFPolicyV2._init_dist_classu  s    /00 	M'5
 5
 	 !11  8   4(8!4;w#7 MJ r^   c                     |                                   | j                            | j        j                   t          j        | j        v rd| j        t          j                 _        d S d S )NF)/_update_model_view_requirements_from_init_stateview_requirementsupdater6   r   INFOSused_for_trainingr   s    r]   rN   z'EagerTFPolicyV2._init_view_requirements  sd    <<>>>%%dj&BCCC  666JOD";#45GGG 76r^   c                     t          |                                           }| j        r| j                            |          }|| _        |r|d         nd | _        |                     d           d| _        d S )Nr   T)auto_remove_unneeded_view_reqs)r   ry   rP   get_exploration_optimizer_optimizers
_optimizer!_initialize_loss_from_dummy_batchrE   )rY   r   s     r]   #maybe_initialize_optimizer_and_lossz3EagerTFPolicyV2.maybe_initialize_optimizer_and_loss  s     0 011
 	P)CCJOOJ 2< <F*O*Q--4..+/ 	/ 	
 	
 	
 "&r^   
input_dictr5   timestepc                 8   d| _         ||n| j        }||n| j        }t          |t          j                  r!t          |                                          }|                               	                    d           fd
                                D             }|| _        t          t          j        | j                            dk    | _        | j        r/| j                            |||                                            |                     || j        d         rd n|||          }| j                            t          j        |d                   d         j                                        d                    t1          |          S )NFc                 8    g | ]}d |dd         v |         S )state_inN   rd   ).0kr   s     r]   
<listcomp>zCEagerTFPolicyV2.compute_actions_from_input_dict.<locals>.<listcomp>  s6     
 
 

ae8K8KJqM8K8K8Kr^   r   )r   r5   tf_sesseager_tracing)r=   r5   rA   
isinstancer>   Tensorintnumpy_lazy_tensor_dictset_trainingkeys	_state_inrD   treeflattenrS   rP   before_compute_actionsget_session_compute_actions_helperr-   
assign_addshapeas_listr    )rY   r   r5   r   episodesrZ   r   rets    `      r]   compute_actions_from_input_dictz/EagerTFPolicyV2.compute_actions_from_input_dict  s    "$0''dl'3889Mh	** 	-8>>++,,H ++J77
&&&
 
 
 
#-??#4#4
 
 
 ' dn!=!=>>B  	33!7D<L<L<N<N 4    **K0>DDh
 
 	''SV(<(<Q(?(E(M(M(O(OPQ(RSSS$$$r^   c	                 (   t          t           j        |it                              d                    }
|t	          |          D ]}||
d<   |||
t           j        <   |||
t           j        <   |||
t           j        <    | j        d|
|||d|	S )NFr=   zstate_in_{i})r   r5   r   r   rd   )	r   CUR_OBSr>   constant	enumeratePREV_ACTIONSPREV_REWARDSr   r   )rY   r~   r   prev_action_batchprev_reward_batch
info_batchr   r5   r   rZ   r   ss               r]   compute_actionszEagerTFPolicyV2.compute_actions  s     !#Y U++	
 
 

 $}-- / /-.
>**(3DJ{/0(3DJ{/0!,6J{()3t3 
!	
 

 
 
 	
r^   Tactionsr   r   actions_normalizedin_trainingc                    t          | j                  r#t          | j                  st          d          t                              t          |          t          j                  }t          t          j	        t          
                    |          t          j        |id          }	|'t          
                    |          |	t          j        <   |'t          
                    |          |	t          j        <   | j        r| j                            d           t          | j                  rD|                     | | j        |	dd          \  }
| _        }|                     |
| j                  }n5|                     |	||          \  }
}|                     |
| j                  }|s"| j        d         rt'          || j                  }|                    |          }|S )NzfCannot compute log-prob/likelihood w/o an `action_distribution_fn` and a provided `action_sampler_fn`!r4   Fr   )r5   )r5   is_trainingnormalize_actions)r   r   r   r   r>   onesrD   int32r   r   convert_to_tensorACTIONSr   r   rP   r   r6   rL   r-   r!   action_space_structlogp)rY   r   r~   r   r   r   r   r   seq_lensinput_batchdist_inputsrv   action_distlog_likelihoodss                 r]   compute_log_likelihoodsz'EagerTFPolicyV2.compute_log_likelihoods  s    /00 	':
 :
 	 '   773y>>7::!#R%9%9)%D%D#W 
 
 
 (464H4H!5 5K01 (464H4H!5 5K01
  	C33E3BBB 455 	C.2.I.Idj+u% /J / /+K! //+tzBBKK "ZZ]HMMNK//+tzBBK " 	Jdk2E&F 	J&w0HIIG%**733r^   c                    i }| j                             | ||           t          || j        d| j        | j                   d| _        |                     |          }|                    d           | 	                    |          }| xj
        dz  c_
        |                    d|t          |j        t          | j
        t          | j
        dz
  |j
        pdz
  i           t!          |          S )N)rx   ri   resultF)r7   shufflerH   r   Tr   custom_metricsr   )	callbackson_learn_on_batchr   rI   rH   r   r=   r   r   _learn_on_batch_helpernum_grad_updatesr   r   countr   r   r    )rY   postprocessed_batchlearn_statsstatss       r]   learn_on_batchzEagerTFPolicyV2.learn_on_batch5  s    ((%8 	) 	
 	
 	
 	,)#'#>"4	
 	
 	
 	
 !"445HII((...++,?@@" +')<)B)4+@7)*;@qB
	
 	
 	
  &&&r^   r   c                     t          |d| j        | j        | j                   d| _        |                     |           |                    d           |                     |          \  }}}t          ||f          S )NF)r   r7   rH   r   T)	r   rI   rH   r   r=   r   r   _compute_gradients_helperr    )rY   r   grads_and_varsrp   r   s        r]   compute_gradientsz!EagerTFPolicyV2.compute_gradients\  s    
 	,)#'#>"4	
 	
 	
 	
 !2333((...'+'E'E(
 (
$u  ///r^   	gradientsc           	          |                      t          t          d |D             | j                                                                       d S )Nc                 J    g | ] }|t                               |          nd !S r`   )r>   r   )r   gs     r]   r   z3EagerTFPolicyV2.apply_gradients.<locals>.<listcomp>v  s?        56M--a000t  r^   )_apply_gradients_helperlistzipr6   trainable_variables)rY   r   s     r]   apply_gradientszEagerTFPolicyV2.apply_gradientsq  sm    $$ !*   J2244  
	
 
	
 
	
 
	
 
	
r^   Fc                 ^    |                                  }|rd |D             S d |D             S )Nc                 B    i | ]}|j         |                                S rd   )namer   r   vs     r]   
<dictcomp>z/EagerTFPolicyV2.get_weights.<locals>.<dictcomp>  s$    999!AFAGGII999r^   c                 6    g | ]}|                                 S rd   )r   r   s     r]   r   z/EagerTFPolicyV2.get_weights.<locals>.<listcomp>  s     ---a		---r^   )	variables)rY   as_dictr  s      r]   get_weightszEagerTFPolicyV2.get_weights  sA    NN$$	 	:99y9999--9----r^   c                    |                                  }t          |          t          |          k    s&J t          |          t          |          f            t          ||          D ]\  }}|                    |           d S r`   )r  rD   r   rT   )rY   weightsr  r  ws        r]   set_weightszEagerTFPolicyV2.set_weights  s    NN$$	7||s9~~---Gc)nn/M---	7++ 	 	DAqHHQKKKK	 	r^   c                 N    t          | j                                                  S r`   )r    rP   	get_stater   s    r]   get_exploration_statez%EagerTFPolicyV2.get_exploration_state  s     0 : : < <===r^   c                     | j         S r`   )rS   r   s    r]   is_recurrentzEagerTFPolicyV2.is_recurrent  s    !!r^   c                 *    t          | j                  S r`   )rD   rR   r   s    r]   num_state_tensorsz!EagerTFPolicyV2.num_state_tensors  s    4%&&&r^   c                 X    t          | d          r| j                                        S g S )Nr6   )hasattrr6   rQ   r   s    r]   rQ   z!EagerTFPolicyV2.get_initial_state  s-    4!! 	2://111	r^   c                 l   t                                                      }|d                                         |d<   g |d<   | j        rFt	          | j                                                  dk    r| j                                        |d<   | j        r| j                                        |d<   |S )NrA   _optimizer_variablesr   _exploration_state)superr  r   r   rD   r  rP   )rY   state	__class__s     r]   r  zEagerTFPolicyV2.get_state  s     !!###():#;#A#A#C#C (*$%? 	Hs4?#<#<#>#>??!CC,0O,E,E,G,GE()  	G +/*:*D*D*F*FE&'r^   r  c                 z   |                     dd           }|r| j                                        rt          |           j                            d          s)t          d          rt                              d           t          | j                                        |          D ]\  }}|
                    |           t          | d          r%d|v r!| j                            |d                    | j        
                    |d                    t                                          |           d S )	Nr  _traced+set_state_optimizer_vars_tf_eager_policy_v2zCannot restore an optimizer's state for tf eager! Keras is not able to save the v1.x optimizers (from tf.compat.v1.train) since they aren't compatible with checkpoints.rP   r  )r  rA   )r8   r   r  type__name__endswithr(   r9   warningr   rT   r  rP   	set_staterA   r  )rY   r  optimizer_varsopt_varvaluer  s        r]   r!  zEagerTFPolicyV2.set_state  sG    #94@@ 	&do7799 	&::&//	:: x=@ @  #   #&do&?&?&A&A>"R"R & &u%%%%4'' 	J,@E,I,I&&U3G-H&III 	##E*;$<=== 	%     r^   onnxc                 n   |rq	 dd l }n"# t          $ r}t          d          |d }~ww xY w|j                            | j        j        t          j        	                    |d                    \  }}d S t          | d          rt          | j        d          r}t          | j        j        t          j        j                  rT	 | j        j                            |d           d S # t           $ r# t"                              t&                     Y d S w xY wt"                              t&                     d S )	Nr   zmConverting a TensorFlow model to ONNX requires `tf2onnx` to be installed. Install with `pip install tf2onnx`.z
model.onnx)output_pathr6   
base_modelr>   )save_format)tf2onnxImportErrorRuntimeErrorconvert
from_kerasr6   r(  ospathjoinr  r   r>   r   Modelsave	Exceptionr9   r   r   )rY   
export_dirr%  r*  emodel_protoexternal_tensor_storages          r]   export_modelzEagerTFPolicyV2.export_model  sd    	F   "-  	 4;?3M3M
%GLL\BB 4N 4 40K000 D'""
	F
L11
	F 4:0"(.AA
	F
J
%**:4*HHHHH J J JHIIIIIIJ NNDEEEEEs"   	 
(#(!C& &)DDc                     t          | j        t          j        j                  r| j        j        S | j                                        S )z9Return the list of all savable variables for this policy.)r   r6   r>   r   r2  r  r   s    r]   r  zEagerTFPolicyV2.variables  s9    dj"(.11 	*:'':'')))r^   c                     | j         S r`   )rE   r   s    r]   loss_initializedz EagerTFPolicyV2.loss_initialized  s    %%r^   c           
      J   | xj         dz  c_         t          j        |v r|t          j                 }n_t          j        |t          j                           d         j        d         }|r&t                              |t          j	                  nd }i }	t          
                    t                    5  t          | j                  r8|                     | j        |t          j                 |||||          \  }
}}}nt          | j                  r;|                     | j        |t          j                 ||||d          \  }| _        }nt#          | j        t          j        j                  rR|r!d|vrt)          |          D ]\  }}||d| <   |                     |           |                     |          \  }}}	n|                     |||          \  }}|                     || j                  }| j                            |||	          \  }
}d d d            n# 1 swxY w Y   |6t                              |          |	t          j        <   ||	t          j        <   |||	t          j        <   |	                    |                                            |
||	fS )
Nr   r   r   )r~   r   r   r5   r   r   F)r~   r   r   r5   r   r   
state_in_0	state_in_)action_distributionr   r5   )rX   r   SEQ_LENSr   r   OBSr   r>   r   r   variable_creator_scoper   r   r   r6   r   rL   r   r   r2  r   r   rP   get_exploration_actionexpACTION_PROBACTION_LOGPACTION_DIST_INPUTSr   r   )rY   r   r   r   r5   r   _ray_trace_ctxr   
batch_sizeextra_fetchesr   r   r   	state_outir   r   s                    r]   r   z'EagerTFPolicyV2._compute_actions_helper  s    	!# :--!+"67HHj&ABB1EKANJ>KUrwwzw:::QUH &&'=>> .	 .	T344 ,8<8N8NJ(9"/%#%% 9O 9 95{II !!<== 
 33
",[_"=&3!) '!)$) 4  	#!	  
BHN;; 	$ <Z)G)G$-m$<$< < <DAq:;J177**:666<@JJz<R<R9KMM-1ZZ"M8. .*K #ook4:FF !% 0 G G(3%# !H ! !U.	 .	 .	 .	 .	 .	 .	 .	 .	 .	 .	 .	 .	 .	 .	b 57VVD\\M+1259M+12"<GM+89T5577888	=00s   2E'H%%H),H)c                     | xj         dz  c_         t                              t                    5  |                     |          \  }}}d d d            n# 1 swxY w Y   |                     |           |S )Nr   )rX   r>   rC  r   r   r   )rY   samplesrI  r   rv   r   s         r]   r   z&EagerTFPolicyV2._learn_on_batch_helperP  s    
 	!#&&'=>> 	O 	O'+'E'Eg'N'N$NAu	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O$$^444s   AAAc                 @    t                               | j                  S r`   )r>   r   r=   r   s    r]   _get_is_training_placeholderz,EagerTFPolicyV2._get_is_training_placeholder\  s    ##D$5666r^   c                   
 | xj         dz  c_         t          | j        t          j        j                  r| j        j        n| j                                        t                              t          | j	                            5 
| 
                    | j        | j        |          }ddd           n# 1 swxY w Y   t          |          }t          | j	                  rbt          
          }| j        d         r(| 	                    |gt          |          z  |          }n-| 	                    ||d                   g}n
fd|D             }t!          d          r1|D ].}|D ])\  }}|"t"                              d|j                    */| j        d         rd	 |D             }n|d         }d
 |D             }|                     ||          }	|||	fS )z,Computes and returns grads as eager tensors.r   )
persistentN%_tf_policy_handles_more_than_one_lossr   c           
      r    g | ]3}t          t                              |                              4S rd   )r   r   gradient)r   rm   taper  s     r]   r   z=EagerTFPolicyV2._compute_gradients_helper.<locals>.<listcomp>  sF       IMStY77CCDD  r^   	grad_varszOptimizing variable c                 &    g | ]}d  |D             S )c                     g | ]\  }}|S rd   rd   r   r   rv   s      r]   r   zHEagerTFPolicyV2._compute_gradients_helper.<locals>.<listcomp>.<listcomp>  s    ,,,DAqa,,,r^   rd   )r   g_and_vs     r]   r   z=EagerTFPolicyV2._compute_gradients_helper.<locals>.<listcomp>  s'    KKK,,G,,,KKKr^   c                     g | ]\  }}|S rd   rd   r[  s      r]   r   z=EagerTFPolicyV2._compute_gradients_helper.<locals>.<listcomp>  s    22241aQ222r^   )rX   r   r6   r>   r   r2  r   GradientTaper   r{   rm   rL   r   r   r-   rD   r(   r9   r:   r   _stats)rY   rO  lossesry   r   r\  r   r  rp   r   rW  r  s             @@r]   r   z)EagerTFPolicyV2._compute_gradients_helper_  sz    	!# dj"(.11 	9
6II
6688I __$T%>??  
 
 	EYYtz4?GDDF	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E F## 233 	
 *$//I{BC S!%!:!:K#f++-v" "
 #'";";Ivay"Q"Q!R    QW  N K   	E) E E# E EDAq}$C16$C$CDDDE ;>? 	3KKNKKKEE ,A.N22>222EGU++ue++s   
"B88B<?B<c                    | xj         dz  c_         t          | j                  rG| j        d         r|                     | j        |           d S |                     | j        |           d S | j        d         rAt          | j                  D ]*\  }}|                    d ||         D                        +d S | j                            d |D                        d S )Nr   rT  c                      g | ]\  }}|||fS r`   rd   r   r   r  s      r]   r   z;EagerTFPolicyV2._apply_gradients_helper.<locals>.<listcomp>  s!    OOODAq!Qr^   c                      g | ]\  }}|||fS r`   rd   rc  s      r]   r   z;EagerTFPolicyV2._apply_gradients_helper.<locals>.<listcomp>  s!    HHH1!-aV---r^   )rX   r   r}   r-   r   r   r   r   )rY   r   rM  os       r]   r   z'EagerTFPolicyV2._apply_gradients_helper  s   
 	!#011 	{BC I''(8.IIIII''HHHHH{BC %d&677  DAq%%OON1,=OOO    
 //HHHHH    r^   c                 p   i }t          | j                  r+t          |                     |                    |t          <   n
i |t          <   |                    t          |                                                      |                    t          |                     ||                               |S r`   )r   ro   dictr   r   r   rr   )rY   rO  rp   fetchess       r]   r_  zEagerTFPolicyV2._stats  s    '' 	,)-dmmG.D.D)E)EG%&&)+G%&tD7799::;;;tD..w>>??@@@r^   c                     t          |t                    st          |          }|                    t                     |S r`   )r   r   set_get_interceptorr   )rY   r   s     r]   r   z!EagerTFPolicyV2._lazy_tensor_dict  s?    -{;; 	C"-.A"B"B//???""r^   c                      t          |           S r`   )r   )clss    r]   with_tracingzEagerTFPolicyV2.with_tracing  s    #C(((r^   )NNr   )NNNNNNN)NNNTT)Fr`   )Mr  
__module____qualname____doc__gymspacesSpacer$   r<   staticmethodre   r   rJ   r   r   r   r
   r   r   r   r'   r   rm   r   strro   r&   rr   rM   r%   r{   r}   r   r   r  r   r   rG   r   r   r   r   r   ry   rK   rN   r   rB   r   r   r#   r   r   r   r   r  r
  r  r  r  rQ   r   r  r!  r9  r  r<  r   r   rQ  r   r   r_  r   classmethodrm  __classcell__)r  s   @r]   r*   r*   :   s	        
I#:+I# j&I# $	I# I# I# I#V ) ) \) $:# j& $	   $# $Xf"W../" -." !	"
 
z4
++	," " "  $#"$ $	K 	Dj4I 	 	 	 $#	 $&/=	c:o	   $# $
G 
 
 
 $#
$ $)7?I	   $#& $2  
	   $#" $&& 	&
 "& 
z:z4
3CC	D& & & $#&. $   	 
 "  
z4j!11	2      $# , $C    $# ;T#z/%:    ;: ;S*_(=    ;: Xf: 6:	A A!A &k2A A A ;: A0 $	;	.5T0UU	V	; 	; 	; $#	;   P P P& & &" Xf "&+% +%j)+% +% 3-	+% 
z4
+T#z/-BB	C+% +% +% +%^ Xf $
 $
 $
 $
L Xf
 59KOKO#' : :tJ'34: j):56:  Z 01	:
 $E$z*:J*F$GH: $E$z*:J*F$GH: !: : 
: : :  Y:x Xf#' #'  Y#'J Xf0#.0	~tCO44	50 0 0 0( Xf
 
D 
 
 
 
 Xf. . . . Xf   Xf> > > Xf" " " Xf' ' ' Xf  
 Xf:;      ;: $ Xf:!{ !t ! ! ! ! ! ;: !2 XfF FXc] Fd F F F F:* * *& & &  S1 S1 S1 YS1r
 
 
 
7 7 7 ;, ;, Y;,z  .	 	 	#[ # # # # ) ) [) ) ) ) )r^   r*   )Hrp  loggingr/  rU   typingr   r   r   r   r   r   	gymnasiumrq  r   ray.rllib.models.catalogr	   ray.rllib.models.modelv2r
   "ray.rllib.models.tf.tf_action_distr    ray.rllib.policy.eager_tf_policyr   r   r   r   ray.rllib.policy.policyr   r   ray.rllib.policy.rnn_sequencingr   ray.rllib.policy.sample_batchr   ray.rllib.utilsr   ray.rllib.utils.annotationsr   r   r   r   r   ray.rllib.utils.errorr   ray.rllib.utils.frameworkr   ray.rllib.utils.metricsr   r   r   $ray.rllib.utils.metrics.learner_infor   ray.rllib.utils.numpyr    "ray.rllib.utils.spaces.space_utilsr!   ray.rllib.utils.tf_utilsr"   ray.rllib.utils.threadingr#   ray.rllib.utils.typingr$   r%   r&   r'   ray.util.debugr(   ra   r>   tfv	getLoggerr  r9   r*   rd   r^   r]   <module>r     s   
  				     ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;      1 1 1 1 1 1 , , , , , , C C C C C C            8 7 7 7 7 7 7 7 O O O O O O 5 5 5 5 5 5 & & & & & &              L K K K K K 3 3 3 3 3 3         
 C B B B B B 2 2 2 2 2 2 ? ? ? ? ? ? 4 4 4 4 4 4 / / / / / /            $ # # # # #}R		8	$	$ M) M) M) M) M)f M) M) M) M) M)r^   