
    &`i                     :   d Z ddlZddlZddlZddlZddlmZmZmZm	Z	m
Z
 ddlZddlmZmZ ddlmZ ddlmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZmZ ddl m!Z! ddl"m#Z# ddl$m%Z%m&Z&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3m4Z4m5Z5m6Z6 ddl7m8Z8  e#            \  Z9Z:Z; ej<        e=          Z>d"dZ?d Z@d ZAd ZBd ZCd ZDe G d de                      ZEd ZF G d d           ZGeddddddddddddddddddeddfd!            ZHdS )#zrEager mode TF policy built using build_tf_policy().

It supports both traced and non-traced eager execution modes.    N)DictListOptionalTupleUnion)DEPRECATED_VALUEdeprecation_warning)ModelCatalog)RepeatedValues)PolicyPolicyState)#pad_batch_to_sequences_of_same_size)SampleBatch)
add_mixins
force_list)OldAPIStackoverride))ERR_MSG_TF_POLICY_CANNOT_SAVE_KERAS_MODEL)try_import_tf)'DIFF_NUM_GRAD_UPDATES_VS_SAMPLER_POLICYNUM_AGENT_STEPS_TRAINEDNUM_GRAD_UPDATES_LIFETIME)LEARNER_STATS_KEY)convert_to_numpy)normalize_action)get_gpu_devices)	with_lock)LocalOptimizerModelGradientsTensorStructType
TensorType)log_oncec                    t          | t                    r8d |                                 D             }t          j        t
          |          S t          | t                    r| S t          | t                    r8t          t          j        t
          | j                  | j	        | j
                  S | |t          j        fd|           S | S )Nc                 :    i | ]\  }}|t           j        k    ||S  )r   INFOS.0kvs      t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/policy/eager_tf_policy.py
<dictcomp>z"_convert_to_tf.<locals>.<dictcomp>2   s+    FFF$!QqK4E/E/EA/E/E/E    c                     t          | t                    rt          |           n8| 5t                              |           st                              |           n| S N)
isinstancer   _convert_to_tftf	is_tensorconvert_to_tensor)fds    r+   <lambda>z _convert_to_tf.<locals>.<lambda>?   sY    !^,,nQ*** }R\\!__} %%a+++	 r-   )r0   r   itemstreemap_structurer1   r   r   valueslengthsmax_len)xdtypedict_r6   s      @r+   r1   r1   0   s    ![!! 	
FF!''))FFF!.%888	Av		 
	A~	&	& 
~qx88!)QY
 
 	
 	}!   
 
 
 	
 Hr-   c                     d }	 t           j                            ||           S # t          $ r0 t	          d                    t          |                               w xY w)Nc                 b    t          | t          j                  r|                                 S | S r/   )r0   r2   Tensornumpy)r>   s    r+   _mapz_convert_to_numpy.<locals>._mapK   s)    a## 	7799r-   z4Object of type {} has no method to convert to numpy.)r2   nestr:   AttributeError	TypeErrorformattype)r>   rE   s     r+   _convert_to_numpyrK   J   ss      

w$$T1--- 
 
 
CKKDQRGGTT
 
 	

s	   % :Ac                 F     t          j                    fd            }|S )Nc                      t                                           r2d | D             }d |                                D             } |i |S  | i |S )Nc                 ,    g | ]}t          |          S r%   )r1   )r(   r>   s     r+   
<listcomp>z8_convert_eager_inputs.<locals>._func.<locals>.<listcomp>\   s     :::.++:::r-   c                 d    i | ]-\  }}|d v	|t          ||dk    rt          j        nd          .S )>   episodes
info_batchtimestepNr?   )r1   r2   int64r'   s      r+   r,   z8_convert_eager_inputs.<locals>._func.<locals>.<dictcomp>^   sO       Aq666 >!qJ288DQQQ666r-   )r2   executing_eagerlyr8   )argskwargs
eager_argseager_kwargsfuncs       r+   _funcz$_convert_eager_inputs.<locals>._funcY   s|    !! 
	)::T:::J "LLNN  L
 44|4444((((r-   	functoolswrapsr[   r\   s   ` r+   _convert_eager_inputsra   X   s8    _T) ) ) ) ) Lr-   c                 F     t          j                    fd            }|S )Nc                       | i |}t                                           r%t           j                            t          |          }|S r/   )r2   rV   rF   r:   rK   )rW   rX   outr[   s      r+   r\   z%_convert_eager_outputs.<locals>._funck   sI    dD#F##!! 	@'''(93??C
r-   r]   r`   s   ` r+   _convert_eager_outputsre   j   s8    _T     Lr-   c                 `     | di |}t          d                    |j                            )NzDetected a variable being created during an eager forward pass. Variables should only be created during model initialization: {}r%   )
ValueErrorrI   name)next_creatorkwr*   s      r+   _disallow_var_creationrk   u   s;    rA
	##)6!&>>  r-   c                       fd}|S )z9Asserts that a given number of re-traces is not breached.c                     | j                             d          %| j        | j         d         k    rt          d           | g|R i |S )Neager_max_retraceszToo many tf-eager re-traces detected! This could lead to significant slow-downs (even slower than running in tf-eager mode w/ `eager_tracing=False`). To switch off these re-trace counting checks, set `eager_max_retraces` in your config to None.)configget_re_trace_counterRuntimeError)self_rW   rX   objs      r+   r\   z'_check_too_many_retraces.<locals>._func   si    L122>'%,7K*LLL+   s5*4***6***r-   r%   )rt   r\   s   ` r+   _check_too_many_retracesru   ~   s#    + + + + + Lr-   c                       e Zd ZdZdS )EagerTFPolicyzCDummy class to recognize any eagerized TFPolicy by its inheritance.N)__name__
__module____qualname____doc__r%   r-   r+   rw   rw      s        MMDr-   rw   c                 h      G  fdd            j         dz   _          j        dz   _        S )zWrapper class that enables tracing for all eager policy methods.

    This is enabled by the `--trace`/`eager_tracing=True` config when
    framework=tf2.
    c                       e Zd Z fdZe ee          	 	 	 ddeee	f         de
dee         dee	ee	         eee	f         f         f fd                        Ze e           fd                        Ze e          d	edef fd
                        Ze ee          deddf fd                        Zed             Z xZS )/_traced_eager_policy.<locals>.TracedEagerPolicyc                 v    d| _         d| _        d| _        d| _         t	          |           j        |i | d S )NF)_traced_learn_on_batch_helper_traced_compute_actions_helper _traced_compute_gradients_helper_traced_apply_gradients_helpersuper__init__)selfrW   rX   TracedEagerPolicy	__class__s      r+   r   z8_traced_eager_policy.<locals>.TracedEagerPolicy.__init__   sL    16D.27D/49D127D/3E#T**3TDVDDDDDr-   N
input_dictexplorerS   returnc                     | j         du rP| j        sIt          t                              t          |           j        dd                    | _        d| _          t          |           j        d||||d|S )z9Traced version of Policy.compute_actions_from_input_dict.FT	autographreduce_retracingr   r   rS   rQ   r%   )r   _no_tracingra   r2   functionr   _compute_actions_helpercompute_actions_from_input_dict)r   r   r   rS   rQ   rX   r   r   s         r+   r   zO_traced_eager_policy.<locals>.TracedEagerPolicy.compute_actions_from_input_dict   s     2e;;DDT;/DKK/66N"')-    0 0, 7;3 R5*D11Q %!!	 
   r-   c                     | j         du rP| j        sIt          t                              t          |           j        dd                    | _        d| _         t          |                               |          S )z(Traced version of Policy.learn_on_batch.FTr   )r   r   ra   r2   r   r   _learn_on_batch_helperlearn_on_batchr   samplesr   r   s     r+   r   z>_traced_eager_policy.<locals>.TracedEagerPolicy.learn_on_batch   s     1U::4CS:.CKK/66M"')-    / /+ 6:2 *D11@@IIIr-   r   c                     | j         du rP| j        sIt          t                              t          |           j        dd                    | _        d| _         t          |                               |          S )z+Traced version of Policy.compute_gradients.FTr   )r   r   ra   r2   r   r   _compute_gradients_helpercompute_gradientsr   s     r+   r   zA_traced_eager_policy.<locals>.TracedEagerPolicy.compute_gradients   s     4==dFV=1FKK/66P"')-    2 2. 9=5 *D11CCGLLLr-   gradsc                     | j         du rP| j        sIt          t                              t          |           j        dd                    | _        d| _         t          |                               |          S )z)Traced version of Policy.apply_gradients.FTr   )r   r   ra   r2   r   r   _apply_gradients_helperapply_gradients)r   r   r   r   s     r+   r   z?_traced_eager_policy.<locals>.TracedEagerPolicy.apply_gradients   s     2e;;DDT;/DKK/66N"')-    0 0, 7;3 *D11AA%HHHr-   c                     | S r/   r%   clss    r+   with_tracingz<_traced_eager_policy.<locals>.TracedEagerPolicy.with_tracing  s	     Jr-   NNN)rx   ry   rz   r   ru   r   r   r   strr!   boolr   intr   r   r   r   r   r   r   r   classmethodr   __classcell__)r   r   eager_policy_clss   @r+   r   r~      s       	E 	E 	E 	E 	E 	E 
"	&		 !&*	 	S*_-	 	 sm		 :tJ/c:o1FFG	 	 	 	 	 	 
	 
"	!	> 
"	"	#	#	J 	J 	J 	J 	J 
$	# 
"	!	J$ 
"	"	#	#	M[ 	M^ 	M 	M 	M 	M 	M 	M 
$	# 
"	!	M$ 
"	&			I 	ID 	I 	I 	I 	I 	I 	I 
	 
"	!	I$ 
	 	 
	 	 	 	 	r-   r   _traced)rx   rz   )r   r   s   `@r+   _traced_eager_policyr      st    h h h h h h h h, h h hT "2!:Y!F%5%BY%N"r-   c                       e Zd Zd Zd ZdS )_OptimizerWrapperc                     || _         d S r/   )tape)r   r   s     r+   r   z_OptimizerWrapper.__init__  s    			r-   c                 n    t          t          | j                            ||          |                    S r/   )listzipr   gradient)r   lossvar_lists      r+   r   z#_OptimizerWrapper.compute_gradients  s,    C	**4::HEEFFFr-   N)rx   ry   rz   r   r   r%   r-   r+   r   r     s7          G G G G Gr-   r   c                 B  	
 t          t          |          }|t          k    rt          dd           |t          ddd           |t          dd	d            G 
	fd
d|          }| dz   |_        | dz   |_        |S )a  Build an eager TF policy.

    An eager policy runs all operations in eager mode, which makes debugging
    much simpler, but has lower performance.

    You shouldn't need to call this directly. Rather, prefer to build a TF
    graph policy and use set `.framework("tf2", eager_tracing=False) in your
    AlgorithmConfig to have it automatically be converted to an eager policy.

    This has the same signature as build_tf_policy().obs_include_prev_action_rewardT)olderrorNextra_action_fetches_fnextra_action_out_fn)r   newr   gradients_fncompute_gradients_fnc                   R    e Zd Z
fdZ ee          	 	 	 d-deeef         de	de
e         deeee         eeef         f         fd            Z ee          	 	 	 	 	 	 	 d.deee         ef         d	e
ee                  d
eee         ef         deee         ef         de
eeef                  de
e         de
e	         de
e         deeee         eeef         f         fd            Ze ee          	 	 	 	 d/fd	                        Z ee          	 d0fd	            Ze ee          d                         Z ee          dedeeeeef         f         fd            Z ee          deddfd            Z ee          d1d            Z ee          d             Z ee          d             Z ee          d             Z ee          d             Z ee          d             Z  ee          de!f fd            Z" ee          de!ddf fd             Z# ee          d2d!e
e         ddfd"            Z$d# Z%d$ Z&efd%            Z'd2d&Z(d' Z)efd(            Z*fd)Z+	fd*Z,defd+Z-e.d,             Z/ xZ0S )30_build_eager_tf_policy.<locals>.eager_policy_clsc                    t                                           st                                            |                    dd          | _        t
                              | |||           t                              ddt          j	                  | _
        t                              | j        d         dt          j                  | _        |                                 }|dk    r9t                      }t                               dt%          |           d           d| _        d| _        d| _        | _        n.| j        j        j        d	k    r| j        j        | _        nd | _        t5                    r |           npd
| _        |d         d         | _        r | |||           r | |||           || _        d | _        	srst=          d          n(t?          j         || j        d                   \  | _        }r | |||          | _!        n)t?          j"        ||||d         | j                  | _!        tG          j$                    | _%        | &                                 | j'        (                    | j!        j'                   | )                                | _*        | j!        +                                | _,        t%          | j,                  dk    | _-        r | |||           r | |          }n*t          j.        j/        0                    |d                   }tc          |          }| j*        r| j*        2                    |          }|| _3        |r|d         nd | _4        | 5                    d           d| _        
r 
| |||           | j
        6                    d           d S )N	frameworktf2r   F)	trainabler?   r   zFound z visible cuda devices.zPolicy.loss   modelmax_seq_lenzT`make_model` is required if `action_sampler_fn` OR `action_distribution_fn` is given)r   lrT)auto_remove_unneeded_view_reqsstats_fn)7tf1rV   enable_eager_executionrp   r   rw   r   r2   VariablerU   global_timestepro   r   r   _get_num_gpus_for_policyr   loggerinfolen_is_trainingrq   _loss_initialized_lossr   __func__rz   callablebatch_divisibility_req_max_seq_len
dist_classrg   r
   get_action_distr   get_model_v2	threadingRLock_lock/_update_model_view_requirements_from_init_stateview_requirementsupdate_create_explorationexplorationget_initial_state_state_inputs_is_recurrentkeras
optimizersAdamr   get_exploration_optimizer_optimizers
_optimizer!_initialize_loss_from_dummy_batchassign)r   observation_spaceaction_spacero   num_gpusgpu_ids	logit_dimr   action_distribution_fnaction_sampler_fn
after_initbefore_initbefore_loss_initget_batch_divisibility_reqloss_fn
make_modeloptimizer_fnr   validate_spacess           r+   r   z9_build_eager_tf_policy.<locals>.eager_policy_cls.__init__K  s!    ((** -**,,,#ZZU;;DN""4):L&QQQ $&;;qE;#R#RD ;;I&%rw '  DL
 4466H!||)++IS\\IIIJJJ %D &'D"%*D" "$

 #0MAA!Y/

 "
 6777**400005A '
 !'w >D O&7vNNN KD"3\6JJJ DK"DO  	$: 	! $<   .:-I $+g"6. .*  	'Z.?vVV

)6% 7O"n  
 #**DJ @@BBB"))$**FGGG#7799D!%!=!=!?!?D!$T%7!8!81!<D P  '8,OOO D)\$77

X055fTlCC
#J//J T!-GG
SS
 6@D @J.SjmmtDO22/3! 3    &*D" J
4!2L&III  ''*****r-   Nr   r   rS   r   c                 z   | j                             d          s2t                                          st                                           d| _        ||n| j        }||n| j        }t          |t          j
                  r!t          |                                          }|                                                   d           fd                                D             }|| _        |g k    | _        | j                            |||                                            |                     || j         d         rd n|||          }| j                            t/          j        |d                   d         j                                        d                    t7          |          S )Neager_tracingFc                 8    g | ]}d |dd         v |         S )state_inN   r%   )r(   r)   r   s     r+   rO   zd_build_eager_tf_policy.<locals>.eager_policy_cls.compute_actions_from_input_dict.<locals>.<listcomp>  s6       "#J!BQB%<O<O
1<O<O<Or-   )rS   r   tf_sessr   )ro   rp   r   rV   r   r   r   r   r0   r2   rC   r   rD   _lazy_tensor_dictset_trainingkeys	_state_inr   r   before_compute_actionsget_sessionr   
assign_addr9   flattenshapeas_listr   )r   r   r   rS   rQ   rX   state_batchesrets    `      r+   r   zP_build_eager_tf_policy.<locals>.eager_policy_cls.compute_actions_from_input_dict  s    ;???33 -C<Q<Q<S<S -**,,, %D!(!4gg$,G#+#7xxT=QH(BI.. 1x~~//00 //
;;J##E***   '1'8'8  M +DN!."!4D 33!7D<L<L<N<N 4    ..O4B( C  ++DLQ,@,@,C,I,Q,Q,S,STU,VWWW#C(((r-   	obs_batchr  prev_action_batchprev_reward_batchrR   rQ   c	                 4   t          t           j        |it                              d                    }
|t	          |          D ]\  }}||
d| <   |||
t           j        <   |||
t           j        <   |||
t           j        <    | j        d|
|||d|	S )NFr   	state_in_r   r%   )	r   CUR_OBSr2   constant	enumeratePREV_ACTIONSPREV_REWARDSr&   r   )r   r  r  r  r  rR   rQ   r   rS   rX   r   iss                r+   compute_actionsz@_build_eager_tf_policy.<locals>.eager_policy_cls.compute_actions  s     %'  [[//	  J (%m44 4 4DAq23J1// ,7H
;34 ,7H
;34%0:
;,-747 %!!	 
   r-   Tc                    rt          d          t                              t          |          t          j                  }t          t
          j        t                              |          id          }	|'t                              |          |	t
          j        <   |'t                              |          |	t
          j	        <   | j
        r| j
                            d           r | | j        |	dd          \  }
}}n!|                     |	||          \  }
}| j        } ||
| j                  }|s"| j        d         rt          || j                  }|                    |          }|S )NzfCannot compute log-prob/likelihood w/o an `action_distribution_fn` and a provided `action_sampler_fn`!rT   Fr  )r   )r   is_trainingnormalize_actions)rg   r2   onesr   int32r   r  r4   r  r  r   r  r   r   ro   r   action_space_structlogp)r   actionsr  r  r  r  actions_normalizedrX   seq_lensinput_batchdist_inputsr   _action_distlog_likelihoodsr   r   s                  r+   compute_log_likelihoodszH_build_eager_tf_policy.<locals>.eager_policy_cls.compute_log_likelihoods   s    ! %;%C +   wws9~~RXw>>H%$b&:&:9&E&EF"  K !,8:8L8L%9 9K45 !,8:8L8L%9 9K45  G 777FFF & --C-C$*k5e. . .*Z
 "&K!Q!QQ!_
$*[$*==K & N$+6I*J N*7D4LMM)..w77O""r-   c                     t                                           sJ t                              | |          }r | |||          S |S r/   )r2   rV   rw   postprocess_trajectory)r   sample_batchother_agent_batchesepisodepostprocess_fns       r+   r2  zG_build_eager_tf_policy.<locals>.eager_policy_cls.postprocess_trajectoryY  sX     '')))))(??lSSL X%~dL:MwWWWr-   c                    i }| j                             | ||           t          || j        d| j        | j                   d| _        |                     |          }|                    d           | 	                    |          }| xj
        dz  c_
        |                    d|t          |j        t          | j
        t          | j
        dz
  |j
        pdz
  i           t!          |          S )N)policytrain_batchresultF)r   shuffler   r   Tr   custom_metricsr   )	callbackson_learn_on_batchr   r   r   r   r   r  r  r   num_grad_updatesr   r   countr   r   r   )r   postprocessed_batchlearn_statsstatss       r+   r   z?_build_eager_tf_policy.<locals>.eager_policy_cls.learn_on_batchd  s!    KN,,)<[ -    0# -'+'B"&"8    !%D"&"8"89L"M"M,,T222//0CDDE!!Q&!!LL$k+-@-F-t/D <-.?D1F   $E***r-   rA  c                     t          |d| j        | j        | j                   d| _        |                     |           |                    d           |                     |          \  }}}t          ||f          S )NF)r;  r   r   r   T)	r   r   r   r   r   r  r  r   r   )r   rA  grads_and_varsr   rC  s        r+   r   zB_build_eager_tf_policy.<locals>.eager_policy_cls.compute_gradients  s     0# -'+'B"&"8    !%D""#6777,,T222+/+I+I#, ,(NE5 $UEN333r-   	gradientsc           	          |                      t          t          d |D             | j                                                                       d S )Nc                 J    g | ] }|t                               |          nd !S r/   )r2   r4   )r(   gs     r+   rO   zT_build_eager_tf_policy.<locals>.eager_policy_cls.apply_gradients.<locals>.<listcomp>  s?        ! 9:R11!4444  r-   )r   r   r   r   trainable_variables)r   rF  s     r+   r   z@_build_eager_tf_policy.<locals>.eager_policy_cls.apply_gradients  sm    (( %.   
6688  
 
 
 
 
r-   Fc                 ^    |                                  }|rd |D             S d |D             S )Nc                 B    i | ]}|j         |                                S r%   )rh   rD   r(   r*   s     r+   r,   zP_build_eager_tf_policy.<locals>.eager_policy_cls.get_weights.<locals>.<dictcomp>  s$    ===a		===r-   c                 6    g | ]}|                                 S r%   )rD   rM  s     r+   rO   zP_build_eager_tf_policy.<locals>.eager_policy_cls.get_weights.<locals>.<listcomp>  s     111!AGGII111r-   )	variables)r   as_dictrO  s      r+   get_weightsz<_build_eager_tf_policy.<locals>.eager_policy_cls.get_weights  sA    ((I >==9====11y1111r-   c                    |                                  }t          |          t          |          k    s&J t          |          t          |          f            t          ||          D ]\  }}|                    |           d S r/   )rO  r   r   r   )r   weightsrO  r*   ws        r+   set_weightsz<_build_eager_tf_policy.<locals>.eager_policy_cls.set_weights  s    ((Iw<<3y>>111CLL#i..3Q111Iw//  1 r-   c                 N    t          | j                                                  S r/   )r   r   	get_stater   s    r+   get_exploration_statezF_build_eager_tf_policy.<locals>.eager_policy_cls.get_exploration_state  s    #D$4$>$>$@$@AAAr-   c                     | j         S r/   )r   rX  s    r+   is_recurrentz=_build_eager_tf_policy.<locals>.eager_policy_cls.is_recurrent  s    %%r-   c                 *    t          | j                  S r/   )r   r   rX  s    r+   num_state_tensorszB_build_eager_tf_policy.<locals>.eager_policy_cls.num_state_tensors  s    t)***r-   c                 X    t          | d          r| j                                        S g S )Nr   )hasattrr   r   rX  s    r+   r   zB_build_eager_tf_policy.<locals>.eager_policy_cls.get_initial_state  s-    tW%% 6z33555Ir-   c                 b   t                                                      }|d                                         |d<   | j        rFt	          | j                                                  dk    r| j                                        |d<   | j        r| j                                        |d<   |S )Nr   r   _optimizer_variables_exploration_state)r   rW  rD   r   r   rO  r   )r   stater   s     r+   rW  z:_build_eager_tf_policy.<locals>.eager_policy_cls.get_state  s     GG%%''E',->'?'E'E'G'GE#$ L3t'@'@'B'B#C#Ca#G#G040I0I0K0K,- K /3.>.H.H.J.J*+Lr-   rc  c                 z   |                     dd           }|r| j                                        rt          |           j                            d          s)t          d          rt                              d           t          | j                                        |          D ]\  }}|
                    |           t          | d          r%d|v r!| j                            |d                    | j        
                    |d                    t                                          |           d S )	Nra  r   +set_state_optimizer_vars_tf_eager_policy_v2zCannot restore an optimizer's state for tf eager! Keras is not able to save the v1.x optimizers (from tf.compat.v1.train) since they aren't compatible with checkpoints.r   rb  )rc  r   )rp   r   rO  rJ   rx   endswithr"   r   warningr   r   r_  r   	set_stater   r   )r   rc  optimizer_varsopt_varvaluer   s        r+   rh  z:_build_eager_tf_policy.<locals>.eager_policy_cls.set_state  sG    #YY'=tDDN *$/";";"="= *Dzz*33I>> 8AD D  NN'   '*$/*C*C*E*E~&V&V * *NGUNN5))))t]++ N0D0M0M **7K1L*MMM  ''.?(@AAA GGe$$$$$r-   onnxc                 p   t          | d          rt          | j        d          rt          | j        j        t          j        j                  r|rq	 ddl}n"# t          $ r}t          d          |d}~ww xY w|j
                            | j        j        t          j                            |d                    \  }}dS 	 | j        j                            |d	           dS # t           $ r# t"                              t&                     Y dS w xY wt"                              t&                     dS )
a  Exports the Policy's Model to local directory for serving.

            Note: Since the TfModelV2 class that EagerTfPolicy uses is-NOT-a
            tf.keras.Model, we need to assume that there is a `base_model` property
            within this TfModelV2 class that is-a tf.keras.Model. This base model
            will be used here for the export.
            TODO (kourosh): This restriction will be resolved once we move Policy and
            ModelV2 to the new Learner/RLModule APIs.

            Args:
                export_dir: Local writable directory.
                onnx: If given, will export model in ONNX format. The
                    value of this parameter set the ONNX OpSet version to use.
            r   
base_modelr   NzmConverting a TensorFlow model to ONNX requires `tf2onnx` to be installed. Install with `pip install tf2onnx`.z
model.onnx)output_pathr2   )save_format)r_  r   r0   rn  r2   r   Modeltf2onnxImportErrorrr   convert
from_kerasospathjoinsave	Exceptionr   rg  r   )r   
export_dirrl  rr  emodel_protoexternal_tensor_storages          r+   export_modelz=_build_eager_tf_policy.<locals>.eager_policy_cls.export_model  sg   " g&&JDJ55J tz4bhnEEJ  R!&& ! ! !*5   !	!! <C?;U;U
-$&GLL\$J$J <V < <8K!8!8!8R
-22:42PPPPP$ R R R'PQQQQQQR HIIIIIs*   A 
A7"A22A7!C' ')DDc                     t          | j        t          j        j                  r| j        j        S | j                                        S )z9Return the list of all savable variables for this policy.)r0   r   r2   r   rq  rO  rX  s    r+   rO  z:_build_eager_tf_policy.<locals>.eager_policy_cls.variables$  s9    $*bhn55 .z++z++---r-   c                     | j         S r/   )r   rX  s    r+   loss_initializedzA_build_eager_tf_policy.<locals>.eager_policy_cls.loss_initialized+  s    ))r-   c                    | xj         dz  c_         t          j        |t          j                           d         j        d         }|r&t                              |t          j                  nd }i }t          	                    t                    5  rM | | j        |t          j                 |||          }	t          |	          dk    r	|	\  }
}}}ntd }g }|	\  }
}nir	  | | j        |||||d          \  }| _        }n# t          $ rZ}d|j        d         v sd	|j        d         v r0 | | j        |t          j                 ||d
          \  }| _        }n|Y d }~nd }~ww xY wt#          | j        t          j        j                  rct          ||          }|r!d|vrt)          |          D ]\  }}||d| <   |                     |           |                     |          \  }}}n|                     |||          \  }}|                     || j                  }| j                            |||          \  }
}d d d            n# 1 swxY w Y   |6t                              |          |t          j        <   ||t          j        <   |||t          j        <   r|                     |                      |
||fS )Nr   r   rT   )r   rS   rQ      F)r   r  r*  r   rS   r"  zpositional argumentzunexpected keyword argument)r   rS   r"  )r*  
state_in_0r  )action_distributionrS   r   )rq   r9   r  r   OBSr  r2   r$  r%  variable_creator_scoperk   r   r  r   r   rH   rW   r0   r   rq  r  r  r   get_exploration_actionexpACTION_PROBACTION_LOGPACTION_DIST_INPUTSr   )r   r   r  rQ   r   rS   
batch_sizer*  extra_fetchesaction_sampler_outputsr(  r'  r,  	state_outr|  r  r  r.  r   r   r   s                     r+   r   zH_build_eager_tf_policy.<locals>.eager_policy_cls._compute_actions_helper.  s    ""a'"" j&ABB1EKANJ>KUrwwzw:::QUH M **+ABB K K$ J->->
";#67 '!)!). . .* 122a77@V={II&*$&	(>- 1#(
 !7 6 $ $
+5.;)1(/)1,1	! 	! 	!	 + $ )	  ) ( ( ( 5 B B#@AF1I#M#M %;$:$($(J$.{$?,3-505%" %" %"	!"$/$(O$-I '( %.IIII(& $DJ?? 
%0h%O%O%O
( @\-K-K(1-(@(@ @ @1>?
?q?? ; ;..z:::@D

:@V@V=Y15&x2 2.Y #'//+tz"J"JK %)$4$K$K,7!) ' %L % %MGTOK K K K K K K K K K K K K K K\ 9;k569=k56&@Kk<=" @$$%8%8%>%>???I}44s?   AI%!DI
E,AE'"I'E,,CIIIc                     | xj         dz  c_         t                              t                    5  |                     |          \  }}}d d d            n# 1 swxY w Y   |                     |           |S )Nr   )rq   r2   r  rk   r   r   )r   r   _ray_trace_ctxrE  r-  rC  s         r+   r   zG_build_eager_tf_policy.<locals>.eager_policy_cls._learn_on_batch_helper  s    
 ""a'""**+ABB S S+/+I+I'+R+R(5S S S S S S S S S S S S S S S((888Ls   AAAc                 @    t                               | j                  S r/   )r2   r4   r   rX  s    r+   _get_is_training_placeholderzM_build_eager_tf_policy.<locals>.eager_policy_cls._get_is_training_placeholder  s    ''(9:::r-   c                 ^  
 | xj         dz  c_         t          | j        t          j        j                  r| j        j        n| j                                        t                              du          5 
|                     | | j        | j	        |          }ddd           n# 1 swxY w Y   t          |          }rPt          
          }| j        d         r | |gt          |          z  |          }n$ | ||d                   g}n
fd|D             }t          d          r1|D ].}|D ])\  }}|"t                              d|j                    */| j        d         rd	 |D             }n|d         }d
 |D             }|                     | ||          }	|||	fS )z,Computes and returns grads as eager tensors.r   N)
persistent%_tf_policy_handles_more_than_one_lossr   c           
      r    g | ]3}t          t                              |                              4S r%   )r   r   r   )r(   r   r   rO  s     r+   rO   z^_build_eager_tf_policy.<locals>.eager_policy_cls._compute_gradients_helper.<locals>.<listcomp>  sI     " " " T]]4;;YGGHH" " "r-   	grad_varszOptimizing variable c                 &    g | ]}d  |D             S )c                     g | ]\  }}|S r%   r%   r(   rI  r-  s      r+   rO   zi_build_eager_tf_policy.<locals>.eager_policy_cls._compute_gradients_helper.<locals>.<listcomp>.<listcomp>  s    0001!000r-   r%   )r(   g_and_vs     r+   rO   z^_build_eager_tf_policy.<locals>.eager_policy_cls._compute_gradients_helper.<locals>.<listcomp>  s'    OOOW00000OOOr-   c                     g | ]\  }}|S r%   r%   r  s      r+   rO   z^_build_eager_tf_policy.<locals>.eager_policy_cls._compute_gradients_helper.<locals>.<listcomp>  s    666tq!666r-   )rq   r0   r   r2   r   rq  rJ  GradientTaper   r   r   r   ro   r   r"   r   r   rh   _stats)r   r   losses	optimizerrE  r  rI  r*   r   rC  r   rO  r   s             @@r+   r   zJ_build_eager_tf_policy.<locals>.eager_policy_cls._compute_gradients_helper  sd    ""a'"" $*bhn55 = J:		 J::<<	 ,@,LMM PQUD$*dowOOP P P P P P P P P P P P P P P''F $ 
 .d33	;FG X%9%9ykCKK7& &NN
 ';&:4FSTI&V&V%WNN" " " " " &" " "
 $$ I- I IG ' I I1="KK(Gqv(G(GHHHI {BC 7OOOOO "0!266~666KKgu55E!5%//s   ;#B**B.1B.c                 z   | xj         dz  c_         r5| j        d         r | | j        |           d S  | | j        |           d S | j        d         rAt	          | j                  D ]*\  }}|                    d ||         D                        +d S | j                            d |D                        d S )Nr   r  c                      g | ]\  }}|||fS r/   r%   r(   rI  r*   s      r+   rO   z\_build_eager_tf_policy.<locals>.eager_policy_cls._apply_gradients_helper.<locals>.<listcomp>  s!    SSS1Q]aV]]]r-   c                      g | ]\  }}|||fS r/   r%   r  s      r+   rO   z\_build_eager_tf_policy.<locals>.eager_policy_cls._apply_gradients_helper.<locals>.<listcomp>  s!    LLLDAqam!Qmmmr-   )rq   ro   r   r   r  r   )r   rE  r  oapply_gradients_fns       r+   r   zH_build_eager_tf_policy.<locals>.eager_policy_cls._apply_gradients_helper  s   
 ""a'""! ;FG N&&tT-=~NNNNN&&tT_nMMMMM;FG  )$*: ; ;  1))SSq0ASSS    
 O33LLNLLL    r-   c           	          i }r"t           ||                    |t          <   n
i |t          <   r+|                    t           |                                r-|                    t           | ||                               |S r/   )dictr   r   )r   outputsr   r   fetchesextra_learn_fetches_fngrad_stats_fnr   s        r+   r  z7_build_eager_tf_policy.<locals>.eager_policy_cls._stats  s    G 0-1((7G2L2L-M-M)**-/)*% Ct$:$:4$@$@AABBB JtMM$$G$GHHIIINr-   c                     t          |t                    st          |          }|                    t                     |S r/   )r0   r   set_get_interceptorr1   )r   rA  s     r+   r  zB_build_eager_tf_policy.<locals>.eager_policy_cls._lazy_tensor_dict  s?    1;?? G&12E&F&F#33NCCC&&r-   c                      t          |           S r/   )r   r   s    r+   r   z=_build_eager_tf_policy.<locals>.eager_policy_cls.with_tracing  s    ',,,r-   r   )NNNNNNN)NNNT)NN)Fr/   )1rx   ry   rz   r   r   r   r   r   r!   r   r   r   r   r   r   r   r    r   r   r   r0  r2  r   r   r   r   r   rQ  rU  rY  r[  r]  r   r   rW  rh  r  rO  r  r   r   r  r   r   r  r  r   r   r   )r   r   r   r   r  r   r   r   r   r  r   r  r   r   r   r6  r   r   s   @r+   r   r   J  s       }	+ }	+ }	+ }	+ }	+ }	+ }	+ }	+ }	+ }	+ }	+ }	+ }	+ }	+ }	+~ 
&		 !&*,	) ,	)S*_-,	) ,	) sm	,	) :tJ/c:o1FFG,	) ,	) ,	) 
	,	)\ 
&		 9=QUQU48'+&*&*$	 $	T"235EEF$	 $D$45$	  %T*:%;=M%MN	$	
  %T*:%;=M%MN$	 !c4i1$	 tn$	 d^$	 sm$	 :tJ/c:o1FFG$	 $	 $	 
	$	L 
	&		
 ""#5	# 5	# 5	# 5	# 5	# 5	# 
	 
5	#n 
&		BF	  	  	  	  	  
		  
	&		#	+ #	+ 
	 
#	+J 
&			4'2	4>4Z#889	4 	4 	4 
		4& 
&			^ 	 	 	 	 
		 
&			2 	2 	2 
		2 
&			 	 
		 
&			B 	B 
		B 
&			& 	& 
		& 
&			+ 	+ 
		+ 
&			 	 
		
 
&			{ 	 	 	 	 	 
		 
&			%; 	%4 	% 	% 	% 	% 	% 
		%2 
&		+	J +	J# +	J$ +	J +	J +	J 
	+	JZ	. 	. 	.	* 	* 	* 
i	5 i	5 i	5 i	5 i	5 i	5 
i	5^
	 
	 
	 
		; 	; 	; 
:	0 :	0 :	0 :	0 
:	0x	 	 	 	 	.	 	 	 	 	 	 		' 	' 	' 	' 	' 
	- 	- 
	- 	- 	- 	- 	-r-   r   _eager)r   rw   r   r	   rx   rz   )rh   r   get_default_configr6  r   r   r   r  r  r  r   r   r   r   r   r   r   r   mixinsr   r   r   r   baser   s    ` ``````````````` `     r+   _build_eager_tf_policyr    sJ   L mV,,D%)999 @MMMM*)/DD	
 	
 	
 	
 4JRVWWWWM- M- M- M- M- M- M- M- M- M- M- M- M- M- M- M- M- M- M- M- M- M- M-4 M- M- M-^ !%x$(8O!r-   r/   )Ir{   r^   loggingrv  r   typingr   r   r   r   r   r9   ray._common.deprecationr   r	   ray.rllib.models.catalogr
    ray.rllib.models.repeated_valuesr   ray.rllib.policy.policyr   r   ray.rllib.policy.rnn_sequencingr   ray.rllib.policy.sample_batchr   ray.rllib.utilsr   r   ray.rllib.utils.annotationsr   r   ray.rllib.utils.errorr   ray.rllib.utils.frameworkr   ray.rllib.utils.metricsr   r   r   $ray.rllib.utils.metrics.learner_infor   ray.rllib.utils.numpyr   "ray.rllib.utils.spaces.space_utilsr   ray.rllib.utils.tf_utilsr   ray.rllib.utils.threadingr   ray.rllib.utils.typingr   r   r    r!   ray.util.debugr"   r   r2   tfv	getLoggerrx   r   r1   rK   ra   re   rk   ru   rw   r   r   r  r%   r-   r+   <module>r     s  A A      				     5 5 5 5 5 5 5 5 5 5 5 5 5 5         2 1 1 1 1 1 ; ; ; ; ; ; 7 7 7 7 7 7 7 7 O O O O O O 5 5 5 5 5 5 2 2 2 2 2 2 2 2 = = = = = = = = K K K K K K 3 3 3 3 3 3         
 C B B B B B 2 2 2 2 2 2 ? ? ? ? ? ? 4 4 4 4 4 4 / / / / / /            $ # # # # #}R		8	$	$   4
 
 
  $      ( 	 	 	 	 	F 	 	 	s s slG G G G G G G G  ##3 1C C C C C Cr-   