
    &`i1-                     z   d dl Z d dlmZmZmZ d dlZd dlZd dl	Z	d dl
mZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZ d dl m!Z!m"Z" d dl#m$Z$m%Z% d dl&m'Z'  e            \  Z(Z)Z* e j+        e,          Z-e G d de                      Z.e G d de.                      Z/dS )    N)DictListTuple)DiscreteMultiDiscrete)deprecation_warning)ModelV2)	TFModelV2)add_time_dimension)SampleBatch)ViewRequirement)OldAPIStackoverride)try_import_tf)get_base_struct_from_space)flatten_inputs_to_1d_tensorone_hot)ModelConfigDict
TensorType)log_oncec                       e Zd ZdZ ee          deeef         de	e         dede
ee	e         f         fd            Zdede	e         dede
ee	e         f         fdZde	e         fd	Zd
S )RecurrentNetworka  Helper class to simplify implementing RNN models with TFModelV2.

    Instead of implementing forward(), you can implement forward_rnn() which
    takes batches with the time dimension added already.

    Here is an example implementation for a subclass
    ``MyRNNClass(RecurrentNetwork)``::

        def __init__(self, *args, **kwargs):
            super(MyModelClass, self).__init__(*args, **kwargs)
            cell_size = 256

            # Define input layers
            input_layer = tf.keras.layers.Input(
                shape=(None, obs_space.shape[0]))
            state_in_h = tf.keras.layers.Input(shape=(256, ))
            state_in_c = tf.keras.layers.Input(shape=(256, ))
            seq_in = tf.keras.layers.Input(shape=(), dtype=tf.int32)

            # Send to LSTM cell
            lstm_out, state_h, state_c = tf.keras.layers.LSTM(
                cell_size, return_sequences=True, return_state=True,
                name="lstm")(
                    inputs=input_layer,
                    mask=tf.sequence_mask(seq_in),
                    initial_state=[state_in_h, state_in_c])
            output_layer = tf.keras.layers.Dense(...)(lstm_out)

            # Create the RNN model
            self.rnn_model = tf.keras.Model(
                inputs=[input_layer, seq_in, state_in_h, state_in_c],
                outputs=[output_layer, state_h, state_c])
            self.rnn_model.summary()
    
input_dictstateseq_lensreturnc                     t          d          rt          d           |J |d         }t          ||d          }|                     |||          \  }}t                              |d| j        g          |fS )	zAdds time dimension to batch before sending inputs to forward_rnn().

        You should implement forward_rnn() in your subclass.recurrent_network_tfz2ray.rllib.models.tf.recurrent_net.RecurrentNetwork)oldNobs_flattf)padded_inputsr   	framework)r   r   r   forward_rnnr!   reshapenum_outputs)selfr   r   r   flat_inputsinputsoutput	new_states           u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/models/tf/recurrent_net.pyforwardzRecurrentNetwork.forward?   s     *++ 	H    ### ,#%D
 
 
 !,,
 
	
 zz&2t'7"8999DD    r*   c                      t          d          )a  Call the model with the given input tensors and state.

        Args:
            inputs: observation tensor with shape [B, T, obs_size].
            state: list of state tensors, each with shape [B, T, size].
            seq_lens: 1d tensor holding input sequence lengths.

        Returns:
            (outputs, new_state): The model output tensor of shape
                [B, T, num_outputs] and the list of new state tensors each with
                shape [B, size].

        Sample implementation for the ``MyRNNClass`` example::

            def forward_rnn(self, inputs, state, seq_lens):
                model_out, h, c = self.rnn_model([inputs, seq_lens] + state)
                return model_out, [h, c]
        'You must implement this for a RNN modelNotImplementedError)r(   r*   r   r   s       r-   r%   zRecurrentNetwork.forward_rnn]   s    * ""KLLLr/   c                      t          d          )a  Get the initial recurrent state values for the model.

        Returns:
            list of np.array objects, if any

        Sample implementation for the ``MyRNNClass`` example::

            def get_initial_state(self):
                return [
                    np.zeros(self.cell_size, np.float32),
                    np.zeros(self.cell_size, np.float32),
                ]
        r1   r2   r(   s    r-   get_initial_statez"RecurrentNetwork.get_initial_statet   s     ""KLLLr/   N)__name__
__module____qualname____doc__r   r	   r   strr   r   r   r.   r%   r6    r/   r-   r   r      s       ! !F XgEj)E JE 	E
 
z4
++	,E E E E:M M)-j)9MEOM	z4
++	,M M M M.M4
#3 M M M M M Mr/   r   c                       e Zd ZdZdej        j        dej        j        dedede	f
 fdZ
 ee          dee	ef         d	ee         d
edeeee         f         f fd            Z ee          ded	ee         d
edeeee         f         fd            Z ee          deej                 fd            Z ee          defd            Z xZS )LSTMWrapperzGAn LSTM wrapper serving as an interface for ModelV2s that set use_lstm.	obs_spaceaction_spacer'   model_confignamec                 d   t          t          |                               ||d ||           | j        0t	          t          j        | j        j                            | _        |d         | _	        |d         | _
        |d         | _        t          | j                  | _        d| _        t!          j        | j                  D ]}t%          |t&                    r| xj        |j        z  c_        -t%          |t*                    r(| xj        t          j        |j                  z  c_        j|j        5| xj        t	          t          j        |j                            z  c_        | xj        t	          t1          |                    z  c_        | j
        r| xj        | j        z  c_        | j        r| xj        dz  c_        t2          j        j                            d | j        fd          }|| _        t2          j        j                            | j	        fd          }t2          j        j                            | j	        fd	          }	t2          j        j                            d
dt2          j                  }
t3          j        j                            | j	        ddd          |t2                              |
          ||	g          \  }}}t3          j        j                             | j        t2          j        j!        j"        d          |          }t3          j        j                             dd d          |          }t2          j        #                    ||
||	g||||g          | _$        tJ          &                    tN          j(                  r| j$        )                                 |d         r3tU          tV          j,        | j        d          | j-        tV          j.        <   |d         r/tU          tV          j/        d          | j-        tV          j0        <   d S d S )Nlstm_cell_sizelstm_use_prev_actionlstm_use_prev_rewardr      r*   )shaperB   hcr<   seq_in)rH   rB   dtypeTlstm)return_sequencesreturn_staterB   )r*   maskinitial_statelogits)
activationrB   values)r*   outputsr$   )spaceshift)rW   )1superr>   __init__r'   intnpprodr?   rH   	cell_sizeuse_prev_actionuse_prev_rewardr   r@   action_space_struct
action_dimtreeflatten
isinstancer   nr   sumnveclenr!   keraslayersInputint32LSTMsequence_maskDenseactivationslinearModel
_rnn_modelloggerisEnabledForloggingINFOsummaryr   r   ACTIONSview_requirementsPREV_ACTIONSREWARDSPREV_REWARDS)r(   r?   r@   r'   rA   rB   rV   input_layer
state_in_h
state_in_crK   lstm_outstate_hstate_crR   rT   	__class__s                   r-   rY   zLSTMWrapper.__init__   s    	k4  ))|T<	
 	
 	
 #"274>+?#@#@AAD%&67+,BC+,BC#=d>O#P#P \$":;; 	3 	3E%** 357*E=11 326%*#5#55(3rwu{';';#<#<<3s5zz??2  	0/ 	"! ho++)* , 
 
 'X_**$.1B*MM
X_**$.1B*MM
&&Rhbh&OO &(X_%9%9NT6 &: &
 &
 !!&))%z2&
 &
 &
"'7 &&)=)D8 ' 
 

  &&qT&II(SS (..Z@VWg6 ) 
 

 w|,, 	&O##%%% ./ 	?N#4+<B@ @ @D";#;< ./ 	?N#2@ @ @D";#;<<<	 	r/   r   r   r   r   c                    |J |                      |g d           \  }}g }| j        d         r|t          j                 }| j        d         r+|                    t          || j        d                     nt          | j        t          t          f          rt          || j                  }|                    t                              t                              |t          j                  d| j        g                     | j        d         rc|                    t                              t                              |t          j                 t          j                  ddg                     |r t                              |g|z   d          }||d	<   t'                                          |||          S )
NrE   _disable_action_flatteningF)spaces_struct	time_axisr$   rF   rG   )axisr    )_wrapped_forwardrA   r   r{   appendr   r`   rd   r@   r   r   r   r!   r&   castfloat32ra   r}   concatrX   r.   )	r(   r   r   r   wrapped_out_prev_a_rprev_ar   s	           r-   r.   zLSTMWrapper.forward   s    ###..z2tDDQ  34 	 89F  !=> /&*&>"'      d/(M1JKK @$VT->??FJJrwwvrz::R<QRR   34 	OO

GGJ{'?@"*MMPRTUw     	F))[MH$<1)EEK "-
:wwz5(;;;r/   r*   c                 V    |                      ||g|z             \  }| _        }}|||gfS N)rs   
_value_out)r(   r*   r   r   	model_outrI   rJ   s          r-   r%   zLSTMWrapper.forward_rnn  s:     ,0??FH;MPU;U+V+V(	4?Aq1a&  r/   c                     t          j        | j        t           j                  t          j        | j        t           j                  gS r   )r[   zerosr]   r   r5   s    r-   r6   zLSTMWrapper.get_initial_state  s4     HT^RZ00HT^RZ00
 	
r/   c                 D    t                               | j        dg          S )Nr$   )r!   r&   r   r5   s    r-   value_functionzLSTMWrapper.value_function#  s    zz$/B4000r/   )r7   r8   r9   r:   gymspacesSpacerZ   r   r;   rY   r   r   r   r   r   r   r.   r%   r	   r[   ndarrayr6   r   __classcell__)r   s   @r-   r>   r>      s       QQV:#V j&V 	V
 &V V V V V V Vp X1<j)1< J1< 	1<
 
z4
++	,1< 1< 1< 1< 1<  1<f X! !)-j)9!EO!	z4
++	,! ! !  ! Xg
4
#3 
 
 
 
 Xg1
 1 1 1 1 1 1 1 1r/   r>   )0rv   typingr   r   r   	gymnasiumr   numpyr[   rb   gymnasium.spacesr   r   ray._common.deprecationr   ray.rllib.models.modelv2r	   ray.rllib.models.tf.tf_modelv2r
   ray.rllib.policy.rnn_sequencingr   ray.rllib.policy.sample_batchr   !ray.rllib.policy.view_requirementr   ray.rllib.utils.annotationsr   r   ray.rllib.utils.frameworkr   "ray.rllib.utils.spaces.space_utilsr   ray.rllib.utils.tf_utilsr   r   ray.rllib.utils.typingr   r   ray.util.debugr   tf1r!   tfv	getLoggerr7   rt   r   r>   r<   r/   r-   <module>r      s    $ $ $ $ $ $ $ $ $ $          4 4 4 4 4 4 4 4 7 7 7 7 7 7 , , , , , , 4 4 4 4 4 4 > > > > > > 5 5 5 5 5 5 = = = = = = = = = = = = = = 3 3 3 3 3 3 I I I I I I I I I I I I I I > > > > > > > > # # # # # #}R		8	$	$ gM gM gM gM gMy gM gM gMT _1 _1 _1 _1 _1" _1 _1 _1 _1 _1r/   