
    &`i/                     l   d dl mZmZmZmZ d dlZd dlZd dl	Z	d dl
mZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ d dl m!Z! d dl"m#Z#m$Z$ d dl%m&Z&m'Z' d dl(m)Z)  e            \  Z*Z+e G d de                      Z,e G d de,e+j-                              Z.dS )    )DictListTupleUnionN)DiscreteMultiDiscrete)deprecation_warning)ModelV2)SlimFC)TorchModelV2)add_time_dimension)SampleBatch)ViewRequirement)OldAPIStackoverride)try_import_torch)get_base_struct_from_space)flatten_inputs_to_1d_tensorone_hot)ModelConfigDict
TensorType)log_oncec                       e Zd ZdZ ee          deeef         de	e         dede
ee	e         f         fd            Zdede	e         dede
ee	e         f         fdZd	S )
RecurrentNetworka  Helper class to simplify implementing RNN models with TorchModelV2.

    Instead of implementing forward(), you can implement forward_rnn() which
    takes batches with the time dimension added already.

    Here is an example implementation for a subclass
    ``MyRNNClass(RecurrentNetwork, nn.Module)``::

        def __init__(self, obs_space, num_outputs):
            nn.Module.__init__(self)
            super().__init__(obs_space, action_space, num_outputs,
                             model_config, name)
            self.obs_size = _get_size(obs_space)
            self.rnn_hidden_dim = model_config["lstm_cell_size"]
            self.fc1 = nn.Linear(self.obs_size, self.rnn_hidden_dim)
            self.rnn = nn.GRUCell(self.rnn_hidden_dim, self.rnn_hidden_dim)
            self.fc2 = nn.Linear(self.rnn_hidden_dim, num_outputs)

            self.value_branch = nn.Linear(self.rnn_hidden_dim, 1)
            self._cur_value = None

        @override(ModelV2)
        def get_initial_state(self):
            # Place hidden states on same device as model.
            h = [self.fc1.weight.new(
                1, self.rnn_hidden_dim).zero_().squeeze(0)]
            return h

        @override(ModelV2)
        def value_function(self):
            assert self._cur_value is not None, "must call forward() first"
            return self._cur_value

        @override(RecurrentNetwork)
        def forward_rnn(self, input_dict, state, seq_lens):
            x = nn.functional.relu(self.fc1(input_dict["obs_flat"].float()))
            h_in = state[0].reshape(-1, self.rnn_hidden_dim)
            h = self.rnn(x, h_in)
            q = self.fc2(h)
            self._cur_value = self.value_branch(h).squeeze(1)
            return q, [h]
    
input_dictstateseq_lensreturnc                 d   t          d          rt          d           |d                                         }| j                            dd          | _        t          ||d| j                  }|                     |||          \  }}t          	                    |d	| j
        g          }||fS )
zAdds time dimension to batch before sending inputs to forward_rnn().

        You should implement forward_rnn() in your subclass.recurrent_network_tfz5ray.rllib.models.torch.recurrent_net.RecurrentNetwork)oldobs_flat_time_majorFtorch)r   	framework
time_major)r   r	   floatmodel_configgetr&   r   forward_rnnr$   reshapenum_outputs)selfr   r   r   flat_inputsinputsoutput	new_states           x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/models/torch/recurrent_net.pyforwardzRecurrentNetwork.forwardF   s     *++ 	K    !,2244 +//uEE#	
 
 
 !,,VUHEE	vD,<'=>>y      r0   c                      t          d          )a  Call the model with the given input tensors and state.

        Args:
            inputs: Observation tensor with shape [B, T, obs_size].
            state: List of state tensors, each with shape [B, size].
            seq_lens: 1D tensor holding input sequence lengths.
                Note: len(seq_lens) == B.

        Returns:
            (outputs, new_state): The model output tensor of shape
                [B, T, num_outputs] and the list of new state tensors each with
                shape [B, size].

        Examples:
            def forward_rnn(self, inputs, state, seq_lens):
                model_out, h, c = self.rnn_model([inputs, seq_lens] + state)
                return model_out, [h, c]
        z(You must implement this for an RNN model)NotImplementedError)r.   r0   r   r   s       r3   r+   zRecurrentNetwork.forward_rnng   s    * ""LMMMr5   N)__name__
__module____qualname____doc__r   r
   r   strr   r   r   r4   r+    r5   r3   r   r      s        ) )V Xg!j)! J! 	!
 
z4
++	,! ! ! !@N N)-j)9NEON	z4
++	,N N N N N Nr5   r   c                       e Zd ZdZdej        j        dej        j        dedede	f
 fdZ
 ee          dee	ef         d	ee         d
edeeee         f         f fd            Z ee          ded	ee         d
edeeee         f         fd            Z ee          deeej                 ee         f         fd            Z ee          defd            Z xZS )LSTMWrapperzGAn LSTM wrapper serving as an interface for ModelV2s that set use_lstm.	obs_spaceaction_spacer-   r)   namec                 .   t           j                            |            t          t          |                               ||d ||           | j        0t          t          j        | j	        j
                            | _        |d         | _        |                    dd          | _        |d         | _        |d         | _        t!          | j                  | _        d| _        t)          j        | j                  D ]}t-          |t.                    r| xj        |j        z  c_        -t-          |t2                    r(| xj        t          j        |j                  z  c_        j|j
        5| xj        t          t          j        |j
                            z  c_        | xj        t          t9          |                    z  c_        | j        r| xj        | j        z  c_        | j        r| xj        dz  c_        t                               | j        | j        | j                   | _        || _        t?          | j        | j        d t@          j         j!        j"        	          | _#        t?          | j        dd t@          j         j!        j"        	          | _$        |d         r3tK          tL          j'        | j        d
          | j(        tL          j)        <   |d         r/tK          tL          j*        d
          | j(        tL          j+        <   d S d S )Nlstm_cell_sizer#   Flstm_use_prev_actionlstm_use_prev_rewardr      )batch_first)in_sizeout_sizeactivation_fninitializerr'   )spaceshift)rN   ),nnModule__init__superr?   r-   intnpprodr@   shape	cell_sizer*   r&   use_prev_actionuse_prev_rewardr   rA   action_space_struct
action_dimtreeflatten
isinstancer   nr   sumnveclenLSTMlstmr   r$   initxavier_uniform__logits_branch_value_branchr   r   ACTIONSview_requirementsPREV_ACTIONSREWARDSPREV_REWARDS)r.   r@   rA   r-   r)   rB   rM   	__class__s          r3   rQ   zLSTMWrapper.__init__   s    		4   k4  ))|T<	
 	
 	
 #"274>+?#@#@AAD%&67&**=%@@+,BC+,BC#=d>O#P#P \$":;; 	3 	3E%** 357*E=11 326%*#5#55(3rwu{';';#<#<<3s5zz??2  	0/ 	"! GGdndo:M  
 
	 ' %N%5	
 
 
 $N5	
 
 
 ./ 	?N#4+<B@ @ @D";#;< ./ 	?N#2@ @ @D";#;<<<	 	r5   r   r   r   r   c                 h   |J |                      |g d           \  }}g }| j        d         r|t          j                 }| j        d         r+|                    t          || j        d                     nt          | j        t          t          f          r(t          |                                | j                  }n|                                }|                    t                              |d| j        g                     | j        d         rR|                    t                              |t          j                                                 ddg                     |r t                              |g|z   d          }||d	<   t%                                          |||          S )
NrE   _disable_action_flatteningF)spaces_struct	time_axisr'   rF   rG   )dimr"   )_wrapped_forwardr)   r   rk   appendr   rZ   r^   rA   r   r   r   r(   r$   r,   r[   rm   catrR   r4   )	r.   r   r   r   wrapped_out_prev_a_rprev_arn   s	           r3   r4   zLSTMWrapper.forward   s    ###..z2tDDQ  34 	N 89F  !=> N/d.FRW      d/(M1JKK ,$V\\^^T5FGGFF#\\^^Ffr4?6K L LMMM34 	OOj)ABHHJJRQRGTT  
  	E))[MH$<!)DDK "-
:wwz5(;;;r5   r0   c                 h   |                      |t                              |d         d          t                              |d         d          g          \  | _        \  }}|                     | j                  }|t                              |d          t                              |d          gfS )Nr   rG   )rd   r$   	unsqueeze	_featuresrg   squeeze)r.   r0   r   r   hc	model_outs          r3   r+   zLSTMWrapper.forward_rnn  s     "&U__U1Xq115??58Q3O3OP"
 "
A ''77	5==A..a0C0CDDDr5   c                 p   t          | j        j                                                  }|j                            d| j                                                                      d          |j                            d| j                                                                      d          g}|S )NrG   r   )	nextrg   _modelchildrenweightnewrW   zero_r~   )r.   linearr   s      r3   get_initial_statezLSTMWrapper.get_initial_state  s     d)099;;<<Ma006688@@CCMa006688@@CC
 r5   c                     | j         
J d            t                              |                     | j                   dg          S )Nzmust call forward() firstr'   )r}   r$   r,   rh   )r.   s    r3   value_functionzLSTMWrapper.value_function%  s?    ~))+F)))}}T//??"FFFr5   )r8   r9   r:   r;   gymspacesSpacerS   r   r<   rQ   r   r   r   r   r   r   r4   r+   r
   r   rT   ndarrayr   r   __classcell__)rn   s   @r3   r?   r?      s       QQM:#M j&M 	M
 &M M M M M M M` X-<j)-< J-< 	-<
 
z4
++	,-< -< -< -< -<  -<^ XE E)-j)9EEOE	z4
++	,E E E  E. Xg5bj)94
;K)K#L     XgG
 G G G G G G G Gr5   r?   )/typingr   r   r   r   	gymnasiumr   numpyrT   r\   gymnasium.spacesr   r   ray._common.deprecationr	   ray.rllib.models.modelv2r
   ray.rllib.models.torch.miscr   $ray.rllib.models.torch.torch_modelv2r   ray.rllib.policy.rnn_sequencingr   ray.rllib.policy.sample_batchr   !ray.rllib.policy.view_requirementr   ray.rllib.utils.annotationsr   r   ray.rllib.utils.frameworkr   "ray.rllib.utils.spaces.space_utilsr   ray.rllib.utils.torch_utilsr   r   ray.rllib.utils.typingr   r   ray.util.debugr   r$   rO   r   rP   r?   r=   r5   r3   <module>r      s   + + + + + + + + + + + +          4 4 4 4 4 4 4 4 7 7 7 7 7 7 , , , , , , . . . . . . = = = = = = > > > > > > 5 5 5 5 5 5 = = = = = = = = = = = = = = 6 6 6 6 6 6 I I I I I I L L L L L L L L > > > > > > > > # # # # # #	r bN bN bN bN bN| bN bN bNJ hG hG hG hG hG"BI hG hG hG hG hGr5   