
    &`iF              
          d dl Z d dlmZ d dlmZmZmZmZ d dlZ	d dl
Zd dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZm Z  d dl!m"Z" d dl#m$Z$m%Z%m&Z&  e            \  Z'Z(Z) e             \  Z*Z+e G d d                      Z,edede-defd            Z.ee(fdededede&fd            Z/i Z0ee(fdededede&fd            Z1dS )    N)OrderedDict)AnyDictListUnion)Space)
Deprecated)RepeatedValuesPreprocessorget_preprocessor)RepeatedValues)SampleBatch)ViewRequirementNullContextManager)OldAPIStack)
TensorTypetry_import_tftry_import_torch)Repeated)ModelConfigDictModelInputDictTensorStructTypec                      e Zd ZdZdedededededefdZd	e	e
         fd
Zdeee
f         de	e
         de
d	e
e	e
         ffdZd	e
fdZde
deee
f         d	ee	e
         e
f         fdZd	eee
f         fdZ	 	 d deeef         de	e         de
d	e
e	e
         ffdZd	e
fdZd	ej        fdZ	 d!ded	ee	e
         eee
f         f         fdZ	 d!ded	ee	e
         eee
f         f         fdZd	efdZ ed          d             ZdS )"ModelV2a  Defines an abstract neural network model for use with RLlib.

    Custom models should extend either TFModelV2 or TorchModelV2 instead of
    this class directly.

    Data flow:
        obs -> forward() -> model_out
            \-> value_function() -> V(s)
    	obs_spaceaction_spacenum_outputsmodel_configname	frameworkc                     || _         || _        || _        || _        |pd| _        || _        d| _        | j                            d          | _        t          j
        t          d| j                   i| _        dS )a  Initializes a ModelV2 instance.

        This method should create any variables used by the model.

        Args:
            obs_space: Observation space of the target gym
                env. This may have an `original_space` attribute that
                specifies how to unflatten the tensor into a ragged tensor.
            action_space: Action space of the target gym
                env.
            num_outputs: Number of output units of the model.
            model_config: Config for the model, documented
                in ModelCatalog.
            name: Name (scope) for the model.
            framework: Either "tf" or "torch".
        default_modelN_time_majorr   )shiftspace)r   r   r   r   r   r    _last_outputget
time_majorr   OBSr   view_requirements)selfr   r   r   r   r   r    s          l/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/models/modelv2.py__init__zModelV2.__init__$   s~    4 !*#/ +-90	' +//>> O_1DNKKK"
    returnc                     g S )a~  Get the initial recurrent state values for the model.

        Returns:
            List of np.array (for tf) or Tensor (for torch) objects containing the
            initial hidden state of an RNN, if applicable.

        .. testcode::
            :skipif: True

            import numpy as np
            from ray.rllib.models.modelv2 import ModelV2
            class MyModel(ModelV2):
                # ...
                def get_initial_state(self):
                    return [
                        np.zeros(self.cell_size, np.float32),
                        np.zeros(self.cell_size, np.float32),
                    ]
         r+   s    r,   get_initial_statezModelV2.get_initial_stateK   s	    ( 	r.   
input_dictstateseq_lensc                     t           )aV  Call the model with the given input tensors and state.

        Any complex observations (dicts, tuples, etc.) will be unpacked by
        __call__ before being passed to forward(). To access the flattened
        observation tensor, refer to input_dict["obs_flat"].

        This method can be called any number of times. In eager execution,
        each call to forward() will eagerly evaluate the model. In symbolic
        execution, each call to forward creates a computation graph that
        operates over the variables of this model (i.e., shares weights).

        Custom models should override this instead of __call__.

        Args:
            input_dict: dictionary of input tensors, including "obs",
                "obs_flat", "prev_action", "prev_reward", "is_training",
                "eps_id", "agent_id", "infos", and "t".
            state: list of state tensors with sizes matching those
                returned by get_initial_state + the batch dimension
            seq_lens: 1d tensor holding input sequence lengths

        Returns:
            A tuple consisting of the model output tensor of size
            [BATCH, num_outputs] and the list of new RNN state(s) if any.

        .. testcode::
            :skipif: True

            import numpy as np
            from ray.rllib.models.modelv2 import ModelV2
            class MyModel(ModelV2):
                # ...
                def forward(self, input_dict, state, seq_lens):
                    model_out, self._value_out = self.base_model(
                        input_dict["obs"])
                    return model_out, state
        NotImplementedError)r+   r4   r5   r6   s       r,   forwardzModelV2.forwarda   s    V "!r.   c                     t           )ae  Returns the value function output for the most recent forward pass.

        Note that a `forward` call has to be performed first, before this
        methods can return anything and thus that calling this method does not
        cause an extra forward pass through the network.

        Returns:
            Value estimate tensor of shape [BATCH].
        r8   r2   s    r,   value_functionzModelV2.value_function   s
     "!r.   policy_lossloss_inputsc                     |S )a  Override to customize the loss function used to optimize this model.

        This can be used to incorporate self-supervised losses (by defining
        a loss over existing input and output tensors of this model), and
        supervised losses (by defining losses over a variable-sharing copy of
        this model's layers).

        You can find an runnable example in examples/custom_loss.py.

        Args:
            policy_loss: List of or single policy loss(es) from the policy.
            loss_inputs: map of input placeholders for rollout data.

        Returns:
            List of or scalar tensor for the customized loss(es) for this
            model.
        r1   )r+   r=   r>   s      r,   custom_losszModelV2.custom_loss   s
    ( r.   c                     i S )a  Override to return custom metrics from your model.

        The stats will be reported as part of the learner stats, i.e.,
        info.learner.[policy_id, e.g. "default_policy"].model.key1=metric1

        Returns:
            The custom metrics for this model.
        r1   r2   s    r,   metricszModelV2.metrics   s	     	r.   Nc                 v   t          |t                    r|                    d          }n|                                }|seg }d}d                    |          |v rJ|                    |d                    |                              |dz  }d                    |          |v J||                    t          j                  }| j                            d          r|d         |d	<   nt          |d         | j	        | j
                  |d<   	 t          |d         j                  d
k    rt          |d         | j
                  |d	<   n|d         |d	<   n# t          $ r |d         |d	<   Y nw xY w|                                 5  |                     ||pg |          }ddd           n# 1 swxY w Y   t          |t                    r,|j        d	hz
  |_        |j        |_        |j        d	hz
  |_        t          |t(                    st          |t*                    rt          |          d
k    r"t-          d                    |                    |\  }}t          |t(                    s"t-          d                    |                    || _        |t          |          dk    r|n|pg fS )aB  Call the model with the given input tensors and state.

        This is the method used by RLlib to execute the forward pass. It calls
        forward() internally after unpacking nested observation tensors.

        Custom models should override forward() instead of __call__.

        Args:
            input_dict: Dictionary of input tensors.
            state: list of state tensors with sizes matching those
                returned by get_initial_state + the batch dimension
            seq_lens: 1D tensor holding input sequence lengths.

        Returns:
            A tuple consisting of the model output tensor of size
                [BATCH, output_spec.size] or a list of tensors corresponding to
                output_spec.shape_list, and a list of state tensors of
                [BATCH, state_size_i] if any.
        T)shallowr   zstate_in_{}   N_disable_preprocessor_apiobsobs_flat   z@forward() must return a tuple of (output, state) tensors, got {}zState output is not a list: {})
isinstancer   copyformatappendr'   SEQ_LENSr   restore_original_dimensionsr   r    lenshapeflattenAttributeErrorcontextr:   accessed_keysdeleted_keys
added_keyslisttuple
ValueErrorr&   )	r+   r4   r5   r6   restorediresoutputs	state_outs	            r,   __call__zModelV2.__call__   s,   @ j+.. 	)!t44HH!((H  	EA&&q))Z77Z(<(<Q(?(?@AAAQ  &&q))Z77 !~~k&:;;H   !<== 	9#-e#4HZ  
 :5!4>4> HUO9z%(.//!33+2:e3Ddn+U+UHZ((+5e+<HZ(! 9 9 9'1%'8$$$9 \\^^ 	@ 	@,,x"h??C	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ j+.. 	G'/'='LJ$&.&;J#$,$7:,$FJ!3%% 	je.D.D 	SUV   !)T** 	Q=DDYOOPPP#S^^a%7%7		ekrJJs%   AE E54E5F22F69F6c                     | j         S )z8Returns the last output returned from calling the model.)r&   r2   s    r,   last_outputzModelV2.last_output  s      r.   c                     t                      S )z6Returns a contextmanager for the current forward pass.r   r2   s    r,   rT   zModelV2.context  s    !###r.   Fas_dictc                     t           )a<  Returns the list (or a dict) of variables for this model.

        Args:
            as_dict: Whether variables should be returned as dict-values
                (using descriptive str keys).

        Returns:
            The list (or dict if `as_dict` is True) of all variables of this
            ModelV2.
        r8   r+   rd   s     r,   	variableszModelV2.variables  
     "!r.   c                     t           )a[  Returns the list of trainable variables for this model.

        Args:
            as_dict: Whether variables should be returned as dict-values
                (using descriptive keys).

        Returns:
            The list (or dict if `as_dict` is True) of all trainable
            (tf)/requires_grad (torch) variables of this ModelV2.
        r8   rf   s     r,   trainable_variableszModelV2.trainable_variables,  rh   r.   c                     | j         du S )zIf True, data for calling this ModelV2 must be in time-major format.

        Returns
            Whether this ModelV2 requires a time-major (TxBx...) data
            format.
        T)r(   r2   s    r,   is_time_majorzModelV2.is_time_major;  s     $&&r.   T)errorc                     d S )Nr1   )r+   argskwargss      r,   import_from_h5zModelV2.import_from_h5D  s    r.   )NN)F) __name__
__module____qualname____doc__r   intr   strr-   r   r   r3   r   r:   r<   r   r@   rB   r   r   r   r`   rb   
contextlibAbstractContextManagerrT   boolrg   rj   rl   r	   rq   r1   r.   r,   r   r      s        %
%
 %
 	%

 &%
 %
 %
 %
 %
 %
N4
#3    ,+"j)+" J+" 	+"
 d:&	'+" +" +" +"Z
"
 
" 
" 
" 
"%48j4I	tJ+	,   ,	c:o. 	 	 	 	  #	XK XK+~56XK CyXK 	XK
 d:&	'XK XK XK XKt!Z ! ! ! !$: $ $ $ $
 $" ""	tJc:o!66	7" " " "  $" ""	tJc:o!66	7" " " "'t ' ' ' ' Zd    r.   r   rG   r    r/   c                     |dv r+t          j        j                                        |           S |dk    r%t          J t                              | d          S t          d|          )zFlatten the given tensor.)tf2tftorchNrE   )	start_dimrR   )tf1keraslayersFlattenr~   rR   r9   )rG   r    s     r,   rR   rR   I  sk     M!!y''))#...	g		   }}SA}...!)Y777r.   r   	tensorlibc                     |dv rt           J t           }n-|dk    rt          J t          }n|dk    rt          J t          }t          |d|          }t	          | ||          S )a  Unpacks Dict and Tuple space observations into their original form.

    This is needed since we flatten Dict and Tuple observations in transit
    within a SampleBatch. Before sending them to the model though, we should
    unflatten them into Dicts or Tuples of tensors.

    Args:
        obs: The flattened observation tensor.
        obs_space: The flattened obs space. If this has the
            `original_space` attribute, we will unflatten the tensor to that
            shape.
        tensorlib: The library used to unflatten (reshape) the array/tensor.

    Returns:
        single tensor or dict / tuple of tensors matching the original
        observation space.
    )r}   r|   Nr~   numpyoriginal_spacer   )r}   r~   npgetattr_unpack_obs)rG   r   r   r   s       r,   rO   rO   U  s}    , M!!~~~			g		   			g		~~~	Y(8)DDNsNi@@@@r.   r%   c                 	   t          |t          j        j        t          j        j        t
          f          rPt          |t          j        j                  rt          | t          t          f          s4t          |t          j        j                  rt          | t                    r| S t          |          t          v rt          t          |                   }nG t          |          |          }t          t                    dk     r|t          t          |          <   t          | j                  dk     s| j        d         |j        d         k    r3t          d                    |j        d         | j                            d}|t           k    rd fd| j        dd         D             }nt          | j        dd                   }t          |t          j        j                  rt          |j                  t          |j                  k    s2J t          |j                  t          |j                  k                g }t%          |j        |j                  D ]q\  }}| d	|||j        z   f         }	||j        z  }|                    t+          |                    |	|t          |j                  z             ||
                     rnt          |t          j        j                  rt          |j                  t          |j                  k    s2J t          |j                  t          |j                  k                t/                      }t%          |j        |j                                                  D ]d\  }\  }
}| d	|||j        z   f         }	||j        z  }t+          |                    |	|t          |j                  z             ||
          ||
<   ent          |t2                    s
J |            |j        j        }| d         }|                    | d	ddf         ||j        |gz             }t+          ||j        |
          }t;          |||j        j                  S |S | S )a  Unpack a flattened Dict or Tuple observation array/tensor.

    Args:
        obs: The flattened observation tensor, with last dimension equal to
            the flat size and any number of batch dimensions. For example, for
            Box(4,), the obs may have shape [B, 4], or [B, N, M, 4] in case
            the Box was nested under two Repeated spaces.
        space: The original space prior to flattening
        tensorlib: The library used to unflatten (reshape) the array/tensor
    i  rI   r   z1Expected flattened obs shape of [..., {}], got {}c                 X    | dS t          | t                    r| S | j        dS | j        S )Nr   )rJ   rv   value)vs    r,   	get_valuez_unpack_obs.<locals>.get_value  s6    923'' #HW_27Nr.   c                 &    g | ]} |          S r1   r1   ).0r   r   s     r,   
<listcomp>z_unpack_obs.<locals>.<listcomp>  s!    ???1))A,,???r.   N.r   ).r   rE   )lengthsmax_len)rJ   gymspacesr   Tupler   rX   rY   dictid_cacher   rP   rQ   rZ   rL   r}   preprocessorszipsizerM   r   reshaper   itemsr
   child_preprocessorr   child_spacer   
_obs_space)rG   r%   r   prepoffset
batch_dimsupr   	obs_slicek
child_sizer   with_repeat_dimr   s                 @r,   r   r   |  sb    %#*/3:+;XFGG Oucj.// 	JsT5M4R4R 	ucjo..	3=c43H3H	 Je99"U))$DD*#E**511D6{{S  $(r%yy!sy>>A2$*Q-!?!?CJJJqM39   
 ??# # # @???	#2#???JJcin--JeSZ-.. (	Wt)**c%,.?.????"B BU\""B#??? AD.== 	 	1Vfqvo%= =>	!& !)))Z$qw--5OPP"+     	 sz// 	Wt)**c%,.?.????"B BU\""B#??? A !3U\5G5G5I5IJJ  	6AqVfqvo%= =>	!& "%%id17mm1KLL'  ! d$>??EEEE?05J&kG'//CGjEM:+FF O OU->)TTTA!!Wdo>UVVVV
r.   )2rx   collectionsr   typingr   r   r   r   	gymnasiumr   r   r   gymnasium.spacesr   ray._common.deprecationr	   ray.rllib.models.preprocessorsr
   r    ray.rllib.models.repeated_valuesr   ray.rllib.policy.sample_batchr   !ray.rllib.policy.view_requirementr   ray.rllib.utilsr   ray.rllib.utils.annotationsr   ray.rllib.utils.frameworkr   r   r   ray.rllib.utils.spaces.repeatedr   ray.rllib.utils.typingr   r   r   r   r}   tfvr~   _r   rw   rR   rO   r   r   r1   r.   r,   <module>r      s       # # # # # # ) ) ) ) ) ) ) ) ) ) ) )         " " " " " " . . . . . . W W W W W W W W ; ; ; ; ; ; 5 5 5 5 5 5 = = = = = = . . . . . . 3 3 3 3 3 3 Q Q Q Q Q Q Q Q Q Q 4 4 4 4 4 4 T T T T T T T T T T}Rq m m m m m m m m`	 8 8 8
 8 8 8 8 8:A A	A %A25AA A A AF 
 @B [ [Z [ [# [GW [ [ [ [ [ [r.   