
    &`i8N                        d dl mZ d dlmZ d dlmZmZ d dlZd dl	Z	d dl
mZmZ d dlmZ d dlmZ d dlmZmZ d d	lmZmZmZmZ  e            \  ZZZ e            \  ZZd
ZdZdZ dZ!e edd          d=de"de"dej#        fd                        Z$e edd          	 d>deej#                 dee%         dej#        fd                        Z&ed?dede%defd            Z'e	 	 d@dej#        dej#        deej#                 dee(         dej#        f
d             Z)e	 	 	 dAd!ed"ee         d#e%d$e%def
d%            Z*ed&             Z+edBdej#        d(e,dej#        fd)            Z-edej#        dej#        fd*            Z.e	 	 	 	 dCdej#        deej#                 d+eej#                 de%d,e,f
d-            Z/ed d'd.ej0        fdeee"f         d/e"d0e,d1e,d2e1dej#        fd3            Z2eee"         fd4            Z3edDdej#        d5e,dej#        fd6            Z4edEdej#        d7e%dej#        fd8            Z5e	 dFdeej#        e6f         d:e"d;ee,         dej#        fd<            Z7dS )G    )OrderedDict)MappingProxyType)ListOptionalN)DiscreteMultiDiscrete)
Deprecated)	PublicAPI)try_import_tftry_import_torch)SpaceStructTensorStructType
TensorTypeUniongư>i    z)RLlib itself has no use for this anymore.F)helperror@   sizealignreturnc                    | |j         z  }t          j        ||dz
  z   t          j                  }|j        j        |z  }|dk    rdn||z
  }|dk    r)|||dz            dd                             |          }n ||||z                                |          }t          |          | k    sJ t          |                      |j        j        |z  dk    sJ |j        j                    |S )a  Returns an array of a given size that is 64-byte aligned.

    The returned array can be efficiently copied into GPU memory by TensorFlow.

    Args:
        size: The size (total number of items) of the array. For example,
            array([[0.0, 1.0], [2.0, 3.0]]) would have size=4.
        dtype: The numpy dtype of the array.
        align: The alignment to use.

    Returns:
        A np.ndarray with the given specifications.
       dtyper   )itemsizenpemptyuint8ctypesdataviewlen)r   r   r   nr   
data_alignoffsetoutputs           i/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/utils/numpy.pyaligned_arrayr*      s    & 	u~AHQ%!)_BH555E"U*J//QQ
(:FAvvv
*+AaC055e<<v
*+0077v;;$F=%***FM,>***M    items
time_majorc                    t          |           dk    rg S t          |           dk    r| d         S t          | d         t          j                  r| d         j        t          j        t          j        t          j        fv rO| d         j        }t          t          d | D                       |          }||du rEt          d | D                       }| d         j
        d         |f| d         j
        dd         z   }nwt          d | D                       }|| d         j
        d         f| d         j
        dd         z   }n2t          d	 | D                       }|f| d         j
        dd         z   }|                    |          }|j        j        d
z  dk    sJ |j        j                    t          j        | ||rdnd           |S t          j        | |rdnd          S )a  Concatenate arrays, ensuring the output is 64-byte aligned.

    We only align float arrays; other arrays are concatenated as normal.

    This should be used instead of np.concatenate() to improve performance
    when the output array is likely to be fed into TensorFlow.

    Args:
        items: The list of items to concatenate and align.
        time_major: Whether the data in items is time-major, in which
            case, we will concatenate along axis=1.

    Returns:
        The concat'd and aligned array.
    r   r   c              3   $   K   | ]}|j         V  d S N)r   .0ss     r)   	<genexpr>z!concat_aligned.<locals>.<genexpr>a   s$       7 7A 7 7 7 7 7 7r+   NTc              3   0   K   | ]}|j         d          V  dS )r   Nshaper1   s     r)   r4   z!concat_aligned.<locals>.<genexpr>d   (      ::q
::::::r+   r   c              3   0   K   | ]}|j         d          V  dS r   Nr6   r1   s     r)   r4   z!concat_aligned.<locals>.<genexpr>i   r8   r+   c              3   0   K   | ]}|j         d          V  dS r:   r6   r1   s     r)   r4   z!concat_aligned.<locals>.<genexpr>n   s(      661AGAJ666666r+   r   )outaxisr=   )r$   
isinstancer   ndarrayr   float32float64r    r*   sumr7   reshaper!   r"   concatenate)r,   r-   r   flat	batch_dim	new_shaper(   s          r)   concat_alignedrI   =   s   0 5zzQ		Uq Qx	E!Hbj	)	) BeAhn




A / /
 aS 7 7 7 7 777??!T!!::E:::::	"1X^A.	;e?? 		  ::E:::::	&aq(9;e?? 		 6666666I"uQx~abb'99Ii((}!B&!+++V]-?+++
u&J/EqqAFFFF~ez*@!!qAAAAr+   Txreduce_typec                 8    fd}t          j        ||           S )a  Converts values in `stats` to non-Tensor numpy or python types.

    Args:
        x: Any (possibly nested) struct, the values in which will be
            converted and returned as a new struct with all torch/tf tensors
            being converted to numpy types.
        reduce_type: Whether to automatically reduce all float64 and int64 data
            into float32 and int32 data, respectively.

    Returns:
        A new struct with the same structure as `x`, but with all
        values converted to numpy arrays (on CPU).
    c                 l   t           rt          | t           j                  rt          |                                           dk    r&|                                                                 n7|                                                                                                 }not          rft          | t          j        t          j
        f          r@t          | d          r0t                                          sJ |                                 }n| }rt          |t          j                  rt          j        |j        t          j                  r |                    t          j                  }n>t          j        |j        t(                    r|                    t          j                  }|S )Nr   numpy)torchr?   Tensorr$   r   cpuitemdetachrN   tfVariablehasattrexecuting_eagerlyr   r@   
issubdtyper   floatingastyperA   intint32)rR   retrK   s     r)   mappingz!convert_to_numpy.<locals>.mapping   s[    	Zel33 	 tyy{{##q(( 

!!![[]]&&((..00 C 	dRY$<==	BI$PWBXBX	 '')))))**,,CCC 	+:c2:66 	+}SY44 +jj,,sy#.. +jj**
r+   )treemap_structure)rJ   rK   r^   s    ` r)   convert_to_numpyra   x   s1    $    * gq)))r+   weightsbiases	frameworkc                    d	d} ||           } |dk    o7| j         d         |j         d         k    o| j         d         |j         d         k    } |||          } ||          }t          j        | |          |dn|z   S )
a  Calculates FC (dense) layer outputs given weights/biases and input.

    Args:
        x: The input to the dense layer.
        weights: The weights matrix.
        biases: The biases vector. All 0s if None.
        framework: An optional framework hint (to figure out,
            e.g. whether to transpose torch weight matrices).

    Returns:
        The dense layer's output.
    Fc                    t           rRt          | t           j                  r8|                                                                                                 } t          rGt                                          r.t          | t          j                  r|                                 } |rt          j
        |           } | S r0   )rO   r?   rP   rQ   rS   rN   rT   rW   rU   r   	transpose)r"   rg   s     r)   map_zfc.<locals>.map_   s     	3$-- 3xxzz((**0022 	$"&&(( 	$$,, $zz|| 	&<%%Dr+   rO   r   r   )rg   N        F)r7   r   matmul)rJ   rb   rc   rd   rh   rg   s         r)   fcrl      s    (	 	 	 	 	QAW$ 	
gmA&&I171:q9I+I  d7i000GT&\\F9Q  6>CCvFFr+   inputsspaces_struct	time_axis
batch_axisc           	      4   |r|sJ t          j        |           }|t          j        |          ndgt          |          z  }d}d}g }t          ||          D ]\  }	||rj        d         }|rj        d         }t          |	t                    rb|rt          j        ||z  g          |	                    t          |	j                                      t          j                             t          |	t                    r|rt          j        ||z  dg          |rI|	                    t          j        fdt!          |	j                  D             d                     |	                    t          j        fdt!          |	j                  D             d                     bt          t$                    rt          j        g          |rt          j        ||z  dg          n0|rt          j        |dg          nt          j        dg          |	                                        t          j                             t          j        |d          }
|rt          j        |
||dg          }
|
S )	a	  Flattens arbitrary input structs according to the given spaces struct.

    Returns a single 1D tensor resulting from the different input
    components' values.

    Thereby:
    - Boxes (any shape) get flattened to (B, [T]?, -1). Note that image boxes
    are not treated differently from other types of Boxes and get
    flattened as well.
    - Discrete (int) values are one-hot'd, e.g. a batch of [1, 0, 3] (B=3 with
    Discrete(4) space) results in [[0, 1, 0, 0], [1, 0, 0, 0], [0, 0, 0, 1]].
    - MultiDiscrete values are multi-one-hot'd, e.g. a batch of
    [[0, 2], [1, 4]] (B=2 with MultiDiscrete([2, 5]) space) results in
    [[1, 0,  0, 0, 1, 0, 0], [0, 1,  0, 0, 0, 0, 1]].

    Args:
        inputs: The inputs to be flattened.
        spaces_struct: The (possibly nested) structure of the spaces that `inputs`
            belongs to.
        time_axis: Whether all inputs have a time-axis (after the batch axis).
            If True, will keep not only the batch axis (0th), but the time axis
            (1st) as-is and flatten everything from the 2nd axis up.
        batch_axis: Whether all inputs have a batch axis.
            If True, will keep that batch axis as-is and flatten everything from the
            other dims up.

    Returns:
        A single 1D tensor resulting from concatenating all
        flattened/one-hot'd input components. Depending on the time_axis flag,
        the shape is (B, n) or (B, T, n).

    .. testcode::
        :skipif: True

        # B=2
        from ray.rllib.utils.tf_utils import flatten_inputs_to_1d_tensor
        from gymnasium.spaces import Discrete, Box
        out = flatten_inputs_to_1d_tensor(
            {"a": [1, 0], "b": [[[0.0], [0.1]], [1.0], [1.1]]},
            spaces_struct=dict(a=Discrete(2), b=Box(shape=(2, 1)))
        )
        print(out)

        # B=2; T=2
        out = flatten_inputs_to_1d_tensor(
            ([[1, 0], [0, 1]],
             [[[0.0, 0.1], [1.0, 1.1]], [[2.0, 2.1], [3.0, 3.1]]]),
            spaces_struct=tuple([Discrete(2), Box(shape=(2, ))]),
            time_axis=True
        )
        print(out)

    .. testoutput::

        [[0.0, 1.0,  0.0, 0.1], [1.0, 0.0,  1.0, 1.1]]  # B=2 n=4
        [[[0.0, 1.0, 0.0, 0.1], [1.0, 0.0, 1.0, 1.1]],
        [[1.0, 0.0, 2.0, 2.1], [0.0, 1.0, 3.0, 3.1]]]  # B=2 T=2 n=4
    Nr   r   depthc                     g | ]=\  }}t          d d |f         |                              t          j                  >S )Nrr   one_hotrZ   r   rA   r2   ir%   input_s      r)   
<listcomp>z/flatten_inputs_to_1d_tensor.<locals>.<listcomp>-  sV        $1 $F111a4L:::AA"*MM  r+   r>   c                 ~    g | ]9\  }}t          |         |                               t          j                  :S )rr   rv   rx   s      r)   r{   z/flatten_inputs_to_1d_tensor.<locals>.<listcomp>7  sN        $1 $F1IQ777>>rzJJ  r+   )r_   flattenr$   zipr7   r?   r   r   rD   appendrw   r%   rZ   rA   r   rE   	enumeratenvecfloatarray)rm   rn   ro   rp   flat_inputsflat_spacesBTr<   spacemergedrz   s              @r)   flatten_inputs_to_1d_tensorr      s   D -*---,v&&K $ 	]###Vc+&&&  	AA
C[+66 02 0299QA $LO eX&& (	2 5FQUG44JJwvUW555<<RZHHIIII}-- #	2 9FQUBK88 

N   (1%*(=(=          

N   (1%*(=(=          &%(( ,6(++ 2FQUBK88 2FQG44FRD11JJv}}RZ001111^Cb)))F 0FQ2J//Mr+   c                    t          | t          j                  r|                     d           | S t          | t                    rt          t          |                     S t          | t                    rt          |           S | S )a{  Flags actions immutable to notify users when trying to change them.

    Can also be used with any tree-like structure containing either
    dictionaries, numpy arrays or already immutable objects per se.
    Note, however that `tree.map_structure()` will in general not
    include the shallow object containing all others and therefore
    immutability will hold only for all objects contained in it.
    Use `tree.traverse(fun, action, top_down=False)` to include
    also the containing object.

    Args:
        obj: The object to be made immutable.

    Returns:
        The immutable object.

    .. testcode::
        :skipif: True

        import tree
        import numpy as np
        from ray.rllib.utils.numpy import make_action_immutable
        arr = np.arange(1,10)
        d = dict(a = 1, b = (arr, arr))
        tree.traverse(make_action_immutable, d, top_down=False)
    F)write)r?   r   r@   setflagsr   r   dict)objs    r)   make_action_immutabler   S  s|    8 #rz"" 5!!!
	C	%	% S		***	C		 $$$
r+         ?deltac                     t          j        t          j        |           |k     t          j        | d          dz  |t          j        |           d|z  z
  z            S )z4Reference: https://en.wikipedia.org/wiki/Huber_loss.       @g      ?)r   whereabspower)rJ   r   s     r)   
huber_lossr   z  sS     8
q		E28As++c15BF1IIe<S3T  r+   c                 T    t          j        t          j        |                     dz  S )zComputes half the L2 norm of a tensor (w/o the sqrt): sum(x**2) / 2.

    Args:
        x: The input tensor.

    Returns:
        The l2-loss output according to the above formula given `x`.
    r   )r   rC   square)rJ   s    r)   l2_lossr     s!     6")A,,#%%r+   initial_internal_statesforget_biasc                 (   | j         |rdnd         }| j         |rdnd         }|j         d         dz  }|/t          j        ||f          }	t          j        ||f          }
n|d         }	|d         }
|rt          j        |||f          }nt          j        |||f          }t          |          D ]V}|r| |ddddf         n| dd|ddf         }t          j        ||
fd          }t          j        ||          |z   }t          |dd|dz  |dz  f         |z             }t          j        |	|          }	t          |ddd|f                   }t          j        |dd||dz  f                   }t          j	        |	t          j        ||                    }	t          |dd|dz  |dz  f                   }t          j        |t          j        |	                    }
|r|
||ddddf<   J|
|dd|ddf<   X||	|
ffS )	a  Calculates LSTM layer output given weights/biases, states, and input.

    Args:
        x: The inputs to the LSTM layer including time-rank
            (0th if time-major, else 1st) and the batch-rank
            (1st if time-major, else 0th).
        weights: The weights matrix.
        biases: The biases vector. All 0s if None.
        initial_internal_states: The initial internal
            states to pass into the layer. All 0s if None.
        time_major: Whether to use time-major or not. Default: False.
        forget_bias: Gets added to first sigmoid (forget gate) output.
            Default: 1.0.

    Returns:
        Tuple consisting of 1) The LSTM layer's output and
        2) Tuple: Last (c-state, h-state).
    r   r      Nr6   r>   r      )
r7   r   zerosrangerE   rk   sigmoidmultiplytanhadd)rJ   rb   rc   r   r-   r   sequence_length
batch_sizeunitsc_statesh_statesunrolled_outputstinput_matrixinput_matmul_matrix	sigmoid_1	sigmoid_2tanh_3	sigmoid_4s                      r)   lstmr     s~   6 g:4aa15Oj/a0JM!!E&8:u"56668:u"5666*1-*1-  P8?J*NOOO8:*NOOO ?## 1 1%/?qAAAqqqzzQqqq!QQQwZ~|X&>QGGG ig>>G/519uqy3H0HIKWXX	;x33/1U7
;<<	,QQQ	0A-ABCC6(BK	6$B$BCC/519uqy3H0HIJJ	;y"'(*;*;<<  	1(0Q111W%%(0QQQ111W%%h111r+   ri   rs   on_value	off_valuer   c                    t          | t                    r!t          j        | t          j                  } n5t
          r.t          | t
          j                  r|                                 } | j        t          j	        k    r!| 
                    t          j                  } d}|dk    rt          j        |           dz   }t          j        |           |k     s0J d                    t          j        |           |                      | j        }t          j        g ||R           |z  }g }t!          | j                  D ]}dg| j        z  }	dg| j        z  }
d|
|<   t          j        ||                                       |
          }|dk    r&||dz
           |	|dz
  <   t          j        ||	          }|                    |           |                    |            ||t-          |          <   |
                    |          S )a  One-hot utility function for numpy.

    Thanks to qianyizhang:
    https://gist.github.com/qianyizhang/07ee1c15cad08afb03f5de69349efc30.

    Args:
        x: The input to be one-hot encoded.
        depth: The max. number to be one-hot encoded (size of last rank).
        on_value: The value to use for on. Default: 1.0.
        off_value: The value to use for off. Default: 0.0.

    Returns:
        The one-hot encoded equivalent of the input array.
    r   r   r   r   z<ERROR: The max. index of `x` ({}) is larger than depth ({})!r6   rt   )r?   r[   r   r   r\   rO   rP   rN   r   bool_rZ   int_maxformatr7   onesr   ndimarangerD   tiler   tuple)rJ   rs   r   r   r   r7   r<   indicesry   tilesr3   rs               r)   rw   rw     s   0 !S HQbh'''	 :a.. GGII 	w"(HHRW zzq		A
q		EELL
q		5  	 GE
'%
(
(
(9
4CG16]]  afC!&L!IeAh''**q55 Q<E!a%L5!!AqNN1"Cg::er+   c                      t           r.t           t           j                  r                                   j        t          j         fdt          |          D             d          S )Nc                     g | ]X\  }}t          t                    d k    r|         ndd|f         |                              t          j                  YS )r   Nrr   )rw   r$   rZ   r   rA   )r2   ry   r%   r7   rJ   s      r)   r{   z)one_hot_multidiscrete.<locals>.<listcomp>  sl     	
 	
 	
1 CJJ!OOAaDD111a4BBBII"*UU	
 	
 	
r+   rt   r>   )rO   r?   rP   rN   r7   r   rE   r   )rJ   depthsr7   s   ` @r)   one_hot_multidiscreter     s      Au|,, GGIIGE>	
 	
 	
 	
 	
!&))	
 	
 	
    r+   alphac                 4    t          j        | | |z  |           S )zImplementation of the leaky ReLU function.

    y = x * alpha if x < 0 else x

    Args:
        x: The input values.
        alpha: A scaling ("leak") factor to use for negative x.

    Returns:
        The leaky ReLU output for x.
    )r   maximum)rJ   r   s     r)   relur   "  s     :aUA&&&r+   
derivativec                 L    |r| d| z
  z  S ddt          j        |            z   z  S )aY  
    Returns the sigmoid function applied to x.
    Alternatively, can return the derivative or the sigmoid function.

    Args:
        x: The input to the sigmoid function.
        derivative: Whether to return the derivative or not.
            Default: False.

    Returns:
        The sigmoid function (or its derivative) applied to x.
    r   )r   exp)rJ   r   s     r)   r   r   2  s3      $AE{Ar

N##r+   rt   r=   epsilonc                     |pt           }t          j        |           }t          j        |t          j        ||d          z  |          S )a{  Returns the softmax values for x.

    The exact formula used is:
    S(xi) = e^xi / SUMj(e^xj), where j goes over all elements in x.

    Args:
        x: The input to the softmax function.
        axis: The axis along which to softmax.
        epsilon: Optional epsilon as a minimum value. If None, use
            `SMALL_NUMBER`.

    Returns:
        The softmax over x.
    T)keepdims)SMALL_NUMBERr   r   r   rC   )rJ   r=   r   x_exps       r)   softmaxr   F  sE    $ %GF1IIE :ebfUD4@@@@'JJJr+   )r   r0   )T)NN)NFT)r   )NNFr   )ri   rj   )rt   N)8collectionsr   typesr   typingr   r   rN   r   r_   gymnasium.spacesr   r   ray._common.deprecationr	   ray.rllib.utils.annotationsr
   ray.rllib.utils.frameworkr   r   ray.rllib.utils.typingr   r   r   r   tf1rT   tfvrO   _r   LARGE_INTEGERMIN_LOG_NN_OUTPUTMAX_LOG_NN_OUTPUTr[   r@   r*   boolrI   ra   strrl   r   r   r   r   r   r   rA   typerw   r   r   r   listr    r+   r)   <module>r      s   # # # # # # " " " " " " ! ! ! ! ! ! ! !      4 4 4 4 4 4 4 4 . . . . . . 1 1 1 1 1 1 E E E E E E E E S S S S S S S S S S S S}Rq    	4
    3 
   	  
: 	4
  
 ;?3B 3B
3B)1$3BZ3B 3B 3B	  
3Bl &* &*( &*t &*GW &* &* &* &*R  $(#	&G &G	z&GZ&G RZ &G }	&G
 Z&G &G &G &GR  ,0	C CCK(C C 	C
 C C C CL # # #L  "* U RZ     	&rz 	&bj 	& 	& 	& 	&  $(48B2 B2ZB2 RZ B2 &bj1	B2
 B2 B2 B2 B2 B2J  *9 9Z_99 9 	9
 9 Z9 9 9 9x $(I     ' 'BJ 'u 'rz ' ' ' ' $ $rz $t $
 $ $ $ $& KOK KRZK&)K9A%KZK K K K K Kr+   