
    &`iY                        d Z ddlmZmZmZmZ ddlZddlZ	ddl
Z
ddlmZmZmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlm Z m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z'm(Z( ddl)m*Z*m+Z+m,Z, ddl-m.Z.  e#            \  Z/Z0Z1e  G d de0re0j2        j3        j4        ne5                      Z6e  G d de                      Z7 G d de          Z8 G d de          Z9dS )a  
[1] - Attention Is All You Need - Vaswani, Jones, Shazeer, Parmar,
      Uszkoreit, Gomez, Kaiser - Google Brain/Research, U Toronto - 2017.
      https://arxiv.org/pdf/1706.03762.pdf
[2] - Stabilizing Transformers for Reinforcement Learning - E. Parisotto
      et al. - DeepMind - 2019. https://arxiv.org/pdf/1910.06764.pdf
[3] - Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context.
      Z. Dai, Z. Yang, et al. - Carnegie Mellon U - 2019.
      https://www.aclweb.org/anthology/P19-1285.pdf
    )AnyDictOptionalUnionN)BoxDiscreteMultiDiscrete)deprecation_warning)ModelV2)GRUGateRelativeMultiHeadAttentionSkipConnection)RecurrentNetwork)	TFModelV2)SampleBatch)ViewRequirement)OldAPIStackoverride)try_import_tf)get_base_struct_from_space)flatten_inputs_to_1d_tensorone_hot)ListModelConfigDict
TensorType)log_oncec                   P     e Zd ZdZ	 d
dededee         f fdZdedefd	Z	 xZ
S )PositionwiseFeedforwardzA 2x linear layer with ReLU activation in between described in [1].

    Each timestep coming from the attention head will be passed through this
    layer separately.
    Nout_dim
hidden_dimoutput_activationc                 F    t                      j        di | t          j        j                            |t          j        j                  | _        t          j        j                            ||          | _	        t          d          rt          d           d S d S )N
activationpositionwise_feedforward_tfz5rllib.models.tf.attention_net.PositionwiseFeedforwardold )super__init__tfkeraslayersDensennrelu_hidden_layer_output_layerr   r
   )selfr   r    r!   kwargs	__class__s        u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/models/tf/attention_net.pyr*   z PositionwiseFeedforward.__init__/   s     	""6"""X_22uz 3 
 

  X_22 1 3 
 
 122 	K     	 	    inputsreturnc                 X    ~|                      |          }|                     |          S N)r1   r2   )r3   r8   r4   outputs       r6   callzPositionwiseFeedforward.callE   s,    ##F++!!&)))r7   r;   )__name__
__module____qualname____doc__intr   r   r*   r   r=   __classcell__r5   s   @r6   r   r   '   s          ,0	   $C=	     ,*: *J * * * * * * * *r7   r   c                       e Zd ZdZdej        j        dej        j        dedede	deded	ed
edef fdZ
 ee          dedee         dedeee         ffd            Z ee          deej                 fd            Z xZS )TrXLNetz"A TrXL net Model described in [1].observation_spaceaction_spacenum_outputsmodel_confignamenum_transformer_unitsattention_dim	num_headshead_dimposition_wise_mlp_dimc                    t          d          rt          d           t                                          |||||           || _        || _        || _        |	| _        |d         | _        |j	        d         | _
        t          j        j                            | j        | j
        fd          }t          j        j                            |          |          }t!          | j                  D ]} t#          t%          |||	dd	
          d	          |          } t#          t'          ||
                    |          }t          j        j                            d          |          }t          j        j                            | j        t          j        j        j        d          |          }t          j        j                            |g|g          | _        d	S )ar  Initializes a TrXLNet object.

        Args:
            num_transformer_units: The number of Transformer repeats to
                use (denoted L in [2]).
            attention_dim: The input and output dimensions of one
                Transformer unit.
            num_heads: The number of attention heads to use in parallel.
                Denoted as `H` in [3].
            head_dim: The dimension of a single(!) attention head within
                a multi-head attention unit. Denoted as `d` in [3].
            position_wise_mlp_dim: The dimension of the hidden layer
                within the position-wise MLP (after the multi-head attention
                block within one Transformer unit). This is the size of the
                first of the two layers within the PositionwiseFeedforward. The
                second layer always has size=`attention_dim`.
        trxl_net_tfz%rllib.models.tf.attention_net.TrXLNetr&   max_seq_lenr   r8   shaperK   FNr   rN   rO   input_layernormr!   )fan_in_layeraxislogitsr$   rK   )r   r
   r)   r*   rL   rM   rN   rO   rS   rU   obs_dimr+   r,   r-   Inputr.   ranger   r   r   LayerNormalizationrI   activationslinearmodelsModel
base_model)r3   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   r8   E_out_MHA_outr\   r5   s                   r6   r*   zTrXLNet.__init__O   s   < M"" 	;    	|[,	
 	
 	
 &;"*" '6(.q1&&#T\2 ' 
 
 %%m44V<<t122 	G 	GA	n*)'%$)&*   "	 	 	 	 	GN'7LMM  E HO66B6??FFEE &&)=)D8 ' 
 

  (///6(CCr7   r8   stateseq_lensr9   c                    |d         }t                               ||fd          d d | j         d f         }|                     |g          }t                               |          d         }|d d | d f         }||gfS )Nr      rZ   )r+   concatrS   rf   rU   )r3   r8   rj   rk   observationsr\   Ts          r6   forward_rnnzTrXLNet.forward_rnn   s     Qxyy,!7ay@@TEUDUDWDWAWX,00HHVQA233~%%r7   c                 Z    t          j        | j        | j        ft           j                  gS r;   )npzerosrS   r^   float32r3   s    r6   get_initial_statezTrXLNet.get_initial_state   s%    
 $*DL92:FFGGr7   )r>   r?   r@   rA   gymspacesSpacerB   r   strr*   r   r   r   r   rq   rs   ndarrayrw   rC   rD   s   @r6   rF   rF   K   sn       ,,GD:+GD j&GD 	GD
 &GD GD  #GD GD GD GD  #GD GD GD GD GD GDR X& &)-j)9&EO&
d:&	'& & &  &" XH4
#3 H H H  H H H H Hr7   rF   c                   d    e Zd ZdZdddddddddd	ej        j        d
ej        j        dee         de	de
dedededededededef fdZ ee          dee         dedeee         ffd            Z ee          deej                 fd            Z ee          defd            Z xZS )GTrXLNetas  A GTrXL net Model described in [2].

    This is still in an experimental phase.
    Can be used as a drop-in replacement for LSTMs in PPO and IMPALA.

    To use this network as a replacement for an RNN, configure your Algorithm
    as follows:

    Examples:
        >> config["model"]["custom_model"] = GTrXLNet
        >> config["model"]["max_seq_len"] = 10
        >> config["model"]["custom_model_config"] = {
        >>     num_transformer_units=1,
        >>     attention_dim=32,
        >>     num_heads=2,
        >>     memory_inference=100,
        >>     memory_training=50,
        >>     etc..
        >> }
    rm   @      2       g       @)rL   rM   rN   memory_inferencememory_trainingrO   rP   init_gru_gate_biasrG   rH   rI   rJ   rK   rL   rM   rN   r   r   rO   rP   r   c                    t                                          |||||           | _        | _        | _        |	 _        |
 _        | _        |d          _        |j	        d          _
        t          j        j                            d j
        fd          } fdt           j                  D             }t          j        j                             j                  |          }|g}t           j                  D ](} t#          t%           j        ||dt          j        j                  t+          |          d	                    |d
z                       |||                   } t#          t          j                            t          j        j                            d          t3           j        |t          j        j                  f          t+          |          d                    |d
z                       |          }|                    |           *d _        d _        |rt          j        j                             j        dd          |           _        t          j        j                            d
dd          |          } j        |g}n|g} j         _        t          j                            |g|z   ||dd         z              _         j                                          t           j                  D ]}tC          dd j        f          }tE          d                    |          d                     j                   j        |           j#        d                    |          <   tE          |d           j#        d                    |          <   dS )a  Initializes a GTrXLNet instance.

        Args:
            num_transformer_units: The number of Transformer repeats to
                use (denoted L in [2]).
            attention_dim: The input and output dimensions of one
                Transformer unit.
            num_heads: The number of attention heads to use in parallel.
                Denoted as `H` in [3].
            memory_inference: The number of timesteps to concat (time
                axis) and feed into the next transformer unit as inference
                input. The first transformer unit will receive this number of
                past observations (plus the current one), instead.
            memory_training: The number of timesteps to concat (time
                axis) and feed into the next transformer unit as training
                input (plus the actual input sequence of len=max_seq_len).
                The first transformer unit will receive this number of
                past observations (plus the input sequence), instead.
            head_dim: The dimension of a single(!) attention head within
                a multi-head attention unit. Denoted as `d` in [3].
            position_wise_mlp_dim: The dimension of the hidden layer
                within the position-wise MLP (after the multi-head attention
                block within one Transformer unit). This is the size of the
                first of the two layers within the PositionwiseFeedforward. The
                second layer always has size=`attention_dim`.
            init_gru_gate_bias: Initial bias values for the GRU gates
                (two GRUs per Transformer unit, one after the MHA, one after
                the position-wise MLP).
        rS   r   Nr8   rT   c           	          g | ]M}t           j        j                            d j        ft           j        d                    |                    NS )Nzmemory_in_{})rU   dtyperK   )r+   r,   r-   r_   rM   ru   format.0ir3   s     r6   
<listcomp>z%GTrXLNet.__init__.<locals>.<listcomp>  se     
 
 
  HO!!T/0j#**1-- "  
 
 
r7   TrV   zmha_{}rm   )rX   rK   )memoryrY   rZ   )r   r    r!   zpos_wise_mlp_{}r\   r]   values)r8   outputsg      g      ?rU   zstate_out_{}-{}:-1)shiftbatch_repeat_valuespacestate_in_{}F)r   used_for_training)$r)   r*   rL   rM   rN   r   r   rO   rS   rU   r^   r+   r,   r-   r_   r`   r.   r   r   r/   r0   r   r   
Sequentialra   r   append_logits
_value_outrI   re   
trxl_modelsummaryr   r   view_requirements)r3   rG   rH   rI   rJ   rK   rL   rM   rN   r   r   rO   rP   r   input_layer
memory_insrg   memory_outsr   ri   
values_outoutsr   r5   s   `                      r6   r*   zGTrXLNet.__init__   s    \ 	|[,	
 	
 	
 &;"*" 0. '6(.q1 ho++42FX+VV
 
 
 
 4566
 
 

 %%d&899+FF g t122 	& 	&A
n* .'%$(&(ej   %%788__QU++
 
 
 JqM
+ 
+ 
+GN##:::CC/$($6'<.0ej  	 	 %%788&--a!e44    E" u%%%% "8?00 T 1   DL ..qT.QQRWXXJL*-DD7D#1D(..=:-tk#2#>N7N ) 
 
 	!!! t122 	 	Ac$*<)>???E>M%%a((ood&;<<#'#3? ? ?D"=#7#7#:#:; @Ou@ @ @D">#8#8#;#;<<	 	r7   rj   rk   r9   c           	          |J t                               |          d         }|t          j                 }t                               |          }|d         |z  }t                               |t                               d|g|dd          gd                    }                     |g|z             } j        @t                               |d         d j        g          }	|d          _	        |dd          }
n2t                               |d         d j
        g          }	|dd          }
|	 fd|
D             fS )Nr   rY   rm   rZ   r   c                 T    g | ]$}t                               |d j        g          %S )rY   )r+   reshaperM   )r   mr3   s     r6   r   z$GTrXLNet.forward.<locals>.<listcomp>r  s/    RRRRZZB(:#;<<RRRr7   )r+   rU   r   OBSr   rn   r   r   rI   r   rM   )r3   
input_dictrj   rk   Bro   rU   rp   all_outoutr   s   `          r6   forwardzGTrXLNet.forwardZ  s-    ### HHXq!!+/2&&!HMzz,		B7E!""I:NUV	0W0WXX//<.5"899<#**WQZ"d.>)?@@C%ajDO!!""+KK**WQZ"d.@)ABBC!!""+KRRRRkRRRRRr7   c                 D      fdt           j                  D             S )Nc                     g | ]D}t                               j        d                     |                   j        j                  ES r   )r+   rt   r   r   r   rU   r   s     r6   r   z.GTrXLNet.get_initial_state.<locals>.<listcomp>v  sQ     
 
 
 HHT+M,@,@,C,CDJPQQ
 
 
r7   )r`   rL   rv   s   `r6   rw   zGTrXLNet.get_initial_statet  s9    
 
 
 
4566
 
 
 	
r7   c                 D    t                               | j        dg          S )NrY   )r+   r   r   rv   s    r6   value_functionzGTrXLNet.value_function{  s    zz$/B4000r7   )r>   r?   r@   rA   rx   ry   rz   r   rB   r   r{   floatr*   r   r   r   r   r   r   rs   r|   rw   r   rC   rD   s   @r6   r~   r~      s        : &' "!%'$'O O O:+O j&O c]	O
 &O O  #O O O O O O  #O "O O O O O Od XgS!%j!1S=GS
d:&	'S S S S2 X
4
#3 
 
 
  
 Xg1
 1 1 1 1 1 1 1 1r7   r~   c                   T    e Zd ZdZdej        j        dej        j        dedede	f
 fdZ
 ee          dee	ef         d	ee         d
edeee         ffd            Z ee          defd            Z ee          deeej                 ee         f         fd            Z xZS )AttentionWrapperzGGTrXL wrapper serving as interface for ModelV2s that set use_attention.	obs_spacerH   rI   rJ   rK   c                 4   t          d          rt          d           t                                          ||d ||           |d         | _        |d         | _        t          | j                  | _        d| _	        t          j        | j                  D ]}t          |t                    r| xj	        |j        z  c_	        -t          |t                    r(| xj	        t!          j        |j                  z  c_	        j|j        5| xj	        t)          t!          j        |j                            z  c_	        | xj	        t)          t-          |                    z  c_	        | j        r| xj        | j        | j	        z  z  c_        | j        r| xj        | j        z  c_        |}|d         | _        | j        Nt2          j                            t9          d          t9          d	          | j        ft           j        
          }n|}t=          ||d |d|d         | j        |d         |d         |d         |d         |d         |d                   | _        t@          j!        j"        #                    | j        j        f          }	|| _        tA          j!        j"        $                    | j        d           |	          }
t@          j!        j%        &                    |	g|
g          | _'        tA          j!        j"        $                    dd           |	          }
t@          j!        j%        &                    |	g|
g          | _(        | j        j)        | _)        | j*        | j)        d         _+        | j        rKtY          tZ          j.        | j        d/                    | j                            | j)        tZ          j0        <   | j        rGtY          tZ          j1        d/                    | j                            | j)        tZ          j2        <   d S d S )N attention_wrapper_tf_deprecationz2ray.rllib.models.tf.attention_net.AttentionWrapperr&   attention_use_n_prev_actionsattention_use_n_prev_rewardsr   rM   z-infinf)rU   r   gtrxlattention_num_transformer_unitsattention_num_headsattention_head_dimattention_memory_inferenceattention_memory_trainingattention_position_wise_mlp_dimattention_init_gru_gate_bias)rL   rM   rN   rO   r   r   rP   r   r   r#   rm   obsr   )r   r   )r   )3r   r
   r)   r*   use_n_prev_actionsuse_n_prev_rewardsr   rH   action_space_struct
action_dimtreeflatten
isinstancer   nr	   rs   sumnvecrU   rB   prodlenrI   rM   rx   ry   r   r   ru   r~   r   r+   r,   r-   r_   r.   rd   re   _logits_branch_value_branchr   r   r   r   r   ACTIONSr   PREV_ACTIONSREWARDSPREV_REWARDS)r3   r   rH   rI   rJ   rK   r   cfgin_spaceinput_r   r5   s              r6   r*   zAttentionWrapper.__init__  s    677 	H    	L$dKKK"./M"N"./M"N#=d>O#P#P \$":;; 	3 	3E%** 357*E=11 326%*#5#55(3rwu{';';#<#<<3s5zz??2 " 	J 7$/ II" 	8 77 1'z~~fuU||D4D3Fbj &  HH !H
 "%&G"H,/0-. !=> ;<"%&G"H"#AB
 
 

$ &&dj.D-F&GG ' ho##D$4#FFvNN ho33VHseDDho##A$#77??X_22F8cUCC!%!=.2nu%+ " 	?N#'ood&=>>@ @ @D";#;<
 " 	?N#8??4;R+S+S@ @ @D";#;<<<	 	r7   r   rj   rk   r9   c                    |J |                      |g d           \  }}g }| j        r|t          j                 }| j        d         rit          || j        d          }t                              |t          	                    |          d         dg          }|
                    |           ntt          | j        t                    rKt          | j                  D ]4}	|
                    t          |d d |	f         | j                             5nt          | j        t                     rt          d| j        | j        j	        d                   D ]m}	|
                    t          t                              |d d |	|	| j        j	        d         z   f         t          j                  | j                             nn`|
                    t                              t                              |t          j                  d| j        | j        z  g                     | j        rh|
                    t                              t                              |t          j                 t          j                  d| j        g                     |r t                              |g|z   d          }|x|d	<   |d
<   |                     |||          \  | _        }
|                     | j                  }||
fS )N_disable_action_flatteningT)spaces_struct	time_axisr   rY   )r   rm   rZ   obs_flatr   )_wrapped_forwardr   r   r   rJ   r   r   r+   r   rU   r   r   rH   r   r`   r   r	   castru   r   r   r   rn   r   	_featuresr   )r3   r   rj   rk   wrapped_outrh   prev_a_rprev_n_actionsflatr   r   	model_outs               r6   r   zAttentionWrapper.forward  s    ###..z2tDDQ  " -	'(@AN  !=> (2""&":"   zz$$):B(?@@%%%%
 d/:: "4#:;;   #N111a4$8$:KLL      1=AA "42D4E4KA4N    !# "$2()1q43D3J13M/M+M(M%& %'J	!" !" '+&7  
 
 
 
 OO

GGNBJ??!84?!JK    " 	OO

GGJ{'?@"*MM01     	F))[MH$<1)EEK 6A@
:E!2&*jjUH&M&M#''77	+%%r7   c                     | j         
J d            t                              |                     | j                   dg          S )NzMust call forward() first!rY   )r   r+   r   r   rv   s    r6   r   zAttentionWrapper.value_function4  s?    ~))+G)))zz$,,T^<<rdCCCr7   c                 N      fdt           j        j                  D             S )Nc                     g | ]C}t          j        j        j        d                     |                   j        j                  DS r   )rs   rt   r   r   r   r   rU   r   s     r6   r   z6AttentionWrapper.get_initial_state.<locals>.<listcomp>;  sQ     
 
 
 HTZ1-2F2Fq2I2IJPVWW
 
 
r7   )r`   r   rL   rv   s   `r6   rw   z"AttentionWrapper.get_initial_state9  s;    
 
 
 
4:;<<
 
 
 	
r7   )r>   r?   r@   rA   rx   ry   rz   rB   r   r{   r*   r   r   r   r   r   r   r   r   r   rs   r|   rw   rC   rD   s   @r6   r   r     s{       QQ^:#^ j&^ 	^
 &^ ^ ^ ^ ^ ^ ^@ XN&j)N& JN& 	N&
 d:&	'N& N& N&  N&` XgD
 D D D D Xg
5bj)94
;K)K#L 
 
 
 
 
 
 
 
r7   r   ):rA   typingr   r   r   r   	gymnasiumrx   numpyrs   r   gymnasium.spacesr   r   r	   ray._common.deprecationr
   ray.rllib.models.modelv2r   ray.rllib.models.tf.layersr   r   r   !ray.rllib.models.tf.recurrent_netr   ray.rllib.models.tf.tf_modelv2r   ray.rllib.policy.sample_batchr   !ray.rllib.policy.view_requirementr   ray.rllib.utils.annotationsr   r   ray.rllib.utils.frameworkr   "ray.rllib.utils.spaces.space_utilsr   ray.rllib.utils.tf_utilsr   r   ray.rllib.utils.typingr   r   r   ray.utilr   tf1r+   tfvr,   r-   Layerobjectr   rF   r~   r   r(   r7   r6   <module>r      s  	 	 . - - - - - - - - - - -          9 9 9 9 9 9 9 9 9 9 7 7 7 7 7 7 , , , , , ,         
 ? > > > > > 4 4 4 4 4 4 5 5 5 5 5 5 = = = = = = = = = = = = = = 3 3 3 3 3 3 I I I I I I I I I I I I I I D D D D D D D D D D      }R  *  *  *  *  *rEbho33v  *  *  *F cH cH cH cH cH cH cH cHLK1 K1 K1 K1 K1 K1 K1 K1\~
 ~
 ~
 ~
 ~
y ~
 ~
 ~
 ~
 ~
r7   