
    &`iհ                        d dl Z d dlZd dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZ d dlZd dlZd dlZd dlmZmZmZmZmZm
Z d dlZd dlmZ d dlmZ d dlmZmZ d dlm Z m!Z! d dl"m#Z# d d	l$m%Z% d d
l&m'Z'm(Z(m)Z) d dl*m+Z+m,Z,m-Z-m.Z. d dl/m0Z0 d dl1m2Z2 erd dl3m4Z4m5Z5 d dl6m7Z7  e'            \  Z8Z9 e(            \  Z:Z;Z< e)            \  Z=Z9 e j>        e?          Z@ eddd          d             ZA eddd          d             ZB eddd          d             ZCdKdZD	 dLdZEdMdeFfd ZGde, d!fd"d#d$eHd%e	eI         d&eJfd'ZK	 dNd)e0d*eHd+eHd,e	eH         fd-ZLd.e0d,dfd/ZMe#d.e0fd0            ZNdOd1ZOd2d3d4ed5         d6d7d8eeJef         d9eFd,df
d:ZPdPd;eFd,d<fd=ZQ G d> d?          ZRd@d5dAejS        d,ejS        fdBZT	 	 	 	 dQd@eJdDd7dEeIdFeIdGe	e
eJdHf                  dIeIfdJZUdS )R    N)TYPE_CHECKINGAnyDictOptionalTupleType)Boxr   DiscreteMultiBinaryMultiDiscreter   )tune)
Deprecated)DEFAULT_MODULE_IDColumns)is_atariwrap_deepmind)OldAPIStack)UnsupportedSpaceException)try_import_jaxtry_import_tftry_import_torch)'DIFF_NUM_GRAD_UPDATES_VS_SAMPLER_POLICYENV_RUNNER_RESULTSEVALUATION_RESULTSNUM_ENV_STEPS_TRAINED)
ResultDict)TRAINING_ITERATION)	AlgorithmAlgorithmConfig)DatasetReaderz8ray.rllib.utils.test_utils.add_rllib_example_script_argsz6ray.rllib.examples.utils.add_rllib_example_script_argsF)oldnewerrorc                      ddl m}  || i |S )Nr   )add_rllib_example_script_args)ray.rllib.examples.utilsr%   )argskwargsr%   s      n/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/utils/test_utils.pyr%   r%   7   s-     GFFFFF(($9&999    z&ray.rllib.utils.test_utils.should_stopz$ray.rllib.examples.utils.should_stopc                      ddl m}  || i |S )Nr   )should_stop)r&   r,   )r'   r(   r,   s      r)   r,   r,   B   s+     544444;''''r*   z>ray.rllib.utils.test_utils.run_rllib_example_script_experimentz<ray.rllib.examples.utils.run_rllib_example_script_experimentc                      ddl m}  || i |S )Nr   )#run_rllib_example_script_experiment)r&   r.   )r'   r(   r.   s      r)   r.   r.   M   s-     MLLLLL..????r*      c           	      b   t          | t                    rt          |t                    s
J d            t          |                                           }|                                 D ]I\  }}||v sJ d| d|             t          |||         ||||           |                    |           J|r+J d                    t          |          |                      dS t          | t          t          f          rt          |t          t          f          s
J d            t          |          t          |           k    s8J d                    t          |          t          |                                 t          |           D ] \  }	}t          |||	         ||||           !dS t          | t          j        t          f          rf|du r1t          |           t          |          usJ d	|  d
| d            dS t          |           t          |          u sJ d	|  d| d            dS | A|?t          | t                    s*t          | t                     rKt          |t                     r6|du r| |k    sJ d	|  d| d            dS | |k    sJ d	|  d| d            dS t#          | d          r7| j        t&          k    s<t          | j                                      d          st          | t*                    rV	 t          j                            | |           |du rJ d	|  d| d            dS # t0          $ r}
|du r|
Y d}
~
dS d}
~
ww xY wt2          t          |t2          j        t2          j        f          r=t8                                          r|                                }nt?          d          t          | t2          j        t2          j        f          rt2                                          r|                                 } n[t2                                           5 }|!                    |           } t          | |||||          cddd           S # 1 swxY w Y   tD          t          | tD          j                  r8| #                                $                                                                } t          |tD          j                  r8|#                                $                                                                }ddl%m&} t          | |          r| '                                } t          ||          r|'                                }|Z|X	 t          j        (                    | ||           |du rJ d	|  d| d            dS # t0          $ r}
|du r|
Y d}
~
dS d}
~
ww xY w|d}|d}	 t          j        )                    | |||           |du rJ d	|  d| d            dS # t0          $ r}
|du r|
Y d}
~
dS d}
~
ww xY w)ao  
    Checks two structures (dict, tuple, list,
    np.array, float, int, etc..) for (almost) numeric identity.
    All numbers in the two structures have to match up to `decimal` digits
    after the floating point. Uses assertions.

    Args:
        x: The value to be compared (to the expectation: `y`). This
            may be a Tensor.
        y: The expected value to be compared to `x`. This must not
            be a tf-Tensor, but may be a tf/torch-Tensor.
        decimals: The number of digits after the floating point up to
            which all numeric values have to match.
        atol: Absolute tolerance of the difference between x and y
            (overrides `decimals` if given).
        rtol: Relative tolerance of the difference between x and y
            (overrides `decimals` if given).
        false: Whether to check that x and y are NOT the same.
    z2ERROR: If x is dict, y needs to be a dict as well!z ERROR: y does not have x's key='z'! y=)decimalsatolrtolfalsez3ERROR: y contains keys ({}) that are not in x! y={}z>ERROR: If x is tuple/list, y needs to be a tuple/list as well!z7ERROR: y does not have the same length as x ({} vs {})!Tz
ERROR: x (z) is y ()!z) is not y (Nz) is the same as y (z) is not the same as y (dtypez<UFzD`y` (expected value) must not be a Tensor. Use numpy.ndarray insteadr   )	StatsBase)decimalgHz>)r2   r3   )*
isinstancedictsetkeysitemscheckremoveformatlisttuplelen	enumeratenpbool_boolstrinthasattrr6   object
startswithbytestestingassert_array_equalAssertionErrortf1TensorVariabletfexecuting_eagerlynumpy
ValueErrorSessionruntorchdetachcpuray.rllib.utils.metrics.statsr7   peekassert_almost_equalassert_allclose)xyr1   r2   r3   r4   y_keyskeyvalueiesessr7   s                r)   r>   r>   X   s\   * !T xL!T""XX$XXX"QVVXX'')) 	 	JC!888MMM!MM888%3(DPUVVVVMM# 	
 	
PWWLL!
 
 	
 	
z 	
 	
 
At}	%	% mLt}
 
 	L 	LK	L 	L 
 1vv
 
 
 
 
DKKFFCFF
 

 
 

 "! 	U 	UHAu%1t$eTTTTT	U 	U 
A$'	(	( aLD==77$q'')))+H+H+H1+H+H+H)))))77d1gg%%%'HA'H'H1'H'H'H%%%%% 	
	9a q# #-a#5#5  D==666DDDqDDD66666666HHH1HHH66666 	7OL!"F!2!2c!'ll6M6Md6S6S!2	Au		 "3	J))!Q///}}GG1GG!GGGGGu } 	 	 	~~ ~~~~~	 ?!cj#,788 '')) 		AA$4   !cj#,788 
((** 		AA  $ HHQKK$q8$TQV                    
 !U\** -HHJJNN$$**,,!U\** -HHJJNN$$**,, 	<;;;;;a## 	Aa## 	A <DLL
..q!X.FFF D==K"Kq"K"Ka"K"K"KKK5 != "   E>>G ">>>>> ||L
**1ad*FFF
 D==K"Kq"K"Ka"K"K"KKK5 !=	 "   E>>G ">>>>>sZ   5M 
MMM9*Q00Q47Q49"V2 2
W<WW#X 
X.X))X.c                     ddl m}m |	 t          t	           j                                                                                           n# t          $ r
  j	        Y nw xY wj
        j         fd} fD ]J}| u rft           dd          }|sJ |j        s                               j        }n|j                            d           }t          |d|          }nj        }d	g|u rd
gng z   D ]}dD ]}	| u rddgndgD ]}
t!          j        dd          }dD ]}|rdgng dD ]}t%          d           t%          d|            t%          d|            t%          d|	            t%          d|
            t%          d|            t%          d|             |||||
|	|||           ČɐLdS )a  Tests different combinations of args for algorithm.compute_single_action.

    Args:
        algorithm: The Algorithm object to test.
        include_state: Whether to include the initial state of the Policy's
            Model in the `compute_single_action` call.
        include_prev_action_reward: Whether to include the prev-action and
            -reward in the `compute_single_action` call.

    Raises:
        ValueError: If anything unexpected happens.
    r   )DEFAULT_POLICY_IDSampleBatchc           
         i }| u r
||d<   |d<   |                                 }	t          |t                    rt          j        |	dd          }	d }
rq                                }
|
s[g }
d}d| j        v rK|
                    j        d|          j                                                    |dz  }d| j        v Kr                                 nd }rdnd }|dk    r>| u sJ j	        |	i}r||j
        <   ||j        <   |
r>| j                            d	d
          r|
|d<   nt          |
          D ]\  }}||d| <    t          j        d |                    } j        d|||d|}t          |d         t$                    r)t          j        |d                   |d         |d         f}t          j        d |          }	  j        d|||d|}|s*j                            d          st+          ||           n&# t,          $ r Y nw xY w | j        |	|
f||||||d|}d }|
s|s| u r|\  }}}|rt/          t          j        |
          t          j        |                    D ]b\  }}t2                              |          r|j                                        }nt%          |j                  }t+          ||j                   c|| j        d         }|| j        d         }|dk    r| k    r                    |          s1|s|st          t                    st=          d| d|  d           t          t                    r`|s`| j                            d          rHt          j        t          j         |          dk              r t=          d| d|  d          d S d S d S d S d S d S )N
full_fetch	policy_id            ?r   	state_in_   
input_dictenable_rl_module_and_learnerFstate_inc                 ,    t          j        | d          S Nr   )rE   expand_dimsss    r)   <lambda>z<check_compute_single_action.<locals>._test.<locals>.<lambda>/  s    R^Aq-A-A r*   )rs   exploretimestep   c                     | d         S rw    ry   s    r)   r{   z<check_compute_single_action.<locals>._test.<locals>.<lambda><  s
    !A$ r*   noisy)prev_actionprev_rewardr|   r}   unsquash_actionclip_actionnormalize_actionsclip_actionssinglezReturned action (z) of algorithm/policy z not in Env's action_space g      .@zC should be in normalized space, but seems too large/small for that!r   )!sampler9   r	   rE   clipget_initial_stateview_requirementsappendspaceOBSPREV_ACTIONSPREV_REWARDSconfiggetrD   treemap_structurecompute_actions_from_input_dictrA   arraycompute_single_actionr>   	TypeErrorzipflattenrT   	is_tensorshapeas_listcontainsrW   anyabs) whatmethod_to_test	obs_spacerm   r|   r}   unsquashr   call_kwargsobsru   rf   	action_in	reward_inrs   rz   input_dict_batchedactionaction2	state_out_sisosi_shaperk   action_space	algorithminclude_prev_action_rewardinclude_statemodelpidpols                            r)   _testz*check_compute_single_action.<locals>._test  se    9(2K%'*K$  i%% 	*'#tS))C 		..00H %!oo)@@@OO/A@FMMOO   FA	 &!oo)@@@
 .HQL'')))T	5?CC4	\))3;;;;%/3/J) A7@
;347@
;34 8;??#A5II 8-5Jz** )( 3 3 8 8167
?q??33!,"#A#A:NN" " 9S8 -!  	 F &)T** E(6!9--vay&)D'??F3#3 )#%  "	   +sz~~g'>'> +&'***    0T/
 &%! ( 
 
 
 
F 	 	*z 	*TS[[#) FIq 		*dl844dl96M6MNN * *B<<## .  "x//11HH#BH~~Hh)))){#67H<;~.D
 X%%$)*;*;((00  (2<(E(E !@ @ @d @ @1=@ @   <--
 
 KOO$788
 F26&>>D011	
 !     d        ! &%*;*;
 
 
 
 
 
 
 
s   =H 
H&%H&env_runner_groupNc                     | j         S N)observation_space)ps    r)   r{   z-check_compute_single_action.<locals>.<lambda>  s	    a1 r*   )rn   original_spacer   rs   )TFFTi )TFNzP--------------------------------------------------------------------------------zwhat=zmethod_to_test=zexplore=zfull_fetch=z	unsquash=zclip=)ray.rllib.policy.sample_batchrj   rk   nextiter
env_runnerget_policies_to_train
get_policyAttributeErrorpolicyr   r   getattrlocal_env_runnerr   
for_policyrandomrandintprint)r   r   r   rj   r   r   
worker_setr   r   r|   rm   r}   r   r   rk   r   r   r   r   s   ```           @@@@@r)   check_compute_single_actionr      s     MLLLLLLL C 4	,BBDDEEFF""3''    IE#Lz z z z z z z z z z z z| i  & &9 !,>EEJ:. %0055G		&7BB11S C  	  	+;YGGII-I'jdckk\NNrR 	 	N(  3793D3D5$--5'  J%~a88H$7  /7$PUGG=P=P=P  D!(OOO!.$..111!"DN"D"DEEE!"6W"6"6777!"<
"<"<===!"8h"8"8999!.$..111!E $ . ) * ' ( ( $	 	 	 		!& &s   AA A10A1d   	max_stepsc           
         ddl m} t          j        |          }t	          |          rAt          || j        d         d         | j        d                             d                    }|                                \  }}d\  }}}	d}
|sS|	sU|
|k     rQ || dd||||	|          }|	                    |d         d                   \  }}}}	}|
d	z  }
|s|	s|
|k     Id
S d
S d
S d
S d
S d
S )a[  Checks whether the given policy can infer actions from an env with connectors.

    Args:
        policy: The policy to check.
        env_name: Name of the environment to check
        max_steps: The maximum number of steps to run the environment for.

    Raises:
        ValueError: If the policy cannot infer actions from the environment.
    r   )local_policy_inferencer   dim
framestack)r   r   )        FF)env_idagent_idr   reward
terminated	truncatedinforr   N)
ray.rllib.utils.policyr   gymmaker   r   r   r   resetstep)r   env_namer   r   envr   r   r   r   r   ts
action_outs               r)   check_inference_w_connectorsr     sV    >=====
(8

C }} 
g&u-}W-11,??
 
 
 		IC$5!FJ		
B  rI~~++!	
 	
 	

 4788JqM!<L3M3M0VZD
a   rI~~~~      ~~  r*   z/episode_return_meantune_resultsztune.ResultGrid	min_value
evaluationmetricc                    g }|                                                                  D ]X\  }}|s|4t           d| |v r&|                    |t           d|                     =|                    ||                    Yt	          |          }||k     rt          d| d| d          t          d| d| d           dS )ak  Throws an error if `min_reward` is not reached within tune_results.

    Checks the last iteration found in tune_results for its
    "episode_return_mean" value and compares it to `min_reward`.

    Args:
        tune_results: The tune.Tuner().fit() returned results object.
        min_reward: The min reward that must be reached.
        evaluation: If True, use `evaluation/env_runners/[metric]`, if False, use
            `env_runners/[metric]`, if None, use evaluation sampler results if
            available otherwise, use train sampler results.

    Raises:
        ValueError: If `min_reward` not reached.
    N/`z` of z not reached!z reached! ok)get_dataframeiterrowsr   r   maxrW   r   )r   r   r   r   recorded_valuesr   row
best_values           r)   check_learning_achievedr     s   . O,,..7799 0 03 	0&8#C#C6#C#Cs#J#J""3*<'G'Gv'G'G#HIIII""3v;////_%%JIBVBB)BBBCCC	
2f
2
29
2
2
233333r*   r   resultsupper_limitlower_limitreturnc           	          ddl m} ddlm} | d         |         }||vrdS ||         t                   }||cxk    r|k    sn t          d| d| d| d	          |S )
ay  Verifies that the off-policy'ness of some update is within some range.

    Off-policy'ness is defined as the average (across n workers) diff
    between the number of gradient updates performed on the policy used
    for sampling vs the number of gradient updates that have been performed
    on the trained policy (usually the one on the local worker).

    Uses the published DIFF_NUM_GRAD_UPDATES_VS_SAMPLER_POLICY metric inside
    a training results dict and compares to the given bounds.

    Note: Only works with single-agent results thus far.

    Args:
        results: The training results dict.
        upper_limit: The upper limit to for the off_policy_ness value.
        lower_limit: The lower limit to for the off_policy_ness value.

    Returns:
        The off-policy'ness value (described above).

    Raises:
        AssertionError: If the value is out of bounds.
    r   rj   LEARNER_INFOr   Nz`off_policy_ness` (z) is outside the given bounds (z - r5   )r   rj   $ray.rllib.utils.metrics.learner_infor   r   rP   )r   r   r   rj   r   learner_infooff_policy_nesss          r)   check_off_policynessr     s    < @?????AAAAAA 6?<0L,,t"#45/O
 ?9999k99990/ 0 00 0 +0 0 0
 
 	

 r*   train_resultsc                    ddl m}m}m}m} ||||t
          dfD ]}|| v sJ d| d|  d            t          | d         t                    s
J d            ddlm	}  |            
                    d	| d         d	         i          j        }| |         }|s2t          |          dk    st          |v sJ dt           d
| d            |                                D ]r\  }	}
|	dk    r|
                                D ]Q\  }}|                    d          s|                    d          r"t!          j        |          sJ d| d            Rs| S )Checks proper structure of a Algorithm.train() returned dict.

    Args:
        train_results: The train results dict to check.

    Raises:
        AssertionError: If `train_results` doesn't have the proper structure or
            data in it.
    r   )r   FAULT_TOLERANCE_STATSLEARNER_RESULTSTIMERSr   ' ' not found in `train_results` (r5   &`config` in results not a python dict!r   policiesz' not found in train_results['z']!__all_modules___min_max'key' value not a scalar ()ray.rllib.utils.metricsr   r  r  r  r   r9   r:   %ray.rllib.algorithms.algorithm_configr   update_from_dictis_multi_agentrC   r   r=   endswithrE   isscalar)r  r   r  r  r  rd   r   r  learner_results	module_idmodule_metricsre   s               r)   !check_train_results_new_api_stackr  2  s6               	 
F 
F =   EsEEMEEE !    h  0 0/0 0  FEEEEE 			:}X'>z'JK	L	L	  $O4O  
?##q((,=,P,P,P3! 3 3-3 3 3 -Q,PP
 &5%:%:%<%< 
R 
R!	> ))) )..00 	R 	RJC||F## Rs||F';'; R{5))QQ+Q+Q+Q+QQQ)	R
 r*   c                    ddl m} ddlm}m} ddt
          dddd	d
dddt          fD ]}|| v sJ d| d|  d            dD ]-}|| t
                   v sJ d| d| t
                    d            .t          | d         t                    s
J d            ddl	m
}  |                                d| d         d         i          j        }| d         }||v sJ d| d            d|v st          |v sJ d| d            ||         }|s(t          |          dk    s||v sJ d| d| d            |                                D ]\  }	}
|	dk    r|	dk    r||
v r	|
|         }n|
}|                                D ]Q\  }}|                    d          s|                    d          r"t#          j        |          sJ d| d            R| S )r  r   r   )r   LEARNER_STATS_KEYr   custom_metricsr   iterations_since_restorenum_healthy_workersperftime_since_restoretime_this_iter_stimerstime_total_sr  r  r5   )	episode_len_meanepisode_reward_maxepisode_reward_meanepisode_reward_min
hist_statspolicy_reward_maxpolicy_reward_meanpolicy_reward_minsampler_perfz4' not found in `train_results[ENV_RUNNER_RESULTS]` (r	  r
  r  z)'learner' not in train_results['infos'] (num_steps_trainedz:'num_(env_)?steps_trained' not in train_results['infos'] (z2' not found in train_results['infos']['learner'] (batch_count__all__min_max_r  )r   rj   r   r   r  r   r   r9   r:   r  r   r  r  r   rC   r=   rL   rE   r  )r  rj   r   r  rd   r   r  r   r   r   policy_statslearner_statsre   s                r)   check_train_resultsr4  x  s-    @?????TTTTTTTT 	" F F =   EsEEMEEE !   
 
 
 m$677776 6 6016 6 6 8777 h  0 0/0 0  FEEEEE 			:}X'>z'JK	L	L	   D4!UT!U!U!Ut##'<'D'D'DLDLLL (E'DD %L  
 <  A%%):l)J)J)JC! C C2>C C C *K)JJ
 *//11 R R\- ) ,,():;MM(M'--// 	R 	RJC~~f%% R)?)? R{5))QQ+Q+Q+Q+QQQ)	R
 r*   c           	      $   ddl m}m} t          |           t          |          u sRJ d                    t          t          |                     t          t          |                                          d	d}t          |           |u r || |           dS t          |           |u r| j        |j        k    sJ t                      }| j        	                                D ]2\  }} |||j        |         |           |
                    |           3t          |j                                                  }|                    |          }	|	rJ d|	 d            dS t          dt          t          |                     z             )
a  Check if both batches are (almost) identical.

    For MultiAgentBatches, the step count and individual policy's
    SampleBatches are checked for identity. For SampleBatches, identity is
    checked as the almost numerical key-value-pair identity between batches
    with ray.rllib.utils.test_utils.check(). unroll_id is compared only if
    both batches have an unroll_id.

    Args:
        batch1: Batch to compare against batch2
        batch2: Batch to compare against batch1
    r   )MultiAgentBatchrk   z.Input batches are of different types {} and {}Nc                 F   |                      dd           }|                     dd           }|
|||k    sJ t                      }|                                 D ]7\  }}|dk    rt          |||                    |                    |           8t          |                                          }|                    d           |                    |          }	|r"|	rJ d                    ||	                      d S |	rJ d                    |	                      d S )N	unroll_idz]SampleBatches for policy with ID {} don't share information on the following information: 
{}zGSampleBatches don't share information on the following information: 
{})	r   r;   r=   r>   addr<   discardsymmetric_differencer@   )
_batch1_batch2
_policy_idunroll_id_1unroll_id_2batch1_keyskvbatch2_keys_differences
             r)   check_sample_batchesz.check_same_batch.<locals>.check_sample_batches  sU   kk+t44kk+t44"{'>+----eeMMOO 	 	DAqK!WQZ   OOA',,..))K(((!66{CC  	"   6*k22	 ?   #  6+&& ?  r*   z:MultiAgentBatches don't share the following information: 
.zUnsupported batch type r   )r   r6  rk   typer@   rH   countr;   policy_batchesr=   r9  r<   r;  rW   )
batch1batch2r6  rk   rF  
batch1_idsrn   policy_batch
batch2_ids
differences
             r)   check_same_batchrQ    s    KJJJJJJJ<<4    7>>DLL3tF||,,   ! ! ! !F F||{""VV,,,,,	f	(	(|v|++++UU
'-'<'B'B'D'D 	& 	&#I|  f3I>	   NN9%%%% .335566
44Z@@
	W 	WVVVV	W 	W	W 	W 2Sf5F5FFGGGr*   rr   )training_iteration
algo_classr   algo_configr   	fw_kwargsrR  c          	      .   ddl m} ddlm} t          |i}dD ]{}|                    d                              |d                              t          t          j
                            d	d
                                                  t          t          j
                            d	d
                              }t          d| j         d| d           t          d           t          j        |                                           t#          j        | |                                t#          j        |d                                                    }|                                j        }t#          j        | |                                t#          j        |d                                                    }	|	                                j        }	t/          |t0                   d         |	t0                   d                    |j        r6t/          |d         |         |         |	d         |         |                    ;t/          |d         |         |         d         |	d         |         |         d                    }dS )a  Check if the algorithm is reproducible across different testing conditions:

        frameworks: all input frameworks
        num_gpus: int(os.environ.get("RLLIB_NUM_GPUS", "0"))
        num_workers: 0 (only local workers) or
                     4 ((1) local workers + (4) remote workers)
        num_envs_per_env_runner: 2

    Args:
        algo_class: Algorithm class to test.
        algo_config: Base config to use for the algorithm.
        fw_kwargs: Framework iterator keyword arguments.
        training_iteration: Number of training iterations to run.

    Returns:
        None

    Raises:
        It raises an AssertionError if the algorithm is not reproducible.
    r   r   r   )r   r~   *   )seedr~   )num_env_runnersnum_envs_per_env_runnerRLLIB_NUM_GPUS0)num_gpus_per_learnernum_gpuszTesting reproducibility of z with z workersz
/// configrr   )stopverbose)param_space
run_configr(  r   r3  N)r   rj   r   r   r   	debuggingenv_runnerslearnersrI   osenvironr   	resourcesr   __name__pprintto_dictr   Tuner	RunConfigfitget_best_resultmetricsr>   r   rt   )
rS  rT  rU  rR  rj   r   	stop_dictnum_workersresults1results2s
             r)   check_reproducibiltyrv  .  s   : @?????AAAAAA#%78I  6 6!!r!**66 +Q 7   X%(8H#)N)N%O%O    YRZ^^,<cBBCC    	 	+**= + + + + +	
 	
 	
 	lk))++,,,:#++--~9a@@@
 
 
 #%%	 	
 ++--5:#++--~9a@@@
 
 
 #%%	 	
 ++--5 	'(6'(6	
 	
 	
 3 		 ./@A ./@A   
  ./@A/R ./@A/R   g6 6r*   
batch_sizer    c                 $   ddl m} ddlm} ddlm}m} d}d|d} | |                                d|	                    \  }} | |                                | 
                              d          d          }	 |||	          }
|
S )zReturns a DatasetReader for the cartpole dataset.
    Args:
        batch_size: The batch size to use for the reader.
    Returns:
        A rllib DatasetReader for the cartpole dataset.
    r   r
  )	IOContext)r    get_dataset_and_shardsz&offline/tests/data/cartpole/large.jsonjson)r@   pathsdataset)input_input_config)train_batch_sizeT)actions_in_input_normalized)r   worker_index)	ray.rllib.algorithmsr   ray.rllib.offlinery   ray.rllib.offline.dataset_readerr    rz  offline_datatraining)rw  r   ry  r    rz  pathr  r}  r   ioctxreaders              r)   get_cartpole_dataset_readerr    s     544444++++++       
 4D$t44L''&&il&SS JGQ IOXzX22\d\;;  E ]7E**FMr*   c                   0    e Zd ZdZd ZddedefdZd	 Zd
S )ModelCheckera  Helper class to compare architecturally identical Models across frameworks.

    Holds a ModelConfig, such that individual models can be added simply via their
    framework string (by building them with config.build(framework=...).
    A call to `check()` forces all added models to be compared in terms of their
    number of trainable and non-trainable parameters, as well as, their
    computation results given a common weights structure and values and identical
    inputs to the models.
    c                     || _         i | _        i | _        t          j                            dd          | _        i | _        d S )Ng{Gzg{Gz?)r   param_countsoutput_valuesrE   r   uniformrandom_fill_input_valuemodels)selfr   s     r)   __init__zModelChecker.__init__  sD     
 (*y'8'8'E'E$ r*   rZ   TF	frameworkr   c                    | j                             |          x}| j        |<   t          j        dg|rdgng z   t          | j         j                  z   | j                  }|rt          j	        |i}|r#t          j        d |          |t          j        <   |dk    rddlm}  ||          } ||          }|                    | j        f            ||          }|                                | j        |<   |dk    rt          j        d |          | j        |<   nt          j        d	 |          | j        |<   |S )
z+Builds a new Model for the given framework.)r  rr   c                 N    t          j        dgt          |           z             S )Nrr   )r   )rE   zerosrA   ry   s    r)   r{   z"ModelChecker.add.<locals>.<lambda>  s    "(!tAww777 r*   rZ   r   )convert_to_torch_tensor)value_sequencec                 V    | &|                                                                  nd S r   )r[   rV   ry   s    r)   r{   z"ModelChecker.add.<locals>.<lambda>  s#    !((****,,,4 r*   c                 2    | |                                  nd S r   )rV   ry   s    r)   r{   z"ModelChecker.add.<locals>.<lambda>  s    q}!'')))$ r*   )r   buildr  rE   fullrA   
input_dimsr  r   r   r   r   STATE_INray.rllib.utils.torch_utilsr  _set_to_dummy_weightsget_num_parametersr  r  )	r  r  r   stater   inputsr  outputscomparable_outputss	            r)   r9  zModelChecker.add  s   )-):):Y):)O)OOI& C%'A33R(40F+G+GG(
 
  	+k6*F 	'+'977( (F7#$ KKKKKK,,V44F %-- 	##D4P3R#SSS #U6]] (-'?'?'A'A)$,0,>GG"- -Dy))
 -1,>>>@R- -Dy) r*   c                 L   t          t          | j                                                            }| j                                        D ]}t          || j        |                    | j                                        D ]}t          || j        |         d            dS )zECompares all added Models with each other and possibly raises errors.gMb@?)r2   N)r   r   r  r<   r  valuesr>   r  )r  main_keycrC  s       r)   r>   zModelChecker.check  s     T[--//0011 "))++ 	2 	2A!T&x01111 #**,, 	@ 	@A!T'1?????	@ 	@r*   N)rZ   TF)	rj  
__module____qualname____doc__r  rH   r   r9  r>   r   r*   r)   r  r    si            , ,S ,c , , , ,\@ @ @ @ @r*   r  algr   c                     g }t          d          D ]7}|                    t          |                     |                               8t	          j        |          S )aQ  Returns the mean action computed by the given algorithm.

    Note: This makes calls to `Algorithm.compute_single_action`

    Args:
        alg: The constructed algorithm to run inference on.
        obs: The observation to compute the action for.

    Returns:
        The mean action computed by the algorithm over 5000 samples.

    i  )ranger   floatr   rE   mean)r  r   outr   s       r)   _get_mean_action_from_algorithmr    sW     C4[[ : :

52237788999973<<r*   Tr   traincheck_bounds
frameworks.use_gpuc                    ddl m} ddlm ddlm ddlm t          d          t          ddd	t          j        
          t          dddt          j        
          t          g d          t          t          d          t          d          t          ddd	t          j        
          g          t          t          d          t          dddt          j        
          t          dt          t          d          t          d          g          i          d          dt!          g d          t          d          t          ddd	t          j        
          t          dddt          j        
          t          dddt          j        
          t          t          d          t          ddd	t          j        
          g          t          t          d          t          ddd	t          j        
          d          dg dddgdx}}d|d<   ||d<   f	d}	|sd }t#          j        |	          }
|
                    |rd!nd"          }
                                D ].}|}t#          j        |
                    | |||                     /                                D ].}|}t#          j        |
                    | |||                     /d#S )$aD  Checks whether the given algorithm supports different action and obs spaces.

        Performs the checks by constructing an rllib algorithm from the config and
        checking to see that the model inside the policy is the correct one given
        the action and obs spaces. For example if the action space is discrete and
        the obs space is an image, then the model should be a vision network with
        a categorical action distribution.

    Args:
        alg: The name of the algorithm to test.
        config: The config to use for the algorithm.
        train: Whether to train the algorithm for a few iterations.
        check_bounds: Whether to check the bounds of the action space.
        frameworks: The frameworks to test the algorithm with.
        use_gpu: Whether to check support for training on a gpu.


    r   )	RandomEnv)ComplexInputNetwork)FullyConnectedNetwork)VisionNetworkr/   ro   rp   )r/   )r6      )r~   r  )rr   r~   r     r~   rr   a)action_choice
parametersyet_another_nested_dict)discrete
continuousint_actionsmultidiscreterB   r:   )r  
   r  )r/   r/   )T   r  rr   r  )taskposition)multi_binaryr  r  vector2dimagerB   r:   )r  r  r  r  rB   r:   r  r  ERROR	log_levelr   c                   	 |                                 }|                                 |j        rf|vr/t                              d                    |                     d S |vr/t                              d                    |                     d S |d         }|         }|         }t          d                    | |||                     t          j                    }|                    t          t          ||t          dddt          j                  d          	                     d
}		 |                                }
| dvr|dv r*t          |
                                j                  sJ na|dk    r*t          |
                                j                  sJ n1|dk    r+t          |
                                j        f          sJ r|
                                 |
                                 n# t&          j        j        $ rt}t-          |j                  dk    r#t          |j        d         t0                    rd}	n/t          |j        d         j        d         t0                    rd}	n Y d }~nd }~wt0          $ r d}	Y nw xY wt          d                    |	t          j                    |z
                       d S )Nz1Skipping PPO test with RLModules for obs space {}z4Skipping PPO test with RLModules for action space {}r  z7=== Testing {} (fw={}) action_space={} obs_space={} ===rp   r   )r   r6   )r   r   reward_spacep_terminatedcheck_action_bounds)
env_configok)SACPPO)atarir  r  r  r~   unsupportedr   zTest: {}, ran in {}s)copyvalidatert   loggerwarningr@   r   timer  r:   r	   rE   float32r  r9   r   r   r  r`  ray
exceptionsRayActorErrorrC   r'   r   )r  r   a_nameo_nameconfig_copyfwr   r   t0statalgorg   TorchComplexNet
TorchFCNetTorchVisionNetaction_spaces_to_testr  observation_spaces_to_test rlmodule_supported_action_spaces%rlmodule_supported_observation_spacesr  s               r)   	_do_checkz)check_supported_spaces.<locals>._do_checkk  s     kkmm3 	BBBGNNvVV   ===JQQ   
 K ,V4.v6	ELLRy 	
 	
 	

 Y[[!-&/!$S#Rrz!J!J!J!$(4    
	
 
	
 
	
 	<<>>D .((///%doo&7&7&=~NNNNNN|++%doo&7&7&=zJJJJJJ z))%))//:1N      

IIKKKK3 ~+ 	 	 	16{{aJqvay:S$T$T$AF1IN1-/HII $( 	! 	! 	! DDD	!& 	$++D$)++2BCCDDDDDs   :H J! A*JJ! J!)tf2rT   rZ   rr   r^  N)*ray.rllib.examples.envs.classes.random_envr  (ray.rllib.models.torch.complex_input_netr  ray.rllib.models.torch.fcnetr   ray.rllib.models.torch.visionnetr  r
   r	   rE   r  int32r   GymTupleGymDictr   r  remoteoptionsr<   r   )r  r   r  r  r  r  r  default_observation_spacedefault_action_spacer  _do_check_remoter  r  r  r  r  r  r  r  r  s     ``         @@@@@@@r)   check_supported_spacesr    s   6 EDDDDD      QPPPPPPPPPPP QKK$T<<<1arx888&|||44a[[(1++s4d"*'M'M'MN
 
 !)!!$TDDD+2(HQKK!#=>>?, , 
 
 * $KKK00QKK$T<<<c6<<<T32:>>>8B<<T3BJ)O)O)OPQQ c4rzBBB 
 
" " - - -) )3L'A$7AA 4!F;F5MGE GE GE GE GE GE GE GE GE GE GE GE GER  ,+
z),,'//g9L1/MM',,.. F F* ''VVVDDEEEE -1133 F F% ''VVVDDEEEEF Fr*   )r/   NNF)FF)r   )r   )r   Nr  )TFNF)Vloggingrg  rk  r   r  typingr   r   r   r   r   r   	gymnasiumr   rV   rE   r   gymnasium.spacesr	   r  r
   r   r   r  r  r   ray._common.deprecationr   ray.rllib.corer   r   %ray.rllib.env.wrappers.atari_wrappersr   r   ray.rllib.utils.annotationsr   ray.rllib.utils.errorr   ray.rllib.utils.frameworkr   r   r   r  r   r   r   r   ray.rllib.utils.typingr   ray.tune.resultr   r  r   r   r  r    jaxr   rQ   rT   tfvrZ   	getLoggerrj  r  r%   r,   r.   r>   r   rI   r   r  rG   rH   r   r   r  r4  rQ  rv  r  r  ndarrayr  r  r   r*   r)   <module>r     s    				                                           


       . . . . . . 5 5 5 5 5 5 5 5 I I I I I I I I 3 3 3 3 3 3 ; ; ; ; ; ; U U U U U U U U U U            . - - - - - . . . . . . ?????????>>>>>>			Q}Rq		8	$	$ B@
  
: : 
: 0.
  
( ( 
( HF
  
@ @ 
@ML ML ML MLb @ED D D DN( (c ( ( ( (\ "&'===	"4 "4#"4"4 "4 	"4 "4 "4 "4P 0 000 0 e_	0 0 0 0fCZ CD C C C CL dz d d d dNKH KH KH KHf  Y Y Y[!Y"Y CH~	Y
 Y 
Y Y Y Yx C     >U@ U@ U@ U@ U@ U@ U@ U@p 2: "*    , ,0mF mF	mFmF mF 	mF
 sCx)mF mF mF mF mF mF mFr*   