
    &`i                     j   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZmZmZmZmZmZmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lm Z m!Z! d dl"m#Z# d dl$m%Z%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. erd dl/mZ0 d dl1m2Z2  ej3        e4          Z5 G d d          Z6dedede7dee7         dee         deeef         fdZ8de9dedede9fdZ:dede9dee;e<e=ee%f         fdZ>ded edefd!Z?e, G d" d#e                       Z@e, G d$ d%e                       ZAdS )&    N)Path)TYPE_CHECKINGCallableDictListOptionalTupleUnion)TRAINING_ITERATION)_FutureTrainingResult_TrainingResult)
Checkpoint)	TuneError)Trial)DEFAULT_METRIC)FIFOSchedulerTrialScheduler)SearchGenerator)DomainFunction)format_vars)SafeFallbackEncoder)	PublicAPI)log_once)TuneControllerc                   *    e Zd ZdZdefdZdefdZdS )_PBTTrialStatez%Internal PBT state tracked per-trial.trialc                 d    |j         | _        d | _        d | _        d| _        d| _        d | _        d S Nr   )experiment_tagorig_tag
last_scorelast_checkpointlast_perturbation_timelast_train_timelast_result)selfr   s     k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/tune/schedulers/pbt.py__init__z_PBTTrialState.__init__$   s>    ,.2TX+,#$%  	    returnc                     | j         j        dz   d                    d | j                                        D                       z   dz   S )N(z, c              3   2   K   | ]\  }}|d v 	| d| V  dS ))r#   r$   r&   r%   =N ).0kvs      r)   	<genexpr>z*_PBTTrialState.__repr__.<locals>.<genexpr>3   sV       
 
Aq  

q

   
 
r+   ))	__class____name__join__dict__itemsr(   s    r)   __repr__z_PBTTrialState.__repr__.   sc     N#ii 
 
 M//11
 
 
 
 
 	
r+   N)r8   
__module____qualname____doc__r   r*   strr=   r1   r+   r)   r   r   !   sP        //e    
# 
 
 
 
 
 
r+   r   config	mutationsresample_probabilityperturbation_factorscustom_explore_fnr,   c                    i }t          j        |           }|                                D ]>\  }}t          |t                    rRt          | |         ||         ||d          \  }	}
|                    ||	i           |                    ||
i           mt          |t          t          f          rt          j	                    |k     s
| |         |vrt          j
        |          ||<   d||<   t          j
        ddg          }|                    | |                   }||z   }t          t          |d          t          |          dz
            }||         ||<   d|dk    rdnd	 ||k    rd
nd ||<   Rt          |t          t           f          rt          j	                    |k     r=t          |t                    r|                    d          n	 |            ||<   d||<   n*t          j
        |          }| |         |z  ||<   d| ||<   t          | |         t$                    rt%          ||                   ||<   !t'          dt)          |                     |r ||          }|
J d            ||fS )aT  Return a perturbed config and string descriptors of the operations performed
    on the original config to produce the new config.

    Args:
        config: Original hyperparameter configuration.
        mutations: Specification of mutations to perform as documented
            in the PopulationBasedTraining scheduler.
        resample_probability: Probability of allowing resampling of a
            particular variable.
        perturbation_factors: Scaling factors to choose between when mutating
            a continuous hyperparameter.
        custom_explore_fn: Custom explore function applied after built-in
            config perturbations.

    Returns:
        new_config: New hyperparameter configuration (after random mutations).
        operations: Map of hyperparams -> strings describing mutation operations
            performed
    N)rF   resample   r   zshift leftrightz (noop) z* z.Unsupported hyperparameter distribution type: z-Custom explore fn failed to return new config)copydeepcopyr;   
isinstancedict_exploreupdatelisttuplerandomchoiceindexminmaxlenr   r   sampleint
ValueErrortype)rB   rC   rD   rE   rF   
operations
new_configkeydistributionnested_new_config
nested_opsshiftold_idxnew_idxperturbation_factors                  r)   rR   rR   B   s   4 Jv&&J&__.. 9 9\lD)) 8	,4s#$$"&- - -)z s$56777sJ/0000tUm44 -	 "666#;l22 #)-"="=
3",
3 r1g..&,,VC[99!E/c'1oos</@/@1/DEE".w"7
3Au{{VV A$+w$6$6yyBA A 3 vx&899 	 }!555 ",77(L''---% 3
 #-
3 '-m4H&I&I#"(+0C"C
3"<':"<"<
3&+s++ 7 #&jo"6"6
3UlASASUU    W '&z22
%%'V%%%z!!r+   r"   c                     i }|                                 D ]}||         |d|f<   d                    | t          |                    S )zBAppends perturbed params to the trial name to show in the console.rB   z{}@perturbed[{}])keysformatr   )r"   rB   rC   resolved_varsr3   s        r)   _make_experiment_tagrn      sU     M^^ 1 1'-ayxm$$$$X{=/I/IJJJr+   attrsearch_spacec                    t          |t                    r |            | |<   dS t          |t                    r|                    d          | |<   dS t          |t          t
          f          rt          j        |          | |<   dS t          |t                    r6i | |<   |	                                D ]\  }}t          | |         ||           dS dS )zAdd attr to config by sampling from search_space.

    This is a helper used to set initial hyperparameter values if the user doesn't
    specify them in the Tuner `param_space`.
    N)rP   r   r   r\   rT   rU   rV   rW   rQ   r;   _fill_config)rB   ro   rp   r3   r4   s        r)   rr   rr      s     ,)) 	-#|~~t	L&	)	) -#**400t	L4-	0	0 -}\22t	L$	'	' -t &&(( 	- 	-DAqq!,,,,- -	- 	-r+   hyperparam_mutationsc                     i }| D ]O}||vrt          | |         t                    r"t          | |         ||                   }|||<   D| |         ||<   P|S )a7  Filter out hyperparameters from a config so that only parameters specified
    within hyperparam_mutations remain. This recursively filters nested configs.

    Example:
    >>> config = {
    ...     "a": {"b": 2, "c": 0, "d": {"e": 0.1}},
    ...     "f": {"g": 0.5},
    ... }
    >>> hyperparam_mutations = {
    ...     "a": {"b": [1, 2], "c": [-1, 0]},
    ... }
    >>> _filter_mutated_params_from_config(config, hyperparam_mutations) == {
    ...     "a": {"b": 2, "c": 0}
    ... }
    True

    Args:
        config: The config dict that we want to filter.
        hyperparam_mutations: A dict containing a subset of hyperparameters from
            config, used to filter the config.

    Returns:
        mutated_params: A copy of config containing only params specified in
            hyperparam_mutations
    )rP   rQ   "_filter_mutated_params_from_config)rB   rs   mutated_params
param_namenested_paramss        r)   ru   ru      s    8 N 
< 
<
111fZ($// 	<>z"$8$D M *7N:&&)/
);N:&&r+   c                   v    e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 d9d
edee         dee         dededeeee	e
eeef         f         dededeeef         dee         dededef fdZdee         dee         defdZdddefdZdddededefdZd ed!ededefd"Zdeddd#ee         d$ee         fd%Zd&ed'eded(ed)ef
d*Zded(edeeef         fd+Z	 	 d:d-ed.ed/ee         d0edef
d1Zddded(efd2Zdeee         ee         f         fd3Zdddee         fd4Zd5 Z d6ee         dee         fd7Z!defd8Z" xZ#S );PopulationBasedTraininga  Implements the Population Based Training (PBT) algorithm.

    https://www.deepmind.com/blog/population-based-training-of-neural-networks

    PBT trains a group of models (or agents) in parallel. Periodically, poorly
    performing models clone the state of the top performers, and a random
    mutation is applied to their hyperparameters in the hopes of
    outperforming the current top models.

    Unlike other hyperparameter search algorithms, PBT mutates hyperparameters
    during training time. This enables very fast hyperparameter discovery and
    also automatically discovers good annealing schedules.

    This Tune PBT implementation considers all trials added as part of the
    PBT population. If the number of trials exceeds the cluster capacity,
    they will be time-multiplexed as to balance training progress across the
    population. To run multiple trials, use `tune.TuneConfig(num_samples=<int>)`.

    In {LOG_DIR}/{MY_EXPERIMENT_NAME}/, all mutations are logged in
    `pbt_global.txt` and individual policy perturbations are recorded
    in pbt_policy_{i}.txt. Tune logs: [target trial tag, clone trial tag,
    target trial iteration, clone trial iteration, old config, new config]
    on each perturbation step.

    Args:
        time_attr: The training result attr to use for comparing time.
            Note that you can pass in something non-temporal such as
            `training_iteration` as a measure of progress, the only requirement
            is that the attribute should increase monotonically.
        metric: The training result objective value attribute. Stopping
            procedures will use this attribute. If None but a mode was passed,
            the `ray.tune.result.DEFAULT_METRIC` will be used per default.
        mode: One of {min, max}. Determines whether objective is
            minimizing or maximizing the metric attribute.
        perturbation_interval: Models will be considered for
            perturbation at this interval of `time_attr`. Note that
            perturbation incurs checkpoint overhead, so you shouldn't set this
            to be too frequent.
        burn_in_period: Models will not be considered for
            perturbation before this interval of `time_attr` has passed. This
            guarantees that models are trained for at least a certain amount
            of time or timesteps before being perturbed.
        hyperparam_mutations: Hyperparams to mutate. The format is
            as follows: for each key, either a list, function,
            or a tune search space object (tune.loguniform, tune.uniform,
            etc.) can be provided. A list specifies an allowed set of
            categorical values. A function or tune search space object
            specifies the distribution of a continuous parameter. You must
            use tune.choice, tune.uniform, tune.loguniform, etc.. Arbitrary
            tune.sample_from objects are not supported.
            A key can also hold a dict for nested hyperparameters.
            You must specify at least one of `hyperparam_mutations` or
            `custom_explore_fn`.
            Tune will sample the search space provided by
            `hyperparam_mutations` for the initial hyperparameter values if the
            corresponding hyperparameters are not present in a trial's initial `config`.
        quantile_fraction: Parameters are transferred from the top
            `quantile_fraction` fraction of trials to the bottom
            `quantile_fraction` fraction. Needs to be between 0 and 0.5.
            Setting it to 0 essentially implies doing no exploitation at all.
        resample_probability: The probability of resampling from the
            original distribution when applying `hyperparam_mutations`. If not
            resampled, the value will be perturbed by a factor chosen from
            `perturbation_factors` if continuous, or changed to an adjacent value
            if discrete.
        perturbation_factors: Scaling factors to choose between when mutating
            a continuous hyperparameter.
        custom_explore_fn: You can also specify a custom exploration
            function. This function is invoked as `f(config)` after built-in
            perturbations from `hyperparam_mutations` are applied, and should
            return `config` updated as needed. You must specify at least one of
            `hyperparam_mutations` or `custom_explore_fn`.
        log_config: Whether to log the ray config of each model to
            local_dir at each exploit. Allows config schedule to be
            reconstructed.
        require_attrs: Whether to require time_attr and metric to appear
            in result for every iteration. If True, error will be raised
            if these values are not present in trial result.
        synch: If False, will use asynchronous implementation of
            PBT. Trial perturbations occur every perturbation_interval for each
            trial independently. If True, will use synchronous implementation
            of PBT. Perturbations will occur only after all trials are
            synced at the same time_attr every perturbation_interval.
            Defaults to False. See Appendix A.1 here
            https://arxiv.org/pdf/1711.09846.pdf.

    .. code-block:: python

        import random
        from ray import tune
        from ray.tune.schedulers import PopulationBasedTraining

        pbt = PopulationBasedTraining(
            time_attr="training_iteration",
            metric="episode_reward_mean",
            mode="max",
            perturbation_interval=10,  # every 10 `time_attr` units
                                       # (training_iterations in this case)
            hyperparam_mutations={
                # Perturb factor1 by scaling it by 0.8 or 1.2. Resampling
                # resets it to a value sampled from the lambda function.
                "factor_1": lambda: random.uniform(0.0, 20.0),
                # Alternatively, use tune search space primitives.
                # The search space for factor_1 is equivalent to factor_2.
                "factor_2": tune.uniform(0.0, 20.0),
                # Perturb factor3 by changing it to an adjacent value, e.g.
                # 10 -> 1 or 10 -> 100. Resampling will choose at random.
                "factor_3": [1, 10, 100, 1000, 10000],
                # Using tune.choice is NOT equivalent to the above.
                # factor_4 is treated as a continuous hyperparameter.
                "factor_4": tune.choice([1, 10, 100, 1000, 10000]),
            })
        tuner = tune.Tuner(
            trainable,
            tune_config=tune.TuneConfig(
                scheduler=pbt,
                num_samples=8,
            ),
        )
        tuner.fit()

    time_total_sN      N@              ?g333333?g?TF	time_attrmetricmodeperturbation_intervalburn_in_periodrs   quantile_fractionrD   rE   rF   
log_configrequire_attrssynchc           	         |pi }|                                 D ]c}t          |t          t          t          t
          t          f          st          d          t          |t                    rt          d          d|s|
st          d          |dk    s|dk     r"t          d                    |                    |dk    r"t          d                    |                    |r|dv s
J d	            t                                                       || _        || _        d | _        | j        d
k    rd| _        n| j        dk    rd| _        || _        || _        || _        || _        || _        || _        |	| _        i | _        |
| _        || _        || _        || _        t;          | j        | j                  | _        d| _        d| _         d S )Nzk`hyperparam_mutation` values must be either a List, Tuple, Dict, a tune search space object, or a callable.zarbitrary tune.sample_from objects are not supported for `hyperparam_mutation` values.You must use other built in primitives liketune.uniform, tune.loguniform, etc.zZYou must specify at least one of `hyperparam_mutations` or `custom_explore_fn` to use PBT.g      ?r   zQYou must set `quantile_fraction` to a value between 0 and0.5. Current value: '{}'zSperturbation_interval must be a positive number greater than 0. Current value: '{}')rY   rZ   z`mode` must be 'min' or 'max'.rZ         ?rY         )!valuesrP   rQ   rT   rU   r   r   	TypeErrorr   r^   r   rl   superr*   _metric_mode
_metric_op
_time_attr_perturbation_interval_burn_in_period_hyperparam_mutations_quantile_fraction_resample_probability_perturbation_factors_trial_state_custom_explore_fn_log_config_require_attrs_synchrZ   _next_perturbation_sync_num_checkpoints_num_perturbations)r(   r   r   r   r   r   rs   r   rD   rE   rF   r   r   r   valuer7   s                  r)   r*   z PopulationBasedTraining.__init__e  s5   $  49r)0022 	 	EedD%%JKK "  
 %**  :   $ 	,= 	5  
 s""&7!&;&;++162C+D+D  
 !A%%..4f5J.K.K  
  	L>)))+K)))
:!DOOZ5  "DO#&;#-%9""3%9"%9"9;"3%+'*' (
 (
$ !""#r+   r,   c                     | j         r|rdS | j        r|rdS |r|| _         |r|| _        | j        dk    rd| _        n| j        dk    rd| _        | j         | j        rt          | _         dS )NFrZ   r   rY   r   T)r   r   r   r   )r(   r   r   specs       r)   set_search_propertiesz-PopulationBasedTraining.set_search_properties  s     < 	F 	5: 	$ 	5 	"!DL 	DJ:!DOOZ5  "DO<DJ)DLtr+   tune_controllerr   r   c                     |j         Lt          |j         t                    r2t          d                    | j        j        |j                             | j        r| j        s8t          d                    | j        j        | j        | j	                            |j
        j        j        }|j        r.|j        dk    r#t          d          rt          j        d           t#          |          | j        |<   | j                                        D ]m}||j        vrbt          |dz             rt,                              d           t1          |j        || j        |                    |j        |         |j        |<   nd S )NzFSearch algorithms cannot be used with {} schedulers. Please remove {}.z{} has been instantiated without a valid `metric` ({}) or `mode` ({}) parameter. Either pass these parameters when instantiating the scheduler, or pass them as parameters to `tune.TuneConfig()`   pbt_num_to_keepzUsing `CheckpointConfig.num_to_keep <= 2` with PBT can lead to restoration problems when checkpoint are deleted too early for other trials to exploit them. If this happens, increase the value of `num_to_keep`.z-missingzNCannot find {} in config. Using search space provided by hyperparam_mutations.)
search_algrP   r   r^   rl   r7   r8   r   r   r   run_metadatacheckpoint_managercheckpoint_confignum_to_keepr   warningswarnr   r   r   rk   rB   loggerdebugrr   evaluated_params)r(   r   r   r   ro   s        r)   on_trial_addz$PopulationBasedTraining.on_trial_add  s   %1j&7
 7
1 006N+_-G1 1   | 	4? 	) *0N+T\4:* *	   ".AS)
	!-22*++ 3 M$   $2%#8#8% .3355 	B 	BD5<''D:-.. LLB   U\41KD1QRRR/4|D/A&t,	B 	Br+   resultc                 ~     j         |vr]d                     j         |          } j        rt          |dz             t	          d          rt
                              |            j        |vr]d                     j        |          } j        rt          |dz             t	          d          rt
                              |            j        |vs	 j         |vrt          j	        S | j                  } j
                 }| j        k     r1t
                              d| d j                    t          j	        S ||j        z
  }| j        k     r1t
                              d| d j                    t          j	        S t
                              d	 d
|                                 |||            j        s||_                                         \  }	}
t          j	        }|                                D ]/}|j        t(          j        t(          j        fv rt          j        } n0                     ||
|	           j        t(          j        k    rt          j        n|S t5           fd|                                D                       r t
                              d d           nSt
                              d                                            \  }	}
|                                }g }|D ]}||	vr||
vr|                    |            t
                              d|
 d|	 d|            |
|z   |	z   }|D ]I}t
                              d|            | j
        |         _                             |||
|	           J fd|                                D             }t;          |          }t;           j         j        z   |           _        t
                              d j                    j        t(          j        k    rt          j        nt          j        S )NzxCannot find time_attr {} in trial result {}. Make sure that this attribute is returned in the results of your Trainable.zgIf this error is expected, you can change this to a warning message by setting PBT(require_attrs=False)zpbt-time_attr-errorzuCannot find metric {} in trial result {}. Make sure that this attribute is returned in the results of your Trainable.zpbt-metric-errorzStill in burn-in period: z < z#Perturbation interval not reached: zUpdating trial state for trial z	 at time c              3   Z   K   | ]%}j         |         j        j        k     o|k    V  &d S N)r   r&   r   )r2   tr(   r   s     r)   r5   z:PopulationBasedTraining.on_trial_result.<locals>.<genexpr>L  sX          !!$4t7SS J     r+   z?Sync: Other trials are not at perturb time, yet. Pausing trial z	 to wait.z%Sync: All trials are at perturb time.z!Trial statistics
Upper quantile: z
Lower quantile: z
Not in quantile: zPerturbing trial c                 4    g | ]}j         |         j        S r1   r   r&   )r2   r   r(   s     r)   
<listcomp>z;PopulationBasedTraining.on_trial_result.<locals>.<listcomp>q  s4     # # # %a(8# # #r+   zNext perturb at time )r   rl   r   RuntimeErrorr   r   warningr   r   CONTINUEr   r   r   r%   r   _save_trial_stater   
_quantiles
get_trialsstatusr   PENDINGPAUSEDPAUSE_checkpoint_or_exploitNOOPanyget_live_trialsappendrZ   r   )r(   r   r   r   time_missing_msgmetric_missing_msgtimestatetime_since_perturblower_quantileupper_quantiledecisionother_trial
all_trialsnot_in_quantiler   all_train_timesmax_last_train_times   ` `               r)   on_trial_resultz'PopulationBasedTraining.on_trial_result  s-    ?&((- .4VDOV-L-L	  " 	5"$77   122 5NN#3444<v%%- .4VDL&-I-I	  " 7"& *7 7   .// 7NN#5666<v%%)F)F!**do&!%( $&&&LLTTTTd>RTTUUU!** "E$@@ ;;;LLH%H H*.*EH H   "**MuMMtMMNNNudFE:::{ F	+/E(-1__->->*NN%.H.99;;  %%-)FFF-3HE G ''   +0,%,*F*F>&&HT       )88::     .U
 6%*6 6 6    DEEE151B1B.,7799
"$# 2 2A..1N3J3J'..q111:'5: :'5: : )8: :   ,o=N
#  ALL!8Q!8!8999BFD%a(?//?NN   # # # #,7799# # # '*/&:&:#/2043NN'0 0, ST5QSSTTT <5<// ###)r+   r   r   c                 Z    | j         || j                 z  }||_        ||_        ||_        |S )a  Saves necessary trial information when result is received.
        Args:
            state: The state object for the trial.
            time: The current timestep of the trial.
            result: The trial's result dictionary.
            trial: The trial object.
        )r   r   r#   r&   r'   )r(   r   r   r   r   scores         r)   r   z)PopulationBasedTraining._save_trial_state  s4     &"66  $"r+   r   r   c                 P   | j         |         }||v rt                              d| d           |j        t          j        k    r|j        j        rOt          |j        j        t                    r0t                              d| d           |j        j        |_
        nrt                              d| d|j         d           |j        |_
        n?t                              d| d           |                    ||j                  |_
        | xj        d	z  c_        nd
|_
        ||v rt          j        |          }||usJ | j         |         }|j
        }t                              d| d| d           t          |t                    rV|                                }	|	r |	j        |_        |	j        |_
        |j
        }n t                              d| d           d
}|s"t                              d| d|            d
S |                     |||           d
S d
S )z6Checkpoint if in upper quantile, exploits if in lower.zTrial z) is in upper quantile. Saving checkpoint.z is still saving.z* is paused. Use last available checkpoint .zInstructing z	 to save.r   rJ   Nz( is in lower quantile. Exploiting trial z6PBT-scheduled checkpoint save resolved to None. Trial z: didn't save any checkpoint before and can't be exploited.z[pbt]: no checkpoint for trial z. Skip exploit for Trial )r   r   r   r   r   r   temporary_state	saving_torP   r   r$   
checkpoint_schedule_trial_saver'   r   rV   rW   resolvemetricsinfo_exploit)
r(   r   r   r   r   r   trial_to_cloneclone_stater$   training_results
             r)   r   z.PopulationBasedTraining._checkpoint_or_exploit  s    !%(N"" LLR%RRRSSS|u|++(2 =z)35J8 8 = LL!B%!B!B!BCCC,1,A,KE)) LL: : :&+&6: : :   -2,<E))<E<<<===(7(L(L%"3 )M ) )% !!Q&!!!$(E!N""#]>::N....+N;K)9OLL6 6 6$26 6 6  
 /+@AA +"1"9"9";";" 
+.=.EK+2A2LK/&1&AOOLL3)3 3 3  
 '+O" 7n 7 7/47 7   MM/5.AAAAAA #"r+   trial_state	new_stater   ra   c                    |j         |j         }}|j        }|j        }	t          j                            |j        d|z   dz             }
t          j                            |j        d|	z   dz             }|||j                            t          d          |j                            t          d          |j
        |g}t          t          j                            |j        d          d          5 }t          t          j        |t                    |           ddd           n# 1 swxY w Y   t          j                            |          rt#          j        ||
           t          |
d          5 }|                    t          j        |t                    d	z              ddd           dS # 1 swxY w Y   dS )
zLogs transition during exploit/exploit step.

        For each step, logs: [target trial tag, clone trial tag, target trial
        iteration, clone trial iteration, old config, new config].
        pbt_policy_z.txtr   zpbt_global.txtza+)cls)fileN
)r"   trial_idospathr9   local_experiment_path	local_dirr'   getr   rB   openprintjsondumpsr   existsshutilcopyfilewrite)r(   r   r   r   r   ra   
trial_nametrial_to_clone_namer   trial_to_clone_id
trial_pathtrial_to_clone_pathpolicyfs                 r)   _log_config_on_stepz+PopulationBasedTraining._log_config_on_step  sD    ,7+?AS'
>*3W\\')AF)J
 

 !gll$m6G&G&&P
 
 !!"4a88&**+=qAA!
 GLL46FGG
 
 	G$*V)<===AFFFF	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G
 7>>-.. 	=O/<<<*d## 	HqGGDJv+>???$FGGG	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	Hs$   /+D&&D*-D*52F44F8;F8c                 Z    t          |j        | j        | j        | j        | j                  S )a  Gets new config for trial by exploring trial_to_clone's config.

        Args:
            trial: The current trial that decided to exploit trial_to_clone.
            trial_to_clone: The top-performing trial with a hyperparameter config
                that the current trial will explore by perturbing.

        Returns:
            new_config: New hyperparameter configuration (after random mutations).
            operations: Map of hyperparams -> strings describing mutation operations
                performed
        )rR   rB   r   r   r   r   )r(   r   r   s      r)   _get_new_configz'PopulationBasedTraining._get_new_config	  s4     !&&&#
 
 	
r+   rM   
old_params
new_paramsr`   prefixc           	         d}|s|S |D ]}||         }||v s7J d| d|                                  d|                                              ||         }|| | dz  }t          |t                    r;|dz  }|                    |i           }	||                     |||	|dz             z  }|                    |d	          }
|
sd
}nd|
 d}|| d| d| dz  }|S )a  Generates a summary of hyperparameter changes from a PBT "explore" step.

        Example:
        Given the following hyperparam_mutations:

        hyperparam_mutations = {
            "a": tune.uniform(0, 1),
            "b": list(range(5)),
            "c": {
                "d": tune.uniform(2, 3),
                "e": {"f": [-1, 0, 1]},
            },
        }

        This is an example summary output of the operations performed on old_params
        to get new_params:

        a : 0.5 --- (* 0.8) --> 0.4
        b : 2 --- (resample) --> 4
        c :
            d : 2.5 --- (* 1.2) --> 3.0
            e :
                f : 0 --- (shift right) --> 1

        The summary shows the old and new hyperparameter values, with the operation
        used to perturb labeled in between.
        If the operation for a certain hyperparameter is not provided, then the summary
        will just contain arrows without a label. (ex: a : 0.5 -----> 0.4)

        Args:
            old_params: Old values of hyperparameters that are perturbed to generate
                the new config
            new_params: The newly generated hyperparameter config from PBT exploration
            operations: Map of hyperparams -> string descriptors the operations
                performed to generate the values in `new_params`
            prefix: Helper argument to format nested dict hyperparam configs

        Returns:
            summary_str: The hyperparameter change summary to print/log.
        rM   z:`old_params` and `new_params` must both contain the key: 'z'
old_params.keys() = z
new_params.keys() = z : r   z    )r`   r
  Nz----->z--- (z) --> )rk   rP   r   r   _summarize_hyperparam_changes)r(   r  r	  r`   r
  summary_strrw   old_valnew_valnested_operationsoparrows               r)   r  z5PopulationBasedTraining._summarize_hyperparam_changes  sw   ^  	$ 	@ 	@J ,G+++;/9; ;'1'8'8; ; (2'8'8; ; ,++ !,Gf5j5555K'4(( @t#$.NN:r$B$B!tAA0!G+	  B       ^^J55 .$EE-B---E'??E??G????r+   c                    | j         |         }| j         |         }| j        j        }t                              d| d                    |j        |j        |j        |j                             |                     ||          \  }}t          |j
        | j                  }	t          || j                  }
d| d|j         d}||                     |	|
|          pdz  }t                              |           | j        r|                     |||||           t          |j        || j                  }|j        t$          j        k    r| j        st+          d          n|                    |d           |                    |           |                    |           t3          j        |j                  }t7          ||j        	          |j        j        _        | xj         d
z  c_         |j!        |_!        |j"        |_"        dS )zTransfers perturbed state from trial_to_clone -> trial.

        If specified, also logs the updated hyperparam state.
        z

[zK] [Exploit] Cloning trial {} (score = {:4f}) into trial {} (score = {:4f})
z8] [Explore] Perturbed the hyperparameter config of trialz:
zNo hyperparameters mutated.zzTrials should be paused here only if in synchronous mode. If you encounter this error please raise an issue on Ray Github.Fshould_checkpoint)r   r   rJ   N)#r   r7   r8   r   r   rl   r   r#   r  ru   rB   r   r  r   r  rn   r"   r   r   r   r   r   pause_trialset_experiment_tag
set_configrN   r$   r   r'   r   r   _latest_checkpoint_resultr   r%   r&   )r(   r   r   r   r   r   
class_namera   r`   r  r	  explore_info_strnew_tagcheckpoint_to_exploits                 r)   r   z PopulationBasedTraining._exploitm  ss    '.%n5	^,
AJ A A AAG'$&	B B	
 	
 	
 "&!5!5e^!L!L
J 8!4#=
 

 82
 

#J # #~# # # 	 	..z:zRR -,	
 	$%%% 	$$Y~z   ' *d.H
 
 <5<'' ; <   '''GGG  )))$$$ -1Ii6O,P,P 0):O   	-G 	1$-6-M*&/&?###r+   c                     g } j                                         D ]\  }}t                              d                    ||                     |                                r-t                              d                    |                     |j        )|                                s|                    |           |                     fd           t          |          dk    rg g fS t          t          j        t          |           j        z                      }|t          |          dz  k    r1t          t          j        t          |          dz                      }|d|         || d         fS )zReturns trials in the lower and upper `quantile` of the population.

        If there is not enough data to compute this, returns empty lists.
        zTrial {}, state {}zTrial {} is finishedNc                 (    j         |          j        S r   )r   r#   )r   r(   s    r)   <lambda>z4PopulationBasedTraining._quantiles.<locals>.<lambda>  s    $"3A"6"A r+   rb   rJ   r   )r   r;   r   r   rl   is_finishedr#   r   sortr[   r]   mathceilr   floor)r(   trialsr   r   num_trials_in_quantiles   `    r)   r   z"PopulationBasedTraining._quantiles  st   
  -3355 	% 	%LE5LL-44UEBBCCC  "" C3::5AABBB+E4E4E4G4G+e$$$AAAABBBv;;!r6M%(	#f++(??@@& &" &Fa77),TZFa-H-H)I)I&2223V=S<S<T<T5UVVr+   c                 X    g }|                                 D ]n}|j        t          j        t          j        fv rM j        s|                    |           > j        |         j         j	        k     r|                    |           o|
                     fd           |r|d         ndS )zEnsures all trials get fair share of time (as defined by time_attr).

        This enables the PBT scheduler to support a greater number of
        concurrent trials than can fit in the cluster at any given time.
        c                 (    j         |          j        S r   r   )r   r(   s    r)   r!  z=PopulationBasedTraining.choose_trial_to_run.<locals>.<lambda>  s    $*;E*B*R r+   r"  r   N)r   r   r   r   r   r   r   r   r&   r   r$  )r(   r   
candidatesr   s   `   r)   choose_trial_to_runz+PopulationBasedTraining.choose_trial_to_run  s     
$//11 	- 	-E|    { -%%e,,,,%e,<23 3 %%e,,,RRRRSSS *4z!}}4r+   c                 "    d| _         d| _        d S r    )r   r   r<   s    r)   reset_statsz#PopulationBasedTraining.reset_stats  s    "# !r+   r(  c                     g }|D ]D}| j         |         }|j        .|                                s|                    |j                   E|S r   )r   r#   r#  r   )r(   r(  scoresr   r   s        r)   last_scoresz#PopulationBasedTraining.last_scores  sX     	0 	0E%e,E+E4E4E4G4G+e.///r+   c                 B    d                     | j        | j                  S )Nz4PopulationBasedTraining: {} checkpoints, {} perturbs)rl   r   r   r<   s    r)   debug_stringz$PopulationBasedTraining.debug_string  s%    ELL!4#:
 
 	
r+   )r{   NNr|   r}   Nr~   r~   r   NTTF)NrM   )$r8   r>   r?   r@   rA   r   floatr   r
   rQ   rT   rU   r   r   r	   boolr*   r   r   r   r   r   r]   r   r   r   r  r  r  r   r   r-  r/  r2  r4  __classcell__)r7   s   @r)   rz   rz      s2       y yz ( $"'+ # #'&*4>04"!R$ R$R$ R$ sm	R$
  %R$ R$ #tT5(F:;;
R$ !R$ $R$ $E5L1R$ $H-R$ R$ R$  !R$ R$ R$ R$ R$ R$hsm+3C=	   0/B,< /BU /B /B /B /BbB/B8=BGKB	B B B BH#+.8<EJ   (CBCB *CB U	CB
 UCB CB CB CBJ(H#(H "(H 	(H
 (H (H (H (H (HT
U 
E 
eDRVJFW 
 
 
 
2 &*M MM M TN	M
 M 
M M M M^N@)N@ N@ 	N@ N@ N@ N@`WE$u+tE{":; W W W W253C 5QV 5 5 5 5." " "
$u+ $u+    
c 
 
 
 
 
 
 
 
r+   rz   c            	           e Zd ZdZdefdZdedeeeee	ef                  f         fdZ
dddefd	Zddded
edefdZdefdZdS )PopulationBasedTrainingReplayaX  Replays a Population Based Training run.

    Population Based Training does not return a single hyperparameter
    configuration, but rather a schedule of configurations. For instance,
    PBT might discover that a larger learning rate leads to good results
    in the first training iterations, but that a smaller learning rate
    is preferable later.

    This scheduler enables replaying these parameter schedules from
    a finished PBT run. This requires that population based training has
    been run with ``log_config=True``, which is the default setting.

    The scheduler will only accept and train a single trial. It will
    start with the initial config of the existing trial and update the
    config according to the schedule.

    Args:
        policy_file: The PBT policy file. Usually this is
            stored in ``~/ray_results/experiment_name/pbt_policy_xxx.txt``
            where ``xxx`` is the trial ID.

    Example:

    .. code-block:: python

        # Replaying a result from ray.tune.examples.pbt_convnet_example
        from ray import tune

        from ray.tune.examples.pbt_convnet_example import PytorchTrainable
        from ray.tune.schedulers import PopulationBasedTrainingReplay

        replay = PopulationBasedTrainingReplay(
            "~/ray_results/pbt_test/pbt_policy_XXXXX_00001.txt")

        tuner = tune.Tuner(
            PytorchTrainable,
            run_config=tune.RunConfig(
                stop={"training_iteration": 100}
            ),
            tune_config=tune.TuneConfig(
                scheduler=replay,
            ),
        )
        tuner.fit()


    policy_filec                 |   t          |                                          }|                                s4t          d                    |                                                    |                                | _        |                     | j                  \  }| _        d                    t          j
                            | j                            | _        || _        | j        | _        d | _        d| _        d| _        t%          | j                  | _        t)          | j        d           | _        d S )NzPolicy file not found: {}z	replay_{}r   )r   
expanduserr   r^   rl   as_posixr:  _load_policy_policyr   r   basenamer!   rB   current_config_trial_current_stepr   iter_policy_iternext_next_policy)r(   r:  initial_configs      r)   r*   z&PopulationBasedTrainingReplay.__init__2  s
   ;''2244!!## 	Y8??@T@T@V@VWWXXX&//11 (,'8'89I'J'J$)001A1A$BR1S1STT$"k"# .. !2D99r+   r,   c                 &   g }t          |d          5 }|                                D ]p}	 t          j        |          }n6# t          j        $ r$ t          d                    |                    d w xY w|                    t          |                     q	 d d d            n# 1 swxY w Y   g }d }d }t          |          D ].\  }	}
}}}}|r|	|k    r n|
}|}|                    ||f           /|t          t          |                    fS )Nrtz#Could not read PBT policy file: {}.)r   	readlinesr   loadsJSONDecodeErrorr^   rl   r   rU   reversedrT   )r(   r:  
raw_policyfprow
parsed_rowr  last_new_taglast_old_confold_tagr  old_stepnew_stepold_confnew_confs                  r)   r>  z*PopulationBasedTrainingReplay._load_policyG  s   
+t$$ 	5||~~ 5 5 !%CJJ+      $=DD[QQ    !!%
"3"344445	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 HPI
 I
 
	0 
	0DGWh(H  < 7 7 "L$MMM8X.////d8F#3#34444s'   B&A B& 3A33&B&&B*-B*r   r   r   c                 h   | j         rt          d          || _         | j         j        r"| j        rt                              d           nG| j         j        r| j        s| j         j        | _        n"| j         j        s| j        st          d          | j                             | j                   d S )NzMore than one trial added to PBT replay run. This means the same schedule will be trained multiple times. Do you want to set `n_samples=1`?zsTrial was initialized with a config, which was overwritten. Did you start the PBT replay with a `config` parameter?zNo replay policy found and trial initialized without a valid config. Either pass a `config` argument to `tune.Tuner()`or consider not using PBT replay for this run.)rB  r^   rB   r?  r   r   r  )r(   r   r   s      r)   r   z*PopulationBasedTrainingReplay.on_trial_adde  s    ; 	;  
 ; 	$, 	NNJ    [ 	 	+,DKK# 	DL 	A  
 	t{+++++r+   r   c                    t           |vrt          j        S | j        st          j        S |t                    }|| _        | j        \  }}||k     rt          j        S t
                              d                    ||                     |                    ||          }|	                                }||j
        j        _        t          | j        ||          }|                    |d           |                    |           |                    |           || _        | xj        dz  c_        t)          | j        d           | _        t          j        S )NzXPopulation Based Training replay is now at step {}. Configuration will be changed to {}.r   Fr  rJ   )r   r   r   rG  rC  r   r   rl   r   r   r   r   r  rn   r!   r  r  r  rA  r   rF  rE  r   )	r(   r   r   r   step	change_atra   r   r  s	            r)   r   z-PopulationBasedTrainingReplay.on_trial_result}  sU    V++!**  	+!**()! $ 1	:)!**3396$
3K3K	
 	
 	

 !55eF5KK ..** 	-G 't':J
SS##EU#CCC  )))$$$(1$ !2D99""r+   c                 B    d                     | j        | j                  S )Nz3PopulationBasedTraining replay: Step {}, perturb {})rl   rC  r   r<   s    r)   r4  z*PopulationBasedTrainingReplay.debug_string  s%    DKK 7
 
 	
r+   N)r8   r>   r?   r@   rA   r*   r	   r   r   r]   r>  r   r   r   r4  r1   r+   r)   r9  r9     s        . .`:C : : : :*5 5dDsDyAQ<R6R0S 5 5 5 5<,,< ,U , , , ,0)#/)#8=)#GK)#	)# )# )# )#V
c 
 
 
 
 
 
r+   r9  )BrN   r   loggingr%  r   rV   r   r   pathlibr   typingr   r   r   r   r   r	   r
   ray.air.constantsr   ray.train._internal.sessionr   r   ray.tuner   ray.tune.errorr   ray.tune.experimentr   ray.tune.resultr   #ray.tune.schedulers.trial_schedulerr   r   ray.tune.searchr   ray.tune.search.sampler   r   !ray.tune.search.variant_generatorr   ray.tune.utils.utilr   ray.utilr   ray.util.debugr   	ray.trainTrainCheckpoint"ray.tune.execution.tune_controllerr   	getLoggerr8   r   r   r5  rR   rA   rn   rQ   rT   rU   rr   ru   rz   r9  r1   r+   r)   <module>rs     s       				          N N N N N N N N N N N N N N N N N N 0 0 0 0 0 0 N N N N N N N N       $ $ $ $ $ $ % % % % % % * * * * * * M M M M M M M M + + + + + + 3 3 3 3 3 3 3 3 9 9 9 9 9 9 3 3 3 3 3 3       # # # # # # B777777AAAAAA		8	$	$
 
 
 
 
 
 
 
B["["["  ["  ,	["
  )[" 4:[" [" [" ["|K3 K K K# K K K K---+0tUHf1T+U- - - -((((,(	( ( ( (V T
 T
 T
 T
 T
m T
 T
 T
n j
 j
 j
 j
 j
M j
 j
 j
 j
 j
r+   