
    &`i%K                     6   d dl Z d dlmZ d dlmZmZmZmZmZm	Z	 d dl
Zd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ erd d
lmZ d Z e            Zerd dlm Z  d dl!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'  e j(        e)          Z*dedee+e	e,e-e.f         f         defdZ/dej0        dej0        de-dej0        de,de1dej2        fdZ3dej4        dee+ee5e5f         f         de-dededee+ee5e5f         f         deeej4        f         fdZ6 G d de          Z7dS )    N)deepcopy)TYPE_CHECKINGCallableDictOptionalTupleUnion)	TuneError)Trial)PopulationBasedTraining)_PBTTrialState)flatten_dictunflatten_dict)log_once)TuneControllerc                  6    	 dd l } n# t          $ r d } Y nw xY w| S )Nr   )sklearnImportError)r   s    k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/tune/schedulers/pb2.pyimport_pb2_dependenciesr      s;       Ns    )GaussianProcessRegressor)UCBTV_SquaredExp	normalizeoptimize_acqselect_lengthstandardizeconfighyperparam_boundsreturnc                    i }|                                 D ]\  }}t          |t                    r#|| vri | |<   t          | |         |          ||<   =t          |t          t
          f          rv|| vrrt          |dz             rt                              d| d           t          |          dk    sJ |\  }}t          j                            ||          x| |<   ||<   |S )aj  Fills missing hyperparameters in config by sampling uniformly from the
    specified `hyperparam_bounds`.
    Recursively fills the config if `hyperparam_bounds` is a nested dict.

    This is a helper used to set initial hyperparameter values if the user doesn't
    specify them in the Tuner `param_space`.

    Returns the dict of filled hyperparameters.
    z-missingzCannot find zU in config. Initializing by sampling uniformly from the provided `hyperparam_bounds`.   )items
isinstancedict_fill_configlisttupler   loggerdebuglennprandomuniform)r   r   filled_hyperparams
param_nameboundslowhighs          r   r&   r&   ,   s.    /5577  
Ffd## 	''%'z"-9&:Lf-U-Uz**u.. 
	:V3K3K
Z/00 P: P P P   v;;!####ICBD)BSBSTC C F:!3J!?     Xrawyrawcurrentnewpointr1   num_fc                    t          | |||          }| | dddf         } || d         }t          j        t          |                                                    j        }| ddd|f         }t          j        t          j        |d          t          j        |d          f          	                    d|j
        d                   }	t          j        |	|fd          }
t          | |
          }t          |          	                    |j        d          }t          ||          t          ddd          }	 t          |d	d
          }|                    ||           ng# t          j        j        $ rP |t          j        |j
        d                   dz  z  }t          |d	d
          }|                    ||           Y nw xY w|t)          |          }nt          j        fdt+          |j
        d                   D                       }t          ||          }t          j        ||f          }t          j        ||f          }t          j        |j
        d                   }|	                    dd          }t          j        ||f          }t          ddd          }t          |d	d
          }|                    ||           t3          t4          |||          }|t          j        |d          t          j        |d          z
  z  t          j        |d          z   }|                    t          j                  }|S )a  Selects the next hyperparameter config to try.

    This function takes the formatted data, fits the GP model and optimizes the
    UCB acquisition function to select the next point.

    Args:
        Xraw: The un-normalized array of hyperparams, Time and
            Reward
        yraw: The un-normalized vector of reward changes.
        current: The hyperparams of trials currently running. This is
            important so we do not select the same config twice. If there is
            data here then we fit a second GP including it
            (with fake y labels). The GP variance doesn't depend on the y
            labels so it is ok.
        newpoint: The Reward and Time for the new point.
            We cannot change these as they are based on the *new weights*.
        bounds: Bounds for the hyperparameters. Used to normalize.
        num_f: The number of fixed params. Almost always 2 (reward+time)

    Return:
        xt: A vector of new hyperparameters.
    Nr   axisr"      g      ?g?)variancelengthscaleepsilonfmin_l_bfgs_bg|=)kernel	optimizeralphagMbP?c                     g | ]}S  rF   ).0_fixeds     r   
<listcomp>z"_select_config.<locals>.<listcomp>   s    CCCaECCCr4   )r   r,   arrayr'   valuesTconcatenatemaxminreshapeshaper   r   sizer   r   fitlinalgLinAlgErroreyer   rangehstackvstackzerosr   r   astypefloat32)r5   r6   r7   r8   r1   r9   length	base_vals	oldpointsold_limslimitsXyrB   mm1paddingXnewypadynewkernel1xtrI   s                         @r   _select_configrn   L   sV   < 4vu55F!!!D>Dfmmoo..//1IQQQYI~			"	"	"BF91$=$=$=> ga#$$  ^Xy1:::F$AD!!$)Q//Ah	**ECS#FFFF$_E
 
 
 	
a9    	RVAGAJ$&&$_E
 
 
 	
a a[[ (CCCC5q1A+B+BCCCDDGY//)Wg.//y!W&&xa())||B""y!T###sKKK%oU
 
 
 	tT	c1b%	/	/B 
rvia(((26)!+D+D+DD	EI I I 
B 
2:		BIs   (E- -A!GGdatabaseoldc           
         |                      d                              d          }|                    dgt          |                                          z             d                                         |d<   |                    dgt          |                                          z             d                                         |d<   ||d         d	k                                 d          }|j        |j        z
  |d
<   |j        |j        z  |d<   ||j        	                                                              d          }|                     d                              d          }|j
        ddddf                             d          }||d         t          |          k             }|j        st          j        |j        j                  }|dd
g         }	||                                         }
t!          j        |	|
gd          j        }||d         t          |          k             j
        dddf         dd
g         j        }t%          |||||t'          |	j                            }|                                }g }t-          |
j                  D ]R\  }}t/          ||                   } |||                   ||<   |                     |||                              S||d         t          |          k             j
        dddf         d         }||d         t          |          k             j
        dddf         j        }t          |          g|gz   |z   |gz   g}ddgt          |          z   dgz   }t!          j        ||          }t!          j        | |g                              d          } n|                                }|| fS )a|  Returns next hyperparameter configuration to use.

    This function primarily processes the data from completed trials
    and then requests the next config from the select_config function.
    It then adds the new trial to the dataframe, so that the reward change
    can be computed using the new weights.
    It returns the new point and the dataframe with the new entry.
    Time)byTdropr   Rewardre   t_changer   R_beforeiNr=   r;   rK   )r9   columns)sort_valuesreset_indexgroupbyr'   keysdiffrw   re   rx   isnailocstremptyr,   rL   rM   pdconcatrn   r+   r{   copy	enumeratetypeappend	DataFrame)ro   r1   r7   rp   rq   r   df
dfnewpointre   t_rhparamsrd   r8   new
new_configrM   icoltype_new_T
new_Rewardlstcols	new_entrys                           r   _explorer      s   " 
		V		$	$	0	0d	0	;	;B jj'T&++--%8%8899(CHHJJBsGZZ	D,?,? ?@@HMMOOBzN 
BzNQ		+	+	+	6	6BY%BzN dR[ BsG	RTYY[[L		%	%4	%	0	0B	6	"	"	.	.D	.	9	9B 
		'	'T	'	2	2B BwK3t99,-J !#HRT[!! &*%&V[[]]#IsGn1---4bkSYY./4RU;VZ<PQXQ7HfCDTDTUUU[[]]
00 	) 	)FAs %%E#eCFmmJsOMM%%A--((((2g;#d))+,1"aaa%8@7s4yy016r111u=D
CzUG#f,
|;< 4<</8*<Ld333	 y$	*++77T7BB [[]]
tr4   c                       e Zd ZdZ	 	 	 	 	 	 	 	 	 	 dded	ee         d
ee         dedeeee	e
ef         f         dededededeee	ge	f                  f fdZdddef fdZde	fdZdedededef fdZdededeeef         fdZ xZS ) PB2a  Implements the Population Based Bandit (PB2) algorithm.

    PB2 trains a group of models (or agents) in parallel. Periodically, poorly
    performing models clone the state of the top performers, and the hyper-
    parameters are re-selected using GP-bandit optimization. The GP model is
    trained to predict the improvement in the next training period.

    Like PBT, PB2 adapts hyperparameters during training time. This enables
    very fast hyperparameter discovery and also automatically discovers
    schedules.

    This Tune PB2 implementation is built on top of Tune's PBT implementation.
    It considers all trials added as part of the PB2 population. If the number
    of trials exceeds the cluster capacity, they will be time-multiplexed as to
    balance training progress across the population. To run multiple trials,
    use `tune.TuneConfig(num_samples=<int>)`.

    In {LOG_DIR}/{MY_EXPERIMENT_NAME}/, all mutations are logged in
    `pb2_global.txt` and individual policy perturbations are recorded
    in pb2_policy_{i}.txt. Tune logs: [target trial tag, clone trial tag,
    target trial iteration, clone trial iteration, old config, new config]
    on each perturbation step.

    Args:
        time_attr: The training result attr to use for comparing time.
            Note that you can pass in something non-temporal such as
            `training_iteration` as a measure of progress, the only requirement
            is that the attribute should increase monotonically.
        metric: The training result objective value attribute. Stopping
            procedures will use this attribute.
        mode: One of {min, max}. Determines whether objective is
            minimizing or maximizing the metric attribute.
        perturbation_interval: Models will be considered for
            perturbation at this interval of `time_attr`. Note that
            perturbation incurs checkpoint overhead, so you shouldn't set this
            to be too frequent.
        hyperparam_bounds: Hyperparameters to mutate. The format is
            as follows: for each key, enter a list of the form [min, max]
            representing the minimum and maximum possible hyperparam values.
            A key can also hold a dict for nested hyperparameters.
            Tune will sample uniformly between the bounds provided by
            `hyperparam_bounds` for the initial hyperparameter values if the
            corresponding hyperparameters are not present in a trial's initial `config`.
        quantile_fraction: Parameters are transferred from the top
            `quantile_fraction` fraction of trials to the bottom
            `quantile_fraction` fraction. Needs to be between 0 and 0.5.
            Setting it to 0 essentially implies doing no exploitation at all.
        custom_explore_fn: You can also specify a custom exploration
            function. This function is invoked as `f(config)`, where the input
            is the new config generated by Bayesian Optimization. This function
            should return the `config` updated as needed.
        log_config: Whether to log the ray config of each model to
            local_dir at each exploit. Allows config schedule to be
            reconstructed.
        require_attrs: Whether to require time_attr and metric to appear
            in result for every iteration. If True, error will be raised
            if these values are not present in trial result.
        synch: If False, will use asynchronous implementation of
            PBT. Trial perturbations occur every perturbation_interval for each
            trial independently. If True, will use synchronous implementation
            of PBT. Perturbations will occur only after all trials are
            synced at the same time_attr every perturbation_interval.
            Defaults to False. See Appendix A.1 here
            https://arxiv.org/pdf/1711.09846.pdf.

    Example:

        .. code-block:: python

            from ray import tune
            from ray.tune.schedulers.pb2 import PB2
            from ray.tune.examples.pbt_function import pbt_function

            pb2 = PB2(
                metric="mean_accuracy",
                mode="max",
                perturbation_interval=20,
                hyperparam_bounds={"lr": [0.0001, 0.1]},
            )
            tuner = tune.Tuner(
                pbt_function,
                tune_config=tune.TuneConfig(
                    scheduler=pb2,
                    num_samples=8,
                ),
                param_space={"lr": 0.0001},
            )
            tuner.fit()

    time_total_sN      N@      ?TF	time_attrmetricmodeperturbation_intervalr   quantile_fraction
log_configrequire_attrssynchcustom_explore_fnc                    t                      }|st          d          |pi }|st          d          t          t          |                               ||||||d|
|||	           d| _        t          j                    | _	        || _
        t          |d          | _        |                     | j                   d | _        d S )Nz'Please install scikit-learn to use PB2.z;`hyperparam_bounds` must be specified to use PB2 scheduler.r   )r   r   r   r   hyperparam_mutationsr   resample_probabilityr   r   r   r   T)prevent_delimiter)r   RuntimeErrorr
   superr   __init__last_exploration_timer   r   ro   _hyperparam_boundsr   _hyperparam_bounds_flat_validate_hyperparam_boundsr7   )selfr   r   r   r   r   r   r   r   r   r   sklearn_available	__class__s               r   r   zPB2.__init__R  s     455  	JHIII-3  	M   	c4!!"7!2/!"/!' 	" 	
 	
 	
 &'"LNN	"3'3(
 (
 (
$ 	(()EFFF
 r4   tune_controllerr   trialc                     t          |j        | j                  }|j                            t          |                     t                                          ||           d S )N)r&   r   r   evaluated_paramsupdater   r   on_trial_add)r   r   r   r/   r   s       r   r   zPB2.on_trial_add  sX    )%,8OPP%%l3E&F&FGGG_e44444r4   c                    |                                 D ]k\  }}t          |t          t          f          rt	          |          dk    rt          d| d| d          |\  }}||k    rt          d| d| d          ldS )	zCheck that each hyperparam bound is of the form [low, high].

        Raises:
            ValueError: if any of the hyperparam bounds are of an invalid format.
        r"   zM`hyperparam_bounds` values must either be a list or tuple of size 2, but got z instead for the param ''zV`hyperparam_bounds` values must be of the form [low, high] where low <= high, but got z instead for param 'z'.N)r#   r$   r'   r(   r+   
ValueError)r   r   keyvaluer2   r3   s         r   r   zPB2._validate_hyperparam_bounds  s     ,1133 	 	JCedE]33 s5zzQ 5:?5 5.15 5 5  
 ICTzz U27U UMPU U U   	 	r4   statetimeresultc                    t          t          |                               ||||          }t          | j                                                  }t          |j                  fd|D             }||| j                 g|z   |gz   g}ddg|z   dgz   }	t          j
        ||	          }
t          j        | j        |
g                              d          | _        | j        j                            d          | j        _        d S )	Nc                      g | ]
}|         S rF   rF   )rG   r   flattened_configs     r   rJ   z)PB2._save_trial_state.<locals>.<listcomp>  s    999C"3'999r4   r   rs   rw   rz   Tru   r   )r   r   _save_trial_stater'   r   r   r   r   
_time_attrr   r   r   ro   r}   r   r]   )r   r   r   r   r   scorenamesrM   r   r   entryr   r   s              @r   r   zPB2._save_trial_state  s    c4  225$NN
 T1668899'5599995999 vdo./&8E7BC 5(H:5S$///Ity%011==4=HH	)/0077	r4   trial_to_cloner    c           
      L   | j         d                                         | j        k    rd| _        t	          | j         | j        | j        ||t          |j                            \  }|                                | _         fd| j        D             }t          j
        |          }|                    d|j                  }| j         d                                         | j        k    r>| j         d                                         | _        |                                | _        nAt          j        | j        |fd          | _        t                              | j                   t!                    }| j        r!|                     |          }|
J d            |i fS )a  Gets new config for trial by exploring trial_to_clone's config using
        Bayesian Optimization (BO) to choose the hyperparameter values to explore.

        Overrides `PopulationBasedTraining._get_new_config`.

        Args:
            trial: The current trial that decided to exploit trial_to_clone.
            trial_to_clone: The top-performing trial with a hyperparameter config
                that the current trial will explore.

        Returns:
            new_config: New hyperparameter configuration (after BO).
            operations: Empty dict since PB2 doesn't explore in easily labeled ways
                like PBT does.
        rs   Nc                      g | ]
}|         S rF   rF   )rG   r   new_config_flats     r   rJ   z'PB2._get_new_config.<locals>.<listcomp>  s    LLLs#LLLr4   r=   r   r;   z5Custom explore function failed to return a new config)ro   rP   r   r7   r   r   r   r   r   r,   rL   rR   rT   rO   r)   r*   r   _custom_explore_fn)r   r   r   ro   r   r   r   s         @r   _get_new_configzPB2._get_new_config  s   $ 9V  ""T%???DL (I(L.//!
 !
 IIKK	
 MLLLt/KLLLhsmmkk!SX&&9V  ""T%???)-6):)>)>)@)@D&88::DLL>4<*=AFFFDLLL&&&#O44
" 	G00<<J&&F '&& 2~r4   )
r   NNr   Nr   TTFN)__name__
__module____qualname____doc__r   r   floatr   r	   r%   r'   r(   boolr   r   r   r   r   r   intr   r   r   __classcell__)r   s   @r   r   r      s       Y Yz ( $"'+AE#'">B3 33 3 sm	3
  %3  U4u+<%= =>3 !3 3 3 3 $HdVT\$:;3 3 3 3 3 3j5,< 5U 5 5 5 5 5 5T    (8#8+.88<8EJ8 8 8 8 8 8*7U 7E 7eDRVJFW 7 7 7 7 7 7 7 7r4   r   )8loggingr   r   typingr   r   r   r   r   r	   numpyr,   pandasr   ray.tuner
   ray.tune.experimentr   ray.tune.schedulersr   ray.tune.schedulers.pbtr   ray.tune.utils.utilr   r   ray.util.debugr   "ray.tune.execution.tune_controllerr   r   has_sklearnsklearn.gaussian_processr   ray.tune.schedulers.pb2_utilsr   r   r   r   r   r   	getLoggerr   r)   r   r%   r'   r(   r&   rL   r   ndarrayrn   r   r   r   r   rF   r4   r   <module>r      s          H H H H H H H H H H H H H H H H               % % % % % % 7 7 7 7 7 7 2 2 2 2 2 2 < < < < < < < < # # # # # # BAAAAAA   &%'' 
AAAAAA                
	8	$	$%)#uT45F/G*G%H	   @Y
(Y
(Y Y h	Y
 Y Y ZY Y Y YxK
,KeE5L))*K K 	K
 
K eE5L))*K 4K K K K\w w w w w
! w w w w wr4   