
    &`i                         d dl Z d dlmZmZmZmZmZmZ d dlZ	d dlm
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZ erd dlmZ  e j        e          Z G d	 d
e          ZdS )    N)TYPE_CHECKINGAnyCallableDictOptionalUnion)
Checkpoint)
GenDataset)	RunConfigScalingConfig)DataParallelTrainer)
DeprecatedXGBoostConfigc                       e Zd ZdZdddddddddddddeeg df         eegdf         f         dee         ded         dee         d	ee	         d
eee
ef                  deej        j                 deee
ef                  dee         dee
         deee
ef                  dee         f fdZeedefd                        Z xZS )XGBoostTrainera^  A Trainer for distributed data-parallel XGBoost training.

    Example
    -------

    .. testcode::

        import xgboost

        import ray.data
        import ray.train
        from ray.train.xgboost import RayTrainReportCallback
        from ray.train.xgboost import XGBoostTrainer

        def train_fn_per_worker(config: dict):
            # (Optional) Add logic to resume training state from a checkpoint.
            # ray.train.get_checkpoint()

            # 1. Get the dataset shard for the worker and convert to a `xgboost.DMatrix`
            train_ds_iter, eval_ds_iter = (
                ray.train.get_dataset_shard("train"),
                ray.train.get_dataset_shard("validation"),
            )
            train_ds, eval_ds = train_ds_iter.materialize(), eval_ds_iter.materialize()

            train_df, eval_df = train_ds.to_pandas(), eval_ds.to_pandas()
            train_X, train_y = train_df.drop("y", axis=1), train_df["y"]
            eval_X, eval_y = eval_df.drop("y", axis=1), eval_df["y"]

            dtrain = xgboost.DMatrix(train_X, label=train_y)
            deval = xgboost.DMatrix(eval_X, label=eval_y)

            params = {
                "tree_method": "approx",
                "objective": "reg:squarederror",
                "eta": 1e-4,
                "subsample": 0.5,
                "max_depth": 2,
            }

            # 2. Do distributed data-parallel training.
            # Ray Train sets up the necessary coordinator processes and
            # environment variables for your workers to communicate with each other.
            bst = xgboost.train(
                params,
                dtrain=dtrain,
                evals=[(deval, "validation")],
                num_boost_round=1,
                callbacks=[RayTrainReportCallback()],
            )

        train_ds = ray.data.from_items([{"x": x, "y": x + 1} for x in range(32)])
        eval_ds = ray.data.from_items([{"x": x, "y": x + 1} for x in range(16)])
        trainer = XGBoostTrainer(
            train_fn_per_worker,
            datasets={"train": train_ds, "validation": eval_ds},
            scaling_config=ray.train.ScalingConfig(num_workers=2),
        )
        result = trainer.fit()
        booster = RayTrainReportCallback.get_model(result.checkpoint)

    Args:
        train_loop_per_worker: The training function to execute on each worker.
            This function can either take in zero arguments or a single ``Dict``
            argument which is set by defining ``train_loop_config``.
            Within this function you can use any of the
            :ref:`Ray Train Loop utilities <train-loop-api>`.
        train_loop_config: A configuration ``Dict`` to pass in as an argument to
            ``train_loop_per_worker``.
            This is typically used for specifying hyperparameters.
        xgboost_config: The configuration for setting up the distributed xgboost
            backend. Defaults to using the "rabit" backend.
            See :class:`~ray.train.xgboost.XGBoostConfig` for more info.
        scaling_config: The configuration for how to scale data parallel training.
            ``num_workers`` determines how many Python processes are used for training,
            and ``use_gpu`` determines whether or not each process should use GPUs.
            See :class:`~ray.train.ScalingConfig` for more info.
        run_config: The configuration for the execution of the training run.
            See :class:`~ray.train.RunConfig` for more info.
        datasets: The Ray Datasets to ingest for training.
            Datasets are keyed by name (``{name: dataset}``).
            Each dataset can be accessed from within the ``train_loop_per_worker``
            by calling ``ray.train.get_dataset_shard(name)``.
            Sharding and additional configuration can be done by
            passing in a ``dataset_config``.
        dataset_config: The configuration for ingesting the input ``datasets``.
            By default, all the Ray Dataset are split equally across workers.
            See :class:`~ray.train.DataConfig` for more details.
        resume_from_checkpoint: [Deprecated]
        metadata: [Deprecated]
    N)train_loop_configxgboost_configscaling_config
run_configdatasetsdataset_configmetadataresume_from_checkpointlabel_columnparamsnum_boost_roundtrain_loop_per_workerr   r   r   r   r   r   r   r   r   r   r   r   c                    |
||t          d          ddlm} t          t          |                               |||p	 |            |||||	|	  	         d S )NzThe legacy XGBoostTrainer API is deprecated. Please switch to passing in a custom `train_loop_per_worker` function instead. See this issue for more context: https://github.com/ray-project/ray/issues/50042r   r   )	r   r   backend_configr   r   r   r   r   r   )DeprecationWarningray.train.xgboostr   superr   __init__)selfr   r   r   r   r   r   r   r   r   r   r   r   r   	__class__s                 x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/train/v2/xgboost/xgboost_trainer.pyr$   zXGBoostTrainer.__init__n   s    & $!*$B   	433333nd##,,"7/)<]]__))!#9 	- 
	
 
	
 
	
 
	
 
	
    
checkpointc                      t          d          )zB[Deprecated] Retrieve the XGBoost model stored in this checkpoint.zY`XGBoostTrainer.get_model` is deprecated. Use `RayTrainReportCallback.get_model` instead.)r!   )clsr)   s     r'   	get_modelzXGBoostTrainer.get_model   s     !>
 
 	
r(   )__name__
__module____qualname____doc__r   r   r   r   r   r   strr
   raytrain
DataConfigr   r	   intr$   classmethodr   r,   __classcell__)r&   s   @r'   r   r      s       Z Z@ -14826*.489=-17;&*+/)-!+
 +
 +
$Xb$h%74&$,9O%OP+
 $D>	+

 !1+
 !/+
 Y'+
 4Z01+
 !!56+
 4S>*+
 !) 4+
 sm+
 c3h(+
  "#!+
 +
 +
 +
 +
 +
Z 
: 
 
 
 Z [
 
 
 
 
r(   r   )loggingtypingr   r   r   r   r   r   	ray.trainr2   r	   ray.train.trainerr
   ray.train.v2.api.configr   r   &ray.train.v2.api.data_parallel_trainerr   ray.util.annotationsr   r"   r   	getLoggerr-   loggerr    r(   r'   <module>rB      s    F F F F F F F F F F F F F F F F                 ( ( ( ( ( ( < < < < < < < < F F F F F F + + + + + + 0//////		8	$	$Q
 Q
 Q
 Q
 Q
( Q
 Q
 Q
 Q
 Q
r(   