
    &`i9                        d dl Z d dlmZ d dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZmZ d dlZd dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dl m!Z! erd dl"m#Z#  e j$        e%          Z&e G d de                      Ze e!d           G d d                                  Z'e G d de                      Ze e!d           G d d                                  Z(dS )    N)	dataclass)cached_property)Path)TYPE_CHECKINGDictListLiteralOptionalUnion)FailureConfigScalingConfig)
RuntimeEnv)_DEPRECATED)StorageContext)FAIL_FAST_DEPRECATION_MESSAGE%TRAINER_RESOURCES_DEPRECATION_MESSAGE)date_str)	PublicAPI)UserCallbackc                       e Zd ZU dZdZee         ed<   dZe	e
         ed<   dZee         ed<   dZee	eeef         eeeef                  f                  ed<    fdZe fd	            Zed
             Zed             Z xZS )r   ac
  Configuration for scaling training.

    Args:
        num_workers: The number of workers (Ray actors) to launch.
            Each worker will reserve 1 CPU by default. The number of CPUs
            reserved by each worker can be overridden with the
            ``resources_per_worker`` argument. If the number of workers is 0,
            the training function will run in local mode, meaning the training
            function runs in the same process.
        use_gpu: If True, training will be done on GPUs (1 per worker).
            Defaults to False. The number of GPUs reserved by each
            worker can be overridden with the ``resources_per_worker``
            argument.
        resources_per_worker: If specified, the resources
            defined in this Dict is reserved for each worker.
            Define the ``"CPU"`` and ``"GPU"`` keys (case-sensitive) to
            override the number of CPU or GPUs used by each worker.
        placement_strategy: The placement strategy to use for the
            placement group of the Ray actors. See :ref:`Placement Group
            Strategies <pgroup-strategy>` for the possible options.
        label_selector: A list of label selectors for Ray Train worker placement.
            If a single label selector is provided, it will be applied to all Ray Train workers.
            If a list is provided, it must be the same length as the max number of Ray Train workers.
        accelerator_type: [Experimental] If specified, Ray Train will launch the
            training coordinator and workers on the nodes with the specified type
            of accelerators.
            See :ref:`the available accelerator types <accelerator_types>`.
            Ensure that your cluster has instances with the specified accelerator type
            or is able to autoscale to fulfill the request. This field is required
            when `use_tpu` is True and `num_workers` is greater than 1.
        use_tpu: [Experimental] If True, training will be done on TPUs (1 TPU VM
            per worker). Defaults to False. The number of TPUs reserved by each
            worker can be overridden with the ``resources_per_worker``
            argument. This arg enables SPMD execution of the training workload.
        topology: [Experimental] If specified, Ray Train will launch the training
            coordinator and workers on nodes with the specified topology. Topology is
            auto-detected for TPUs and added as Ray node labels. This arg enables
            SPMD execution of the training workload. This field is required
            when `use_tpu` is True and `num_workers` is greater than 1.
    Ntrainer_resourcesFuse_tputopologylabel_selectorc                    | j         t          t                    | j        r| j        rt          d          | j        s| j        dk    rt          d          | j        r| j        dk    rt          d          | j        rM| j        dk    rB| j        st          d          | j	        st          d          | j
        rt          d          t          | j
        t                    rFt          | j        t                    r,t          | j
                  | j        k    rt          d	          | j        dk    rt                              d
           t#                                                       d S )Nz6Cannot specify both `use_gpu=True` and `use_tpu=True`.r   z`use_tpu` is False but `TPU` was found in `resources_per_worker`. Either set `use_tpu` to True or remove `TPU` from `resources_per_worker.z`use_tpu` is True but `TPU` is set to 0 in `resources_per_worker`. Either set `use_tpu` to False or request a positive number of `TPU` in `resources_per_worker.   zY`topology` must be specified in ScalingConfig when `use_tpu=True`  and `num_workers` > 1.z``accelerator_type` must be specified in ScalingConfig when `use_tpu=True` and `num_workers` > 1.z}Cannot set `label_selector` when `use_tpu=True` because Ray Train automatically reserves a TPU slice with a predefined label.zKIf `label_selector` is a list, it must be the same length as `num_workers`.zRunning in local mode. The training function will run in the same process. If you are using it and running into issues please file a report at https://github.com/ray-project/ray/issues.)r   DeprecationWarningr   use_gpur   
ValueErrornum_tpus_per_workernum_workersr   accelerator_typer   
isinstancelistintlenloggerinfosuper__post_init__self	__class__s    k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/train/v2/api/config.pyr*   zScalingConfig.__post_init__M   s   !-$%JKKK< 	WDL 	WUVVV| 	 81 < <;   < 	D499)   < 	D,q00=  .   (  <   "  \   t*D11	4+S11	 D'((D,<<<]   q  KK=   	    c                 N    | j         | j        rddiS t                      j        S )NTPUr   )resources_per_workerr   r)   _resources_per_worker_not_noner+   s    r.   r3   z,ScalingConfig._resources_per_worker_not_none   s-    $,| "qz!ww55r/   c                     i S N r,   s    r.   _trainer_resources_not_nonez)ScalingConfig._trainer_resources_not_none   s    	r/   c                 8    | j                             dd          S )z%The number of TPUs to set per worker.r1   r   )r3   getr7   s    r.   r    z!ScalingConfig.num_tpus_per_worker   s     266ua@@@r/   )__name__
__module____qualname____doc__r   r
   dict__annotations__r   r   boolr   strr   r   r   r*   propertyr3   r8   r    __classcell__)r-   s   @r.   r   r      s        ' 'R )-x~,,, GU4[   "Hhsm"""LPNHU4S>4S#X3G#GHIPPP7  7  7  7  7 r 6 6 6 6 X6   X A A XA A A A Ar/   r   stable)	stabilityc                       e Zd ZU dZdZee         ed<   dZee	         ed<   dZ
ed         ed<   eZeee         ee         f         ed<   eZeee         ee         f         ed	<   d
 ZdS )CheckpointConfiga  Configuration for checkpointing.

    Default behavior is to persist all checkpoints reported with
    :meth:`ray.train.report` to disk. If ``num_to_keep`` is set,
    the default retention policy is to keep the most recent checkpoints.

    Args:
        num_to_keep: The maximum number of checkpoints to keep.
            If you report more checkpoints than this, the oldest
            (or lowest-scoring, if ``checkpoint_score_attribute`` is set)
            checkpoint will be deleted.
            If this is ``None`` then all checkpoints will be kept. Must be >= 1.
        checkpoint_score_attribute: The attribute that will be used to
            score checkpoints to determine which checkpoints should be kept.
            This attribute must be a key from the metrics dictionary
            attached to the checkpoint. This attribute must have a numerical value.
        checkpoint_score_order: Either "max" or "min".
            If "max"/"min", then checkpoints with highest/lowest values of
            the ``checkpoint_score_attribute`` will be kept. Defaults to "max".
        checkpoint_frequency: [Deprecated]
        checkpoint_at_end: [Deprecated]
    Nnum_to_keepcheckpoint_score_attributemaxrK   mincheckpoint_score_ordercheckpoint_frequencycheckpoint_at_endc                    | j         t          k    rt          d          | j        t          k    rt          d          | j        #| j        dk    rt          d| j         d          | j        dvrt          d| j         d          d S )	Nz`checkpoint_frequency` is deprecated since it does not apply to user-defined training functions. Please remove this argument from your CheckpointConfig.z`checkpoint_at_end` is deprecated since it does not apply to user-defined training functions. Please remove this argument from your CheckpointConfig.r   zReceived invalid num_to_keep: z". Must be None or an integer >= 1.rL   z)Received invalid checkpoint_score_order: z. Must be 'max' or 'min'.)rO   r   r   rP   rI   r   rN   r7   s    r.   r*   zCheckpointConfig.__post_init__   s    $33$J   ![00$J   'D,<,A,A31A 3 3 3  
 &n<<*D<W * * *   =<r/   )r;   r<   r=   r>   rI   r
   r%   r@   rJ   rB   rN   r	   r   rO   r   rP   rA   r*   r6   r/   r.   rH   rH      s          . "&K#%%%0444449GL1999GR%w{/C CDRRREPuXd^W[-AABPPP    r/   rH   c                   F    e Zd ZU dZeZeeef         e	d<   dZ
ee	d<   d ZdS )r   ah  Configuration related to failure handling of each training run.

    Args:
        max_failures: Tries to recover a run from training worker errors at least this many times.
            Will recover from the latest checkpoint if present.
            Setting to -1 will lead to infinite recovery retries.
            Setting to 0 will disable retries. Defaults to 0.
        controller_failure_limit: [DeveloperAPI] The maximum number of controller failures to tolerate.
            Setting to -1 will lead to infinite controller retries.
            Setting to 0 will disable controller retries. Defaults to -1.
    	fail_fastcontroller_failure_limitc                 N    | j         t          k    rt          t                    d S r5   )rS   r   r   r   r7   s    r.   r*   zFailureConfig.__post_init__   s&    >[(($%BCCC )(r/   N)r;   r<   r=   r>   r   rS   r   rA   rB   r@   rU   r%   r*   r6   r/   r.   r   r      s`         
 
 #.IuT3Y---$&c&&&D D D D Dr/   r   c                   b   e Zd ZU dZdZee         ed<   dZee         ed<   dZ	ee
j        j                 ed<   dZee         ed<   dZee         ed<   dZeed                  ed	<   dZeeeef                  ed
<   eZeed<   eZeed<   eZeed<   eZeed<   eZeed<   d Zedefd            Z dS )	RunConfiga6  Runtime configuration for training runs.

    Args:
        name: Name of the trial or experiment. If not provided, will be deduced
            from the Trainable.
        storage_path: Path where all results and checkpoints are persisted.
            Can be a local directory or a destination on cloud storage.
            For multi-node training/tuning runs, this must be set to a
            shared storage location (e.g., S3, NFS).
            This defaults to the local ``~/ray_results`` directory.
        storage_filesystem: A custom filesystem to use for storage.
            If this is provided, `storage_path` should be a path with its
            prefix stripped (e.g., `s3://bucket/path` -> `bucket/path`).
        failure_config: Failure mode configuration.
        checkpoint_config: Checkpointing configuration.
        callbacks: [DeveloperAPI] A list of callbacks that the Ray Train controller
            will invoke during training.
        worker_runtime_env: [DeveloperAPI] Runtime environment configuration
            for all Ray Train worker actors.
    Nnamestorage_pathstorage_filesystemfailure_configcheckpoint_configr   	callbacksworker_runtime_envsync_configverbosestopprogress_reporterlog_to_filec                 L   ddl m} | j        || _        | j        st	                      | _        | j        st                      | _        t          | j        t                    r| j        	                                | _        d}g d}|D ]=}t          | |          t          k    r"t          |                    |                    >| j        sdt                       | _        | j        pg | _        | j        pi | _        ddlm t)          fd| j        D                       st+          d          t          | j        t                    st+          d	| j        j         d
          t          | j        t                    st+          d| j        j         d          d S )Nr   )DEFAULT_STORAGE_PATHaI  `RunConfig({})` is deprecated. This configuration was a Ray Tune API that did not support Ray Train usage well, so we are dropping support going forward. If you heavily rely on these configurations, you can run Ray Train as a single Ray Tune trial. See this issue for more context: https://github.com/ray-project/ray/issues/49454)r`   ra   rb   rc   rd   zray_train_run-)RayTrainCallbackc              3   8   K   | ]}t          |          V  d S r5   )r#   ).0cbrg   s     r.   	<genexpr>z*RunConfig.__post_init__.<locals>.<genexpr><  s.      MM:b"233MMMMMMr/   zAll callbacks must be instances of `ray.train.UserCallback`. Passing in a Ray Tune callback is no longer supported. See this issue for more context: https://github.com/ray-project/ray/issues/49454z!Invalid `CheckpointConfig` type: z|. Use `ray.train.CheckpointConfig` instead. See this issue for more context: https://github.com/ray-project/ray/issues/49454zInvalid `FailureConfig` type: zy. Use `ray.train.FailureConfig` instead. See this issue for more context: https://github.com/ray-project/ray/issues/49454)ray.train.constantsrf   rZ   r\   r   r]   rH   r#   r   as_posixgetattrr   r   formatrY   r   r^   r_   ray.train.v2.api.callbackrg   allr   r-   )r,   rf   run_config_deprecation_messageunsupported_paramsparamrg   s        @r.   r*   zRunConfig.__post_init__  s   <<<<<<$ 4D" 	2"///D% 	8%5%7%7D"d'.. 	= $ 1 : : < <D> 	'
 
 
 ( 	W 	WEtU##{22()G)N)Nu)U)UVVV 3 y 	6555DI-2"&"9"?R>>>>>>MMMMdnMMMMM 	B   $02BCC 	BD4J4T B B B   $-}== 	B1D1N B B B  	 	r/   returnc                 D    t          | j        | j        | j                  S )N)rZ   experiment_dir_namer[   )r   rZ   rY   r[   r7   s    r.   storage_contextzRunConfig.storage_contextT  s*    * $	#6
 
 
 	
r/   )!r;   r<   r=   r>   rY   r
   rB   r@   rZ   r[   pyarrowfs
FileSystemr\   r   r]   rH   r^   r   r_   r   r?   r   r   r`   ra   rb   rc   rd   r*   r   r   rx   r6   r/   r.   rX   rX      sV         * D(3-"&L(3-&&&:>!67>>>.2NH]+22248x 0188804Ix^,-444<@tZ'7!89@@@"K"""GSD#(s((("K"""B B BH 
 
 
 
 _
 
 
r/   rX   ))loggingdataclassesr   	functoolsr   pathlibr   typingr   r   r   r	   r
   r   
pyarrow.fsry   ray.air.configr   FailureConfigV1r   ScalingConfigV1ray.runtime_envr    ray.train.v2._internal.constantsr   (ray.train.v2._internal.execution.storager   &ray.train.v2._internal.migration_utilsr   r   ray.train.v2._internal.utilr   ray.util.annotationsr   	ray.trainr   	getLoggerr;   r'   rH   rX   r6   r/   r.   <module>r      s    ! ! ! ! ! ! % % % % % %       F F F F F F F F F F F F F F F F            ' & & & & & 8 8 8 8 8 8 C C C C C C        1 0 0 0 0 0 * * * * * * '&&&&&&		8	$	$ wA wA wA wA wAO wA wA wAt 
X7 7 7 7 7 7 7  7t D D D D DO D D D* 
Xn
 n
 n
 n
 n
 n
 n
  n
 n
 n
r/   