import logging
import signal
import sys
import threading
from typing import Any, Callable, Dict, List, Optional, Union

import ray
from ray._common.constants import RAY_WARN_BLOCKING_GET_INSIDE_ASYNC_ENV_VAR
from ray._common.usage import usage_lib
from ray._private.ray_constants import env_bool
from ray.actor import ActorHandle
from ray.air._internal.usage import tag_train_v2_trainer
from ray.train import (
    BackendConfig,
    Checkpoint,
    DataConfig,
    Result,
    RunConfig,
    ScalingConfig,
)
from ray.train.base_trainer import (
    _RESUME_FROM_CHECKPOINT_DEPRECATION_WARNING,
    _TRAINER_RESTORE_DEPRECATION_WARNING,
)
from ray.train.constants import (
    RAY_CHDIR_TO_TRIAL_DIR,
    RAY_TRAIN_ENABLE_STATE_TRACKING,
)
from ray.train.context import _GET_METADATA_DEPRECATION_MESSAGE
from ray.train.v2._internal.callbacks import (
    AcceleratorSetupCallback,
    BackendSetupCallback,
    DatasetsSetupCallback,
    TPUReservationCallback,
    WorkingDirectorySetupCallback,
)
from ray.train.v2._internal.callbacks.env_callback import _initialize_env_callbacks
from ray.train.v2._internal.callbacks.metrics import (
    ControllerMetricsCallback,
    WorkerMetricsCallback,
)
from ray.train.v2._internal.callbacks.placement_group_callback import (
    PlacementGroupCleanerCallback,
)
from ray.train.v2._internal.callbacks.state_manager import StateManagerCallback
from ray.train.v2._internal.callbacks.user_callback import UserCallbackHandler
from ray.train.v2._internal.constants import (
    DEFAULT_RAY_WARN_BLOCKING_GET_INSIDE_ASYNC_VALUE,
    METRICS_ENABLED_ENV_VAR,
    V2_ENABLED_ENV_VAR,
    get_env_vars_to_propagate,
    is_v2_enabled,
)
from ray.train.v2._internal.data_integration.interfaces import GenDataset
from ray.train.v2._internal.execution.callback import RayTrainCallback
from ray.train.v2._internal.execution.context import TrainRunContext
from ray.train.v2._internal.execution.controller import TrainController
from ray.train.v2._internal.execution.failure_handling import create_failure_policy
from ray.train.v2._internal.execution.local_mode.utils import LocalController
from ray.train.v2._internal.execution.scaling_policy import create_scaling_policy
from ray.train.v2._internal.util import ObjectRefWrapper, construct_train_func
from ray.train.v2.api.callback import UserCallback
from ray.util.annotations import Deprecated, DeveloperAPI
from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy

logger = logging.getLogger(__name__)


@DeveloperAPI
class DataParallelTrainer:
    """Base class for distributed data parallel training on Ray.

    This class supports the SPMD parallelization pattern, where a single
    training function is executed in parallel across multiple workers,
    and different shards of data are processed by each worker.
    """

    def __init__(
        self,
        train_loop_per_worker: Union[Callable[[], None], Callable[[Dict], None]],
        *,
        train_loop_config: Optional[Dict] = None,
        backend_config: Optional[BackendConfig] = None,
        scaling_config: Optional[ScalingConfig] = None,
        run_config: Optional[RunConfig] = None,
        datasets: Optional[Dict[str, GenDataset]] = None,
        dataset_config: Optional[DataConfig] = None,
        resume_from_checkpoint: Optional[Checkpoint] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ):
        self.run_config = run_config or RunConfig()
        self.train_loop_per_worker = train_loop_per_worker
        self.train_loop_config = train_loop_config
        self.scaling_config = scaling_config or ScalingConfig()
        self.backend_config = backend_config or BackendConfig()
        self.datasets = datasets or {}
        self.data_config = dataset_config or DataConfig()

        # Local mode runs the training function in the current process instead
        # of launching a remote worker group.
        self.running_in_local_mode = self.scaling_config.num_workers == 0

        self.train_run_context = TrainRunContext(
            run_config=self.run_config,
            train_loop_config=self.train_loop_config,
            scaling_config=self.scaling_config,
            backend_config=self.backend_config,
            datasets=self.datasets,
            dataset_config=self.data_config,
        )

        if resume_from_checkpoint is not None:
            raise DeprecationWarning(_RESUME_FROM_CHECKPOINT_DEPRECATION_WARNING)

        if metadata is not None:
            raise DeprecationWarning(_GET_METADATA_DEPRECATION_MESSAGE)

        self._validate_configs()

        usage_lib.record_library_usage("train")
        tag_train_v2_trainer(self)

    def _validate_configs(self):
        if not is_v2_enabled():
            raise ValueError(
                f"Ray Train V2 must be enabled with `{V2_ENABLED_ENV_VAR}=1` "
                "when using this V2 Trainer API."
            )

        from ray.train.v2.api.config import (
            RunConfig as RunConfigV2,
            ScalingConfig as ScalingConfigV2,
        )

        if not isinstance(self.run_config, RunConfigV2):
            raise ValueError(
                f"Invalid `RunConfig` type: {self.run_config.__class__}. "
                "Use `ray.train.RunConfig` instead. "
                "See this issue for more context: "
                "https://github.com/ray-project/ray/issues/49454"
            )

        if not isinstance(self.scaling_config, ScalingConfigV2):
            raise ValueError(
                f"Invalid `ScalingConfig` type: {self.scaling_config.__class__}. "
                "Use `ray.train.ScalingConfig` instead. "
                "See this issue for more context: "
                "https://github.com/ray-project/ray/issues/49454"
            )

    def _get_train_func(self) -> Callable[[], None]:
        return construct_train_func(
            self.train_loop_per_worker,
            config=self.train_loop_config,
            train_func_context=self.backend_config.train_func_context,
            fn_arg_name="train_loop_per_worker",
        )

    def fit(self) -> Result:
        """Launches the Ray Train controller to run training on workers.

        Returns:
            A Result object containing the training result.

        Raises:
            ray.train.TrainingFailedError: This is a union of the ControllerError and WorkerGroupError.
                This returns a :class:`ray.train.ControllerError` if internal Ray Train controller logic
                encounters a non-retryable error or reaches the controller failure limit configured in `FailureConfig`.
                This returns a :class:`ray.train.WorkerGroupError` if one or more workers fail during
                training and reaches the worker group failure limit configured in `FailureConfig(max_failures)`.
        """
        train_fn = self._get_train_func()

        if self.running_in_local_mode:
            return self._initialize_and_run_local_controller(train_fn)

        train_fn_ref = ObjectRefWrapper(train_fn)

        result = self._initialize_and_run_controller(
            train_fn_ref=train_fn_ref,
            scaling_policy=create_scaling_policy(self.scaling_config),
            failure_policy=create_failure_policy(self.run_config.failure_config),
            train_run_context=self.train_run_context,
            callbacks=self._create_default_callbacks(),
        )

        if result.error:
            # Surface the training failure to the caller.
            raise result.error

        return result

    def _get_local_controller(self) -> LocalController:
        return LocalController(
            experiment_name=self.run_config.name,
            datasets=self.datasets,
        )

    def _create_default_callbacks(self) -> List[RayTrainCallback]:
        # Callbacks configured through environment variables come first.
        callbacks = _initialize_env_callbacks()

        accelerator_setup_callback = AcceleratorSetupCallback(
            self.backend_config, self.scaling_config
        )
        backend_setup_callback = BackendSetupCallback(self.backend_config)
        datasets_setup_callback = DatasetsSetupCallback(
            datasets=self.datasets,
            data_config=self.data_config,
            scaling_config=self.scaling_config,
        )
        tpu_reservation_setup_callback = TPUReservationCallback()
        placement_group_cleaner_callback = PlacementGroupCleanerCallback()

        callbacks.extend(
            [
                accelerator_setup_callback,
                backend_setup_callback,
                datasets_setup_callback,
                tpu_reservation_setup_callback,
                placement_group_cleaner_callback,
            ]
        )

        if env_bool(RAY_CHDIR_TO_TRIAL_DIR, True):
            working_directory_setup_callback = WorkingDirectorySetupCallback()
            callbacks.append(working_directory_setup_callback)

        if env_bool(METRICS_ENABLED_ENV_VAR, True):
            callbacks.append(ControllerMetricsCallback())
            callbacks.append(WorkerMetricsCallback(train_run_context=self.train_run_context))

        if env_bool(RAY_TRAIN_ENABLE_STATE_TRACKING, False):
            callbacks.append(StateManagerCallback())

        run_config_callbacks = (
            self.run_config.callbacks if self.run_config.callbacks is not None else []
        )
        # User-facing callbacks are wrapped in a single handler; all other
        # callbacks from the run config are added directly.
        user_callbacks = [
            cb for cb in run_config_callbacks if isinstance(cb, UserCallback)
        ]
        callbacks.append(
            UserCallbackHandler(
                user_callbacks=user_callbacks, train_run_context=self.train_run_context
            )
        )
        callbacks.extend(
            [cb for cb in run_config_callbacks if not isinstance(cb, UserCallback)]
        )

        return callbacks

    def _initialize_and_run_local_controller(
        self, train_func: Callable[[], None]
    ) -> Result:
        return self._get_local_controller().run(train_func)

    def _initialize_and_run_controller(self, **controller_init_kwargs) -> Result:
        # Propagate relevant environment variables from the driver to the
        # controller actor.
        env_vars = get_env_vars_to_propagate()
        env_vars.setdefault(
            RAY_WARN_BLOCKING_GET_INSIDE_ASYNC_ENV_VAR,
            DEFAULT_RAY_WARN_BLOCKING_GET_INSIDE_ASYNC_VALUE,
        )

        # Schedule the controller on the same node as the driver.
        controller_actor_cls = ray.remote(
            num_cpus=0,
            scheduling_strategy=NodeAffinitySchedulingStrategy(
                node_id=ray.get_runtime_context().get_node_id(),
                soft=False,
            ),
            runtime_env={"env_vars": env_vars},
        )(TrainController)

        controller = controller_actor_cls.remote(**controller_init_kwargs)

        # Signal handlers can only be registered from the main thread.
        if threading.current_thread() is threading.main_thread():
            self._register_sigint_handler(controller)

        ray.get(controller.run.remote())
        return ray.get(controller.get_result.remote())

    def _register_sigint_handler(self, controller: ActorHandle):
        """Register SIGINT handler so user Ctrl+C gracefully aborts run."""
        sigint_count = 0

        def sigint_handler(signum, frame):
            nonlocal sigint_count
            sigint_count += 1

            logger.info(
                "Received SIGINT. Gracefully aborting the training run — "
                "this may take a few seconds. To forcefully abort immediately, "
                "you can send a different signal, such as SIGKILL."
            )
            if sigint_count >= 3:
                logger.info(
                    "Received SIGINT at least 3 times. "
                    "Forcefully aborting the training run."
                )
                sys.exit(1)
            if sigint_count == 1:
                try:
                    ray.get(controller.abort.remote())
                except ray.exceptions.ActorDiedError:
                    sys.exit(1)

        signal.signal(signal.SIGINT, sigint_handler)

    @classmethod
    @Deprecated
    def restore(cls, *args, **kwargs):
        """[Deprecated] Restores a Train experiment from a previously
        interrupted/failed run.

        This method is deprecated and will be removed in a future release.
        """
        raise DeprecationWarning(_TRAINER_RESTORE_DEPRECATION_WARNING)

    @classmethod
    @Deprecated
    def can_restore(cls, *args, **kwargs):
        """[Deprecated] Checks if a Train experiment can be restored from
        a previously interrupted/failed run.

        This method is deprecated and will be removed in a future release.
        """
        raise DeprecationWarning(_TRAINER_RESTORE_DEPRECATION_WARNING)
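
    # Illustrative sketch (assumptions, not taken from this module): driving
    # `fit()` from a script and attaching a custom `UserCallback` through
    # `RunConfig(callbacks=...)`. `MyCallback` and `train_fn` are hypothetical
    # names.
    #
    #     class MyCallback(UserCallback):
    #         ...  # override the reporting hooks you need
    #
    #     trainer = DataParallelTrainer(
    #         train_fn,
    #         scaling_config=ScalingConfig(num_workers=2),
    #         run_config=RunConfig(callbacks=[MyCallback()]),
    #     )
    #     try:
    #         result = trainer.fit()
    #     except ray.train.TrainingFailedError:
    #         # A controller or worker-group failure exceeded the FailureConfig
    #         # limits (see the `fit` docstring above).
    #         raise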