
    &`i/                     R   d dl Z d dlmZmZ d dlmZmZmZmZm	Z	  G d de
          Z G d de          Z G d d	e          Z G d
 de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          ZdS )    N)ListOptional)COLLECTIVE_TIMEOUT_S_ENV_VAR$DEFAULT_WORKER_GROUP_START_TIMEOUT_S%DEFAULT_WORKER_HEALTH_CHECK_TIMEOUT_S$WORKER_GROUP_START_TIMEOUT_S_ENV_VAR%WORKER_HEALTH_CHECK_TIMEOUT_S_ENV_VARc                       e Zd ZdZdS )RayTrainErrorz(Base class for all Ray Train exceptions.N__name__
__module____qualname____doc__     u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/train/v2/_internal/exceptions.pyr   r      s        2222r   r   c                   "     e Zd ZdZ fdZ xZS )WorkerHealthCheckTimeoutErrorzBException raised when a worker health check hangs for long enough.c                     t          j        t          t                    }|dt           d| dz  }t	                                          |           d S )Nz	
Set the z> environment variable to increase the timeout (current value: z
 seconds).)osgetenvr	   r   super__init__)selfmessagetimeout	__class__s      r   r   z&WorkerHealthCheckTimeoutError.__init__   sj    )13X
 
 	3> 3 3&3 3 3	

 	!!!!!r   )r   r   r   r   r   __classcell__r   s   @r   r   r      s>        LL	" 	" 	" 	" 	" 	" 	" 	" 	"r   r   c                   4     e Zd ZdZdef fdZd Zd Z xZS )WorkerHealthCheckFailedErrorz2Exception raised when a worker health check fails.failurec                 f    t                                          |           || _        || _        d S N)r   r   _messagehealth_check_failure)r   r   r#   r   s      r   r   z%WorkerHealthCheckFailedError.__init__$   s0    !!!$+!!!r   c                 ,    | j         | j        | j        ffS r%   )r   r&   r'   r   s    r   
__reduce__z'WorkerHealthCheckFailedError.__reduce__)   s    0I JKKr   c                 @    | j         dz   t          | j                  z   S )N
)r&   strr'   r)   s    r   __str__z$WorkerHealthCheckFailedError.__str__,   s    }t#c$*C&D&DDDr   )	r   r   r   r   	Exceptionr   r*   r.   r   r    s   @r   r"   r"   !   sr        <<, , , , , , ,
L L LE E E E E E Er   r"   c                   .     e Zd ZdZdef fdZd Z xZS )WorkerGroupStartupTimeoutErrora1  Exception raised when the worker group startup times out.

    Example scenario: 4 GPUs are detected in the cluster, but when the worker
    are actually scheduled, one of the nodes goes down and only 3 GPUs are
    available. One of the worker tasks may be stuck pending, until a timeout is reached.
    num_workersc           	          t          t          j                            t          t
                              }|| _        t                                          d| d| dt           d           d S )Nz)The worker group startup timed out after z seconds waiting for a   workers. Potential causes include: (1) temporary insufficient cluster resources while waiting for autoscaling (ignore this warning in this case), (2) infeasible resource request where the provided `ScalingConfig` cannot be satisfied), and (3) transient network issues. Set the z. environment variable to increase the timeout.)	floatr   environgetr   r   r2   r   r   )r   r2   r   r   s      r   r   z'WorkerGroupStartupTimeoutError.__init__8   s    JNN44 
 
 '	< 	< 	<	< 	< <	< 	< 	<	
 	
 	
 	
 	
r   c                      | j         | j        ffS r%   )r   r2   r)   s    r   r*   z)WorkerGroupStartupTimeoutError.__reduce__M   s    !1 344r   )r   r   r   r   intr   r*   r   r    s   @r   r1   r1   0   s]         
C 
 
 
 
 
 
*5 5 5 5 5 5 5r   r1   c                       e Zd ZdZdS )WorkerGroupStartupFailedErrorzException raised when the worker group fails to start.

    Example scenario: A worker is scheduled onto a node that dies while
    the worker actor is initializing.
    Nr   r   r   r   r:   r:   Q              r   r:   c                       e Zd ZdZdS )!InsufficientClusterResourcesErrorzException raised when the cluster has insufficient resources.

    Example scenario: A worker that requires 1 GPU is scheduled onto a cluster
    that only has CPU worker node types.
    Nr   r   r   r   r=   r=   Y   r;   r   r=   c                       e Zd ZdZdS )$CheckpointManagerInitializationErrora  Exception raised when the checkpoint manager fails to initialize from a snapshot.

    Example scenarios:
    1. The checkpoint manager snapshot version is old and
        incompatible with the current version of Ray Train.
    2. The checkpoint manager snapshot JSON file is corrupted.
    3. The checkpoint manager snapshot references checkpoints that cannot be found
        in the run storage path.
    Nr   r   r   r   r?   r?   a   s           r   r?   c                       e Zd ZdZdS )CollectiveTimeoutErrorzhException raised when an internal Ray Train collective operation of
    the worker group times out.
    Nr   r   r   r   rA   rA   m   s           r   rA   c                   N     e Zd ZdZdee         dee         def fdZd Z	 xZ
S )BroadcastCollectiveTimeoutErrora  Exception raised when the broadcast operation times out.

    There are two main timeout examples:
    1. If not all workers call `ray.train.report`, the entire worker group will
        hang until the timeout before raising. This prevents indefinite worker
        group hangs.
    2. If a worker is slow in the training loop and fails to reach the broadcast
        time, the collective will time out.
    time_elapsedmissing_ranks	timeout_sc           	          || _         || _        || _        d|dd| dt           d|dd	}t	                                          |           d S )Nz)The collective operation timed out after z.2fzH seconds. The following ranks have not joined the collective operation: z"
You can set the timeout with the z& environment variable (current value: zI seconds). Disable the timeout by setting the environment variable to -1.)_time_elapsed_missing_ranks
_timeout_sr   r   r   )r   rD   rE   rF   r   r   s        r   r   z(BroadcastCollectiveTimeoutError.__init__~   s     *+#MT M MMZM M0LM M 5>NM M M 	 	!!!!!r   c                 8    | j         | j        | j        | j        ffS r%   )r   rH   rI   rJ   r)   s    r   r*   z*BroadcastCollectiveTimeoutError.__reduce__   s#    N!4doF
 	
r   )r   r   r   r   r   r4   r   r8   r   r*   r   r    s   @r   rC   rC   s   sv         "$UO"<@I"RW" " " " " " 
 
 
 
 
 
 
r   rC   c                   .    e Zd ZdZdedefdZd Zd ZdS )UserExceptionWithTracebacka"  This class wraps a user code exception raised on the worker
    with its original traceback string, for logging and debugging purposes.

    This is needed because the original exception traceback is not serialized
    with the exception when it is *returned* back to the main process.
    exctraceback_strc                 "    || _         || _        d S r%   )	_base_exc_traceback_str)r   rN   rO   s      r   r   z#UserExceptionWithTraceback.__init__   s    +r   c                 ,    | j         | j        | j        ffS r%   )r   rQ   rR   r)   s    r   r*   z%UserExceptionWithTraceback.__reduce__   s    1D EFFr   c                     | j         S r%   )rR   r)   s    r   r.   z"UserExceptionWithTraceback.__str__   s    ""r   N)	r   r   r   r   BaseExceptionr-   r   r*   r.   r   r   r   rM   rM      sa         ,M ,# , , , ,G G G# # # # #r   rM   )r   typingr   r    ray.train.v2._internal.constantsr   r   r   r   r	   r/   r   r   r"   r1   r:   r=   r?   rA   rC   rM   r   r   r   <module>rX      s%   				 ! ! ! ! ! ! ! !             3 3 3 3 3I 3 3 3" " " " "M " " "E E E E E= E E E5 5 5 5 5] 5 5 5B    M          	 	 	 	 	= 	 	 	    ]   
 
 
 
 
&< 
 
 
D# # # # # # # # # #r   