
    &`i                     ,   d dl Z d dlZd dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZmZ d dlmZ  e j        e          Z ed	
          e G d de                                  ZdedededededefdZd Z G d de          ZdS )    N)	dataclass)ray_constants)get_address_and_port)WorkerGroup)BackendBackendConfig)*DEFAULT_JAX_DISTRIBUTED_SHUTDOWN_TIMEOUT_S"JAX_DISTRIBUTED_SHUTDOWN_TIMEOUT_S)	PublicAPIalpha)	stabilityc                   B    e Zd ZU dZeed<   dZeed<   ed             ZdS )	JaxConfigFuse_tpuuse_gpuc                     t           S N)_JaxBackend)selfs    k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/train/v2/jax/config.pybackend_clszJaxConfig.backend_cls   s        N)	__name__
__module____qualname__r   bool__annotations__r   propertyr    r   r   r   r      sQ          GTGT  X  r   r   master_addr_with_portnum_workersindexr   r   resources_per_workerc                    t           j                            dd                                          }|s|rdt           j        d<   d}|s|rdt           j        d<   d}d|                    d          v rO|                    dd          }d                    d t          |          D                       t           j        d	<   dd
l}d|                    d          v r6|j        	                    | ||           t                              d           d|                    d          v r^|dk    rt          t          |                    }	nd}	|j        	                    | |||	           t                              d           d
S d
S )a  Set up distributed Jax training information.

    This function should be called on each worker. It sets JAX environment
    variables and initializes JAX distributed training.

    Args:
        master_addr_with_port: The master address with port for coordination.
        num_workers: Total number of workers.
        index: Index of this worker.
        use_tpu: Whether to configure for TPU. If True and JAX_PLATFORMS is not
            already set, it will be set to "tpu".
        use_gpu: Whether to configure for GPU. If True and JAX_PLATFORMS is not
            already set, it will be set to "cuda".
        resources_per_worker: The resources per worker.
    JAX_PLATFORMS tpucuda,GPUr   c              3   4   K   | ]}t          |          V  d S r   )str).0is     r   	<genexpr>z5_setup_jax_distributed_environment.<locals>.<genexpr>B   s9       6
 6
CFF6
 6
 6
 6
 6
 6
r   CUDA_VISIBLE_DEVICESNz#Initialized JAX distributed on TPU.z$Initialized JAX distributed on CUDA.)osenvirongetlowersplitjoinrangejaxdistributed
initializeloggerinfolist)
r    r!   r"   r   r   r#   jax_platformsnum_gpus_per_workerr8   local_device_idss
             r   "_setup_jax_distributed_environmentrA      s   0 JNN?B77==??M W &+
?# W &,
?#$$S))))266ua@@-0XX 6
 6
!"5666
 6
 6
 .
 .

)* JJJ##C((((""#8+uMMM9:::$$S))))""#E*=$>$>?? ""!;7G	
 	
 	
 	:;;;;; *)r   c                      	 ddl } | j                                         dS # t          $ r(}t                              d|            Y d}~dS d}~ww xY w)zShutdown JAX distributed environment.

    This function should be called on each worker during cleanup.
    If JAX distributed was not initialized, this is a no-op.
    r   N'Error during JAX distributed shutdown: )r8   r9   shutdown	Exceptionr;   warning)r8   es     r   _shutdown_jax_distributedrH   W   s{    F


  """"" F F FDDDEEEEEEEEEFs   ! 
AAAc                   .    e Zd ZdedefdZdedefdZdS )r   worker_groupbackend_configc                    |j         s	|j        sd S |                    dt                    \  }}| d| }g }t	          t          |                    D ]`}|                    |                    |t          |t          |          ||j         |j        |	                                                     at          j        |           d S )Nr   :)r    r!   r"   r   r   r#   )r   r   execute_singler   r7   lenappendexecute_single_asyncrA   get_resources_per_workerrayr3   )r   rJ   rK   master_addrmaster_portr    setup_futuresr.   s           r   on_startz_JaxBackend.on_startf   s    % 	n.D 	F#/#>#>qBV#W#W [#. > > > > s<(()) 	 	A  116*? #L 1 1*2*2)5)N)N)P)P 2 	 	    	r   c                    |j         s	|j        sdS |                    t                    }t	          j        t          t                    }	 t          j	        ||           t                              d           dS # t          j        j        $ r" t                              d| d           Y dS t          $ r(}t                              d|            Y d}~dS d}~ww xY w)zBCleanup JAX distributed resources when shutting down worker group.N)timeoutz"JAX distributed shutdown completedz)JAX distributed shutdown timed out after z= seconds. This may indicate workers are hung or unresponsive.rC   )r   r   execute_asyncrH   r   env_integerr
   r	   rS   r3   r;   debug
exceptionsGetTimeoutErrorrF   rE   )r   rJ   rK   shutdown_futures	timeout_srG   s         r   on_shutdownz_JaxBackend.on_shutdown~   s(   % 	n.D 	F (556OPP!-.6
 
			JG$i8888LL=>>>>>~- 	 	 	NNFI F F F       	J 	J 	JNNHQHHIIIIIIIII	Js   0A= =2C#2	C#;CC#N)r   r   r   r   r   rW   ra   r   r   r   r   r   e   s`        [ )    0J JY J J J J J Jr   r   )loggingr1   dataclassesr   rS   ray._privater   ray.train._internal.utilsr    ray.train._internal.worker_groupr   ray.train.backendr   r   ray.train.constantsr	   r
   ray.utilr   	getLoggerr   r;   r   r,   intr   dictrA   rH   r   r   r   r   <module>rm      s    				 ! ! ! ! ! ! 



 & & & & & & : : : : : : 8 8 8 8 8 8 4 4 4 4 4 4 4 4             		8	$	$ W
        6<6<6< 6< 	6<
 6< 6< 6< 6< 6<rF F F.J .J .J .J .J' .J .J .J .J .Jr   