
    &`iz,             (       :   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlmZmZmZmZ d dlZd dlZd dlmZ d dlZd dlZddlmZ ddlmZ ddlmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ d d	l,m-Z-m.Z. d d
l/m0Z0 d dl1m2Z2 d dl3m4Z4m5Z5  ej6        d          Z7e78                    ej9                   dZ:dZ;dZ<dZ=d Z> G d d          Z?d Z@d ZAdZBdZC eDejE        F                    dd                    ZGdZH eDejE        F                    dd                    ZIdZJd ZKd  ZLd! ZMd" ZNd# ZOd$eDd%eDd&eDd'eDd(eDd)eDd*ePd+eDd,eDd-eDd.eDd/ed0ed1eQd2eQd3eRd4eRd5ePd6ee?         f&d7ZSdaT ejU                    ZVd8 ZWd1ddddddddd9	ZXd1dddd&d(dd-dddd:ZYd; ZZd$eDd%eeD         d&eeD         d'eeD         d(eeD         d)eeD         d+eeD         d,eeD         d-eeD         d.eeD         d/ee         d0ee         d1eeQ         d<ePd2eeQ         d3eeR         d4eeR         d5ePd6eeQeQf         f&d=Z[e5ddddddddddddd>dd?d?d@d$eDd%eeD         d&eeD         d'eeD         d(eeD         d)eeD         dAeeD         dBeeD         d-eeD         d.eeD         d/ee         d0ee         d1eeQ         d<ePd2eeQ         d3eeR         d4eeR         d6eeQeQf         f$dC            Z\e5dDdddddddddddd>dd?d?dEd$eDdFePd%eeD         d&eeD         d'eeD         d(eeD         d)eeD         dAeeD         dBeeD         d-eeD         d.eeD         d/ee         d0ee         d<ePd2eeQ         d3eeR         d4eeR         f"dG            Z]dH Z^e5dNdI            Z_da`e4 G dJ dK                      ZadL Zbd>acdM ZddS )O    N)Event)DictOptionalTupleType)Version   )$DefaultDatabricksRayOnSparkStartHook)RayOnSparkStartHook)_get_cpu_cores_get_local_ray_node_slots_get_num_physical_gpus_wait_service_upcalc_mem_ray_head_nodeexec_cmdgen_cmd_exec_failure_msg!get_avail_mem_per_ray_worker_node*get_configured_spark_executor_memory_bytesget_max_num_concurrent_tasksget_random_unused_port!get_spark_application_driver_hostget_spark_session%get_spark_task_assigned_physical_gpusis_in_databricks_runtimeis_port_in_use)build_addressparse_address)
load_class)HEAD_NODE_ID)DeveloperAPI	PublicAPIzray.util.sparkRAY_ON_SPARK_START_HOOK RAY_ON_SPARK_COLLECT_LOG_TO_PATH!RAY_ON_SPARK_START_RAY_PARENT_PIDc                      t           j        dk    rt          d          d} 	 dd l}t	          |j                  j        dk     rt          |           d S # t          $ r t          |           w xY w)Nposixz3Ray on spark only supports running on POSIX system.z-ray.util.spark module requires pyspark >= 3.3r   )   r(   r   )osnameRuntimeErrorpysparkr   __version__releaseImportError)spark_dependency_errorr,   s     o/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/util/spark/cluster_init.py_check_system_environmentr2   :   s    	w'PQQQL37&''/);;5666 <; 3 3 312223s   0A A/c                   <    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	S )
RayClusterOnSparkaW  
    This class is the type of instance returned by the `_setup_ray_cluster` interface.
    Its main functionality is to:
    Connect to, disconnect from, and shutdown the Ray cluster running on Apache Spark.
    Serve as a Python context manager for the `RayClusterOnSpark` instance.

    Args
        address: The url for the ray head node (defined as the hostname and unused
                 port on Spark driver node)
        head_proc: Ray head process
        spark_job_group_id: The Spark job id for a submitted ray job
        num_workers_node: The number of workers in the ray cluster.
    c                     || _         || _        || _        || _        || _        || _        || _        || _        |	| _        |
| _	        || _
        d| _        d| _        d | _        d | _        d S )NF)address	head_procmin_worker_nodesmax_worker_nodestemp_dircluster_unique_id
start_hookray_dashboard_portspark_job_serverglobal_cluster_lock_fdray_client_server_portis_shutdownspark_job_is_canceledbackground_job_exceptionray_ctx)selfr6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   s               r1   __init__zRayClusterOnSpark.__init__W   s}     " 0 0 !2$"4 0&<#&<# %*"(,%     c                    dd l }| j        rt          d          	  |j        | j                   | j        St          t          | j                  d         | j        t                    r | j	        
                    | j                   nS	 t          d           nB# t          $ r5}t                              dt          |           d           Y d }~nd }~ww xY wd}t!          j                    }	 t!          j        t$                     | j        t          d          | j        t)          d	  |j                    D                       d
z
  }|| j        k    r0t                              d| d           	  |j                     d S ||k    r=|}t!          j                    }t                              d| d| j         d           nt!          j                    |z
  t4          k    r|dk    r| j        j        d d         \  }}t;          j        dt?          ||           ddd i          }|                                  |j!        "                    d          }	tG          j$        |	          }
|
d         }|rt          d|           t          d          t                              d| d| j         d           	  |j                     d S #  |j                     w xY w)Nr   z9The ray cluster has been shut down or it failed to start.r6   zray.dashboard.optional_depszDependencies to launch the optional dashboard API server cannot be found. They can be installed with pip install ray[default], root cause: ()TzRay workers failed to start.c                 "    g | ]}|d          
|S )Alive ).0nodes     r1   
<listcomp>z6RayClusterOnSpark.wait_until_ready.<locals>.<listcomp>   s!    GGG$gGGGGrG   r	   zStarted zH Ray worker nodes, meet the minimum number of Ray worker nodes required.z*Ray worker nodes are starting. Progress: (z /    http://z/query_last_worker_errspark_job_group_idurljsonzutf-8last_worker_errz(Starting Ray worker node failed, error:
zBCurrent spark cluster has no resources to launch Ray worker nodes.zQTimeout in waiting for minimal ray workers to start. Started / Total requested: (zs). Current spark cluster does not have sufficient resources to launch requested minimal number of Ray worker nodes.)%rayrA   r+   initr6   r=   r   r   _RAY_DASHBOARD_STARTUP_TIMEOUTr<   on_ray_dashboard_created
__import__ModuleNotFoundError_loggerwarningreprtimesleep/_RAY_CLUSTER_STARTUP_PROGRESS_CHECKING_INTERVALrC   lennodesr8   infoshutdownr9   *_RAY_CONNECT_CLUSTER_POLL_PROGRESS_TIMEOUTr>   server_addressrequestspostr   raise_for_statuscontentdecoderV   loads)rE   rX   elast_alive_worker_countlast_progress_move_timecur_alive_worker_countjob_server_hostjob_server_portresponsedecoded_respjson_resrW   s               r1   wait_until_readyz"RayClusterOnSpark.wait_until_readyx   s   


 	K  ]	CHT\****&27Gdl++A.'.8 82
 889PQQQQ<====*   OOMBFq''M M M        '(#&*ikk#F
JKKK 0<&6 89
 GG)#)++GGGHH1L ' *T-BBBLLQ#9 Q Q Q   f CLNNNNNc *,CCC.D+.2ikk+LLP2P P7;7LP P P    	&==DE E 2Q66 !% 5 DRaR H / /'/}%=mO_.].] %= %= %= ';D%A( ( (H %55777+3+;+B+B7+K+KL'+z,'?'?H.67H.IO. 	"&2%9'6%9 %9'" '" !"
 '3%8'" '" !"  V 6V V;?;PV V V   CLNNNNNQFP CLNNNNsD   A0K B K 
C'+CK CB K D<K K K/c                     t          j                    rt          d          t          j        | j                  | _        d S )Nz!Already connected to Ray cluster.rI   )rX   is_initializedr+   rY   r6   rD   rE   s    r1   connectzRayClusterOnSpark.connect   s=     	DBCCCx555rG   c                 :    t          j                     d | _        d S N)rX   rg   rD   r|   s    r1   
disconnectzRayClusterOnSpark.disconnect   s    rG   c                    ddl }| j        s	 |                                  n# t          $ r Y nw xY wt          j                            dd           | j         |                    | j        |j	                   | j
                                         	 | j                                         nA# t          $ r4}t                              dt!          |                      Y d}~nd}~ww xY wd| _        dS dS )zR
        Shutdown the ray cluster created by the `setup_ray_cluster` API.
        r   NRAY_ADDRESSz4An Error occurred during shutdown of ray head node: T)fcntlrA   r   	Exceptionr)   environpopr?   flockLOCK_UNr>   rg   r7   	terminater^   r_   r`   )rE   r   rp   s      r1   rg   zRayClusterOnSpark.shutdown   s3    	 	$!!!!   JNN=$///*6D7GGG!**,,,((****   WdSTggWW       
  $D'	$ 	$s$   " 
//B- -
C+7*C&&C+c                     | S r   rM   r|   s    r1   	__enter__zRayClusterOnSpark.__enter__  s    rG   c                 .    |                                   d S r   )rg   )rE   exc_typeexc_valexc_tbs       r1   __exit__zRayClusterOnSpark.__exit__  s    rG   N)__name__
__module____qualname____doc__rF   ry   r}   r   rg   r   r   rM   rG   r1   r4   r4   H   s           Be e eN6 6 6
  $ $ $6      rG   r4   c                     d|                      dd           }| dv r| dt          j        |           S ||S | dt          |           S )N--_-)system_config	resourceslabels=)replacerV   dumpsstr)keyvalueconverted_keys      r1   _convert_ray_node_optionr   
  sl    0S#..00M
66655$*U"3"3555}**c%jj***rG   c                 >    d |                                  D             S )Nc                 4    g | ]\  }}t          ||          S rM   )r   )rN   kvs      r1   rP   z-_convert_ray_node_options.<locals>.<listcomp>  s'    GGGtq!$Q**GGGrG   )items)optionss    r1   _convert_ray_node_optionsr     s    GGw}}GGGGrG      <   (RAY_ON_SPARK_BACKGROUND_JOB_STARTUP_WAIT30r(   -RAY_ON_SPARK_RAY_WORKER_NODE_STARTUP_INTERVAL10x   c                  2   ddl ddlfd} d}	  | |          nA# t          $ r4 	 t          j        |           n# t
          $ r Y nw xY w | |          Y nw xY wfd	 d}t          j                            |          r{t          |d          5 }|	                                }ddd           n# 1 swxY w Y   d	 |
                                                    d
          D             }d |D             }nAg }t          |d          5  	 ddd           n# 1 swxY w Y   t          j        |d           fd|D             }t          |                                          }t          |          dk    rd}n,t!          |          dz   }t#          |          D ]
}	|	|vr|	} n||t          j                    <   t          |d          5 }|                                D ] \  }
}|                    |
 d| d
           !	 ddd           n# 1 swxY w Y   d|dz  z   }|dz   }|dk    rt+          d          n# t
          $ r                w xY wfd}t-          j        |d                                           ||fS )ad  
    If we start multiple ray workers on a machine concurrently, some ray worker
    processes might fail due to ray port conflicts, this is because race condition
    on getting free port and opening the free port.
    To address the issue, this function use an exclusive file lock to delay the
    worker processes to ensure that port acquisition does not create a resource
    contention issue due to a race condition.

    After acquiring lock, it will allocate port range for worker ports
    (for ray node config --min-worker-port and --max-worker-port).
    Because on a spark cluster, multiple ray cluster might be created, so on one spark
    worker machine, there might be multiple ray worker nodes running, these worker
    nodes might belong to different ray cluster, and we must ensure these ray nodes on
    the same machine using non-overlapping worker port range, to achieve this, in this
    function, it creates a file `/tmp/ray_on_spark_worker_port_allocation.txt` file,
    the file format is composed of multiple lines, each line contains 2 number: `pid`
    and `port_range_slot_index`, each port range slot allocates 1000 ports, and
    corresponding port range is:
     - range_begin (inclusive): 20000 + port_range_slot_index * 1000
     - range_end (exclusive): range_begin + 1000
    In this function, it first scans `/tmp/ray_on_spark_worker_port_allocation.txt`
    file, removing lines that containing dead process pid, then find the first unused
    port_range_slot_index, then regenerate this file, and return the allocated port
    range.

    Returns: Allocated port range for current worker ports
    r   Nc                    t           j        t           j        z  t           j        z  }	 t          j        | |          }t          j        | d           d}t          |          D ]N}	                     |j        j	        z             |c S # t          $ r Y nw xY wt          j        d           Ot          d|  d          # t          $ r t          j        |           Y d S w xY w)N  iX  
   zAcquiring lock on file z	 timeout.)r)   O_RDWRO_CREATO_TRUNCopenchmodranger   LOCK_EXLOCK_NBBlockingIOErrorra   rb   TimeoutErrorr   close)	file_pathmodefdmax_lock_iterr   r   s        r1   acquire_lockz8_preallocate_ray_worker_port_range.<locals>.acquire_lockC  s   y2:%
2	D))BHY'''M=)) 
 
KKEMEM$ABBB III '    D 
2MMMMNNN 	 	 	HRLLLLLL	s5   =C '#B
C 
BC B+C C('C(z2/tmp/ray_on_spark_worker_startup_barrier_lock.lockc                  f                           j                   t          j                   d S r   )r   r   r)   r   )r   lock_fds   r1   release_lockz8_preallocate_ray_worker_port_range.<locals>.release_lockh  s/    GU]+++
rG   z,/tmp/ray_on_spark_worker_port_allocation.txtr)r   c                 8    g | ]}|                     d           S ) )split)rN   lines     r1   rP   z6_preallocate_ray_worker_port_range.<locals>.<listcomp>t  s/          $(

3     rG   
c                 P    g | ]#\  }}t          |          t          |          f$S rM   )int)rN   pid_strslot_index_strs      r1   rP   z6_preallocate_ray_worker_port_range.<locals>.<listcomp>w  s?          +G^ Ws>223     rG   wr   c                 F    i | ]\  }}                     |          ||S rM   )
pid_exists)rN   pid
slot_indexpsutils      r1   
<dictcomp>z6_preallocate_ray_worker_port_range.<locals>.<dictcomp>  sC     
 
 
Z  %%

 
 
rG   r	   r    N  i  i   zqToo many ray worker nodes are running on this machine, cannot allocate worker port range for new ray worker node.c                  N    t          j        t                                    d S r   )ra   rb   !_RAY_WORKER_NODE_STARTUP_INTERVAL)r   s   r1   	hold_lockz5_preallocate_ray_worker_port_range.<locals>.hold_lock  s#    
4555rG   rM   )targetargs)r   r   r   r)   remover   pathexistsr   readstripr   r   setvaluesrd   maxr   getpidr   writer+   	threadingThreadstart)r   lock_file_pathport_alloc_filefpport_alloc_dataport_alloc_tableport_alloc_mapallocated_slot_setnew_slot_indexindexr   r   worker_port_range_beginworker_port_range_endr   r   r   r   r   s                  @@@@r1   "_preallocate_ray_worker_port_ranger   #  si   8 LLLMMM    . JN/,~.. 	/ 	/ 	/
	In%%%% 	 	 	D	,~..	/     7H 7>>/** 	.oC000 ,B"$'')), , , , , , , , , , , , , , ,   ,;,A,A,C,C,I,I$,O,O        /?     
  "oC000                 H_f---
 
 
 
#3
 
 
 !!6!6!8!899!""a''NN !344q8N~..   222%*NE 3 '5ry{{#/,,, 	2#1#7#7#9#9 2 2ZC00*00011112	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 #(.4*?"? 7$ > 5((F   )
         IB///55777"$999s   ! 
AA A
AAAAA)2I B<0I <C  I C AI D&I &D**I -D*.B0I 6H!I !H%%I (H%)%I I%c                 h    d| vri | d<   | d         }d|vrt          j        dd|id          |d<   | S )Nr   object_spilling_config
filesystemdirectory_path)typeparams)rV   r   )head_node_optionsobject_spilling_dirsys_confs      r1   #_append_default_spilling_dir_configr    se    ///-//* 1Hx//-1Z$$&9 .
 .
)* rG   c                 N    d| vri | d<   | d                              |           | S )Nr   )update)node_optionsr   s     r1   _append_resources_configr    s6    ,&&$&[!$$Y///rG   c                  ~    t           j                            t           j                            dd          d          S )N
RAY_TMPDIRz/tmprX   )r)   r   joinr   getrM   rG   r1   _get_default_ray_tmp_dirr    s(    7<<
|V<<eDDDrG   c                     t           t          j        v r, t          t          j        t                                          S t	                      rt          |           S t          |           S r   )r"   r)   r   r   r   r
   r   	is_globals    r1   _create_hook_entryr    sW    "*,,>z"*%<=>>@@@	!	#	# .3I>>>"9---rG   r9   r8   num_cpus_worker_nodenum_cpus_head_nodenum_gpus_worker_nodenum_gpus_head_nodeusing_stage_schedulingheap_memory_worker_nodeheap_memory_head_nodeobject_store_memory_worker_nodeobject_store_memory_head_noder   worker_node_optionsray_temp_root_dircollect_log_to_pathautoscale_upscaling_speedautoscale_idle_timeout_minutesr  returnc                 	   ddl }t          |          }t                      }t          j        t          |                    }t          |dd          }|g}|                                }|                    dd          }|                    dd          }|rd}nt          |dd|	          }|	                    |           t          |dd|	          }|	                    |           ||d
u rh|(t          |dd|	          }|	                    |           t          |dd|	          }|	                    |           dd| d| g}|d
u r|dgz  }ndg}t                              d| d| d| d           t          j                    j        dd         }|r|t          d          t!          j        dt           j        t           j        z  t           j        z            }	 |                    ||j        |j        z             n# t0          $ r t          d          w xY wd} t3                      }!t!          j        |!d
           t           j                            |!d          }"ntd}||                                }t           j                            |d| d|           } t!          j        | d
           t           j                            | d          }"t!          j        |"d
           t=          ||"          }ddlm }# |!                                }$ |#||||$          }%tE          ||||
dd||||	di || di||||| |||d ||!          }&|&#                    |||| ||||$          \  }'}(|&j$        })tK          j&        tN                     tQ          ||          sd|')                                (|'*                                 tK          j&        d"           tW          |)|'j,        |(          }*t[          d#|*z             t                              d$           t]          ||          }+|+t           j/        d%<   ta          |+|'|| | ||||%||&          },|1                    |,           |,S )'a  
    The public API `ray.util.spark.setup_ray_cluster` does some argument
    validation and then pass validated arguments to this interface.
    and it returns a `RayClusterOnSpark` instance.

    The returned instance can be used to connect to, disconnect from and shutdown the
    ray cluster. This instance can also be used as a context manager (used by
    encapsulating operations within `with _setup_ray_cluster(...):`). Upon entering the
    managed scope, the ray cluster is initiated and connected to. When exiting the
    scope, the ray cluster is disconnected and shut down.

    Note: This function interface is stable and can be used for
    instrumentation logging patching.
    r   Ni(#  i'  min_portmax_portinclude_dashboarddashboard_porti'  )r!  r"  exclude_listTz--dashboard-host=0.0.0.0z--dashboard-port=--dashboard-agent-listen-port=z--include-dashboard=truez--include-dashboard=falsezRay head hostname: z, port: z, ray client server port: .   zXRay on spark global mode cluster does not allow you to set 'ray_temp_root_dir' argument.z%/tmp/ray_on_spark_global_cluster.lockzAcquiring global lock failed for setting up new global mode Ray on spark cluster. If there is an active global mode Ray on spark cluster, please shut down it before you create a new one.)exist_okspillzray-r   )_start_spark_job_server)CPUGPUmemoryobject_store_memoryz
ray.worker)r   node_configmin_workersmax_workers)ray_head_ipray_head_portr;   r  ray_temp_dirr  r  spark_job_server_port)head_resourcesworker_node_typesextra_provider_configupscaling_speedidle_timeout_minutesg      ?zStart Ray head node failed!
zRay head node started.r   )r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   )2r   r  r   socketgethostbynamer   r   copyr   appendr^   rf   uuiduuid4hex
ValueErrorr)   r   r   r   r   r   r   r   r   r  makedirsr   r	  get_default_temp_root_dirr  .ray.autoscaler._private.spark.spark_job_serverr+  custom_environment_variablesAutoscalingClusterr   ray_head_node_cmdra   rb   _RAY_HEAD_STARTUP_TIMEOUTr   pollr   r   
returncoder+   r   r   r4   on_cluster_created)-r9   r8   r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r   r<   sparkr3  r4  port_exclude_listr#  r=   r@   r6  ray_dashboard_agent_portdashboard_optionsr;   r?   r5  ray_default_tmp_dirr   r+  ray_node_custom_envr>   autoscaling_clusterray_head_proctail_output_dequerI  cmd_exec_failure_msgcluster_addressray_cluster_handlers-                                                r1   _setup_ray_clusterrZ    s   F LLL#I..JE&'H'O'OPPK*;PUVVVM& *..00)--.A4HH*../?FF 
!&!7*	"
 "
 "
 34442&	   2333 $5$=$=%!7.	" " " $$%7888#9*	$
 $
 $
  	  !9::: '4 244G-EGG
 $$"<!== (
 LL	=k 	= 	== 	= 	=#9	= 	= 	=  
 
(!, )B
 (0   "$3RY5Kbj5X"
 "
		KK.0MNNNN 	 	 	L  	 688
'$7777 gll+>HH!%$ * D D F Fw||ImII6GII
 
 	L40000 gll<AAK#d3333;.       %AACC..*E3F  -%%+#@	
 
 //5+J	   "//
 

 '*!2&<(#6#6%:	
 	
 2;?     B (;'@'@	( 	($M$ ,= 	J()))+}55 
S'##%%%JsOOO7}79J 
  
 :=QQRRRLL)***#K??O /BJ}+))+-)55   !!"5666s   /#H H-c                     ddl m} ddlm}  |                                |           }|dk    r|                    d|          } |                                |          j        S )Nr   )ResourceProfileBuilder)TaskResourceRequestsgpu)pyspark.resource.profiler\  pyspark.resource.requestsr]  cpusresourcerequirebuild)num_cpus_per_nodenum_gpus_per_noder\  r]  task_res_reqs        r1   _create_resource_profilerh    s    ??????>>>>>>''))../@AAL1#,,U4EFF!!##++L99??rG   )	r:   blockheadnode_ip_addressportnum_cpusnum_gpusdashboard_hostdashboard_agent_listen_port)r:   ri  rj  r6   rm  rn  r.  r/  rp  min_worker_portmax_worker_portc                     | D ]h}|                     d          sd|v rt          d          ||v r:d| d| d}||         }|rt          | d| d          t          | d	          id S )
Nr   r   zFor a ray node option like '--foo-bar', you should convert it to following format 'foo_bar' in 'head_node_options' / 'worker_node_options' arguments.zSetting the option 'z' for z nodes is not allowed.z You should set the 'z' option instead.z+ This option is controlled by Ray on Spark.)
startswithrC  )r  
block_keys	node_typer   common_err_msgreplacement_args         r1   _verify_node_optionsry    s      >>$ 	3#::3   *SsSS)SSS  )oO  %  O     
 !%RRR    rG   strict_modec                    t                       t                       |
pi }
|pi }t          |
t          d           t          |t          d           t
          t          d          t          j                    rt          d          t                      }|j
        j        }|dk    p|                    d          }|                    d          s,|                    d          s|d	k    s|st          d
          |rd}n_t                      r+t          t          j        d                   j        dk    rd}n&dd l}t          |j                  j        dk    rd}nd}d|v r3|t+          d          |d         }t-          j        dt0                     d|v r3|t+          d          |d         }t-          j        dt0                     d|v r3|t+          d          |d         }t-          j        dt0                     t3          |j
                                                            dd                    }||dk    rt+          d          t9          |j
                                                            dd                    }t3          |          }|dk    rLd| d }t                      rdd!lm}   |            d"| d#           nt>                               |           ||dk     rt+          d$          d% }|j
        !                    d&gd&          "                    |          #                                d         \  }}} |-|+|rd}!tI          ||          }"nKt+          d'| d(| d)          |$|"|r|}|}d}!tI          ||          }"nd}!d }"|}|}nt+          d*          tK          |||||          \  }#}$tM          ||||          }%tO          |          }&|&|%|#|$z   z  z   }'|'d+| z  k    rHd,}t                      rdd!lm}   |            d-| d.           nt>                               |           d/|v rt+          d0          | tP          k    r)|t+          d1          tS          |j
        |"          } | }n| dk    rt+          d2          d3|v rt+          d4          || }nd|cxk    r| k    sn t+          d5          g }(|d6k     r|(*                    d7| d8           |#d9k     r|(*                    d:|# d;           |(rQ|r"t+          d<+                    |(                    t>                               d=+                    |(                     ||rtY                      }nd}n|dk     rt+          d>| d?          |'|r"	 t[                      }n.# t\          $ r d}Y n w xY wd}n|dk     rt+          d@| d?          |dk    r|dk    r|	dA}dA}	nt_          ||	          \  }}	t`          5  tc          dVi dB| dC|dD|dE|dF|dG|dH|!dI|#dJ|dK|$dL|	dM|
dN|dO|dP|dQ|dR|dS|})|)a	 |)2                                 nB# t\          $ r5}*	 tg                       n# t\          $ r Y nw xY wt          dT          |*d }*~*ww xY w	 d d d            n# 1 swxY w Y   ti          |)j5                  d         }+dUtm          |+|)j7                   },|)j5        |,fS )WNzRay head node on sparkzRay worker node on sparkzCurrent active ray cluster on spark haven't shut down. Please call `ray.util.spark.shutdown_ray_cluster()` before initiating a new Ray cluster on spark.zCurrent python process already initialized Ray, Please shut down it by `ray.shutdown()` before initiating a Ray cluster on spark.localzlocal[zspark://zlocal-cluster[yarnzxRay on Spark only supports spark cluster in standalone mode, local-cluster mode, spark on yarn mode or spark local mode.FDATABRICKS_RUNTIME_VERSION   Tr   )r(      r   re  zi'num_cpus_per_node' and 'num_cpus_worker_node' arguments are equivalent. Only set 'num_cpus_worker_node'.z_'num_cpus_per_node' argument is deprecated, please use 'num_cpus_worker_node' argument instead.rf  zi'num_gpus_per_node' and 'num_gpus_worker_node' arguments are equivalent. Only set 'num_gpus_worker_node'.z_'num_gpus_per_node' argument is deprecated, please use 'num_gpus_worker_node' argument instead.object_store_memory_per_nodez'object_store_memory_per_node' and 'object_store_memory_worker_node' arguments  are equivalent. Only set 'object_store_memory_worker_node'.zu'object_store_memory_per_node' argument is deprecated, please use 'object_store_memory_worker_node' argument instead.zspark.task.cpus1z2Argument `num_cpus_worker_node` value must be > 0.zspark.task.resource.gpu.amount0z3You configured 'spark.task.resource.gpu.amount' to z,we recommend setting this value to 0 so that Spark jobs do not reserve GPU resources, preventing Ray-on-Spark workloads from having the maximum number of GPUs available.)$get_databricks_display_html_functionz<b style='color:red;'>z</b>z3Argument `num_gpus_worker_node` value must be >= 0.c                 \    ddl m}m}m}  |            } |            } |            }|||fS )Nr   )r   r   '_get_spark_worker_total_physical_memory)ray.util.spark.utilsr   r   r  )r   r   r   r  num_cpus_spark_workernum_gpus_spark_workertotal_mem_bytess          r1   _get_spark_worker_resourcesz@_setup_ray_cluster_internal.<locals>._get_spark_worker_resources  sv    	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 !/ 0 0 6 6 8 8AACC "!
 	
rG   r	   a	  Current spark version does not support stage scheduling, so that you cannot set the argument `num_cpus_worker_node` and `num_gpus_worker_node` values. Without setting the 2 arguments, per-Ray worker node will be assigned with number of 'spark.task.cpus' (equals to zS) cpu cores and rounded down number of 'spark.task.resource.gpu.amount' (equals to z) GPUs. To enable spark stage scheduling, you need to upgrade spark to 3.4 version or use Databricks Runtime 12.x, and you cannot use spark local mode.zb'num_cpus_worker_node' and 'num_gpus_worker_node' arguments must beset together or unset together.g?aD  In each spark worker node, we recommend making the sum of 'spark_executor_memory + num_Ray_worker_nodes_per_spark_worker * (memory_worker_node + object_store_memory_worker_node)' to be less than 'spark_worker_physical_memory * 0.8', otherwise it might lead to spark worker physical memory exhaustion and Ray task OOM errors.z"<b style='background-color:Cyan;'>z<br></b>num_worker_nodeszn'num_worker_nodes' argument is removed, please set 'max_worker_nodes' and 'min_worker_nodes' argument instead.zIf you set 'max_worker_nodes' to 'MAX_NUM_WORKER_NODES', autoscaling is not supported, so that you cannot set 'min_worker_nodes' argument and 'min_worker_nodes' is automatically set to be equal to 'max_worker_nodes'.ztThe value of 'max_worker_nodes' argument must be either a positive integer or 'ray.util.spark.MAX_NUM_WORKER_NODES'.	autoscalez'autoscale' argument is removed. You can set 'min_worker_nodes' argument to be less than 'max_worker_nodes' to make autoscaling enabled.zZThe value of 'max_worker_nodes' argument must be an integer >= 0 and <= 'max_worker_nodes'r  zThe provided CPU resources for each ray worker are inadequate to start a ray cluster. Based on the total cpu resources available and the configured task sizing, each ray worker node would start with aF   CPU cores. This is less than the recommended value of `4` CPUs per worker. On spark version >= 3.4 or Databricks Runtime 12.x, you can set the argument `num_cpus_worker_node` to a value >= 4 to address it, otherwise you need to increase the spark application configuration 'spark.task.cpus' to a minimum of `4` to address it.l       
 zThe provided memory resources for each ray worker node are inadequate. Based on the total memory available on the spark cluster and the configured task sizing, each ray worker would start with a   bytes heap memory. This is less than the recommended value of 10GB. The ray worker node heap memory size is calculated by (SPARK_WORKER_PHYSICAL_MEMORY / num_local_spark_task_slots * 0.8) - object_store_memory_worker_node. To increase the heap space available, increase the memory in the spark cluster by using instance types with larger memory, or increase number of CPU/GPU per Ray worker node (so it leads to less Ray worker node slots per spark worker node), or apply a lower `object_store_memory_worker_node`.zYou are creating ray cluster on spark with strict mode (it can be disabled by setting argument 'strict_mode=False' when calling API 'setup_ray_cluster'), strict mode requires the spark cluster config satisfying following criterion: 
r   zCArgument `num_cpus_head_node` value must be >= 0. Current value is r'  zBArgument `num_gpus_head_node` value must be >= 0.Current value is i   @r9   r8   r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  z"Launch Ray-on-Spark cluster failedzray://rM   )8r2   _install_sigterm_signalry  _head_node_option_block_keys_worker_node_option_block_keys_active_ray_clusterr+   rX   r{   r   sparkContextmasterrt  r   r   r)   r   majorr,   r-   r.   rC  warningswarnDeprecationWarningr   getConfr
  floatray.util.spark.databricks_hookr  r^   r_   parallelizemapcollectrh  r   r   r   MAX_NUM_WORKER_NODESr   r?  r	  r   r   r   r   _active_ray_cluster_rwlockrZ  ry   shutdown_ray_clusterr   r6   r   r@   )-r9   r8   r  r  r  r  r  r  r  r  r   r  r  rz  r  r  r  r  kwargsrN  spark_masteris_spark_local_modesupport_stage_schedulingr,   num_spark_task_cpusdefault_num_spark_task_gpusrounded_num_spark_task_gpuswarn_msgr  r  r  r  spark_worker_mem_bytesr  res_profileray_worker_node_heap_mem_bytes&ray_worker_node_object_store_mem_bytesspark_worker_ray_node_slotsspark_executor_memory_bytes"spark_worker_required_memory_bytesinsufficient_resourcesclusterrp   head_ipremote_connection_addresss-                                                r1   _setup_ray_cluster_internalr    s   . )/R-3$   
 &"   & 
 
 	
  
L
 
 	

 E%,L&'1V\5L5LX5V5V 	
++	
""#344	
 6!! " J
 
 	

  -#(   ""-BJ;<==CrII#'  7&''/9<<'+$$',$f$$+?    &&9:7	
 	
 	
 f$$+?    &&9:7	
 	
 	
 &//*65  
 +11O*P'B	
 	
 	
 e088::>>?PRUVVWW',@A,E,EMNNN #(""$$(()I3OO# # #&&A"B"B"Q&&0*0 0 0 	 $%% 		&      300227777    OOH%%%',@1,D,DNOOO
 
 
$ 	&&sA..	(	)	)	1	 L13I ',@,L# 	%)"2$&: KK P 1D	P P :P P P
 
 
 
	%*>*F# 	? $9 #8 %)"2$&: KK &+"K#6 #>  .
 
 	
 	*'	 	&. #<	# # #MU"S"S#
%),RRT	T '
 *C2H,HHHO 	 $%% 		&      30022GXGGG    OOH%%%V##J
 
 	

 ///'&   88JKXX+	Q		@
 
 	

 fN
 
 	

 +#7777'77777(
 
 	

  a%% $  
	
 
	
 
	
 &(???%%B .B B B	
 	
 	
  
? 		? T011   OODII&<==>>>! 	#!/!1!1!"!!:$6: : :  
 ! 	#'%;%=%="" ' ' '%&"""' "#!!:$6: : :   	a!##)1 !3(:%%?U!#@@
 @
<< 
$  L  L$ 
 
 
--
--
 "6!5
  21	

 "6!5
  21
 $:#9
 %C$B
 #8"7
 -S,R
 +H*G
 0/
 !4 3
 0/
 !4 3
  '@&?!
" ,J+I#
$  i%
, &	L$$&&&& 	L 	L 	L$&&&&   CDD!K	L '5 L  L  L  L  L  L  L  L  L  L  L  L  L  L  LD GO,,Q/GIw(FGGII  ?555sg   )V8 8WWAZ;Y,+Z;,
Z+7ZZ&
ZZ&ZZ&&Z++Z;;Z?Z?Fg      ?)r8   r  r  r  r  memory_worker_nodememory_head_noder  r  r   r  r  rz  r  r  r  r  r  c                     t          di d| d|d|d|d|d|d|d|d	|d
|	d|
d|d|d|d|d|d|dd|S )a  
    Set up a ray cluster on the spark cluster by starting a ray head node in the
    spark application's driver side node.
    After creating the head node, a background spark job is created that
    generates an instance of `RayClusterOnSpark` that contains configuration for the
    ray cluster that will run on the Spark cluster's worker nodes.
    After a ray cluster is set up, "RAY_ADDRESS" environment variable is set to
    the cluster address, so you can call `ray.init()` without specifying ray cluster
    address to connect to the cluster. To shut down the cluster you can call
    `ray.util.spark.shutdown_ray_cluster()`.
    Note: If the active ray cluster haven't shut down, you cannot create a new ray
    cluster.

    Args:
        max_worker_nodes: This argument represents maximum ray worker nodes to start
            for the ray cluster. you can
            specify the `max_worker_nodes` as `ray.util.spark.MAX_NUM_WORKER_NODES`
            represents a ray cluster
            configuration that will use all available resources configured for the
            spark application.
            To create a spark application that is intended to exclusively run a
            shared ray cluster in non-scaling, it is recommended to set this argument
            to `ray.util.spark.MAX_NUM_WORKER_NODES`.
        min_worker_nodes: Minimal number of worker nodes (default `None`),
            if "max_worker_nodes" value is equal to "min_worker_nodes" argument,
            or "min_worker_nodes" argument value is None, then autoscaling is disabled
            and Ray cluster is launched with fixed number "max_worker_nodes" of
            Ray worker nodes, otherwise autoscaling is enabled.
        num_cpus_worker_node: Number of cpus available to per-ray worker node, if not
            provided, if spark stage scheduling is supported, 'num_cpus_head_node'
            value equals to number of cpu cores per spark worker node, otherwise
            it uses spark application configuration 'spark.task.cpus' instead.
            **Limitation** Only spark version >= 3.4 or Databricks Runtime 12.x
            supports setting this argument.
        num_cpus_head_node: Number of cpus available to Ray head node, if not provide,
            if it is global mode Ray cluster, use number of cpu cores in spark driver
            node, otherwise use 0 instead.
            use 0 instead. Number 0 means tasks requiring CPU resources are not
            scheduled to Ray head node.
        num_gpus_worker_node: Number of gpus available to per-ray worker node, if not
            provided, if spark stage scheduling is supported, 'num_gpus_worker_node'
            value equals to number of GPUs per spark worker node, otherwise
            it uses rounded down value of spark application configuration
            'spark.task.resource.gpu.amount' instead.
            This argument is only available on spark cluster that is configured with
            'gpu' resources.
            **Limitation** Only spark version >= 3.4 or Databricks Runtime 12.x
            supports setting this argument.
        num_gpus_head_node: Number of gpus available to Ray head node, if not provide,
            if it is global mode Ray cluster, use number of GPUs in spark driver node,
            otherwise use 0 instead.
            This argument is only available on spark cluster which spark driver node
            has GPUs.
        memory_worker_node: Optional[int]:
            Heap memory configured for Ray worker node. This is basically setting
            `--memory` option when starting Ray node by `ray start` command.
        memory_head_node: Optional[int]:
            Heap memory configured for Ray head node. This is basically setting
            `--memory` option when starting Ray node by `ray start` command.
        object_store_memory_worker_node: Object store memory available to per-ray worker
            node, but it is capped by
            "dev_shm_available_size * 0.8 / num_tasks_per_spark_worker".
            The default value equals to
            "0.3 * spark_worker_physical_memory * 0.8 / num_tasks_per_spark_worker".
        object_store_memory_head_node: Object store memory available to Ray head
            node, but it is capped by "dev_shm_available_size * 0.8".
            The default value equals to
            "0.3 * spark_driver_physical_memory * 0.8".
        head_node_options: A dict representing Ray head node extra options, these
            options will be passed to `ray start` script. Note you need to convert
            `ray start` options key from `--foo-bar` format to `foo_bar` format.
            For flag options (e.g. '--disable-usage-stats'), you should set the value
            to None in the option dict, like `{"disable_usage_stats": None}`.
            Note: Short name options (e.g. '-v') are not supported.
        worker_node_options: A dict representing Ray worker node extra options,
            these options will be passed to `ray start` script. Note you need to
            convert `ray start` options key from `--foo-bar` format to `foo_bar`
            format.
            For flag options (e.g. '--disable-usage-stats'), you should set the value
            to None in the option dict, like `{"disable_usage_stats": None}`.
            Note: Short name options (e.g. '-v') are not supported.
        ray_temp_root_dir: A local disk path to store the ray temporary data. The
            created cluster will create a subdirectory
            "ray-{head_port}-{random_suffix}" beneath this path.
        strict_mode: Boolean flag to fast-fail initialization of the ray cluster if
            the available spark cluster does not have sufficient resources to fulfill
            the resource allocation for memory, cpu and gpu. When set to true, if the
            requested resources are not available for recommended minimum recommended
            functionality, an exception will be raised that details the inadequate
            spark cluster configuration settings. If overridden as `False`,
            a warning is raised.
        collect_log_to_path: If specified, after ray head / worker nodes terminated,
            collect their logs to the specified path. On Databricks Runtime, we
            recommend you to specify a local path starts with '/dbfs/', because the
            path mounts with a centralized storage device and stored data is persisted
            after Databricks spark cluster terminated.
        autoscale_upscaling_speed: If autoscale enabled, it represents the number of
            nodes allowed to be pending as a multiple of the current number of nodes.
            The higher the value, the more aggressive upscaling will be. For example,
            if this is set to 1.0, the cluster can grow in size by at most 100% at any
            time, so if the cluster currently has 20 nodes, at most 20 pending launches
            are allowed. The minimum number of pending launches is 5 regardless of
            this setting.
            Default value is 1.0, minimum value is 1.0
        autoscale_idle_timeout_minutes: If autoscale enabled, it represents the number
            of minutes that need to pass before an idle worker node is removed by the
            autoscaler. The smaller the value, the more aggressive downscaling will be.
            Worker nodes are considered idle when they hold no active tasks, actors,
            or referenced objects (either in-memory or spilled to disk). This parameter
            does not affect the head node.
            Default value is 1.0, minimum value is 0
    Returns:
        returns a tuple of (address, remote_connection_address)
        "address" is in format of "<ray_head_node_ip>:<port>"
        "remote_connection_address" is in format of
        "ray://<ray_head_node_ip>:<ray-client-server-port>",
        if your client runs on a machine that also hosts a Ray cluster node locally,
        you can connect to the Ray cluster via ``ray.init(address)``,
        otherwise you can connect to the Ray cluster via
        ``ray.init(remote_connection_address)``.
    r9   r8   r  r  r  r  r  r  r  r  r   r  r  rz  r  r  r  r  FrM   )r  )r9   r8   r  r  r  r  r  r  r  r  r   r  r  rz  r  r  r  r  s                     r1   setup_ray_clusterr    s   ` '   )))) 21 .-	
 21 .- !3 2 /. )H(G 'D&C ,+ 0/ ,+  K 0/  #<";!" (F'E#$ %
'  rG   T)is_blockingr8   r  r  r  r  r  r  r  r  r   r  rz  r  r  r  r  c                 (   t          di d| d|d|d|d|d|d|d|d	|	d
|
d|d|ddd|d|d|d|dd}|s|S 	 t                      at                                           dat	                       dS # dat	                       w xY w)a1  
    Set up a global mode cluster.
    The global Ray on spark cluster means:
    - You can only create one active global Ray on spark cluster at a time.
    On databricks cluster, the global Ray cluster can be used by all users,
    - as contrast, non-global Ray cluster can only be used by current notebook
    user.
    - It is up persistently without automatic shutdown.
    - On databricks notebook, you can connect to the global cluster by calling
    ``ray.init()`` without specifying its address, it will discover the
    global cluster automatically if it is up.

    For global mode, the ``ray_temp_root_dir`` argument is not supported.
    Global model Ray cluster always use the default Ray temporary directory
    path.

    All arguments are the same with ``setup_ray_cluster`` API except that:
    - the ``ray_temp_root_dir`` argument is not supported.
    Global model Ray cluster always use the default Ray temporary directory
    path.
    - A new argument "is_blocking" (default ``True``) is added.
    If "is_blocking" is True,
    then keep the call blocking until it is interrupted.
    once the call is interrupted, the global Ray on spark cluster is shut down and
    `setup_global_ray_cluster` call terminates.
    If "is_blocking" is False,
    once Ray cluster setup completes, return immediately.
    r9   r8   r  r  r  r  r  r  r  r  r   r  r  Nrz  r  r  r  r  TrM   )r  r    _global_ray_cluster_cancel_eventwaitr  )r9   r  r8   r  r  r  r  r  r  r  r  r   r  rz  r  r  r  rX  s                     r1   setup_global_ray_clusterr  h  sg   d 2   )))) 21 .-	
 21 .- !3 2 /. )H(G 'D&C ,+ 0/ $  K 0/  #<";!" (F'E#$ $%O*  	+077((--///+/( 		 ,0( 	s   'A? ?Bc                 *  	
 | j         }| j        d         | j        
	fd}|j                            |           |j                            t          t          |                    |          }|r%t          	          }|	                    |          }t          d u           }|                               |                    |                                          d         \  }}|s	|| _        |S d S )Nr	   c              3     K   ddl m} t          j        d          }|                                }t                      \  }}t          dd          }t          j        ddd	 d
dt                     d d d| d|dz
   d| gt                    }|                    d            t          pdt          t          t          j                              ddi}dk    r|                                }	d|	vrt%          d          d |	d         j        D             }
t)          |
          }|                    dt+          |                      d                    d |D                       |d<   |                    dd                    |                      	 d}t1          j        d t                     d!d"#          }|                                d$         sd%}t%          d&          t1          j        d t                     d'd(i#           t7          |d%|)           d S # t8          $ r1}d*| d+}t:                              |           ||fV  Y d }~d S d }~ww xY w),Nr   )TaskContextzray.util.spark.workeri'  r   r   -mray.util.spark.start_ray_nodez--num-cpus=--blockz
--address=z	--memory=z--object-store-memory=--min-worker-port=--max-worker-port=r	   r&  --temp-dir= !RAY_ENABLE_WINDOWS_OR_OSX_CLUSTERr  r^  zDCouldn't get the gpu id, Please check the GPU resource configurationc                 P    g | ]#}t          |                                          $S rM   )r   r   )rN   addrs     r1   rP   zK_start_ray_worker_nodes.<locals>.ray_cluster_job_mapper.<locals>.<listcomp>  s5       &*DJJLL!!  rG   z--num-gpus=,c                 ,    g | ]}t          |          S rM   )r   )rN   gpu_ids     r1   rP   zK_start_ray_worker_nodes.<locals>.ray_cluster_job_mapper.<locals>.<listcomp>"  s    CCCVCCCrG   CUDA_VISIBLE_DEVICESzStart Ray worker, command: r   FrR   z/check_node_id_availability)node_idrS   rT   	availableTzDStarting Ray worker node twice with the same node id is not allowed.z/notify_task_launchedrS   synchronous	extra_envz&Ray worker node process exit, reason: r'  )pyspark.taskcontextr  logging	getLoggerr
  r   r   sys
executabler   r   r?  r$   r%   r   r)   r   r   r+   	addressesr   rd   r	  rf   rj   rk   rV   r   r   r^   r_   )r   r  _worker_loggercontextr   r   $ray_worker_node_dashboard_agent_portray_worker_node_cmdray_worker_node_extra_envstask_resourcesgpu_addr_listavailable_physical_gpusis_task_reschedule_failurerv   rp   err_msgr  heap_memory_per_noder  re  rf  r  r3  r4  rS  r5  rS   r6  r  s                   r1   ray_cluster_job_mapperz7_start_ray_worker_nodes.<locals>.ray_cluster_job_mapper  s     333333 *+BCC//##
 /00	
#! 0F%%0
 0
 0
, N+-+--D{MBBDD.,..C%ACC:!8::<!6!:<<S-QSS
 '':;;
 #&&'C\'C'CDDD -.A.GR-s29;;/?/?/&
 "	&
" q  $..00NN**"$   .<U.C.M  M 'L' '#  &&<c"9::<<   BECC+BCCCB B&'=> 	I#((3F*G*GII	
 	
 	
E	6).&}2mK9NOO 2 2 2  '*< 	 	 	H ==??;/  .2*"&   M,mK9NOO , , , )*<    # 4     
  	6 	6 	6  DqCCCGOOG$$$55555555555'	6s   5BI 
J&JJr  r   )rN  ri   rS  r  setJobGroupr  listr   rh  withResourcesr  on_spark_job_createdmapPartitionsr  last_worker_error)r>   rS   spark_job_group_descr  r  r3  r4  r5  re  rf  r  r  r  r  r  rN  r  job_rddresource_profile
hook_entryr  r  rS  r6  s    `   ``````````       @@r1   _start_ray_worker_nodesr    s   H "E,;A>*>G6 G6 G6 G6 G6 G6 G6 G6 G6 G6 G6 G6 G6 G6 G6 G6 G6R 
""  0  ,,U#$$%%'7 G  :3
 
 ''(899#|t/CEEEJ##$6777*1*?*?+ +gii+'G' & -4*4rG   c                      t           5  t          t          d          t                                           daddd           dS # 1 swxY w Y   dS )z+
    Shut down the active ray cluster.
    Nz#No active ray cluster to shut down.)r  r  r+   rg   rM   rG   r1   r  r    s     
$ # #&DEEE$$&&&"# # # # # # # # # # # # # # # # # #s   2AAAc            
       :    e Zd ZdZdededededef
dZd Zd	 Zd
S )rH  z*Create a ray on spark autoscaling cluster.r7  r8  r9  r:  r;  c                     |                                 | _        t          | j        d<   |                     |||||          | _        dS )zCreate the cluster.

        Args:
            head_resources: resources of the head node, including CPU.
            worker_node_types: autoscaler node types config for worker nodes.
        NODE_ID_AS_RESOURCEN)r>  _head_resourcesr   _generate_config_config)rE   r7  r8  r9  r:  r;  s         r1   rF   zAutoscalingCluster.__init__  sP      .22446B23,,! 
 
rG   c           	         t          j        t          t          j                            t          j                            t          j                  d                              }t          j
        |          }||d<   |i dd|d         d<   t          d |                                D                       |d<   |d                             |           ||d	<   ||d
<   |S )Nzautoscaler/spark/defaults.yamlavailable_node_typesr   )r   r0  r2  zray.head.defaultc              3   ,   K   | ]\  }}|d          V  dS )r2  NrM   )rN   r   r   s      r1   	<genexpr>z6AutoscalingCluster._generate_config.<locals>.<genexpr>  s<       +
 +
!%AAm+
 +
 +
 +
 +
 +
rG   r2  providerr:  r;  )yaml	safe_loadr   r)   r   r	  dirnamerX   __file__r>  deepcopysumr   r  )rE   r7  r8  r9  r:  r;  base_configcustom_configs           r1   r  z#AutoscalingCluster._generate_config  s     nGOOCL114  
 
 k220A,-'E
 E
,-.@A (+ +
 +
):)@)@)B)B+
 +
 +
 (
 (
m$ 	j!(()>???+:'(0D,-rG   c	                 D   ddl m}	m}
m} |!t          j                            |d          }n,t          j                            t                      d          }t          |d          5 }|	                    t          j        | j                             ddd           n# 1 swxY w Y   t                      \  }}t          j        dddd	d
| d| d| d| d| d|dz
   g|}||                    d|            d| j        v r@|                    d                    | j                            d                               d| j        v r@|                    d                    | j                            d                               d| j        v r@|                    d                    | j                            d                               d| j        v r@|                    d                    | j                            d                                |
|| j                  }|                     ||                     dd|	|pdt*          t-          t	          j                              i|}|| _        t3          |d|          S )z{Start the cluster.

        After this call returns, you can connect to the cluster with
        ray.init("auto").
        r   )r$   r  r   Nzautoscaling_config.jsonr   r  r  r  z--headz--node-ip-address=z--port=z--ray-client-server-port=z--autoscaling-config=r  r  r	   r  r,  z--num-cpus={}r-  z--num-gpus={}r.  z--memory={}r/  z--object-store-memory={}AUTOSCALER_UPDATE_INTERVAL_Sr  r  Fr  )ray.util.spark.cluster_initr$   r  r   r)   r   r	  r  r   r   rV   r   r  r   r  r  r?  r  formatr   extendr%   r   r   rI  _start_ray_head_node)rE   r3  r4  r@   r5  rQ  r   r  rS  r$   r  r   autoscale_configfr   r   rI  r  s                     r1   r   zAutoscalingCluster.start  sp    	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 #!w||L:STT!w||(**,E    "C(( 	.AGGDJt|,,---	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. /00	
#! N+...%m%%@(>@@6$466:!8::<!6!:<<
 
 #$$%A<%A%ABBBD((($$&&t';'?'?'F'FGG   D((($$&&t';'?'?'F'FGG   t+++$$$$T%9%=%=h%G%GHH   !D$888$$*11(,,-BCC    54t3
 
 	  !:!:;L!M!MNNN +C,.A.GR-s29;;/?/?
 "	
	 "3#5I
 
 
 	
s   *-B##B'*B'N)	r   r   r   r   dictr  rF   r  r   rM   rG   r1   rH  rH    s        44

  
  $	

 
 $
 
 
 
0! ! !FY
 Y
 Y
 Y
 Y
rG   rH  c                 .    d }t          | |||          S )Nc                  ,    t          j                     d S r   )r)   setpgrprM   rG   r1   preexec_functionz._start_ray_head_node.<locals>.preexec_functionN  s     	
rG   )r  r  
preexec_fn)r   )rI  r  r  r  s       r1   r
  r
  M  s6       #	   rG   c                      t           rd S 	 t          j        t          j                  fd} t          j        t          j        |            da d S # t          $ r t
                              d           Y d S w xY w)Nc                     	 t                       n# t          $ r Y nw xY wt          j        t          j                   t	          j        t	          j                    t          j                   d S r   )r  r   signalSIGTERMr)   killr   )signumframe_origin_sigterm_handlers     r1   _sigterm_handlerz1_install_sigterm_signal.<locals>._sigterm_handlerq  s    $&&&&     M 7   G	V^    s    
Tz,Install Ray-on-Spark SIGTERM handler failed.)_sigterm_signal_installedr  	getsignalr  r   r^   r_   )r  r  s    @r1   r  r  h  s     ! H"("26>"B"B	 	 	 	 	 	fn&6777$(!!! H H HFGGGGGGHs   AA $A:9A:)r  N)er>  rV   r  r)   r  r<  r  r   ra   r@  r  r   typingr   r   r   r   rj   r  packaging.versionr   rX   ray._private.servicesdatabricks_hookr
   start_hook_baser   utilsr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   ray._common.network_utilsr   r   ray._common.utilsr   +ray.autoscaler._private.spark.node_providerr   ray.util.annotationsr    r!   r  r^   setLevelINFOr"   r  r$   r%   r2   r4   r   r   rJ  rZ   r   r   r
  _BACKGROUND_JOB_STARTUP_WAITrc   r   rh   r   r  r  r  r  boolr   r  rZ  r  RLockr  rh  r  r  ry  r  r  r  r  r  r  rH  r
  r  r  rM   rG   r1   <module>r-     s	      				   



              . . . . . . . . . . . .   % % % % % % 



     A A A A A A 0 0 0 0 0 0                                   $ C B B B B B B B ( ( ( ( ( ( D D D D D D 8 8 8 8 8 8 8 8
'
,
-
-       4  #E  $G !3 3 3       D+ + +H H H  !# "sJNN=tDD     34 /$'CJNNBDII% % ! .1 *H: H: H:V     E E E. . .ww w 	w
 w w w !w !w w &)w $'w w w w  !w"  %#w$ %*%w& 'w( 

)w w w wt  ,Y_.. @ @ @ $#'
  
   $&&<#'" "   2n6n6smn6 #3-n6 !	n6
 #3-n6 !n6 &c]n6 $C=n6 &.c]n6 $,C=n6  ~n6 "$n6  }n6 n6 "#n6   (!n6" %-UO#n6$ %n6( 38_)n6 n6 n6 n6b  '+*.(,*.(,(,&*5937(,*.'+)-1469%c c cc smc #3-	c
 !c #3-c !c !c smc &.c]c $,C=c  ~c "$c  }c c  "#!c"  (#c$ %-UO%c( 38_)c c c cL  &**.(,*.(,(,&*5937(,*.)-1469%S S SS S sm	S
 #3-S !S #3-S !S !S smS &.c]S $,C=S  ~S "$S S  "#!S"  (#S$ %-UO%S S S Sl^ ^ ^B # # # # $(   W
 W
 W
 W
 W
 W
 W
 W
t  0 " H H H H HrG   