
    &`iK                     2	   U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Zd dlZd dlmZmZmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZ d dl Z!d dl"Z#d dl$Z$d dl%Z$d dl&Z&d dl'm(Z( d dl)m*Z*m+Z+m,Z, d dl-m.Z. d dl/Z/er0d dl"Z"d dl0m1Z1 d d	l2m3Z3 d d
l4m5Z5 d dl6m7Z7m8Z8m9Z9m:Z: d dl;m<Z<m=Z= d dl>m?Z?  ej@        eA          ZBdZCdeCz  ZDdeDz  ZE eF            ZGdZHdZIedeJef         ZKdaLeKeMd<    G d d          ZN eN            ZOdeKfdZPd ZQ	 	 	 	 ddeRdeeR         de,deed                  deeR         ded         deeR         deeReSeeR         f         fdZTd ed         deRfd!ZUdeRfd"ZVd# ZWd$eSd%eSddfd&ZXd'eSdeSfd(ZYd)eeSeeS         f         deJfd*ZZd+edeSfd,Z[d-dd d.d/eSd0eSd1eJd2eeS         d3eRdeSfd4Z\	 	 	 	 	 	 dd7eJd8eeS         d9eeS         d:eeS         fd;Z]d< Z^d= Z_d> Z`	 	 	 dd?d@dAeee                  dBeeeSdCf                  dDeeeReeReRf         eeReReRf         f                  ddCf
dEZadFeSfdGZbdFeSfdHZcdIdJdedK         fdLZddMe!je        de,dedK         fdNZfdOedP         ddQfdRZgdSedQ         dedT         fdUZhdVedT         dedT         fdWZidXedY         dedT         fdZZjdOedP         d[eeeRekf                  d\d]deRfd^Zld_eSdefd`Zm eda          Zn edb          Zo G dc dde          Zp	 	 ddfeen         d?eeen         geeo         f         dgeJdheRdieRdeeoddf         fdjZq G dk dl          Zr G dm dne$js        jt                  Zu G do dpe$js        jv                  Zwddqdrdsdteg ef         dueSdveeeS                  dweRdxeRdefdyZxdzeRdeSfd{Zydddd|d}eeR         d~eeR         deeR         deeeR         eeR         f         fdZzdeJfdZ{dedeJfdZ|d Z}deRfdZ~ G d d          Zdeeedf                  deee         df         fdZdIe#j        de#j        fdZde#j        de#j        deJfdZdeeR         deeR         deeR         deeSef         deeSef         f
dZe.d'eSdeeS         fd            ZdS )    N)EmptyFullQueue)
ModuleType)TYPE_CHECKINGAnyCallableDict	GeneratorIterableIteratorListOptionalTupleTypeVarUnion)call_with_retry)DEFAULT_READ_OP_MIN_NUM_BLOCKSWARN_PREFIXDataContext)DeveloperAPI)ComputeStrategy)	RefBundle)SortKey)BlockBlockMetadataWithSchemaSchemaUserDefinedFunction)
DatasourceReader)PlacementGroupi   localexample_pyarrow_datasetc                   6    e Zd ZdZd Zd Zd Zd Zd Zd Z	dS )	_OrderedNullSentinelzSentinel value that sorts greater than any other non-null value.

    NOTE: Semantic of this sentinel is closely mirroring that one of
          ``np.nan`` for the purpose of consistency in handling of
          ``None``s and ``np.nan``s.
    c                     dS )NF selfothers     k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/data/_internal/util.py__eq__z_OrderedNullSentinel.__eq__V   s    u    c                 J    t          |t                    pt          |          S N)
isinstancer&   is_nullr)   s     r,   __lt__z_OrderedNullSentinel.__lt__Y   s     
 %!566H'%..Hr.   c                 ,    |                      |          S r0   r3   r)   s     r,   __le__z_OrderedNullSentinel.__le__`   s     {{5!!!r.   c                 .    |                      |           S r0   )r6   r)   s     r,   __gt__z_OrderedNullSentinel.__gt__e       ;;u%%%%r.   c                 .    |                      |           S r0   r5   r)   s     r,   __ge__z_OrderedNullSentinel.__ge__h   r9   r.   c                      t          |           S r0   )idr*   s    r,   __hash__z_OrderedNullSentinel.__hash__k   s    $xxr.   N)
__name__
__module____qualname____doc__r-   r3   r6   r8   r;   r?   r(   r.   r,   r&   r&   N   s{           I I I" " "
& & && & &    r.   r&   returnc                  R    t           	 ddlma  n# t          $ r da Y nw xY wt           S )Nr   )datasetF)r$   pyarrowrF   ModuleNotFoundErrorr(   r.   r,   _lazy_import_pyarrow_datasetrI   r   sS    	%;;;;;;;" 	% 	% 	%  %	% s    c                  L    t           j        j                                         d S r0   )ray_privatearrow_utils_check_pyarrow_versionr(   r.   r,   rN   rN   ~   s    L3355555r.   parallelismtarget_max_block_sizectxdatasource_or_legacy_readermem_sizeplacement_groupr!   
avail_cpusc           	         d}t           j        }||r|                                }|[t          j        |          sG|Et          dt          ||z                      }t          dt          ||j        z                      }d}	| dk     r<| dk    rt          d          |j	        F|j	        t          k    r6|j        t          k    r&t                              d           |j	        |_        |t          j                                        }|pt#          |          }t          t%          |j        |          ||dz            } | |j        k    rd	|j         }	nH| |k    rd
|j        t&          z   d}	n.| |k    r"|j        d}
n|j        t&          z   d}
d|
 }	nd| d}	t                              d|  d| d| d           | |	|fS )a  Returns parallelism to use and the min safe parallelism to avoid OOMs.

    This detects parallelism using the following heuristics, applied in order:

     1) We start with the default value of 200. This can be overridden by
        setting the `read_op_min_num_blocks` attribute of
        :class:`~ray.data.context.DataContext`.
     2) Min block size. If the parallelism would make blocks smaller than this
        threshold, the parallelism is reduced to avoid the overhead of tiny blocks.
     3) Max block size. If the parallelism would make blocks larger than this
        threshold, the parallelism is increased to avoid OOMs during processing.
     4) Available CPUs. If the parallelism cannot make use of all the available
        CPUs in the cluster, the parallelism is increased until it can.

    Args:
        parallelism: The user-requested parallelism, or -1 for auto-detection.
        target_max_block_size: The target max block size to
            produce. We pass this separately from the
            DatasetContext because it may be set per-op instead of
            per-Dataset.
        ctx: The current Dataset context to use for configs.
        datasource_or_legacy_reader: The datasource or legacy reader, to be used for
            data size estimation.
        mem_size: If passed, then used to compute the parallelism according to
            target_max_block_size.
        placement_group: The placement group that this Dataset
            will execute inside, if any.
        avail_cpus: Override avail cpus detection (for testing only).

    Returns:
        Tuple of detected parallelism (only if -1 was specified), the reason
        for the detected parallelism (only if -1 was specified), and the estimated
        inmemory size of the dataset.
       N r   z6`parallelism` must either be -1 or a positive integer.zy``DataContext.min_parallelism`` is deprecated in Ray 2.10. Please specify ``DataContext.read_op_min_num_blocks`` instead.   z1DataContext.get_current().read_op_min_num_blocks=zOoutput blocks of size at least DataContext.get_current().target_min_block_size=z MiB	unlimitedzNoutput blocks of size at most DataContext.get_current().target_max_block_size=z9parallelism at least twice the available number of CPUs ()zAutodetected parallelism=z# based on estimated_available_cpus=z and estimated_data_size=.)sysmaxsizeestimate_inmemory_data_sizenpisnanmaxinttarget_min_block_size
ValueErrormin_parallelismr   read_op_min_num_blocksloggerwarningrK   utilget_current_placement_group_estimate_avail_cpusminMiBrP   debug)rO   rP   rQ   rR   rS   rT   rU   min_safe_parallelismmax_reasonable_parallelismreasondisplay_vals              r,   _autodetect_parallelismru      s~   V !$7.JJLL"" 	!-"1c(5J*J&K&KLL%(C3;T0T,U,U%V%V"FQ"UVVV +#'EEE*.LLLNNQ   *-)<C& "!hBBDDOH#7#H#H
*,FGG N
 
 #4440-0 0 F 6669,s29 9 9 F
 000(0)!$!:S!@FFF!! ! F*&* * * 
 	/ / /(2/ /#+/ / /	
 	
 	
 ((r.   cur_pgc                    t          t          j                                        dd                    }t          t          j                                        dd                    }| rd}| j        D ]x}|                    dd          t          d|          z  }|                    dd          t          d|          z  }t          ||          }|dt          ||z            z  z  }yt          ||          S |S )aW  Estimates the available CPU parallelism for this Dataset in the cluster.

    If we aren't in a placement group, this is trivially the number of CPUs in the
    cluster. Otherwise, we try to calculate how large the placement group is relative
    to the size of the cluster.

    Args:
        cur_pg: The current placement group, if any.
    CPUrW   GPUr   rZ   )rd   rK   cluster_resourcesgetbundle_specsrc   rn   )rv   cluster_cpuscluster_gpuspg_cpusbundlecpu_fractiongpu_fractionmax_fractions           r,   rm   rm      s     s,..225!<<==Ls,..225!<<==L
  *) 		< 		<F "::eQ//#a2F2FFL!::eQ//#a2F2FFL|\::L q3|l:;;;;GG<)))r.   c                  \    t           j                                        } t          |           S )zEstimates the available CPU parallelism for this Dataset in the cluster.
    If we are currently in a placement group, take that into account.)rK   rk   rl   rm   )rv   s    r,   _estimate_available_parallelismr     s%     X1133F'''r.   c                     t          j                                        dd          }| r9||dz  k    r2|dk    r.t                              t
           d|  d| d           d S d S d S d S )Nrx   rW      i  z The requested parallelism of zE is more than 4x the number of available CPU slots in the cluster of a>  . This can lead to slowdowns during the data reading phase due to excessive task creation. Reduce the parallelism to match with the available CPU slots in the cluster, or set parallelism to -1 for Ray Data to automatically determine the parallelism. You can ignore this message if the cluster is expected to autoscale.)rK   available_resourcesr{   ri   rj   r   )requested_parallelismnum_read_tasksavailable_cpu_slotss      r,   _warn_on_high_parallelismr   "  s    13377qAA
01444d"" S S:O S S"S S S		
 		
 		
 		
 		

 
44""r.   modulepackagec                    	 t          j        |           dS # t          $ r' t          d| j        j         d| d| d| d	          w xY w)aR  Check if a required dependency is installed.

    If `module` can't be imported, this function raises an `ImportError` instructing
    the user to install `package` from PyPI.

    Args:
        obj: The object that has a dependency.
        module: The name of the module to import.
        package: The name of the package on PyPI.
    `z` depends on 'z-', but Ray Data couldn't import it. Install 'z' by running `pip install z`.N)	importlibimport_moduleImportError	__class__r@   )objr   r   s      r,   _check_importr   5  s    
''''' 
 
 
Q& Q Qf Q Q#)Q QELQ Q Q
 
 	

s	    1A	pathc                 \   t           j                            |           }|j        t          k    r|j        |j        z   } nl|j        t          k    r\t          j	        t                    j        j        dz  dz  }||j        |j        z   z  } t          |                                           } | S )zReturns the resolved path if the given path follows a Ray-specific custom
    scheme. Othewise, returns the path unchanged.

    The supported custom schemes are: "local", "example".
    examplesdata)urllibparseurlparsescheme_LOCAL_SCHEMEnetlocr   _EXAMPLE_SCHEMEpathlibPath__file__parentstrresolve)r   
parsed_uriexample_data_paths      r,   _resolve_custom_schemer   I  s     &&t,,JM)) :?2		o	-	-#L229@:MPVV J$5
$GH4<<>>""Kr.   pathsc                    t          | t                    r| g} t          | t          j                  rt          |           g} n_t          | t                    rt          d | D                       rt          d          t          |           dk    rt          d          t          d | D                       }|dk    r%|t          |           k     rt          d|            |t          |           k    S )zReturns True if the given paths are in local scheme.
    Note: The paths must be in same scheme, i.e. it's invalid and
    will raise error if paths are mixed with different schemes.
    c              3   B   K   | ]}t          |t                     V  d S r0   )r1   r   ).0ps     r,   	<genexpr>z#_is_local_scheme.<locals>.<genexpr>b  s/      +R+Rq
1c0B0B,B+R+R+R+R+R+Rr.   z6paths must be a path string or a list of path strings.r   zMust provide at least one path.c              3   p   K   | ]1}t           j                            |          j        t          k    V  2d S r0   )r   r   r   r   r   )r   r   s     r,   r   z#_is_local_scheme.<locals>.<genexpr>f  s:      TTdfl##D))0MATTTTTTr.   zHThe paths must all be local-scheme or not local-scheme, but found mixed )	r1   r   r   r   listanyrf   lensum)r   nums     r,   _is_local_schemer   Y  s	   
 % %&& <Ut$$ <+R+RE+R+R+R(R(R <QRRR	Uq:;;;
TTeTTT
T
TC
Qww3U##'$' '
 
 	
 #e**r.   r   c                 d    t          |           }t          |          dk    r|dd         dz   }|S )zGUtility to return a truncated object representation for error messages.   Nz...)r   r   )r   msgs     r,   _truncated_reprr   o  s3    
c((C
3xx#~~$3$i%Jr.   T)insert_after	directiveskip_matchesmessagepatternr   r   r   c          	         d|v rt          d|           | j                                        }|sd}|dk    r|r|}d}n|}|                    |          }	|}
|	dk    r|r|	t	          |          z   }n |d |	                             d          dz   }|d |         }||d          }|
dz  }
|
dk    rnR|s||	|z
  t	          |          z   d          }|                    |          }	|	dk    t          d| d| d	|           t          t          t          |	                                                    }t	          |          dk    r|}nAt          t          t          t          |	                                                              }t	          |          dk    sJ d
t	          |d                   t	          |d                                                   z
  z  }|                    d          }|0| d| d}|                    dd|z   dz             }||z   dz   |z   }n||                    dd|z             z   }||dk    z  rd|z   }| |dk    z  r|dz   }|||g}d                    |          | _        d S )N
zomessage shouldn't contain any newlines, since this function will insert its own linebreaks when text wrapping: rX   rY   rW   r   zPattern z not found after z skips in docstring  z.. z::
z    z

)rf   rC   stripfindr   rfindr   filterbool
splitlinesreversedlstripreplacejoin)r   r   r   r   r   r   docheadtailiskip_matches_leftoffsetafter_lineslinesindentbasepartss                    r,   _insert_doc_at_patternr   w  s    w@6=@ @
 
 	

 +



C "}}} IIg(2gg 2S\\) bqb--1=D=D" A%%! 9AJW5778		'""A 2gg" 7  \     
 vdDOO$5$56677K
;!VD(4??+<+<"="=>>??u::>>>>CaMMCa(9(9$:$::;FmmD!!G,,Y,,,//$v(?@@-')G37??4???w&() # 7"W./ # F" 7D!E''%..CKKKr.   F	Examples:if_more_than_readdatasource_metadataextra_conditiondelegatec                 |   	 d}|r||z   	n'| sd|z   	nd}|	|d| dz  }|||dz   z  }|dz   |z   		fd	}|S )
zAnnotate the function with an indication that it's a consumption API, and that it
    will trigger Dataset execution.
    zN will trigger execution of the lazy transformations performed on this dataset.zThis operationz.If this dataset consists of more than a read, Nz
or if the zC can't be determined from the metadata provided by the datasource, z, zthen this operationc                 0    t          | d           | S )Nnoter   r   r   r   r   )r   r   r   r   s    r,   wrapz_consumption_api.<locals>.wrap  s0    %	
 	
 	
 	
 
r.   r(   )
r   r   r   r   r   r   r   	conditionr   r   s
       ``   @r,   _consumption_apir     s    	 	  ;T/ ;"T)D	*80 8 8 8I &4//I33d:       Kr.   c                      t          |           dk    rEt          |          dk    r2t          | d                   r t                      | d                   S t          | i |S )r   rW   r   )r   callabler   argskwargss     r,   ConsumptionAPIr     sb     4yyA~~#f++**xQ/@/@*!!!$q'***T,V,,,r.   c                      d }|S )Annotate the function with an indication that it's a all to all API, and that it
    is an operation that requires all inputs to be materialized in-memory to execute.
    c                 .    t          | dddd           | S )NzXThis operation requires all inputs to be materialized in object store for it to execute.r   Fr   r   r   )r   s    r,   r   z_all_to_all_api.<locals>.wrap  s4    B  		
 		
 		
 		
 
r.   r(   )r   r   r   s      r,   _all_to_all_apir     s    
   Kr.   c                      t          |           dk    r(t          |          dk    rt          | d                   sJ  t                      | d                   S )r   rW   r   )r   r   r   r   s     r,   AllToAllAPIr     sT    
 t99>>c&kkQ..8DG3D3D..D?T!W%%%r.   fnr   fn_constructor_argscomputer   concurrencyc                    ddl m}m} ddlm} t          | |          rd}nd}|t          d|  d          |^|r,|d	k    st          ||          rt          d
|  d| d          |s,|dk    st          ||          rt          d|  d| d          |S |t                              d           t          |t                    rt          |          dvst          d |D                       st          d| d          |st          d|  d          t          |          dk    r ||d         |d                   S  ||d         |d         |d                   S t          |t                    r|r ||          S  ||          S t          d| d          |r |dd          S  |            S )aa  Get `ComputeStrategy` based on the function or class, and concurrency
    information.

    Args:
        fn: The function or generator to apply to a record batch, or a class type
            that can be instantiated to create such a callable.
        fn_constructor_args: Positional arguments to pass to ``fn``'s constructor.
        compute: Either "tasks" (default) to use Ray Tasks or an
                :class:`~ray.data.ActorPoolStrategy` to use an autoscaling actor pool.
        concurrency: The number of Ray workers to use concurrently.

    Returns:
       The `ComputeStrategy` for execution.
    r   )ActorPoolStrategyTaskPoolStrategy)CallableClassTFNzj``fn_constructor_args`` can only be specified if providing a callable class instance for ``fn``, but got: r]   tasksz!You specified the callable class z as your UDF with the compute z, but Ray Data can't schedule callable classes with the task pool strategy. To fix this error, pass an ActorPoolStrategy to compute or None to use the default compute strategy.actorszYou specified the function z, but Ray Data can't schedule regular functions with the actor pool strategy. To fix this error, pass a TaskPoolStrategy to compute or None to use the default compute strategy.zThe argument ``concurrency`` is deprecated in Ray 2.51. Please specify argument ``compute`` instead. For more information, see https://docs.ray.io/en/master/data/transforming-data.html#stateful-transforms.)rZ      c              3   @   K   | ]}t          |t                    V  d S r0   )r1   rd   )r   cs     r,   r   z'get_compute_strategy.<locals>.<genexpr>_  s=       9 9'(
1c""9 9 9 9 9 9r.   zG``concurrency`` is expected to be set as a tuple of integers, but got: zS``concurrency`` is set as a tuple of integers, but ``fn`` is not a callable class: zD. Use ``concurrency=n`` to control maximum number of workers to use.rZ   rW   )min_sizemax_size)r  r  initial_size)sizezU``concurrency`` is expected to be set as an integer or a tuple of integers, but got: )ray.data._internal.computer   r   ray.data.blockr   r1   rf   ri   rj   tupler   allrd   )r   r   r   r   r   r   r   is_callable_classs           r,   get_compute_strategyr
    s$   * ONNNNNNN,,,,,,"m$$ 
  "*F@BF F F  
  	w*W6F"G"G=B = == = =   # 	x:g7H#I#I=b = == = =   		 #	
 	
 	
 k5)) &	;v--S 9 9,79 9 9 6 6- !9*59 9 9   %  @02@ @ @   ;1$$(((^k!n    )((^(^!,Q   
 S)) 		  :((k::::''[9999>/:> > >  
  	&$$a$????##%%%r.   sc                 L    | d                                          | dd         z   S )zCapitalize the first letter of a string

    Args:
        s: String to capitalize

    Returns:
       Capitalized string
    r   rW   N)upperr  s    r,   capfirstr    s#     Q4::<<!ABB%r.   c                 f    d                     d |                     d          D                       S )zCapitalize a string, removing '_' and keeping camelcase.

    Args:
        s: String to capitalize

    Returns:
        Capitalized string with no underscores.
    rX   c              3   4   K   | ]}t          |          V  d S r0   )r  )r   xs     r,   r   zcapitalize.<locals>.<genexpr>  s(      5518A;;555555r.   _)r   splitr  s    r,   
capitalizer    s/     7755555555r.   dfpandas.DataFrame)r   r   c                     ddl m}m}m} |                    |                                           }|                                }| |j        ||                                          fS )Nr   BlockAccessorBlockExecStatsr   stats)	r  r  r  r   	for_blockto_arrowbuilder
from_blockbuild)r  r  r  r   blockr  s         r,   pandas_df_to_arrow_blockr$    sz     VUUUUUUUUU##B''0022E""$$E4)4U%++--PPPPPr.   ndarrayc                     ddl m}m}m} t	          j        |           |                                }|                    d| i          }| |j        ||	                                          fS )Nr   r  r   r  )
r  r  r  r   r   _set_currentr   batch_to_blockr!  r"  )r%  rQ   r  r  r   r  r#  s          r,   ndarray_to_blockr)    s     VUUUUUUUUUS!!!""$$E((&'):;;E4)4U%++--PPPPPr.   table)zpyarrow.Tabler  r   c                     ddl m}m} |                                } |j        | |                                          S )Nr   )r  r   r  )r  r  r   r   r!  r"  )r*  r  r   r  s       r,   get_table_block_metadata_schemar,    sO     GFFFFFFF""$$E-"-e5;;==IIIIr.   block_metadata_with_schemasr   c                     g }| D ]5}|j         ,|j        |j        dk    r|                    |j                    6t          |          S )a\  For the input list of BlockMetadata, return a unified schema of the
    corresponding blocks. If the metadata have no valid schema, returns None.

    Args:
        block_metadata_with_schemas: List of BlockMetadata to unify

    Returns:
        A unified schema of the input list of schemas, or None if no valid schemas
        are provided.
    Nr   schemanum_rowsappendunify_schemas_with_validation)r-  schemas_to_unifyms      r,   unify_block_metadata_schemar6    sX    & ( . .8QZ%71:>>##AH---()9:::r.   r4  c                     | rPddl m} 	 dd ln# t          $ r d Y nw xY w(t	          fd| D                       r || d          S | d         S d S )Nr   )unify_schemasc              3   B   K   | ]}t          |j                  V  d S r0   )r1   r   )r   r  pas     r,   r   z0unify_schemas_with_validation.<locals>.<genexpr>  s/      !U!Uq*Q	":":!U!U!U!U!U!Ur.   T)promote_types).ray.data._internal.arrow_ops.transform_pyarrowr8  rG   r   r  )r4  r8  r:  s     @r,   r3  r3    s      #PPPPPP	      	 	 	BBB	 >c!U!U!U!UDT!U!U!UUU> =!1FFFF  ""4s    ref_bundlesr   c                     g }| D ]O}|j         F|                                |                                dk    r|                    |j                    Pt          |          S )Nr   r/  )r=  r4  r   s      r,   unify_ref_bundles_schemar?    sk      3 3=$OO%):):Q)>)>##FM222()9:::r.   desiredsort_keyr   c           	         |                                 }|                                }dt          |           }}t          t          |                    D ]}||k    r|c S ||         }| |                                         ||         }	||         }
|
t
          }
|}||         du r}t          j        t          |	          dz
  dd          }|t          |	          t          j        |	|
d|          z
  z   }|t          |	          t          j        |	|
d|          z
  z   }|t          j        |	|
d	          z   }|t          j        |	|
d	          z   }|d         du r|n|S )
av  For the given block, find the index where the desired value should be
    added, to maintain sorted order.

    We do this by iterating over each column, starting with the primary sort key,
    and binary searching for the desired value in the column. Each binary search
    shortens the "range" of indices (represented by ``left`` and ``right``, which
    are indices of rows) where the desired value could be inserted.

    Args:
        table: The block to search in.
        desired: A single tuple representing the boundary to partition at.
            ``len(desired)`` must be less than or equal to the number of columns
            being sorted.
        sort_key: The sort key to use for sorting, providing the columns to be
            sorted and their directions.

    Returns:
        The index where the desired value should be inserted to maintain sorted
        order.
    r   NTrW   rY   right)sidesorterleft)rD  )	get_columnsget_descendingr   rangeto_numpyNULL_SENTINELra   arangesearchsorted)r*  r@  rA  columns
descendingrF  rC  r   col_namecol_valsdesired_valprevleftrE  s                r,   find_partition_indexrT    s   2 ""$$G((**JSZZ%D3w<<   (T (T5==LLL1:?++--d5j9aj 'Ka=D   Ys8}}q0"b99FH/ !	  D H/!	  EE boh&QQQQDrx7SSSSEEqMT))55t3r.   
class_namec                     ddl m} |                     d          }t          |          dk     rt	          d|  d          d                    |dd                   }|d         }t           ||          |          S )zGet Python attribute from the provided class name.

    The caller needs to make sure the provided class name includes
    full module name, and can be imported successfully.
    r   )r   r]   rZ   zCannot create object from NrY   )r   r   r  r   rf   r   getattr)rU  r   r   module_nameattribute_names        r,   get_attribute_from_class_namerZ  F  s     ('''''S!!E
5zzA~~CjCCCDDD((5":&&K2YN==--~>>>r.   TUc                   b     e Zd ZdZdZ	 d
dedeej                 f fdZ	d fd	Z
d fd		Z xZS )_InterruptibleQueueznExtension of Python's `queue.Queue` providing ability to get interrupt its
    method callers in other threadsg      ?Nr  interrupted_eventc                     t                                          |           |pt          j                    | _        d S )N)r_   )super__init__	threadingEvent_interrupted_event)r*   r  r_  r   s      r,   rb  z_InterruptibleQueue.__init__a  s;     	***"3"Hy7H7Hr.   Tc                    |r|"t                                          ||          S 	 | j                                        rt	                      	 t                                          d| j                  S # t          $ r Y nw xY waNT)r#  timeout)ra  r{   re  is_setInterruptedError INTERRUPTION_CHECK_FREQUENCY_SECr   )r*   r#  rh  r   s      r,   r{   z_InterruptibleQueue.getg  s     	/+77;;ug...		&--// )&(((ww{{(M #       		s   'A9 9
BBc                     |r|%t                                          |||           d S 	 | j                                        rt	                      	 t                                          |d| j                   d S # t          $ r Y nw xY wdrg  )ra  putre  ri  rj  rk  r   )r*   itemr#  rh  r   s       r,   rm  z_InterruptibleQueue.put~  s     	+GGKKeW---F
	&--// )&(((d.S        
	s   )A? ?
BBr0   )TN)r@   rA   rB   rC   rk  rd   r   rc  rd  rb  r{   rm  __classcell__r   s   @r,   r^  r^  [  s        ' ' (+$ MQI II080II I I I I I     .         r.   r^  rW   base_iteratorpreserve_orderingnum_workersbuffer_sizec              #      K   t          j        dd          dk     rt          d          t          j                    |rdn
dz   z  dfdt                    D             fdt                    D              fd}fd	t          j        |d
 d          }|                                 fdt                    D             }|D ]}|                                 	 }	t          |	          dk    rxg |	D ]P}
|
	                                }t          |t                    r||t          u r                    |
           L|V  Qrfd|	D             }	t          |	          dk    x                                 dS #                                  w xY w)a=  Returns a generator (iterator) mapping items from the
    provided iterator applying provided transformation in parallel (using a
    thread-pool).

    NOTE: There are some important constraints that needs to be carefully
          understood before using this method

        1. If `preserve_ordering` is True
            a. This method would unroll input iterator eagerly (irrespective
                of the speed of resulting generator being consumed). This is necessary
                as we can not guarantee liveness of the algorithm AND preserving of the
                original ordering at the same time.

            b. Resulting ordering of the output will "match" ordering of the input, ie
               that:
                    iterator = [A1, A2, ... An]
                    output iterator = [map(A1), map(A2), ..., map(An)]

        2. If `preserve_ordering` is False
            a. No more than `num_workers * (queue_buffer_size + 1)` elements will be
                fetched from the iterator

            b. Resulting ordering of the output is unspecified (and is
            non-deterministic)

    Args:
        base_iterator: Iterator yielding elements to map
        fn: Transformation to apply to each element
        preserve_ordering: Whether ordering has to be preserved
        num_workers: The number of threads to use in the threadpool (defaults to 1)
        buffer_size: Number of objects to be buffered in its input/output
                     queues (per queue; defaults to 2). Total number of objects held
                     in memory could be calculated as:

                        num_workers * buffer_size * 2 (input and output)

    Returns:
        An generator (iterator) of the elements corresponding to the source
        elements mapped by provided transformation (while *preserving the ordering*)
    r   irW   z&Size of threadpool must be at least 1.rY   c                 0    g | ]}t                    S r(   r^  )r   r  input_queue_buf_sizer_  s     r,   
<listcomp>z"make_async_gen.<locals>.<listcomp>  s5        	02CDD  r.   c                 0    g | ]}t                    S r(   rw  )r   r  rt  r_  s     r,   ry  z"make_async_gen.<locals>.<listcomp>  s2       @AK):;;  r.   c                     	 t                    D ]#\  } }| z                               |           $t                    D ]%} | z                               t                     &d S # t          $ r Y d S t
          $ rN}t                              d|           t                    D ]}|                    |           Y d }~d S d }~ww xY w)Nz#Caught exception in filling worker!exc_info)		enumeraterm  rI  SENTINELrj  	Exceptionri   rj   r   )	idxrn  eoutput_queuerq  input_queuesnum_input_queuesrs  output_queuess	       r,   _run_filling_workerz+make_async_gen.<locals>._run_filling_worker  s/   	$ '}55 ? ?	TS#33488>>>> [)) C CS#33488BBBBC C   	 	 	DD 	$ 	$ 	$NN@1NMMM
 !) 7 7 $ $  ####$ $ $ $ $ $	$s   A(A- -
C:	CACCc                 b   	 t          | j        t                    } |          D ]}|                    |           |                    t                     d S # t          $ r Y d S t
          $ r<}t                              d|           |                    |           Y d }~d S d }~ww xY w)Nz(Caught exception in transforming worker!r|  )iterr{   r  rm  rj  r  ri   rj   )input_queuer  input_queue_iterresultr  r   s        r,   _run_transforming_workerz0make_async_gen.<locals>._run_transforming_worker  s    	   $KOX>>"-.. ) )  (((( X&&&&& 	 	 	DD 	  	  	 NNEPQNRRR Q		 s   AA 
B.)	B.21B))B.zmap_tp_filling_worker-T)targetnamedaemonc           	      n    g | ]1}t          j        d  d| |z           |         fd          2S )zmap_tp_transforming_worker--T)r  r  r   r  )rc  Thread)r   r  r  gen_idr  r  r  s     r,   ry  z"make_async_gen.<locals>.<listcomp>2  sn     # # #  	+=v====s%556c8JK		
 	
 	
# # #r.   c                     g | ]}|v|	S r(   r(   )r   qempty_queuess     r,   ry  z"make_async_gen.<locals>.<listcomp>c  s*     + + +!<:O:OA:O:O:Or.   N)randomrandintrf   rc  rd  rI  r  startr   r{   r1   r  r  r2  set)rq  r   rr  rs  rt  r  filling_worker_threadtransforming_worker_threadstremaining_output_queuesr  rn  r  r  r  rx  r  r_  r  r  s   `` ``       @@@@@@@@r,   make_async_genr    s     ` ^Ay))FQABBB ")).  !& +a;>    '((  L
    EJ;EWEW  M
$ $ $ $ $ $ $ $ $:         0 &,".f..  
 !!!# # # # # # # # %%# # # )  					2 "/)**Q.. L !8 
 
#''))dI.. J8## ''5555JJJJ + + + +6+ + +'? )**Q..\ 	s   
BF- -Gc            	       X    e Zd Z	 	 ddej        dededefdZd Zd	e	d
e
fdZd Zd ZdS )RetryingContextManager
       fcontextmax_attemptsmax_backoff_sc                 >    || _         || _        || _        || _        d S r0   )_f_data_context_max_attempts_max_backoff_s)r*   r  r  r  r  s        r,   rb  zRetryingContextManager.__init__v  s)     $)+r.   c                 V    d| j         j         d| j                                         dS )N<z fs=>)r   r@   handlerunwrapr>   s    r,   __repr__zRetryingContextManager.__repr__  s.    H4>*HH0C0C0E0EHHHHr.   	operationdescriptionc                 R    t          ||| j        j        | j        | j                  S z"Execute an operation with retries.)r  matchr  r  )r   r  retried_io_errorsr  r  r*   r  r  s      r,   _retry_operationz'RetryingContextManager._retry_operation  s4    #$6+-
 
 
 	
r.   c                 B    |                      | j        j        d          S )Nzenter file context)r  r  	__enter__r>   s    r,   r  z RetryingContextManager.__enter__  s    $$TW%68LMMMr.   c                 F                            fdd           d S )Nc                  <    j                                        S r0   )r  __exit__)exc_type	exc_valuer*   	tracebacks   r,   <lambda>z1RetryingContextManager.__exit__.<locals>.<lambda>  s    DG$$Xy)DD r.   zexit file contextr  )r*   r  r  r  s   ````r,   r  zRetryingContextManager.__exit__  s@    DDDDDDD	
 	
 	
 	
 	
r.   Nr  r  )r@   rA   rB   rG   
NativeFiler   rd   rb  r  r	   r   r  r  r  r(   r.   r,   r  r  u  s        
 
, 
,
, 
, 	
,
 
, 
, 
, 
,I I I
( 
 
 
 
 
N N N
 
 
 
 
r.   r  c            
            e Zd Zd fdZedee         fd            Zd Ze		 	 dd	d
dee         de
de
fd            Zd Ze	d             Z xZS )RetryingPyFileSystemr  RetryingPyFileSystemHandlerc                     t          |t                    st          d          sJ t                                          |           d S )Nz-handler must be a RetryingPyFileSystemHandler)r1   r  rf   ra  rb  )r*   r  r   s     r,   rb  zRetryingPyFileSystem.__init__  sJ    '#>?? 	OMNNNNN!!!!!r.   rD   c                     | j         j        S r0   )r  _retryable_errorsr>   s    r,   retryable_errorsz%RetryingPyFileSystem.retryable_errors  s    |--r.   c                 4    | j                                         S r0   )r  r  r>   s    r,   r  zRetryingPyFileSystem.unwrap  s    |""$$$r.   r  r  fspyarrow.fs.FileSystemr  r  r  c                 j    t          |t                    r|S t          ||||          } | |          S r0   )r1   r  r  )clsr  r  r  r  r  s         r,   r   zRetryingPyFileSystem.wrap  sE     b.// 	I- ,
 
 s7||r.   c                      | j         | j        ffS r0   )r   r  r>   s    r,   
__reduce__zRetryingPyFileSystem.__reduce__  s    00r.   c                      | | S r0   r(   )r  states     r,   __setstate__z!RetryingPyFileSystem.__setstate__  s     sE{r.   )r  r  r  )r@   rA   rB   rb  propertyr   r   r  r  classmethodrd   r   r  r  ro  rp  s   @r,   r  r    s        " " " " " "
 .$s) . . . X.% % % 
  # s) 	
    [1 1 1   [    r.   r  c            	       P   e Zd ZdZ e            ddfdddee         dedefd	Zd
e	defdZ
d ZdedefdZdedefdZdefdZd)dedefdZdefdZd ZdddefdZdee         fdZd Zd  Zdedefd!Zdedefd"Z	 d*dedd$fd%Zdedd$fd&Z	 d*dedd$fd'Zdedd$fd(Zd#S )+r  zWrapper for filesystem objects that adds retry functionality for file operations.

    This class wraps any filesystem object and adds automatic retries for common
    file operations that may fail transiently.
    r  r  r  r  r  r  r  c                 |    t          |t                    r
J d            || _        || _        || _        || _        dS )a  Initialize the retrying filesystem wrapper.

        Args:
            fs: The underlying filesystem to wrap
            context: DataContext for retry settings
            max_attempts: Maximum number of retry attempts
            max_backoff_s: Maximum backoff time in seconds
        z"Cannot wrap a RetryingPyFileSystemN)r1   r  _fsr  r  r  )r*   r  r  r  r  s        r,   rb  z$RetryingPyFileSystemHandler.__init__  sZ     $
 
 	0 	0/	0 	0 
 !1)+r.   r  r  c                 H    t          ||| j        | j        | j                  S r  )r   r  r  r  r  s      r,   r  z,RetryingPyFileSystemHandler._retry_operation  s1    #(+-
 
 
 	
r.   c                     | j         S r0   )r  r>   s    r,   r  z"RetryingPyFileSystemHandler.unwrap  s	    xr.   srcdestc                 J                            fdd d           S )zCopy a file.c                  :    j                                        S r0   )r  	copy_filer  r*   r  s   r,   r  z7RetryingPyFileSystemHandler.copy_file.<locals>.<lambda>  s    DH&&sD11 r.   zcopy file from  to r  r*   r  r  s   ```r,   r  z%RetryingPyFileSystemHandler.copy_file  sB    $$1111113TS3T3Td3T3T
 
 	
r.   r   	recursivec                 D                            fdd           S )z&Create a directory and subdirectories.c                  <    j                                        S )N)r  )r  
create_dir)r   r  r*   s   r,   r  z8RetryingPyFileSystemHandler.create_dir.<locals>.<lambda>  s    DH''	'BB r.   zcreate directory r  )r*   r   r  s   ```r,   r  z&RetryingPyFileSystemHandler.create_dir  s:    $$BBBBBB&&&
 
 	
r.   c                 @                            fdd           S )z1Delete a directory and its contents, recursively.c                  8    j                                        S r0   )r  
delete_dirr   r*   s   r,   r  z8RetryingPyFileSystemHandler.delete_dir.<locals>.<lambda>  s    DH''-- r.   zdelete directory r  r*   r   s   ``r,   r  z&RetryingPyFileSystemHandler.delete_dir  s6    $$-----/I4/I/I
 
 	
r.   Fmissing_dir_okc                 D                            fdd           S )z+Delete a directory's contents, recursively.c                  <    j                                        S )N)r  r  delete_dir_contents)r  r   r*   s   r,   r  zARetryingPyFileSystemHandler.delete_dir_contents.<locals>.<lambda>   s    DH00n0UU r.   zdelete directory contents r  )r*   r   r  s   ```r,   r  z/RetryingPyFileSystemHandler.delete_dir_contents  s:    $$UUUUUU///
 
 	
r.   c                 @                            fdd           S )zDelete a file.c                  8    j                                        S r0   )r  delete_filer  s   r,   r  z9RetryingPyFileSystemHandler.delete_file.<locals>.<lambda>  s    DH((.. r.   zdelete file r  r  s   ``r,   r  z'RetryingPyFileSystemHandler.delete_file  s6    $$.....0Et0E0E
 
 	
r.   c                 6                            fdd          S )Nc                  <     j                             dd          S )N/T)accept_root_dirr  r>   s   r,   r  zFRetryingPyFileSystemHandler.delete_root_dir_contents.<locals>.<lambda>  s    DH00d0KK r.   zdelete root dir contentsr  r>   s   `r,   delete_root_dir_contentsz4RetryingPyFileSystemHandler.delete_root_dir_contents
  s*    $$KKKK&
 
 	
r.   r+   rD   c                 6    | j                             |          S )z'Test if this filesystem equals another.)r  equalsr)   s     r,   r  z"RetryingPyFileSystemHandler.equals  s    xu%%%r.   r   c                 @                            fdd           S )zGet info for the given files.c                  8    j                                        S r0   r  get_file_info)r   r*   s   r,   r  z;RetryingPyFileSystemHandler.get_file_info.<locals>.<lambda>  s    DH**511 r.   get file info for r  )r*   r   s   ``r,   r  z)RetryingPyFileSystemHandler.get_file_info  s6    $$11111(((
 
 	
r.   c                 @                            fdd           S )Nc                  8    j                                        S r0   r  )selectorr*   s   r,   r  zDRetryingPyFileSystemHandler.get_file_info_selector.<locals>.<lambda>  s    DH**844 r.   r  r  )r*   r  s   ``r,   get_file_info_selectorz2RetryingPyFileSystemHandler.get_file_info_selector  s6    $$44444+++
 
 	
r.   c                     dS )Nr  r(   r>   s    r,   get_type_namez)RetryingPyFileSystemHandler.get_type_name!  s    %%r.   c                 J                            fdd d           S )z"Move / rename a file or directory.c                  :    j                                        S r0   )r  mover  s   r,   r  z2RetryingPyFileSystemHandler.move.<locals>.<lambda>'  s    DHMM#t,, r.   z
move from r  r  r  s   ```r,   r  z RetryingPyFileSystemHandler.move$  sB    $$,,,,,,.J3.J.JD.J.J
 
 	
r.   c                 @                            fdd           S )zNormalize filesystem path.c                  8    j                                        S r0   )r  normalize_pathr  s   r,   r  z<RetryingPyFileSystemHandler.normalize_path.<locals>.<lambda>-  s    DH++D11 r.   znormalize path r  r  s   ``r,   r  z*RetryingPyFileSystemHandler.normalize_path*  s6    $$111113KT3K3K
 
 	
r.   Nzpyarrow.NativeFilec                 D                            fdd           S )zOpen an output stream for appending.

        Compression is disabled in this method because it is handled in the
        PyFileSystem abstract class.
        c                  >    j                             d            S N)compressionmetadata)r  open_append_streamr  r   r*   s   r,   r  z@RetryingPyFileSystemHandler.open_append_stream.<locals>.<lambda>;  '    DH// ! 0   r.   zopen append stream for r  r*   r   r  s   ```r,   r  z.RetryingPyFileSystemHandler.open_append_stream0  J     $$     
 -d,,
 
 	
r.   c                 @                            fdd           S )zOpen an input stream for sequential reading.

        Compression is disabled in this method because it is handled in the
        PyFileSystem abstract class.
        c                  <    j                              d           S )N)r  )r  open_input_streamr  s   r,   r  z?RetryingPyFileSystemHandler.open_input_stream.<locals>.<lambda>M  s    DH..t.FF r.   zopen input stream for r  r  s   ``r,   r  z-RetryingPyFileSystemHandler.open_input_streamC  s8     $$FFFFF+T++
 
 	
r.   c                 D                            fdd           S )zOpen an output stream for sequential writing."

        Compression is disabled in this method because it is handled in the
        PyFileSystem abstract class.
        c                  >    j                             d            S r  )r  open_output_streamr  s   r,   r  z@RetryingPyFileSystemHandler.open_output_stream.<locals>.<lambda>\  r  r.   zopen output stream for r  r  s   ```r,   r   z.RetryingPyFileSystemHandler.open_output_streamQ  r  r.   c                 @                            fdd           S )z-Open an input file for random access reading.c                  8    j                                        S r0   )r  open_input_filer  s   r,   r  z=RetryingPyFileSystemHandler.open_input_file.<locals>.<lambda>g  s    DH,,T22 r.   zopen input file r  r  s   ``r,   r#  z+RetryingPyFileSystemHandler.open_input_filed  s6    $$222224Mt4M4M
 
 	
r.   )Fr0   )r@   rA   rB   rC   r  r   r   rd   rb  r	   r  r  r  r   r  r  r  r  r  r  r  r  r
  r  r  r  r  r   r#  r(   r.   r,   r  r    s         ',egg, ,#, s), 	,
 , , , ,.
( 
 
 
 
 
  
S 
 
 
 
 

s 
t 
 
 
 

s 
 
 
 

 
 
T 
 
 
 

 
 
 
 

 
 
&3 & & & & &
49 
 
 
 

 
 
& & &
 
3 
 
 
 

3 
3 
 
 
 
 
 

 
	
 
 
 
&

 

 
 
 
" 
 

 
	
 
 
 
&
C 
,@ 
 
 
 
 
 
r.   r  r  r  )r  r  r  iterable_factoryr  r  r  r  c             #     K   |dk    sJ d| d            d}t          |          D ]}	  |             }t          |          D ]\  }}	||k     r|dz  }|	V   dS # t          $ r|du pt          fd|D                       }
|
ro|dz   |k     rft	          d|dz   z  |          t          j                    z  }t                              d|dz    d	| d
| d           t          j	        |           ndY ddww xY wdS )a  Iterate through an iterable with retries.

    If the iterable raises an exception, this function recreates and re-iterates
    through the iterable, while skipping the items that have already been yielded.

    Args:
        iterable_factory: A no-argument function that creates the iterable.
        match: A list of strings to match in the exception message. If ``None``, any
            error is retried.
        description: An imperitive description of the function being retried. For
            example, "open the file".
        max_attempts: The maximum number of attempts to retry.
        max_backoff_s: The maximum number of seconds to backoff.
    rW   z%`max_attempts` must be positive. Got r]   r   Nc              3   :   K   | ]}|t                    v V  d S r0   )r   )r   r   r  s     r,   r   z%iterate_with_retry.<locals>.<genexpr>  s.      /W/Wg3q660A/W/W/W/W/W/Wr.   rZ   z	Retrying z attempts to z after z	 seconds.)
rI  r~  r  r   rn   r  ri   rp   timesleep)r$  r  r  r  r  num_items_yieldedattemptiterable
item_indexrn  is_retryablebackoffr  s               @r,   iterate_with_retryr/  k  s     , 1UlUUU&& " "	"''))H$-h$7$7   
D 111!Q&!



FF 	" 	" 	" D=WC/W/W/W/WQV/W/W/W,W,WL 	"!l : :qWq[1MBBV]__T0	 0 0 0 0$0 0 0   
7####T! $####	"" "s   /A
D(BD  D	num_bytesc                     | dk    rt          | dz             d}n1| dk    rt          | dz             d}nt          | dz             d}|S )Ng    eAGBg    .AMBg     @@KB)round)r0  num_bytes_strs     r,   #convert_bytes_to_human_readable_strr7    sp    C S11555	c		 S11555 S11555r.   )num_rows_per_filemin_rows_per_filemax_rows_per_filer8  r9  r:  c                    | 4ddl }|                    dt          d           |t          d          | }||dk    rt          d          ||dk    rt          d          ||||k    rt          d	| d
| d          ||fS )a{  Helper method to validate and handle rows per file arguments.

    Args:
        num_rows_per_file: Deprecated parameter for number of rows per file
        min_rows_per_file: New parameter for minimum rows per file
        max_rows_per_file: New parameter for maximum rows per file

    Returns:
        A tuple of (effective_min_rows_per_file, effective_max_rows_per_file)
    Nr   zk`num_rows_per_file` is deprecated and will be removed in a future release. Use `min_rows_per_file` instead.r   )
stacklevelz~Cannot specify both `num_rows_per_file` and `min_rows_per_file`. Use `min_rows_per_file` as `num_rows_per_file` is deprecated.z,max_rows_per_file must be a positive integerz,min_rows_per_file must be a positive integerzmin_rows_per_file (z,) cannot be greater than max_rows_per_file (r\   )warningswarnDeprecationWarningrf   )r8  r9  r:  r=  s       r,   _validate_rows_per_file_argsr@    s     $/	 	 	
 	
 	
 (P   . $):a)?)?GHHH $):a)?)?GHHH 	%) 1117"3 7 7"37 7 7
 
 	

 ///r.   c                 x    	 t          | t                    ot          j        |           S # t          $ r Y dS w xY w)z+Returns true if provide value is ``np.nan``F)r1   floatra   rb   	TypeErrorvalues    r,   is_nanrF    sF    %'';BHUOO;   uus   (+ 
99rE  c                 (    | du pt          |           S )zYThis generalization of ``is_nan`` util qualifying both None and np.nan
    as null valuesN)rF  rD  s    r,   r2   r2     s     D=)F5MM)r.   c                     t          |           t          |          k    rdS t          | |          D ],\  }}t          |          rt          |          s	||k    s dS -dS )NFT)r   ziprF  )keys1keys2k1k2s       r,   
keys_equalrN    sk    
5zzSZZueU##  B 	r

 	rRxx554r.   c                      t          j                                                    } t           j        j                                        }| |v sJ d|  d|             ||          d         S )zsReturn the total object store memory on the current node.

    This function incurs an RPC. Use it cautiously.
    zExpected node 'z' to be in resources: object_store_memory)rK   get_runtime_contextget_node_idrL   r  total_resources_per_node)node_idrS  s     r,   get_total_obj_store_mem_on_noderU    st    
 %''3355G"|1JJLL++++RRR8PRR 	,++#G,-BCCr.   c                       e Zd ZdZdee         fdZd Zd Zd Z	dee
         fdZd	 Zdeej        ej        f         fd
Zd Zde
fdZeej        defd                        ZdS )MemoryProfilera$  A context manager that polls the USS of the current process.

    This class approximates the max USS by polling memory and subtracting the amount
    of shared memory from the resident set size (RSS). It's not a
    perfect estimate (it can underestimate, e.g., if you use Torch tensors), but
    estimating the USS is much cheaper than computing the actual USS.

    .. warning::

        This class only works with Linux. If you use it on another platform,
        `estimate_max_uss` always returns ``None``.

    Example:

        .. testcode::

            with MemoryProfiler(poll_interval_s=1.0) as profiler:
                for i in range(10):
                    ...  # Your code here
                    print(f"Max USS: {profiler.estimate_max_uss()}")
                    profiler.reset()
    poll_interval_sc                     || _         t          j        t          j                              | _        d| _        t          j                    | _	        d| _
        d| _        dS )z

        Args:
            poll_interval_s: The interval to poll the USS of the process. If `None`,
                this class won't poll the USS.
        N)_poll_interval_spsutilProcessosgetpid_process_max_ussrc  Lock_max_uss_lock_uss_poll_thread_stop_uss_poll_event)r*   rX  s     r,   rb  zMemoryProfiler.__init__  sP     !0ry{{33&^-- $$(!!!r.   c                     d| j          dS )NzMemoryProfiler(poll_interval_s=r\   )rZ  r>   s    r,   r  zMemoryProfiler.__repr__*  s    I1FIIIIr.   c                 ~    |                                  r(| j        !|                                 \  | _        | _        | S r0   )_can_estimate_ussrZ  _start_uss_poll_threadrc  rd  r>   s    r,   r  zMemoryProfiler.__enter__-  sG    !!## 	.(=(I ++--%) r.   c                 @    | j         |                                  d S d S r0   )rc  _stop_uss_poll_thread)r*   r  exc_valexc_tbs       r,   r  zMemoryProfiler.__exit__6  s+     ,&&((((( -,r.   rD   c                 8   |                                  s| j        J dS | j        5  | j        |                                 | _        n,t	          | j        |                                           | _        ddd           n# 1 swxY w Y   | j        J | j        S )zGet an estimate of the max USS of the current process.

        Returns:
            An estimate of the max USS of the process in bytes, or ``None`` if an
            estimate isn't available.
        N)rg  r`  rb  _estimate_ussrc   r>   s    r,   estimate_max_usszMemoryProfiler.estimate_max_uss:  s     %%'' 	=(((4 	I 	I}$ $ 2 2 4 4 #DM43E3E3G3G H H		I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I }(((}s   ABBBc                 T    | j         5  d | _        d d d            d S # 1 swxY w Y   d S r0   )rb  r`  r>   s    r,   resetzMemoryProfiler.resetN  sv     	! 	! DM	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	!s   !!c                       j         J                                  sJ t          j                     fd}t          j        |d          }|                                 |fS )Nc                  h                                    s j        5   j                                          _        n,t	           j                                                    _        d d d            n# 1 swxY w Y                        j                                                    d S d S r0   )ri  rb  r`  rn  rc   waitrZ  )r*   
stop_events   r,   poll_ussz7MemoryProfiler._start_uss_poll_thread.<locals>.poll_ussX  s    '')) 7' Q Q},(,(:(:(<(<(+DM4;M;M;O;O(P(P	Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q
  5666 !'')) 7 7 7 7 7s   AA77A;>A;T)r  r  )rZ  rg  rc  rd  r  r  )r*   rv  threadru  s   `  @r,   rh  z%MemoryProfiler._start_uss_poll_threadR  s    $000%%'''''_&&
	7 	7 	7 	7 	7 	7 !$???z!!r.   c                 |    | j         4| j                                          | j                                         d S d S r0   )rd  r  rc  r   r>   s    r,   rj  z$MemoryProfiler._stop_uss_poll_threadf  sC    $0%))+++!&&((((( 10r.   c                 ~    |                                  sJ | j                                        }|j        |j        z
  S r0   )rg  r_  memory_inforssshared)r*   rz  s     r,   rn  zMemoryProfiler._estimate_ussk  s?    %%'''''m//11 !333r.   c                  0    t          j                    dk    S )NLinux)platformsystemr(   r.   r,   rg  z MemoryProfiler._can_estimate_usss  s       G++r.   N)r@   rA   rB   rC   r   rB  rb  r  r  r  rd   ro  rq  r   rc  r  rd  rh  rj  rn  staticmethod	functoolscacher   rg  r(   r.   r,   rW  rW    s"        .) ) ) ) ) J J J  ) ) )(3-    (! ! !"i.>	.O(P " " " "() ) )
4s 4 4 4 4 _,t , , , _ \, , ,r.   rW  r   .c                 T    t          t          t          t          |                      S )zUnzips a list of tuples into a tuple of lists

    Args:
        data: A list of tuples to unzip.

    Returns:
        A tuple of lists, where each list corresponds to one element of the tuples in
        the input list.
    )r  mapr   rI  )r   s    r,   unzipr  z  s      T3:&&'''r.   c                    |                                  } fdg }g }t          | j                  }|D ]}| |         j        dk    rhdt	          j                    j         d| d}| |                                       | |<   |                    |           |                    |           {|                    |           | 	                    |          }|r|
                    |          }|S )zESort DataFrame by columns and rows, and also handle unhashable types.c                    t          | t          t          j        f          rt	          fd| D                       S t          | t
                    r:t	          t          fd|                                 D                                 S | S )Nc              3   .   K   | ]} |          V  d S r0   r(   )r   r   to_sortables     r,   r   z0_sort_df.<locals>.to_sortable.<locals>.<genexpr>  s+      33AQ333333r.   c              3   8   K   | ]\  }}| |          fV  d S r0   r(   )r   kvr  s      r,   r   z0_sort_df.<locals>.to_sortable.<locals>.<genexpr>  s4      JJ1KKNN 3JJJJJJr.   )r1   r   ra   r%  r  dictsorteditems)r  r  s    r,   r  z_sort_df.<locals>.to_sortable  s    a$
+,, 	43333333333a 	LJJJJ		JJJJJKKKr.   object__sort_proxy_r  __)rN  )copyr  rN  dtypeuuiduuid4hexr  r2  sort_valuesdrop)r  	sort_cols	temp_colsrN  coltemp_col	sorted_dfr  s          @r,   _sort_dfr    s   	B     IIRZ  G 	" 	"c7=H$$ Btz||'7AA#AAAHc7;;{33BxLX&&&X&&&&S!!!!y))I 6NN9N55	r.   actualexpectedc                 `   t          |           t          |          k    rdS t          |           dk    rdS 	 t          j                            t	          |                               d          t	          |                              d          d           dS # t          $ r Y dS w xY w)a  Check if two DataFrames have the same rows.

    Unlike the built-in pandas equals method, this function ignores indices and the
    order of rows. This is useful for testing Ray Data because its interface doesn't
    usually guarantee the order of rows.
    Fr   T)r  )check_dtype)r   pdtestingassert_frame_equalr  reset_indexAssertionError)r  r  s     r,   	rows_samer    s     6{{c(mm##u
6{{at

%%V((d(33X***55 	& 	
 	
 	

 t   uus   A$B 
B-,B-num_cpusnum_gpusmemoryray_remote_argsc                 X    |                                 }| | |d<   |||d<   |||d<   |S )az  Convert the given resources to Ray remote args.

    Args:
        num_cpus: The number of CPUs to be added to the Ray remote args.
        num_gpus: The number of GPUs to be added to the Ray remote args.
        memory: The memory to be added to the Ray remote args.
        ray_remote_args: The Ray remote args to be merged.

    Returns:
        The converted arguments.
    Nr  r  r  )r  )r  r  r  r  s       r,   "merge_resources_to_ray_remote_argsr    sK    " &**,,O&.
#&.
#$*!r.   c                     dd l }d }	 |j                            |           j        }nB# t          t
          f$ r. dd l} |j        |           j        }|r|dd          dk    rd}Y nw xY w|S )Nr   rW   snappy)	rG   Codecdetectr  rf   rC  r   r   suffix)r   r:  r  r   r  s        r,   infer_compressionr    s    K	#hood++0	" # # #d##* 	#fQRRjH,,"K# s   ( <A'&A')NNNN)FNNNr   F)NNN)rW   rW   )r  r   loggingr]  r   r  r  r^   rc  r'  urllib.parser   r  queuer   r   r   typesr   typingr   r   r	   r
   r   r   r   r   r   r   r   r   numpyra   pandasr  rG   
pyarrow.fsrK   ray._common.retryr   ray.data.contextr   r   r   ray.util.annotationsr   r[  r  r   'ray.data._internal.execution.interfacesr   2ray.data._internal.planner.exchange.sort_task_specr   r  r   r   r   r   ray.data.datasourcer   r    ray.util.placement_groupr!   	getLoggerr@   ri   KiBro   GiBr  r  r   r   r   
LazyModuler$   __annotations__r&   rK  rI   rN   rd   r   ru   rm   r   r   r   r   r   r   r   r   r   r   r   r
  r  r  r$  r%  r)  r,  r6  r3  r?  rB  rT  rZ  r[  r\  r^  r  r  r  PyFileSystemr  FileSystemHandlerr  r/  r7  r@  rF  r2   rN  rU  rW  r  	DataFramer  r  r  r  r(   r.   r,   <module>r     ss             				    



           $ $ $ $ $ $ $ $ $ $                                               



 - - - - - - U U U U U U U U U U - - - - - -  8MMM::::::AAAAAAJJJJJJ            76666666777777		8	$	$ 
Sj
Sj 688  4z)*
# * # # #       B %$&&	j 	 	 	 	6 6 6 LP"26 $t) t)t)#C=t) 
t) "*%0F*G!H	t)
 smt) ./t) t) 3Xc]"#t) t) t) t)n*:!;     D( ( ( ( (
 
 
&
# 
 
 
 
 
 
(      E#tCy.1 d    ,      #Q! Q! Q! Q! 	Q!
 Q! }Q! Q! 	Q! Q! Q! Q!j $)-%)"( ((!#( c]( sm	( ( ( (V- - -  *& & & 487;OS	k& k&k&!(3-0k& eC!2234k& %U38_eCcM6J JKL	k&
 k& k& k& k&\	  	  	  	  	 	6# 	6 	6 	6 	6QQ
-.Q Q Q Q	QZ	Q)	Q
-.	Q 	Q 	Q 	QJ45JJ J J J;!%&?!@;h; ; ; ;4x(h   (	;k"	;h	; 	; 	; 	;G445G45e$%G4 G4 		G4 G4 G4 G4T?c ?c ? ? ? ?" GCLLGCLL: : : : :% : : :B Z  Z A;Z (1++,Z  Z  	Z 
 Z  q$}Z  Z  Z  Z z!
 !
 !
 !
 !
 !
 !
 !
H# # # # #7:2 # # #Li
 i
 i
 i
 i
'*"> i
 i
 i
` "&/" /" /"r8|,/"/" DI	/"
 /" /" 	/" /" /" /"d3 3     (,'+'+	30 30 30}30  }30  }	30
 8C=(3-'(30 30 30 30lT    *3 *4 * * * *  
D 
D 
D 
D 
Du, u, u, u, u, u, u, u,p
(U38_% 
(%S	3*? 
( 
( 
( 
(   ",        Fbl bl t    0smsm SM #s(^	
 
#s(^   6 C HSM      r.   