
    &`i                        U d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZmZ d dl	m
Z
mZmZmZmZmZ d dlZd dlmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ e
rd d
lm Z  d dl!m"Z"  ej#        e$          Z%da&de'd<    ej(                    Z) ed           G d de*ej+                              Z,dZ-dZ.dZ/dZ0dZ1dZ2 edd          Z3dZ4dZ5 edd          Z6 e7ej8        9                    dd                    Z:ej8        9                    de,j;                  Z< edd           Z=d!Z>d"Z?d#Z@dZAdZB e7 eCej8        9                    d$d%                              ZDdZE ed&d          ZF ed'd          ZG ed(d          ZHdZIdZJ e7 eCej8        9                    d)d%                              ZK ed*d          ZL ed+d          ZM e7 eCej8        9                    d,d-                              ZN e7 ed.d                      ZO ed/d          ZP ed0d1          ZQ e7 ed2d                     ZR ed3d          ZSdZTd4ZUd5ZVd6ZWdZXdZYd7ZZ ed8d          Z[ e\ej8        9                    d9d:                    Z]d Z^d;Z_d<Z`d=Zad>ZbdZc ed?d@          Zd edAd>          Ze e7 eCej8        9                    dBd%                              Zf edCdD          Zg edEdF          Zh edGdH          Zie\e'dI<    edJdK          Zje\e'dL<    edMdN          ZkeCe'dO<    edPd          Zle7e'dQ<   ee G dR dS                                  Zmd\dVZndTe,fdWZod]dYZpee G dZ d[                                  ZqeqZrdS )^    N)	dataclassfield)TYPE_CHECKINGAnyDictListOptionalUnion)env_bool	env_floatenv_integer)WORKER_MODE) update_dataset_logger_for_worker)DeveloperAPI)log_once)SchedulingStrategyTExecutionOptionsIssueDetectorsConfigurationzOptional[DataContext]_default_contextalpha)	stabilityc                       e Zd ZdZdZdZdZdS )ShuffleStrategyzkShuffle strategy determines shuffling algorithm employed by operations
    like aggregate, repartition, etcsort_shuffle_pull_basedsort_shuffle_push_basedhash_shuffleN)__name__
__module____qualname____doc__SORT_SHUFFLE_PULL_BASEDSORT_SHUFFLE_PUSH_BASEDHASH_SHUFFLE     d/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/data/context.pyr   r      s+        ( ( 87!LLLr'   r   i   i   @g      ?i   i   T%RAY_DATA_PANDAS_BLOCK_IGNORE_METADATAF   "RAY_DATA_ITER_GET_BLOCK_BATCH_SIZE    RAY_DATA_PUSH_BASED_SHUFFLE!RAY_DATA_DEFAULT_SHUFFLE_STRATEGY%RAY_DATA_MAX_HASH_SHUFFLE_AGGREGATORS   SPREADDEFAULTi   RAY_DATA_EAGER_FREE0 RAY_DATA_DEFAULT_MIN_PARALLELISM(RAY_DATA_ENABLE_TENSOR_EXTENSION_CASTINGRAY_DATA_USE_ARROW_TENSOR_V2RAY_DATA_TRACE_ALLOCATIONS+RAY_DATA_LOG_INTERNAL_STACK_TRACE_TO_STDOUT%RAY_DATA_RAISE_ORIGINAL_MAP_EXCEPTIONRAY_TQDM1RAY_DATA_DISABLE_PROGRESS_BARS,RAY_DATA_ENABLE_PROGRESS_BAR_NAME_TRUNCATIONRAY_DATA_PROGRESS_LOG_INTERVAL   "RAY_DATA_ENABLE_RICH_PROGRESS_BARSRAY_DATA_ENFORCE_SCHEMAS)AWS Error INTERNAL_FAILUREAWS Error NETWORK_CONNECTIONAWS Error SLOW_DOWN#AWS Error UNKNOWN (HTTP status 503))rC   rD   rE   rF   zAWS Error SERVICE_UNAVAILABLEl           'RAY_DATA_ENABLE_OP_RESOURCE_RESERVATIONRAY_DATA_OP_RESERVATION_RATIOz0.5u   ⚠️ u   ✔️ i      &RAY_DATA_DEFAULT_WAIT_FOR_MIN_ACTORS_SDRAY_DATA_ACTOR_DEFAULT_MAX_TASKS_IN_FLIGHT_TO_MAX_CONCURRENCY_FACTORRAY_DATA_PER_NODE_METRICS3RAY_DATA_MIN_HASH_SHUFFLE_AGGREGATOR_WAIT_TIME_IN_Si,  :RAY_DATA_HASH_SHUFFLE_AGGREGATOR_HEALTH_WARNING_INTERVAL_S   4RAY_DATA_DEFAULT_ACTOR_POOL_UTIL_UPSCALING_THRESHOLDg       @+DEFAULT_ACTOR_POOL_UTIL_UPSCALING_THRESHOLD6RAY_DATA_DEFAULT_ACTOR_POOL_UTIL_DOWNSCALING_THRESHOLDg      ?-DEFAULT_ACTOR_POOL_UTIL_DOWNSCALING_THRESHOLD/RAY_DATA_DEFAULT_ACTOR_POOL_MAX_UPSCALING_DELTA   &DEFAULT_ACTOR_POOL_MAX_UPSCALING_DELTA6RAY_DATA_ENABLE_DYNAMIC_OUTPUT_QUEUE_SIZE_BACKPRESSURE5DEFAULT_ENABLE_DYNAMIC_OUTPUT_QUEUE_SIZE_BACKPRESSUREc                   >    e Zd ZU dZeZeed<   eZ	eed<   e
Zeed<   dS )AutoscalingConfiga  Configuration for autoscaling of Ray Data.

    Args:
        actor_pool_util_upscaling_threshold: Actor Pool utilization threshold for upscaling.
            Once Actor Pool exceeds this utilization threshold it will start adding new actors.
            Actor Pool utilization is defined as ratio of number of submitted tasks to the
            number of available concurrency-slots to run them in the current set of actors.
            This utilization value could exceed 100%, when the number of submitted tasks
            exceed available concurrency-slots to run them in the current set of actors.
            This is possible when `max_tasks_in_flight_per_actor`
            (defaults to 2 x of `max_concurrency`) > Actor's `max_concurrency`
            and allows to overlap task execution with the fetching of the blocks
            for the next task providing for ability to negotiate a trade-off
            between autoscaling speed and resource efficiency (i.e.,
            making tasks wait instead of immediately triggering execution).
        actor_pool_util_downscaling_threshold: Actor Pool utilization threshold for downscaling.
        actor_pool_max_upscaling_delta: Maximum number of actors to scale up in a single scaling decision.
            This limits how many actors can be added at once to prevent resource contention
            and scheduling pressure. Defaults to 1 for conservative scaling.
    #actor_pool_util_upscaling_threshold%actor_pool_util_downscaling_thresholdactor_pool_max_upscaling_deltaN)r   r    r!   r"   rS   r]   float__annotations__rU   r^   rX   r_   intr&   r'   r(   r\   r\     sf          , 	4 (    	6 *5   
 +Q"CPPPPPr'   r\   returnr   c                  "    ddl m}   |             S )Nr   r   )'ray.data._internal.execution.interfacesr   r   s    r(   _execution_options_factoryrf   )  s#    HHHHHHr'   c                      t           r&t                              d           t          j        S d t          D             } t
          | v s)J dd                    |            dt
           d            t
          S )NzqRAY_DATA_PUSH_BASED_SHUFFLE is deprecated, please use RAY_DATA_DEFAULT_SHUFFLE_STRATEGY to set shuffling strategyc                     g | ]}|S r&   r&   ).0ss     r(   
<listcomp>z5_deduce_default_shuffle_algorithm.<locals>.<listcomp>9  s    )))Aa)))r'   z8RAY_DATA_DEFAULT_SHUFFLE_STRATEGY has to be one of the [,z] (got ))DEFAULT_USE_PUSH_BASED_SHUFFLEloggerwarningr   r$   DEFAULT_SHUFFLE_STRATEGYjoin)vss    r(   !_deduce_default_shuffle_algorithmrt   0  s    % (J	
 	
 	

 66)))))'2---0sxxPR|| 0 0,0 0 0 .--
 ('r'   r   c                  "    ddl m}   |             S )Nr   r   )?ray.data._internal.issue_detection.issue_detector_configurationr   r   s    r(   _issue_detectors_config_factoryrw   C  s1          '&(((r'   c                       e Zd ZU dZeZee         ed<   e	Z
eed<   eZeed<   eZeed<   eZeed<   eZeed<    ee          Zeed	<   eZeed
<    e            Zeed<   dZeed<   eZeed<   dZee         ed<   e Z!eed<   e"Z#eed<   dZ$ee         ed<   dZ%e&ed<   dZ'e&ed<   dZ(e&ed<   e)Z*e+ed<   e,Z-e+ed<   e.Z/eed<   e0Z1eed<   e2Z3eed<   e4Z5eed<   e6Z7eed<   eZ8eed<   e9Z:eed<   e;Z<eed <   e=Z>eed!<   dZ?ee         ed"<   e@ZAeed#<   eBZCeed$<   eDZEeed%<    eeF          ZGd&ed'<   eHZIeed(<   eJZKeed)<   dZLeed*<   eMZNeed+<   eOZPeed,<   eQZReed-<   eSZTeed.<   eUZVeWeX         ed/<   eYZZeed0<   e[Z\e]eeWe^         f         ed1<   e_Z`eed2<   eaZbeed3<   ecZdeed4<   eeZfe&ed5<   egZheed6<   eiZjeed7<   ekZleed8<   dZmeed9<   enZoeed:<   epZqeed;<   dZree         ed<<    ed=           ZseWeX         ed><   etZueed?<   dZve&ed@<   dAZwee&         edB<   dZxeeX         edC<   dDZyeedE<    eez          Z{dFedG<   dZ|e&edH<   dZ}eedI<   e~ZeedJ<   eZeedK<   eZeedL<   dM ZdNeXdOedPdf fdQZed_dR            Zed`dT            ZedPefdU            Zej        dOedPdfdV            ZdadWeXdXedPefdYZdWeXdOedPdfdZZdWeXdPdfd[Zd_d\Zd]eXdPdfd^Z xZS )bDataContexta*  Global settings for Ray Data.

    Configure this class to enable advanced features and tune performance.

    .. warning::
        Apply changes before creating a :class:`~ray.data.Dataset`. Changes made after
        won't take effect.

    .. note::
        This object is automatically propagated to workers. Access it from the driver
        and remote workers with :meth:`DataContext.get_current()`.

    Examples:
        >>> from ray.data import DataContext
        >>> DataContext.get_current().enable_progress_bars = False

    Args:
        target_max_block_size: The max target block size in bytes for reads and
            transformations. If `None`, this means the block size is infinite.
        target_min_block_size: Ray Data avoids creating blocks smaller than this
            size in bytes on read. This takes precedence over
            ``read_op_min_num_blocks``.
        streaming_read_buffer_size: Buffer size when doing streaming reads from local or
            remote storage.
        enable_pandas_block: Whether pandas block format is enabled.
        actor_prefetcher_enabled: Whether to use actor based block prefetcher.
        iter_get_block_batch_size: Maximum number of block object references to resolve
            in a single ``ray.get()`` call when iterating over datasets.
        autoscaling_config: Autoscaling configuration.
        use_push_based_shuffle: Whether to use push-based shuffle.
        pipeline_push_based_shuffle_reduce_tasks:
        scheduling_strategy: The global scheduling strategy. For tasks with large args,
            ``scheduling_strategy_large_args`` takes precedence.
        scheduling_strategy_large_args: Scheduling strategy for tasks with large args.
        large_args_threshold: Size in bytes after which point task arguments are
            considered large. Choose a value so that the data transfer overhead is
            significant in comparison to task scheduling (i.e., low tens of ms).
        use_polars: Whether to use Polars for tabular dataset sorts, groupbys, and
            aggregations.
        eager_free: Whether to eagerly free memory.
        decoding_size_estimation: Whether to estimate in-memory decoding data size for
            data source.
        min_parallelism: This setting is deprecated. Use ``read_op_min_num_blocks``
            instead.
        read_op_min_num_blocks: Minimum number of read output blocks for a dataset.
        enable_tensor_extension_casting: Whether to automatically cast NumPy ndarray
            columns in Pandas DataFrames to tensor extension columns.
        use_arrow_tensor_v2: Config enabling V2 version of ArrowTensorArray supporting
            tensors > 2Gb in size (off by default)
        enable_fallback_to_arrow_object_ext_type: Enables fallback to serialize column
            values not suppported by Arrow natively (like user-defined custom Python
            classes for ex, etc) using `ArrowPythonObjectType` (simply serializing
            these as bytes)
        enable_auto_log_stats: Whether to automatically log stats after execution. If
            disabled, you can still manually print stats with ``Dataset.stats()``.
        verbose_stats_logs: Whether stats logs should be verbose. This includes fields
            such as `extra_metrics` in the stats output, which are excluded by default.
        trace_allocations: Whether to trace allocations / eager free. This adds
            significant performance overheads and should only be used for debugging.
        execution_options: The
            :class:`~ray.data._internal.execution.interfaces.execution_options.ExecutionOptions`
            to use.
        use_ray_tqdm: Whether to enable distributed tqdm.
        enable_progress_bars: Whether to enable progress bars.
        enable_operator_progress_bars: Whether to enable progress bars for individual
            operators during execution.
        enable_progress_bar_name_truncation: If True, the name of the progress bar
            (often the operator name) will be truncated if it exceeds
            `ProgressBar.MAX_NAME_LENGTH`. Otherwise, the full operator name is shown.
        enable_rich_progress_bars: Whether to use the new rich progress bars instead
            of the tqdm TUI.
        progress_bar_log_interval: The interval in seconds for logging progress bar
            updates in non-interactive terminals.
        enable_get_object_locations_for_metrics: Whether to enable
            ``get_object_locations`` for metrics. This is useful for tracking whether
            the object input of a task is local (cache hit) or not local (cache miss)
            to the node that task is running on.
        write_file_retry_on_errors: A list of substrings of error messages that should
            trigger a retry when writing files. This is useful for handling transient
            errors when writing to remote storage systems.
        warn_on_driver_memory_usage_bytes: If driver memory exceeds this threshold,
            Ray Data warns you. For now, this only applies to shuffle ops because most
            other ops are unlikely to use as much driver memory.
        actor_task_retry_on_errors: The application-level errors that actor task should
            retry. This follows same format as :ref:`retry_exceptions <task-retries>` in
            Ray Core. Default to `False` to not retry on any errors. Set to `True` to
            retry all errors, or set to a list of errors to retry.
        actor_init_retry_on_errors: Whether to retry when actor initialization fails.
            Default to `False` to not retry on any errors. Set to `True` to retry
            all errors.
        actor_init_max_retries: Maximum number of consecutive retries for actor
            initialization failures. The counter resets when an actor successfully
            initializes. Default is 3. Set to -1 for infinite retries.
        op_resource_reservation_enabled: Whether to enable resource reservation for
            operators to prevent resource contention.
        op_resource_reservation_ratio: The ratio of the total resources to reserve for
            each operator.
        max_errored_blocks: Max number of blocks that are allowed to have errors,
            unlimited if negative. This option allows application-level exceptions in
            block processing tasks. These exceptions may be caused by UDFs (e.g., due to
            corrupted data samples) or IO errors. Data in the failed blocks are dropped.
            This option can be useful to prevent a long-running job from failing due to
            a small number of bad blocks.
        log_internal_stack_trace_to_stdout: Whether to include internal Ray Data/Ray
            Core code stack frames when logging to stdout. The full stack trace is
            always written to the Ray Data log file.
        raise_original_map_exception: Whether to raise the original exception
            encountered in map UDF instead of wrapping it in a `UserCodeException`.
        print_on_execution_start: If ``True``, print execution information when
            execution starts.
        s3_try_create_dir: If ``True``, try to create directories on S3 when a write
            call is made with a S3 URI.
        wait_for_min_actors_s: The default time to wait for minimum requested
            actors to start before raising a timeout, in seconds.
        max_tasks_in_flight_per_actor: Max number of tasks that could be submitted
            for execution to individual actor at the same time. Note that only up to
            `max_concurrency` number of these tasks will be executing concurrently
            while remaining ones will be waiting in the Actor's queue. Buffering
            tasks in the queue allows us to overlap pulling of the blocks (which are
            tasks arguments) with the execution of the prior tasks maximizing
            individual Actor's utilization
        retried_io_errors: A list of substrings of error messages that should
            trigger a retry when reading or writing files. This is useful for handling
            transient errors when reading from remote storage systems.
        default_hash_shuffle_parallelism: Default parallelism level for hash-based
            shuffle operations if the number of partitions is unspecifed.
        max_hash_shuffle_aggregators: Maximum number of aggregating actors that can be
            provisioned for hash-shuffle aggregations.
        min_hash_shuffle_aggregator_wait_time_in_s: Minimum time to wait for hash
            shuffle aggregators to become available, in seconds.
        hash_shuffle_aggregator_health_warning_interval_s: Interval for health warning
            checks on hash shuffle aggregators, in seconds.
        max_hash_shuffle_finalization_batch_size: Maximum batch size for concurrent
            hash-shuffle finalization tasks. If `None`, defaults to
            `max_hash_shuffle_aggregators`.
        join_operator_actor_num_cpus_per_partition_override: Override CPU allocation
            per partition for join operator actors.
        hash_shuffle_operator_actor_num_cpus_per_partition_override: Override CPU
            allocation per partition for hash shuffle operator actors.
        hash_aggregate_operator_actor_num_cpus_per_partition_override: Override CPU
            allocation per partition for hash aggregate operator actors.
        use_polars_sort: Whether to use Polars for tabular dataset sorting operations.
        enable_per_node_metrics: Enable per node metrics reporting for Ray Data,
            disabled by default.
        override_object_store_memory_limit_fraction: Override the fraction of object
            store memory limit. If `None`, uses Ray's default.
        memory_usage_poll_interval_s: The interval to poll the USS of map tasks. If `None`,
            map tasks won't record memory stats.
        dataset_logger_id: Optional logger ID for dataset operations. If `None`, uses
            default logging configuration.
        issue_detectors_config: Configuration for issue detection and monitoring during
            dataset operations.
        downstream_capacity_backpressure_ratio: Ratio for downstream capacity
            backpressure control. A higher ratio causes backpressure to kick-in
            later. If `None`, this type of backpressure is disabled.
        downstream_capacity_backpressure_max_queued_bundles: Maximum number of queued
            bundles before applying backpressure. If `None`, no limit is applied.
        enable_dynamic_output_queue_size_backpressure: Whether to cap the concurrency
        of an operator based on it's and downstream's queue size.
        enforce_schemas: Whether to enforce schema consistency across dataset operations.
        pandas_block_ignore_metadata: Whether to ignore pandas metadata when converting
            between Arrow and pandas formats for better type inference.
    target_max_block_sizetarget_min_block_sizestreaming_read_buffer_sizeenable_pandas_blockactor_prefetcher_enablediter_get_block_batch_size)default_factoryautoscaling_configuse_push_based_shuffle_shuffle_strategyT(pipeline_push_based_shuffle_reduce_tasks default_hash_shuffle_parallelismNmax_hash_shuffle_aggregators*min_hash_shuffle_aggregator_wait_time_in_s1hash_shuffle_aggregator_health_warning_interval_s(max_hash_shuffle_finalization_batch_size%join_operator_actor_num_cpus_override-hash_shuffle_operator_actor_num_cpus_override/hash_aggregate_operator_actor_num_cpus_overridescheduling_strategyscheduling_strategy_large_argslarge_args_threshold
use_polarsuse_polars_sort
eager_freedecoding_size_estimationmin_parallelismread_op_min_num_blocksenable_tensor_extension_castinguse_arrow_tensor_v2(enable_fallback_to_arrow_object_ext_typeenable_auto_log_statsverbose_stats_logstrace_allocationsr   execution_optionsuse_ray_tqdmenable_progress_barsenable_operator_progress_bars#enable_progress_bar_name_truncationenable_rich_progress_barsprogress_bar_log_interval'enable_get_object_locations_for_metricswrite_file_retry_on_errors!warn_on_driver_memory_usage_bytesactor_task_retry_on_errorsactor_init_retry_on_errorsactor_init_max_retriesop_resource_reservation_enabledop_resource_reservation_ratiomax_errored_blocks"log_internal_stack_trace_to_stdoutraise_original_map_exceptionprint_on_execution_starts3_try_create_dirwait_for_min_actors_smax_tasks_in_flight_per_actorc                  *    t          t                    S N)listDEFAULT_RETRIED_IO_ERRORSr&   r'   r(   <lambda>zDataContext.<lambda>h  s    %> ? ? r'   retried_io_errorsenable_per_node_metrics+override_object_store_memory_limit_fractionrW   memory_usage_poll_interval_sdataset_logger_idF_enable_actor_pool_on_exit_hookr   issue_detectors_config&downstream_capacity_backpressure_ratio3downstream_capacity_backpressure_max_queued_bundles-enable_dynamic_output_queue_size_backpressureenforce_schemaspandas_block_ignore_metadatac                    i | _         i | _        | j        | j        j        _        | j        | j        j        _        t          | _	        t          j                            d          d u}|rZt          j                    j        j        t"          k    }|r)t%          d          rt&                              d           d| _        d S d| _        d S )N
RAY_JOB_ID3ray_data_disable_operator_progress_bars_in_ray_jobszDisabling operator-level progress bars by default in Ray Jobs. To enable progress bars for all operators, set `ray.data.DataContext.get_current().enable_operator_progress_bars = True`.FT) _task_pool_data_task_remote_args_kv_configsr   r   hash_shuffle_detector_configdetection_time_interval_sr   min_wait_time_s.DEFAULT_MAX_NUM_BLOCKS_IN_STREAMING_GEN_BUFFER'_max_num_blocks_in_streaming_gen_bufferosenvirongetrayget_runtime_contextworkermoder   r   ro   infor   )self
is_ray_job	is_drivers      r(   __post_init__zDataContext.__post_init__  s     AC- ,. B 	#@Z ; 	#@P
 ; 	4 Z^^L11=
 	6/118=LI XE   >   27D... 26D...r'   namevaluerc   c                 x   |dk    r&|t           k    rt          j        dt                     nj|dk    rt          j        dt                     nI|dk    rt          j        d           || _        n'|dk    r!t          j        dt                     || _        t                                          ||           d S )	Nr   zR`write_file_retry_on_errors` is deprecated! Configure `retried_io_errors` instead.r   zM`use_push_based_shuffle` is deprecated! Configure `shuffle_strategy` instead.target_shuffle_max_block_sizezY`target_shuffle_max_block_size` is deprecated! Configure `target_max_block_size` instead.r   zH`use_polars` is deprecated, please configure `use_polars_sort`  instead.)"DEFAULT_WRITE_FILE_RETRY_ON_ERRORSwarningswarnDeprecationWarningrz   r   super__setattr__)r   r   r   	__class__s      r(   r   zDataContext.__setattr__  s    000;;;M/"    ---M."    444Mk   */D&&\!!M."  
 $)D D%(((((r'   c                  |    t           5  t          t                      at          cddd           S # 1 swxY w Y   dS )a  Get or create the current DataContext.

        When a Dataset is created, the current DataContext will be sealed.
        Changes to `DataContext.get_current()` will not impact existing Datasets.

        Examples:

            .. testcode::
                import ray

                context = ray.data.DataContext.get_current()

                context.target_max_block_size = 100 * 1024 ** 2
                ds1 = ray.data.range(1)
                context.target_max_block_size = 1 * 1024 ** 2
                ds2 = ray.data.range(1)

                # ds1's target_max_block_size will be 100MB
                ds1.take_all()
                # ds2's target_max_block_size will be 1MB
                ds2.take_all()

        Developer notes: Avoid using `DataContext.get_current()` in data
        internal components, use the DataContext object captured in the
        Dataset and pass it around as arguments.
        N)_context_lockr   ry   r&   r'   r(   get_currentzDataContext.get_current  s    >  	$ 	$'#.== #		$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$s   155contextc                 j    t           rt           j        | j        k    rt          | j                   | a dS )zSet the current context in a remote worker.

        This is used internally by Dataset to propagate the driver context to
        remote workers used for parallelization.
        N)r   r   r   )r   s    r(   _set_currentzDataContext._set_current  s;     !	H1W5NNN,W-FGGG"r'   c                 j    | j         r&t                              d           t          j        S | j        S )NzT`use_push_based_shuffle` is deprecated, please configure `shuffle_strategy` instead.)r   ro   rp   r   r$   r   r   s    r(   shuffle_strategyzDataContext.shuffle_strategy	  s<    & 	;NN.  
 #::%%r'   c                     || _         d S r   )r   )r   r   s     r(   r   zDataContext.shuffle_strategy  s    !&r'   keydefaultc                 8    | j                             ||          S )a  Get the value for a key-value style config.

        Args:
            key: The key of the config.
            default: The default value to return if the key is not found.
        Returns: The value for the key, or the default value if the key is not found.
        )r   r   )r   r   r   s      r(   
get_configzDataContext.get_config  s     ##C111r'   c                     || j         |<   dS )zSet the value for a key-value style config.

        Args:
            key: The key of the config.
            value: The value of the config.
        N)r   )r   r   r   s      r(   
set_configzDataContext.set_config#  s     !&r'   c                 <    | j                             |d           dS )z`Remove a key-value style config.

        Args:
            key: The key of the config.
        N)r   pop)r   r   s     r(   remove_configzDataContext.remove_config,  s#     	S$'''''r'   c                 *    t          j        |           S )z)Create a copy of the current DataContext.)copydeepcopyr   s    r(   r   zDataContext.copy4  s    }T"""r'   
dataset_idc                     || _         dS )zSet the current dataset logger id.

        This is used internally to propagate the current dataset logger id to remote
        workers.
        N)r   )r   r   s     r(   set_dataset_logger_idz!DataContext.set_dataset_logger_id8  s     ",r'   )rc   ry   )r   ry   rc   Nr   )r   r    r!   r"   DEFAULT_TARGET_MAX_BLOCK_SIZErz   r	   rb   ra   DEFAULT_TARGET_MIN_BLOCK_SIZEr{   "DEFAULT_STREAMING_READ_BUFFER_SIZEr|   DEFAULT_ENABLE_PANDAS_BLOCKr}   bool DEFAULT_ACTOR_PREFETCHER_ENABLEDr~   !DEFAULT_ITER_GET_BLOCK_BATCH_SIZEr   r   r\   r   rn   r   rt   r   r   r   DEFAULT_MIN_PARALLELISMr   r   2DEFAULT_MIN_HASH_SHUFFLE_AGGREGATOR_WAIT_TIME_IN_Sr   9DEFAULT_HASH_SHUFFLE_AGGREGATOR_HEALTH_WARNING_INTERVAL_Sr   r   r   r`   r   r   DEFAULT_SCHEDULING_STRATEGYr   r   &DEFAULT_SCHEDULING_STRATEGY_LARGE_ARGSr   DEFAULT_LARGE_ARGS_THRESHOLDr   DEFAULT_USE_POLARSr   DEFAULT_USE_POLARS_SORTr   DEFAULT_EAGER_FREEr   (DEFAULT_DECODING_SIZE_ESTIMATION_ENABLEDr   r   DEFAULT_READ_OP_MIN_NUM_BLOCKSr   'DEFAULT_ENABLE_TENSOR_EXTENSION_CASTINGr   DEFAULT_USE_ARROW_TENSOR_V2r   r   DEFAULT_AUTO_LOG_STATSr   DEFAULT_VERBOSE_STATS_LOGr   DEFAULT_TRACE_ALLOCATIONSr   rf   r   DEFAULT_USE_RAY_TQDMr   DEFAULT_ENABLE_PROGRESS_BARSr   r   +DEFAULT_ENABLE_PROGRESS_BAR_NAME_TRUNCATIONr   !DEFAULT_ENABLE_RICH_PROGRESS_BARSr   !DEFAULT_PROGRESS_BAR_LOG_INTERVALr   /DEFAULT_ENABLE_GET_OBJECT_LOCATIONS_FOR_METRICSr   r   r   r   str)DEFAULT_WARN_ON_DRIVER_MEMORY_USAGE_BYTESr   "DEFAULT_ACTOR_TASK_RETRY_ON_ERRORSr   r
   BaseException"DEFAULT_ACTOR_INIT_RETRY_ON_ERRORSr   DEFAULT_ACTOR_INIT_MAX_RETRIESr   &DEFAULT_ENABLE_OP_RESOURCE_RESERVATIONr   %DEFAULT_OP_RESOURCE_RESERVATION_RATIOr   DEFAULT_MAX_ERRORED_BLOCKSr   *DEFAULT_LOG_INTERNAL_STACK_TRACE_TO_STDOUTr   -DEFAULT_RAY_DATA_RAISE_ORIGINAL_MAP_EXCEPTIONr   r   DEFAULT_S3_TRY_CREATE_DIRr   DEFAULT_WAIT_FOR_MIN_ACTORS_Sr   r   r   DEFAULT_ENABLE_PER_NODE_METRICSr   r   r   r   r   rw   r   r   r   rZ   r   DEFAULT_ENFORCE_SCHEMASr   $DEFAULT_PANDAS_BLOCK_IGNORE_METADATAr   r   r   r   staticmethodr   r   propertyr   setterr   r   r   r   r   __classcell__)r   s   @r(   ry   ry   L  sA        b bJ ,I8C=HHH!>3>>>&HHHH ;;;;%EdEEE%FsFFF,1EBS,T,T,T)TTT $BDAAA)J)J)L)LLLL59,d999 -D$cCCC 37 (3-666 	; /   
 	B 6s    ?C,hsmBBB 48)5777;?15???=A3UAAA/J,JJJ. #$7    !=#<<<)J)))3OT333)J)))%MdMMM2OS222"@C@@@,S#TSSS ;;;;?C,htnCCC"8488888887t777,1E2- - -)    .L$---!=$=== +/!4...3 (    'HtGGG%FsFFF7 ,T    -OS	NNN-V%sVVV 	+ d=!!! + + + (JIII"@C@@@,R#TRRR+P!5PPP88882 '    *W $VVV%)d)))7t777 "?3>>>37!8C=777#(5??$ $ $tCy    %DTCCC9=/===45 (5/555'+x}+++ -2#T111<AE7= = =9    59*E888?C7CCC 	> 24    4OT333)M $MMM,6 ,6 ,6\!) !)C !)D !) !) !) !) !) !)F "$ "$ "$ \"$H # # # \# 	&/ 	& 	& 	& X	& 'o '$ ' ' ' '2 2c 2C 23 2 2 2 2&c &# &$ & & & &( ( ( ( ( (# # # #, , , , , , , , , ,r'   ry   )rc   r   )rc   r   )sr   enumloggingr   	threadingr   dataclassesr   r   typingr   r   r   r   r	   r
   r   ray._private.ray_constantsr   r   r   ray._private.workerr   ray.data._internal.loggingr   ray.util.annotationsr   ray.util.debugr   ray.util.scheduling_strategiesr   re   r   rv   r   	getLoggerr   ro   r   ra   Lockr   r  Enumr   r   %DEFAULT_SHUFFLE_TARGET_MAX_BLOCK_SIZEMAX_SAFE_BLOCK_SIZE_FACTORr   r   r   r)  r  r  r  r  r   r   rn   r%   rq   $DEFAULT_MAX_HASH_SHUFFLE_AGGREGATORSr  r  r	  r
  r  rb   r  r  r  r  r  r  r  r  r#  r$  r  r  r  r  r  r(  r  r   r   r  r  r  r  r   r`   r!  r"  WARN_PREFIX	OK_PREFIXLEGACY_DEFAULT_BATCH_SIZEr   r%  r&  ;DEFAULT_ACTOR_MAX_TASKS_IN_FLIGHT_TO_MAX_CONCURRENCY_FACTORr'  r  r  rS   rU   rX   rZ   r\   rf   rt   rw   ry   DatasetContextr&   r'   r(   <module>rD     s      				      ( ( ( ( ( ( ( ( B B B B B B B B B B B B B B B B 



 G G G G G G G G G G + + + + + + G G G G G G - - - - - - # # # # # # > > > > > > HHHHHH      
	8	$	$ -1 ) 0 0 0	      " " " " "c49 " " ! " !2  ); %
 !  /  &6 "" '/x+U( ( $ "% #(  $/K("% % ! "&JNN0$77" "  :>>')E   (3{+S( ( $ '  *3 &/   T##bjnn-BCHHIIJJ +/ (%+&H#NN *2(.+ + ' 'h'EtLL  !  DRZ^^4PRU%V%V!W!WXX -5X15. . * 19+U1 1 - tCC
z3 ? ?@@AA  $(4K0!44$ $    /7h2D/ / +
 %0K0PRS$T$T ! %)DK4a88% % ! #(#=uEE 27 /
& "  -C )%* "%* "!" )1-t* * & ).JNN2E::) ) %    	 !  23 . "  +,b! !  ?JkJA? ? ;
 #'$C
2C8899# #  6A[936 6 2 =HK@"= = 9
 6?Y:6 6 +U   
 8Ay<8 8 -u   
 /:k5/ / &    ?Gh<e? ? 5t   
 
 Q  Q  Q  Q  Q  Q  Q   QF   (? ( ( ( (&) ) ) ) 
p, p, p, p, p, p, p,  p,h r'   