
    &`iAo                        d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZmZmZmZmZmZ d dlZd dlZd dlZd dlmZmZ d dlmZ d dlmZ d dl m!Z! e	r d dl"Z"d dlZd d	l#m$Z$ d d
l%m&Z& d dl'm(Z( d dl)m*Z*  edd          Z+ edd          Z, ed          Z- ed          Z.ed         Z/ee0ddf         Z1ed         Z2ed         Z3 ej4        e5          Z6e! G d de                      Z7e! G d de8e                      Z9eddee8ej:        f         f         Z;ee2ej:        f         Z<e0Z= G d d ee+e,f                   Z>eee+ge,f         ee+gee,         f         e0d          f         Z?eeee/         d!f                  Z@ed!         ZAg d"ZBd#ZCd$ee1         d%eDfd&ZEd'ed(         d%ed(         fd)ZFd*ee8         d%e8fd+ZGe!d,ed!         d%ed-         fd.            ZHe! G d/ d0                      ZI G d1 d2          ZJe!e G d3 d-                                  ZKd4  eeK          D             ZLe!e G d5 d!eK                                  ZM e!d67          e G d8 d9eM                                  ZNe! G d: d;                      ZO e!d<7           G d= d>                      ZPd?eQej:                 d%ej:        fd@ZRdS )A    N)	dataclassfields)Enum)TYPE_CHECKINGAnyCallableDictIteratorListOptionalProtocolTupleTypeVarUnion)_check_pyarrow_version_truncated_repr)	ObjectRef)log_once)DeveloperAPI)BlockBuilderPandasBlockSchema)SortKey)AggregateFnTT)contravariantU)	covariantKeyTypeAggType)pyarrow.Tablepandas.DataFramer   pyarrow.lib.Schema)pyarrow.ChunkedArraypyarrow.Arraypandas.Series)r&   z
np.ndarrayr%   r$   c                       e Zd ZdZdZdS )	BlockTypearrowpandasN)__name__
__module____qualname__ARROWPANDAS     b/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/data/block.pyr(   r(   D   s        EFFFr1   r(   c                       e Zd ZdZdZdZdS )BatchFormatpyarrowr*   numpyN)r+   r,   r-   r.   r/   NUMPYr0   r1   r2   r4   r4   J   s         EFEEEr1   r4   r!   r"   c                   :    e Zd Zdedeeee         f         fdZdS )_CallableClassProtocol_CallableClassProtocol__argreturnc                     d S Nr0   )selfr:   s     r2   __call__z_CallableClassProtocol.__call__`   s    r1   N)r+   r,   r-   r   r   r   r
   r?   r0   r1   r2   r9   r9   _   sC        a E!Xa[.$9      r1   r9   BlockMetadata)r*   r5   r6   Nr6   schemar;   c                 L    ddl m} | d u pt          | |          r| j         n|  S )Nr   r   )ray.data._internal.pandas_blockr   
isinstancenames)rA   r   s     r2   _is_empty_schemarF   w   sG    AAAAAAT> f/00	FLZr1   schemasSchemac                 6    | D ]}t          |          s|c S dS )zReturn the first non-empty schema from an iterator of schemas.

    Args:
        schemas: Iterator of schemas to check.

    Returns:
        The first non-empty schema, or None if all schemas are empty.
    N)rF   )rG   rA   s     r2   _take_first_non_empty_schemarJ      s6       '' 	MMM	4r1   given_batch_formatc                 h    | dk    rt           } | t          vrt          d|  dt           d          | S )NdefaultzThe given batch format z isn't allowed (must be one of z).)DEFAULT_BATCH_FORMATVALID_BATCH_FORMATS
ValueError)rK   s    r2   _apply_batch_formatrQ      s]    Y&&1!444(&8 ( (#( ( (
 
 	
 r1   metas
BlockStatsc                     d | D             S )Nc                 6    g | ]}|                                 S r0   )to_stats).0ms     r2   
<listcomp>zto_stats.<locals>.<listcomp>   s     (((QAJJLL(((r1   r0   )rR   s    r2   rV   rV      s    ((%((((r1   c                   6    e Zd ZdZd Zedd            Zd ZdS )	BlockExecStatsa  Execution stats for this block.

    Attributes:
        wall_time_s: The wall-clock time it took to compute this block.
        cpu_time_s: The CPU time it took to compute this block.
        node_id: A unique id for the node that computed this block.
        max_uss_bytes: An estimate of the maximum amount of physical memory that the
            process was using while computing this block.
    c                     d | _         d | _        d | _        d| _        d | _        t
          j                                                                        | _	        d| _
        d | _        d S )Nr   )start_time_s
end_time_swall_time_s
udf_time_s
cpu_time_srayruntime_contextget_runtime_contextget_node_idnode_idmax_uss_bytestask_idxr>   s    r2   __init__zBlockExecStats.__init__   s\    -1+/,0+,+/*>>@@LLNN"#'+r1   r;   _BlockExecStatsBuilderc                      t                      S r=   )rk   r0   r1   r2   builderzBlockExecStats.builder   s    %'''r1   c                 R    t          | j        | j        | j        | j        d          S )N)r_   ra   r`   rf   )reprr_   ra   r`   rf   ri   s    r2   __repr__zBlockExecStats.__repr__   s3    #/"o"o<	 
 
 	
r1   N)r;   rk   )r+   r,   r-   __doc__rj   staticmethodrm   rp   r0   r1   r2   r[   r[      s\         , , , ( ( ( \(
 
 
 
 
r1   r[   c                        e Zd ZdZd ZddZdS )rk   zHelper class for building block stats.

    When this class is created, we record the start time. When build() is
    called, the time delta is saved as part of the stats.
    c                 f    t          j                    | _        t          j                    | _        d S r=   )timeperf_counter_start_timeprocess_time
_start_cpuri   s    r2   rj   z_BlockExecStatsBuilder.__init__   s'    ,..+--r1   r;   r[   c                     t          j                    }t          j                    }t                      }| j        |_        ||_        || j        z
  |_        || j        z
  |_	        |S r=   )
ru   rv   rx   r[   rw   r]   r^   r_   ry   ra   )r>   end_timeend_cpustatss       r2   buildz_BlockExecStatsBuilder.build   sb    $&&#%%   !-#$t'77"T_4r1   N)r;   r[   )r+   r,   r-   rq   rj   r~   r0   r1   r2   rk   rk      sA         . . .     r1   rk   c                   \    e Zd ZU dZee         ed<   ee         ed<   ee         ed<   d ZdS )rS   z#Statistics about the block producednum_rows
size_bytes
exec_statsc                 P    | j         t          | j         t                    sJ d S d S r=   )r   rD   intri   s    r2   __post_init__zBlockStats.__post_init__   s4    ?& dos33333 '& 43r1   N)	r+   r,   r-   rq   r   r   __annotations__r[   r   r0   r1   r2   rS   rS      s`          .- sm((((4 4 4 4 4r1   c                     h | ]	}|j         
S r0   )name)rW   fs     r2   	<setcomp>r      s    ???qAF???r1   c                   L     e Zd ZU dZeee                  ed<   d Z fdZ	 xZ
S )r@   zMetadata about the block.input_filesc                 >     t          di  fdt          D             S )Nc                 <    i | ]}|                     |          S r0   )__getattribute__)rW   keyr>   s     r2   
<dictcomp>z*BlockMetadata.to_stats.<locals>.<dictcomp>  s)    SSS3sD))#..SSSr1   r0   )rS   _BLOCK_STATS_FIELD_NAMESri   s   `r2   rV   zBlockMetadata.to_stats   s7     
 
SSSS:RSSS
 
 	
r1   c                 h    t                                                       | j        	g | _        d S d S r=   )superr   r   )r>   	__class__s    r2   r   zBlockMetadata.__post_init__  s8    #!D $#r1   )r+   r,   r-   rq   r   r   strr   rV   r   __classcell__r   s   @r2   r@   r@      sh          $#
 $s)$$$$
 
 

" " " " " " " " "r1   alpha)	stabilityc                        e Zd ZU dZee         ed<   ddeded         f fdZ	 dde	ded         d	d fd
Z
ed	efd            Z xZS )BlockMetadataWithSchemaNrA   metadatarH   c                     t                                          |j        |j        |j        |j                   || _        d S )N)r   r   r   r   )r   rj   r   r   r   r   rA   )r>   r   rA   r   s      r2   rj   z BlockMetadataWithSchema.__init__  sI     ,*&*	 	 	
 	
 	
 r1   blockr}   r[   r;   c                     t                               |           }|                    |          }|                                }t	          ||          S )N)r   )r   rA   )BlockAccessor	for_blockget_metadatarA   r   )r   r}   accessormetarA   s        r2   
from_blockz"BlockMetadataWithSchema.from_block  sO     !**511$$$66""&VDDDDr1   c                 P    t          | j        | j        | j        | j                  S )N)r   r   r   r   )r@   r   r   r   r   ri   s    r2   r   z BlockMetadataWithSchema.metadata!  s.    ](	
 
 
 	
r1   r=   )r+   r,   r-   rA   r   rH   r   r@   rj   Blockr   propertyr   r   r   s   @r2   r   r     s           $FHV###  8J       ;?E EE%&67E	"E E E E 
- 
 
 
 X
 
 
 
 
r1   r   c                   2   e Zd ZdZdefdZdedee         fdZ	dQdeded	ede
fd
Zdee         de
fdZdee         de
fdZdeee                  de
fdZdeeef         de
fdZdedede
fdZdee         de
fdZdRdZ	 dSdeeeee         f                  deej        eeej        f         f         fdZdTdZde
fdZde
fdZdee         defd Z defd!Z!dee"d"f         fd#Z#	 	 dUd$eee                  d%ee$         de%fd&Z&dVd)Z'e(dWd+            Z)e*	 dSd,ed-ee+         de
fd.            Z,e*d,eee-f         de
fd/            Z.e*d,eee-f         de
fd0            Z/e(d1e
dd2fd3            Z0d4ed5d6dd(fd7Z1dQd8ed9edee2         fd:Z3d8ed9edee2         fd;Z4d8ed9edee2         fd<Z5d8ed9edee2         fd=Z6d8ed9edee2         fd>Z7	 dSd8ed9ed?ee2         dee2         fd@Z8dXdAZ9dBee         d5d6ded(         fdCZ:dDd6dEe;dF         de
fdGZ<e(dHed(         d5d6de;e
e=f         fdI            Z>e(	 dYdHee
         d5d6dEe;dF         dKede;e
e=f         f
dL            Z?dBee;e-                  d5d6dee
         fdMZ@de+fdNZAdOee         dej        fdPZBdS )Zr   a  Provides accessor methods for a specific block.

    Ideally, we wouldn't need a separate accessor classes for blocks. However,
    this is needed if we want to support storing ``pyarrow.Table`` directly
    as a top-level Ray object, without a wrapping class (issue #17186).
    r;   c                     t           )z2Return the number of rows contained in this block.NotImplementedErrorri   s    r2   r   zBlockAccessor.num_rows4      !!r1   public_row_formatc                     t           )zIterate over the rows of this block.

        Args:
            public_row_format: Whether to cast rows into the public Dict row
                format (this incurs extra copy conversions).
        r   )r>   r   s     r2   	iter_rowszBlockAccessor.iter_rows8  
     "!r1   Fstartendcopyc                     t           )a(  Return a slice of this block.

        Args:
            start: The starting index of the slice (inclusive).
            end: The ending index of the slice (exclusive).
            copy: Whether to perform a data copy for the slice.

        Returns:
            The sliced block result.
        r   )r>   r   r   r   s       r2   slicezBlockAccessor.sliceA  s
     "!r1   indicesc                     t           )zReturn a new block containing the provided row indices.

        Args:
            indices: The row indices to return.

        Returns:
            A new block containing the provided row indices.
        r   )r>   r   s     r2   takezBlockAccessor.takeN  s
     "!r1   columnsc                     t           )z<Return a new block with the list of provided columns droppedr   r>   r   s     r2   dropzBlockAccessor.dropY  r   r1   c                     t           )z3Return a new block containing the provided columns.r   r   s     r2   selectzBlockAccessor.select]  r   r1   columns_renamec                     t           )z0Return the block reflecting the renamed columns.r   )r>   r   s     r2   rename_columnszBlockAccessor.rename_columnsa  r   r1   column_namecolumn_datac                     t                      )aq  
        Upserts a column into the block. If the column already exists, it will be replaced.

        Args:
            column_name: The name of the column to upsert.
            column_data: The data to upsert into the column. (Arrow Array/ChunkedArray for Arrow blocks, Series or array-like for Pandas blocks)

        Returns:
            The updated block.
        r   )r>   r   r   s      r2   upsert_columnzBlockAccessor.upsert_columne  s     "###r1   random_seedc                     t           )zRandomly shuffle this block.r   )r>   r   s     r2   random_shufflezBlockAccessor.random_shuffler  r   r1   r"   c                     t           )z+Convert this block into a Pandas dataframe.r   ri   s    r2   	to_pandaszBlockAccessor.to_pandasv  r   r1   Nc                     t           )zConvert this block (or columns of block) into a NumPy ndarray.

        Args:
            columns: Name of columns to convert, or None if converting all columns.
        r   r   s     r2   to_numpyzBlockAccessor.to_numpyz  
     "!r1   r!   c                     t           )z'Convert this block into an Arrow table.r   ri   s    r2   to_arrowzBlockAccessor.to_arrow  r   r1   c                     t           )z/Return the base block that this accessor wraps.r   ri   s    r2   to_blockzBlockAccessor.to_block  r   r1   c                 *    |                                  S )z1Return the default data format for this accessor.)r   ri   s    r2   
to_defaultzBlockAccessor.to_default  s    }}r1   batch_formatc                 >   ||                                  S |dk    s|dk    r|                                 S |dk    r|                                 S |dk    r|                                 S |dk    r|                                 S t          dt           d|           )	zConvert this block into the provided batch format.

        Args:
            batch_format: The batch format to convert this block to.

        Returns:
            This block formatted as the provided batch format.
        NrM   nativer*   r5   r6   z The batch format must be one of z, got: )r   r   r   r   r   rP   rO   )r>   r   s     r2   to_batch_formatzBlockAccessor.to_batch_format  s     ==??"Y&&,(*B*B??$$$X%%>>###Y&&==??"W$$==??""3F " "" "  r1   c                     t           )z3Return the approximate size in bytes of this block.r   ri   s    r2   r   zBlockAccessor.size_bytes  r   r1   r#   c                     t           )z7Return the Python type or pyarrow schema of this block.r   ri   s    r2   rA   zBlockAccessor.schema  r   r1   r   r   c                 p    t          |                                 |                                 ||          S )z)Create a metadata object from this block.)r   r   r   r   )r@   r   r   )r>   r   r   s      r2   r   zBlockAccessor.get_metadata  s9     ]]__((#!	
 
 
 	
r1   otherr   c                     t           )z<Zip this block with another block of the same type and size.r   )r>   r   s     r2   zipzBlockAccessor.zip  r   r1   r   c                      t           )z%Create a builder for this block type.r   r0   r1   r2   rm   zBlockAccessor.builder  s
     "!r1   batch
block_typec                    t          |t          j                  r t          dt	          |           d          t          |t
          j        j                  r||t          j	        k    rtddl
m} 	 |                     |          S # |$ rP}t          d          rt                              d| d           ||                     |          cY d}~S |d}~ww xY w|t          j        k    sJ |                     |          S |S )	z-Create a block from user-facing data formats.Error validating z: Standalone numpy arrays are not allowed in Ray 2.5. Return a dict of field -> array, e.g., `{'data': array}` instead of `array`.Nr   )ArrowConversionError!_fallback_to_pandas_block_warningz)Failed to convert batch to Arrow due to: z; falling back to Pandas block)rD   npndarrayrP   r   collectionsabcMappingr(   r.   $ray.air.util.tensor_extensions.arrowr   batch_to_arrow_blockr   loggerwarningbatch_to_pandas_blockr/   )clsr   r   r   es        r2   batch_to_blockzBlockAccessor.batch_to_block  sY    eRZ(( 	8>OE$:$: > > >   {677 	8!Z9?%B%BUUUUUU 33E:::+ 
  
  
  CDD < < < <  
 ")"88????????
  "Y%5555500777s%   3B CACCCCc                 8    ddl m} |                    |          S )z4Create an Arrow block from user-facing data formats.r   )ArrowBlockBuilder)ray.data._internal.arrow_blockr   _table_from_pydict)r   r   r   s      r2   r   z"BlockAccessor.batch_to_arrow_block  s+     	EDDDDD 33E:::r1   c                 8    ddl m} |                    |          S )z4Create a Pandas block from user-facing data formats.r   )PandasBlockBuilder)rC   r   r   )r   r   r   s      r2   r   z#BlockAccessor.batch_to_pandas_block  s+     	GFFFFF!44U;;;r1   r   zBlockAccessor[T]c                    t                       ddl}ddl}t          | |j        |j        f          rddlm}  ||           S t          | |j                  rddl	m
}  ||           S t          | t                    rddlm} |                    |           S t          | t                    r t          dt          |            d          t!          d                    | t%          |                               )z,Create a block accessor for the given block.r   N)ArrowBlockAccessor)PandasBlockAccessorr   z: Standalone Python objects are not allowed in Ray 2.5. To use Python objects in a dataset, wrap them in a dict of numpy arrays, e.g., return `{'item': batch}` instead of just `batch`.zNot a block type: {} ({}))r   r*   r5   rD   TableRecordBatchr   r   	DataFramerC   r   bytes
from_byteslistrP   r   	TypeErrorformattype)r   r*   r5   r   r   s        r2   r   zBlockAccessor.for_block  sB    	   egmW-@ABB 	TIIIIII%%e,,,v/00 	TKKKKKK&&u---u%% 	TIIIIII%00777t$$ 		TDOE$:$: D D D   7>>ud5kkRRSSSr1   	n_samplessort_keyr   c                     t           )z0Return a random sample of items from this block.r   )r>   r  r  s      r2   samplezBlockAccessor.sample  r   r1   onignore_nullsc                     t           )z=Returns a count of the distinct values in the provided columnr   r>   r
  r  s      r2   countzBlockAccessor.count  r   r1   c                     t           )z2Returns a sum of the values in the provided columnr   r  s      r2   sumzBlockAccessor.sum!  r   r1   c                     t           )z2Returns a min of the values in the provided columnr   r  s      r2   minzBlockAccessor.min%  r   r1   c                     t           )z2Returns a max of the values in the provided columnr   r  s      r2   maxzBlockAccessor.max)  r   r1   c                     t           )z3Returns a mean of the values in the provided columnr   r  s      r2   meanzBlockAccessor.mean-  r   r1   r  c                     t           )zBReturns a sum of diffs (from mean) squared for the provided columnr   )r>   r
  r  r  s       r2   sum_of_squared_diffs_from_meanz,BlockAccessor.sum_of_squared_diffs_from_mean1  r   r1   c                     t           )z9Returns new block sorted according to provided `sort_key`r   )r>   r  s     r2   sortzBlockAccessor.sort:  r   r1   
boundariesc                     t           )z1Return a list of sorted partitions of this block.r   r>   r  r  s      r2   sort_and_partitionz BlockAccessor.sort_and_partition>  s
     "!r1   r   aggsr   c                     t           )z3Combine rows with the same key into an accumulator.r   )r>   r   r  s      r2   
_aggregatezBlockAccessor._aggregateD  r   r1   blocksc                     t           )z9Return a sorted block by merging a list of sorted blocks.r   )r"  r  s     r2   merge_sorted_blocksz!BlockAccessor.merge_sorted_blocksH  s
    
 "!r1   Tfinalizec                     t           )z/Aggregate partially combined and sorted blocks.r   )r"  r  r  r%  s       r2   _combine_aggregated_blocksz(BlockAccessor._combine_aggregated_blocksO  r   r1   c                     t           )zNOTE: PLEASE READ CAREFULLY

        Returns dataset partitioned using list of boundaries

        This method requires that
            - Block being sorted (according to `sort_key`)
            - Boundaries is a sorted list of tuples
        r   r  s      r2   _find_partitions_sortedz%BlockAccessor._find_partitions_sortedY  s
     "!r1   c                     t           )z$Return the block type of this block.r   ri   s    r2   r   zBlockAccessor.block_typeh  r   r1   keysc                 L   |                                  dk    r t          j        g t          j                  S |s(t          j        d|                                  g          S |                     |          }t          t          |                                                    S )a  
        NOTE: THIS METHOD ASSUMES THAT PROVIDED BLOCK IS ALREADY SORTED

        Compute boundaries of the groups within a block based on provided
        key (a column or a list of columns)

        NOTE: In each column, NaNs/None are considered to be the same group.

        Args:
            block: sorted block for which grouping of rows will be determined
                    based on provided key
            keys: list of columns determining the key for every row based on
                    which the block will be grouped

        Returns:
            A list of starting indices of each group and an end index of the last
            group, i.e., there are ``num_groups + 1`` entries and the first and last
            entries are 0 and ``len(array)`` respectively.
        r   )dtype)r   r   arrayint32r   "_get_group_boundaries_sorted_numpyr  values)r>   r+  projected_blocks      r2   _get_group_boundaries_sortedz*BlockAccessor._get_group_boundaries_sortedl  s    * ==??a8Bbh//// 	28Q0111 ----1$7M7M7O7O2P2PQQQr1   F)r;   r"   r=   )r;   r!   )NN)r   r   r;   r   )r;   r   )r  r   r;   r   )T)Cr+   r,   r-   rq   r   r   boolr
   r   r   r   r   r   r   r   r   r   r   r	   r   BlockColumnr   r   r   r   r   r   r   r   r   r   	DataBatchr   r   r  rA   r[   r@   r   r   rr   rm   classmethodr(   r   r   r   r   r   r	  r   r  r  r  r  r  r  r  r  r   r!  r   r$  r'  r)  r   r3  r0   r1   r2   r   r   +  s        "# " " " ""4 "HQK " " " "" "3 "S " " " " " "	"DI 	"% 	" 	" 	" 	""DI "% " " " ""d8C=1 "e " " " ""T#s(^ " " " " "$ $; $5 $ $ $ $"(3- "E " " " "" " " "
 :>" "c49n 56"	rz4RZ00	1" " " "" " " ""% " " " "E    HSM i    2"C " " " ""d$889 " " " " ,0/3
 
d3i(
 ^,
 
	
 
 
 
" " " " " " " \"  +/# ## Y'# 
	# # # [#J ;c3h ;E ; ; ; [; <$sCx. <U < < < [< T T#5 T T T \T:" "y "W " " " "" " "4 "HQK " " " ""c " "(1+ " " " ""c " "(1+ " " " ""c " "(1+ " " " ""s "$ "8A; " " " " !	" "" " qk	"
 
!" " " "" " " ""q'"-6"	g" " " ""i "u]/C " " " " " "W")2"	u--	." " " \" 
 	" "U"" M"" 	"
 
u--	." " " \""s$" " 
e	" " " ""I " " " "Rc Rrz R R R R R Rr1   r   betac            
          e Zd ZdZdefdZdddededee         fd	Z	dddededee         fd
Z
dddededee         fdZdddededee         fdZdddededee         fdZdddedededee         fdZdefdZdeeef         fdZdefdZdefdZdefdZdefdZddddedee         dedee         fdZdee         fdZd!dedej        fdZdeee         df         fdZ e!dedd fd             Z"dS )"BlockColumnAccessorzbProvides vendor-neutral interface to apply common operations
    to block's (Pandas/Arrow) columnscolc                     || _         d S r=   )_column)r>   r<  s     r2   rj   zBlockColumnAccessor.__init__  s    r1   T)as_pyr  r?  r;   c                    t                      )z4Returns a count of the distinct values in the columnr   r>   r  r?  s      r2   r  zBlockColumnAccessor.count      !###r1   c                    t                      S )z)Returns a sum of the values in the columnr   rA  s      r2   r  zBlockColumnAccessor.sum  s    "$$$r1   c                    t                      )z)Returns a min of the values in the columnr   rA  s      r2   r  zBlockColumnAccessor.min  rB  r1   c                    t                      )z)Returns a max of the values in the columnr   rA  s      r2   r  zBlockColumnAccessor.max  rB  r1   c                    t                      )z*Returns a mean of the values in the columnr   rA  s      r2   r  zBlockColumnAccessor.mean  rB  r1   qc                    t                      )z.Returns requested quantile of the given columnr   )r>   rG  r  r?  s       r2   quantilezBlockColumnAccessor.quantile  s     "###r1   c                     t                      )zBReturns new column holding only distinct values of the current oner   ri   s    r2   uniquezBlockColumnAccessor.unique  rB  r1   c                     t                      r=   r   ri   s    r2   value_countsz BlockColumnAccessor.value_counts      !###r1   c                     t                      )a  
        Computes a 64-bit hash value for each row in the column.

        Provides a unified hashing method across supported backends.
        Handles complex types like lists or nested structures by producing a single hash per row.
        These hashes are useful for downstream operations such as deduplication, grouping, or partitioning.

        Internally, Polars is used to compute row-level hashes even when the original column
        is backed by Pandas or PyArrow.

        Returns:
            A column of 64-bit integer hashes, returned in the same format as the
            underlying backend (e.g., Pandas Series or PyArrow Array).
        r   ri   s    r2   hashzBlockColumnAccessor.hash  s     "###r1   c                     t                      )z;Flattens nested lists merging them into top-level containerr   ri   s    r2   flattenzBlockColumnAccessor.flatten  s     "###r1   c                     t                      r=   r   ri   s    r2   dropnazBlockColumnAccessor.dropna  rN  r1   c                     t                      )z
        Checks whether the column is composed of list-like elements.

        Returns:
            True if the column is made up of list-like values; False otherwise.
        r   ri   s    r2   is_composed_of_listsz(BlockColumnAccessor.is_composed_of_lists  s     "###r1   N)r  r?  r  c                    t                      )z9Returns a sum of diffs (from mean) squared for the columnr   )r>   r  r  r?  s       r2   r  z2BlockColumnAccessor.sum_of_squared_diffs_from_mean  s     "###r1   c                     t                      )z8Converts block column to a list of Python native objectsr   ri   s    r2   	to_pylistzBlockColumnAccessor.to_pylist  rB  r1   Fzero_copy_onlyc                     t                      )z#Converts underlying column to Numpyr   )r>   rZ  s     r2   r   zBlockColumnAccessor.to_numpy  rB  r1   r%   c                     t                      )zAConverts block column into a representation compatible with Arrowr   ri   s    r2   _as_arrow_compatiblez(BlockColumnAccessor._as_arrow_compatible  rB  r1   c                 <   t                       ddl}t          | t          j                  st          | t          j                  rddlm}  ||           S t          | |j                  rddl	m
}  ||           S t          dt          |            d          )z-Create a column accessor for the given columnr   N)ArrowBlockColumnAccessor)PandasBlockColumnAccessorzEExpected either a pandas.Series or pyarrow.Array (ChunkedArray) (got ))r   r*   rD   paArrayChunkedArrayr   r_  SeriesrC   r`  r  r  )r<  pdr_  r`  s       r2   
for_columnzBlockColumnAccessor.for_column  s     	   c28$$ 	
3(H(H 	OOOOOO++C000RY'' 	QQQQQQ,,S111%S		% % %  r1   r4  )#r+   r,   r-   rq   r6  rj   r5  r   r   r  r  r  r  r  floatrI  rK  r	   r   r   rM  rP  rR  rT  rV  r  r   rY  r   r   r   r   r]  rr   rg  r0   r1   r2   r;  r;    sM       ) )K     :> $ $ $T $$ $(1+ $ $ $ $ 8< % % %4 % % % % % % 8< $ $ $4 $ $ $ $ $ $ 8< $ $ $4 $ $ $ $ $ $ 9= $ $ $D $ $! $ $ $ $
 >B$ $ $$)-$6:$	!$ $ $ $$ $ $ $ $$d39o $ $ $ $$k $ $ $ $"$ $ $ $ $
$ $ $ $ $$d $ $ $ $ !$ $ $ $ qk	$
 $ 
!$ $ $ $$49 $ $ $ $$ $t $
 $ $ $ $$eDI,F&G $ $ $ $  (=    \  r1   r;  r   c                    g }g }g }| D ]}t          j        |j        t           j                  r0t          j        |d                   r|                    |           Vt          j        |j        t           j                  s|d         |                    |           |                    |           g }t          |          dk    rE|                    t          j        d |D                                           d                     t          |          dk    rE|                    t          j        d |D                                           d                     t          |          dk    rE|                    t          j        d |D                                           d                     t          j	        dgt          j
        |                              d                                          d         dz   t          | d                   gg                              t                    }|S )Nr   c                 :    g | ]}|d d         |dd         k    S    Nrj  r0   rW   arrs     r2   rY   z6_get_group_boundaries_sorted_numpy.<locals>.<listcomp>  s-    EEEss122w#crc(*EEEr1   )axisc           	          g | ]R}|d d         |dd         k    t          j        |d d                   t          j        |dd                   z  z  SS rl  )r   isfinitern  s     r2   rY   z6_get_group_boundaries_sorted_numpy.<locals>.<listcomp>"  sq         WCRC({3qrr7++bk#crc(.C.CCE  r1   c           	          g | ]U}|d d         |dd         k    t          j        |d d         d          t          j        |dd         d          z   z  VS rl  )r   equalrn  s     r2   rY   z6_get_group_boundaries_sorted_numpy.<locals>.<listcomp>/  sx         WCRC(QRR$//"(3ss8T2J2JJKL  r1   rm  )r   
issubdtyper-  numberisnanappendlenvstackanyhstackcolumn_stacknonzeroastyper   )r   general_arraysnum_arrays_with_nancat_arrays_with_nonero  diffsr  s          r2   r0  r0    sa    N ' '=BI.. 	'28CG3D3D 	'&&s++++sy")44 	'R '',,,,!!#&&&&
 E
>QIEEnEEEFFJJPQJRR	
 	
 	
 !## 	I   3    cqckk	
 	
 	
   1$$ 	I   4    cqckk	
 	
 	
 C_U##''Q'//7799!<q@__	
  fSkk  r1   )Sr   loggingru   dataclassesr   r   enumr   typingr   r   r   r	   r
   r   r   r   r   r   r   r6   r   r5   rb  rb   ray.data._internal.utilr   r   	ray.typesr   ray.utilr   ray.util.annotationsr   r*    ray.data._internal.block_builderr   rC   r   2ray.data._internal.planner.exchange.sort_task_specr   ray.data.aggregater   r   r   r   r    r   r  rH   r6  BatchColumn	getLoggerr+   r   r(   r   r4   r   r7  DataBatchColumnCallableClassr9   UserDefinedFunctionBlockPartitionBlockPartitionMetadatarO   rN   r5  rF   rJ   rQ   rV   r[   rk   rS   r   r@   r   r   r;  r  r0  r0   r1   r2   <module>r     sK         ) ) ) ) ) ) ) )                                        



 K K K K K K K K             - - - - - - /MMMNNN======AAAAAAJJJJJJ...... GCt$$$GC4   
')


')

 	12 
t(*>>	? LM J
 
	8	$	$        
     #t    /#5tCO7LLM	 RZ/0     Xad^    aS!VaS(1+	!"$  eIe,o=>? o. :::  Xf- $    (8*< (AS    HSM c     )D) )d<.@ ) ) ) ) !
 !
 !
 !
 !
 !
 !
 !
H       4 
4 4 4 4 4 4 4  4" @?FF:,>,>???  
" " " " "J " "  "(    

 
 
 
 
m 
 
  ! 
< _R _R _R _R _R _R _R _RD s s s s s s s  sl?RZ0@ ?RZ ? ? ? ? ? ?r1   