
    &`isf                        d dl Z d dlZd dlZd dlmZmZmZmZmZm	Z	m
Z
mZmZmZ d dlZd dlZd dlmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZ d dl m!Z!m"Z" d d	l#m$Z$ d d
l%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z, d dl-m.Z. d dl/m0Z0 erd dlZd dl1Z1d dl2m3Z3 d dl%m4Z4  ed          Z5dZ6 ej7        e8          Z9da:d Z; G d de	          Z< G d de)          Z= G d de"          Z> e j?        dddg          Z@ G d de!          ZAdS )    N)
TYPE_CHECKINGAnyDictIteratorListMappingOptionalTupleTypeVarUnion)is_object_dtype	is_scalaris_string_dtype)TENSOR_COLUMN_NAME)_should_convert_to_tensor)convert_to_numpy)row_reprrow_repr_prettyrow_str)TableBlockAccessorTableBlockBuilder)is_null)BlockBlockAccessorBlockColumnBlockColumnAccessorBlockExecStats	BlockTypeU)DataContext)Expr)SortKeyBlockMetadataWithSchemaT   c                  *    t           dd l} | a t           S Nr   )_pandaspandasr*   s    s/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/data/_internal/pandas_block.pylazy_import_pandasr-   7   s    N    c                       e Zd ZdZdefdZdeeee         f         defdZ	de
fdZd Zdeeef         fd	Zd
 Zd Zd ZdS )	PandasRowzF
    Row of a tabular Dataset backed by a Pandas DataFrame block.
    rowc                     || _         d S N)_row)selfr1   s     r,   __init__zPandasRow.__init__E   s    			r.   keyreturnc                      ddl m dt          t                   dt          f fd}t          |t                    }|r|gn|} ||          }|d S |r|d         S |S )Nr   TensorArrayElementkeysr8   c                 ~   j         |          }t          |          dk    rd S |j        d         }t          |j        d                   rt	          d |D                       S 	 t	          d |D                       S # t
          t          f$ r,}t                              d| d|           |cY d }~S d }~ww xY w)Nr   c              3   >   K   | ]}|                                 V  d S r3   to_numpy.0items     r,   	<genexpr>z:PandasRow.__getitem__.<locals>.get_item.<locals>.<genexpr>T   s*      ??T]]__??????r.   c              3      K   | ]}|V  d S r3    rA   s     r,   rD   z:PandasRow.__getitem__.<locals>.get_item.<locals>.<genexpr>Y   s"      44dT444444r.   zFailed to convert z to a tuple)exc_info)	r4   leniloc
isinstancetupleAttributeError
ValueErrorloggerwarning)r<   colitemser;   r5   s       r,   get_itemz'PandasRow.__getitem__.<locals>.get_itemK   s    )D/C3xx1}}tHQKE%*Q-);<< @ ????????	 44e444444"J/   FEFFFQRSSS 	s   &A? ?B<!B71B<7B<)ray.data.extensionsr;   r   strr   rJ   )r5   r7   rS   is_single_itemr<   rQ   r;   s   `     @r,   __getitem__zPandasRow.__getitem__H   s    ::::::	49 	 	 	 	 	 	 	 	, $C--&/uuC=4 	8OLr.   c              #   0   K   | j         j        D ]}|V  d S r3   )r4   columns)r5   ks     r,   __iter__zPandasRow.__iter__n   s/      " 	 	AGGGG	 	r.   c                 &    | j         j        d         S )N   )r4   shaper5   s    r,   __len__zPandasRow.__len__r   s    yq!!r.   c                     i }|                                  D ]-\  }}t          |          r|t          j        u rd ||<   (|||<   .|S r3   )rQ   r   pdNA)r5   pydictr7   values       r,   	as_pydictzPandasRow.as_pydictu   sY    !#**,, 	$ 	$JC  $ERUNN"s#sr.   c                      t          |           S r3   )r   r_   s    r,   __str__zPandasRow.__str__   s    t}}r.   c                      t          |           S r3   )r   r_   s    r,   __repr__zPandasRow.__repr__   s    ~~r.   c                 $    t          | ||          S r3   )r   )r5   pcycles      r,   _repr_pretty_zPandasRow._repr_pretty_   s    tQ...r.   N)__name__
__module____qualname____doc__r   r6   r   rU   r   rW   r   r[   r`   r   rf   rh   rj   rn   rF   r.   r,   r0   r0   @   s         C    $uS$s)^4 $ $ $ $ $L(    " " "4S>        / / / / /r.   r0   c            
           e Zd Zd  fdZdddededee         fd	Zdddededee         fd
Zdddededee         fdZ	dddededee         fdZ
dddededee         fdZdddedededee         fdZdeeeef                  fdZdefdZdefdZdefdZdefdZ	 	 d!dedee         dedee         fdZdee         fdZd"dedej        fdZdeee         df         fdZd ZdefdZ  xZ!S )#PandasBlockColumnAccessorrP   pandas.Seriesc                 J    t                                          |           d S r3   superr6   )r5   rP   	__class__s     r,   r6   z"PandasBlockColumnAccessor.__init__   s!    r.   T)as_pyignore_nullsrz   r8   c                `    |r| j                                         nt          | j                   S r3   )_columncountrH   r5   r{   rz   s      r,   r~   zPandasBlockColumnAccessor.count   s*    '3Jt|!!###T\9J9JJr.   c                f    |                                  rd S | j                            |d          S )Nr]   )skipna	min_count)_is_all_nullr}   sumr   s      r,   r   zPandasBlockColumnAccessor.sum   s:      	4
 ||qAAAr.   c                d    |                                  rd S | j                            |          S Nr   )r   r}   minr   s      r,   r   zPandasBlockColumnAccessor.min   6      	4||444r.   c                d    |                                  rd S | j                            |          S r   )r   r}   maxr   s      r,   r   zPandasBlockColumnAccessor.max   r   r.   c                    |                      |          }t          |          s||                     |          z  n|S )Nr{   )r   r   r~   )r5   r{   rz   sum_s       r,   meanzPandasBlockColumnAccessor.mean   sG     xx\x22 AHWD4::<:8888SW	
r.   qc                8    | j                             |          S )N)r   )r}   quantile)r5   r   r{   rz   s       r,   r   z"PandasBlockColumnAccessor.quantile   s     |$$q$)))r.   c                     | j                                         }t          |          dk    rd S |j                                        |j                                        dS )Nr   )valuescounts)r}   value_countsrH   indextolistr   )r5   r   s     r,   r   z&PandasBlockColumnAccessor.value_counts   s`    |0022|!!4"(//11")0022
 
 	
r.   c                    ddl m} t          d | j        D             d           }t	          ||          r | j                            d           | _        dd l}|                    | j                                                  }|	                                
                    |j        d          }|                                S )Nr   r:   c              3      K   | ]}||V  	d S r3   rF   rB   xs     r,   rD   z1PandasBlockColumnAccessor.hash.<locals>.<genexpr>   "      HHQ!-q----HHr.   c                 *    |                                  S r3   r?   r   s    r,   <lambda>z0PandasBlockColumnAccessor.hash.<locals>.<lambda>   s    

 r.   T)wrap_numerical)%ray.air.util.tensor_extensions.pandasr;   nextr}   rJ   applypolarsfrom_pandasto_frame	hash_rowscastInt64	to_pandas)r5   r;   first_non_nullpldfhashess         r,   hashzPandasBlockColumnAccessor.hash   s    LLLLLLHH$,HHH$OOn&899 	F<--.D.DEEDL^^DL113344$$RXd$CC!!!r.   c                    t                      }	 |                                 r| j                            d           }n| j        } |j        |                                          S # t          $ rU}dt          |          v r> |j        | j                                                                                  cY d }~S  d }~ww xY w)Nc                 (    | | nt          |           S r3   )rK   )ls    r,   r   z2PandasBlockColumnAccessor.unique.<locals>.<lambda>   s    aiU1XX r.   z buffer source array is read-only)	r-   is_composed_of_listsr}   mapSeriesuniquerM   rU   copy)r5   rb   rP   rR   s       r,   r   z PandasBlockColumnAccessor.unique   s    !!	((** # l&&'M'MNNl29SZZ\\*** 	 	 	1SVV;; !ry!2!2!4!4!;!;!=!=>>>>>>>>	s%   AA) )
C3A	C<CCCc                 H   ddl m t          d | j        D             d           }t	          |          s| j        }n| j                            fd          }|                                 r|                    d           }||         }|                    d          S )Nr   r:   c              3      K   | ]}||V  	d S r3   rF   r   s     r,   rD   z4PandasBlockColumnAccessor.flatten.<locals>.<genexpr>   r   r.   c                 P    t          |           r|                                 n| S r3   )rJ   r@   )r   r;   s    r,   r   z3PandasBlockColumnAccessor.flatten.<locals>.<lambda>   s$    *Q8J*K*KR!**,,,QR r.   c                 0    | d uot          |           dk    S r(   )rH   r   s    r,   r   z3PandasBlockColumnAccessor.flatten.<locals>.<lambda>   s    !4-*FCFFQJ r.   Tignore_index)r   r;   r   r}   rJ   r   r   explode)r5   r   columnmaskr;   s       @r,   flattenz!PandasBlockColumnAccessor.flatten   s    LLLLLLHH$,HHH$OO.*<== 	\FF\''RRRR F $$&& 	"<< F FGGDD\F~~4~000r.   c                 4    | j                                         S r3   )r}   dropnar_   s    r,   r   z PandasBlockColumnAccessor.dropna   s    |""$$$r.   Nr   c                     ||                      |          }t          |          r|S | j        |z
  dz                      |          S )Nr      r   )r   r   r}   r   )r5   r{   r   rz   s       r,   sum_of_squared_diffs_from_meanz8PandasBlockColumnAccessor.sum_of_squared_diffs_from_mean   sR     <99,977D4== 	K$*//|/DDDr.   c                 4    | j                                         S r3   )r}   to_listr_   s    r,   	to_pylistz#PandasBlockColumnAccessor.to_pylist  s    |##%%%r.   Fzero_copy_onlyc                 :    | j                             |           S )zqNOTE: Unlike Arrow, specifying `zero_copy_only=True` isn't a guarantee
        that no copy will be made
        r   )r}   r@   )r5   r   s     r,   r@   z"PandasBlockColumnAccessor.to_numpy  s     
 |$$n*<$===r.   zpyarrow.Arrayc                 *    |                                  S r3   )r   r_   s    r,   _as_arrow_compatiblez.PandasBlockColumnAccessor._as_arrow_compatible  s    ~~r.   c                 Z    | j                                                                          S r3   )r}   notnaanyr_   s    r,   r   z&PandasBlockColumnAccessor._is_all_null  s%    <%%''++----r.   c                     ddl m} t          t          j        |f}t          d | j        D             d           }t          ||          S )Nr   r:   c              3      K   | ]}||V  	d S r3   rF   r   s     r,   rD   zAPandasBlockColumnAccessor.is_composed_of_lists.<locals>.<genexpr>"  r   r.   )r   r;   listnpndarrayr   r}   rJ   )r5   r;   typesr   s       r,   r   z.PandasBlockColumnAccessor.is_composed_of_lists  sS    LLLLLLrz#56HH$,HHH$OO.%000r.   )rP   ru   NTF)"ro   rp   rq   r6   boolr	   r   r~   r   r   r   r   floatr   r   rU   r   r   r   r   r   r   r   r   r   r   r   r   r@   r   r   r   r   __classcell__ry   s   @r,   rt   rt      sJ             :> K K KT K$ K(1+ K K K K 8< 	B 	B 	B4 	B 	B 	B 	B 	B 	B 8< 5 5 54 5 5 5 5 5 5 8< 5 5 54 5 5 5 5 5 5 9= 
 
 
D 
 
! 
 
 
 
 >B* * **)-*6:*	!* * * *

htCI7 
 
 
 
"k " " " "    ,1 1 1 1 1&% % % % % !	E EE qkE 	E
 
!E E E E&49 & & & &> >t >
 > > > > eDI,F&G        . . .1d 1 1 1 1 1 1 1 1r.   rt   c                        e Zd Z fdZedeeee         f         ddfd            Z	eded         ddfd            Z
edefd            Zedd	            Zdefd
Z xZS )PandasBlockBuilderc                 p    t                      }t                                          |j                   d S r3   )r-   rx   r6   	DataFrame)r5   r*   ry   s     r,   r6   zPandasBlockBuilder.__init__'  s/    #%%)*****r.   rY   r8   pandas.DataFramec                     ddl m t                      } |j        fd|                                 D                       S )Nr   TensorArrayc                     i | ]C\  }}|t          |          d k    r(t          ||          r t          |                    n|DS r   )rH   r   r   )rB   column_namecolumn_valuesr   s      r,   
<dictcomp>z9PandasBlockBuilder._table_from_pydict.<locals>.<dictcomp>2  st        /K =))A--1-MM .  K 0 ? ?@@@ '  r.   )$ray.data.extensions.tensor_extensionr   r-   r   rQ   )rY   r*   r   s     @r,   _table_from_pydictz%PandasBlockBuilder._table_from_pydict+  sg    DDDDDD#%%v    3:--//  

 

 
	
r.   tablesc                    t                      }ddlm} t          |           dk    r* |j        | d          }|                    dd           n| d         }t          j                    }|j        r ||          }|S )Nr   ))_cast_ndarray_columns_to_tensor_extensionr]   Tr   dropinplace)	r-   "ray.air.util.data_batch_conversionr   rH   concatreset_indexr    get_currentenable_tensor_extension_casting)r   r*   r   r   ctxs        r,   _combine_tablesz"PandasBlockBuilder._combine_tables=  s    #%%	
 	
 	
 	
 	
 	
 v;;??vD999BNNdN3333B%''. 	?::2>>B	r.   c                      dS r   rF   rF   r.   r,   _concat_would_copyz%PandasBlockBuilder._concat_would_copyP  s    tr.   c                  <    t                      }  | j                    S r3   )r-   r   r+   s    r,   _empty_tablezPandasBlockBuilder._empty_tableT  s    #%%v!!!r.   c                     t           j        S r3   r   PANDASr_   s    r,   
block_typezPandasBlockBuilder.block_typeY      r.   r8   r   )ro   rp   rq   r6   staticmethodr   rU   r   r   r   r   r   r   r   r   r  r   r   s   @r,   r   r   &  s       + + + + + 
Dd3i$8 
=O 
 
 
 \
" %7 8 =O    \$     \ " " " \" I                r.   r   PandasBlockSchemanamesr   c            	           e Zd ZeZd; fdZdedefdZdee	         fdZ
de	d	edefd
Zedededej        fd            Zd<dedededdfdZdee         ddfdZdee	         defdZdee	         ddfdZdee	e	f         ddfdZde	deddfdZdee         ddfdZdefdZd=d Z	 d>dee e	ee	         f                  de ej        ee	ej        f         f         fd"Z!d?d$Z"defd%Z#defd&Z$d'e%ddfd(Z&ede'fd)            Z(ed=d*            Z)d+ed,d-ddfd.Z*d@d/Z+d0ee,         d,d-dee         fd1Z-ed2ee         d,d-de.ed3f         fd4            Z/de0fd5Z1d6ede2e e3ej        f                  fd7Z4dAd:Z5 xZ6S )BPandasBlockAccessortabler   c                 J    t                                          |           d S r3   rw   )r5   r
  ry   s     r,   r6   zPandasBlockAccessor.__init__e  s!    r.   r   r8   c                 V    |                      ||dz   d          }t          |          S )Nr]   Fr   )slicer0   )r5   r   base_rows      r,   _get_rowzPandasBlockAccessor._get_rowh  s+    ::eUQYU:;;"""r.   c                 >    | j         j                                        S r3   )_tablerY   r   r_   s    r,   column_namesz PandasBlockAccessor.column_namesl  s    {"))+++r.   namere   c                     t          |t          j        t          j        f          r|                     ||          S  | j        j        di ||iS )NrF   )rJ   rb   r   r   r   upsert_columnr  assign)r5   r  re   s      r,   fill_columnzPandasBlockAccessor.fill_columno  sR    ebi455 	3%%dE222!t{!22T5M222r.   r1   row_idxc                     ddl m} | t                   j        |         }t	          ||          r|                                }|S )Nr   r:   )rT   r;   r   rI   rJ   r@   )r1   r  r;   tensors       r,   _build_tensor_rowz%PandasBlockAccessor._build_tensor_rowv  sP    ::::::'(-g6f011 	' __&&Fr.   Fstartendr   c                     | j         ||         }|                    dd           |r|                    d          }|S )NTr   deep)r  r   r   )r5   r  r  r   views        r,   r  zPandasBlockAccessor.slice  sK    {59%dD111 	(99$9''Dr.   indicesc                 h    | j                             |          }|                    dd           |S )NTr   )r  taker   )r5   r"  r
  s      r,   r$  zPandasBlockAccessor.take  s5      ))tT222r.   rY   c                 :    | j                             |d          S )NrY   )axis)r  r   r5   rY   s     r,   r   zPandasBlockAccessor.drop  s    {i888r.   c                 t    t          d |D                       st          d| d          | j        |         S )Nc              3   @   K   | ]}t          |t                    V  d S r3   rJ   rU   )rB   rP   s     r,   rD   z-PandasBlockAccessor.select.<locals>.<genexpr>  s,      ;;C:c3'';;;;;;r.   zZColumns must be a list of column name strings when aggregating on Pandas blocks, but got: .)allrM   r  r'  s     r,   selectzPandasBlockAccessor.select  sY    ;;7;;;;; 	6+26 6 6   {7##r.   columns_renamec                 <    | j                             |dd          S )NF)rY   r   r   )r  rename)r5   r.  s     r,   rename_columnsz"PandasBlockAccessor.rename_columns  s    {!!.%e!TTTr.   r   column_datac                     dd l }t          ||j        |j        f          r|                                } | j        j        di ||iS )Nr   rF   )pyarrowrJ   ArrayChunkedArrayr   r  r  )r5   r   r2  r4  s       r,   r  z!PandasBlockAccessor.upsert_column  sZ     	kGM73G#HII 	2%//11K!t{!??[+$>???r.   random_seedc                 l    | j                             d|          }|                    dd           |S )Nr]   )fracrandom_stateTr   )r  sampler   )r5   r7  r
  s      r,   random_shufflez"PandasBlockAccessor.random_shuffle  s:    """DDtT222r.   c                    | j         j        }t          |j                                        |j                                                  }t          d |j        D                       rt          d|j        d          |S )N)r  r   c              3   B   K   | ]}t          |t                     V  d S r3   r*  )rB   r  s     r,   rD   z-PandasBlockAccessor.schema.<locals>.<genexpr>  s/      BBT:dC(((BBBBBBr.   zwA Pandas DataFrame with column names of non-str types is not supported by Ray Dataset. Column names of this DataFrame: r+  )	r  dtypesr  r   r   r   r   r  rM   )r5   r?  schemas      r,   r@  zPandasBlockAccessor.schema  s    #",%%''v}/C/C/E/E
 
 

 BBV\BBBBB 	1%|1 1 1  
 r.   c                 j    ddl m} t          j                    }| j        }|j        r ||          }|S )Nr   ) _cast_tensor_columns_to_ndarrays)r   rB  r    r   r  r   )r5   rB  r   r
  s       r,   r   zPandasBlockAccessor.to_pandas  sI    WWWWWW%''. 	<44U;;Er.   Nc                    |!| j         j                                        }d}nt          |t                    rd}n|g}d}t          | j         j                  }|D ]7}||vr1t          d| d| j         j                                                   8g }|D ]4}|                    | j         |                                                    5|r	|d         }nt          t          ||                    }|S )NFTzCannot find column z, available columns: r   )r  rY   r   rJ   r   setrM   appendr@   dictzip)r5   rY   should_be_single_ndarraycolumn_names_setr   arrayss         r,   r@   zPandasBlockAccessor.to_numpy  s1    ?k)0022G',$$&& 	,',$$iG'+$t{233 	 	F--- 6& 6 6{*11336 6   .  	: 	:FMM$+f-66889999# 	0AYFF#gv..//Fr.   pyarrow.Tablec                    dd l }ddlm} |j                            | j        d          }i }t          | j        j                  D ]\  }}| j        |         }t          |j	        |          r(|
                                                                s;|                    t          |          |                                          |||f<   |                                D ]\  \  }}}|                    |||          } |S )Nr   )TensorDtypeF)preserve_index)type)r4  r   rM  Tabler   r  	enumeraterY   rJ   dtyper   r   nullsrH   nullrQ   
set_column)	r5   parM  arrow_tablenull_coerced_columnsidxcol_namerP   null_cols	            r,   to_arrowzPandasBlockAccessor.to_arrow  s#   EEEEEE h**4;u*MM  "&t{':;; 	 	MC+h'C #)[11 99;;??$$ 8:HH27799 9A 9 9$c8_5 *>)C)C)E)E 	J 	J%OS(X%00hIIKKr.   c                 &    | j         j        d         S r(   )r  r^   r_   s    r,   num_rowszPandasBlockAccessor.num_rows  s    { ##r.   c                    ddl m} ddlmm} t                      fd| j                            dd          }|f}t          }| j        j	        D ]q}| j        |         j
        }t          |          s t          |          st          ||          r-t          | j        |                   }t          ||          }	|	dk    ru| j        |                             |	          j        }
	 t          |
|          r2t%          j        |
d         j        t$          j                  r|
j        }n4t%          j        fd	          }t%          j         ||
                    }|||	z  z  }||xx         t3          |          z  cc<   9# t4          $ r+}t6                              d
| d|            Y d }~id }~ww xY ws|                                }t3          |          S )Nr   r   )r;   rM  c                 N   t                      }d}t          j        | g          }|r|                                }t	          |t
          t          t          t          f          rt          j
        |          }||z  }Yt          |          |v rk|                    t          |                     	 t          j
        |          }n# t          $ r d}Y nw xY w||z  }t	          |t          j                  r||j        |z
  z  }nt	          |j                  r0||                    dd                                          |z
  z  }nt	          |t(          t*          t           f          r|                    |           nt	          |t.                    rO|                    |                                           |                    |                                           n7t	          |          r'|                    |                                           ||S )zhCalculates the memory size of objects,
            including nested objects using an iterative approach.r   Tr   r   )rD  collectionsdequepoprJ   rU   bytesintr   sys	getsizeofidadd	TypeErrorr   r   nbytesr   memory_usager   r   rK   extendrF  r<   r   r@   )objseen
total_sizeobjectscurrentsizer;   rb   s         r,   get_deep_sizez5PandasBlockAccessor.size_bytes.<locals>.get_deep_size  s    55DJ!'..G #7!++-- gUC'?@@ =11D$&J
 g;;$&&G%%%=11DD    DDDd"
 grz22 7'.4"77JJ66 
7,,4d,CCGGIIDPJJ  $s);<< 7NN7++++.. 7NN7<<>>222NN7>>#3#34444);<< 7NN7#3#3#5#5666G  #7H s   5C
 
CCTFra  )nc                      |           S r3   rF   )r   ru  s    r,   r   z0PandasBlockAccessor.size_bytes.<locals>.<lambda>`  s    mmTUFVFV r.   z#Error calculating size for column 'z': )r   r   rT   r;   rM  r-   r  rm  #_PANDAS_SIZE_BYTES_MAX_SAMPLE_COUNTrY   rR  r   r   rJ   rH   r   r;  r   r   
issubdtypenumpy_dtypenumberrl  	vectorizer   rf  	ExceptionrN   rO   )r5   r   rM  rm  object_need_checkmax_sample_countr   rR  rq  sample_sizesampled_datacolumn_memory_samplevectorized_size_calccolumn_memoryrR   total_memory_usager;   ru  rb   s                   @@@r,   
size_byteszPandasBlockAccessor.size_bytes  sw   EEEEEEGGGGGGGG!!*	 *	 *	 *	 *	 *	^ {//d/GG )N> k) #	Y #	YF K'-E&&Y"5))Y e%677Y
 !V!455
 "*.>??!###{6299K9HHOY!,<< $Q3RYB B  0</B,,/1|<V<V<V<V/W/W,/1v00>>0 0, %9J<T$UM (((C,>,>>((((  Y Y YNN#W#W#WTU#W#WXXXXXXXXY;YD *--//%&&&s   4BF
G GGaccc                 .   |                                                      d          }|                                 }|j        D ]P}||         }t          |j                  }||v r)d}|}||v rd                    ||          }|dz  }||v |}|||<   Q|S )NFr  r]   z{}_{})r   r   rY   r   format)	r5   r  rsrZ  rP   r  inew_names	            r,   _zipzPandasBlockAccessor._zipq  s    NN!!u!--MMOO	 	 	HH+C	??L<''#,..&~~h::HFA ,.. $AhKKr.   c                      t                      S r3   )r   rF   r.   r,   builderzPandasBlockAccessor.builder  s    !###r.   c                  4    t                                           S r3   )r   r   rF   r.   r,   r   z PandasBlockAccessor._empty_table  s    !..000r.   	n_samplessort_keyr"   c                 j    | j         |                                                             |d          S )NTr   )r  get_columnsr;  )r5   r  r  s      r,   _samplezPandasBlockAccessor._sample  s/    {8//11299)RV9WWWr.   c                 $   |                                 s J d|                                  d            | j        j        d         dk    r|                                 S |                                \  }}| j                            ||          S )Nz'Sorting columns couldn't be empty (got )r   by	ascending)r  r  r^   r   to_pandas_sort_argssort_values)r5   r  rY   r  s       r,   sortzPandasBlockAccessor.sort  s      ""	O 	ONX5I5I5K5KNNN	O 	O" ;Q1$$$$&&&%99;;{&&'Y&GGGr.   
boundariesc                 "                          |          }|j        d         dk    r+ fdt          t          |          dz             D             S t          |          dk    r|gS t	          j        |                              ||          S )Nr   c                 8    g | ]}                                 S rF   )r   )rB   _r5   s     r,   
<listcomp>z:PandasBlockAccessor.sort_and_partition.<locals>.<listcomp>  s%    LLLAD%%''LLLr.   r]   )r  r^   rangerH   r   	for_block_find_partitions_sorted)r5   r  r  r
  s   `   r,   sort_and_partitionz&PandasBlockAccessor.sort_and_partition  s     		(##;q>Q MLLLs:7J1K1KLLLL__!!7N&u--EE
 
 	
r.   blocksr$   c                    t                      }t          j                    }d | D             } t          |           dk    rt                                          }n_t          j        | t          j	                  }  |j
        | d          }|                                \  }}|                    ||          }ddlm} | |j        ||                                          fS )Nc                 4    g | ]}|j         d          d k    |S r   )r^   )rB   bs     r,   r  z;PandasBlockAccessor.merge_sorted_blocks.<locals>.<listcomp>  s#    666qwqzA~~!~~~r.   r   Tr   r  r#   )stats)r-   r   r  rH   r	  r   r   normalize_block_typesr   r  r   r  r  ray.data.blockr$   
from_blockbuild)r  r  rb   r  retrY   r  r$   s           r,   merge_sorted_blocksz'PandasBlockAccessor.merge_sorted_blocks  s      !!&((66V666v;;!%2244CC (=fiFVWWF")F666C!)!=!=!?!?GY//W	/BBC::::::6+6s%++--PPPPPr.   c                     t           j        S r3   r   r_   s    r,   r  zPandasBlockAccessor.block_type  r  r.   public_row_formatc              #      K   |                                  }t          |          D ]4}|                     |          }|r|                                V  0|V  5d S r3   )r^  r  r  rf   )r5   r  r^  r  r1   s        r,   	iter_rowszPandasBlockAccessor.iter_rows  so       ==??x 	 	A--""C  mmoo%%%%					 	r.   predicate_exprr!   c                 p    | j         j        r| j         S ddlm}  ||| j                   }| j         |         S )z,Filter rows based on a predicate expression.r   )	eval_expr)r  empty?ray.data._internal.planner.plan_expression.expression_evaluatorr  )r5   r  r  r   s       r,   filterzPandasBlockAccessor.filter  sY    ; 	;	
 	
 	
 	
 	
 	

 y55 {4  r.   )r
  r   r   r  r3   )r8   rK  )r  r"   )r  r!   r8   r   )7ro   rp   rq   r0   ROW_TYPEr6   rf  r  r   rU   r  r   r   r  r  r   r   r  r   r  r$  r   r-  r   r1  r   r  r	   r<  r  r@  r   r   r@   r\  r^  r  r   r  r   r  r   r  r  r%   r  r
   r  r   r  r   r   r  r  r   r   s   @r,   r	  r	  b  s       H           #c #i # # # #,d3i , , , ,3 3C 3E 3 3 3 3 y 3 2:    \ 3 S  AS    DI *<    
9DI 9% 9 9 9 9$d3i $,> $ $ $ $UT#s(^ U@R U U U U@@-8@	@ @ @ @(3- <N    
)        :> c49n 56	rz4RZ00	1   <& & & &P$# $ $ $ $d'C d' d' d' d'L *<    " $' $ $ $ \$ 1 1 1 \1X X	 X>P X X X X	H 	H 	H 	H
q'
-6
	e
 
 
 
  QUQ'0Q	u//	0Q Q Q \Q$ I        	!%		%+,	-	 	 	 	! ! ! ! ! ! ! !r.   r	  )Brb  loggingrg  typingr   r   r   r   r   r   r	   r
   r   r   numpyr   r*   rb   pandas.api.typesr   r   r   ray.air.constantsr   $ray.air.util.tensor_extensions.utilsr    ray.data._internal.numpy_supportr   ray.data._internal.rowr   r   r   ray.data._internal.table_blockr   r   ray.data._internal.utilr   r  r   r   r   r   r   r   r   ray.data.contextr    ray.data.expressionsr!   r4  2ray.data._internal.planner.exchange.sort_task_specr"   r$   r%   rx  	getLoggerro   rN   r)   r-   r0   rt   r   
namedtupler  r	  rF   r.   r,   <module>r     s7        



                                H H H H H H H H H H 0 0 0 0 0 0 J J J J J J = = = = = = E E E E E E E E E E P P P P P P P P + + + + + +                  ) ( ( ( ( ( % % % % % % 7MMMNNNJJJJJJ666666GCLL&) #		8	$	$
  I/ I/ I/ I/ I/ I/ I/ I/XW1 W1 W1 W1 W1 3 W1 W1 W1t4  4  4  4  4 * 4  4  4 r +K*+>'@RSS t! t! t! t! t!, t! t! t! t! t!r.   