
    &`ib                         d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	 d dl
mZ d dlmZ d dlmZmZ  e j        e          Z G d de          Z G d	 d
e          ZdS )    N)Enum)DictIteratorListOptionalTuple)_check_import)BlockMetadata)
DatasourceReadTaskc                   >    e Zd ZdZdZedee         fd            ZdS )HudiQueryTypesnapshotincrementalreturnc                     d | D             S )Nc                     g | ]	}|j         
S  )value).0es     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/data/_internal/datasource/hudi_datasource.py
<listcomp>z1HudiQueryType.supported_types.<locals>.<listcomp>   s    %%%A%%%    r   )clss    r   supported_typeszHudiQueryType.supported_types   s    %%%%%%r   N)	__name__
__module____qualname__SNAPSHOTINCREMENTALclassmethodr   strr   r   r   r   r   r      sG        HK&S	 & & & [& & &r   r   c                       e Zd ZdZ	 	 	 ddededeeeeeef                           deeeef                  deeeef                  f
dZ		 dd	e
d
ee
         ded         fdZdee
         fdZdS )HudiDatasourcez/Hudi datasource, for reading Apache Hudi table.N	table_uri
query_typefiltershudi_optionsstorage_optionsc                     t          | dd           || _        t          |                                          | _        |pg | _        |pi | _        |pi | _        d S )Nhudizhudi-python)modulepackage)r	   
_table_urir   lower_query_type_filters_hudi_options_storage_options)selfr&   r'   r(   r)   r*   s         r   __init__zHudiDatasource.__init__   sg     	d6=AAAA#()9)9););<<2)/R / 52r   parallelismper_task_row_limitr   r   c           	      \    dd l }dd lddlm} dt          dt
          t                   dt          t          t          f         dt          d         ffd|                     j	                  
                     j                                       j                                      d	d
                                          }t                               d j	                    j        t&          j        k    r|                    | j                  }n j        t&          j        k    ra j                            d          } j                            d          }|                    ||          }	|                    |	|          }n2t7          d j         dt&                                           d          t                               d j	                   i |                                |                                |                                }
g }|D ]}d}g }g }d}|D ]y}||j         z  }|!                                }|"                    |           tF          j$        %                     j	        |          }|"                    |           ||j&        z  }z j        t&          j        k    rtO          |||d           }n( j        t&          j        k    rtO          d |d d           }tQ          |f fd	||
|          }|"                    |           	|S )Nr   )HudiTableBuilderr&   base_file_pathsoptionsr   zpyarrow.Tablec              3      K   ddl m} |D ]@} || |          }|                    |          }j                            |g          V  Ad S )Nr   )HudiFileGroupReader)r,   r>   !read_file_slice_by_base_file_pathTablefrom_batches)r&   r;   r<   r>   pfile_group_readerbatchpyarrows          r   _perform_readz4HudiDatasource.get_read_tasks.<locals>._perform_read0   s{      
 100000$ : :$7$7	7$K$K!)KKANNm00%999999: :r   z#hoodie.read.use.read_optimized.modetruez,Collecting file slices for Hudi table at: %sz&hoodie.read.file_group.start_timestampz$hoodie.read.file_group.end_timestampzUnsupported query type: z. Supported types are: .z)Creating read tasks for Hudi table at: %s)num_rowsinput_files
size_bytes
exec_statsc                 (     j         |           S N)r/   )pathsrF   reader_optionsr5   s    r   <lambda>z/HudiDatasource.get_read_tasks.<locals>.<lambda>   s    ]]OUN6 6 r   )read_fnmetadataschemar8   ))numpyrE   r,   r:   r#   r   r   r   from_base_urir/   with_hudi_optionsr3   with_storage_optionsr4   with_hudi_optionbuildloggerinfor1   r   r    get_file_slices_splitsr2   r!   getget_file_slices_betweenarray_split
ValueErrorr   r*   r)   
get_schemanum_recordsbase_file_relative_pathappendospathjoinbase_file_sizer
   r   )r5   r7   r8   npr:   
hudi_tablefile_slices_splitsstart_tsend_tsfile_slicesrT   
read_tasksfile_slices_splitrI   relative_pathsrJ   rK   
file_slicerelative_path	full_pathrS   	read_taskrF   rE   rP   s   `                     @@@r   get_read_taskszHudiDatasource.get_read_tasks)   s    	))))))
	:
	:!#Y
	: #s(^
	: o&	
	: 
	: 
	: 
	: 
	: 
	: **4?;;t122!!$"788 CVLLUWW 	 	BDOTTT}555!+!B!BT]" " !:::)--.VWWH'++,RSSF$<<XvNNK!#[!I!Iv4+;vvTaTqTqTsTsvvv   	?QQQ
((**
%%''

 &&((
!3 *	) *	)HNKJ/ 
8 
8

 J22 * B B D D%%m444GLL-HH	""9---j77

=#999(% +)#	   !]%>>> )! +##	   !%3        "#5  I i((((r   c                     d S rN   r   )r5   s    r   estimate_inmemory_data_sizez*HudiDatasource.estimate_inmemory_data_size   s    tr   )NNNrN   )r   r   r   __doc__r#   r   r   r   r   r6   intrw   ry   r   r   r   r%   r%      s       99 9=15486 66 6 $uS#s]345	6
 tCH~.6 "$sCx.16 6 6 6" EIg gg4<SMg	j	g g g gRXc]      r   r%   )loggingrf   enumr   typingr   r   r   r   r   ray.data._internal.utilr	   ray.data.blockr
   ray.data.datasource.datasourcer   r   	getLoggerr   r[   r   r%   r   r   r   <module>r      s	    				       8 8 8 8 8 8 8 8 8 8 8 8 8 8 1 1 1 1 1 1 ( ( ( ( ( ( ? ? ? ? ? ? ? ?		8	$	$& & & & &D & & &~ ~ ~ ~ ~Z ~ ~ ~ ~ ~r   