
    &`i                         d dl Z d dlmZmZmZmZmZmZmZ d dl	Z
d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ erd dlZ e j        e          Z G d d	e          Zd
ed         fdZd
ed         fdZdS )    N)TYPE_CHECKINGAnyDictIteratorListOptionalUnioncall_with_retry)_check_import)BlockMetadata)DataContext)
DatasourceReadTaskc                      e Zd ZdZdgZdZdZ	 	 	 	 	 ddedee	e
ef                  deee                  d	ee         d
eeeef                  deeeef                  fdZ	 dde
dee
         dee         fdZdee
         fdZdS )LanceDatasourcez,Lance datasource, for reading Lance dataset.zLanceError(IO)
       Nuriversioncolumnsfilterstorage_optionsscanner_optionsc                    t          | dd           dd l}|| _        |pi | _        |
|| j        d<   |
|| j        d<   || _        |                    |||          | _        g }|                    | j                   |                    t          j
                    j                   d|| j        | j        d	| _        d S )
Nlancepylance)modulepackager   r   r   )r   r   r   zread lance fragments)descriptionmatchmax_attemptsmax_backoff_s)r   r   r   r   r   datasetlance_dsextendREAD_FRAGMENTS_ERRORS_TO_RETRYr   get_currentretried_io_errorsREAD_FRAGMENTS_MAX_ATTEMPTS(READ_FRAGMENTS_RETRY_MAX_BACKOFF_SECONDS_retry_params)	selfr   r   r   r   r   r   r   r!   s	            /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/data/_internal/datasource/lance_datasource.py__init__zLanceDatasource.__init__   s     	d7I>>>>.4".5D +-3D *.Wo & 
 
 T8999[,..@AAA1 <!J	
 
    parallelismper_task_row_limitreturnc                    g }| j                             d          }|| j                                        }t	          j        ||          D ]}t          |          dk    rd |D             }t          d |D                       }d |D             }t          |d |d           }	| j         | j        | j	        t          |ffd	|	|d         j        |          }
|                    |
           |S )	N	fragmentsr   c                 &    g | ]}|j         j        S  )metadataid.0fs     r.   
<listcomp>z2LanceDatasource.get_read_tasks.<locals>.<listcomp>K   s    ===aAJM===r0   c              3   >   K   | ]}|                                 V  d S N)
count_rowsr:   s     r.   	<genexpr>z1LanceDatasource.get_read_tasks.<locals>.<genexpr>L   s*      ==a1<<>>======r0   c                 d    g | ]-}|                                 D ]}|                                .S r7   )
data_filespath)r;   r<   	data_files      r.   r=   z2LanceDatasource.get_read_tasks.<locals>.<listcomp>M   sK       %&Q\\^^ 8A	     r0   )num_rows
size_bytesinput_files
exec_statsc                 (    t          |           S r?   )_read_fragments_with_retry)r<   r%   retry_paramsr   s    r.   <lambda>z0LanceDatasource.get_read_tasks.<locals>.<lambda>]   s    'A# 	( ( r0   )schemar2   )r   getr%   get_fragmentsnparray_splitlensumr   r,   r   rN   append)r-   r1   r2   
read_tasksds_fragmentsr5   fragment_idsrF   rH   r8   	read_taskr%   rL   r   s              @@@r.   get_read_taskszLanceDatasource.get_read_tasks?   s]    
+//<<=6688LkBB  	)  	)I9~~""==9===L==9=====H *3  K
 %!'	  H #2O}H-L %         |*#5
 
 
I i((((r0   c                     d S r?   r7   )r-   s    r.   estimate_inmemory_data_sizez+LanceDatasource.estimate_inmemory_data_sizej   s    tr0   )NNNNNr?   )__name__
__module____qualname____doc__r'   r*   r+   strr   r	   intr   r   r   r/   r   rZ   r\   r7   r0   r.   r   r      s*       66 '7%7""$/1,
 .2'+ $4848 
  
 
 %S/* 
 $s)$	 

  
 "$sCx.1 
 "$sCx.1 
  
  
  
F EI) ))4<SM)	h) ) ) )VXc]      r0   r   r3   zpyarrow.Tablec                 ,     t           fdfi |S )Nc                  &    t                     S r?   )_read_fragments)rX   r%   r   s   r.   rM   z,_read_fragments_with_retry.<locals>.<lambda>v   s    hHH r0   r
   )rX   r%   r   rL   s   ``` r.   rK   rK   o   s8     HHHHHH 
  r0   c              #      K   ddl }fd| D             }||d<    j        di |}|                                D ]}|j                            |g          V   dS )zRead Lance fragments in batches.

    NOTE: Use fragment ids, instead of fragments as parameter, because pickling
    LanceFragment is expensive.
    r   Nc                 :    g | ]}                     |          S r7   )get_fragment)r;   r9   r%   s     r.   r=   z#_read_fragments.<locals>.<listcomp>   s'    BBBr&&r**BBBr0   r5   r7   )pyarrowscanner	to_readerTablefrom_batches)rX   r%   r   ri   r5   rj   batchs    `     r.   re   re   {   s       NNNBBBB\BBBI#,OK h1111G""$$ 2 2m((%1111112 2r0   )loggingtypingr   r   r   r   r   r   r	   numpyrQ   ray._common.retryr   ray.data._internal.utilr   ray.data.blockr   ray.data.contextr   ray.data.datasource.datasourcer   r   ri   	getLoggerr]   loggerr   rK   re   r7   r0   r.   <module>ry      sX    L L L L L L L L L L L L L L L L L L     - - - - - - 1 1 1 1 1 1 ( ( ( ( ( ( ( ( ( ( ( ( ? ? ? ? ? ? ? ? NNN 
	8	$	$Y Y Y Y Yj Y Y Yx	
 o	 	 	 	2 o	2 2 2 2 2 2r0   