
    &`i!                        U d dl Z d dlmZmZmZmZmZ d dlmZ d dl	m
Z
 d dlmZmZmZ d dlmZ d dlmZmZ erd dlZdZee         ed<   	 d d	lmZ d
e j        vr e            rd dlZd dlZd dlZd dlZd dlm Z   e ej!        "                    d                    Z# e d          Z$e#e$k     rej%        &                    ej'        (                                d          Z)ej*        +                    d
e)          Z,ej*        -                    e,          Z.e.e j        e,j/        <   e,j0        1                    e.           n# e$ rZ2e2ZY dZ2[2ndZ2[2ww xY w G d de          Z3dS )    N)TYPE_CHECKINGIterableListOptionalUnion)pyarrow_table_from_pydict)_check_pyarrow_version)BlockBlockAccessorBlockMetadata)Dataset)
DatasourceReadTaskTRANSFORMERS_IMPORT_ERROR)is_datasets_availabledatasets_modules)parsedatasetsz4.0.0z__init__.pyc                       e Zd ZdZ	 dded         defdZeded         defd            Z	de
e         fd	Zdee         fd
Z	 ddede
e         dee         fdZdS )HuggingFaceDatasourceah  Hugging Face Dataset datasource, for reading from a
    `Hugging Face Datasets Dataset <https://huggingface.co/docs/datasets/package_reference/main_classes#datasets.Dataset/>`_.
    This Datasource implements a streamed read using a
    single read task, most beneficial for a
    `Hugging Face Datasets IterableDataset <https://huggingface.co/docs/datasets/package_reference/main_classes#datasets.IterableDataset/>`_
    or datasets which are too large to fit in-memory.
    For an in-memory Hugging Face Dataset (`datasets.Dataset`), use :meth:`~ray.data.from_huggingface`
    directly for faster performance.
       dataset)zdatasets.Datasetzdatasets.IterableDataset
batch_sizec                 >    t           t           || _        || _        d S N)r   _dataset_batch_size)selfr   r   s      /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/data/_internal/datasource/huggingface_datasource.py__init__zHuggingFaceDatasource.__init__D   s%    
 %0++%    returnc                    ddl }|j        j        }|j        j        }t	          |j                  }t          ||j                  s:ddl m} 	  ||||          }|j	        |j	        k    rg S n# t          $ r g cY S w xY wddl}d| d| d| }	|                    |	          }
|
j        |j        d         k    r|
                                S g S )	zReturn list of Hugging Face hosted parquet file URLs if they
        exist for the data (i.e. if the dataset is a public dataset that
        has not been transformed) else return an empty list.r   N)load_dataset)splitz$https://huggingface.co/api/datasets/z	/parquet//ok)r   infodataset_nameconfig_namestrr%   
isinstanceIterableDatasetr$   _fingerprint	Exceptionrequestsgetstatus_codecodesjson)clsr   r   r)   r*   
split_namer$   dsr0   
public_urlresps              r   list_parquet_urls_from_datasetz4HuggingFaceDatasource.list_parquet_urls_from_datasetO   s,    	
 |0l.''
 '8#;<< 
	------!\,:NNN?g&:::I ;    			
 	3< 3 3#3 3&03 3 	 ||J''x~d33399;;Is   A. .A=<A=c                     | j         j        S r   )r   dataset_size)r   s    r   estimate_inmemory_data_sizez1HuggingFaceDatasource.estimate_inmemory_data_size~   s    }))r!   c              #     K   dd l }dd l}dd l}| j                            d                              | j                  D ]}t          ||j        |j	        t          |j        f          s t          dt          |           d          t          ||j                  rd|i}t          |t                    rt          |          }t!          j        |                                          }|V  d S )Nr   arrow)r   zBatch format z isn't supported. Only the following batch formats are supported: dict (corresponds to `None` in `dataset.with_format()`), pyarrow.Table, np.array, pd.DataFrame.item)numpypandaspyarrowr   with_formatiterr   r,   Table	DataFramedictarray
ValueErrortypendarrayr   r   	for_block
to_default)r   nppdrC   batchblocks         r   _read_datasetz#HuggingFaceDatasource._read_dataset   s#      	]..w77<<' = 
 
 	 	E egmR\4%RSS  >DKK > > >   %,, (%&& 91%88!+E22==??EKKKK5	 	r!   Nparallelismper_task_row_limitc                 x    t                       t          d d d d           }t          | j        ||          g}|S )N)num_rows
size_bytesinput_files
exec_stats)rU   )r	   r   r   rS   )r   rT   rU   meta
read_taskss        r   get_read_tasksz$HuggingFaceDatasource.get_read_tasks   sa     	    	
 
 
 "#5  &

 r!   )r   r   )__name__
__module____qualname____doc__r   intr    classmethodr   r:   r   r=   r   r
   rS   r   r   r]    r!   r   r   r   9   s         	& 	&EF	& 	& 	& 	& 	& ,JK,	, , , [,\*Xc] * * * *"x " " " "N -1  %SM 
h	     r!   r   )4systypingr   r   r   r   r   $ray.air.util.tensor_extensions.arrowr   ray.data._internal.utilr	   ray.data.blockr
   r   r   ray.data.datasetr   ray.data.datasourcer   r   r   r   ImportError__annotations__transformers.utilsr   modules	importlibimportlib.metadataosdatasets.loadpackaging.versionr   metadataversionDATASETS_VERSION'DATASETS_VERSION_WITHOUT_SCRIPT_SUPPORTpathjoinloadinit_dynamic_modulesdynamic_modules_pathutilspec_from_file_locationspecmodule_from_specr   nameloaderexec_moduleer   rd   r!   r   <module>r      sx   




 A A A A A A A A A A A A A A J J J J J J : : : : : : > > > > > > > > > > $ $ $ $ $ $ 4 4 4 4 4 4 4 4 OOO 48 8K0 7 7 7&" 988888,,1F1F1H1H,!!!!			++++++ !5!3!;!;J!G!GHH27%../EEE#%7<<2244m$ $  >99"$8 D  )~>>tDD%5CK	"K##$4555 " " " !"F F F F FJ F F F F Fs   C3E   EEE