
    Pi                         d dl Z d dlZd dlmZmZmZ d dlZd dlmZ	 ddl
mZmZmZmZ ddlmZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ  G d de          Z  G d d          Z!dS )    N)BinaryIOOptionalUnion   )DatasetFeatures
NamedSplitconfig)$get_writer_batch_size_from_data_size#get_writer_batch_size_from_features)query_table)_PACKAGED_DATASETS_MODULES)Parquet)tqdm)NestedDataStructureLikePathLike   )AbstractDatasetReaderc                        e Zd Z	 	 	 	 	 	 ddee         dee         dee         dede	de	d	ee
         f fd
Zd Z xZS )ParquetDatasetReaderNFpath_or_pathssplitfeatures	cache_dirkeep_in_memory	streamingnum_procc           
           t                      j        |f||||||d| t          |t                    r|n| j        |i}t
          d         d         }	t          d||||	d|| _        d S )N)r   r   r   r   r   r   parquetr   )r   
data_filesr   hash )super__init__
isinstancedictr   r   r   builder)selfr   r   r   r   r   r   r   kwargsr!   	__class__s             g/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/datasets/io/parquet.pyr$   zParquetDatasetReader.__init__   s     			
)		
 		
 		
 		
 		
 *4M4)H)Hitz[hNi))4Q7 
$	
 

 
 
    c                     | j         r!| j                            | j                  }nSd }d }d }d }| j                            ||||| j                   | j                            | j        || j                  }|S )N)r   )download_configdownload_modeverification_mode	base_pathr   )r   r0   	in_memory)r   r'   as_streaming_datasetr   download_and_preparer   
as_datasetr   )r(   datasetr.   r/   r0   r1   s         r+   readzParquetDatasetReader.read2   s    > 	l77dj7IIGG #O M $IL-- /+"3# .    l--j4EQUQd .  G r,   )NNNFFN)__name__
__module____qualname__r   r   r   r	   r   strboolintr$   r7   __classcell__)r*   s   @r+   r   r      s         '+'+$"&
 
.x8
 
#
 8$	

 
 
 
 3-
 
 
 
 
 
>      r,   r   c                       e Zd Z	 	 	 	 ddedeeef         dee         dee	         dee
e	f         de
fd	Zd
efdZdeded
efdZdS )ParquetDatasetWriterNTr6   path_or_buf
batch_sizestorage_optionsuse_content_defined_chunkingwrite_page_indexc                    || _         || _        |pBt          |j                  p.t	          t          |          |                                          | _        |pi | _        || _	        |du rt          j        }|| _        || _        d S )NT)r6   rA   r   r   r   len_estimate_nbytesrB   rC   parquet_writer_kwargsr
   DEFAULT_CDC_OPTIONSrD   rE   )r(   r6   rA   rB   rC   rD   rE   rI   s           r+   r$   zParquetDatasetWriter.__init__K   s     & ^273CDD^3CLL'BZBZB\B\]] 	
  /4"%:"'4//+1+E(,H) 0r,   returnc                 @   t          | j        t          t          t          j        f          rRt          j        | j        dfi | j        pi 5 } | j	        d|| j
        d| j        }d d d            n# 1 swxY w Y   n | j	        d| j        | j
        d| j        }|S )Nwb)file_objrB   r"   )r%   rA   r;   bytesosr   fsspecopenrC   _writerB   rI   )r(   bufferwrittens      r+   writezParquetDatasetWriter.writec   s
   d&eR[(ABB 	T-tTT8L8RPRTT X^%$+ ##  0                "dk )?  , G
 s   A33A7:A7rN   c           	      F   d}|                     dd          }| j        j        j        }t	          j        |f|| j        | j        d|}t          t          dt          | j                  |          dd          D ]X}t          | j        j        t          |||z             | j        j                  }	|                    |	           ||	j        z  }Y| j        d	ur.|                    d
t%          j        | j                  i           |                                 |S )zWrites the pyarrow table as Parquet to a binary file handle.

        Caller is responsible for opening and closing the handle.
        r   rA   N)schemarD   rE   baz"Creating parquet from Arrow format)unitdesc)tablekeyindicesFcontent_defined_chunking)popr6   r   arrow_schemapqParquetWriterrD   rE   hf_tqdmrangerG   r   _dataslice_indiceswrite_tablenbytesadd_key_value_metadatajsondumpsclose)
r(   rN   rB   rI   rU   _rX   writeroffsetbatchs
             r+   rS   zParquetDatasetWriter._writes   sH   
 !%%mT::&3!
)-)J!2	
 

 $
 
 !S&&
335
 
 
 	$ 	$F
  l(&&:"566-  E
 u%%%u|#GG ,E99))+EtzRVRsGtGt*uvvvr,   )NNTT)r8   r9   r:   r   r   r   r   r   r=   r&   r<   r$   rV   rS   r"   r,   r+   r@   r@   J   s        
 %)*.:>!%1 11 8X-.1 SM	1
 "$1 ',D$J&71 1 1 1 10s     #x #S #VY # # # # # #r,   r@   )"rl   rP   typingr   r   r   rQ   pyarrow.parquetr   rb    r   r   r	   r
   arrow_writerr   r   
formattingr   packaged_modulesr    packaged_modules.parquet.parquetr   utilsr   rd   utils.typingr   r   abcr   r   r@   r"   r,   r+   <module>r}      sf    				 , , , , , , , , , ,        4 4 4 4 4 4 4 4 4 4 4 4 d d d d d d d d $ $ $ $ $ $ 9 9 9 9 9 9 6 6 6 6 6 6 # # # # # # < < < < < < < < & & & & & &5 5 5 5 50 5 5 5pL L L L L L L L L Lr,   