
    Pi)                         d dl Z d dlZd dlmZmZmZ d dlZddlmZm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ ddlmZmZ d	d
lmZ  G d de          Z G d d          ZdS )    N)BinaryIOOptionalUnion   )DatasetFeatures
NamedSplitconfig)query_table)Json)tqdm)NestedDataStructureLikePathLike   )AbstractDatasetReaderc                        e Zd Z	 	 	 	 	 	 	 ddee         dee         dee         dede	de	d	ee         d
ee
         f fdZd Z xZS )JsonDatasetReaderNFpath_or_pathssplitfeatures	cache_dirkeep_in_memory	streamingfieldnum_procc	           
           t                      j        |f||||||d|	 || _        t          |t                    r|n| j        |i}t          d||||d|	| _        d S )N)r   r   r   r   r   r   )r   
data_filesr   r    )super__init__r   
isinstancedictr   r   builder)selfr   r   r   r   r   r   r   r   kwargs	__class__s             d/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/datasets/io/json.pyr    zJsonDatasetReader.__init__   s     			
)		
 		
 		
 		
 		
 
)3M4)H)Hitz[hNi 
$	
 

 
 
    c                     | j         r!| j                            | j                  }nSd }d }d }d }| j                            ||||| j                   | j                            | j        || j                  }|S )N)r   )download_configdownload_modeverification_mode	base_pathr   )r   r,   	in_memory)r   r#   as_streaming_datasetr   download_and_preparer   
as_datasetr   )r$   datasetr*   r+   r,   r-   s         r'   readzJsonDatasetReader.read0   s    > 	l77dj7IIGG #O M $IL-- /+"3# .    l--j4EQUQd .  G r(   )NNNFFNN)__name__
__module____qualname__r   r   r   r	   r   strboolintr    r3   __classcell__)r&   s   @r'   r   r      s         '+'+$#"&
 
.x8
 
#
 8$	

 
 
 
 }
 3-
 
 
 
 
 
@      r(   r   c                       e Zd Z	 	 	 ddedeeef         dee         dee         dee	         f
dZ
defd	Zd
 ZdedefdZdS )JsonDatasetWriterNr2   path_or_buf
batch_sizer   storage_optionsc                     ||dk    rt          d| d          || _        || _        |r|nt          j        | _        || _        d| _        |pi | _        || _	        d S )Nr   z	num_proc z must be an integer > 0.zutf-8)

ValueErrorr2   r=   r
   DEFAULT_MAX_BATCH_SIZEr>   r   encodingr?   to_json_kwargs)r$   r2   r=   r>   r   r?   rD   s          r'   r    zJsonDatasetWriter.__init__I   sw     HMMKKKKLLL&(2U**8U .4",r(   returnc                 N   | j                             dd           }| j                             dd          }| j                             d|dk    rdnd          }d| j         vr|dv r
d| j         d<   t          | j        t          t
          t          j        f          rd	nd }| j                             d
|          }|dvrt          d| d          |s$| j	        | j
        j        k     rt          d          t          | j        t          t
          t          j        f          rPt          j        | j        dfd
|i| j        pi 5 } | j        d|||d| j         }d d d            n# 1 swxY w Y   n0|rt          d| d           | j        d| j        ||d| j         }|S )Nr=   orientrecordslinesTFindex)r   tableinfercompression)NrL   gzipbz2xzz&`datasets` currently does not support z compressionzOutput JSON will not be formatted correctly when lines = False and batch_size < number of rows in the dataset. Use pandas.DataFrame.to_json() instead.wb)file_objrG   rI   zUThe compression parameter is not supported when writing to a buffer, but compression=z1 was passed. Please provide a local path instead.r   )rD   popr!   r=   r7   bytesosr   NotImplementedErrorr>   r2   num_rowsfsspecopenr?   _write)r$   _rG   rI   default_compressionrM   bufferwrittens           r'   writezJsonDatasetWriter.write]   s`   ##M488$((9==#''99L9LRWXX$---&<N2N2N+0D( *4D4DsESUS^F_)`)`jggfj)--m=PQQBBB%&h{&h&h&hiii 	4<+@@@% i   d&eR[(ABB 	p $ 4?DHDXD^\^  j%$+ivfEiiUYUhiij j j j j j j j j j j j j j j
  )Hlw H H H   "dko4+;FRWoo[_[nooGs   E))E-0E-c                 6   |\  }}}}t          | j        j        t          ||| j        z             | j        j                  } |                                j        dd ||d|}|                    d          s|dz  }|	                    | j
                  S )N)rK   keyindices)r=   rG   rI   
r   )r   r2   dataslicer>   _indices	to_pandasto_jsonendswithencoderC   )r$   argsoffsetrG   rI   rD   batchjson_strs           r'   _batch_jsonzJsonDatasetWriter._batch_json~   s    04-~,#fft677L)
 
 

 -5??$$,lfTYll]kll  && 	Ht}---r(   rR   c                    d}| j         | j         dk    rmt          t          dt          | j                  | j                  dd          D ]3}|                     |f          }||                    |          z  }4nt          | j                  | j        }	}t          j	        | j                   5 }
t          |

                    | j        fdt          d||	          D                       ||	z  r||	z  dz   n||	z  dd          D ]}||                    |          z  }	 ddd           n# 1 swxY w Y   |S )	zWrites the pyarrow table as JSON lines to a binary file handle.

        Caller is responsible for opening and closing the handle.
        r   Nr   bazCreating json from Arrow format)unitdescc                     g | ]}|f	S r   r   ).0rl   rI   rG   rD   s     r'   
<listcomp>z,JsonDatasetWriter._write.<locals>.<listcomp>   s"    nnnV&&%@nnnr(   )totalrr   rs   )r   hf_tqdmrangelenr2   r>   ro   r_   multiprocessingPoolimap)r$   rR   rG   rI   rD   r^   rl   rn   rW   r>   pools     ```      r'   rZ   zJsonDatasetWriter._write   s    = DMQ$6$6!aT\**DO<<6   4 4
  ++VVUN,STT8>>(3334 $'t|#4#4dojH %dm44 
8 'II(nnnnnnuUVX`blOmOmnnn  ;CZ:Ok8z1Q66U]akUk:! ! ! 	8 	8H x~~h777GG	8
8 
8 
8 
8 
8 
8 
8 
8 
8 
8 
8 
8 
8 
8 
8 s   9A5D;;D?D?)NNN)r4   r5   r6   r   r   r   r   r   r9   r"   r    r_   ro   rZ   r   r(   r'   r<   r<   H   s        
 %)"&*.- -- 8X-.- SM	-
 3-- "$- - - -(s    B. . .## 
# # # # # #r(   r<   )r{   rU   typingr   r   r   rX    r   r   r	   r
   
formattingr   packaged_modules.json.jsonr   utilsr   rx   utils.typingr   r   abcr   r   r<   r   r(   r'   <module>r      s-       				 , , , , , , , , , ,  4 4 4 4 4 4 4 4 4 4 4 4 $ $ $ $ $ $ - - - - - - # # # # # # < < < < < < < < & & & & & &6 6 6 6 6- 6 6 6rf f f f f f f f f fr(   