
    Pi'                     "   d dl Z d dlmZ d dlmZ d dlZd dlZd dl	m
Z d dlZd dlZd dlmZ d dlmZ d dlmZ ej        j                            e          Zd Zd Zd	 Ze G d
 dej                              Z G d dej                  ZdS )    N)	dataclass)Optional)Key)
table_cast)readlinec                      	 t          j        j        j        | i |S # t          $ r t          j        j        j        | i |cY S w xY wN)pdiojsonujson_dumpsAttributeErrordumpsargskwargss     w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/datasets/packaged_modules/json/json.pyr   r      \    1uz%t6v666 1 1 1uz0000001    %AAc                      	 t          j        j        j        | i |S # t          $ r t          j        j        j        | i |cY S w xY wr	   )r
   r   r   ujson_loadsr   loadsr   s     r   r   r      r   r   c                 d    t           j        j        j        dk    rd|d<   t	          j        | fi |S )N   pyarrowdtype_backend)datasetsconfigPANDAS_VERSIONmajorr
   	read_json)path_or_bufr   s     r   pandas_read_jsonr#   #   s7    %+q00"+<..v...    c                        e Zd ZU dZdZeej                 ed<   dZ	e
ed<   dZee
         ed<   dZee
         ed<   dZeed	<   dZee         ed
<   dZeed<   dZee         ed<    fdZ xZS )
JsonConfigzBuilderConfig for JSON.Nfeaturesutf-8encodingencoding_errorsfieldTuse_threads
block_sizei   	chunksizenewlines_in_valuesc                 H    t                                                       d S r	   )super__post_init__)self	__class__s    r   r2   zJsonConfig.__post_init__6   s    r$   )__name__
__module____qualname____doc__r'   r   r   Features__annotations__r)   strr*   r+   r,   boolr-   intr.   r/   r2   __classcell__)r4   s   @r   r&   r&   )   s         !!,0Hhx()000Hc%)OXc])))E8C=K $J$$$Is)----                 r$   r&   c                   N    e Zd ZeZd Zd Zdej        dej        fdZ	d Z
d ZdS )	Jsonc                 >   | j         j        0t                              d           | j         j        | j         _        | j         j        durt                              d           | j         j        t          d          t          j	        | j         j
                  S )NzTThe JSON loader parameter `block_size` is deprecated. Please use `chunksize` insteadTzZThe JSON loader parameter `use_threads` is deprecated and doesn't have any effect anymore.zEThe JSON loader parameter `newlines_in_values` is no longer supported)r'   )r   r-   loggerwarningr.   r,   r/   
ValueErrorr   DatasetInfor'   )r3   s    r   _infoz
Json._info=   s    ;!-NNqrrr$(K$:DK!;"$..NNl   ;)5deee#T[-ABBBBr$   c           	         | j         j        st          d| j         j                   dj        _                            | j         j                  }                    |          }g }|                                D ]E\  }}fd|D             }|                    t          j
        ||||         d                     F|S )z-We handle string, list and dicts in datafilesz=At least one data file must be specified, but got data_files=Tc                 :    g | ]}                     |          S  )
iter_files).0extracted_file
dl_managers     r   
<listcomp>z*Json._split_generators.<locals>.<listcomp>R   s'    kkkz44^DDkkkr$   )files_iterables
base_files)name
gen_kwargs)r   
data_filesrD   download_configextract_on_the_flydownloadextractitemsappendr   SplitGenerator)r3   rM   base_data_filesextracted_data_filessplits
split_nameextracted_filesrO   s    `      r   _split_generatorszJson._split_generatorsI   s    {% 	wu]a]h]suuvvv8<
"5$--dk.DEE)11/BB+?+E+E+G+G 	 	'Jkkkk[jkkkOMM'#3BRablRmnn      r$   pa_tablereturnc           
         | j         j        t          | j         j                  t          |j                  z
  D ]f}| j         j        j                            |          j        }|                    |t          j	        d gt          |          z  |                    }gt          |j                  D ]\  }}t          j                            ||         j                  r| j         j                            |d           t          j        d          k    r||                             t$          j                                      dd          }t          j	        d d|z                       d	          D             t          j                              }|                    |||          }t1          || j         j        j                  }|S )
N)typestring)types_mapperrecordsT)orientlinesc              3   H   K   | ]}|d |                                 z   V  dS ){N)rstrip)rK   xs     r   	<genexpr>z#Json._cast_table.<locals>.<genexpr>l   s5      TTaRSTqxxzz)TTTTTTr$   
z
{)r   r'   setcolumn_namesarrow_schemar+   rd   append_columnpaarraylen	enumeratetypes	is_structgetr   Value	to_pandasr
   
ArrowDtypeto_jsonsplitre   
set_columnr   )r3   ra   column_namerd   ijsonlstring_arrays          r   _cast_tablezJson._cast_table[   s   ;+"4;#7883x?T;U;UU l l{+8>>{KKP#11+rxQTU]Q^Q^H^ei?j?j?jkk"+H,A"B"B Q Q;8%%h{&;&@AA QdkFZF^F^G G^H--G. G. !-">> 	>> 
 $&8TTD5L3G3G3N3NTTT[][d[f[f$ $ $L  (221k<PPH "(DK,@,MNNHr$   c              #      K   |E d {V  d S r	   rI   )r3   rP   rO   s      r   _generate_shardszJson._generate_shardst   s$      r$   c              #   @
  K   t          |          D ]
\  }}|D ] }| j        j        4t          || j        j        | j        j                  5 }t          |                                          }d d d            n# 1 swxY w Y   || j        j                 }t          t          j
        t          |                              }|j                                        dgk    r-| j        j        rt          | j        j                  ndg|_        t           j                            |d          }	t'          |d          |                     |	          fV  Dt          |d          5 }d}
t+          | j        j        dz  d          }| j        j        | j        j        nd	}	 |                    | j        j                  }|sn7	 ||                                z  }n.# t0          t          j        f$ r |t/          |          z  }Y nw xY w| j        j        dk    r4|                    | j        j        |                              d          }	 	 	 t9          j        t          j        |          t9          j        |                    }	n# t           j         t           j!        f$ r}tE          |t           j                   rdtG          |          vs|tI          |          k    r tJ          &                    dtI          |           d| d|dz   d           |dz  }Y d }~nd }~ww xY wސn# t           j         $ r}	 t          || j        j        | j        j                  5 }t          |          }d d d            n# 1 swxY w Y   n@# tN          $ r3 tJ          (                    d| dtS          |           d|            |w xY w|j                                        dgk    r-| j        j        rt          | j        j                  ndg|_        	 t           j                            |d          }	n[# t           j         $ rI}tJ          (                    d| dtS          |           d|            tO          d| d          d d }~ww xY wt'          |d          |                     |	          fV  Y d }~n6d }~ww xY wt'          ||
          |                     |	          fV  |
dz  }
Zd d d            n# 1 swxY w Y   d S )N)r)   errorsr   textF)preserve_indexrb    i @  strictTr(   )r   )r-   )read_options
straddlingz	Batch of z* bytes couldn't be parsed with block_size=z. Retrying with block_size=r   .zFailed to load JSON from file 'z' with error z: z=Failed to convert pandas DataFrame to Arrow Table from file 'z<Failed to convert pandas DataFrame to Arrow Table from file    )*rw   r   r+   openr)   r*   r   readr#   r   StringIOr   columnstolistr'   listrt   Tablefrom_pandasr   r   maxr.   r   r   UnsupportedOperationdecodeencodepajr!   BytesIOReadOptionsArrowInvalidArrowNotImplementedError
isinstancer;   rv   rB   debugrD   errorrd   )r3   rP   rO   	shard_idxfiles_iterablefilefdatasetdfra   	batch_idxr-   r*   batches                  r   _generate_tableszJson._generate_tablesw   s     )2?)C)C P	+ P	+%I~& O+ O+;$0dT[-A$+Jefff 8jk"-affhh"7"78 8 8 8 8 8 8 8 8 8 8 8 8 8 8 &dk&78G)"+k'6J6J*K*KLLBz((**qc11CG;CW%eT$+*>%?%?%?^d]e
!x33Bu3MMHi++T-=-=h-G-GGGGGG dD)) @+Q$%	 &))>")Dh%O%O
;?;;V;bDK77hp (8+$%FF4;+@$A$AE#( & %5 % 5$2B4K#L 5 5 5 %! 45  ${3w>>(-T[5IRa(b(b(i(ijq(r(r*&!<%<36=,.Ju,=,=COgqLrLrLr4* 4* 4* ).,.OR=X+Y %< %< %<,6q"/,J,J)<0<CFF0J0J/9CJJ/F/F,1 -3LL 1kCJJ  1k  1kr|  1k  1k  Zd  gh  Zh  1k  1k  1k-. -. -. -7!OJJJJJJ%<!<
 ). $&? & & &!,)-(,t{7KTXT_To*& *& *& %A)*-=a-@-@%A %A %A %A %A %A %A %A %A %A %A %A %A %A %A (2 !, !, !,$*LL1tSW1t1tfjklfmfm1t1tqr1t1t$u$u$u*+G!, $&:#4#4#6#61##=#=OS{Oc1qdk6J1K1K1KjpiqBJ!0/1x/C/CBW\/C/]/]HH') !0 !0 !0$*LL )Jhl  )J  )J{  AB  |C  |C  )J  )J  GH  )J  )J%& %& %& +5(ngk(n(n(n+& +&+/%0	!0 '*)Q&7&79I9I(9S9S&S S S S %+&, #&i";";T=M=Mh=W=W"WWWW%NIq8+@+ @+ @+ @+ @+ @+ @+ @+ @+ @+ @+ @+ @+ @+ @+O+P	+ P	+s   "A>>BB.ATG%$T%(H	TH	ATL9;JL9L3	1A8L.	)L9.L3	3L97T9S	
&N0N NNNNNS	=OAS	$!QS	RARR*S	TS	1TTTN)r5   r6   r7   r&   BUILDER_CONFIG_CLASSrF   r`   rt   r   r   r   r   rI   r$   r   r@   r@   :   s        %
C 
C 
C  $BH     2  Q+ Q+ Q+ Q+ Q+r$   r@   ) r   dataclassesr   typingr   pandasr
   r   rt   pyarrow.jsonr   r   r   datasets.configdatasets.builderr   datasets.tabler   datasets.utils.file_utilsr   utilslogging
get_loggerr5   rB   r   r   r#   BuilderConfigr&   ArrowBasedBuilderr@   rI   r$   r   <module>r      sh   				 ! ! ! ! ! !                                      % % % % % % . . . . . . 
		*	*8	4	41 1 11 1 1/ / /          '       N+ N+ N+ N+ N+8% N+ N+ N+ N+ N+r$   