
    Pi06                        d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlZ		 d dl
mZ d dlmZ n# e$ r dZY nw xY w	 d dlZd dlmZ d dlmZ n# e$ r dxZZY nw xY wej        j        Zej        j        d             Zej        j        d             Zej        j        d             Zej        j        d             Zej        j        d	             Zej        j        d
             Zd Zd Zej        j        ej                             dg d          d                         Z!ej        j        ej                             ddg          d                         Z"ej        j        ej                             dddg          d                         Z#d Z$ej        j%        ej                             dd          d                         Z&d Z'd Z(d Z)d Z*d Z+d  Z,d! Z-dS )"    N)mock)_write_table)alltypes_samplec                     t          d          } t          j                            |           }t	          j                    }t          ||dd           |                    d           t          j	        |          }|                    d           t          j
        ||          }t          j        | |                                                                           d S )N'  sizesnappy2.6)compressionversionr   )metadata)r   paTablefrom_pandasioBytesIOr   seekpqread_metadataParquetFiletmassert_frame_equalread	to_pandas)dfa_tablebufr   filehs        {/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/pyarrow/tests/parquet/test_parquet_file.pytest_pass_separate_metadatar!   1   s     
e	$	$	$Bh""2&&G
*,,C#8UCCCCHHQKKK$$HHHQKKKN3222E"ejjll446677777    c                     d\  } }t          |           }t          j                            |          }t	          j                    }t          ||| |z  dd           |                    d           t          j	        |          j
        |k    sJ fdt          |          D             }t          j        |          }t          j        ||                                           d S )Nr      r   r
   r   row_group_sizer   r   r   c                 :    g | ]}                     |          S  read_row_group).0ipfs     r    
<listcomp>z.test_read_single_row_group.<locals>.<listcomp>W   s'    9991"##A&&999r"   )r   r   r   r   r   r   r   r   r   r   num_row_groupsrangeconcat_tablesr   r   r   )NKr   r   r   
row_groupsresultr.   s          @r    test_read_single_row_groupr7   E   s     DAq	a	 	 	 Bh""2&&G
*,,C#a!e%u6 6 6 6 HHQKKK			B!!!!9999a999Jj))F"f..0011111r"   c                     d\  } }t          |           }t          j                            |          }t	          j                    }t          ||| |z  dd           |                    d           t          j	        |          t          |j        d d                   fdt          |          D             }t          j        |          }t          j        |         |                                           fd	t          |          D             }t          j        |          }t          j        |         |                                           d S )
Nr$   r   r
   r   r&   r      c                 >    g | ]}                     |           S columnsr*   r,   r-   colsr.   s     r    r/   zAtest_read_single_row_group_with_column_subset.<locals>.<listcomp>j   s,    GGG"##At#44GGGr"   c                 D    g | ]}                     |z              S r;   r*   r>   s     r    r/   zAtest_read_single_row_group_with_column_subset.<locals>.<listcomp>p   s0    NNN"##Atd{#;;NNNr"   )r   r   r   r   r   r   r   r   r   r   listr=   r1   r2   r   r   r   )	r3   r4   r   r   r   r5   r6   r?   r.   s	          @@r    -test_read_single_row_group_with_column_subsetrB   \   sM   DAq	a	 	 	 Bh""2&&G
*,,C#a!e%u6 6 6 6 HHQKKK			B
2A2DGGGGGeAhhGGGJj))F"T(F$4$4$6$6777 ONNNNU1XXNNNJj))F"T(F$4$4$6$677777r"   c                     d\  } }t          |           }t          j                            |          }t	          j                    }t          ||| |z  dd           |                    d           t          j	        |          }|j
        |k    sJ |                    t          |                    }t          j        ||                                           d S )Nr$   r   r
   r   r&   r   )r   r   r   r   r   r   r   r   r   r   r0   read_row_groupsr1   r   r   r   )r3   r4   r   r   r   r.   r6   s          r    test_read_multiple_row_groupsrE   u   s    DAq	a	 	 	 Bh""2&&G
*,,C#a!e%u6 6 6 6 HHQKKK			B!!!!a))F"f..0011111r"   c                     d\  } }t          |           }t          j                            |          }t	          j                    }t          ||| |z  dd           |                    d           t          j	        |          }t          |j        d d                   }|                    t          |          |          }t          j        ||         |                                           |                    t          |          ||z             }t          j        ||         |                                           d S )	Nr$   r   r
   r   r&   r   r9   r<   )r   r   r   r   r   r   r   r   r   r   rA   r=   rD   r1   r   r   r   )r3   r4   r   r   r   r.   r?   r6   s           r    0test_read_multiple_row_groups_with_column_subsetrG      s&   DAq	a	 	 	 Bh""2&&G
*,,C#a!e%u6 6 6 6 HHQKKK			B
2A2Da$77F"T(F$4$4$6$6777 a$+>>F"T(F$4$4$6$677777r"   c                     d\  } }t          |           }t          j                            |          }t	          j                    }t          ||| |z  dd           |                    d           t          j	        |          }|
                                dk    sJ |
                    |j        d d                   dk    sJ d S )	Nr$   r   r
   r   r&   r   r   r%   )r   r   r   r   r   r   r   r   r   r   scan_contentsr=   )r3   r4   r   r   r   r.   s         r    test_scan_contentsrJ      s    DAq	a	 	 	 Bh""2&&G
*,,C#a!e%u6 6 6 6 HHQKKK			B&&&&BJrrN++u444444r"   c                    | dz  }t          j        t          |                     dt          |           d}t          j        t
                    5 }t          j        |           d d d            n# 1 swxY w Y   |                    t                    rt          j        dk    rd S |                    |           d S )N	directoryzCannot open for reading: path 'z' is a directorywin32)osmkdirstrpytestraisesIOErrorr   r   errisinstancePermissionErrorsysplatformmatch)tempdirpathmsgexcs       r    0test_parquet_file_pass_directory_instead_of_filer]      s    [ DHSYY
GCII
G
G
GC	w		 3
t              
)) clg.E.EIIcNNNNNs   A44A8;A8c                     t          j        t          j        ddg          t          j        ddg          gddg          } t          j                    }t	          j        | |           t	          j        |                                          }|j        	                    d          
                                ddgk    sJ |j        	                    d	          
                                ddgk    sJ d
D ]T}t          j        t          t          f          5  |j        	                    |           d d d            n# 1 swxY w Y   Ud S )Nr%      foobarintsstrs)namesr      )r9   )r   tablearrayBufferOutputStreamr   write_tabler   getvaluereaderread_column	to_pylistrQ   rR   
ValueError
IndexError)rg   biofindexs       r    test_read_column_invalid_indexrt      sv   Hbh1v&&%(@(@A"F+- - -E


!
!CN5#
s||~~&&A8"",,..1a&88888"",,..5%.@@@@ ( (]J
344 	( 	(H  '''	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	(( (s   EE	E	
batch_size)i,    i  c           	      d   d}d}t          |          }| dz  }t          j                            |          }t	          ||d|           t          j        |          }|j        d d         |j        dd          fD ]}|                    ||          }	t          d	||z   |          }
t          |	|
          D ]s\  }}t          |||z             }t          j        |                                |j        ||d d f         j        d d |f                             d
                     td S )Ni  rv   r   pandas_roundtrip.parquetr   r   
chunk_size
   )ru   r=   r   Tdrop)r   r   r   r   r   r   r   r=   iter_batchesr1   zipminr   r   r   iloclocreset_index)rY   ru   
total_sizerz   r   filenamearrow_tablefile_r=   batchesbatch_startsbatchstartends                 r     test_iter_batches_columns_readerr      sc    JJ	j	)	)	)B33H(&&r**Kh&( ( ( ( N8$$EJssORZ_5  $$
G$LLQ
: 5zBB66 	 	LE5j%*"455C!!!c	111%)!!!W*5AAtALL   	 r"   rz   rv   c                 2   t          dd          }| dz  }t          j                            |          }|j        j        J t          ||d|           t          j        |          }d }t           ||                    }d}t          |j                  D ]}	t          j        ||                                         |                    |	g                                                              d	                     |d
z  }t          j        ||                                                             d          |                    |	g                                          j        d	d                              d                     |d
z  }d S )Nr   T)r	   categoricalrx   r   ry   c              3   z   K   t          | j                  D ]#}|                     d|g          }|D ]}|V  $d S )N  )ru   r5   )r1   r0   r~   )rr   	row_groupr   r   s       r    get_all_batchesz1test_iter_batches_reader.<locals>.get_all_batches   sk      q/00 	 	Inn%; %  G
 !  	 	r"   r   r   re   r|   )r   r   r   r   schemapandas_metadatar   r   r   rA   r1   r0   r   r   r   rD   headr   r   )
rY   rz   r   r   r   r   r   r   batch_nor-   s
             r    test_iter_batches_readerr      s    
e	6	6	6B33H(&&r**K-999h&( ( ( ( N8$$E   ??5))**GH5'((  
H''))!!1#&&002277<<	
 	
 	

 	A
H''))5545@@!!1#&&00227=II J  	
 	
 	
 	A r"   
pre_bufferFTc                 V   d\  }}t          |          }t          j                            |          }t	          j                    }t          ||||z  dd           |                    d           t          j	        ||           }|
                                j        |k    sJ d S )Nr$   r   r
   r   r&   r   )r   )r   r   r   r   r   r   r   r   r   r   r   num_rows)r   r3   r4   r   r   r   r.   s          r    test_pre_bufferr     s     DAq	a	 	 	 Bh""2&&G
*,,C#a!e%u6 6 6 6 HHQKKK	
	3	3	3B7799""""""r"   c                 v   |                      d          }t          j        ddgddgd          }t          j        ||           t          |d          5 }t          j        |          5 }|                                 |j        rJ |j        rJ 	 ddd           n# 1 swxY w Y   |j        rJ |j        rJ 	 ddd           n# 1 swxY w Y   |j        sJ |j        sJ t          j        |          5 }|                                 |j        rJ 	 ddd           n# 1 swxY w Y   |j        sJ dS )z
    Unopened files should be closed explicitly after use,
    and previously opened files should be left open.
    Applies to read_table, ParquetDataset, and ParquetFile
    zfile.parquetr   re   )col1col2rbN)	joinpathr   rg   r   rj   openr   r   closed)rY   fnrg   rr   ps        r    #test_parquet_file_explicitly_closedr   "  s    
		.	)	)BHq!fq!f5566EN5" 
b$ 1^A 	 !FFHHHx<x<	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  8|8|               8OO88OO8 
		 q	8|               8OO8OOsH   C	+'BC	B#	#C	&B#	'C		CC:D%%D),D)use_uri)TFc                 
   | \  }}}|r|n|f}|ri nt          |          }t          j        dt          d          i          }t	          j        |||           t	          j        |i |}|                                |k    sJ |j        rJ |	                                 |j        sJ t	          j        |i |5 }	|	                                |k    sJ |	j        rJ 	 d d d            n# 1 swxY w Y   |	j        sJ d S )N
filesystemar{   )
dictr   rg   r1   r   rj   r   r   r   close)
s3_example_fsr   s3_fss3_uris3_pathargskwargsrg   parquet_filerr   s
             r    !test_parquet_file_with_filesystemr   ?  sm    +E67*FF7,D6RR 6 6 6FHc599%&&EN5'e4444>42622L%''''""""		(	(	( Avvxx5    8|               8OO8OOs   >$C//C36C3c                  &   t          j        dt          j        g d          i          } t          j                    }t          | |           |                    d           t          j        |          	                                j
        d         j        d         j        }|j        du sJ |j        dk    sJ |j        J |j        du sJ |j        dk    sJ |j        sJ |j        dk    sJ |j        sJ t+          |          d	k    sJ d S )
Nvalue)rf   N   r   Tre   Frf   r   zmarrow.ArrayStatistics<null_count=1, distinct_count=None, min=-1, is_min_exact=True, max=3, is_max_exact=True>)r   rg   rh   r   r   r   r   r   r   r   r=   chunks
statisticsis_null_count_exact
null_countdistinct_countis_distinct_count_exactr   is_min_exactmaxis_max_exactrepr)rg   r   r   s      r    test_read_statisticsr   V  s;   Hgrx66788E
*,,CHHQKKK$$))++3A6=a@KJ)T1111 A%%%%$,,, -6666>R"""">Q""""
 !< = = = = = =r"   c                     |  d}t          j        |                                          }|j        ddgk    sJ |d                                         g dk    sJ d S )Nz/unknown-logical-type.parquetzcolumn with known typezcolumn with unknown type)s   unknown string 1s   unknown string 2s   unknown string 3)r   r   r   column_namesrn   )parquet_test_datadir	test_filerg   s      r     test_read_undefined_logical_typer   m  s    'FFFIN9%%**,,E":<V!WWWWW+,6688 = = =      r"   c                     t          j        d           t          j        dt	          d          i          } t          j        | d           t          j        d          }|                     |          sJ d}t          j	        t          j
        |          5  t          j        d           d d d            d S # 1 swxY w Y   d S )Nfsspecr   r{   fsspec+memory://example.parquetz#Unrecognized filesystem type in URIrX   znon-existing://example.parquet)rQ   importorskipr   rg   r1   r   rj   
read_tableequalsrR   ArrowInvalid)rg   table2r[   s      r     test_parquet_file_fsspec_supportr   y  s    
!!!Hc599%&&EN5;<<<]<==F<<
/C	rc	2	2	2 8 8
67778 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8s   B<<C C c                     	 ddl m}  n$# t          $ r t          j        d           Y nw xY wt          j        dt          d          i          } |             }|                    dd           |	                    d          sJ d	}t          j        |d
|           t          j        d          }|                    |          sJ d S )Nr   MemoryFileSystemz&fsspec is not installed, skipping testbr{   z/path/to/prefixT)create_parentszfsspec+memory://path/to/prefixz	b.parquetr   z(fsspec+memory://path/to/prefix/b.parquet)fsspec.implementations.memoryr   ImportErrorrQ   skipr   rg   r1   rO   existsr   rj   r   r   )r   rg   fsfs_strr   s        r    <test_parquet_file_fsspec_support_through_filesystem_argumentr     s    >BBBBBBB > > ><=====> Hc599%&&E				BHHtH44499&'''''-FN5+&9999]EFFF<<s   	 **c                     	 ddl m}  n$# t          $ r t          j        d           Y nw xY wt          j        d          }| |_        t          j	        
                    dd|i          5  d}t          j        dt          d          i          }t          j        ||           t          j        |          }|                    |          sJ 	 d d d            d S # 1 swxY w Y   d S )	Nr   r   z3fsspec is not installed, skipping Hugging Face testhuggingface_hubzsys.modulesz'hf://datasets/apache/arrow/test.parquetr   r{   )r   r   r   rQ   r   types
ModuleTypeHfFileSystemr   patchr   r   rg   r1   r   rj   r   r   )r   fake_hf_moduleurirg   r   s        r    $test_parquet_file_hugginface_supportr     sI   KBBBBBBB K K KIJJJJJK %&788N"2N	):N(K	L	L $ $7#uRyy)**
uc"""s##||F######$ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $s   	 ***A&CC"%C"c                     	 dd l } t          j        d           n# t          $ r Y nw xY wt	          j        d          }t          j        t          |          5  t          j        d           d d d            d S # 1 swxY w Y   d S )Nr   z"fsspec is available, skipping testzI`fsspec` is required to handle `fsspec+<filesystem>://` and `hf://` URIs.r   r   )	r   rQ   r   r   reescaperR   r   r   )r   r[   s     r    1test_fsspec_uri_raises_if_fsspec_is_not_availabler     s    : 	89999    
 )SU UC	{#	.	.	. 9 9
78889 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9s    
((A<<B B c                 \   t          j        g           }t           j                            g |          }| dz  }t	          j        ||           t	          j        |          }t          j        t                    5  |
                    d           d d d            d S # 1 swxY w Y   d S )N)r   zempty_file.parquetr   )ru   )r   r   r   from_batchesr   rj   r   rQ   rR   ro   r~   )rY   r   empty_tableparquet_file_pathr   s        r    (test_iter_batches_raises_batch_size_zeror     s    Yr]]F(''6'::K"66N; 1222>"344L	z	"	" 0 0!!Q!///0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0s   =B!!B%(B%).r   rN   r   rV   r   rQ   unittestr   pyarrowr   pyarrow.parquetparquetr   pyarrow.tests.parquet.commonr   r   pandaspdpandas.testingtestingr   r   mark
pytestmarkr!   r7   rB   rE   rG   rJ   r]   rt   parametrizer   r   r   r   s3r   r   r   r   r   r   r   r   r)   r"   r    <module>r      s  $ 
			 				 				 



                  9999999   	BBB<<<<<<<   NB [ 
 8 8 8& 2 2 2, 8 8 80 2 2 2( 8 8 8, 5 5 5 
 
 

( 
( 
( '8'8'899  :9 . v..( ( /. (V t}55# # 65 #  : M22  32 *= = =.	 	 	
8 
8 
8     $$ $ $ 9 9 90 0 0 0 0s   1 ;;A 	AA