
    Pi	                        d dl Z d dlZd dlmZ d dlZd dlZd dlmZ d dlm	Z	 d dl
Z
d dlZd dlmZ d dlmZ d dlmZmZmZ 	 d dlmZ d dlmZmZ n# e$ r dZY nw xY w	 d dlZd dlmZ d d	lm Z  d d
lm!Z! n# e$ r dxZZY nw xY w	 d dl"Z#n# e$ r dZ#Y nw xY we
j$        j        Z%d Z&d Z'e
j$        j"        d             Z(e
j$        j"        d             Z)e
j$        j        d             Z*e
j$        j        d             Z+e
j$        j        d             Z,d Z-d Z.d Z/e
j$        j0        d             Z1e
j$        j        d             Z2e
j$        j        d             Z3d Z4e
j$        j        d             Z5d Z6e
j$        7                    dd ej8                    g          e
j$        7                    dd          d                         Z9d Z:d  Z;d! Z<d" Z=d# Z>d$ Z?d% Z@d& ZAe
j$        j        d'             ZBe
j$        j        d(             ZCe
j$        j        d)             ZDd* ZEd+ ZFe
j$        j        d,             ZGd- ZHe
j$        j        e
j$        jI        e
j$        J                    d.          e
j$        J                    d/          d0                                                 ZKe
j$        7                    d1d2 d3 d4 d5 g          e
j$        7                    d6d7d8g          d9                         ZLd: ZMd; ZNd< ZOd= ZPd> ZQd? ZRe
j$        jS        d@             ZTe
j$        7                    dAdBdCdDgg          dE             ZUdS )F    N)OrderedDict)copytree)Decimal)fs)util)_check_roundtrip_roundtrip_table_test_table)_read_table_write_table)dataframe_with_lists)alltypes_samplec                 X   t          j        dg di          }t          j        t          d          5  t          || dz  d           d d d            n# 1 swxY w Y   t          j        t          d          5  t          || dz  d	           d d d            d S # 1 swxY w Y   d S )
Na         z"Unsupported Parquet format versionmatchztest_version.parquetz2.2versionz%Unsupported Parquet data page version)data_page_version)patablepytestraises
ValueErrorr   )tempdirr   s     t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/pyarrow/tests/parquet/test_basic.pytest_parquet_invalid_versionr!   <   sW   Hc999%&&E	z)M	N	N	N M MUG&<<eLLLLM M M M M M M M M M M M M M M	z *! 
" 
" 
" . .UG&<<',	. 	. 	. 	.. . . . . . . . . . . . . . . . . .s#   AAA<BB#&B#c                      t          j        g ddz            } t           j                            | gdg          }ddg}|D ]}t	          ||           d S )Nr   i f0namesi   i   )data_page_size)r   arrayTablefrom_arraysr   )arrt
page_sizestarget_page_sizes       r    test_set_data_page_sizer.   F   su    
(999v%
&
&C
cU4&11A 7#J& = =+;<<<<<= =    c                  J    t          d          } t          | ddd           d S )Nd   
   r   2.4)r&   write_batch_sizer   )r
   r   r   s    r    test_set_write_batch_sizer6   P   s9    Eb1e     r/   c                      t          d          } t          | ddd           t          j        t                    5  t          | ddd           d d d            d S # 1 swxY w Y   d S )Nr1   r   r2   r3   )dictionary_pagesize_limitr&   r   r   )r
   r   r   r   	TypeErrorr5   s    r    "test_set_dictionary_pagesize_limitr:   Y   s    EUa$&7 7 7 7 
y	!	! ; ;#(*E	; 	; 	; 	;; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;s   AA!$A!c            	         g } t           j                            t          d                    }|                     t           j                            |gdz                       t                      \  }}t           j                            |          }|                     t           j                            |gdz                       dD ]}dD ]}| D ]}t          |d||            d S )Nr2   sizer   )z1.0z2.0)TF2.6)r   r   use_dictionary)	r   RecordBatchfrom_pandasr   appendr(   from_batchesr   r   )tablesbatchdf_r   r?   r   s          r    test_chunked_table_writerH   e   s    FN&&B'?'?'?@@E
MM"(''!44555 ""EBN&&r**E
MM"(''!44555+ 3 3+ 	3 	3N 3 3 5&7#13 3 3 3 33	33 3r/   c                 ~   t          d          }t          j                            |          }t	          |ddid           t          | dz            }t          |d          5 }t          ||d	           d d d            n# 1 swxY w Y   t          j	        |d
          }|
                    |          sJ d S )Nr2   r<   
memory_mapTr>   read_table_kwargsr   tmp_filewbr   )rJ   r   r   r(   rA   r   stropenr   pqread_pandasequalsr   rF   r   filenamef
table_reads         r    test_memory_maprY   x   s	   	b	!	!	!BH  $$EU|T.B"$ $ $ $ 7Z'((H	h		 .UAu----. . . . . . . . . . . . . . .T:::JU#######   &BB	B	c                 ~   t          d          }t          j                            |          }t	          |ddid           t          | dz            }t          |d          5 }t          ||d	           d d d            n# 1 swxY w Y   t          j	        |d
          }|
                    |          sJ d S )Nr2   r<   buffer_sizei  r>   rK   rM   rN   r   i   )r\   rO   rU   s         r    test_enable_buffered_streamr]      s	   	b	!	!	!BH  $$EU}d.C"$ $ $ $ 7Z'((H	h		 .UAu----. . . . . . . . . . . . . . .d;;;JU#######rZ   c                 v   t           j                            t          j        dg          gdg          }d}| |z  }|                                rJ t          |t          |                     |                                sJ t          t          |                    }|                    |          sJ d S )N*   intsz	foo # bar)	r   r(   r)   r'   existsr   rP   r   rT   )r   r   rV   pathrX   s        r    test_special_chars_filenamerc      s    H  "(B4..!1F8<<EHXD{{}}D		""";;===SYY''JU#######r/   c                  $   t          j        t          d          5  t          j        d            d d d            n# 1 swxY w Y   t          j        t          d          5  t          j        d            d d d            d S # 1 swxY w Y   d S )NNoner   )r   r   r9   rR   
read_tableParquetFile r/   r    test_invalid_sourceri      s	    
y	/	/	/  
d               
y	/	/	/  
t                 s!   =AA#BB	B	c                 t   ddl m}  G d d          }| dz  }t          j        dg di          }t	          ||           |                    d|	          5  t          j        t          d
          5  t          j
        |dg           d d d            n# 1 swxY w Y   t          j        t          d          5  t          j
        |ddg           d d d            n# 1 swxY w Y   t          j        t          d          5  t          j
        ||j                   d d d            n# 1 swxY w Y   t          j        t          d          5  t          j
        |            d d d            n# 1 swxY w Y   t          j
        |          }||k    sJ 	 d d d            d S # 1 swxY w Y   d S )Nr   )mockc                       e Zd Zd ZdS );test_read_table_without_dataset.<locals>.MockParquetDatasetc                      t          d          )NMockParquetDataset)ImportError)selfargskwargss      r    __init__zDtest_read_table_without_dataset.<locals>.MockParquetDataset.__init__   s    2333r/   N)__name__
__module____qualname__rt   rh   r/   r    ro   rm      s#        	4 	4 	4 	4 	4r/   ro   test.parquetr   r   z#pyarrow.parquet.core.ParquetDataset)newzthe 'filters' keywordr   )integer=r   )filterszthe 'partitioning' keywordweekcolor)partitioningzthe 'schema' argumentschemathe 'source' argument)unittestrk   r   r   r   patchr   r   r   rR   rf   r   )r   rk   ro   rb   r   results         r    test_read_table_without_datasetr      s!   4 4 4 4 4 4 4 4 ^#DHc999%&&E	9?Q	R	R 
 
]:-DEEE 	? 	?M$)<(=>>>>	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	?]:-IJJJ 	@ 	@M$fg->????	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@]:-DEEE 	5 	5M$u|4444	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5]:-DEEE 	# 	#M'"""	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	#t$$
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s   F-4BF-B	F-B	 F->C#F-#C'	'F-*C'	+F-	D1%F-1D5	5F-8D5	9F-E8,F-8E<	<F-?E<	 F--F14F1c                      t          j        t          t          d                    gdg          } t	          | d           d S )Ni@  r#   r$   r   )row_group_size)r   r   listranger   )r+   s    r    (test_file_with_over_int16_max_row_groupsr      sD     	$uU||$$%dV444AQq))))))r/   c                     t          d          } t          j                            |           }t          j                            d |                                D             |j        j                  }|j                            d          j	        t          j
                    k    sJ |j                            d          j	        t          j        t          j
                              k    sJ t          |d           d S )	Nr2   r<   c                 H    g | ]}|                     d           dd           S )r   N)chunk).0cols     r    
<listcomp>z.test_empty_table_roundtrip.<locals>.<listcomp>   s+    999c1bqb	999r/   r$   null	null_listr>   r   )r   r   r(   rA   r)   itercolumnsr   r%   fieldtyper   list_r   )rF   r   s     r    test_empty_table_roundtripr      s    	b	!	!	!B H  $$EH  99U%6%6%8%8999l  ! " "E <f%%*bgii7777<k**/28BGII3F3FFFFFu     r/   c                      t          j                    } t          j                            | d          }t          |           d S )NF)preserve_index)pd	DataFramer   r(   rA   r   )rF   emptys     r    test_empty_table_no_columnsr      s;    	BH  E ::EUr/   c                  t   t          t          j                    t          j        t          j                                        g t          dd          gg} fd| D             }fd|D             }t          j                            |t          j                            }t          |           d S )N)int32list_stringr   )Gc                     g | ]<}t          j        |t          j                                                             =S )r   )r   r'   structflattenr   rE   colss     r    r   zEtest_write_nested_zero_length_array_chunk_failure.<locals>.<listcomp>   sL     $ $ $ %bioo666>>@@ $ $ $r/   c                 v    g | ]5}t           j                            |t          j                             6S )r   )r   r@   r)   r   r   s     r    r   zEtest_write_nested_zero_length_array_chunk_failure.<locals>.<listcomp>   sF     * * * .,,U29T??,KK * * *r/   )	r   r   r   r   stringr(   rC   r   r   )data	my_arrays
my_batchestblr   s       @r    1test_write_nested_zero_length_array_chunk_failurer      s    hjjHRY[[))  D 1&999<=D$ $ $ $"$ $ $I* * * *(* * *J
(


BIdOO
<
<CSr/   c                 $   | dz  }t          j        dt          j        dt          j                  i          }t          ||           t          |          }|                                }t          j	        ||           t          |           dz   }t          j        dt          j        dt          j                  i          }t          ||           t          |          }|                                }t          j	        ||           d S )Nzzzz.parquetxr2   dtype)r   r   nparangeint64r   r   	to_pandastmassert_frame_equalrP   )r   rb   rF   rX   df_reads        r    test_multiple_path_typesr     s     ]"D	sBIb999:	;	;BTT""J""$$G"g&&& w<<-'D	sBIb999:	;	;BTT""J""$$G"g&&&&&r/   c                 |   | dz  }t          j        dg di          }t          ||           t          j        |          }t          |          }|                    |          sJ t          j        t                    5  t          |t          j                               d d d            d S # 1 swxY w Y   d S )Nrx   r   r   
filesystem)r   r   r   r   FSProtocolClassr   rT   r   r   r9   r   
FileSystem)r   rb   r   fs_protocol_objr   s        r    test_fspathr     s   ^#DHc999%&&E*400O))F== 
y	!	! A AO@@@@A A A A A A A A A A A A A A A A A As   #B11B58B5r   name)data.parquetu   例.parquetc                 f   t          j        dg di          }| |z  }t          j        |t	          |                     t          j        |           5  t          j        ||          }d d d            n# 1 swxY w Y   |                    |          sJ |	                                 |
                                rJ t          j        |           5  t          j        |||           d d d            n# 1 swxY w Y   t          j        |          }|                    |          sJ d S )Nr   r   r   )r   r   rR   write_tablerP   r   
change_cwdrf   rT   unlinkra   )r   r   r   r   rb   r   s         r    test_relative_pathsr   $  s    Hc999%&&ET>D N5#d))$$$		!	! < <t
;;;< < < < < < < < < < < < < < <==KKMMM{{}} 
	!	! ; ;
udz::::; ; ; ; ; ; ; ; ; ; ; ; ; ; ;]4  F==s$   A77A;>A;C;;C?C?c                      t          j        t                    5  t          j        d           d d d            d S # 1 swxY w Y   d S )Nzi-am-not-existing.parquet)r   r   FileNotFoundErrorrR   rf   rh   r/   r    test_read_non_existing_filer   =  s    	(	)	) 3 3
12223 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3s   <A A c                       G d dt           j                  } t          j        t          d          5  t          j         | d                     d d d            d S # 1 swxY w Y   d S )Nc                       e Zd Zd Zd ZdS )3test_file_error_python_exception.<locals>.BogusFilec                      t          d          NzorglubZeroDivisionErrorrq   rr   s     r    readz8test_file_error_python_exception.<locals>.BogusFile.readE      #I...r/   c                      t          d          r   r   r   s     r    seekz8test_file_error_python_exception.<locals>.BogusFile.seekH  r   r/   N)ru   rv   rw   r   r   rh   r/   r    	BogusFiler   D  s2        	/ 	/ 	/	/ 	/ 	/ 	/ 	/r/   r   r   r   r/   )ioBytesIOr   r   r   rR   rf   )r   s    r     test_file_error_python_exceptionr   C  s    / / / / /BJ / / / 
(		:	:	: & &
iinn%%%& & & & & & & & & & & & & & & & & &s   AA#&A#c                 0   t          j        dg di          }t          j        |t	          | dz                       t          t	          | dz            d          5 }t          j        |          }d d d            n# 1 swxY w Y   |                    |          sJ t          t	          | dz            d          5 }t          j        t          j        |                    }d d d            n# 1 swxY w Y   |                    |          sJ d S )Nr   r   r   rb)	r   r   rR   r   rP   rQ   rf   rT   
PythonFile)r   r   rW   r   s       r    test_parquet_read_from_bufferr   P  su   Hc999%&&EN5#g677888	c'N*++T	2	2 "aq!!" " " " " " " " " " " " " " "==	c'N*++T	2	2 1ar}Q//001 1 1 1 1 1 1 1 1 1 1 1 1 1 1==s$   A??BB'C44C8;C8c            
      ^   t          j        t          t          t          t          d                                        } t          j        t          t          t          t          d                                        }t          j        ddgdz            }| | g}t           j                            |ddg          }t          ||ddd	           t          ||ddgdg	           t          ||dddgddg	           t           j                            | | ||gg d
          }t          ||ddgddg           t           j                            |gdg          }t          j        t          d          5  t          ||dd           d d d            d S # 1 swxY w Y   d S )Nr1   TF2   r   br$   gzip)expectedcompressionr?   use_byte_stream_splitr   r   cdr   r   )r   r?   r   tmpBYTE_STREAM_SPLIT only supportsr   )r   r   r?   )r   r'   r   mapfloatr   intr(   r)   r   r   r   IOError)	arr_floatarr_intarr_bool
data_floatr   mixed_tables         r    test_byte_stream_splitr   ^  s%   c%s445566IhtCU3ZZ001122Gxu*++HY'JH  C: >>E UU$)G G G G
 UU%(E,/52 2 2 2
 UU%(#J,/:7 7 7 7
 (&&	9gw'O-A-A-A ' C CK[;%(#J,/:7 7 7 7 H  (E7 ;;E	w&G	H	H	H / /d(-	/ 	/ 	/ 	// / / / / / / / / / / / / / / / / /s   F""F&)F&c           
         t          j        t          t          t          t          d                              t          j        dd                    }t          j        t          t          t          t          d                              t          j        dd                    }t          j        t          t          t          t          d                              t          j        dd                    }t          j        dd	gd
z            }|||g}t           j                            |g d          }t          ||dd	d           t          j                            | d          }t          j        ||dd	d           t          j        |          }|j                            d          }	|j                            d          }
|	j        dk    sJ |
j        dk    sJ t          ||dd	dddd           t           j                            ||||gg d          }t          ||d	d           d S )Nr1      r   r      	      TFr   r   r   r   r$   r   )r   r   r?   store_decimal_as_integerrx   )r   r?   r   r   r   INT32INT64DELTA_BINARY_PACKEDr   r   )r   r   r?   r   column_encodingr   )r   r?   r   )r   r'   r   r   r   r   
decimal128r(   r)   r   osrb   joinrR   r   rg   r   columnphysical_type)r   arr_decimal_1_9arr_decimal_10_18arr_decimal_gt18r   data_decimalr   pqtestfile_path
pqtestfilepqcol_decimal_1_9pqcol_decimal_10_18r   s               r    test_store_decimal_as_integerr    sj   htCs$<$<==$&M!Q$7$79 9 9Oc'5::&>&>!?!?&(mB&:&:< < <xS%**%=%= > >%']2q%9%9; ; ;xu*++H#%68HILH  ___ EEE U#!'$).2	4 4 4 4 gll7N;;ON5/%"',02 2 2 2
 00J")0033$+22155*g5555,7777 U#!'$).233& &    (&&	+-=xH""" ' $ $K [)$).24 4 4 4 4 4r/   c                     t          j        t          t          t          t          d                                        } t          j        t          t          t          t          d                                        }t          j        d t          d          D             t          j                              }t          j        d t          d          D             t          j        d                    }t          j        g ddz            }t           j        	                    | ||||gg d	          }t          ||d
ddddd           t          ||d
d           t          ||d
dddd           t          ||d
dddd           t          ||d
ddddd           t          ||d
ddi           t          j        t          d          5  t          ||d
dddd           d d d            n# 1 swxY w Y   t          j        t          d          5  t          ||d
dddd           d d d            n# 1 swxY w Y   t          j        t          d          5  t          ||d
d           d d d            n# 1 swxY w Y   t          j        t          d          5  t          ||d
ddi           d d d            n# 1 swxY w Y   t          j        t                    5  t          ||dgddi           d d d            n# 1 swxY w Y   t          j        t                    5  t          ||ddi           d d d            n# 1 swxY w Y   t          j        t                    5  t          ||d
dgdddd            d d d            n# 1 swxY w Y   t          j        t                    5  t          ||d
d!dddd            d d d            n# 1 swxY w Y   t          j        t                     5  t          ||d
d!           d d d            d S # 1 swxY w Y   d S )"Nr1   c                 ,    g | ]}t          |          S rh   )rP   r   r   s     r    r   z(test_column_encoding.<locals>.<listcomp>  s    3331A333r/   r   c                 R    g | ]$}t          |                              d           %S )r2   )rP   zfillr  s     r    r   z(test_column_encoding.<locals>.<listcomp>  s*    ...aQb		...r/   r2   )FTFF   )r   r   r   r   er$   FBYTE_STREAM_SPLITPLAINr   )r   r?   r  r  r   DELTA_LENGTH_BYTE_ARRAYDELTA_BYTE_ARRAYr  RLEr   r   )r   r   r  z)DELTA_BINARY_PACKED encoder only supportsz+'RLE_DICTIONARY' is already used by defaultRLE_DICTIONARYz/Unsupported column encoding: 'MADE_UP_ENCODING'r   MADE_UP_ENCODINGr   )r   r  )r   r?   r   r  T)r   r'   r   r   r   r   r   binaryr(   r)   r   r   r   r   OSErrorr   r9   )r   r   arr_binarr_flbar   r   s         r    test_column_encodingr$    s   c%s445566IhtCU3ZZ001122Gh33c

333")++FFFGx..5::...RYr]]D D DHx333b899H(&&	GWh9''' ' ) )K [;u+>+>+2+>&@ &@A A A A [;$)%,. . . .
 [;$)+2+@+2&4 &45 5 5 5 [;$)+2+@+D&F &FG G G G [;$)+2+@+=+=&? &?@ @ @ @ [;$)&)5\3 3 3 3 
w>
@ 
@ 
@ E E{(-/6/6/B*D *D	E 	E 	E 	EE E E E E E E E E E E E E E E 
wH
J 
J 
J 9 9{(-/D/6/6*8 *8	9 	9 	9 	99 9 9 9 9 9 9 9 9 9 9 9 9 9 9 
zJ
L 
L 
L ; ;{(-)9	; 	; 	; 	;; ; ; ; ; ; ; ; ; ; ; ; ; ; ; 
zN
P 
P 
P D D{(-*-/A)B	D 	D 	D 	DD D D D D D D D D D D D D D D 
z	"	" 9 9{),*-w	9 	9 	9 	99 9 9 9 9 9 9 9 9 9 9 9 9 9 9 
z	"	" 9 9{*-w	9 	9 	9 	99 9 9 9 9 9 9 9 9 9 9 9 9 9 9 
z	"	" 9 9{(-03u/4/B/6*8 *8	9 	9 	9 	99 9 9 9 9 9 9 9 9 9 9 9 9 9 9 
z	"	" 9 9{(-/3/4/B/6*8 *8	9 	9 	9 	99 9 9 9 9 9 9 9 9 9 9 9 9 9 9 
y	!	! / /{(-)-	/ 	/ 	/ 	// / / / / / / / / / / / / / / / / /s   "HH
H
,III6JJJ<KK"%K"L%%L),L)	M**M.1M.N44N8;N8O==PP!QQ	Qc            
         t          j        t          t          t          t          d                                        } | | g}t           j                            |ddg          }t          ||dd           t          ||dd           t          ||dd	d
           t          ||dddd
           t          ||dd           t          ||dd           g d}t          j
                    }|D ]P\  }}t          j        t          t          f          5  t          ||||           d d d            n# 1 swxY w Y   Qd S )N  r   r   r$   r   r   )r   r   compression_levelr   snappyr  )r   r   r   r   lz4r   ))r(     )r   i)re   i  )lzo   )r   r'  )r   r'   r   r   r   r   r(   r)   r   r   r   r   r   r   r!  r   )r*   r   r   invalid_combinationsbufcodeclevels          r    test_compression_levelr1  :  s   
(4Ct--..
/
/C:DH  c3Z 88E UU'(* * * *
 UU'(* * * * UU'-H!=!=? ? ? ? UU-.Q'7'79 9 9 9
 UU'(* * * * UU'(* * * *8 8 8
*,,C. 2 2]J011 	2 	2+02 2 2 2	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	22 2s   D>>E	E	c                      t          j        g d          } d}t           j                            | g|g          }t	          |ddi          }d}|j        d         j        |k    sJ d S )N)r   r   r   r   r*  zprohib; ,	{}flavorspark)write_table_kwargsprohib______r   )r   r'   r(   r)   r	   r   r   )a0r   r   r   expected_names        r     test_sanitized_spark_field_namesr9  g  sr    	///	"	"BDH  "v..Ee78KLLLF"M= M111111r/   c                  v   t          d          } t          j                            |           }t	          j                    }t          ||dd           |                    d           t          |d          }|                    d           t          |d	          }|	                    |          sJ d S )
Ni'  r<   SNAPPYr>   )r   r   r   T)use_threadsF)
r   r   r(   rA   r   r   r   r   r   rT   )rF   r   r.  table1table2s        r    test_multithreaded_readr?  r  s    	e	$	$	$BH  $$E
*,,C5AAAAHHQKKK$///FHHQKKK%000F==       r/   c                     t          j        t          j        d          gg d          } t          j                            |                                           }t          j	                    }t          ||d           |                    d           t          |          }|                    |          sJ t          j        t                     5  t          ||d           d d d            d S # 1 swxY w Y   d S )Nr*  )ABCD)columns)
chunk_sizer   )r   r   r   r   r   r(   rA   reset_indexr   r   r   r   r   rT   r   r   r   )r   r   r.  r   s       r    test_min_chunksizerI    s%   <10D0D0DEEEDH  !1!1!3!344E
*,,C++++HHQKKKF==	z	"	" / /UCA..../ / / / / / / / / / / / / / / / / /s   C66C:=C:c                    t          j        t          d          t          t          dd                    t	          j        dd                              d          t	          j        ddd	
          g dt          j        t          d                    t          j        dd          t          j        ddd          t          j        ddd          d	          }t          j
                            |          }| dz  }	 t          ||d           n# t          j        $ r Y nw xY w|                                rJ d S )Nabcr   r*  r      u1      @      @float64r   TFT20130101periodsz
US/Eastern)rT  tzns)rT  freq)	r   r   r   r   r  rW   ghirM   r3   r   )r   r   r   r   r   r   astypeCategorical
date_ranger   r(   rA   r   ArrowExceptionra   )r   rF   pdfrV   s       r    (test_write_error_deletes_incomplete_filer`    sK    
DKK q!--IaOO22488Ic3i@@@///N4;;77M*a@@@M*a-9; ; ;M*adKKK	M 	M 	
N 	
NB (

r
"
"C#H 	S(E22222           s   D D+*D+c                     d}	 t          j        |           d S # t          $ r}||j        d         v sJ Y d }~d S d }~ww xY w)Nznonexistent-file.parquetr   )rR   rf   	Exceptionrr   )r   rb   r  s      r    test_read_non_existent_filerc    sj    %D!
d ! ! !qvay          !s    
A ;A c                     t          j                    5  t          j        d           t          j        | dz             d d d            d S # 1 swxY w Y   d S )Nerror)actionzv0.7.1.parquet)warningscatch_warningssimplefilterrR   rf   )datadirs    r    test_read_table_doesnt_warnrk    s    		 	"	" 2 2W----
g 001112 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2s   -AAAc                     t           j                            t          j        ddg          gdg          } t	          j                    }t          j        | |d           |                    d           t          j	        |          }t          j        |                                |                                            d S )NrK  defsome_colr   r   r   )r   r(   r)   r'   r   r   rR   r   r   rf   r   r   r   )r   rW   	roundtrips      r    test_zlib_compression_bugrq    s     H  "(E5>":":!;j\JJE

AN5!0000FF1IIIa  I)--//1B1BCCCCCr/   c                 (   t          | dz            }t          j        t          j        d          5  t          |d          5 }	 d d d            n# 1 swxY w Y   t          j        |           d d d            n# 1 swxY w Y   t          j        t          j        d          5  t          |d          5 }|                    d           d d d            n# 1 swxY w Y   t          j        |           d d d            d S # 1 swxY w Y   d S )Nrx   zsize is 0 bytesr   rN   zsize is 4 bytess   ffff)	rP   r   r   r   ArrowInvalidrQ   rR   rf   write)r   rb   rW   s      r    test_parquet_file_too_smallru    s   w'((D	r.?	@	@	@  $ 		 	 	 	 	 	 	 	 	 	 	 	 	 	 	
d              
 
r.?	@	@	@  $ 	GGG	 	 	 	 	 	 	 	 	 	 	 	 	 	 	
d                 sk   A=AA=A	A=A	A==BB(D9CDC	D"C	#DDDzignore:RangeIndex:FutureWarningz.ignore:tostring:DeprecationWarning:fastparquetc                 4   t          j        d          }t          j        t	          d          t	          t          dd                    t          j        ddd          g d	t          j        d
d          t          j	        g d          d          }t          j        |          }t          | dz            }t          j        ||d            |                    |          }|                                }t#          j        ||           t          | dz            }|                    ||           t          j        |          }|d                             t,                    |d<   t#          j        |                                |           d S )NfastparquetrK  r   r*  rN  rO  rP  r   rQ  rR  r   rS  )r   r   r   )r   r   r   r   r  rW   zcross_compat_arrow.parquetro  z cross_compat_fastparquet.parquetrW   )r   importorskipr   r   r   r   r   r   r]  r\  r   r   rP   rR   r   rg   r   r   r   rt  rS   r[  object)	r   fprF   r   
file_arrowfp_filedf_fpfile_fastparquettable_fps	            r    $test_fastparquet_cross_compatibilityr    sv   
 
	]	+	+B	eeAqkk""39555$$$z155500		
 		

 
B HRLLE W;;<<JN5*$7777nnZ((GE"e$$$ 7%GGHHHHr"""~.//H gnnV$$BsG(,,..33333r/   array_factoryc                  4    t          j        dd gdz            S Nr   r2   r   r'   rh   r/   r    <lambda>r     s    BHaY^$$ r/   c                  X    t          j        dd gdz                                            S r  r   r'   dictionary_encoderh   r/   r    r  r    s$    BHaY^$$6688 r/   c                  4    t          j        dd gdz            S N r2   r  rh   r/   r    r  r    s    BHb$Z"_%% r/   c                  X    t          j        dd gdz                                            S r  r  rh   r/   r    r  r    s$    BHb$Z"_%%7799 r/   read_dictionaryFTc                    t           j                            d |             i          }t          j                    }t          j        ||d           |                    d           |rdgnd }t          j        |d|          }|j	        D ]G}|j
        \  }|                                d         }|                                |j        dz  k    sJ Hd S )	Nr   T)r?   r   F)r<  r  r       )r   r(   from_pydictr   r   rR   r   r   rf   rE  chunksbuffers
to_pybytesr=   )r  r  
orig_tablebior   r   r   r.  s           r    test_buffer_contentsr    s     %%ummoo&>??J
*,,CN:s48888HHQKKK!0:uggdOM#5*9; ; ;E } 4 4*mmooa ~~38e#3333334 4r/   c                    t          j        t          j        t          d                    gdg          }| dz  }t	          j        ||d           t	          j        |          }|                    |          sJ d S )Nr*  r`   r$   zarrow-10480.pyarrow.gzGZIPro  )r   r   r'   r   rR   r   rf   rT   )r   r   rb   r   s       r    "test_parquet_compression_roundtripr    s|    
 HbhuQxx(()&:::E--DN5$F3333]4  F==r/   c                    t           j                            t          j        g d          gdg          }| dz  }d}t	          j        ||j                  5 }t          |          D ]}|                    |           	 d d d            n# 1 swxY w Y   t	          j	        |          }|j
        j        |k    sJ t          |          D ],}|                    |                              |          sJ -d S )Nr   r   r#   zempty_row_groups.parquetr   )r   r(   r)   r'   rR   ParquetWriterr   r   r   rg   metadatanum_row_groupsread_row_grouprT   )r   r   rb   
num_groupswriterrZ  readers          r    test_empty_row_groupsr  &  sJ   H  "(2G"<"<"<!=vFFE//DJ		$	-	- &z"" 	& 	&Au%%%%	&& & & & & & & & & & & & & & & ^D!!F?)Z7777: 6 6$$Q''..u5555556 6s   (BBBc                     d gdz  }|                     dg           t          j                            |gdg          }| dz  }t	          j        ||           t	          j        |          }||k    sJ d S )Ni   r   r  zarrow-11607.parquet)rB   r   r(   r)   rR   r   rf   )r   r   r   rb   r>  s        r    test_reads_over_batchr  8  s|    6WDKK H  $(44E**DN5$]4  FF??????r/   c                    | dz  }|                     d           t          j        g dg dgddg          }t          j        ||d	z             t          j        g d
g dgddg          }t          j        ||dz             t          j        t          |                    }t          j        g dg dgddg          }||k    sJ d S )N dataset_column_order_permutationT)exist_okr   )皙?皙?333333?r   r   r$   zdata1.parquet)皙?      ?333333?)r*  r   rL  zdata2.parquet)r   r   r   r*  r   rL  )r  r  r  r  r  r  )mkdirr   r   rR   r   rf   rP   )r   casedata1data2r   r>  s         r     test_permutation_of_column_orderr  F  s    77DJJJHiii.sCjAAAEN5$0111HlllIII.sCjAAAEN5$0111M#d))$$EX)))5557 #J( ( (F F??????r/   c                    | dz  }t          j        t          t          d                              }d}t          j        |g|z  d t          |          D                       }t          j        ||           t          j        t          d          5  t          j
        |d|z  	           d d d            n# 1 swxY w Y   t          j        t          d          5  t          j
        ||
           d d d            n# 1 swxY w Y   t          j
        |d|z  	          }||k    sJ t          j
        |d|z  
          }||k    sJ t          j
        |          }||k    sJ d S )Nzlargethrift.parquetr2   r&  c                     g | ]}d | S )some_long_column_name_rh   )r   rZ  s     r    r   z+test_thrift_size_limits.<locals>.<listcomp>`  s!    EEE+++EEEr/   r$   z1Couldn't deserialize thrift:.*Exceeded size limitr   r   )thrift_string_size_limit)thrift_container_size_limitr1   r   )r   r'   r   r   r   rR   r   r   r   r!  rf   )r   rb   r'   num_colsr   gots         r    test_thrift_size_limitsr  Y  s'   **DHT%))__%%EHH	(EEU8__EEEG G GE N5$	E
G 
G 
G D D 	dR(]CCCCD D D D D D D D D D D D D D D 
E
G 
G 
G B B 	dAAAAB B B B B B B B B B B B B B B
 -sX~
F
F
FC%<<<<
-!h,
G
G
GC%<<<<
-

C%<<<<<<s$   B==CC#DD
D
c                    | dz  }t          j        dg di          }t          j        ||d           t          j        |d          }||k    sJ t          |                                          }|d         |d         k    sJ |d         |d         c|d<   |d<   | d	z  }|                    |           t          j        |d
          }||k    sJ |t          j        dg di          k    sJ t          j	        t          d          5  t          j        |d          }ddd           n# 1 swxY w Y   t          j        |d
          }|                                }	|	|k    sJ |	t          j        dg di          k    sJ t          j        |d          }t          j	        t          d          5  |                                }ddd           dS # 1 swxY w Y   dS )zUCheck that checksum verification works for datasets created with
    pq.write_table()zcorrect.parquetr   r   r   r   r*  Twrite_page_checksumpage_checksum_verification   $   zcorrupted.parquetFr   r   r   r*  CRC checksum verificationr   N)r   r   rR   r   rf   	bytearray
read_byteswrite_bytesr   r   r!  rg   r   )
r   original_path
table_origtable_checkbin_datacorrupted_pathtable_corruptrG   corrupted_pq_filetable_corrupt2s
             r    +test_page_checksum_verification_write_tabler  t  s   
 //M3-..JN:}$GGGG -$OOOK$$$$
 113344H B<8B<''''!)"x|HRL(2, 22Nx((( M.=BD D DM J&&&&BHc<<<%8999999 
w&A	B	B	B K KM.TJJJK K K K K K K K K K K K K K K
 ~BGI I I&++--NZ''''RXsLLL&9:::::: ~BFH H H 
w&A	B	B	B % %""$$% % % % % % % % % % % % % % % % % %s$   D33D7:D7?G!!G%(G%c                 2   t          j        dg di          }| dz  }t          j        ||d           t	          |                                          }t          |          dk    sJ |d         }t          j        |d          }||k    sJ t          |	                                          }|d	         |d
         k    sJ |d
         |d	         c|d	<   |d
<   | dz  }t          ||           ||j        z  }|                    |           t          j        |d          }	|	|k    sJ |	t          j        dg di          k    sJ t          j        t          d          5  t          j        |d          }
ddd           dS # 1 swxY w Y   dS )zXCheck that checksum verification works for datasets created with
    pq.write_to_datasetr   r  correct_dirTr  r   r   r  r  r  corrupted_dirFr  r  r   N)r   r   rR   write_to_datasetr   iterdirlenrf   r  r  r   r   r  r   r   r!  )r   r  original_dir_pathoriginal_file_path_listr  r  r  corrupted_dir_pathcorrupted_file_pathr  rG   s              r    test_checksum_write_to_datasetr    sA   
 3-..J  -/
),02 2 2 2
 ##4#<#<#>#>??&''1,,,,+A.M-$OOOK$$$$
 113344H B<8B<''''!)"x|HRL(2, !?2 2333,}/AA##H--- M"5=BD D DM J&&&&BHc<<<%8999999 
w&A	B	B	B P PM-$OOOP P P P P P P P P P P P P P P P P Ps   (FFFsourcez/tmp/z/tmp/file1.parquetz/tmp/file2.parquetc                     |                      t          j        dd            t          j        t
          d          5  t          j        |           d d d            d S # 1 swxY w Y   d S )Nzpyarrow.datasetr   r   )r  )setitemsysmodulesr   r   r   rR   rf   )monkeypatchr  s     r    9test_read_table_raises_value_error_when_ds_is_unavailabler    s     %6===	z)@	A	A	A % %
V$$$$% % % % % % % % % % % % % % % % % %s   A  A$'A$)Vr  r  collectionsr   r   rg  shutilr   decimalr   r   pyarrowr   r   pyarrow.testsr   pyarrow.tests.parquet.commonr   r	   r
   pyarrow.parquetparquetrR   r   r   rp   pandasr   pandas.testingtestingr   pyarrow.tests.pandas_examplesr   r   numpyr   mark
pytestmarkr!   r.   r6   r:   rH   rY   r]   rc   ri   r   slowr   r   r   r   r   r   parametrizeLocalFileSystemr   r   r   r   r   r  r$  r1  r9  r?  rI  r`  rc  rk  rq  ru  rw  filterwarningsr  r  r  r  r  r  r  r  datasetr  r  rh   r/   r    <module>r     s  $ 
			 



 # # # # # # 				                              7 7 7 7 7 7 7 7 7 7      FFFFFFFFF   	BBBBBBBBB<<<<<<<   NB   	BBB
 [ 
. . .= = =    ; ; ; 3 3 3$ $ $ $ $ $ $$ $ $    0 * * *        8 ' ' '$A A A  
"


(   !@AA    BA  *3 3 3
& 
& 
&     #/ #/ #/L34 34 34l}/ }/ }/@*2 *2 *2Z2 2 2 ! ! !" / / /  ! ! !4! ! !2 2 2 D D D
 
 
 =>>LMM!4 !4 NM ?>  !4H $$88%%99	+   *UDM::4 4 ;: 4(	  	  	 6 6 6$    &  67% 7% 7%t 1P 1P 1Ph w-/CDEG G% %G G% % %s6   A AAA6 6	BBB BB