
    Pi[                     4   d dl Z d dlZ	 d dlZn# e$ r dZY nw xY wd dlZd dlZd dlm	Z	m
Z
 d dlmZ d dlmZ 	 d dlmZ d dlmZmZmZ n# e$ r dZY nw xY w	 d dlZd dlmZ d dlmZmZ n# e$ r dxZZY nw xY wej        j        Zej        j        d             Zej        j        d             Z ej        j        d	             Z!ej        j        d
             Z"ej        j        d             Z#ej        j        d             Z$ej        j        d             Z%ej        j        d             Z&ej        j        d             Z'ej        j        d             Z(ej        j        d             Z)ej        j        d             Z*ej        j        d             Z+ej        j        d             Z,ej        j        d             Z-ej        j        d             Z.ej        j        d             Z/ej        j        d             Z0ej        j        d             Z1ej        j        d             Z2ej        j        d             Z3ej        j        d             Z4ej        j        d             Z5ej        j        d             Z6ej        j        d             Z7ej        j        ej        8                    d g d!          ej        8                    d"d#d$g          d%                                     Z9ej        j        d&             Z:ej        j        d'             Z;dS )(    N)LocalFileSystemSubTreeFileSystem)guid)Version)_read_table_test_dataframe_write_table)_roundtrip_pandas_dataframealltypes_samplec                 p   t          d          }| dz  }t          j                            |          }d|j        j        v sJ t          ||           t          j        |          j        }d|v sJ t          j
        |d                             d                    }|d         dd ddd	d
gk    sJ d S )N'  sizepandas_roundtrip.parquet   pandasutf8index_columnsranger      )kindnamestartstopstep)r   paTablefrom_pandasschemametadatar	   pqread_metadatajsonloadsdecode)tempdirdffilenamearrow_tabler   jss         u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/pyarrow/tests/parquet/test_pandas.py#test_pandas_parquet_custom_metadatar+   7   s    	e	$	$	$B33H(&&r**K*33333h'''))2H    	HY'..v66	7	7BoG,0-.,-$/ $/ #0 0 0 0 0 0 0    c           	         t          j        t          j        dt          j                              t          j        dt          j                              t          j        dt          j                              g          }t          j        t          j	        dt          j
                  t          j	        dt          j                  g dd          }t          j        dd	gd
d gd d gd          }t           j                            ||d          }t           j                            ||d          }|j                            |j        d          rJ |j                            |j                  sJ t          j        | dz  |          }|                    |           |                    |           d S )Nintfloatstring   dtype)ABBAEDDAACDC)r.   r/   r0         g?F)r   preserve_indexT)check_metadatazmerged.parquet)r   )r   r   fieldint16float32r0   pd	DataFramenparangeuint8r   r   equalsr    ParquetWriterwrite_table)r%   r   df1df2table1table2writers          r*   :test_merging_parquet_tables_with_different_pandas_metadatarK   K   s    Y


##
"*,,''
29;;''  F
 ,y"(+++1BJ///***   C
 ,1vt,   C
 X!!#fU!KKFX!!#fU!KKF}##FM$#GGGGG=.....g(88HHHF
v
vr,   c                    | dz  }t          d          }ddddd|_        t          j                            |          }d	|j        j        d
         v sJ t          ||           t          j	        |          j        }t          j        |d
                             d                    }d|v sJ |d         |j        k    sJ d S )Nzmetadata_persistence.parquetr   r   zhalf-precisionzsingle precisionzdouble precisionz%Attributes Persistence Test DataFrame)float16r=   float64
desciptions
   attributesr   r   
attributes)r   attrsr   r   r   r   r   r	   r    r!   r"   r#   r$   )r%   r'   r&   tabler   r)   s         r*   $test_attributes_metadata_persistencerS   h   s     77H	e	$	$	$B#%%=	 BH H  $$EEL1)<<<<<!!!))2H	HY'..v66	7	7B2lrx''''''r,   c                    t          d          }t          j                            t	          t          |j        |j        d d d                             ddg          |_        | dz  }t          j        	                    |          }|j
        j        J t          ||           t          j        |          }|                                }t!          j        ||           d S )N
   r   level_1level_2namesr   )r   r>   
MultiIndexfrom_tupleslistzipcolumnsr   r   r   r   pandas_metadatar	   r    read_pandas	to_pandastmassert_frame_equal)r%   r&   r'   r(   
table_readdf_reads         r*   %test_pandas_parquet_column_multiindexrg      s    	b	!	!	!B**SRZ"-..//)$ +  BJ
 33H(&&r**K-999h'''))J""$$G"g&&&&&r,   c                    t          d          }| dz  }t          j                            |d          }|j        j        }|d         rJ |d         sJ t          ||           t          j        |          }|j        j        }|d         rJ |j        j	        }|j        j	        |k    sJ |
                                }t          j        ||           d S )Nr   r   r   Fr9   r   r_   )r   r   r   r   r   r`   r	   r    ra   r   rb   rc   rd   )r%   r&   r'   r(   r)   re   r!   rf   s           r*   <test_pandas_parquet_2_roundtrip_read_pandas_no_index_writtenrj      s    	e	$	$	$B33H(&&r%&@@K			+B/"""" i==h'''))J			*B/""""%.M&-7777""$$G"g&&&&&r,   c                  h   t          d          } t          j                            |           }t          j                    }t          ||d           |                                }t          j        |          }t          |          	                                }t          j        | |           d S )Nr   2.6versionr   r   r   r   BufferOutputStreamr	   getvalueBufferReaderr   rb   rc   rd   r&   r(   imosbufreaderrf   s         r*   )test_pandas_parquet_native_file_roundtriprw      s    			B(&&r**K ""DdE2222
--//C_S!!F&!!++--G"g&&&&&r,   c                     t          d          } t          j                            |           }t          j                    }t          ||d           |                                }t          j        |          }t          j	        |ddg          
                                }t          j        | ddg         |           d S )Nr   rl   rm   stringsrB   r_   )r   r   r   r   rp   r	   rq   rr   r    ra   rb   rc   rd   rs   s         r*   test_read_pandas_column_subsetr{      s    			B(&&r**K ""DdE2222
--//C_S!!FnG,  ikk  "i12G<<<<<r,   c                  h   t          d          } t          j                            |           }t          j                    }t          ||d           |                                }t          j        |          }t          |          	                                }t          j        | |           d S )Nr   rl   rm   ro   rs   s         r*   #test_pandas_parquet_empty_roundtripr}      s    			B(&&r**K ""DdE2222
--//C_S!!F&!!++--G"g&&&&&r,   c                      ddiddiddigdd} t          j        |           }t          j                            |          }t          j                    }t          ||           d S )	N	page_typer   record_typenon_consecutive_homer   1001)agg_col	uid_first)data)r>   r?   r   r   r   rp   r	   )r   r&   r(   rt   s       r*   !test_pandas_can_write_nested_datar      s     !A#Q'

  D 
4	 	 	 B(&&r**K ""Dd#####r,   c                    | dz  }d}t          j        t          j        |t          j                  t          j        |t          j                  t          j        |t          j                  t          j                            |          dk    g dd          }t          j
                            |          }|                    d          5 }t          ||d	           d d d            n# 1 swxY w Y   t          j        |                                          }t#          |          }|                                }t'          j        ||           d S )
Nzpandas_pyfile_roundtrip.parquetr8   r2   r   )foobarNbazqux)int64r=   rN   boolry   wbrl   rm   )r>   r?   r@   rA   r   r=   rN   randomrandnr   r   r   openr	   ioBytesIO
read_bytesr   rb   rc   rd   )	r%   r'   r   r&   r(   fr   re   rf   s	            r*   $test_pandas_parquet_pyfile_roundtripr      sl   ::HD	4rx0009T4449T444	%%)555  
 
B (&&r**K	t		 4[!U33334 4 4 4 4 4 4 4 4 4 4 4 4 4 4 :h))++,,DT""J""$$G"g&&&&&s   C22C69C6c                    d}t           j                            d           t          j        t          j        |t           j                  t          j        |t           j                  t          j        |t           j                  t          j        |t           j	                  t          j        |t           j
                  t          j        |t           j
                  t          j        |t           j                  t          j        |t           j                  t          j        |t           j                  t          j        |t           j                  t           j                            |          dk    d          }| dz  }t           j                            |          }dD ]M}t'          ||d|           t)          |          }|                                }t-          j        ||           NdD ]M}t'          ||d|	           t)          |          }|                                }t-          j        ||           Nd
D ]x}	|	dk    r%t           j        j                            |	          s-t'          ||d|	           t)          |          }|                                }t-          j        ||           yd S )Nr   r   r2   )rB   uint16uint32uint64int8r<   int32r   r=   rN   r   r   )TFrl   )rn   use_dictionary)rn   write_statistics)NONESNAPPYGZIPLZ4ZSTDr   )rn   compression)r@   r   seedr>   r?   rA   rB   r   r   r   r<   r   r   r=   rN   r   r   r   r   r	   r   rb   rc   rd   libCodecis_available)
r%   r   r&   r'   r(   r   re   rf   r   r   s
             r*   )test_pandas_parquet_configuration_optionsr      s   DINN1	4rx000)D	222)D	222)D	222	$bh///4rx0004rx0004rx0009T4449T444	%%)  
 
B 33H(&&r**K' + +[(E$2	4 	4 	4 	4 **
&&((
b'****) + +[(E&6	8 	8 	8 	8 **
&&((
b'****@ + +6!!FL--k:: "[(E!,	. 	. 	. 	. **
&&((
b'****+ +r,   c                      t          d          } t          j        ddt          |           z  d          | _        d| j        _        t          | ddi          }t          j        ||            d S )Nd   r   r   rU   r   flavorspark)	r   r@   rA   lenindexr   r
   rc   rd   )r&   results     r*   +test_spark_flavor_preserves_pandas_metadatar   )  sg    	c	"	"	"ByBRL"--BHBHM(h-@AAF&"%%%%%r,   c                 @   t          j        d          dt          j        d          dit          j        d          t          j        d          t          j        d          t          j        d          id}t          | dz            }t          j        |d                              dd	
          }t
          j                            |          }t          ||           t          |          }|
                                }t          j        ||           d S )Nz2017-06-30 01:31:00g*_c@z2017-06-30 01:32:00)closetimedata.parquetzdatetime64[us]r2   r   Fdrop)r>   	Timestampstrr?   	set_indexr   r   r   r	   r   rb   rc   rd   )r%   r   pathdfxtdfxr(   	result_dfs          r*    test_index_column_name_duplicater   3  s    L.//1CL.//1C

 L.//%2 2 L.//%2 2	
 D w'((D
 ,t#3
4
4
4
>
>vE
>
R
RC8$$Dtd##K%%''I)S)))))r,   c                    d}t          t          |                    }t          j                            g d|gddg          }t          j        d|i|          }t          j                            |          }| dz  }t          ||           t          |          }|                    |          sJ |                                }t          j        ||           d S )	Nr1   )r   r   r   foobarsome_numbersrY   numbers)r   zdup_multi_index_levels.parquet)r]   r   r>   r[   from_arraysr?   r   r   r   r	   r   rC   rb   rc   rd   )	r%   num_rowsr   r   r&   rR   r'   result_tabler   s	            r*    test_multiindex_duplicate_valuesr   Q  s    H5??##GM%%			(( &  E
 
y'*%	8	8	8BH  $$E99H!!!x((L<<%%%%%&&((I)R(((((r,   c                     d}t          j        t          j        |          dd dd          }t	          | dz            }|                                }t          j        ||           d S )N  carat        cut  color  clarity  depth  table  price     x     y     z
 0.23      Ideal      E      SI2   61.5   55.0    326  3.95  3.98  2.43
 0.21    Premium      E      SI1   59.8   61.0    326  3.89  3.84  2.31
 0.23       Good      E      VS1   56.9   65.0    327  4.05  4.07  2.31
 0.29    Premium      I      VS2   62.4   58.0    334  4.20  4.23  2.63
 0.31       Good      J      SI2   63.3   58.0    335  4.34  4.35  2.75
 0.24  Very Good      J     VVS2   62.8   57.0    336  3.94  3.96  2.48
 0.24  Very Good      I     VVS1   62.3   57.0    336  3.95  3.98  2.47
 0.26  Very Good      H      SI1   61.9   55.0    337  4.07  4.11  2.53
 0.22       Fair      E      VS2   65.1   61.0    337  3.87  3.78  2.49
 0.23  Very Good      H      VS1   59.4   61.0    338  4.00  4.05  2.39\s{2,}r   pythonsep	index_colheaderenginezv0.7.1.parquet)r>   read_csvr   r   r   rb   rc   rd   datadirexpected_stringexpectedrR   r   s        r*   &test_backwards_compatible_index_namingr   g  su    KO {2:o66I%)!HF F FH"2233E__F&(+++++r,   c                     d}t          j        t          j        |          dg ddd                                          }t          | dz            }|                                }t          j        ||           d S )Nr   r   cutcolorclarityr   r   r   zv0.7.1.all-named-index.parquet)	r>   r   r   r   
sort_indexr   rb   rc   rd   r   s        r*   1test_backwards_compatible_index_multi_level_namedr   |  s    KO {

?##---   jll	  "BBCCE__F&(+++++r,   c                 @   d}t          j        t          j        |          dg ddd                                          }|j                            g d          |_        t          | dz            }|                                }t          j
        ||           d S )	Nr   r   r   r   r   r   )r   Nr   zv0.7.1.some-named-index.parquet)r>   r   r   r   r   r   	set_namesr   rb   rc   rd   r   s        r*   6test_backwards_compatible_index_multi_level_some_namedr     s    KO {

?##!<!<!<   jll	 
 ^--.F.F.FGGHN"CCDDE__F&(+++++r,   c           	         t          d          t          t          j                  k    rt          j        d           t          j        g dg dt          j        ddd          d	          }t          j                            g d	t          j        ddd          gd
d g          |_	        | dz  }t          |          }|                                }t          j        ||           t          |dg          }|                                }t          j        ||dg                             d                     d S )Nz2.2.0zRegression in pandas 2.2.0r      r1   )g?g?g333333?z
2017-01-01r1   zEurope/Brussels)periodstzabcr   rY   z'v0.7.1.column-metadata-handling.parquetr   rz   Tr   )r   r>   __version__pytestskipr?   
date_ranger[   r   r   r   rb   rc   rd   reset_index)r   r   r   rR   r   s        r*   2test_backwards_compatible_column_metadata_handlingr     sR   w72>2222 	0111|iilllmL!8IJJJ	L 	LM MH ]..		|Q3D	E	E	E	Go /  HN
 >>DE__F&(+++se  E__F&(C5/"="=4"="H"HIIIIIr,   c                  ,   t          j        ddgddggddg          } | d                             d          | d<   |                     dg          } t          j                            |           }t	          j                    }t          j	        ||           t          j
        |                                                                          }t          |j        t           j                  sJ |j                            | j                  sJ d S )	Nr   r   r   dc1c2rz   category)r>   r?   astyper   r   r   r   rp   r    rE   ra   rq   rb   
isinstancer   CategoricalIndexrC   )r&   rR   bosref_dfs       r*   )test_categorical_index_survives_roundtripr     s     
SzC:.t	E	E	EB$xz**BtH	tf		BH  $$E


!
!CN5#^CLLNN++5577FflB$788888<rx(((((((r,   c                     t          j        dt          j        g dg dd          i          } t          j                            |           }t          j                    }t          j        ||           |	                                }t          j
        |                                          }t          j        ||            d S )Nr   )r   r   r   r   )r   r   r   T)
categoriesordered)r>   r?   Categoricalr   r   r   rp   r    rE   rq   ra   rb   rc   rd   )r&   rR   r   contentsr   s        r*   )test_categorical_order_survives_roundtripr     s     
sBN$H H H I 
J 
JB H  $$E


!
!CN5#||~~H^H%%//11F&"%%%%%r,   c                     t          j        d gdz  dgdz  d          } |                     ddd          }t          j                            |           }t          j                            |          }t          j                    }t          j        ||dd           t          j	        |
                                          }|d                             |d                   sJ |d	                             |d	                   sJ d S )
Nr   g      ?)colr.   r   rl   rU   )rn   
chunk_sizer   r   )r>   r?   r   r   r   r   rp   r    rE   
read_tablerq   rC   )r&   df_categoryrR   	table_catru   r   s         r*   *test_pandas_categorical_na_type_row_groupsr    s     
tfslC53;??	@	@B))JzBBCCKH  $$E$$[11I


!
!C N9c5R@@@@]3<<>>**F !9E!H%%%%%!9E!H%%%%%%%r,   c                  ,   t          j        g dd          } g d}t          j        dt          j                            | |          i          }t          j                    }t          j	        t          j
        |          |           t          j        |                                                                          }|j        j        dk    sJ |j        j        j        |k                                    sJ t'          j        ||           d S )N)r   r   r   r   r   rV   r   r   r2   )r   r   r   x)r   r   )r@   arrayr>   r?   r   
from_codesr   rp   r    rE   rR   r  rq   rb   r	  r3   catr   allrc   rd   )codesr   r&   ru   r   s        r*   !test_pandas_categorical_roundtripr    s    
 H+++7;;;E&&&J	sBN55* 6 & & ' 
( 
(B 

!
!CN28B<<%%%]3<<>>**4466F8>Z''''HL#z16688888&"%%%%%r,   c                    t          t          j                  t          d          k     rt          j        d           t          j        dg did          }|                    d          }t          j        dg di          }|                    d          }t          j        |d                   	                                t          j        |d                   	                                k    sJ t          j        |d         j
        j        j                  	                                t          j        |d         j
        j        j                  	                                k    sJ t          | dz            }t          j        t          j        |          |           t          j        |                                          }t'          j        ||           d S )	Nz1.3.0z:PyArrow backed string data type introduced in pandas 1.3.0r	  )r   r   r   zstring[pyarrow]r2   r   zcat.parquet)r   r>   r   r   r   r?   r   r   r
  	to_pylistr  r   valuesr   r    rE   rR   r  rb   rc   rd   )r%   rF   rG   r   r   s        r*   )test_categories_with_string_pyarrow_dtyper    s    r~!1!111PQQQ
,2223;L
M
M
MC
**Z
 
 C
,2223
4
4C
**Z
 
 C 8CH''))RXc#h-?-?-I-I-K-KKKKK8CHL+233==??28C&D( D((1	4 4 4 4 w&''DN28C==$''']4  **,,F&#&&&&&r,   c                    t          j        dg dd          }|d                             d          |d<   t          j        |          }t          j        |t          | dz            dg           t          j        t          | dz                      	                                }t          j        |dg         |dg                    t          j        |t          | d	z                       t          j        t          | d	z                      	                                }t          j        |dg         |dg                    t          j        |t          | d
z                       t          j        t          | d
z                      	                                }t          j        |dg         |dg                    d S )Nr   r   partr  r  Int64case1r  partition_colscase2r   )r>   r?   r   r   rR   r    write_to_datasetr   r  rb   rc   rd   rE   )r%   r&   rR   r   s       r*   5test_write_to_dataset_pandas_preserve_extensiondtypesr  $  s   	s99955	6	6B5	  ))BuIHRLLEs7W$%%vh    ]3w01122<<>>F&%/2ug;777s7W#455666]3w01122<<>>F&%/2ug;777N5#g677888]3w78899CCEEF&%/2ug;77777r,   c                    t          j        g dg dd          }t          j        g dd          |_        t	          j        |          }|ddg                                         }|d                             d	          |d<   t          j	        |t          | d
z            dg           t          j        t          | d
z                                                      }t          j        ||           t          j	        |t          | dz                       t          j        t          | dz                                                      }t          j        ||           t          j        |t          | dz                       t          j        t          | dz                                                      }t          j        ||           d S )N)r   r   r   r   r  r   idxr   r  r  r   r  r  r  r   )r>   r?   Indexr   r   rR   copyr   r    r  r   r  rb   rc   rd   rE   )r%   r&   rR   df_catr   s        r*   +test_write_to_dataset_pandas_preserve_indexr$  9  s    
yyyAA	B	BBxe444BHHRLLE %%''FF^**:66F6Ns7W$%%vh    ]3w01122<<>>F&&)))s7W#455666]3w01122<<>>F&"%%%N5#g677888]3w78899CCEEF&"%%%%%r,   r9   )TFNmetadata_fname	_metadata_common_metadatac                    d}d}| t                      z  }|                                 g }g }g }t          |          D ]}	t          ||	          }
t	          j        t          j        |	|z  |	dz   |z  d          d          |
_        ||	 dz  }t          j
                            |
|	          }|                    d           }|j        j        J t          ||           |                    |           |                    |
           |                    |           t          j
                            |
|	          }t#          j        |j        ||z             t#          j        |          }d
dg|                                                              }t	          j        fd|D                       }|dur|
j        j        nd |j        _        t1          j        ||           d S )Nr8   )r   r   r   r2   r   r   z.parquetri   rB   ry   rz   c                      g | ]
}|         S  r*  ).0r	  r_   s     r*   
<listcomp>z<test_dataset_read_pandas_common_metadata.<locals>.<listcomp>~  s    555!G*555r,   F)r   mkdirr   r   r>   r!  r@   rA   r   r   r   r   replace_schema_metadatar   r   r	   appendr    write_metadataParquetDatasetra   rb   concatr   rc   rd   )r%   r9   r%  nfilesr   dirpath	test_dataframespathsir&   r   rR   table_for_metadatadatasetr   r   r_   s                    @r*   (test_dataset_read_pandas_common_metadatar;  R  s    FDGMMOOOIFE6]]  T***8Ia$hQ$g>>>W
 
 
 A'$$R$GG --d33|$,,,UD!!!bT --
> .   (/>1IJJJ((G	"G   11;;==Fy5555f55566H'u44$ N&(+++++r,   c                 ,   t          j        dg di          }| dz  }t          ||           t          j        dt          t          |           t                                          }|                    t          j
        |                    sJ d S )Nr   r   r   )
filesystem)r>   r?   r	   r    ra   r   r   r   rC   r   rR   )r%   r&   r'   r   s       r*   %test_read_pandas_passthrough_keywordsr>    s     
sIII&	'	'B'HX^$S\\?3D3DEE  F =="&&&&&&&r,   c                 t   t          j        t          j        ddgddgg          t          j        ddg          d          }| dz  }t          j        t          j                    t          j                              }t          j        t          j        d	|          t          j        d
t          j                              g          }t          j        	                    ||          }t          ||           t          j        |                                          }t          j        ||           d S )N)id	something)value2else)r@  
something2)valueelse2r   r   )col1col2r   rG  rH  )r>   r?   Seriesr   map_r0   r   r;   r   r   r	   r    ra   rb   rc   rd   )r%   r&   r'   udtr   r(   r   s          r*   test_read_pandas_map_fieldsrL    s    
	 "45!#56
   	5%.))  
 
B 'H
'")++ry{{
+
+CY--rx	/L/LMNNF(&&r622Kh'''^H%%//11F&"%%%%%r,   )<r   r"   numpyr@   ImportErrorr   pyarrowr   
pyarrow.fsr   r   pyarrow.utilr   pyarrow.vendored.versionr   pyarrow.parquetparquetr    pyarrow.tests.parquet.commonr   r   r	   pandasr>   pandas.testingtestingrc   r
   r   mark
pytestmarkr+   rK   rS   rg   rj   rw   r{   r}   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r$  parametrizer;  r>  rL  r*  r,   r*   <module>r\     s  $ 
			    	BBB      9 9 9 9 9 9 9 9       , , , , , ,      < < < < < < < < < < <   	BBB? ? ? ? ? ? ? ? ?   NB [ 
 0 0 0&   8 ( ( (0 ' ' '$ ' ' '0 ' ' ' 
= 
= 
= ' ' ' $ $ $  ' ' '. )+ )+ )+X & & & * * *: ) ) )* , , ,( , , ,0 , , ,2 J J J0 ) ) ) & & & & & &" & & &$ ' ' '. 8 8 8( & & &0 )+>+>+>??)K9K+LMM,, ,, NM @? ,,^ ' ' ' & & & & &s/    A
 
AAA+ +	A76A7