
    Pi$                     Z   d dl Z d dlZd dlZd dlZd dlZ	 d dlZn# e$ r dZY nw xY wd dlZd dl	m
Z
 d dlZd dlmZ d dlmZmZmZmZmZmZ d dlmZ d dlmZ 	 d dlmZ d dlmZm Z m!Z!m"Z" n# e$ r dZY nw xY w	 d dl#Z$d dl%m&Z' n# e$ r dxZ$Z'Y nw xY wej(        j        ej(        j)        gZ*d Z+ej(        j#        d             Z,ej(        j#        d             Z-ej(        j#        d	             Z.ej(        j#        d
             Z/ej(        0                    e1e2fd          ej(        j#        d                         Z3ej(        j#        d             Z4ej(        j#        d             Z5ej(        j#        d             Z6ej(        j#        d             Z7ej(        j#        d             Z8ej(        j#        ej(        9                    ddgdgg ej:        d          dk      ej:        dd          dk      ej:        dd          ;                     ej<                              dk     f          ej(        9                    dd          d                                     Z=ej(        j#        d             Z>ej(        j?        d             Z@ej(        j?        d             ZAej(        j#        d              ZBej(        j#        ej(        j?        d!                         ZCd" ZDd# ZEd$ ZFej(        j#        d%             ZGej(        j#        d&             ZHej(        j#        d'             ZIej(        j        d(             ZJej(        j        d)             ZKej(        j        d*             ZLdUd-ZMd. ZNej(        j        ej(        9                    d/d0d1g          d2                         ZOej(        j        d3             ZPej(        j        d4             ZQej(        j        ej(        9                    d/d0d1g          d5                         ZRd6 ZSd7 ZT	 	 	 dVd8ZU	 dWd9ZVej(        j#        d:             ZWej(        j#        d;             ZXej(        j#        d<             ZYej(        j#        d=             ZZej(        j#        d>             Z[ej(        j#        ej(        j?        d?                         Z\ej(        j#        ej(        j?        ej(        ]                    ej^        d@k    dAB          dC                                     Z_ej(        j#        ej(        j?        dD                         Z`ej(        j#        dE             ZadXdGZbej(        j#        dH             Zcej(        j#        dI             ZddJ ZedK ZfdL ZgdM ZhdN ZidO ZjdP Zkej(        9                    dQdR          dS             Zlej(        j#        dT             ZmdS )Y    N)FileSelector
FileSystemLocalFileSystemPyFileSystemSubTreeFileSystemFSSpecHandler)util)guid)_read_table_test_dataframe_test_table_write_tablec                    t          j        dg di          }| dz  }|                                 |dz  }t          j        |t          |                     t          j        |t                                }|                    |          sJ t          j        dt          j
        |                     }|                    |          sJ d S )Na         data_dirdata.parquet
filesystemzdata_dir/data.parquet)patablemkdirpqwrite_tablestr
read_tabler   equalsr	   _filesystem_uri)tempdirr   	directorypathresults        v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/pyarrow/tests/parquet/test_dataset.pytest_filesystem_urir'   ;   s    Hc999%&&E*$IOO~%DN5#d))$$$ ]**, , ,F== ]D,@,I,IK K KF==    c                 B    t                      }t          ||            d S N)r   _partition_test_for_filesystem)r"   locals     r&   test_read_partitioned_directoryr-   N   s#    E"5'22222r(   c                     t                      }| }t          ||           t          j        |          }|                    dg          }|j        dgk    sJ d S )Nvaluescolumns)r   r+   r   ParquetDatasetreadcolumn_names)r"   r,   	base_pathdatasetr%   s        r&   'test_read_partitioned_columns_selectionr7   T   se     EI"5)444	**G\\8*\--F8*,,,,,,r(   c                    t                      }| }ddg}g d}ddg}d|gd|gd|gg}t          j        t          j        |d	
                              d          t          j        t          j        t          j        |t          
          d          d          t          j        t          j        t          j        |d
          d          d          t          j        d          d          }t          ||||           t          j        ||g d          }|                                }	|	                                                    d          }
d|
d         j        vsJ d|
d         j        vsJ d|
d         j        vsJ g dddgg}t          j        |||          }|                                }	|	                                                    d          }
|
d         dk    |
d         dk    z  |
d         dk    z  }t          j        |
d                   dk    |
d         dk    z  }|                                dk    sJ |                                dk    sJ |
j        d         |                                |                                z   k    sJ dggdggfD ]8}t          j        |||          }|                                j        dk    sJ 9d S )Nr   r   r   bcTFintegerstringbooleani4dtype      r   boolr      r<   r=   r>   r/   ))r<   =r   )r=   !=r:   )r>   ==Truer   filtersdropr:   )r<   rG   r   )r>   rI   FalserJ   rO   )r=   rI   s   1 a)r=   rI   z1 a)r   pd	DataFramenparrayrepeattileobjectarange_generate_partition_directoriesr   r2   r3   	to_pandasreset_indexr/   sumshapenum_rows)r"   r,   r5   integer_keysstring_keysboolean_keyspartition_specdfr6   r   	result_dfrL   df_filter_1df_filter_2s                 r&   test_filters_equivalencyrf   a   sF   EIq6L!//K%=L	L!	;	L!N 
8L555<<R@@'"'"(;f"E"E"EqII1MM72728L#G#G#GKKQOO)B--	  
 
B $E9nbIII e, , ,  G
 LLNNE""..D.99IIi(/////i)00000	),33333	
 	
 	

 
89G eW6 6 6GLLNNE!!--4-88I Y'1,X#%'Y6)+K 8Ii011Q6Y7*,K??q    ??q    ?1+//"3"3koo6G6G"GHHHH/01./02 , ,#%: : :||~~&!+++++	, ,r(   c                    t                      }| }g d}d|gg}d}t          j        t          j        |          t          j        |d          dddg          }t          ||||           t          j        ||d	d
g          }|	                                }|
                                                    d                              d          }	d t          t          |	d         j                  D             }
|
ddgk    sJ d S )Nr   r   r   r      integersrC   r?   r@   indexrj   rl   r0   )rj   <ri   )rj   >r   rK   byTrM   c                     g | ]}|S  rr   .0xs     r&   
<listcomp>z9test_filters_cutoff_exclusive_integer.<locals>.<listcomp>   s    EEE1EEEr(   r   r   r   rP   rQ   rR   rW   rS   rX   r   r2   r3   rY   sort_valuesrZ   mapintr/   r"   r,   r5   r^   ra   Nrb   r6   r   rc   result_lists              r&   %test_filters_cutoff_exclusive_integerr~      s7   EI"??L	\"N 	
A	1H\666  $
& 
& 
&B
 $E9nbIIIe  
  G LLNNE"""{g{.."{{--  FEc#y'<'CDDEEEK1a&      r(   z5Loss of type information in creation of categoricals.)raisesreasonc           	      N   t                      }| }t          j        ddd          t          j        ddd          t          j        ddd          t          j        ddd          t          j        ddd          g}d|gg}d	}t          j        t          j        |          t          j        |d
          dddg          }t          ||||           t          j
        ||ddg          }|                                }|                                                    d                              d          }	t          j        t          j        t          j        ddd          gd
          t          j        |d
                    }
|	d         j        |
k    sJ d S )Ni  ri   	   
            datesrC   
datetime64r@   )rl   r   rl   r0   )r   rm   z
2018-04-12)r   rn   z
2018-04-10rK   ro   TrM   
categories)r   datetimedaterP   rQ   rR   rW   rS   rX   r   r2   r3   rY   rx   rZ   Categoricalr/   )r"   r,   r5   	date_keysra   r|   rb   r6   r   rc   expecteds              r&   &test_filters_cutoff_exclusive_datetimer      s    EI 	dAq!!dAr""dAr""dAr""dAr""I 
)N 	
A	1)<888  !
# 
# 
#B
 $E9nbIIIe((
  G LLNNE"""{g{.."{{--  ~
(-a,,-\BBB8I\:::< < <H W$000000r(   c           
      f   | dz  }t          j        t          j        ddd          t          d          d                              |d           t          j        |d	d
t          j        ddd          fg          }|                    d          	                                g dk    sJ d S )Nztimestamps.parquetz
2020-01-01r   D)periodsfreq)r   idT)use_deprecated_int96_timestampsr   <=i  r   rC   rL   r   rh   )
rP   rQ   
date_rangerange
to_parquetr   r   r   column	to_pylist)r"   r$   r   s      r&   test_filters_inclusive_datetimer      s     ))DL|RcBBBBii    z$z===M$	$)$1556)   E <<''))___<<<<<<r(   c                    t                      }| }g d}d|gg}d}t          j        t          j        |          t          j        |d          dddg          }t          ||||           t          j        ||d	d
g          }|	                                }|
                                                    d                              d          }	d t          t          |	d         j                  D             }
|
ddgk    sJ d S )Nrh   rj   rC   r?   r@   rk   rl   r0   )rj   r   r   )rj   z>=r   rK   ro   TrM   c                 ,    g | ]}t          |          S rr   )rz   rs   s     r&   rv   z2test_filters_inclusive_integer.<locals>.<listcomp>*  s    JJJa3q66JJJr(   r   r   rw   r{   s              r&   test_filters_inclusive_integerr     s7   EI"??L	\"N 	
A	1H\666  $
& 
& 
&B
 $E9nbIIIe!!
  G LLNNE""++))+4+((  KJ3sIj,A,H#I#IJJJK1a&      r(   c                 >   t                      }| }ddg}g d}ddg}d|gd|gd|gg}t          j        t          j        |d	
                              d          t          j        t          j        t          j        |t          
          d          d          t          j        t          j        t          j        |d
          d          d          t          j        d          d          }t          ||||           t          j        ||dg          }|                                }	|	                                                    d          }
d|
d         j        v sJ d|
d         j        v sJ d|
d         j        vsJ t          j        ||dddgfddddhfg          }|                                }	|	                                                    d          }
d|
d         j        vsJ d|
d         j        vsJ d|
d         j        vsJ d S )Nr   r   r9   TFr<   r=   r>   r?   r@   rB   rC   r   rD   r   rE   rF   )r=   inabrK   rM   r   r:   r;   r   )r=   r   r   r:   znot inrO   )r   rP   rQ   rR   rS   rT   rU   rV   rW   rX   r   r2   r3   rY   rZ   r/   )r"   r,   r5   r^   r_   r`   ra   rb   r6   r   rc   s              r&   test_filters_inclusive_setr   .  sV   EIq6L!//K%=L	L!	;	L!N 
8L555<<R@@'"'"(;f"E"E"EqII1MM72728L#G#G#GKKQOO)B--	  
 
B $E9nbIIIe'(  G LLNNE""..D.99I)H%,,,,,)H%,,,,,i)00000eTA3')EXy13  G
 LLNNE""..D.99IIi(/////i)00000	),3333333r(   c                 n   t                      }| }g d}d|gg}d}t          j        t          j        |          t          j        |d          dddg          }t          ||||           t          j        t                    5  t          j        ||d	g
           d d d            n# 1 swxY w Y   t          j        t                    5  t          j        ||dg
           d d d            n# 1 swxY w Y   t          j        ||ddt                      fg
          }|                                j        dk    sJ t          j        ||dddhfg
          }t          j        t                     5  |                                j        dk    sJ 	 d d d            d S # 1 swxY w Y   d S )Nrh   rj   rC   r?   r@   rk   rl   r0   )rj   r   r   rK   )rj   z=<r   r   r   rH   r   )r   rP   rQ   rR   rW   rS   rX   pytestr   	TypeErrorr   r2   
ValueErrorsetr3   r]   NotImplementedError)r"   r,   r5   r^   ra   r|   rb   r6   s           r&   test_filters_invalid_pred_opr   ]  s   EI"??L	\"N 	
A	1H\666  $
& 
& 
&B
 $E9nbIII	y	!	! = =
)%*#8";	= 	= 	= 	== = = = = = = = = = = = = = =
 
z	"	" = =
)%*#8";	= 	= 	= 	== = = = = = = = = = = = = = = 	+0*4dCEE)B(EG G GG <<>>"a''''	+0*4dQC)@(AC C CG 
*	+	+ , ,||~~&!+++++, , , , , , , , , , , , , , , , , ,s6   B--B14B1C66C:=C:< F**F.1F.c                    t                      }| }g d}d|gg}d}t          j        t          j        |          t          j        |d          dddg          }t          ||||           d	}t          j        t          |
          5  t          j        ||dg                                           d d d            d S # 1 swxY w Y   d S )Nrh   rj   rC   r?   r@   rk   rl   r0   z1No match for FieldRef.Name\(non_existent_column\)match)non_existent_columnrm   r   rK   )r   rP   rQ   rR   rW   rS   rX   r   r   r   r   r2   r3   )r"   r,   r5   r^   ra   r|   rb   msgs           r&   test_filters_invalid_columnr     sK    EI"??L!<01N	A	1H\666  $
& 
& 
&B
 $E9nbIII
>C	z	-	-	- N N
)#B"E	G 	G 	GGKtvvvN N N N N N N N N N N N N N N N N Ns   +CCCrL   )rj   rm   r   rj   r   nestedr   r:   read_method)r   read_pandasc           
         t          t          |          }t                      }| }g d}d|gg}t          |          }t	          j        t          j        |          t          j        |d          t          j        d t          |          D                       d          }	t          ||||	           t          ||          }
 ||fi |
}|j        dk    sJ d S )	Nrh   rj   r?   r@   c                 2    g | ]}|t          |          d S )r   )r   rt   is     r&   rv   z+test_filters_read_table.<locals>.<listcomp>  s&    DDDa!#a&&11DDDr(   )rl   rj   r   rK   r   )getattrr   r   lenrP   rQ   rR   rW   rS   r   rX   dictr]   )r"   rL   r   r3   r,   r5   r^   ra   r|   rb   kwargsr   s               r&   test_filters_read_tabler     s     2{##DEI"??L	\"N 	LA	1H\666(DD588DDDEE  
 
B $E9nbIIIUG444FD%%f%%E>Qr(   c                    t                      }| }ddg}d|gg}d}t          j        t          j        |          t          j        |d          dddg	          }t          ||||           t          j        |          }|	                                }|
                    d                                          |k    sJ d S )
N2019_22019_3	year_weekr   rV   r@   )rl   r   rl   r0   )r   rP   rQ   rR   rW   rS   rX   r   r2   r3   r   r   )	r"   r,   r5   r_   ra   r|   rb   r6   r%   s	            r&   $test_partition_keys_with_underscoresr     s     EIX&K	k"N 	
A	1Xk:::  %
' 
' 
'B
 $E9nbIII	**G\\^^F==%%//11[@@@@@@r(   c                     | \  }}|dz   }t          j        dg di          }t          |||           t          ||          }|                    |          sJ d S Nz/test.parquetr   r   r   r   r   r   r   r    )s3_example_s3fsfsr$   r   r%   s        r&   test_read_s3fsr     sr    HB/!DHc999%&&E,,,,"---F==r(   c                     | \  }}|dz   }t          j        dg di          }t          |||           t          ||          }|                    |          sJ d S r   r   )r   r   r#   r$   r   r%   s         r&   test_read_directory_s3fsr     sr    #MB	&DHc999%&&E,,,,r222F==r(   c                     t          | dz            }t          j        dg di          }t          ||           t	          j        |g                                          }|                    |          sJ d S )Nr   r   r   )r   r   r   r   r   r2   r3   r    )r"   	data_pathr   r%   s       r&   test_read_single_file_listr     sx    Gn,--IHc999%&&E	"""	{++0022F==r(   c                 0    | \  }}t          ||           d S r*   )r+   r   r   r$   s      r&   $test_read_partitioned_directory_s3fsr     s#     HB"2t,,,,,r(   c                    ddg}g d}d|gd|gg}d}t          j        t          j        |          t          j        |d                              d	          t          j        t          j        t          j        |t                    d
          d          t          j        	                    |          dg d          }t          | |||           t          j        ||           }|                                }|                                                    d                              d          }	|                    d                              d                              |	j                  }
|
d                             d          |
d<   |
d                             d          |
d<   |	j        g dk                                    sJ t+          j        |	|
           d S )Nr   r   r9   foobarrE   r?   r@   rB   rC   r   )rl   r   r   r/   r0   r   rl   ro   TrM   category)rl   r/   r   r   )rP   rQ   rR   rW   rS   rT   rU   rV   randomrandnrX   r   r2   r3   rY   rx   rZ   reindexr1   astypealltmassert_frame_equal)r   r5   foo_keysbar_keysra   r|   rb   r6   r   rc   expected_dfs              r&   r+   r+      s   1vHH		N 	A	1x---44R88wrwrx???CCQGG)//!$$	 
 100
2 
2 
2B $B	>2FFF	b999GLLNNE""++))+4+((  >>W>--KTK**GI$5G66  %U+22:>>K$U+22:>>K!B!B!BBGGIIIII)[11111r(   c           	           t           t                    st          t                                t	                    t           dt           dd                     fd |dg            d S )Npathsepsep/c                    |         \  }}|D ]}|||fgz   }                     t          |           | d| g          }                    |           |dz
  k    r&ddlm}                      |t                      g          }	t          |          }
t          j        	                    |
          }
                    |	          5 }t          ||           d d d            n# 1 swxY w Y                       |	          j        |j        k    sJ                     |	          j        |j        k    sJ                      |dg          }
                    |          5 }	 d d d            n# 1 swxY w Y   y ||dz   |                                |dg          }
                    |          5 }	 d d d            n# 1 swxY w Y   d S )NrG   r   r   )FileType_SUCCESS)joinr   
create_dir
pyarrow.fsr   r
   _filter_partitionr   Tablefrom_pandasopen_output_streamr   get_file_infotypeNotFoundFile)base_dirlevel	part_keysnamer/   valuethis_part_keys	level_dirr   	file_pathfiltered_df
part_tableffile_successDEPTH_visit_levelrb   r   ra   r   s                 r&   r   z5_generate_partition_directories.<locals>._visit_level2  s   %e,f 	 	E&4-8NH!!%!!&  I MM)$$$	!!//////#LL)TVV)<==	/NCCX11+>>
**955 0 Q///0 0 0 0 0 0 0 0 0 0 0 0 0 0 0''	2278;LLLLL''	2278=HHHH&||Y
,CDD**<88 A               Y	>BBB&||Y
,CDD**<88 A              7	 	s6   C,,C0	3C0	-E;;E?	E?	GG	G	r   )
isinstancer   r   r   r   r   )r   r   ra   rb   r   r   r   s   ` ``@@@r&   rX   rX   '  s     b*%% --++,,Eb)WR%<%<==G         @ L1b!!!!!r(   c                 \   t          j        t          |           t                    }g }|D ]c\  }}|                    |           t          |t          j        t          j        f          rt          j	        |          }|| |         |k    z  }d| |         
                    |d          S )Nr@   r   )axis)rR   onesr   rD   appendr  r   r   rP   	TimestamprN   )rb   r   	predicateto_dropr   r   s         r&   r   r   U  s    Bt,,,IG  ' 'et ehmX->?@@ 	(L''ERX&&		i=gA...r(   c                 L   | dz  }|                                  t          j                            t	          j        dg di                    }t          j        ||dz             | dz  }|                                  t          j                            t	          j        dg di                    }t          j        ||dz             t          j        | dgg          }|	                    d          
                    t          j        g dg                    sJ d S )	NzA=0Br   r   zA=1r9   )ArI   r   r   )r   r   r   r   rP   rQ   r   r   r   r   r    chunked_array)r"   dir1table1dir2table2r   s         r&   "test_filter_before_validate_schemar  e  s    U?DJJLLLX!!",YYY/?"@"@AAFN64.0111U?DJJLLLX!!",___/E"F"FGGFN64.0111 M'^,<+=>>>E<<##B$4iii[$A$ABBBBBBBr(   c                 T   d}d}| t                      z  }|                                 g }g }t          |          D ]}t          ||          }|d                             t
          j                  |d<   || dz  }t          j        	                    |          }	t          |	|           |                    |	           |                    |           |dz                                   dd}
 |
|          t          j        |          }                    |          sJ d	d
dj        dz
  g}fd|D             }t!          j        ||          }t          j                            fd|D             |j        j                  }|                    |          sJ t!          j        |d           t          ||          j        d d d df         }| t                       dz  }t          j        	                    |          }t          ||           d S )Nr   rC   seeduint32.parquetz_SUCCESS.crcTc                 T    t          j        | fi |}|                    ||          S )N)r1   use_threads)r   r2   r3   )pathsr1   r  r   r6   s        r&   read_multiple_filesz5test_read_multiple_files.<locals>.read_multiple_files  s0    #E44V44||G|EEEr(   r   r      r   c                 D    g | ]}                     |          j        S rr   )fieldr   rt   r   r%   s     r&   rv   z,test_read_multiple_files.<locals>.<listcomp>  s&    777!a%777r(   r0   c                 :    g | ]}                     |          S rr   )r   r  s     r&   rv   z,test_read_multiple_files.<locals>.<listcomp>  s%    $G$G$G!V]]1%5%5$G$G$Gr(   )namesmetadata)r  ri   )NT)r
   r   r   r   r   rR   int64r   r   r   r   r  touchconcat_tablesr    num_columnsr   r   from_arraysschemar!  iloc)r"   nfilessizedirpath	test_datar  r   rb   r$   r   r  r   to_read	col_namesout	bad_applebad_apple_pathtr%   s                     @r&   test_read_multiple_filesr3  {  sa   FDGMMOOOIE6]]  T*** (|**28448A'$$R((UD!!!T ~$$&&&F F F F ! ''F	**H==""""" !Q*Q./G7777w777I
-
3
3
3Cx##$G$G$G$Gw$G$G$G*3-3]-C $ E EH ::h M't,,,,  1---2111bqb59I$&&2222N
Y''AN#####r(   c                    d}d}| t                      z  }|                                 g }g }g }t          |          D ]}t          ||          }t	          j        ||z  |dz   |z            |_        d|j        _        || dz  }	t          j	        
                    |          }
t          |
|	           |                    |
           |                    |           |                    |	           t          j        |          }ddg|                                                              }t#          j        fd	|D                       }t'          j        ||           |                    t+                                                              }|j        |j        k    sJ t'          j        |                    |j                  |           d S )
NrC   r  r   rl   r  uint8stringsr0   c                      g | ]
}|         S rr   rr   )rt   ru   r1   s     r&   rv   z,test_dataset_read_pandas.<locals>.<listcomp>  s    555!G*555r(   )r
   r   r   r   rR   rW   rl   r   r   r   r   r   r  r   r2   r   rY   rP   concatr   r   r   r\   r   r1   )r"   r)  r*  r+  r,  framesr  r   rb   r$   r   r6   r%   r   r1   s                 @r&   test_dataset_read_pandasr:    s   FDGMMOOOIFE6]]  T***9QXA~66A'$$R((UD!!!bT((G	"G   11;;==Fy5555f55566H&(+++   W 66@@BBF<8>))))&..1A.BBHMMMMMr(   c                    | t                      z  }|                                 t          dd          }|dz  }t          ||d           t	          j        |d          }|                                                    |          sJ d S )	Nr   r   r  	0.parquet2.6versionT)
memory_map)r
   r   r   r   r   r2   r3   r    )r"   r+  r   r$   r6   s        r&   test_dataset_memory_maprA    s     GMMOOO###E[ De,,,,D" " "G<<>>  '''''''r(   c                    | t                      z  }|                                 t          dd          }|dz  }t          ||d           t	          j        t                    5  t          j        |d           d d d            n# 1 swxY w Y   d	D ]A}t          j        ||          }|	                                
                    |          sJ Bd S )
Nr   r   r  r<  r=  r>  i)buffer_size)   i   )r
   r   r   r   r   r   r   r   r2   r3   r    )r"   r+  r   r$   rC  r6   s         r&   #test_dataset_enable_buffered_streamrE    s.   GMMOOO###E[ De,,,,	z	"	" & &
	& 	& 	& 	&& & & & & & & & & & & & & & & # , ,#. . .||~~$$U++++++, ,s   'B

BBc                    | t                      z  }|                                 t          dd          }|dz  }t          ||d           dD ]n}t	          j        ||          }|                                                    |          sJ t	          j        ||          }|                    |          sJ od S )	Nr   r   r  r<  r=  r>  )TF)
pre_buffer)	r
   r   r   r   r   r2   r3   r    r   )r"   r+  r   r$   rG  r6   actuals          r&   test_dataset_enable_pre_bufferrI    s    GMMOOO###E[ De,,,,# $ $
#
, , ,||~~$$U+++++w:>>>}}U######$ $r(   r   rC   c                     g }g }t          |          D ]S}t          ||          }| | dz  }|                    t          ||                     |                    |           T|S )Nr  r  )r   r   r  r   )r5   r)  
file_nrowsr,  r  r   r   r$   s           r&   _make_example_multifile_datasetrL    s{    IE6]]  JQ///a>>>)eT22333TLr(   c                 l    d |D             }t          |          t          | j                  k    sJ d S )Nc                 P    g | ]#}t          |                                          $S rr   )r   as_posix)rt   r$   s     r&   rv   z)_assert_dataset_paths.<locals>.<listcomp>)  s(    444dS!!444r(   )r   files)r6   r  s     r&   _assert_dataset_pathsrQ  (  s<    44e444Eu::W]++++++++r(   
dir_prefix_.c                     | t                      z  }|                                 t          |dd          }|| dz                                   t          j        |          }t          ||           d S )Nr   rC   r)  rK  stagingr
   r   rL  r   r2   rQ  r"   rR  r+  r  r6   s        r&   test_ignore_private_directoriesrZ  -  s     GMMOOO+GB78: : :E *%%%%,,...((G'5)))))r(   c                    | t                      z  }|                                 t          |dd          }|dz                      d          5 }|                    d           d d d            n# 1 swxY w Y   |dz                      d          5 }|                    d           d d d            n# 1 swxY w Y   t          j        |          }t          ||           d S )Nr   rC   rV  z	.DS_Storewbs	   gibberishz.privater
   r   rL  openwriter   r2   rQ  r"   r+  r  r   r6   s        r&   test_ignore_hidden_files_dotra  >  s^   GMMOOO+GB78: : :E K
	%	%d	+	+ q	               J
	$	$T	*	* a	               ((G'5)))))$   A22A69A6B77B;>B;c                    | t                      z  }|                                 t          |dd          }|dz                      d          5 }|                    d           d d d            n# 1 swxY w Y   |dz                      d          5 }|                    d           d d d            n# 1 swxY w Y   t          j        |          }t          ||           d S )Nr   rC   rV  _committed_123r\  s   abcd_started_321r]  r`  s        r&   #test_ignore_hidden_files_underscorerf  Q  s_   GMMOOO+GB78: : :E $
$	*	*4	0	0 A	               N
"	(	(	.	. !	               ((G'5)))))rb  c                    | | dz  t                      z  }|                    d           t          |dd          }t          j        |          }t          ||           t          j        |          }t          ||           d S )NdataTparentsr   rC   rV  rX  rY  s        r&   /test_ignore_no_private_directories_in_base_pathrk  d  s    
 :++++dff4GMM$M+GB78: : :E &&G'5))) ((G'5)))))r(   c           	         dgdz  dgdz  z   }t          j        t          j        t          t	          |                              t          j        |                                          gddg          }t          j        |t          |           dg           | dz  }|	                                 t          j        |t          |          dg           t          j
        | d	g
          }|                    |          sJ d S )Nxxxr   yyyrl   _partr   partition_cols_private_duplicate_private)ignore_prefixes)r   r   rS   r   r   dictionary_encoder   write_to_datasetr   r   r   r    )r"   partr   private_duplicater3   s        r&   test_ignore_custom_prefixesrz  w  s   7Q;%1$DH
s4yy!!""
((** w! ! !E
 s7||WIFFFF"66s#455(/y2 2 2 2 =*/ / /D ;;ur(   c                     | dz  }|                                  t          j        |          }|                                }|j        dk    sJ |j        dk    sJ d S )Nr6   r   )r   r   r2   r3   r]   r%  )r"   	empty_dirr6   r%   s       r&   test_empty_directoryr}    sf    )#IOO	**G\\^^F?a""""""r(   c                 H   dd l }dd lm} dd lm}  |j        t          d          t          d          t          t          d                    t          j	        gdz  t          j
        ddd                              d	          d
          }|j                                        }ddg}	t          j                            ||dd          }
 |j        |
| |	|           t$          j                            t+          |           d          }|E|                    |d          5 } |j        |
j        |           d d d            n# 1 swxY w Y   n |j        |
j        |            |j        | |          }t5          |j        j                  }|t5          |
j        j                  k    sJ |                                }|                                }|j                                        }|	|dt=          |	          z  d          k    sJ ||         }|	D ] }||                             d          ||<   !|rJ|                    d          j         !                                }|d                             |          |d<    |j"        ||           d S )Nr   
aaabbbbccc
eefeffgeeer   
2017-01-01
2017-01-11datetime64[D]r@   datetime64[ns])group1group2numnanr   r  r  F)r'  safepreserve_indexr   _common_metadatar\  r   r   )#pandaspandas.testingtestingpyarrow.parquetparquetrQ   listr   rR   r  rW   r   r1   tolistr   r   r   rw  osr$   r   r   r^  write_metadatar'  r2   r   r   r3   rY   r   r  r   to_pandas_dtyper   )r5   r   r'  
index_namerP   r   r   	output_dfcolspartition_byoutput_tablemetadata_pathr   r6   dataset_colsinput_tableinput_dfinput_df_colscolexpected_date_types                       r&   &_test_write_to_dataset_with_partitionsr    s           |$$|$$E"IIx"}	,OLLLSS    I ##%%Dh'L8''	&u7< ( > >LBi#-/ / / / GLLY1CDDM__]D11 	6QBl11555	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	,-}===b	+57 7 7G w~+,,L3|28999999,,..K$$&&H $++--M=c,.?.?)?)@)@AAAAA~H ; ;"3..z::	# I#\\&116FFHH%f-445GHH	&B)X.....s   *EEEc           
         dd l }dd lm}  |j        t	          d          t	          d          t	          t          d                    t          j        ddd                              d	          d
          }|j	        
                                }t          j                            |          }|t                      }n1t          |t                     st#          t%          |                    }d}t          |          D ]} |j        || |           t)          t+          |           dd          }	|                    |	          }
d |
D             }t/          |          |k    sJ  |j        | |                                          }|                                }|                                }||         }t9          j        ||           d S )Nr   r  r  r   r  r  r  r@   r  )r  r  r  r   rC   r   FT)allow_not_found	recursivec                 F    g | ]}|j                             d           |S )r  )r$   endswith)rt   infos     r&   rv   z8_test_write_to_dataset_no_partitions.<locals>.<listcomp>  s-    MMMTdi.@.@.L.LMDMMMr(   )r  r  r  rQ   r  r   rR   rW   r   r1   r  r   r   r   r   r  r   r   r   rw  r   r   r   r   r2   r3   rY   drop_duplicatesr   r   )r5   r   rP   r   r  r  r  nr   selectorinfosoutput_filesr  r  s                 r&   $_test_write_to_dataset_no_partitionsr    s
          |$$|$$E"II	,OLLLSS 	   I ##%%D8''	22L$&&


J// =!-
";";<<
 	
A1XX 3 3L)'1	3 	3 	3 	3 	3 C	NNE&*, , ,H $$X..EMMUMMML|!!!! $"#j  
dff  $$&&H''))H~H)X.....r(   c                 >    t          t          |                      d S r*   r  r   r"   s    r&   %test_write_to_dataset_with_partitionsr    s    *3w<<88888r(   c                    t          j        t          j        dt          j                              t          j        dt          j                              t          j        dt          j                              t          j        dt          j                              t          j        dt          j        d                    g          }t          t          |           |	           d S )
Nr  )r   r  r  r  r   us)unitr'  )	r   r'  r  r=   r"  int32	timestampr  r   )r"   r'  s     r&   0test_write_to_dataset_with_partitions_and_schemar    s    Y	<<<	<<<RXZZ888RXZZ888bl.E.E.EFFF	H I IF
 +GV% % % % % %r(   c                 B    t          t          |           d           d S )Nr  )r  r  r  s    r&   4test_write_to_dataset_with_partitions_and_index_namer    s.    *G/ / / / / /r(   c                 >    t          t          |                      d S r*   )r  r   r  s    r&   #test_write_to_dataset_no_partitionsr    s    (W66666r(   c                 N    t          | dz             t          | dz             d S )Ntest1test2)r  r  r  s    r&   test_write_to_dataset_pathlibr    s.    *7W+<===(7):;;;;;r(   c                 .   |\  }}t          j        t          d          5  t          | dz  |           d d d            n# 1 swxY w Y   t          j        t          d          5  t	          | dz  |           d d d            d S # 1 swxY w Y   d S )Nz"path-like objects are only allowedr   r  r   r  )r   r   r   r  r  )r"   r   r   rS  s       r&   &test_write_to_dataset_pathlib_nonlocalr  #  s6    EB	y(L	M	M	M . ..g"	. 	. 	. 	.. . . . . . . . . . . . . . . 
y(L	M	M	M . .,g"	. 	. 	. 	.. . . . . . . . . . . . . . . . . .s#   AA	A(B

BBwin32z,test fails because of unsupported characters)r   c                 2    | \  }}t          ||           d S Nr   )r  r   s      r&   *test_write_to_dataset_with_partitions_s3fsr  2  s3     HB*     r(   c                 2    | \  }}t          ||           d S r  )r  r   s      r&   (test_write_to_dataset_no_partitions_s3fsr  >  s3     HB(     r(   c                 .   t          j        dg di          }t          j                            |          }t          |           }t          j        ||t                                 t          j	        |          }|
                    |          sJ d S )Nr  r   r   )rP   rQ   r   r   r   r   r   rw  r   r   r    )r"   rb   r   r$   r%   s        r&    test_write_to_dataset_filesystemr  G  s    	sIII&	'	'BH  $$Ew<<Dt0A0ABBBB]4  F==r(   d   c                    | dz  }t                      }t          j        t          j        |          t          j                            |          dddg          }t          j        	                    |          }d}t          j        ||j                  5 }t          |          D ]}|                    |           	 d d d            n# 1 swxY w Y   t          j        |          }	|	j        j        |k    sJ | dz  }
|                    t'          |
                    5 }t          j        |j        |           d d d            n# 1 swxY w Y   t          j        | |          }|S )	Nr   )rl   r/   rl   r/   r0   r   	_metadatar   )r   rP   rQ   rR   rW   r   r   r   r   r   r   ParquetWriterr'  r   r   ParquetFiler!  num_row_groupsr   r   r  r2   )r"   r|   r$   r,   rb   r   
num_groupswriterr   readerr  r   r6   s                r&   _make_dataset_for_picklingr  R  s   ^#DE	1)//!$$  "
$ 
$ 
$B H  $$EJ		$	-	- &z"" 	& 	&Au%%%%	&& & & & & & & & & & & & & & & ^D!!F?)Z7777k)M		!	!#m"4"4	5	5 +
%,***+ + + + + + + + + + + + + + + E# # #G Ns$   (CCC&EEEc                 J    fd}t          |           } ||          sJ d S )Nc                 \    |                                          |                     k    S r*   )loadsdumps)objpickle_modules    r&   is_pickleablez*test_pickle_dataset.<locals>.is_pickleablep  s*    m))-*=*=c*B*BCCCCr(   )r  )r"   r  r  r6   s    `  r&   test_pickle_datasetr  n  sL    D D D D D )11G=!!!!!!!r(   c                 R   | dz  }t          j        g dg dg dd          }t          j                            |          }t          j        |t          |          ddg           t          j        |          	                                }t          j
        ||d	z             d S )
Nz
ARROW-3208)r  r   g      @r    r   g333333=@)r  r   r   r  r  r   r   )r   r   r   r   r   r   r   )onetwothreer  r  )	root_pathrr  zoutput.parquet)rP   rQ   r   r   r   r   rw  r   r2   r3   r   )r"   r$   rb   r   s       r&   test_partitioned_datasetr  w  s     \!D	000,,,&&&  
 
B
 H  $$ET(-u~7 7 7 7d##((**EN5$!1122222r(   c                    | dz  }t          j        d t          d          D             dz  gdg          }t          j        d t          d          D             dz  gdg          }t          j        |t          |                     t          j        |t          |                     t          j        |dg	                                          }|d
                             d
          	                                |d
                             d
          	                                g}|d
         j
        dk    sJ |d
                             d
          |d
                             d          }}|                    |d
                   r|                    |d                   sJ d S |                    |d                   sJ |                    |d
                   sJ d S )NzARROW-3325-datasetc                 6    g | ]}t          j        d           S r   r	   randsr   s     r&   rv   z0test_dataset_read_dictionary.<locals>.<listcomp>       555qDJrNN555r(   rC   r   f0rp  c                 6    g | ]}t          j        d           S r  r  r   s     r&   rv   z0test_dataset_read_dictionary.<locals>.<listcomp>  r  r(   )r  )read_dictionaryr   r   r   )r   r   r   r   rw  r   r2   r3   chunkrv  
num_chunksr    )r"   r$   t1t2r%   	ex_chunksc0c1s           r&   test_dataset_read_dictionaryr    s   ))D	55E!HH555:;D6	J	J	JB	55E!HH555:;D6	J	J	JBc$ii0000c$ii0000tf& & &&*dff  AQ1133AQ11335I !91$$$$AY__Q!3!3B	yy1 'yy1&&&&&&&yy1&&&&&yy1&&&&&&&r(   c                    t          j        dt          j        g dt          j                              i          }t	          j        || dz             t	          j        || dz             t          j        dg          }t	          j        | dz  |          }t          j        dg di|          }|                    |          sJ t	          j        | |          }t          j        dg di|          }|                    |          sJ t	          j	        | |          }t          j        dg di|          }|
                                                    |          sJ d S )Nr   r   zdata1.parquetzdata2.parquet)r   r"  r  )r   r   r   r   r   r   )r   r   rS   r  r   r   r'  r   r    r2   r3   )r"   r   r'  r%   r   s        r&   test_read_table_schemar    sg   Hc28IIIrxzz::;<<EN5'O3444N5'O3444Y'((F ]7_4VDDDFxiii(888H==""""" ]76222Fx0001&AAAH=="""""wv666Fx0001&AAAH;;==)))))))r(   c                    t          j        t          j        g dt          j                              t          j        g dt          j                              d          }t          j        || dz             t          j        | dz  ddg          }t          j        ddg          }|j	        ddgk    sJ |j        |k    sJ d S )Nr   r   r   r   r0   )r   r  )
r   r   rS   r  r5  r   r   r   r'  r4   )r"   r   r%   expected_schemas       r&   *test_read_table_duplicate_column_selectionr    s    H28IIIrxzz::8IIIrxzz::< < = =EN5'N2333]7^3c3ZHHHFi @AAO3*,,,,=O++++++r(   c                    dd l m} | dz  }|dz  dz  dz                      d           t          j        dg d	i          }t          j        |t          |dz  dz  dz  d
z                       |                    g d          }t          j	        t          |          |          }|j
        g dk    sJ t          j        t          |          |                                          }|j
        g dk    sJ d S )Nr   test_partitioning20121001Tri  r   r   r   )yearmonthday)field_names)partitioning)r   r  r  r  )pyarrow.datasetr6   r   r   r   r   r   r   r  r   r4   r2   r3   )r"   dsr  r   rx  r%   s         r&   test_dataset_partitioningr    s>          --I$%,,T,:::Hc999%&&ENs9v%,t3nDEEG G G ??'?'?'??@@D]IT+ + +F"?"?"?????IT+ + ++/466 "?"?"???????r(   c                 :   t          j        dg di          }t          j        || dz             t	          t          |           t                                }t          j        d|          }|                                }|	                    |          sJ d S )Nr   r   r   rT  r   )
r   r   r   r   r   r   r   r2   r3   r    )r"   r   r   r6   r%   s        r&   #test_parquet_dataset_new_filesystemr
    s    Hc999%&&EN5'N2333"3w<<1B1BCCJ
;;;G\\^^F==r(   c                 d   t          j        d          }|                    d          }t          j        dg di          }t          j        || dz             t          |                               dd          }t          j	        ||          }|d	z   }|j
        d
         j        |k    sJ d S )Nfsspecfiler   r   r   \r   r   z/data.parquetr   )r   importorskipr   r   r   r   r   r   replacer2   	fragmentsr$   )r"   r  r   r   r$   r6   r   s          r&   6test_parquet_dataset_partitions_piece_path_with_fsspecr    s      **F""6**JHc999%&&EN5'N2333 w<<c**D% % %G o%HQ$000000r(   c                    t          j        dg di          }| dz  }g fd}d}t          j        ||dg||           |dz  dz  |d	z  dz  |d
z  dz  h}t	          t          t          j                            }||k    sJ d S )Nr   r   r  c                 <                         | j                   d S r*   )r  r$   )written_filepaths_writtens    r&   file_visitorzDtest_parquet_write_to_dataset_exposed_keywords.<locals>.file_visitor  s     \./////r(   zpart-{i}.parquet)r  r  basename_template1zpart-0.parquet23)r   r   r   rw  r   ry   pathlibPath)r"   r   r$   r  r  expected_pathspaths_written_setr  s          @r&   .test_parquet_write_to_dataset_exposed_keywordsr     s    Hc999%&&E^#DM0 0 0 0 0 +t3%%1*;= = = =
 	s
%%s
%%s
%%N
 Cm<<==......r(   write_dataset_kwarg))r   T)r   Fc                    ddl m} t          j        dg di          }| dz  }t	          j        |j                  }|\  }}|t	          j        t          j                  j	        vsJ ||j	        v sJ t          j                            |dd          5 }t          j        ||fi ||i |j        d         \  }	}
}||         |k    sJ 	 ddd           dS # 1 swxY w Y   dS )	zEVerify kwargs in pq.write_to_dataset are passed onto ds.write_datasetr   Nr   r   zout.parquetwrite_datasetT)autospec)r  r6   r   r   inspect	signaturer#  r   rw  
parametersmockpatchrV   
mock_calls)r"   r!  r  r   r$   r&  keyargmock_write_dataset_name_argsr   s               r&   #test_write_to_dataset_kwargs_passedr0    sV    !     Hc999%&&E]"D!""233I"HC g'(;<<GGGGG)&&&&&			2		>	> "!
E466C:6661<Q?ufc{c!!!!!	" " " " " " " " " " " " " " " " " "s   5CC Cc                 B   t          j        t          j        g dg d          g dd          }t          j        |          }| dz  }t          j        || dz  dg           d	 |                                D             }t          |          d
k    sJ d|vsJ d S )N)r   r:   r   r9   r   r   )catr  r6   r2  rq  c                 D    g | ]}|                                 |j        S rr   )is_dirr   )rt   r   s     r&   rv   z;test_write_to_dataset_category_observed.<locals>.<listcomp>5  s'    <<<!<qv<<<r(   r   zcat=c)	rP   rQ   r   r   r   r   rw  iterdirr   )r"   rb   r   r$   subdirss        r&   'test_write_to_dataset_category_observedr7  '  s    
 
~ooo///JJJyy  
 
B HRLLEYDw"E7    =<t||~~<<<Gw<<1'!!!!!!r(   )r   rC   )NNNr*   )r  )nr   r%  r  r  sysnumpyrR   ImportErrorr   unittest.mockr(  pyarrowr   pyarrow.computecomputepcr   r   r   r   r   r   r   pyarrow.testsr	   pyarrow.utilr
   r  r  r   pyarrow.tests.parquet.commonr   r   r   r   r  rP   r  r  r   markr6   
pytestmarkr'   r-   r7   rf   r~   xfailr   AssertionErrorr   r   r   r   r   r   parametrizer  castr"  r   r   s3r   r   r   r   r+   rX   r   r  r3  r:  rA  rE  rI  rL  rQ  rZ  ra  rf  rk  rz  r}  r  r  r  r  r  r  r  r  skipifplatformr  r  r  r  r  r  r  r  r  r  r
  r  r   r0  r7  rr   r(   r&   <module>rL     s
  $   				  



   	BBB                 H H H H H H H H H H H H H H H H                  A A A A A A A A A A A A A   	BBB   NB k!6;#67
     & 3 3 3
 	- 	- 	- C, C, C,L ! ! !B  ~&B	    '1 '1  '1T = = =  ! ! !B +4 +4 +4\ %, %, %,P N N N. /0012""(:..2""(8S11A5""(8S1166xrxzzBBQF	HI I (EFF  GFI I 4 A A A.                   - -  -
$2 $2 $2N+" +" +"\/ / /  C C C* 5$ 5$ 5$N "N "N "NJ ( ( ( , , ,$ $ $ $ 	 	 	 	, , ,
 Sz22* * 32 * * * *$ * * *$ Sz22* * 32 *"  *# # # 7;266::/ :/ :/ :/| 59+/ +/ +/ +/\ 9 9 9 % % % / / /
 7 7 7 < < <
 
. 
.  
. CLG+I  K K K K  
             8 " " " 3 3 3 ' ' '.* * *.
, 
, 
,@ @ @0     1 1 1"/ / /0 . 1  " "	 "* " " " " "s0    %%A, ,A65A6:
B 	BB