
    Pi&                         d dl mZ d dlZd dlmZ d dlmZmZmZm	Z	 d dlm
Z d dlmZ d dlmZ  G d d          Zd	 Zh d
Z	 	 ddZ	 	 ddZddZdS )    )SequenceN)_pandas_api)CodecTableconcat_tablesschema)_feather)FeatherErrorc                   0    e Zd ZdZddZd	dZd Zd
dZdS )FeatherDataseta  
    Encapsulates details of reading a list of Feather files.

    Parameters
    ----------
    path_or_paths : List[str]
        A list of file names
    validate_schema : bool, default True
        Check that individual file schemas are all the same / compatible
    Tc                 "    || _         || _        d S N)pathsvalidate_schema)selfpath_or_pathsr   s      c/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/pyarrow/feather.py__init__zFeatherDataset.__init__*   s    "
.    Nc                 >   t          | j        d         |          }|g| _        |j        | _        | j        dd         D ]J}t          ||          }| j        r|                     ||           | j                            |           Kt          | j                  S )a,  
        Read multiple feather files as a single pyarrow.Table

        Parameters
        ----------
        columns : List[str]
            Names of columns to read from the file

        Returns
        -------
        pyarrow.Table
            Content of the file as a table (of columns)
        r   columns   N)
read_tabler   _tablesr   r   validate_schemasappendr   )r   r   _filpathtables        r   r   zFeatherDataset.read_table.   s     $*Q-999vkJqrrN 	' 	'DtW555E# 3%%dE222L&&&&T\***r   c                     | j                             |j                   s"t          d| d| j          d|j                    d S )Nz
Schema in z was different. 
z

vs

)r   equals
ValueError)r   piecer    s      r   r   zFeatherDataset.validate_schemasG   st    {!!%,// 	G F% F F $F F7<|F F G G G	G 	Gr   c                 V    |                      |                              |          S )a  
        Read multiple Parquet files as a single pandas DataFrame

        Parameters
        ----------
        columns : List[str]
            Names of columns to read from the file
        use_threads : bool, default True
            Use multiple threads when converting to pandas

        Returns
        -------
        pandas.DataFrame
            Content of the file as a pandas DataFrame (of columns)
        r   )use_threadsr   	to_pandas)r   r   r&   s      r   read_pandaszFeatherDataset.read_pandasL   s2      w//99# : % % 	%r   )Tr   )NT)__name__
__module____qualname____doc__r   r   r   r)    r   r   r   r      sl        	 	/ / / /+ + + +2G G G
% % % % % %r   r   c                     |j         dk    rd S |j        t          j                    t          j                    fv rt          d|  d          t          d|  d|j         d          )Nr   zColumn 'zg' exceeds 2GB maximum capacity of a Feather binary column. This restriction may be lifted in the futurez
' of type zU was chunked on conversion to Arrow and cannot be currently written to Feather format)
num_chunkstypeextbinarystringr#   )namecols     r   check_chunked_overflowr7   `   s    
~
xCJLL#*,,/// 0D 0 0 0 1 1 	1
 @t @ @sx @ @ @
 
 	
r   >   lz4zstduncompressed   c                    t           j        r?t           j        r3t          | t           j        j                  r|                                 } t          j        |           ru|dk    rd}n|dk    rd}nt          d          t          j
        | |          }|dk    r7t          |j        j                  D ]\  }}	||         }
t          |	|
           n| }|dk    rit          |j                  t          t#          |j                            k    rt          d          |t          d          |t          d	          n>|t%          j        d
          rd}n%|#|t(          vrt          d| dt(                     	 t+          j        ||||||           dS # t.          $ rB t          |t0                    r+	 t3          j        |           n# t2          j        $ r Y nw xY w w xY w)a  
    Write a pandas.DataFrame to Feather format.

    Parameters
    ----------
    df : pandas.DataFrame or pyarrow.Table
        Data to write out as Feather format.
    dest : str
        Local destination path.
    compression : string, default None
        Can be one of {"zstd", "lz4", "uncompressed"}. The default of None uses
        LZ4 for V2 files if it is available, otherwise uncompressed.
    compression_level : int, default None
        Use a compression level particular to the chosen compressor. If None
        use the default compression level
    chunksize : int, default None
        For V2 files, the internal maximum size of Arrow RecordBatch chunks
        when writing the Arrow IPC file format. None means use the default,
        which is currently 64K
    version : int, default 2
        Feather file version. Version 2 is the current. Version 1 is the more
        limited legacy format
    r   Fr;   Nz%Version value should either be 1 or 2)preserve_indexz'cannot serialize duplicate column namesz2Feather V1 files do not support compression optionz0Feather V1 files do not support chunksize option	lz4_framer8   zcompression="z " not supported, must be one of )compressioncompression_level	chunksizeversion)r   have_pandas
has_sparse
isinstancepdSparseDataFrameto_denseis_data_framer#   r   from_pandas	enumerater   namesr7   lencolumn_namessetr   is_available_FEATHER_SUPPORTED_CODECSr	   write_feather	Exceptionstrosremoveerror)dfdestr?   r@   rA   rB   r=   r    ir5   r6   s              r   rR   rR   s   sr   2  " 	2{~=>>	B $$  a<<"NN\\!NNDEEE!"^DDDa<<$U\%788 2 24Ah&tS1111!||u!""SU-?)@)@%A%AAAFGGG" & ' ' '   & ' ' ' ! 5#5k#B#BKK%!::: C[ C C'@C C D D D
ud1B)2G	E 	E 	E 	E 	E 	E    dC   		$8   s0   F! !G-GG-G(%G-'G((G-TFc                 B     t          | |||          j        dd|i|S )a  
    Read a pandas.DataFrame from Feather format. To read as pyarrow.Table use
    feather.read_table.

    Parameters
    ----------
    source : str file path, or file-like object
        You can use MemoryMappedFile as source, for explicitly use memory map.
    columns : sequence, optional
        Only read a specific set of columns. If not provided, all columns are
        read.
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads. If false the
        restriction is used in the conversion to Pandas as well as in the
        reading from Feather format.
    memory_map : boolean, default False
        Use memory mapping when opening file on disk, when source is a str.
    **kwargs
        Additional keyword arguments passed on to `pyarrow.Table.to_pandas`.

    Returns
    -------
    df : pandas.DataFrame
        The contents of the Feather file as a pandas.DataFrame
    )r   
memory_mapr&   r&   r.   r'   )sourcer   r&   r\   kwargss        r   read_featherr_      sO    6+JJ! ! !!*N N7BNFLN N Or   c                    t          j        | ||          }||                                S t          |t                    s4t          d                    t          |          j                            d |D             }t          t          d |                    r|                    |          }nUt          t          d |                    r|                    |          }n!d |D             }t          d| d	|           |j        d
k     r|S t          t          |                    |k    r|S |                    |          S )a  
    Read a pyarrow.Table from Feather format

    Parameters
    ----------
    source : str file path, or file-like object
        You can use MemoryMappedFile as source, for explicitly use memory map.
    columns : sequence, optional
        Only read a specific set of columns. If not provided, all columns are
        read.
    memory_map : boolean, default False
        Use memory mapping when opening file on disk, when source is a str
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads.

    Returns
    -------
    table : pyarrow.Table
        The contents of the Feather file as a pyarrow.Table
    )use_memory_mapr&   Nz&Columns must be a sequence but, got {}c                 ,    g | ]}t          |          S r.   )r1   ).0columns     r   
<listcomp>zread_table.<locals>.<listcomp>  s    777VDLL777r   c                     | t           k    S r   )intts    r   <lambda>zread_table.<locals>.<lambda>  s
    c r   c                     | t           k    S r   )rT   rh   s    r   rj   zread_table.<locals>.<lambda>  s
    18 r   c                     g | ]	}|j         
S r.   )r*   )rc   ri   s     r   re   zread_table.<locals>.<listcomp>  s    >>>AQZ>>>r   z.Columns must be indices or names. Got columns z
 of types    )r	   FeatherReaderreadrE   r   	TypeErrorformatr1   r*   allmapread_indices
read_namesrB   sortedrO   select)r]   r   r\   r&   readercolumn_typesr    column_type_namess           r   r   r      s   * #z{D D DF {{}}gx(( 9@W 6779 9 	9 87w777L
3!!<0011 O##G,,	S##\22	3	3 O!!'**>>>>> N'.N N:KN N O O 	O ~	G			(	( ||G$$$r   )NNNr;   )NTF)NFT)collections.abcr   rU   pyarrow.pandas_compatr   pyarrow.libr   r   r   r   libr2   pyarrowr	   pyarrow._featherr
   r   r7   rQ   rR   r_   r   r.   r   r   <module>r      sN  & % $ $ $ $ $ 				 - - - - - -0 0 0 0 0 0 0 0 0 0 0 0             ) ) ) ) ) )?% ?% ?% ?% ?% ?% ?% ?%D
 
 
  <;;  AE*+P P P Pf 48!O O O O@1% 1% 1% 1% 1% 1%r   