
    Pih              
          d dl Z d dlZd dlmZmZmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZ d dlZd dlZd dlZddlmZ ddlmZmZmZmZ ddlmZ dd	lmZ  ed
          Z  ed          Z! ed          Z" ed          Z#de$de%fdZ&de	fdZ'dedee(e)e$e*ef         dedej        fdZ+dedee(e)e$e*ef         dej        fdZ,dej-        de%fdZ. G d dee!e"e#f                   Z/de0e*e1e          f         de0e*e f         fdZ2 G d de/ej        ej-        ej        f                   Z3 G d de/e0e1e0f                   Z4 G d  d!e/e0ej5        e0f                   Z6 G d" d#e/ej7        ej8        ej7        f                   Z9 G d$ d%          Z: G d& d'          Z; G d( d)e          Z< G d* d+e<          Z= G d, d-e<          Z> G d. d/ee!e"e#f                   Z? G d0 d1e?e!e"e#f                   Z@ G d2 d3e?e!e"e#f                   ZA G d4 d5eAej        ej-        ej        f                   ZB G d6 d7e?ee1ef                   ZC G d8 d9eAej7        ej8        ej7        f                   ZD G d: d;e?e0e"e0f                   ZEde*d<e1e*         ddfd=ZFdee(e)e$ef         d>e(ddfd?ZGdee(e)e$e*ef         de*fd@ZH	 dFdedee(e)e$e*ef         dee         dej        fdAZI	 	 dGdedee(e)e$e*ef         dCe?dDee1         fdEZJdS )H    N)IterableMappingMutableMapping)partial)AnyCallableGenericOptionalTypeVarUnion   )Features)_ArrayXDExtensionType_is_zero_copy_onlydecode_nested_examplepandas_types_mapper)Table)no_op_if_value_is_nullT	RowFormatColumnFormatBatchFormatkeyreturnc                 8    | j         dk    o| j        | j        k    S )N   )stepstopstartr   s    r/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/datasets/formatting/formatting.py_is_range_contiguousr"   (   s    8q=2SX22    c                 H    t          d|  dt          |            d          )NzWrong key type: 'z' of type 'z6'. Expected one of int, slice, range, str or Iterable.)	TypeErrortyper    s    r!   _raise_bad_key_typer'   ,   s0    
mCmmDIImmm  r#   tableindicesc           	         t          |t                    rY                    |j        z  d                              d          d                                         }t          | |          S t          |t                    r!t          |	                    j                   }t          |t                    rnt          |          r^|j        dk    rSt          | d                     |j        |j        |j        z
                                d          D                       S 	 t          |t                    rK|                     |g          } t          |                     d                                                    S t          |t                     rt          | fd|D                       S t#          |           dS )aE  
    Query a pyarrow Table to extract the subtable that correspond to the given key.
    The :obj:`indices` parameter corresponds to the indices mapping in case we cant to take into
    account a shuffling or an indices selection for example.
    The indices table must contain one column named "indices" of type uint64.
    r   r   c                 6    g | ]}|                                 S  )as_py).0is     r!   
<listcomp>z5_query_table_with_indices_mapping.<locals>.<listcomp>C   s     iiia		iiir#   c                     g | ]C}                     |d                               d          d                                         DS )r   r   )
fast_slicecolumnr-   )r.   r/   r)   s     r!   r0   z5_query_table_with_indices_mapping.<locals>.<listcomp>K   sI    #_#_#_VWG$6$6q!$<$<$C$CA$F$Fq$I$O$O$Q$Q#_#_#_r#   N)
isinstanceintr2   num_rowsr3   r-   _query_tablesliceranger)   r"   r   r   strselect	to_pylistr   r'   )r(   r   r)   s     `r!   !_query_table_with_indices_mappingr=   2   s    #s (  w'7!7;;BB1EEaHNNPPE3'''#u 4S[[!1223#u $$ 	aii7+=+=ciTWT]I]+^+^+e+efg+h+hiii   #s BcU##E7>>!#4#4#>#>#@#@AAA#x   aE#_#_#_#_[^#_#_#_```r#   c                 &   t          t                    r|                     | j        z  d          S t          t                    r!t                              | j                   t          t
                    rCt                    r3j        dk    r(|                     j        j	        j        z
            S 	 t          t                    r+| j                            fd| j        D                       S t          t                    rjt          j        t          j                  t%                    dk    r| j                            dd          S |                     | j        z            S t)                     dS )zY
    Query a pyarrow Table to extract the subtable that correspond to the given key.
    r   r   c                      g | ]
}|k    |S r,   r,   )r.   r3   r   s     r!   r0   z _query_table.<locals>.<listcomp>^   s     Z Z ZFFVYMMMMMr#   N)r4   r5   r2   r6   r8   r9   r)   r"   r   r   r:   r(   dropcolumn_namesr   npfromiterint64lenfast_gatherr'   )r(   r   s    `r!   r7   r7   P   sj    #s 9en 4a888#u 2S[[001#u $$ 	a##CIsx#)/CDDD#s \{ Z Z Z Ze6H Z Z Z[[[#x   7k#rx((s88q==;$$Q***  u~!5666r#   pa_arrayc                     | j         dk    S Nr   )
null_count)rG   s    r!   _is_array_with_nullsrK   i   s    ""r#   c                   `    e Zd ZdZdej        defdZdej        defdZ	dej        de
fdZdS )BaseArrowExtractorz
    Arrow extractor are used to extract data from pyarrow tables.
    It makes it possible to extract rows, columns and batches.
    These three extractions types have to be implemented.
    pa_tabler   c                     t           NNotImplementedErrorselfrN   s     r!   extract_rowzBaseArrowExtractor.extract_rowt       !!r#   c                     t           rP   rQ   rS   s     r!   extract_columnz!BaseArrowExtractor.extract_columnw   rV   r#   c                     t           rP   rQ   rS   s     r!   extract_batchz BaseArrowExtractor.extract_batchz   rV   r#   N)__name__
__module____qualname____doc__par   r   rU   r   rX   r   rZ   r,   r#   r!   rM   rM   m   s         "BH " " " " ""rx "L " " " ""bh "; " " " " " "r#   rM   py_dictc                 >    d |                                  D             S )z:Return the first element of a batch (dict) as a row (dict)c                 &    i | ]\  }}||d          S )r   r,   )r.   r   arrays      r!   
<dictcomp>z_unnest.<locals>.<dictcomp>   s"    <<<jc5Cq<<<r#   )items)r`   s    r!   _unnestrf   ~   s    <<GMMOO<<<<r#   c                   z    e Zd Zdej        dej        fdZdej        dej        fdZdej        dej        fdZdS )SimpleArrowExtractorrN   r   c                     |S rP   r,   rS   s     r!   rU   z SimpleArrowExtractor.extract_row       r#   c                 ,    |                     d          S rI   )r3   rS   s     r!   rX   z#SimpleArrowExtractor.extract_column   s    q!!!r#   c                     |S rP   r,   rS   s     r!   rZ   z"SimpleArrowExtractor.extract_batch   rj   r#   N)	r[   r\   r]   r_   r   rU   ArrayrX   rZ   r,   r#   r!   rh   rh      s        BH     "rx "BH " " " "bh 28      r#   rh   c                   \    e Zd Zdej        defdZdej        defdZdej        defdZ	dS )PythonArrowExtractorrN   r   c                 D    t          |                                          S rP   )rf   	to_pydictrS   s     r!   rU   z PythonArrowExtractor.extract_row   s    x))++,,,r#   c                 P    |                     d                                          S rI   )r3   r<   rS   s     r!   rX   z#PythonArrowExtractor.extract_column   s     q!!++---r#   c                 *    |                                 S rP   )rq   rS   s     r!   rZ   z"PythonArrowExtractor.extract_batch   s    !!###r#   N)
r[   r\   r]   r_   r   dictrU   listrX   rZ   r,   r#   r!   ro   ro      s        -BH - - - - -.rx .D . . . .$bh $4 $ $ $ $ $ $r#   ro   c                       e Zd Zd Zdej        defdZdej        dej	        fdZ
dej        defdZdej        dej	        fdZd	S )
NumpyArrowExtractorc                     || _         d S rP   )np_array_kwargs)rT   ry   s     r!   __init__zNumpyArrowExtractor.__init__   s    .r#   rN   r   c                 F    t          |                     |                    S rP   )rf   rZ   rS   s     r!   rU   zNumpyArrowExtractor.extract_row   s    t))(33444r#   c                 N    |                      ||j        d                            S rI   )_arrow_array_to_numpyrA   rS   s     r!   rX   z"NumpyArrowExtractor.extract_column   s#    ))(83H3K*LMMMr#   c                 .      fdj         D             S )Nc                 H    i | ]}|                     |                   S r,   )r}   )r.   colrN   rT   s     r!   rd   z5NumpyArrowExtractor.extract_batch.<locals>.<dictcomp>   s-    ```3T//>>```r#   )rA   rS   s   ``r!   rZ   z!NumpyArrowExtractor.extract_batch   s$    `````(J_````r#   rG   c                 Z   t          |t          j                  rt          |j        t                    r/t          |j        j        d          fd|j        D             nt          |j                  ot          d |j        D                       fd|j        D             nt          |j        t                    r2t          |j        j        d          |	                              nLt          |j                  ot          |           |	                                                              t                    dk    rt          fdD                       rdt          j                            t          j                  d	k    rt          j        t&          
          S t          j        dt&                    S t          j                            t          j                  d	k    rt          j                  S t          j        d          S )NT)unnestc                 F    g | ]}|                                D ]}|S zero_copy_onlyto_numpyr.   chunkrowr   s      r!   r0   z=NumpyArrowExtractor._arrow_array_to_numpy.<locals>.<listcomp>   I       !^l@m@m 9<C   r#   c              3   6   K   | ]}t          |           V  d S rP   )rK   )r.   r   s     r!   	<genexpr>z<NumpyArrowExtractor._arrow_array_to_numpy.<locals>.<genexpr>   sG       K K8=,U333K K K K K Kr#   c                 F    g | ]}|                                D ]}|S r   r   r   s      r!   r0   z=NumpyArrowExtractor._arrow_array_to_numpy.<locals>.<listcomp>   r   r#   r   r   c              3      K   | ]m}t          |t          j                  r&|j        t          k    p>|j        d          j        k    p(t          |t                    ot          j        |          V  ndS )r   N)r4   rB   ndarraydtypeobjectshapefloatisnan)r.   xrc   s     r!   r   z<NumpyArrowExtractor._arrow_array_to_numpy.<locals>.<genexpr>   s          Arz**_60A0^QWPUVWPXP^E^ :q%((8RXa[[     r#   z2.0.0b1)r   F)copyr   r   )r4   r_   ChunkedArrayr&   r   r   storage_dtypechunksallr   rK   tolistrE   anyrB   libNumpyVersion__version__asarrayr   rc   )rT   rG   rc   r   s     @@r!   r}   z)NumpyArrowExtractor._arrow_array_to_numpy   sG   h00 	X(-)>?? !3HM4OX\!]!]!]   %-_   "4HM!B!B "s K KAIK K K H H   %-_   (-)>?? X!3HM4OX\!]!]!]&//~/NN!3HM!B!B!iK_`hKiKiGi&//~/NNUUWWu::>>          A
 6&&r~66)CC:e6::::xE@@@@6r~..);;:e$$$8E....r#   N)r[   r\   r]   rz   r_   r   rt   rU   rB   r   rX   rZ   rm   r}   r,   r#   r!   rw   rw      s        / / /5BH 5 5 5 5 5Nrx NBJ N N N Nabh a4 a a a a$/bh $/2: $/ $/ $/ $/ $/ $/r#   rw   c                   z    e Zd Zdej        dej        fdZdej        dej        fdZ	dej        dej        fdZ
dS )PandasArrowExtractorrN   r   c                 `    |                     d                              t                    S )Nr   )lengthtypes_mapper)r8   	to_pandasr   rS   s     r!   rU   z PandasArrowExtractor.extract_row   s(    ~~Q~''11?R1SSSr#   c                     |                     dg                              t                    |j        d                  S )Nr   r   )r;   r   r   rA   rS   s     r!   rX   z#PandasArrowExtractor.extract_column   s7    s##--;N-OOPXPefgPhiir#   c                 8    |                     t                    S )Nr   )r   r   rS   s     r!   rZ   z"PandasArrowExtractor.extract_batch   s    !!/B!CCCr#   N)r[   r\   r]   r_   r   pd	DataFramerU   SeriesrX   rZ   r,   r#   r!   r   r      s        TBH T T T T Tjrx jBI j j j jDbh D2< D D D D D Dr#   r   c                       e Zd Z	 ddee         deeeeeedf         f                  fdZ	dedefdZ
ded	edefd
ZdedefdZdS )PythonFeaturesDecoderNfeaturestoken_per_repo_idc                 "    || _         || _        d S rP   r   r   rT   r   r   s      r!   rz   zPythonFeaturesDecoder.__init__   s     !!2r#   r   r   c                 V    | j         r!| j                             || j                  n|S N)r   )r   decode_exampler   )rT   r   s     r!   
decode_rowz PythonFeaturesDecoder.decode_row   s.    ^b^ktt}++C4CY+ZZZqttr#   r3   column_namec                 X    | j         r"| j                             ||| j                  n|S r   )r   decode_columnr   )rT   r3   r   s      r!   r   z#PythonFeaturesDecoder.decode_column   s5     }DM''tOe'fff	
r#   batchc                 V    | j         r!| j                             || j                  n|S r   )r   decode_batchr   rT   r   s     r!   r   z"PythonFeaturesDecoder.decode_batch   s.    ^b^kvt}))%4CY)ZZZqvvr#   rP   )r[   r\   r]   r
   r   rt   r:   r   boolrz   r   ru   r   r   r,   r#   r!   r   r      s        mq3 3 *3?GSRWX[]acgXgRhMhHi?j3 3 3 3ud ut u u u u
D 
s 
t 
 
 
 
w$ w4 w w w w w wr#   r   c                       e Zd Zdee         fdZdej        dej        fdZdej	        de
dej	        fdZd	ej        dej        fd
ZdS )PandasFeaturesDecoderr   c                     || _         d S rP   r   )rT   r   s     r!   rz   zPandasFeaturesDecoder.__init__   s     r#   r   r   c                       j         r% fd j                                         D             ni }|r7|                    |          |t          |                                          <   |S )Nc           	          i | ]:\  }}j         j        |         |t          t          t          |                    ;S r,   )r   _column_requires_decodingr   r   r   )r.   r   featurerT   s      r!   rd   z4PandasFeaturesDecoder.decode_row.<locals>.<dictcomp>   sT       (K=:;G3G<QSZ4[4[\\  r#   )r   re   	transformru   keys)rT   r   decodes   `  r!   r   z PandasFeaturesDecoder.decode_row   s     }    ,0M,?,?,A,A     	  	='*}}V'<'<CV[[]]##$
r#   r3   r   c                     | j         rH|| j         v r?| j         j        |         r-t          t          t          | j         |                             nd }|r|                    |          }|S rP   )r   r   r   r   r   r   )rT   r3   r   r   s       r!   r   z#PandasFeaturesDecoder.decode_column   sx     }!,!=!=$-BijuBv!= #7+@$-P[B\#]#]^^^ 	
  	.%%f--Fr#   r   c                 ,    |                      |          S rP   )r   r   s     r!   r   z"PandasFeaturesDecoder.decode_batch	  s    u%%%r#   N)r[   r\   r]   r
   r   rz   r   r   r   r   r:   r   r   r,   r#   r!   r   r      s        !(!3 ! ! ! !bl r|    BI C BI    &", &2< & & & & & &r#   r   c                       e Zd ZdZdej        ddfdZd Zd Zd Z	ddZ
d Zd Zd Zd Zd Zd Zd Zd Zedd            Zd Zd Zd
S )LazyDictzeA dictionary backed by Arrow data. The values are formatted on-the-fly when accessing the dictionary.rN   	formatter	Formatterc                     || _         || _        t                              |j                  | _        t          | j                                                  | _        d S rP   )	rN   r   rt   fromkeysrA   datasetr   keys_to_format)rT   rN   r   s      r!   rz   zLazyDict.__init__  sH     "MM("788	!$).."2"233r#   c                 *    t          | j                  S rP   )rE   r   rT   s    r!   __len__zLazyDict.__len__  s    49~~r#   c                     | j         |         }|| j        v r9|                     |          }|| j         |<   | j                            |           |S rP   )r   r   formatremoverT   r   values      r!   __getitem__zLazyDict.__getitem__  sS    	#$%%%KK$$E"DIcN&&s+++r#   c                 `    || j         v r| j                             |           || j        |<   d S rP   r   r   r   r   s      r!   __setitem__zLazyDict.__setitem__"  s6    $%%%&&s+++	#r#   r   Nc                 \    || j         v r| j                             |           | j        |= d S rP   r   rT   r   s     r!   __delitem__zLazyDict.__delitem__'  s4    $%%%&&s+++IcNNNr#   c                 *    t          | j                  S rP   )iterr   r   s    r!   __iter__zLazyDict.__iter__,  s    DIr#   c                     || j         v S rP   )r   r   s     r!   __contains__zLazyDict.__contains__/  s    dir#   c                 R    |                                   t          | j                  S rP   )_format_allreprr   r   s    r!   __repr__zLazyDict.__repr__2  s"    DIr#   c                    t          |t                    ry|                                 }|                                }|                                 |xj        |j                                        z  c_        |j        |j        z  |_        |S t          |t                    rG|                                 }|xj        |                                z  c_        |j        |z  |_        |S t          S rP   	r4   r   r   r   r   r   r   rt   NotImplementedrT   otherinsts      r!   __or__zLazyDict.__or__6  s    eX&& 	99;;DJJLLE5:??#4#44	EJ.DIKeT"" 	99;;D5::<</	E)DIKr#   c                    t          |t                    ry|                                 }|                                }|                                 |xj        |j                                        z  c_        |j        |j        z  |_        |S t          |t                    rG|                                 }|xj        |                                z  c_        ||j        z  |_        |S t          S rP   r   r   s      r!   __ror__zLazyDict.__ror__E  s    eX&& 	99;;DJJLLE5:??#4#44
TY.DIKeT"" 	99;;D5::<</	)DIKr#   c                 ^   t          |t                    re|                                }|                                 | xj        |j                                        z  c_        | xj        |j        z  c_        n2| xj        |                                z  c_        | xj        |z  c_        | S rP   )r4   r   r   r   r   r   r   )rT   r   s     r!   __ior__zLazyDict.__ior__T  s    eX&& 	JJLLE5:??#4#44II#III5::<</IIIIr#   c                    | j                             | j                   }|j                            | j                   | j        d                                         |j        d<   | j        d                                         |j        d<   |S )Nr   r   )	__class____new____dict__updater   )rT   r   s     r!   __copy__zLazyDict.__copy___  sv    ~%%dn55T]+++ $f 5 : : < <f*.-8H*I*N*N*P*P&'r#   c                 4    dd l }|                     |           S rI   r   )rT   r   s     r!   r   zLazyDict.copyh  s    yyr#   c                     t           rP   rQ   )clsiterabler   s      r!   r   zLazyDict.fromkeysm  s    !!r#   c                     t           rP   rQ   r   s     r!   r   zLazyDict.formatq  rV   r#   c                     | j         D ]}|                     |          | j        |<    | j                                          d S rP   )r   r   r   clearr   s     r!   r   zLazyDict._format_allt  sH    & 	. 	.C![[--DIcNN!!#####r#   )r   NrP   )r[   r\   r]   r^   r_   r   rz   r   r   r   r   r   r   r   r   r   r   r  r   classmethodr   r   r   r,   r#   r!   r   r     s5       oo4 4k 4 4 4 4      
   
             	 	 	    
 " " " ["" " "$ $ $ $ $r#   r   c                       e Zd Zd ZdS )LazyRowc                 t    | j                             | j                            |g                    d         S rI   r   format_columnrN   r;   r   s     r!   r   zLazyRow.format{  s/    ~++DM,@,@#,G,GHHKKr#   Nr[   r\   r]   r   r,   r#   r!   r  r  z  s(        L L L L Lr#   r  c                       e Zd Zd ZdS )	LazyBatchc                 h    | j                             | j                            |g                    S rP   r  r   s     r!   r   zLazyBatch.format  s*    ~++DM,@,@#,G,GHHHr#   Nr  r,   r#   r!   r  r    s(        I I I I Ir#   r  c                       e Zd ZdZeZeZeZ	e
Z	 	 ddee         deeeeeedf         f                  fdZdej        dedeeeef         fd	Zdej        defd
Zdej        defdZdej        defdZdS )r   z
    A formatter is an object that extracts and formats data from pyarrow tables.
    It defines the formatting for rows, columns and batches.
    Nr   r   c                     || _         || _        t          | j         | j                  | _        t	          | j                   | _        d S rP   )r   r   r   python_features_decoderr   pandas_features_decoderr   s      r!   rz   zFormatter.__init__  sB    
 !!2'<T]DLb'c'c$'<T]'K'K$$$r#   rN   
query_typer   c                     |dk    r|                      |          S |dk    r|                     |          S |dk    r|                     |          S d S Nr   r3   r   )
format_rowr  format_batch)rT   rN   r  s      r!   __call__zFormatter.__call__  se    ??8,,,8##%%h///7""$$X... #"r#   c                     t           rP   rQ   rS   s     r!   r  zFormatter.format_row  rV   r#   c                     t           rP   rQ   rS   s     r!   r  zFormatter.format_column  rV   r#   c                     t           rP   rQ   rS   s     r!   r  zFormatter.format_batch  rV   r#   NN)r[   r\   r]   r^   rh   simple_arrow_extractorro   python_arrow_extractorrw   numpy_arrow_extractorr   pandas_arrow_extractorr
   r   rt   r:   r   r   rz   r_   r   r   r   r   r  r  r  r  r,   r#   r!   r   r     s6        
 21/1 (,IML L8$L $DeCtO.D)D$EFL L L L/ /s /uYP\^iEi?j / / / /"28 "	 " " " ""bh "< " " " ""RX "+ " " " " " "r#   r   c                       e Zd ZdefdZdS )TensorFormatterdata_structc                     t           rP   rQ   )rT   r'  s     r!   recursive_tensorizez#TensorFormatter.recursive_tensorize  rV   r#   N)r[   r\   r]   rt   r)  r,   r#   r!   r&  r&    s/        "t " " " " " "r#   r&  c                   $    e Zd ZU eed<   eed<   dS )TableFormatter
table_typecolumn_typeN)r[   r\   r]   r:   __annotations__r,   r#   r!   r+  r+    s'         OOOr#   r+  c                       e Zd ZdZdZdej        dej        fdZdej        dej        fdZ	dej        dej        fdZ
dS )	ArrowFormatterzarrow tablezarrow arrayrN   r   c                 P    |                                                      |          S rP   )r!  rU   rS   s     r!   r  zArrowFormatter.format_row  s"    **,,88BBBr#   c                 P    |                                                      |          S rP   )r!  rX   rS   s     r!   r  zArrowFormatter.format_column  s"    **,,;;HEEEr#   c                 P    |                                                      |          S rP   )r!  rZ   rS   s     r!   r  zArrowFormatter.format_batch  s"    **,,::8DDDr#   N)r[   r\   r]   r,  r-  r_   r   r  rm   r  r  r,   r#   r!   r0  r0    s        JKC28 C C C C CFbh F28 F F F FERX E"( E E E E E Er#   r0  c                   n     e Zd Zd	 fd	Zdej        defdZdej        defdZ	dej        defdZ
 xZS )
PythonFormatterNFc                 Z    t                                          ||           || _        d S rP   )superrz   lazy)rT   r   r8  r   r   s       r!   rz   zPythonFormatter.__init__  s)    #4555			r#   rN   r   c                     | j         rt          ||           S |                                                     |          }| j                            |          }|S rP   )r8  r  r"  rU   r  r   rT   rN   r   s      r!   r  zPythonFormatter.format_row  sV    9 	+8T***))++77AA*55c::
r#   c                     |                                                      |          }| j                            ||j        d                   }|S rI   )r"  rX   r  r   rA   rT   rN   r3   s      r!   r  zPythonFormatter.format_column  G    ,,..==hGG-;;FHDYZ[D\]]r#   c                     | j         rt          ||           S |                                                     |          }| j                            |          }|S rP   )r8  r  r"  rZ   r  r   rT   rN   r   s      r!   r  zPythonFormatter.format_batch  sV    9 	-Xt,,,++--;;HEE,99%@@r#   )NFN)r[   r\   r]   rz   r_   r   r   r  ru   r  r  __classcell__r   s   @r!   r5  r5    s             28     bh 4    
RX '        r#   r5  c                       e Zd ZdZdZdej        dej        fdZ	dej        dej
        fdZdej        dej        fdZdS )	PandasFormatterzpandas dataframezpandas seriesrN   r   c                     |                                                      |          }| j                            |          }|S rP   )r$  rU   r  r   r:  s      r!   r  zPandasFormatter.format_row  s<    ))++77AA*55c::
r#   c                     |                                                      |          }| j                            ||j        d                   }|S rI   )r$  rX   r  r   rA   r<  s      r!   r  zPandasFormatter.format_column  r=  r#   c                     |                                                      |          }| j                            |          }|S rP   )r$  rZ   r  r   r:  s      r!   r  zPandasFormatter.format_batch  s<    ))++99(CC*77<<
r#   N)r[   r\   r]   r,  r-  r_   r   r   r   r  r   r  r  r,   r#   r!   rC  rC    s        #J!K28     
bh 29    
RX ",      r#   rC  c                        e Zd ZdZd
deegef         f fdZdej        defdZ	dej        de
fdZdej        defd	Z xZS )CustomFormattera  
    A user-defined custom formatter function defined by a ``transform``.
    The transform must take as input a batch of data extracted for an arrow table using the python extractor,
    and return a batch.
    If the output batch is not a dict, then output_all_columns won't work.
    If the output batch has several fields, then querying a single column won't work since we don't know which field
    to return.
    Nr   c                 \    t                                          ||           || _        d S )Nr   )r7  rz   r   )rT   r   r   r   kwargsr   s        r!   rz   zCustomFormatter.__init__  s,    (>OPPP"r#   rN   r   c                     |                      |          }	 t          |          S # t          $ r}t          d|           |d }~ww xY w)Nz]Custom formatting function must return a dict of sequences to be able to pick a row, but got )r  rf   	Exceptionr%   rT   rN   formatted_batchexcs       r!   r  zCustomFormatter.format_row  sq    ++H55	?+++ 	 	 	 Bp  B  B 	s   & 
AAAc                    |                      |          }t          |d          rXt          |                                          dk    r2t	          dt          |                                           d          nt	          d|           	 ||j        d                  S # t          $ r}t	          d|           |d }~ww xY w)Nr   r   zTried to query a column but the custom formatting function returns too many columns. Only one column was expected but got columns .zPCustom formatting function must return a dict to be able to pick a row, but got r   )r  hasattrrE   r   r%   ru   rA   rL  rM  s       r!   r  zCustomFormatter.format_column  s   ++H55?F++ 		?''))**Q..dDHI]I]I_I_D`D`d d d   / tcrtt  	"8#8#;<< 	 	 	tcrtt 	s   B$ $
C.CCc                     |                                                      |          }| j                            |          }|                     |          S rP   )r"  rZ   r  r   r   r?  s      r!   r  zCustomFormatter.format_batch  sI    ++--;;HEE,99%@@~~e$$$r#   r   )r[   r\   r]   r^   r   rt   rz   r_   r   r  r   r  r  r@  rA  s   @r!   rH  rH    s         # #(D64<"8 # # # # # #28     bh <    &%RX %$ % % % % % % % %r#   rH  columnsc                 8    | |vrt          d|  d|           d S )NzColumn z5 not in the dataset. Current columns in the dataset: )KeyError)r   rT  s     r!   _check_valid_column_keyrW     s5    
'ddd[bddeee r#   sizec                    t          | t                    r,| dk     r	| |z   dk     s| |k    rt          d|  d|           d S t          | t                    rd S t          | t                    rSt          |           dk    r>t          t          |           |           t          t          |           |           d S d S t          | t                    rmt          |           dk    rXt          t          t          |                     |           t          t          t          |                     |           d S d S t          |            d S )Nr   zInvalid key: z is out of bounds for size )rX  )r4   r5   
IndexErrorr8   r9   rE   _check_valid_index_keymaxminr   r'   )r   rX  s     r!   r[  r[  %  sP   #s !!GGd
QC4KKSSSSTSSTTT	C		 !	C		 	!s88a<<"3s88$7777"3s88$777777 < 
C	"	" !s88a<<"3s3xx==t<<<<"3s3xx==t<<<<<< < 	C     r#   c                     t          | t          j                  rdS t          | t                    rdS t          | t          t
          t          f          rdS t          |            d S r  )r4   numbersIntegralr:   r8   r9   r   r'   r    s    r!   key_to_query_typera  8  se    #w'(( u	C		 x	C%1	2	2 wr#   c                    t          |t          t          t          t          t
          f          s5	 t          j        |          }n# t          $ r t          |           Y nw xY wt          |t                    rt          || j                   n ||j        n| j        }t          ||           |t          | |          }nt          | ||          }|S )a1  
    Query a Table to extract the subtable that correspond to the given key.

    Args:
        table (``datasets.table.Table``): The input Table to query from
        key (``Union[int, slice, range, str, Iterable]``): The key can be of different types:
            - an integer i: the subtable containing only the i-th row
            - a slice [i:j:k]: the subtable containing the rows that correspond to this slice
            - a range(i, j, k): the subtable containing the rows that correspond to this range
            - a string c: the subtable containing all the rows but only the column c
            - an iterable l: the subtable that is the concatenation of all the i-th rows for all i in the iterable
        indices (Optional ``datasets.table.Table``): If not None, it is used to re-map the given key to the table rows.
            The indices table must contain one column named "indices" of type uint64.
            This is used in case of shuffling or rows selection.


    Returns:
        ``pyarrow.Table``: the result of the query on the input table
    N)r)   )r4   r5   r8   r9   r:   r   operatorindexr%   r'   rW  rA   r6   r[  r7   r=   )r(   r   r)   rX  pa_subtables        r!   query_tablerf  B  s    2 cCsH=>> %	%.%%CC 	% 	% 	%$$$$$	%#s *U%78888#*#6wENsD)))"5#..7sGTTTs   A A! A!Fr   format_columnsc                 <   t          | t                    r| j        }n| }t          |          }t	          |j                  } |||          S |dk    r|v r |||          S  |||          S |                    fd|j        D                       } |||          }	|rpt          |	t                    rI|                    fd|j        D                       }
 ||
|          }|		                    |           nt          d|	           |	S )a  
    Format a Table depending on the key that was used and a Formatter object.

    Args:
        table (``datasets.table.Table``): The input Table to format
        key (``Union[int, slice, range, str, Iterable]``): Depending on the key that was used, the formatter formats
            the table as either a row, a column or a batch.
        formatter (``datasets.formatting.formatting.Formatter``): Any subclass of a Formatter such as
            PythonFormatter, NumpyFormatter, etc.
        format_columns (:obj:`List[str]`, optional): if not None, it defines the columns that will be formatted using the
            given formatter. Other columns are discarded (unless ``output_all_columns`` is True)
        output_all_columns (:obj:`bool`, defaults to False). If True, the formatted output is completed using the columns
            that are not in the ``format_columns`` list. For these columns, the PythonFormatter is used.


    Returns:
        A row, column or batch formatted object defined by the Formatter:
        - the PythonFormatter returns a dictionary for a row or a batch, and a list for a column.
        - the NumpyFormatter returns a dictionary for a row or a batch, and a np.array for a column.
        - the PandasFormatter returns a pd.DataFrame for a row or a batch, and a pd.Series for a column.
        - the TorchFormatter returns a dictionary for a row or a batch, and a torch.Tensor for a column.
        - the TFFormatter returns a dictionary for a row or a batch, and a tf.Tensor for a column.
    r   N)r  r3   c              3   $   K   | ]
}|v|V  d S rP   r,   r.   r   rg  s     r!   r   zformat_table.<locals>.<genexpr>  s/      *m*m3SV^lSlSl3SlSlSlSl*m*mr#   c              3   $   K   | ]
}|v |V  d S rP   r,   rj  s     r!   r   zformat_table.<locals>.<genexpr>  s:       @ @C><Q<QC<Q<Q<Q<Q@ @r#   z\Custom formatting function must return a dict to work with output_all_columns=True, but got )r4   r   r(   ra  r5  r   r@   rA   r   r   r%   )r(   r   r   rg  output_all_columnsrN   r  python_formatterpa_table_to_formatformatted_outputpa_table_with_remaining_columnsremaining_columns_dicts      `        r!   format_tablerr  m  s   < % ;"3''J&	0BCCCyj9999	x		.  9Xz222##HDDDD%]]*m*m*m*m(:O*m*m*mmm$9%7JOOO 
	*N;; 	2:-- @ @ @ @#+#8@ @ @ 3 3/ *:)9:Yfp)q)q)q& ''(>???? F  tD  F  F    r#   rP   )NF)Kr_  rc  collections.abcr   r   r   	functoolsr   typingr   r   r	   r
   r   r   numpyrB   pandasr   pyarrowr_   r   r   features.featuresr   r   r   r   r(   r   utils.py_utilsr   r   r   r   r   r9   r   r"   r'   r5   r8   r:   r=   r7   rm   rK   rM   rt   ru   rf   rh   ro   r   rw   r   r   r   r   r   r   r  r  r   r&  r+  r0  r5  rC  rH  rW  r[  ra  rf  rr  r,   r#   r!   <module>r{     sT     = = = = = = = = = =       D C C C C C C C C C C C C C C C                   u u u u u u u u u u u u       3 3 3 3 3 3 GCLLGK  	w~&&gm$$3e 3 3 3 3 3S    S%X=>INX   < E#ueS(*J$K PRPX    2#28 # # # # #" " " " "L+!EF " " ""=T#tAw,' =DaL = = = =
    -bh"(.JK   $ $ $ $ $-dD$.>? $ $ $1/ 1/ 1/ 1/ 1/,T2:t-CD 1/ 1/ 1/hD D D D D-blBIr|.ST D D Dw w w w w w w w*& & & & & & & &@j$ j$ j$ j$ j$~ j$ j$ j$ZL L L L Lh L L L
I I I I I I I I
$" $" $" $" $"	<<= $" $" $"N" " " " "i	< DE " " "
    Yy,CD   
E E E E E^BHbh$@A E E E    iw 67   2    nR\29bl%JK   (-% -% -% -% -%ilD 89 -% -% -%`f ftCy fT f f f f
!c5%&A B !# !RV ! ! ! !&5eUC!AB s      $( ((	sE5#x/	0( e_( X	( ( ( (^ &*9  9 9 	sE5#x/	09  9  TN	9  9  9  9  9  9 r#   