
    PiF                    N   d dl mZ d dlmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d dlZd d
lmZmZ d dlmZ d dlmZmZ d dlmZmZmZm Z  d dl!m"Z"m#Z#m$Z$ erd dlm%Z% e j&        e j'        e j(        e j)        e j*        e j+        e j+        dZ,e j(        ej-        dfe j+        ej.        efe j&        ej/        dfe j'        ej/        dfe j)        ej/        dfe j0        ej.        dfe j*        ej1        d fiZ2ej/        dej-        dej.        diZ3 G d de          Z4dS )    )annotations)TYPE_CHECKINGAnyN)using_python_scalars)infer_dtype)iNaT)NoBufferPresent)cache_readonly)BaseMaskedDtype)
ArrowDtypeDatetimeTZDtype)is_string_dtype)PandasBufferPandasBufferPyarrow)ColumnColumnBuffersColumnNullType	DtypeKind)ArrowCTypes
Endiannessdtype_to_arrow_c_fmt)Buffer)iufbUMmzThis column is non-nullablezThis column uses NaN as nullz!This column uses a sentinel valuec                      e Zd ZdZd"d#d	Zd$dZed$d            Zed%d            Z	d%dZ
ed             Zed             Zed$d            Zed&d            Zd$dZd'd(dZd)dZd*dZd+dZd,d!ZdS )-PandasColumna  
    A column object, with only the methods and properties required by the
    interchange protocol defined.
    A column can contain one or more chunks. Each chunk can contain up to three
    buffers - a data buffer, a mask buffer (depending on null representation),
    and an offsets buffer (if variable-size binary; e.g., variable-length
    strings).
    Note: this Column object can only be produced by ``__dataframe__``, so
          doesn't need its own version or ``__column__`` protocol.
    Tcolumn	pd.Series
allow_copyboolreturnNonec                    t          |t          j                  rt          d|j         d          t          |t          j                  s t          dt          |           d          || _        || _	        dS )zu
        Note: doesn't deal with extension arrays yet, just assume a regular
        Series/ndarray for now.
        zExpected a Series, got a DataFrame. This likely happened because you called __dataframe__ on a DataFrame which, after converting column names to string, resulted in duplicated names: zD. Please rename these columns before using the interchange protocol.zColumns of type  not handled yetN)

isinstancepd	DataFrame	TypeErrorcolumnsSeriesNotImplementedErrortype_col_allow_copy)selfr#   r%   s      r/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/pandas/core/interchange/column.py__init__zPandasColumn.__init__V   s    
 fbl++ 	2 !.2 2 2   &"),, 	Y%&Wf&W&W&WXXX 	%    intc                    | j         j        S )z2
        Size of the column, in elements.
        )r3   sizer5   s    r6   r;   zPandasColumn.sizej   s     y~r8   c                    dS )z7
        Offset of first element. Always zero.
        r    r<   s    r6   offsetzPandasColumn.offsetp   s	     qr8   tuple[DtypeKind, int, str, str]c                   | j         j        }t          |t          j                  rJ| j         j        j        }|                     |j                  \  }}}}t          j	        ||t          j        fS t          |          rLt          | j                   dv r't          j        dt          |          t          j        fS t!          d          |                     |          S )N)stringempty   z.Non-string object dtypes are not supported yet)r3   dtyper+   r,   CategoricalDtypevaluescodes_dtype_from_pandasdtyper   CATEGORICALr   NATIVEr   r   STRINGr   r1   )r5   rE   rH   _bitwidthc_arrow_dtype_f_strs         r6   rE   zPandasColumn.dtypex   s    	eR011 	7I$*E ,,U[99# %#!	  U## 
	749%%)<<<$(//%	  &&VWWW//666r8   c                   t                               |j        d          }|t          d| d          t	          |t
                    r|j        j        }nKt	          |t                    r|j	        j        }n)t	          |t                    r|j        j        }n|j        }|dk    r||j        t          j        |fS ||j        dz  t          |          |fS )z/
        See `self.dtype` for details.
        N
Data type z& not supported by interchange protocolzbool[pyarrow]rD   )	_NP_KINDSgetkind
ValueErrorr+   r   numpy_dtype	byteorderr   baser   itemsizer   BOOLr   )r5   rE   rT   rW   s       r6   rI   z$PandasColumn._dtype_from_pandasdtype   s     }}UZ..<W%WWWXXXeZ(( 	()3II// 	(
,II// 	()3IIIO##  	  U^a')=e)D)DiOOr8   c                    | j         d         t          j        k    st          d          | j        j        j        dt          t          j	        | j        j        j
                            dS )a:  
        If the dtype is categorical, there are two options:
        - There are only values in the data buffer.
        - There is a separate non-categorical Column encoding for categorical values.

        Raises TypeError if the dtype is not categorical

        Content of returned dict:
            - "is_ordered" : bool, whether the ordering of dictionary indices is
                             semantically meaningful.
            - "is_dictionary" : bool, whether a dictionary-style mapping of
                                categorical values to other objects exists
            - "categories" : Column representing the (implicit) mapping of indices to
                             category values (e.g. an array of cat1, cat2, ...).
                             None if not a dictionary-style categorical.
        r   zCdescribe_categorical only works on a column with categorical dtype!T)
is_orderedis_dictionary
categories)rE   r   rJ   r.   r3   catorderedr"   r,   r0   r^   r<   s    r6   describe_categoricalz!PandasColumn.describe_categorical   sg    $ z!}	 555U  
 )-/!&ry1I'J'JKK
 
 	
r8   c                   t          | j        j        t                    rt          j        }d}||fS t          | j        j        t                    rP| j        j        j        j	        d         
                                d         t          j        d fS t          j        dfS | j        d         }	 t          |         \  }}n&# t          $ r}t          d| d          |d }~ww xY w||fS )N   r   rQ   z not yet supported)r+   r3   rE   r   r   USE_BYTEMASKr   array	_pa_arraychunksbuffersNON_NULLABLEUSE_BITMASK_NULL_DESCRIPTIONKeyErrorr1   )r5   column_null_dtype
null_valuerT   nullvalueerrs          r6   describe_nullzPandasColumn.describe_null   s    dio77 	1 . ;J$j00dioz22 	1 y(/2::<<Q?G%2D88!-q00z!}	V+D1KD%% 	V 	V 	V%&K4&K&K&KLLRUU	V U{s   /C   
C#
CC#c                    | j                                                                         }t                      s|                                }|S )zB
        Number of null elements. Should always be known.
        )r3   isnasumr   item)r5   results     r6   
null_countzPandasColumn.null_count   sA    
 !!%%''#%% 	#[[]]Fr8   dict[str, pd.Index]c                    d| j         j        iS )z8
        Store specific metadata of the column.
        zpandas.index)r3   indexr<   s    r6   metadatazPandasColumn.metadata   s    
 	00r8   c                    dS )zE
        Return the number of chunks the column consists of.
        rc   r>   r<   s    r6   
num_chunkszPandasColumn.num_chunks   s	     qr8   Nn_chunks
int | Nonec              #     K   |rr|dk    rlt          | j                  }||z  }||z  dk    r|dz  }t          d||z  |          D ].}t          | j        j        |||z            | j                  V  /dS | V  dS )zy
        Return an iterator yielding the chunks.
        See `DataFrame.get_chunks` for details on ``n_chunks``.
        rc   r   N)lenr3   ranger"   ilocr4   )r5   r   r;   stepstarts        r6   
get_chunkszPandasColumn.get_chunks   s      
  
	1ty>>D8#Dh!##	q$/488  "IN554<#78$:J      
 JJJJJr8   r   c                    |                                  ddd}	 |                                 |d<   n# t          $ r Y nw xY w	 |                                 |d<   n# t          $ r Y nw xY w|S )a`  
        Return a dictionary containing the underlying buffers.
        The returned dictionary has the following contents:
            - "data": a two-element tuple whose first element is a buffer
                      containing the data and whose second element is the data
                      buffer's associated dtype.
            - "validity": a two-element tuple whose first element is a buffer
                          containing mask values indicating missing data and
                          whose second element is the mask value buffer's
                          associated dtype. None if the null representation is
                          not a bit or byte mask.
            - "offsets": a two-element tuple whose first element is a buffer
                         containing the offset values for variable-size binary
                         data (e.g., variable-length strings) and whose second
                         element is the offsets buffer's associated dtype. None
                         if the data buffer does not have an associated offsets
                         buffer.
        N)datavalidityoffsetsr   r   )_get_data_buffer_get_validity_bufferr	   _get_offsets_buffer)r5   rh   s     r6   get_bufferszPandasColumn.get_buffers  s    ( ))++"
 "
	"&";";"="=GJ 	 	 	D		!%!9!9!;!;GI 	 	 	D	 s   2 
??A 
A('A(.tuple[Buffer, tuple[DtypeKind, int, str, str]]c                   | j         d         t          j        k    rt          | j         d                   dk    r2| j        j                            d                                          }n| j                                        }t          || j	                  }t          j
        dt          j        t          j        f}nF| j         d         t          j
        t          j        t          j        t          j        fv r| j         }| j        j        }t'          | j        j         t(                    rL|j        j        d         }t/          |                                d         t          |                    }||fS t'          | j        j         t2                    r|j        }n|j        }t          || j	                  }nG| j         d         t          j        k    rB| j        j        j        }t          || j	                  }|                     |j                   }n| j         d         t          j         k    r| j                                        }tC                      }|D ]@}t'          |tD                    r)|#                    |$                    d	
                     At          tK          j&        |d                    }t          j        dt          j'        t          j        f}ntQ          d| j        j          d          ||fS )zZ
        Return the buffer containing the data and the buffer's associated dtype.
        r         N)r%   @   rc   lengthutf-8encodinguint8)rE   rD   rQ   r*   ))rE   r   DATETIMEr   r3   dt
tz_convertto_numpyr   r4   INTr   INT64r   rK   UINTFLOATrZ   re   r+   r   rf   rg   r   rh   r   _data_ndarrayrJ   rG   _codesrI   rL   	bytearraystrextendencodenp
frombufferUINT8r1   )	r5   np_arrbufferrE   arrrH   bufr   objs	            r6   r   zPandasColumn._get_data_buffer5  s    :a=I... 4:a=!!A%%0066??AA++--!&T5EFFFF!!	EE Z]MNON	
 
 
 JE)/C$)/:66 % m*1-,KKMM!$s88   u}$$)/?;; &!&T5EFFFFFZ]i333I$+E!%D4DEEEF00==EEZ]i...)$$&&CA  ; ;c3'' ;HHSZZZ99::: ""-"A"A"ABBF !!	EE &&T49?&T&T&TUUUu}r8   tuple[Buffer, Any] | Nonec                z   | j         \  }}t          | j        j        t                    r| j        j        j        j        d         }t          j	        dt          j	        t          j        f}|                                d         dS t          |                                d         t          |                    }||fS t          | j        j        t                     rH| j        j        j        }t%          |          }t          j	        dt          j	        t          j        f}||fS | j        d         t          j        k    r| j                                        }|dk    }| }t+          j        t          |          ft*          j                  }t1          |          D ]!\  }	}
t          |
t2                    r|n|||	<   "t%          |          }t          j	        dt          j	        t          j        f}||fS 	 t4          |          d}n"# t6          $ r}t9          d          |d}~ww xY wt;          |          )	z
        Return the buffer containing the mask values indicating missing data and
        the buffer's associated dtype.
        Raises NoBufferPresent if null representation is not a bit or byte mask.
        r   rc   Nr   rD   shaperE   z! so does not have a separate maskzSee self.describe_null)rr   r+   r3   rE   r   re   rf   rg   r   rZ   r   r   rK   rh   r   r   r   _maskr   rL   r   r   zerosbool_	enumerater   _NO_VALIDITY_BUFFERrl   r1   r	   )r5   ro   invalidr   rE   r   maskr   validr   r   msgrq   s                r6   r   z!PandasColumn._get_validity_buffer  s    *gdioz22 	! )/+215C^Q(8*:KLE{{}}Q't(a 3xx  F 5= dio77 	!9?(D!$''F^Q(8*:KLE5= :a=I,,, )$$&&C qLEiG83s88+RX>>>D#C.. E E3#-c3#7#7D%%WQ "$''F ^Q(8*:KLE5= 	I(.QQQCC 	I 	I 	I%&>??SH	I c"""s   ;H 
H+H&&H+tuple[PandasBuffer, Any]c                   | j         d         t          j        k    r| j                                        }d}t          j        t          |          dz   ft
          j                  }t          |          D ]J\  }}t          |t                    r(|                    d          }|t          |          z  }|||dz   <   Kt          |          }t          j        dt          j        t"          j        f}nt'          d          ||fS )a  
        Return the buffer containing the offset values for variable-size binary
        data (e.g., variable-length strings) and the buffer's associated dtype.
        Raises NoBufferPresent if the data buffer does not have an associated
        offsets buffer.
        r   rc   r   r   r   r   zJThis column has a fixed-length dtype so it does not have an offsets buffer)rE   r   rL   r3   r   r   r   r   int64r   r+   r   r   r   r   r   r   r   rK   r	   )	r5   rG   ptrr   r   vr   r   rE   s	            r6   r   z PandasColumn._get_offsets_buffer  s    :a=I,,,Y''))FChc&kkAo%7rxHHHG!&)) % %1 a%% "'22A3q66MC!$A "'**F !!	EE "5  
 u}r8   )T)r#   r$   r%   r&   r'   r(   )r'   r9   )r'   r@   )r'   ry   )N)r   r   )r'   r   )r'   r   )r'   r   )r'   r   )__name__
__module____qualname____doc__r7   r;   propertyr?   r
   rE   rI   ra   rr   rx   r|   r~   r   r   r   r   r   r>   r8   r6   r"   r"   J   s       	 	& & & & &(       X 7 7 7 ^7:P P P PB 
 
 X
8   X&    ^ 1 1 1 X1       "# # # #JI I I IV7# 7# 7# 7#r& & & & & &r8   r"   )5
__future__r   typingr   r   numpyr   pandas._configr   pandas._libs.libr   pandas._libs.tslibsr   pandas.errorsr	   pandas.util._decoratorsr
   pandas.core.dtypes.dtypesr   pandasr,   r   r   pandas.api.typesr   pandas.core.interchange.bufferr   r   *pandas.core.interchange.dataframe_protocolr   r   r   r   pandas.core.interchange.utilsr   r   r   r   r   r   r   rZ   rL   r   rR   USE_NANUSE_SENTINELri   rJ   rd   rk   r   r"   r>   r8   r6   <module>r      s   " " " " " "       
     / / / / / / ( ( ( ( ( ( $ $ $ $ $ $ ) ) ) ) ) ) 2 2 2 2 2 2 5 5 5 5 5 5            - , , , , ,                             BAAAAAA 
									 	 On,d34d;MN/6N^0$7N^0$7 N7<~2A6  !>:!D U U U U U6 U U U U Ur8   