
    &`i                         d dl Z d dlmZmZmZmZ d dlZd dlZ	d dl
mZ d dlmZ  e j        e          Z ed           G d de                      ZdS )	    N)AnyDictListOptional)Preprocessor)	PublicAPIalpha)	stabilityc                        e Zd ZdZdZ	 	 	 	 ddee         dedeej	                 de
d	e
f
 fd
Zdej        ddfdZdej        fdZdee         fdZdee         fdZd Zdeeef         ddf fdZ xZS )Concatenatora  Combine numeric columns into a column of type
    :class:`~ray.air.util.tensor_extensions.pandas.TensorDtype`. Only columns
    specified in ``columns`` will be concatenated.

    This preprocessor concatenates numeric columns and stores the result in a new
    column. The new column contains
    :class:`~ray.air.util.tensor_extensions.pandas.TensorArrayElement` objects of
    shape :math:`(m,)`, where :math:`m` is the number of columns concatenated.
    The :math:`m` concatenated columns are dropped after concatenation.
    The preprocessor preserves the order of the columns provided in the ``colummns``
    argument and will use that order when calling ``transform()`` and ``transform_batch()``.

    Examples:
        >>> import numpy as np
        >>> import pandas as pd
        >>> import ray
        >>> from ray.data.preprocessors import Concatenator

        :py:class:`Concatenator` combines numeric columns into a column of
        :py:class:`~ray.air.util.tensor_extensions.pandas.TensorDtype`.

        >>> df = pd.DataFrame({"X0": [0, 3, 1], "X1": [0.5, 0.2, 0.9]})
        >>> ds = ray.data.from_pandas(df)  # doctest: +SKIP
        >>> concatenator = Concatenator(columns=["X0", "X1"])
        >>> concatenator.transform(ds).to_pandas()  # doctest: +SKIP
           concat_out
        0  [0.0, 0.5]
        1  [3.0, 0.2]
        2  [1.0, 0.9]

        By default, the created column is called `"concat_out"`, but you can specify
        a different name.

        >>> concatenator = Concatenator(columns=["X0", "X1"], output_column_name="tensor")
        >>> concatenator.transform(ds).to_pandas()  # doctest: +SKIP
               tensor
        0  [0.0, 0.5]
        1  [3.0, 0.2]
        2  [1.0, 0.9]

        >>> concatenator = Concatenator(columns=["X0", "X1"], dtype=np.float32)
        >>> concatenator.transform(ds)  # doctest: +SKIP
        Dataset(num_rows=3, schema={Y: object, concat_out: TensorDtype(shape=(2,), dtype=float32)})

        When ``flatten=True``, nested vectors in the columns will be flattened during concatenation:

        >>> df = pd.DataFrame({"X0": [[1, 2], [3, 4]], "X1": [0.5, 0.2]})
        >>> ds = ray.data.from_pandas(df)  # doctest: +SKIP
        >>> concatenator = Concatenator(columns=["X0", "X1"], flatten=True)
        >>> concatenator.transform(ds).to_pandas()  # doctest: +SKIP
           concat_out
        0  [1.0, 2.0, 0.5]
        1  [3.0, 4.0, 0.2]

    Args:
        columns: A list of columns to concatenate. The provided order of the columns
             will be retained during concatenation.
        output_column_name: The desired name for the new column.
            Defaults to ``"concat_out"``.
        dtype: The ``dtype`` to convert the output tensors to. If unspecified,
            the ``dtype`` is determined by standard coercion rules.
        raise_if_missing: If ``True``, an error is raised if any
            of the columns in ``columns`` don't exist.
            Defaults to ``False``.
        flatten: If ``True``, nested vectors in the columns will be flattened during
            concatenation. Defaults to ``False``.

    Raises:
        ValueError: if `raise_if_missing` is `True` and a column in `columns` or
            doesn't exist in the dataset.
    F
concat_outNcolumnsoutput_column_namedtyperaise_if_missingflattenc                     t                                                       || _        || _        || _        || _        || _        d S N)super__init__r   r   r   r   r   )selfr   r   r   r   r   	__class__s         w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/data/preprocessors/concatenator.pyr   zConcatenator.__init__Y   sD     	"4
 0    dfreturnc                     t          | j                  t          |          z
  }|r?d| j         d| }| j        rt          |          t                              |           d S d S )NzMissing columns specified in 'z': )setr   r   
ValueErrorloggerwarning)r   r   missing_columnsmessages       r   	_validatezConcatenator._validateh   s{    dl++c"gg5 	(SSS/SS  $ ( )))w'''''	( 	(r   c                                           |            j        r.| j                                                 } fd|D             }n&| j                                      j                  }|                     j                  }t          j        t          |                    |j	        d d  j
        f<   |S )Nc                 P    g | ]"}t          j        fd |D                       #S )c                     g | ]I}j         t          j        |          n+t          j        |                              j                   JS r   )r   np
atleast_1dastype).0elemr   s     r   
<listcomp>z=Concatenator._transform_pandas.<locals>.<listcomp>.<listcomp>z   s_        !  :- d+++]40077
CC  r   )r(   concatenate)r+   rowr   s     r   r-   z2Concatenator._transform_pandas.<locals>.<listcomp>x   s_     
 
 
      %(	   
 
 
r   )r   r   )r$   r   r   to_numpyr   droppdSerieslistlocr   )r   r   concatenateds   `  r   _transform_pandaszConcatenator._transform_pandass   s    r< 	Gdl+4466L
 
 
 
 (
 
 
LL dl+444:4FFLWWT\W** .0YtL7I7I-J-Jqqq$))*	r   c                     | j         S r   r0   r   s    r   get_input_columnszConcatenator.get_input_columns   s
    |r   c                     | j         gS r   )r   r:   s    r   get_output_columnszConcatenator.get_output_columns   s    '((r   c                     dd d ddd}g }|                                 D ]5\  }}t          | |          }||k    r|                    | d|            6| j        j         dd                    |           dS )Nr   F)r   r   r   r   r   =(z, ))itemsgetattrappendr   __name__join)r   default_valuesnon_default_arguments	parameterdefault_valuevalues         r   __repr__zConcatenator.__repr__   s    ". %
 
 !#(6(<(<(>(> 	E 	E$I}D),,E%%%,,	-C-CE-C-CDDD.)OODII6K,L,LOOOOr   statec                 |    t                                          |           t          | d          s	d| _        d S d S )Nr   F)r   __setstate__hasattrr   )r   rM   r   s     r   rO   zConcatenator.__setstate__   sD    U### tY'' 	! DLLL	! 	!r   )r   NFF)rE   
__module____qualname____doc___is_fittabler   strr   r(   r   boolr   r3   	DataFramer$   r8   r;   r=   rL   r   r   rO   __classcell__)r   s   @r   r   r      sd       F FP L
 #/$(!& c   !	
       	(BL 	(T 	( 	( 	( 	(BL    249    )DI ) ) ) )P P P"!$sCx. !T ! ! ! ! ! ! ! ! ! !r   r   )loggingtypingr   r   r   r   numpyr(   pandasr3   ray.data.preprocessorr   ray.util.annotationsr   	getLoggerrE   r    r    r   r   <module>ra      s     , , , , , , , , , , , ,         . . . . . . * * * * * *		8	$	$ WZ! Z! Z! Z! Z!< Z! Z! Z! Z! Z!r   