
    Pi<V                    Z   d dl mZ d dlZd dlmZ d dlZd dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmc mZ d dlmZ d d	lmZ d d
lmZ erd dlmZ d dlmZ d dlmZ d(dZ  ed          	 	 	 	 	 	 d)d*d            Z! ed          d+d,d!            Z" ed          	 d-d.d'            Z#dS )/    )annotationsN)TYPE_CHECKING)
set_module)is_iteratoris_list_like)concat_compat)notna)
MultiIndex)concat)
to_numeric)Hashable)AnyArrayLike)	DataFramevariablestrreturnlistc                    | ]t          |           s| gS t          |t                    r't          | t                    st	          | d          t          |           S g S )Nz7 must be a list of tuples when columns are a MultiIndex)r   
isinstancer
   r   
ValueError)arg_varsr   columnss      l/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/pandas/core/reshape/melt.pyensure_list_varsr      st    H%% 	":,, 	"Z$5O5O 	"TTT   >>!	    pandasvalueTframer   
value_namer   ignore_indexboolc           
     
    | j         v rt          d| d          t          |d j                   }|du}t          |d j                   }t           j                             |                    t          |          k    rt          d          |s|r| j                             |          }n j         }||z   }	|                    |	          }
|
dk    }|                                r.d t          |	|d	
          D             }t          d|           |r$ j	        ddt          j        |
          f          n-                     d           n                     d           | j                             |           _         |t           j         t                    rt           j         j                  t          t!           j         j                            k    r j         j        }nd t#          t           j         j                            D             }n׉ j         j         j         j        ndg}nt'          |          rt           j         t                    r|t)          |          rt+          |          }t          |          t           j                   k    r8t          d|dt          |           dt           j                    d          nt          d|d          |g} j        \  }}|t          |          z
  }i }|D ]}                     |          }t          |j        t2          j                  sH|dk    rt5          |g|z  d	          ||<   U t7          |          g |j        |j                  ||<   ~t3          j        |j        |          ||<   ||z   |gz   } j        d         dk    r\t          d  j        D                       s>t5           fdt#           j        d                   D             d	          j        ||<   n j                             d          ||<   tC          |          D ]5\  }} j         "                    |          #                    |          ||<   6 $                    ||          }|sSt3          j        t3          j%        t                               |          } j&        '                    |          |_&        |S )a  
    Unpivot a DataFrame from wide to long format, optionally leaving identifiers set.

    This function is useful to reshape a DataFrame into a format where one
    or more columns are identifier variables (`id_vars`), while all other
    columns are considered measured variables (`value_vars`), and are "unpivoted" to
    the row axis, leaving just two non-identifier columns, 'variable' and
    'value'.

    Parameters
    ----------
    frame : DataFrame
        The DataFrame to unpivot.
    id_vars : scalar, tuple, list, or ndarray, optional
        Column(s) to use as identifier variables.
    value_vars : scalar, tuple, list, or ndarray, optional
        Column(s) to unpivot. If not specified, uses all columns that
        are not set as `id_vars`.
    var_name : scalar, tuple, list, or ndarray, optional
        Name to use for the 'variable' column. If None it uses
        ``frame.columns.name`` or 'variable'. Must be a scalar if columns are a
        MultiIndex.
    value_name : scalar, default 'value'
        Name to use for the 'value' column, can't be an existing column label.
    col_level : scalar, optional
        If columns are a MultiIndex then use this level to melt.
    ignore_index : bool, default True
        If True, original index is ignored. If False, the original index is retained.
        Index labels will be repeated as necessary.

    Returns
    -------
    DataFrame
        Unpivoted DataFrame.

    See Also
    --------
    DataFrame.melt : Identical method.
    pivot_table : Create a spreadsheet-style pivot table as a DataFrame.
    DataFrame.pivot : Return reshaped DataFrame organized
        by given index / column values.
    DataFrame.explode : Explode a DataFrame from list-like
            columns to long format.

    Notes
    -----
    Reference :ref:`the user guide <reshaping.melt>` for more examples.

    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {
    ...         "A": {0: "a", 1: "b", 2: "c"},
    ...         "B": {0: 1, 1: 3, 2: 5},
    ...         "C": {0: 2, 1: 4, 2: 6},
    ...     }
    ... )
    >>> df
    A  B  C
    0  a  1  2
    1  b  3  4
    2  c  5  6

    >>> pd.melt(df, id_vars=["A"], value_vars=["B"])
    A variable  value
    0  a        B      1
    1  b        B      3
    2  c        B      5

    >>> pd.melt(df, id_vars=["A"], value_vars=["B", "C"])
    A variable  value
    0  a        B      1
    1  b        B      3
    2  c        B      5
    3  a        C      2
    4  b        C      4
    5  c        C      6

    The names of 'variable' and 'value' columns can be customized:

    >>> pd.melt(
    ...     df,
    ...     id_vars=["A"],
    ...     value_vars=["B"],
    ...     var_name="myVarname",
    ...     value_name="myValname",
    ... )
    A myVarname  myValname
    0  a         B          1
    1  b         B          3
    2  c         B          5

    Original index values can be kept around:

    >>> pd.melt(df, id_vars=["A"], value_vars=["B", "C"], ignore_index=False)
    A variable  value
    0  a        B      1
    1  b        B      3
    2  c        B      5
    0  a        C      2
    1  b        C      4
    2  c        C      6

    If you have multi-index columns:

    >>> df.columns = [list("ABC"), list("DEF")]
    >>> df
    A  B  C
    D  E  F
    0  a  1  2
    1  b  3  4
    2  c  5  6

    >>> pd.melt(df, col_level=0, id_vars=["A"], value_vars=["B"])
    A variable  value
    0  a        B      1
    1  b        B      3
    2  c        B      5

    >>> pd.melt(df, id_vars=[("A", "D")], value_vars=[("B", "E")])
    (A, D) variable_0 variable_1  value
    0      a          B          E      1
    1      b          B          E      3
    2      c          B          E      5
    zvalue_name (z3) cannot match an element in the DataFrame columns.id_varsN
value_varsz)id_vars cannot contain duplicate columns.c                    g | ]	\  }}||
S  r'   ).0lab	not_founds      r   
<listcomp>zmelt.<locals>.<listcomp>   s1       &Yy  r   T)strictzFThe following id_vars or value_vars are not present in the DataFrame: F)deepc                    g | ]}d | S )	variable_r'   )r(   is     r   r+   zmelt.<locals>.<listcomp>   s    UUUOOOUUUr   r   z	var_name=z has z, items, but the dataframe columns only have z levels.z must be a scalar.r   )r    )namedtype   c              3  Z   K   | ]&}t          |t          j                   o|j        V  'd S N)r   npr2   _supports_2d)r(   dts     r   	<genexpr>zmelt.<locals>.<genexpr>  sI       & &=?Jr28$$$8& & & & & &r   c                2    g | ]}j         d d |f         S r5   )iloc)r(   r0   r   s     r   r+   zmelt.<locals>.<listcomp>	  s(    ===!UZ1===r   Fr   )(r   r   r   lenget_indexer_forget_level_valuesanyzipKeyErrorr;   algosuniquecopyr   r
   namessetranger1   r   r   r   shapepopr2   r6   r   typetile_valuesdtypesvaluesravel	enumerate_get_level_valuesrepeat_constructorarangeindextake)r   r#   r$   var_namer   	col_levelr    value_vars_was_not_nonelevellabelsidxmissingmissing_labelsnum_rowsKnum_cols_adjustedmdatacolid_datamcolumnsr0   resulttakers   `                      r   meltrj   ,   sp   N U]""%: % % %
 
 	
 w	5=AAG(4!*lEMJJJ 5=((1122S\\AADEEE '* ' M229==EEME:%##F++);;== 	 *-fgd*K*K*K  N 3"03 3   # 	+Jqqq%,s"3"334EEJJEJ**EE


&&66yAAemZ00 	5=&''3s5=3F/G/G+H+HHH =.UUU3u}?R;S;S5T5TUUU ',m&8&D""*HH 
h		 emZ00 		?8$$ *>>8}}s5=1111 Xx X Xs8}} X X;>u};M;MX X X   2 ====>>>:+KHaCLL(*,E 
E 
E))C..'-22 	E 1$$#WI0A$APTUUUc

 +T']]2GLVVVc

2CDDE#JJ!ZL0H{1~# & &CH<& & & # # #====uU[^'<'<===D
 
 

 	j "M//44jH%% I I3]44Q77>>xHHc

x88F /	#e**--/@AA{''..Mr   datagroupsdictdropnac                    i }g }t                      }t          t          t          |                                                              }|                                D ]q\  }}t          |          |k    rt          d           fd|D             }	t          |	          ||<   |                    |           |	                    |          }rt           j                            |                    }
|
D ]%}t          j         |         j        |          ||<   &|rt          j        t          ||d                            t"                    |D ]}t%          ||                   z                                  s fd|                                D             }                     ||
|z             S )a  
    Reshape wide-format data to long. Generalized inverse of DataFrame.pivot.

    Accepts a dictionary, ``groups``, in which each key is a new column name
    and each value is a list of old column names that will be "melted" under
    the new column name as part of the reshape.

    Parameters
    ----------
    data : DataFrame
        The wide-format DataFrame.
    groups : dict
        {new_name : list_of_columns}.
    dropna : bool, default True
        Do not include columns whose entries are all NaN.

    Returns
    -------
    DataFrame
        Reshaped DataFrame.

    See Also
    --------
    melt : Unpivot a DataFrame from wide to long format, optionally leaving
        identifiers set.
    pivot : Create a spreadsheet-style pivot table as a DataFrame.
    DataFrame.pivot : Pivot without aggregation that can handle
        non-numeric data.
    DataFrame.pivot_table : Generalization of pivot that can handle
        duplicate values for one index/column pair.
    DataFrame.unstack : Pivot based on the index values instead of a
        column.
    wide_to_long : Wide panel to long format. Less flexible but more
        user-friendly than melt.

    Examples
    --------
    >>> data = pd.DataFrame(
    ...     {
    ...         "hr1": [514, 573],
    ...         "hr2": [545, 526],
    ...         "team": ["Red Sox", "Yankees"],
    ...         "year1": [2007, 2007],
    ...         "year2": [2008, 2008],
    ...     }
    ... )
    >>> data
       hr1  hr2     team  year1  year2
    0  514  545  Red Sox   2007   2008
    1  573  526  Yankees   2007   2008

    >>> pd.lreshape(data, {"year": ["year1", "year2"], "hr": ["hr1", "hr2"]})
          team  year   hr
    0  Red Sox  2007  514
    1  Yankees  2007  573
    2  Red Sox  2008  545
    3  Yankees  2008  526
    z$All column lists must be same lengthc                *    g | ]}|         j         S r'   )rN   )r(   re   rk   s     r   r+   zlreshape.<locals>.<listcomp>\  s     8883T#Y&888r   r   )r2   c                (    i | ]\  }}||         S r'   r'   )r(   kvmasks      r   
<dictcomp>zlreshape.<locals>.<dictcomp>k  s#    :::DAqQ$:::r   r=   )rH   r>   nextiterrP   itemsr   r   appendunionr   r   
differencer6   rM   rN   onesr!   r	   allrU   )rk   rl   rn   rd   
pivot_colsall_colsrb   targetrG   	to_concatid_colsre   crt   s   `            @r   lreshaper     s   x EJ!eeHDfmmoo&&''((A ) )u::??CDDD8888%888	%i00f&!!!>>%((4<**84455G 3 3WT#Y.22c

 ;ws5A/00=== 	$ 	$AE%(OO#DDxxzz 	;::::EKKMM:::EUGj,@AAAr    \d+dfsepsuffixc                   dd}dd}t          |          s|g}nt          |          }| j                            |                                          rt          d          t          |          s|g}nt          |          }| |                                                                         rt          d          g }g }	|D ]H}
 || |
||          }|	                    |           |                     || |
||||                     It          |d	
          }| j        
                    |	          }| |         }t          |          d	k    r(|                    |                              |          S |                    |                                |                              g ||          S )aV!  
    Unpivot a DataFrame from wide to long format.

    Less flexible but more user-friendly than melt.

    With stubnames ['A', 'B'], this function expects to find one or more
    group of columns with format
    A-suffix1, A-suffix2,..., B-suffix1, B-suffix2,...
    You specify what you want to call this suffix in the resulting long format
    with `j` (for example `j='year'`)

    Each row of these wide variables are assumed to be uniquely identified by
    `i` (can be a single column name or a list of column names)

    All remaining variables in the data frame are left intact.

    Parameters
    ----------
    df : DataFrame
        The wide-format DataFrame.
    stubnames : str or list-like
        The stub name(s). The wide format variables are assumed to
        start with the stub names.
    i : str or list-like
        Column(s) to use as id variable(s).
    j : str
        The name of the sub-observation variable. What you wish to name your
        suffix in the long format.
    sep : str, default ""
        A character indicating the separation of the variable names
        in the wide format, to be stripped from the names in the long format.
        For example, if your column names are A-suffix1, A-suffix2, you
        can strip the hyphen by specifying `sep='-'`.
    suffix : str, default '\\d+'
        A regular expression capturing the wanted suffixes. '\\d+' captures
        numeric suffixes. Suffixes with no numbers could be specified with the
        negated character class '\\D+'. You can also further disambiguate
        suffixes, for example, if your wide variables are of the form A-one,
        B-two,.., and you have an unrelated column A-rating, you can ignore the
        last one by specifying `suffix='(!?one|two)'`. When all suffixes are
        numeric, they are cast to int64/float64.

    Returns
    -------
    DataFrame
        A DataFrame that contains each stub name as a variable, with new index
        (i, j).

    See Also
    --------
    melt : Unpivot a DataFrame from wide to long format, optionally leaving
        identifiers set.
    pivot : Create a spreadsheet-style pivot table as a DataFrame.
    DataFrame.pivot : Pivot without aggregation that can handle
        non-numeric data.
    DataFrame.pivot_table : Generalization of pivot that can handle
        duplicate values for one index/column pair.
    DataFrame.unstack : Pivot based on the index values instead of a
        column.

    Notes
    -----
    All extra variables are left untouched. This simply uses
    `pandas.melt` under the hood, but is hard-coded to "do the right thing"
    in a typical case.

    Examples
    --------
    >>> np.random.seed(123)
    >>> df = pd.DataFrame(
    ...     {
    ...         "A1970": {0: "a", 1: "b", 2: "c"},
    ...         "A1980": {0: "d", 1: "e", 2: "f"},
    ...         "B1970": {0: 2.5, 1: 1.2, 2: 0.7},
    ...         "B1980": {0: 3.2, 1: 1.3, 2: 0.1},
    ...         "X": dict(zip(range(3), np.random.randn(3), strict=True)),
    ...     }
    ... )
    >>> df["id"] = df.index
    >>> df
      A1970 A1980  B1970  B1980         X  id
    0     a     d    2.5    3.2 -1.085631   0
    1     b     e    1.2    1.3  0.997345   1
    2     c     f    0.7    0.1  0.282978   2
    >>> pd.wide_to_long(df, ["A", "B"], i="id", j="year")
    ... # doctest: +NORMALIZE_WHITESPACE
                    X  A    B
    id year
    0  1970 -1.085631  a  2.5
    1  1970  0.997345  b  1.2
    2  1970  0.282978  c  0.7
    0  1980 -1.085631  d  3.2
    1  1980  0.997345  e  1.3
    2  1980  0.282978  f  0.1

    With multiple id columns

    >>> df = pd.DataFrame(
    ...     {
    ...         "famid": [1, 1, 1, 2, 2, 2, 3, 3, 3],
    ...         "birth": [1, 2, 3, 1, 2, 3, 1, 2, 3],
    ...         "ht1": [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
    ...         "ht2": [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9],
    ...     }
    ... )
    >>> df
       famid  birth  ht1  ht2
    0      1      1  2.8  3.4
    1      1      2  2.9  3.8
    2      1      3  2.2  2.9
    3      2      1  2.0  3.2
    4      2      2  1.8  2.8
    5      2      3  1.9  2.4
    6      3      1  2.2  3.3
    7      3      2  2.3  3.4
    8      3      3  2.1  2.9
    >>> long_format = pd.wide_to_long(df, stubnames="ht", i=["famid", "birth"], j="age")
    >>> long_format
    ... # doctest: +NORMALIZE_WHITESPACE
                      ht
    famid birth age
    1     1     1    2.8
                2    3.4
          2     1    2.9
                2    3.8
          3     1    2.2
                2    2.9
    2     1     1    2.0
                2    3.2
          2     1    1.8
                2    2.8
          3     1    1.9
                2    2.4
    3     1     1    2.2
                2    3.3
          2     1    2.3
                2    3.4
          3     1    2.1
                2    2.9

    Going from long back to wide just takes some creative use of `unstack`

    >>> wide_format = long_format.unstack()
    >>> wide_format.columns = wide_format.columns.map("{0[0]}{0[1]}".format)
    >>> wide_format.reset_index()
       famid  birth  ht1  ht2
    0      1      1  2.8  3.4
    1      1      2  2.9  3.8
    2      1      3  2.2  2.9
    3      2      1  2.0  3.2
    4      2      2  1.8  2.8
    5      2      3  1.9  2.4
    6      3      1  2.2  3.3
    7      3      2  2.3  3.4
    8      3      3  2.1  2.9

    Less wieldy column names are also handled

    >>> np.random.seed(0)
    >>> df = pd.DataFrame(
    ...     {
    ...         "A(weekly)-2010": np.random.rand(3),
    ...         "A(weekly)-2011": np.random.rand(3),
    ...         "B(weekly)-2010": np.random.rand(3),
    ...         "B(weekly)-2011": np.random.rand(3),
    ...         "X": np.random.randint(3, size=3),
    ...     }
    ... )
    >>> df["id"] = df.index
    >>> df  # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
       A(weekly)-2010  A(weekly)-2011  B(weekly)-2010  B(weekly)-2011  X  id
    0        0.548814        0.544883        0.437587        0.383442  0   0
    1        0.715189        0.423655        0.891773        0.791725  1   1
    2        0.602763        0.645894        0.963663        0.528895  1   2

    >>> pd.wide_to_long(df, ["A(weekly)", "B(weekly)"], i="id", j="year", sep="-")
    ... # doctest: +NORMALIZE_WHITESPACE
             X  A(weekly)  B(weekly)
    id year
    0  2010  0   0.548814   0.437587
    1  2010  1   0.715189   0.891773
    2  2010  1   0.602763   0.963663
    0  2011  0   0.544883   0.383442
    1  2011  1   0.423655   0.791725
    2  2011  1   0.645894   0.528895

    If we have many columns, we could also use a regex to find our
    stubnames and pass that list on to wide_to_long

    >>> stubnames = sorted(
    ...     set(
    ...         [
    ...             match[0]
    ...             for match in df.columns.str.findall(r"[A-B]\(.*\)").values
    ...             if match != []
    ...         ]
    ...     )
    ... )
    >>> list(stubnames)
    ['A(weekly)', 'B(weekly)']

    All of the above examples have integers as suffixes. It is possible to
    have non-integers as suffixes.

    >>> df = pd.DataFrame(
    ...     {
    ...         "famid": [1, 1, 1, 2, 2, 2, 3, 3, 3],
    ...         "birth": [1, 2, 3, 1, 2, 3, 1, 2, 3],
    ...         "ht_one": [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
    ...         "ht_two": [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9],
    ...     }
    ... )
    >>> df
       famid  birth  ht_one  ht_two
    0      1      1     2.8     3.4
    1      1      2     2.9     3.8
    2      1      3     2.2     2.9
    3      2      1     2.0     3.2
    4      2      2     1.8     2.8
    5      2      3     1.9     2.4
    6      3      1     2.2     3.3
    7      3      2     2.3     3.4
    8      3      3     2.1     2.9

    >>> long_format = pd.wide_to_long(
    ...     df, stubnames="ht", i=["famid", "birth"], j="age", sep="_", suffix=r"\w+"
    ... )
    >>> long_format
    ... # doctest: +NORMALIZE_WHITESPACE
                      ht
    famid birth age
    1     1     one  2.8
                two  3.4
          2     one  2.9
                two  3.8
          3     one  2.2
                two  2.9
    2     1     one  2.0
                two  3.2
          2     one  1.8
                two  2.8
          3     one  1.9
                two  2.4
    3     1     one  2.2
                two  3.3
          2     one  2.3
                two  3.4
          3     one  2.1
                two  2.9
    stubr   r   r   c                    dt          j        |           t          j        |           | d}| j        | j        j                            |                   S )N^$)reescaper   r   match)r   r   r   r   regexs        r   get_var_namesz#wide_to_long.<locals>.get_var_nameso  sK    ?RYt__?binn?f???z"*...u5566r   c                f   t          | |||                    |          |          }||         j                            t	          j        ||z             dd          ||<   	 t          ||                   ||<   n# t          t          t          f$ r Y nw xY w|
                    g ||          S )N)r#   r$   r   rY   r   T)r   )rj   rstripr   replacer   r   r   	TypeErrorr   OverflowError	set_index)r   r   r0   jr$   r   newdfs          r   	melt_stubzwide_to_long.<locals>.melt_stubs  s    !{{3''
 
 
 8<''	$*(=(=r'NNa	!%(++E!HH:}5 	 	 	D	 wwAw'''s   $A= =BBz,stubname can't be identical to a column namez3the id variables need to uniquely identify each rowr3   )axis)onN)r   r   r   r   r   r   )r   r   r   r   )r   r   r   isinrA   r   
duplicatedextendry   r   r{   r>   r   joinmergereset_index)r   	stubnamesr0   r   r   r   r   r   _meltedvalue_vars_flattenedr   	value_varmeltedr#   news                  r   wide_to_longr   p  s   ~7 7 7 7( ( ( (& 	"" $K		OO		zy!!%%'' IGHHH?? CGG	!u PNOOOG B B!M"dC88	##I...yyT1aC@@AAAAG!$$$Fj##$899G
W+C
1vv{{}}Q$$V,,,yy++--!y44>>wwAwGGGr   )r   r   r   r   )NNNr   NT)r   r   r   r   r    r!   r   r   )T)rk   r   rl   rm   rn   r!   r   r   )r   r   )r   r   r   r   r   r   r   r   )$
__future__r   r   typingr   numpyr6   pandas.util._decoratorsr   pandas.core.dtypes.commonr   r   pandas.core.dtypes.concatr   pandas.core.dtypes.missingr	   pandas.core.algorithmscore
algorithmsrD   pandas.core.indexes.apir
   pandas.core.reshape.concatr   pandas.core.tools.numericr   collections.abcr   pandas._typingr   r   r   r   rj   r   r   r'   r   r   <module>r      s   " " " " " " 				                 . . . . . .        4 3 3 3 3 3 , , , , , , & & & & & & & & & . . . . . . - - - - - - 0 0 0 0 0 0 !((((((++++++          H "i i i i iX HSB SB SB SB SBl HAGsH sH sH sH sH sH sHr   