
    Pi=Q                     	   d Z ddlmZ ddlmZ ddlZddlZddlm	Z	 ddl
mZmZmZ ddlmZ ej                            d          Zej                            d          Zej                            d	          Zed
             Zd Zej                            dg dddgg          d             Zej                            dddg          d             Zed             Zd Zed             Zej                            ddddgidfddgddgddfddgddgddfg          d              Z ed!d"d#g          Z eej                            d$dddgid%g d&d'd% e d(d)           e d(d*           e d(d+           e d,d-           e d.d/           e d.d0          gd'g          d1                         Z!eej                            d$dddgid2g d&d'd2 e d(d)           e d(d*           e d(d+           e d,d-           e d.d/           e d.d0          gd'g          d3                         Z"eej                            d$dddgid2g d&d'd2 e d(d)           e d(d*           e d(d+           e d,d-           e d.d/           e d.d0          gd'g          d4                         Z#ed5             Z$ed6             Z%ed7             Z&ed8             Z'ej                            d9d:d;g          d<             Z(eej                            d$i ddig          d=                         Z)ej                            d>i g d?fd@g dAig dAfg          dB             Z*ej                            dd(d,gdCg          dD             Z+edE             Z,eej                            dFdG eg dHg ej-        g dI          J          fdK eg dHg ej-        g dL          J          fdM eg dNg ej-        g dO          J          fg          dP                         Z.eej                            dddgg          ej                            dQddRdSgdSdRgdSdTgg          dU                                     Z/edV             Z0edW             Z1edX             Z2edY             Z3edZ             Z4d[ Z5ed\             Z6ed]             Z7ed^             Z8d_ Z9dS )`zx
Tests that the file header is properly handled or inferred
during parsing for all of the parsers defined in parsers.py
    )
namedtuple)StringION)ParserError)	DataFrameIndex
MultiIndexz=ignore:Passing a BlockManager to DataFrame:DeprecationWarningpyarrow_xfailpyarrow_skipc                     | }d}t          j        t          |          5  |                    t	          d          dg           d d d            d S # 1 swxY w Y   d S )Nzbut only \d+ lines in filematchz,,
   headerpytestraises
ValueErrorread_csvr   )all_parsersparsermsgs      v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/pandas/tests/io/parser/test_header.pytest_read_with_bad_headerr      s    F
'C	z	-	-	- 5 5t4445 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5s   &AAAc                     | }d}t          j        t          d          5  |                    t	          |          d           d d d            d S # 1 swxY w Y   d S )N$1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
zUPassing negative integer to header is invalid. For no header, use header=None insteadr   r   r   r   r   datas      r   test_negative_headerr    &   s    FD 
1
 
 
 3 3
 	r2223 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3   %AAAr   )r         c                     | }d}t          j        t          d          5  |                    t	          |          |           d d d            d S # 1 swxY w Y   d S )Nz<1,2,3,4,5
        6,7,8,9,10
        11,12,13,14,15
        z8cannot specify multi-index header with negative integersr   r   r   )r   r   r   r   s       r    test_negative_multi_index_headerr&   5   s     FD 
T
 
 
 7 7 	v6667 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7r!   TFc                     | }d}d}t          j        t          |          5  |                    t	          |          |           d d d            d S # 1 swxY w Y   d S )NzMyColumn
a
b
a
bz#Passing a bool to header is invalidr   r   )r   r   	TypeErrorr   r   )r   r   r   r   r   s        r   test_bool_header_argr)   C   s     FD 0C	y	,	,	, 7 7v6667 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7   %AAAc                     | }d}g d}|                     t          |          |          }t          g dg dg dgg dg d          }t          j        ||           d S )	Nzfoo,1,2,3
bar,4,5,6
baz,7,8,9
ABCnames   r"      )r#         )      	   )foobarbazindexcolumnsr   r   r   tmassert_frame_equal)r   r   r   r1   resultexpecteds         r   test_header_with_index_colrE   R   s    FD OOE__Xd^^5_99F	IIIyyy)###  H
 &(+++++    c                     | }d}d}|                     t          |          dd          }|                     t          |          dd          }t          j        ||           d S )Nzggot,to,ignore,this,line
got,to,ignore,this,line
index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
z7index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
r"   r   r   	index_colr   r   rA   rB   )r   r   r   data2rC   rD   s         r   test_header_not_first_linerL   d   sj    FDE __Xd^^A_CCFxqAFFH&(+++++rF   c           	      l   | }d}|                     t          |          g dddg          }d fdt          d          D             }t          j        d	 t          d          D             d
 t          d          D             gddg          }t          j        d t          d          D             d t          d          D             d t          d          D             d t          d          D             gg d          }t          |||          }t          j        ||           d S )N  C0,,C_l0_g0,C_l0_g1,C_l0_g2

C1,,C_l1_g0,C_l1_g1,C_l1_g2
C2,,C_l2_g0,C_l2_g1,C_l2_g2
C3,,C_l3_g0,C_l3_g1,C_l3_g2
R0,R1,,,
R_l0_g0,R_l1_g0,R0C0,R0C1,R0C2
R_l0_g1,R_l1_g1,R1C0,R1C1,R1C2
R_l0_g2,R_l1_g2,R2C0,R2C1,R2C2
R_l0_g3,R_l1_g3,R3C0,R3C1,R3C2
R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2
r   r3   r"   r4   r   r3   rH   c                     d|  d| S )NRr/    )rcs     r   <lambda>z)test_header_multi_index.<locals>.<lambda>   s    k!kkakk rF   c                 J    g | ]fd t          d          D             S )c                 (    g | ]} |          S rR   rR   ).0rT   
data_gen_frS   s     r   
<listcomp>z6test_header_multi_index.<locals>.<listcomp>.<listcomp>   s%    000!ZZ1000rF   r4   )range)rX   rS   rY   s    @r   rZ   z+test_header_multi_index.<locals>.<listcomp>   s8    CCCQ00000uQxx000CCCrF   r5   c                     g | ]}d | S )R_l0_grR   rX   is     r   rZ   z+test_header_multi_index.<locals>.<listcomp>   s    	)	)	)1,1,,	)	)	)rF   c                     g | ]}d | S )R_l1_grR   r^   s     r   rZ   z+test_header_multi_index.<locals>.<listcomp>   s    +K+K+KQLQLL+K+K+KrF   R0R1r0   c                     g | ]}d | S )C_l0_grR   r^   s     r   rZ   z+test_header_multi_index.<locals>.<listcomp>       ,,,a\a\\,,,rF   r4   c                     g | ]}d | S )C_l1_grR   r^   s     r   rZ   z+test_header_multi_index.<locals>.<listcomp>   rf   rF   c                     g | ]}d | S )C_l2_grR   r^   s     r   rZ   z+test_header_multi_index.<locals>.<listcomp>   rf   rF   c                     g | ]}d | S )C_l3_grR   r^   s     r   rZ   z+test_header_multi_index.<locals>.<listcomp>   rf   rF   )C0C1C2C3)r?   r>   )r   r   r[   r   from_arraysr   rA   rB   )r   r   r   rC   r>   r?   rD   rY   s          @r   test_header_multi_indexrr   x   sa   FD __Xd^^LLLQPQF_SSF))JCCCC%((CCCD"	)	)a	)	)	)+K+K%((+K+K+KLTl  E $,,588,,,,,588,,,,,588,,,,,588,,,		
 '&&  G we<<<H&(+++++rF   z
kwargs,msgrI   r:   r;   z]index_col must only contain integers of column positions when specifying a multi-index headerr3   )rI   r1   z9cannot specify names when specifying a multi-index header)rI   usecolsz;cannot specify usecols when specifying a multi-index headerc                     d}| }t          j        t          |          5   |j        t	          |          fdg di| d d d            d S # 1 swxY w Y   d S )NrN   r   r   rO   r   )r   kwargsr   r   r   s        r   test_header_multi_index_invalidrv      s    ,D F	z	-	-	- G GFF|||FvFFFG G G G G G G G G G G G G G G G G Gs    AAA
_TestTuplefirstsecondru   r4   )aq)r{   rS   )r{   s)bt)rT   u)rT   v)skiprowsr1   r{   r|   rS   r}   r~   r   rT   r   r   c                     | }t          g dg dgddgt          j        g d                    }d} |j        t	          |          fdd	i|}t          j        ||           d S )
Nr3   r"   r4   r#   r5   r6   r7   r8   r9   r         onetworz   r=   zC,a,a,a,b,c,c
,q,r,s,t,u,v
,,,,,,
one,1,2,3,4,5,6
two,7,8,9,10,11,12rI   r   r   r   from_tuplesr   r   rA   rB   r   ru   r   rD   r   rC   s         r   &test_header_multi_index_common_format1r      s    < F			2223en&TTT
 
  HD V_Xd^^CCqCFCCF&(+++++rF   r"   c                     | }t          g dg dgddgt          j        g d                    }d} |j        t	          |          fdd	i|}t          j        ||           d S )
Nr   r   r   r   rz   r=   z<,a,a,a,b,c,c
,q,r,s,t,u,v
one,1,2,3,4,5,6
two,7,8,9,10,11,12rI   r   r   r   s         r   &test_header_multi_index_common_format2r      s    < F			2223en&TTT
 
  HD
 V_Xd^^CCqCFCCF&(+++++rF   c                     | }t          g dg dgddgt          j        g d                    }|                    d          }d	} |j        t          |          fd
d i|}t          j        ||           d S )Nr   r   r   r   rz   r=   T)drop2a,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12rI   )r   r   r   reset_indexr   r   rA   rB   r   s         r   &test_header_multi_index_common_format3r   )  s    < F			2223en&TTT
 
  H ###..HD
 V_Xd^^FFtFvFFF&(+++++rF   c           
      8   | }t          t          j        g dg dgd          t          ddg          t	          g dg dgg d	g d
gddg                    }d}|                    t          |          ddgd          }t          j        ||           d S )Nr"   r4   r#   r5   r6   r8   r9   r   r   r   int64dtyper3   r7   r{   r~   rT   rS   r}   r   r   r   r   r   r3   r"   r"   r   r3   r"   r4   r#   r{   r|   levelscodesr1   r=   r   r   rH   )	r   nparrayr   r   r   r   rA   rB   r   r   rD   r   rC   s        r   0test_header_multi_index_common_format_malformed1r   Y  s    F
///#5#5#56gFFFQFmm#OO%>%>%>?"??OOO4*
 
 
  HD
 __Xd^^QFa_HHF(F+++++rF   c           
      8   | }t          t          j        g dg dgd          t          ddd          t	          g dg d	gg d
g dgd dg                    }d}|                    t          |          ddgd          }t          j        ||           d S )Nr   r   r   r   r3      r6   r   r   r   r   r|   r   r=   1,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12r   rH   )	r   r   r   r[   r   r   r   rA   rB   r   s        r   0test_header_multi_index_common_format_malformed2r   n  s    F
///#5#5#56gFFFAr1oo#OO%>%>%>?"??OOO4+
 
 
  HD
 __Xd^^QFa_HHF(F+++++rF   c           
      T   | }t          t          j        g dg dgd          t          ddgddggd	dgd	dgg
          t          g dg dgg dg dgd dg                    }d}|                    t          |          d	dgd	dg          }t          j        ||           d S )N)r4   r#   r5   r6   )r9   r   r   r   r   r   r3   r7   r"   r8   r   )r   r   r   )r}   r   r   r   )r   r3   r"   r"   rO   r|   r   r=   r   rH   )r   r   r   r   r   r   rA   rB   r   s        r   0test_header_multi_index_common_format_malformed3r     s    F
,,,0@@@!Q!Q 0!Q!Q8HIII#OO%9%9%9:<<.+
 
 
  HD
 __Xd^^QFq!f_MMF(F+++++rF   c                     | }d d gddgddgg}t          j        ddg          }t          ||          }d}|                    t	          |          d	dg
          }t          j        ||           d S )Nr3   r"   r4   r#   )r{   r-   )r~   r.   r?   za,b
A,B
,
1,2
3,4r   r   )r   r   r   r   r   rA   rB   )r   r   r   r?   rD   rC   s         r   "test_header_multi_index_blank_liner     s     F4L1a&1a&)D$j*%=>>Gw///H"D__Xd^^QF_;;F(F+++++rF   zdata,header)1,2,3
4,5,6N)zfoo,bar,baz
1,2,3
4,5,6r   c                 R   | }|j         dk    r7|5t          j                            d          }|                    |           |                    t          d          g d          }|                    t          |          g d|          }t          j        ||           d S )NpyarrowzDataFrame.columns are different)reasonr   r   r0   r1   r   )	enginer   markxfailapplymarkerr   r   rA   rB   )r   r   r   requestr   r   rD   rC   s           r   !test_header_names_backward_compatr     s    
 F}	!!f&8{  (I JJD!!!x77OOH__Xd^^???6_RRF&(+++++rF   c                     | }t          g d          } |j        t          d          fi |}t          j        ||           d S )Nr   r   za,b,cr   r   r   rA   rB   )r   ru   r   rD   rC   s        r   test_read_only_header_no_rowsr     sW     F111HV_Xg..99&99F&(+++++rF   zkwargs,namesr   r1   )r:   r;   r<   quuxpandac                     | }d}t          g dg dg dg|          } |j        t          |          fdd i|}t          j        ||           d S )Nr   )r3   r"   r4   r#   r5   )r6   r7   r8   r9   r   )r   r   r         r   r   r   )r   ru   r1   r   r   rD   rC   s          r   test_no_headerr     s     FD 	***,@,@,@A5  H V_Xd^^CCDCFCCF&(+++++rF   string_headerc                     d}d}| }t          j        t          |          5  |                    t	          |          |           d d d            d S # 1 swxY w Y   d S )Nz*header must be integer or list of integersz1,2
3,4r   r   r   )r   r   r   r   r   s        r   test_non_int_headerr     s     7CDF	z	-	-	- 7 7v6667 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7r*   c                     d}| }t          ddgddgddgd          }|                    t          |          dg          }t          j        ||           d S )Nza,b,c
0,1,2
1,2,3r   r3   r"   r4   r   r   r   )r   r   r   rD   rC   s        r   test_singleton_headerr     sg     %DF1vQF!Q@@AAH__Xd^^QC_88F&(+++++rF   zdata,expectedz#A,A,A,B
one,one,one,two
0,40,34,0.1)r   (   "   皙?)r-   r   r-   zone.1)r-   zone.2r.   r   r   z%A,A,A,B
one,one,one.1,two
0,40,34,0.1)r   r   r-   zone.1.1r   z/A,A,A,B,B
one,one,one.1,two,two
0,40,34,0.1,0.1)r   r   r   r   r   )r   r   r   r   )r.   ztwo.1c                     | }|                     t          |          ddg          }t          j        ||           d S )Nr   r3   r   rJ   )r   r   rD   r   rC   s        r   test_mangles_multi_indexr     sB    R F__Xd^^QF_;;F&(+++++rF   r?    Unnamed
NotUnnamedc                    | }ddg}|d                     |pddg          dz   }n d                     dg|pddgz             dz   }|                    t          |          ||          }g }|g d}t          |          D ])\  }}	|	sd	||n|dz    d
}	|                    |	           *t          j        t          |ddg                    }t          ddgddgg|          }
t          j
        ||
           d S )Nr   r3   ,r   z
0,1
2,3
4,5
z
,0,1
0,2,3
1,4,5
rH   )r   r   r   z	Unnamed: _level_001r"   r4   r#   r5   r   )joinr   r   	enumerateappendr   r   zipr   rA   rB   )r   rI   r?   r   r   r   rC   exp_columnsr_   colrD   s              r   test_multi_index_unnamedr     s?    FVFxx+B8,,/BBxx 3B84558PP__Xd^^Fi_PPFK,,G$$    3 	JI9#4aa!a%IIIC3$SsCj%A%ABBG1a&1a&)7;;;H&(+++++rF   c                     | }d}|                     t          |          dg d          }t          ddgddgd	d
gd          }t          j        ||           d S )Nza, b
1,2,3
5,6,4
r   r,   )r   r1   r3   r5   r"   r6   r4   r#   r@   r   r   r   rC   rD   s        r   6test_names_longer_than_header_but_equal_with_data_rowsr   D  so     FD __Xd^^A____MMF1vQF!Q@@AAH&(+++++rF   c                 r   | }d}d}t          j        g d          }t          g dg dg|          }|                    t	          |          ddg	          }t          j        ||j        d d                    |                    t	          |          ddg	          }t          j        ||           d S )
NzFMale, Male, Male, Female, Female
R, R, L, R, R
.86, .67, .88, .78, .81z^Male, Male, Male, Female, Female
R, R, L, R, R
.86, .67, .88, .78, .81
.86, .67, .88, .78, .82))MalerQ   ) Male R)r   z L) Femaler   )r   z R.1)Q?q=
ףp?)\(?(\?gQ?)r   r   r   r   g=
ףp=?r   r   r3   r   )r   r   r   r   r   rA   rB   iloc)r   r   s1s2mirD   df1df2s           r    test_read_csv_multiindex_columnsr   Q  s     F	SB	"  
		
 	
 	

 
B 	'	'	')G)G)GHRT  H //(2,,1v/
6
6C#x}RaR0111
//(2,,1v/
6
6C#x(((((rF   c                     | }d}t          j        t          d          5  |                    t	          |          ddg           d d d            d S # 1 swxY w Y   d S )Nz1row11,row12,row13
row21,row22, row23
row31,row32
z1Header rows must have an equal number of columns.r   r   r"   r   r   r   r   r   r   )r   r   cases      r   'test_read_csv_multi_header_length_checkr   q  s     FD
 
N
 
 
 7 7 	1v6667 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7s   'AAAc                     | }d}|                     t          |          ddgd           }t          g ddt          j        t          j        gdg d	          }t          j        ||           d S )
Nzx,1,5
y,2
z,3
r{   r~   r   r2   r5   r{   r~   )xyz)r>   )r   r   r   r   nanrA   rB   r   s        r   #test_header_none_and_implicit_indexr     s~     FD__Xd^^C:d_KKFiiq"&"&122///  H &(+++++rF   c                     | }d}t          j        t          d          5  |                    t	          |          ddgd            d d d            d S # 1 swxY w Y   d S )Nx,1
y,2,5
z,3
z"Expected 2 fields in line 2, saw 3r   r{   r~   r   r   r   s      r   1test_header_none_and_implicit_index_in_second_rowr     s     FD	{*N	O	O	O G GsCjFFFG G G G G G G G G G G G G G G G G Gs   (AAAc                     | }d}|                     t          |          ddgd d          }t          ddgdd	gd
          }t          j        ||           d S )Nr   r{   r~   skip)r1   r   on_bad_linesr   r   r3   r4   r   r@   r   s        r   &test_header_none_and_on_bad_lines_skipr    so    FD__sCjF   F SzA7788H&(+++++rF   c                     | }d}d}t          j        t          |          5  |                    t	          |          g d           d d d            d S # 1 swxY w Y   d S )Nza,b
1,2
z;Passed header=\[0,1,2\], len of 3, but only 2 lines in filer   )r   r3   r"   r   r   )r   r   r   r   s       r   test_header_missing_rowsr    s     FD IC	z	-	-	- : :yyy999: : : : : : : : : : : : : : : : : :s   'AAAc                     | }d}|                     t          |          d          }t          dgddd          }t          j        ||           d S )Nz1aa    bb(1,1)   cc(1,1)
                0  2  3.5\s+sepr   r"   g      @)aazbb(1,1)zcc(1,1)r@   r   s        r    test_header_multiple_whitespacesr    s_     FD __Xd^^_88FcBBCCH&(+++++rF   c                     | }d}|                     t          |          d          }t          dddgi          }t          j        ||           d S )Nza,b
1,2
3,4
    r  r	  za,bz1,2z3,4r@   r   s        r   test_header_delim_whitespacer    s\     FD __Xd^^_88F%%011H&(+++++rF   c                     | }d}|                     t          |          d ddgddd          }t          ddgd	d
ggd          }t          j        ||           d S )Nz
a,i,x
b,j,y
r   r3   zstring[pyarrow]r   )r   rs   r   dtype_backendr   r{   r_   r~   jr   r@   )pyarrow_parser_onlyr   r   rC   rD   s        r   test_usecols_no_header_pyarrowr    s     FD __A   F 3*sCj19JKKKH&(+++++rF   ):__doc__collectionsr   ior   numpyr   r   pandas.errorsr   pandasr   r   r   pandas._testing_testingrA   r   filterwarnings
pytestmarkusefixturesxfail_pyarrowskip_pyarrowr   r    parametrizer&   r)   rE   rL   rr   rv   rw   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  rR   rF   r   <module>r"     s'	   
 # " " " " "            % % % % % %         
      [''C 
 ''88{&&~66 5 5 53 3 3 ZZZB7#<==
7 
7 >=
7 D%=117 7 217 , , ,", , ,( ", ", ",J  5%.)'	
 a&E5>::H	

 a&eU^<<J	
 *G G+ *G( Zw&9::
 	Aq6  
	
 
	
 
3$$
3$$
3$$
3$$
3$$
3$$
	
 
	
 8, ,9  :,& 	Aq6  
	
 
	
 
3$$
3$$
3$$
3$$
3$$
3$$
	
 
	
 8, ,9  :,$ 	Aq6  
	
 
	
 
3$$
3$$
3$$
3$$
3$$
3$$
	
 
	
 8, ,9  :,& , , ,( , , ,* , , ,( , , , *,LM , , , Be(<#=>>, , ?> , 	___<<<=222	
	 	
, 
,	 	
, S#J#@AA7 7 BA7 , , ,  4I!!!".
.PPP   	
 6I!!!".
.RRR   	
 @I&&&'.
.     	
'"% %L, ,M% % N, taSk22I9b/i=VX , ,  32 
,D 	, 	, 	, ) ) )> 7 7 7 , , , G G G, , , : : : , , , 	, 	, 	,, , , , ,rF   