
    PiN                        d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlZd dlZd dlmZ d dlZd dlmZmZmZ d dlmZmZmZmZ d dlmZ d	Zd
ZdZdZ dZ!d Z"d Z#d Z$d Z%d Z&d Z'd Z(d Z)d Z*d Z+d Z,d Z-d Z.d Z/ej0        1                    d          d             Z2d Z3d Z4d  Z5ej0        6                    d!e          d"             Z7ej0        6                    d!e          d#             Z8d$ Z9d% Z:d& Z;d' Z<d( Z=ej0        6                    d!e          d)             Z>ej0        6                    d*g d+          ej0        6                    d,d-d.g          ej0        6                    d/g d0          ej0        6                    d!e          d1                                                 Z?ej0        6                    d!e          d2             Z@d3 ZAej0        6                    d!e          d4             ZBd5 ZCdS )6    N)BZ2File)	resources)BytesIO)NamedTemporaryFile)dump_svmlight_fileload_svmlight_fileload_svmlight_files)assert_allcloseassert_array_almost_equalassert_array_equalcreate_memmap_backed_data)CSR_CONTAINERSzsklearn.datasets.tests.datazsvmlight_classification.txtzsvmlight_multilabel.txtzsvmlight_invalid.txtzsvmlight_invalid_order.txtc                 :    t          j        t                    | z  S N)r   filesTEST_DATA_MODULE)filenames    /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/sklearn/datasets/tests/test_svmlight_format.py_svmlight_local_test_file_pathr      s    ?+,,x77    c                     t          |           }|                    d          5 }t          |fi |cddd           S # 1 swxY w Y   dS )zG
    Helper to load resource `filename` with `importlib.resources`
    rbN)r   openr   )r   kwargs	data_pathfs       r   _load_svmlight_local_test_filer   "   s     /x88I			 /!!..v../ / / / / / / / / / / / / / / / / /s   ?AAc                     t          t                    \  } }| j        j        d         dk    sJ | j        d         dk    sJ | j        d         dk    sJ |j        d         dk    sJ dD ]\  }}}| ||f         |k    sJ | d         dk    sJ | d         dk    sJ | d	         dk    sJ | d
         dk    sJ | d         dk    sJ | dxx         dz  cc<   | d         dk    sJ t	          |g d           d S )Nr               )r      g      @r   
   g)r      g      ?r!            ?r!      )r$         )r      )r   r)   )r!      )r!      )r$      )r   r$   r$   r)   )r!   r$   r0      r!   r$   )r   datafileindptrshaper   Xyijvals        r   test_load_svmlight_filer>   +   s:   )(33DAq 8>!!!!!71:????71:71:????  	1c Aw#~~~~~ T7a<<<<T7a<<<<T7a<<<<U8q====U8q==== dGGGqLGGGT7a<<<< q,,,-----r   c                     t          j        t                    t          z  } t	          |           } t          |           \  }}t          j        | t          j                  }	 t          |          \  }}t          |j
        |j
                   t          ||           t          j        |           d S # t          j        |           w xY wr   )r   r   r   r5   strr   osr   O_RDONLYr   dataclose)r   X1y1fdX2y2s         r   test_load_svmlight_file_fdrJ   N   s      011H<III	**FB	BK	(	(B#B''B!"'27333!"b)))
s   #<B5 5Cc                      t          t                    } t          t          |                     \  }}t          |           \  }}t	          |j        |j                   t	          ||           d S r   )r   r5   r   r@   r
   rC   )r   rE   rF   rH   rI   s        r   test_load_svmlight_pathlibrL   a   sa    .x88II//FB	**FBBGRW%%%Br   c                  L    t          t          d          \  } }|g dk    sJ d S )NT
multilabel))r   r!   )r$    )r!   r$   )r   	multifile)r9   r:   s     r   "test_load_svmlight_file_multilabelrR   k   s6    ))EEEDAq*********r   c                  P   t          t                    } t          t          |           gdz  t          j                  \  }}}}t          |                                |                                           t          ||           |j	        t          j        k    sJ |j	        t          j        k    sJ t          t          |           gdz  t          j
                  \  }}}}}	}
|j	        |j	        k    sJ |j	        |	j	        k    sJ |	j	        t          j
        k    sJ d S )Nr$   )dtyper0   )r   r5   r	   r@   npfloat32r   toarrayr   rT   float64)r   X_trainy_trainX_testy_testrE   rF   rH   rI   X3y3s              r   test_load_svmlight_filesr_   p   s   .x88I':	Y1BJ( ( ($GWff w((&..*:*:;;;gv...=BJ&&&&<2:%%%%0#i..1AA1ERZXXXBBB8rx8rx8rz!!!!!!r   c                  v   t          t          d          \  } }| j        j        d         dk    sJ | j        d         dk    sJ | j        d         dk    sJ dD ]\  }}}| ||f         |k    sJ t	          j        t                    5  t          t          d           d d d            d S # 1 swxY w Y   d S )	N   )
n_featuresr   r   r    r!   )r#   r%   r(   r+   r.   )r   r5   r6   r7   pytestraises
ValueErrorr8   s        r   "test_load_svmlight_file_n_featuresrf      s*   )(rBBBDAq 8>!!!!!71:????71: L  	1cAw#~~~~~ 
z	"	" @ @&xB????@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @s   
B..B25B2c                     t          t                    \  } }t          dd          5 }|                                 t	          t                                        d          5 }t          j        |j        d          5 }t          j	        ||           d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   t          |j                  \  }}t          j        |j                   d d d            n# 1 swxY w Y   t          |                                 |                                           t          ||           t          dd          5 }|                                 t	          t                                        d          5 }t          |j        d          5 }t          j	        ||           d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   t          |j                  \  }}t          j        |j                   d d d            n# 1 swxY w Y   t          |                                 |                                           t          ||           d S )Nzsklearn-testz.gz)prefixsuffixr   wbz.bz2)r   r5   r   rD   r   r   gzipnameshutilcopyfileobjr   rA   remover   rW   r   )	r9   r:   tmpr   fh_outXgzygzXbzybzs	            r   test_load_compressedrv      s   )(33DAq	>%	@	@	@ C		+H55::4@@ 	.A38T** .f"1f---. . . . . . . . . . . . . . .	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. &ch//S 		#(               aiikk3;;==999a%%%	>&	A	A	A S		+H55::4@@ 	.A4(( .F"1f---. . . . . . . . . . . . . . .	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. &ch//S 		#(               aiikk3;;==999a%%%%%s   <D %B9 B"B9"B&&B9)B&*B9-D 9B=	=D  B=	3D  DD <H2G+2GG+GG+GG+H2+G/	/H22G/	33H22H69H6c                      t          j        t                    5  t          t                     d d d            d S # 1 swxY w Y   d S r   )rc   rd   re   r   invalidfilerP   r   r   test_load_invalid_filery      s    	z	"	" 4 4&{3334 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4   <A A c                      t          j        t                    5  t          t                     d d d            d S # 1 swxY w Y   d S r   )rc   rd   re   r   invalidfile2rP   r   r   test_load_invalid_order_filer}      s    	z	"	" 5 5&|4445 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5rz   c                      t          d          } t          j        t                    5  t	          | d           d d d            d S # 1 swxY w Y   d S )Ns   -1 4:1.
1 0:1
F
zero_based)r   rc   rd   re   r   )r   s    r   test_load_zero_basedr      s    #$$A	z	"	" 0 01////0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0s   AAAc                     d} d}t          |           }t          |d          \  }}|j        dk    sJ t          |           }t          |          }t          ||gd          \  }}}}	|j        dk    sJ |j        dk    sJ d S )Ns   -1 1:1 2:2 3:3
s   -1 0:0 1:1
autor   )r!   r0   )r!   r4   )r   r   r7   r	   )
data1data2f1r9   r:   f2rE   rF   rH   rI   s
             r   test_load_zero_based_autor      s    EE	BbV444DAq7f	B	B("bfEEENBB8v8vr   c                     d} t          t          |           d          \  }}t          |g d           t          |                                ddgddgd	dgg           t	          t          |           gd
          }t          t          |           d
          }||fD ]U\  }}}t          |g d           t          |g d           t          |                                ddgddgd	dgg           Vd S )NsM   
    3 qid:1 1:0.53 2:0.12
    2 qid:1 1:0.13 2:0.1
    7 qid:2 1:0.87 2:0.12Fquery_id)r0   r$   r   g(\?gQ?gp=
ף?皙?gףp=
?T)r!   r!   r$   )r   r   r   rW   r	   )rC   r9   r:   res1res2qids         r   test_load_with_qidr      s   D gdmme<<<DAqq)))$$$qyy{{dD\D#;t$MNNN>>>Dgdmmd;;;DD\ S S	1c1iii(((3			***199;;$c{T4L(QRRRRS Sr   zPtesting the overflow of 32 bit sparse indexing requires a large amount of memoryc                  B   d                     d t          dd          D                       } t          t          |           d          \  }}}t	          |dd         g d	           t	          t          j        |          t          j        dd                     dS )
zU
    load large libsvm / svmlight file with qid attribute. Tests 64-bit query ID
       
c              3   d   K   | ]+}d                      |                                          V  ,dS )z.3 qid:{0} 1:0.53 2:0.12
2 qid:{0} 1:0.13 2:0.1N)formatencode).0r;   s     r   	<genexpr>z&test_load_large_qid.<locals>.<genexpr>   sP       	
 	
 >DDQGGNNPP	
 	
 	
 	
 	
 	
r   r!   i ZbTr   N)r0   r$   r0   r$   )joinranger   r   r   rU   uniquearange)rC   r9   r:   r   s       r   test_load_large_qidr      s     ::	
 	
1.//	
 	
 	
 D #74==4@@@IAq#qv|||,,,ry~~ry4D'E'EFFFFFr   c                  ,   t          j        t                    5  t          t                    } t          t
                    }t          t          |           t          |          t          |           g           d d d            d S # 1 swxY w Y   d S r   )rc   rd   re   r   r5   rx   r	   r@   )r   invalid_paths     r   test_load_invalid_file2r      s    	z	"	" Q Q28<<	5kBBS^^S->->IOPPPQ Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Qs   A"B		BBc                      t          j        t                    5  t          d           d d d            d S # 1 swxY w Y   d S )NgzG?)rc   rd   	TypeErrorr   rP   r   r   test_not_a_filenamer      s     
y	!	! ! !4   ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !   7;;c                      t          j        t                    5  t          d           d d d            d S # 1 swxY w Y   d S )Nztrou pic nic douille)rc   rd   OSErrorr   rP   r   r   test_invalid_filenamer      s    	w		 3 312223 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3r   csr_containerc                    t          t                    \  }}|                                } | t          j        |                    }|t          j        |j        d                            }|t          j        |j        d                            }|||fD ]8}|||fD ].}dD ]'}	t          j        t          j        t          j	        t          j
        fD ]}
t                      }t          j        |          r|j        d         dk    r|j        }|                    |
          }t!          |||d|	           |                    d           |                                }t'          |d          }dt(          j        z  |v sJ |                                }t'          |d          }dd	g|	         d
z   |v sJ t-          ||
|	          \  }}|j        |
k    sJ t1          |                                j        |j                   |                                }t          j        |          r|                                }n|}|
t          j        k    r9t7          ||d           t7          |                    |
d          |d           t7          ||d           t7          |                    |
d          |d           )0:d S )Nr   )TFr!   testcommentr   utf-8zscikit-learn %sonezeroz-based)rT   r   r4   F)copyr'   )r   r5   rW   rU   
atleast_2dr   r7   rV   rX   int32int64r   spissparseTastyper   seekreadliner@   sklearn__version__r   rT   r   sorted_indicesindicesr   )r   X_sparsey_denseX_densey_sparseX_slicedy_slicedr9   r:   r   rT   r   X_inputr   rH   rI   X2_denseX_input_denses                     r   	test_dumpr     s   6x@@Hg  G}R]73344H 	(."3445H	(."3445H* 8 8GX. 7	 7	A+ 6 6
 j"*bhI 5 5E		A
 {1~~  !'!*// C  hhuooG&Av*    FF1IIIjjllG!'733G,w/BBgMMMMjjllG!'733G!6?:6AWLLLL/:VVVFB8u,,,,&r'8'8':':'BBJOOO!zz||H{7++ 0(/(9(9(/
**1-1MMM1#NN5uN==r1   
 2-2NNN1#NN5uN==r2   g567	8 8r   c                 ^   g dg dg dg}g dg dg dg} | |          }||fD ]}t                      }t          |||d           |                    d	           |                                d
k    sJ |                                dk    sJ |                                dk    sJ d S )N)r!   r   r0   r   r)   r   r   r   r   r   )r   r)   r   r!   r   r   r!   r   )r!   r   r!   )r!   r!   r   TrN   r   s   1 0:1 2:3 4:5
s   0,2 
s   0,1 1:5 3:1
)r   r   r   r   )r   r9   r   r   r:   r   s         r   test_dump_multilabelr   H  s    	///???;Ayy)))YYY/G}W%%Hx  0 0II1at4444	q			zz||11111zz||y((((zz||//////0 0r   c                  n   d} d}d}d}d}| ||||gg dg dg dg dg}| ||||g}t                      }t          |||           |                    d           |                                d	k    sJ |                                d
k    sJ |                                dk    sJ |                                dk    sJ |                                dk    sJ |                    d           t	          |          \  }}	t          ||                                           t          ||	           d S )Nr!   g @gGz@g     ?r*   )g    eAg NgmCgkcEr   r   r   r   s+   1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1
s!   2.1 0:1000000000 1:2e+18 2:3e+27
s   3.01 
s   1.000000000000001 
s   1 
)r   r   r   r   r   r   rW   )
r   twothreeexactalmostr9   r:   r   rH   rI   s
             r   test_dump_conciser   W  sX   
C
CEEF	c5%(	A 
c5%(A		Aq!QFF1III::<<JJJJJ::<<@@@@@::<<:%%%%::<<22222::<<7""""FF1III""FBa...a$$$$$r   c                     t          t                    \  } }|                                 } t                      }d}t	          | |||d           |                    d           t          |d          \  }}t          | |                                           t          ||           d}t                      }t          j	        t                    5  t	          | |||           d d d            n# 1 swxY w Y   |                    d          }t                      }t	          | |||d           |                    d           t          |d          \  }}t          | |                                           t          ||           t                      }t          j	        t                    5  t	          | ||d	           d d d            d S # 1 swxY w Y   d S )
Nz*This is a comment
spanning multiple lines.Fr   r   r   s   It is true that
½² = ¼)r   r   zI've got a  .)r   r5   rW   r   r   r   r   r   rc   rd   UnicodeDecodeErrordecodere   )r9   r:   r   ascii_commentrH   rI   utf8_commentunicode_comments           r   test_dump_commentr   v  s;   )(33DAq			A		AAMq!Q%HHHHFF1IIIe444FBa...a$$$ CL		A	)	*	* : :1aL9999: : : : : : : : : : : : : : : #))'22O		Aq!QEJJJJFF1IIIe444FBa...a$$$		A	z	"	" > >1a,<====> > > > > > > > > > > > > > > > > >s$   C44C8;C88GG Gc                     t          t                    \  } }t                      }|g}t          j        t
                    5  t          | ||           d d d            n# 1 swxY w Y   t                      }t          j        t
                    5  t          | |d d         |           d d d            d S # 1 swxY w Y   d S )N)r   r5   r   rc   rd   re   r   )r9   r:   r   y2ds       r   test_dump_invalidr     s3   )(33DAq		A#C	z	"	" & &1c1%%%& & & & & & & & & & & & & & & 			A	z	"	" ) )1afa((() ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )s$   A  A$'A$B99B= B=c                     t          t                    \  } }|                                 } t          j        | j        d                   dz  }t                      }t          | |||d           |                    d           t          |dd          \  }}}t          | |                                           t          ||           t          ||           d S )Nr   r$   Tr   r   )r   r5   rW   rU   r   r7   r   r   r   r   r   )r9   r:   r   r   rE   rF   	query_id1s          r   test_dump_query_idr     s    )(33DAq			Ay$$)H		Aq!QdCCCCFF1III*1tMMMBIa...a$$$h	22222r   c                     d} t          t          |           d          \  }}}g dg dg dg dg}g d}g d}t          ||           t          |                                |           t          ||           t                      }t	          ||||d           |                    d	           t          |dd          \  }}}t          ||           t          |                                |           t          ||           |                    d	           t          |d
d          \  }}t          ||           t          |                                |           d S )Ns   
    1 qid:0 0:1 1:2 2:3
    0 qid:72048431380967004 0:1440446648 1:72048431380967004 2:236784985
    0 qid:-9223372036854775807 0:1440446648 1:72048431380967004 2:236784985
    3 qid:9223372036854775807  0:1440446648 1:72048431380967004 2:236784985Tr   )r!   r$   r0   )ixU   \.,N^iY)r!   r   r   r0   )r   r   l l    r   r   F)r   r   r   rW   r   r   )rC   r9   r:   r   true_Xtrue_ytrueQIDr   s           r   test_load_with_long_qidr     s{   OD
 #74==4@@@IAq# 			222222222	F \\FOOOGq&!!!qyy{{F+++sG$$$		Aq!Q>>>>FF1III"1tEEEIAq#q&!!!qyy{{F+++sG$$$FF1IIIa%DAAADAqq&!!!qyy{{F+++++r   c                    t                      } | t          j        d                    }t          j        g d          }t	          |||           dD ]p}|                    d           t          |d|          \  }}t          ||           t          |                                |                                           qd S )N)r0   r4   r7   r   )r   TFr   r4   )rb   r   )	r   rU   zerosarrayr   r   r   r   rW   )r   r   r   r   r   r9   r:   s          r   test_load_zerosr     s    		A]28&11122FXiii  Fvvq)))+ A A
	q			!!jIII1!!V,,,!!))++v~~/?/?@@@@	A Ar   sparsity)r   r   g      ?gGz?r!   	n_samples   e   rb   )r$   r   )   c                    t           j                            d          }|                    dd||f          }| r	d||| k     <    ||          }|                    dd|          }t                      }t          |||           |                    d           t          |	                                          }d}	|dz  }
|
|	z
  }d|z  dz  }||
z
  }t          |||	|	          \  }}t          |||
|	          \  }}t          |||
          \  }}t          j        |||g          }t          j        |||g          }t          ||           t          |                                |                                           d S )Nr           r*   lowhighsizer$   r0   r4   r)   )rb   offsetlength)rb   r   )rU   randomRandomStateuniformrandintr   r   r   lengetvaluer   concatenater   vstackr   rW   )r   r   rb   r   rngr9   r:   r   r   mark_0mark_1length_0mark_2length_1X_0y_0X_1y_1X_2y_2y_concatX_concats                         r   test_load_with_offsetsr    s   
 )


"
"C#Y
,CDDA !h,aA	22A		Aq!QFF1IIIqzz||D FQYFHX]FH "	j  HC "	j  HC "!
6JJJHC~sCo..Hy#sC))Ha***aiikk8+;+;+=+=>>>>>r   c           
         t           j                            d          }t          j        g dg dg dg dg dg dg dg          } | |          }|j        \  }}|                    dd|          }t          j        |          dz  }t                      }t          ||||	           |	                    d           t          |                                          }t          |          D ]}	|	                    d           t          ||d
d|	          \  }
}}t          ||d
|	d          \  }}}t          j        ||g          }t          j        ||g          }t          j        |
|g          }t#          ||           t%          ||           t#          |                                |                                           d S )Nr   )r   r   r   r   r   r   )r!   r$   r0   r4   r   r    )r!   r   r0   r   r   r   )r   r   r   r   r   r!   )r!   r   r   r   r   r   r$   r   r   T)rb   r   r   r   r   )rU   r   r   r   r7   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rW   )r   r  r9   r   rb   r:   r   r   r   markr  r  q_0r	  r
  q_1q_concatr  r  s                      r   "test_load_offset_exhaustive_splitsr    s   
)


"
"C
	

	 
	A 	aAGIz	22Ay##q(H		Aq!Q2222FF1IIIqzz||D d C C	q			**tAd
 
 
S# +*tD
 
 
S# >3*-->3*--9c3Z((!!X...8X...!!))++x/?/?/A/ABBBBC Cr   c                      t          j        t          d          5  t          t          dd           d d d            d S # 1 swxY w Y   d S )Nzn_features is required)matchr0   )r   r   )rc   rd   re   r   r5   rP   r   r   test_load_with_offsets_errorr  8  s    	z)A	B	B	B E E&x!DDDDE E E E E E E E E E E E E E E E E Es   AAAc                    t          | dz            }t          j                            d          }|                    dd                              t          j                  }t          j        g d          }t          j        g d          }t          j        g d          } ||||fd	          }t          |||d
           t          |d
          \  }	}
g d}|
|k    sJ dS )z
    Ensure that if y contains explicit zeros (i.e. elements of y.data equal to
    0) then those explicit zeros are not encoded.
    svm_explicit_zero*   r0   r)   )r   r$   r0   r    )r   r$   r$   r   r!   r$   )r   r!   r!   r!   r!   r   )r0   r0   r   TrN   )g       @r  )r   r*   N)
r@   rU   r   r   randnr   rX   r   r   r   )tmp_pathr   	save_pathr  r9   r6   r   rC   r:   _y_loady_trues               r    test_multilabel_y_explicit_zerosr#  =  s     H2233I
)


#
#C		!Qrz**AXlll##Fh)))**G8&&&''DtWf-V<<<A q!Y48888"9>>>IAv)))FVr   c                    t           j                            d          }|                    dd          }|                    d          }t	          ||g          \  }}t          | dz            }t          |||           dS )zEnsure that there is no ValueError when dumping a read-only `X`.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/28026
    r  r)   r$   svm_read_onlyN)rU   r   r   r  r   r@   r   )r  r  r9   r:   r  s        r   test_dump_read_onlyr&  W  sz     )


#
#C		!QA		!A %aV,,DAqH.//Iq!Y'''''r   )Drk   rA   rm   bz2r   	importlibr   ior   tempfiler   numpyrU   rc   scipy.sparsesparser   r   sklearn.datasetsr   r   r	   sklearn.utils._testingr
   r   r   r   sklearn.utils.fixesr   r   r5   rQ   rx   r|   r   r   r>   rJ   rL   rR   r_   rf   rv   ry   r}   r   r   r   r  skipr   r   r   r   parametrizer   r   r   r   r   r   r   r   r  r  r  r#  r&  rP   r   r   <module>r3     s]    				                    ' ' ' ' ' '             X X X X X X X X X X            / . . . . .0 (%	$+8 8 8/ / / .  .  .F  &  + + +
" " " @ @ @"& & &84 4 4
5 5 5
0 0 0  S S S" V G G GQ Q Q! ! !3 3 3
 .99B B :9BJ .990 0 :90% % %>> > >B
) 
) 
)3 3 3!, !, !,H .99
A 
A :9
A %;%;%;<<r3i00zzz22.99!? !? :9 32 10 =<!?H .99'C 'C :9'CTE E E
 .99  :92( ( ( ( (r   