
    PiM              	          d dl Zd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
mZmZmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZmZmZ d dlmZ d dl m!Z! d dl"m#Z#m$Z$m%Z%m&Z& d dl'm(Z( ej)        *                    dddg          ej)        *                    dddg          ej)        *                    dddg          d                                     Z+ej)        *                    dddg          ej)        *                    dddg          d                         Z,ej)        *                    dddg          ej)        *                    dddg          d0d                        Z-d Z.d Z/d Z0ej)        *                    dddg          d              Z1ej)        *                    dg d!          ej)        *                    dddg          d"                         Z2ej)        *                    d#d$d%g          d&             Z3d' Z4d( Z5ej)        *                    d)d*d+gd*d+gfd*d+g ed*           ed+          d,fd*d+gd- fg          d.             Z6d/ Z7dS )1    N)parallel_backend)assert_allclose)ColumnTransformer)load_diabetes	load_irismake_classificationmake_regression)DummyClassifier)RandomForestClassifierRandomForestRegressor)SimpleImputer)permutation_importance)LinearRegressionLogisticRegression)
get_scorermean_squared_errorr2_score)train_test_split)make_pipeline)KBinsDiscretizerOneHotEncoderStandardScalerscale)_convert_containern_jobs      max_samples      ?      ?sample_weightonesc           
      b   t           j                            d          }d}t          d          \  }}||                    d|j        d                   z                       dd	          }t          j        ||g          }|d
k    rt          j        |          n|}t          dd          }	|	
                    ||           t          |	|||||| |          }
|
j        j        |j        d	         |fk    sJ t          j        |
j        d         |
j        d d         k              sJ d S )N*      T)
return_X_yMbP?r   r   sizer   r"   
   n_estimatorsrandom_state)r!   	n_repeatsr.   r   r   )nprandomRandomStater   normalshapereshapehstack	ones_liker   fitr   importancesallimportances_mean)r   r   r!   rngr/   Xyy_with_little_noiseweightsclfresults              /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/sklearn/inspection/tests/test_permutation_importance.py9test_permutation_importance_correlated_feature_regressionrD      s?    )


#
#CID)))DAqszzAGAJzGGGPPQSUVWW
	1)*++A!.&!8!8bl1ooomG
Rb
A
A
ACGGAqMMM#			 	 	F #
I'>>>>> 6&)"-0G0LLMMMMMMM    c           	      x   t          j        d          }t          j                            d          }d}t                      }|j        |j        }}||                    d|j	        d                   z   
                    dd          }|                    ||j        	          }||d
<   t          dd          }	|	                    ||           t          |	||||| |          }
|
j        j	        |j	        d         |fk    sJ t          j        |
j        d         |
j        d d         k              sJ d S )Npandasr$   r%   r'   r   r(   r*   r   )columnscorrelated_featurer+   r,   r/   r.   r   r   )pytestimportorskipr0   r1   r2   r   datatargetr3   r4   r5   	DataFramefeature_namesr   r8   r   r9   r:   r;   )r   r   pdr<   r/   datasetr=   r>   r?   rA   rB   s              rC   @test_permutation_importance_correlated_feature_regression_pandasrS   C   sE   
 
	X	&	&B )


#
#CIkkG<qAszzAGAJzGGGPPQSUVWW 	Q 566A1A
 br
B
B
BCGGAqMMM#		  F #
I'>>>>> 6&)"-0G0LLMMMMMMMrE   r$   c           	      6   t           j                            |          }d}d}d}d}d}||z   }	t          j        |          }
|                    |
|          t          j        fd|
d |         D                       }|                    t           j                  }||k     sJ t          j        ||	                    ||          gd          }|j
        ||	fk    sJ t          |d|	          \  }}}}t          d|
          }|                    ||           |j        }|d |         }||d          }|                                |                                k     sJ t#          |||||| |          }|j        j
        |j
        d         |fk    sJ |j        d |         }|j        |d          }t          t          j        |                    dk    sJ |                                dk     sJ |                                dk    sJ d S )Nr%     r   r   )r)   c                 D    g | ]}|k                         d d          S )r*   r   )r5   ).0cr>   s     rC   
<listcomp>zEtest_robustness_to_high_cardinality_noisy_feature.<locals>.<listcomp>~   s-    UUUqAF##B**UUUrE   )axisr   )	test_sizer.   r,   rJ   gHz>g?g333333?)r0   r1   r2   arangechoicer6   astypefloat32concatenaterandnr4   r   r   r8   feature_importances_maxminr   r9   r;   abs)r   r   seedr<   r/   	n_samples	n_classesn_informative_featuresn_noise_features
n_featuresclassesr=   X_trainX_testy_trainy_testrA   tree_importancesinformative_tree_importancesnoisy_tree_importancesrinformative_importancesnoisy_importancesr>   s                          @rC   1test_robustness_to_high_cardinality_noisy_featurerw   k   s    )


%
%CIII'*::J
 i	""G

7
++A
	UUUUG<S=S<S4TUUUVVA	A "I---- 	399Y0@AABKKKA7y*-----
 (8	1#( ( ($GVWf !ac
B
B
BCGGGW
 /#34K5K4K#L -.D.E.EF'++--0F0J0J0L0LLLLL 		 	 	A =171:y"99999  01H2H1HI*+A+B+BC rv'(())D0000  ""T)))) #&&((4//////rE   c                  ^   t           j                            d          } d}t          j        dddt           j        gg dg          j        }t          j        g d          }t          t                      t          d	                    }|	                    ||           t          ||||| 
          }|j        j        |j        d         |fk    sJ t          j        |j        d         |j        d d         k              sJ t           j                            d          } t          ||||| 
          }|j        j        |j        d         |fk    sJ t          j        |j        |j                  rJ t          j        |j        d         |j        d d         k              sJ d S )Nr$      r           @      @)r   r   r   r   r   r   r   r   lbfgssolverr/   r.   r   r*   r   )r0   r1   r2   arraynanTr   r   r   r8   r   r9   r4   r:   r;   allclose)r<   r/   r=   r>   rA   rB   result2s          rC   'test_permutation_importance_mixed_typesr      s   
)


#
#CI 	3S"&)<<<899;A
A
);7)K)K)K
L
LCGGAqMMM#CAQTUUUF#
I'>>>>> 6&)"-0G0LLMMMMM )


"
"C$S!Q)RUVVVG$Y(?????{6-w/BCCCCC 6'*2.1I#2#1NNOOOOOOOrE   c                     t          j        d          } t          j                            d          }d}|                     dddt          j        gg dd          }t          j        g d	          }t          t                      t                                }t          d
|dgfdt                      dgfg          }t          |t          d                    }|                    ||           t          |||||          }|j        j        |j        d         |fk    sJ t          j        |j        d         |j        d d         k              sJ d S )NrG   r$   r%   r    rz   r{   )abr   r   )col1col2r|   numr   catr   r}   r~   r   r   r*   )rK   rL   r0   r1   r2   rO   r   r   r   r   r   r   r   r   r8   r   r9   r4   r:   r;   )	rQ   r<   r/   r=   r>   num_preprocess
preprocessrA   rB   s	            rC   .test_permutation_importance_mixed_types_pandasr      sR   		X	&	&B
)


#
#CI 	sCbf5?S?S?STTUUA
A"=??N4D4DEEN"
&	*UMOOfX,NO J 
$6g$F$F$F
G
GCGGAqMMM#CAQTUUUF#
I'>>>>> 6&)"-0G0LLMMMMMMMrE   c                  "   t          ddd          \  } }t          |           } t          |          }t                                          | |          }d|j        dz  z  }t          || |dd          }t          ||j        d	d
           d S )N  r+   r   rg   rk   r.   r   2   neg_mean_squared_error)r/   scoringg?gư>)rtolatol)r	   r   r   r8   coef_r   r   r;   )r=   r>   lrexpected_importancesresultss        rC   .test_permutation_importance_linear_regresssionr      s    SRaHHHDAqaAaA					1	%	%B rx{?$
AqB(@  G g6T     rE   r   c           	      0   t          ddd          \  }}t                                          ||          }t          |||ddd|           }|d                                         }|d                                         }||z
  d	k    sJ t          |||ddd
          }t          |d         |d                    t          d          5  t          |||ddd
          }d d d            n# 1 swxY w Y   t          |d         |d                    d S )Nr   r+   r   r   r%   r   rJ   r9   333333?r   )r/   r.   r   	threading)r	   r   r8   r   rd   rc   r   r   )	r   r=   r>   r   importance_sequentialimp_minimp_maximportance_processesimportance_threadings	            rC   ;test_permutation_importance_equivalence_sequential_parallelr     s   
 SRaHHHDAq					1	%	%B2
AqAAa[   $M26688G#M26688GWs"""" 2
AqAAa   ]+-B=-Q  
 
+	&	& 
 
51! 
  
  

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 ]+-B=-Q    s   C//C36C3)Nr   r   c           	         t          j        d          }t          ddd          \  }}|                    |          }t	          ddd	          }|                    |                    d
d                    }t          j        ||g          }|j	        j
        dk    sJ |                    |                                          }t          |j                  }|||<   ||         j	        |j	        k    sJ t          j        t          |                                        t"                    |_        t'          ddd          }	|	                    ||           d}
t+          |	|||
d| |          }|d                                         }|d                                         }||z
  dk    sJ t+          |	|||
d| |          }t1          |d         |d                    d S )NrG   d   r%   r   r      ordinalaveraged_inverted_cdf)n_binsencodequantile_methodr*   r   f)r-   	max_depthr.   rJ   r9   r   )rK   rL   r	   rO   r   fit_transformr5   r0   r6   dtypekindCategoricalravellenrH   r\   r^   strindexr   r8   r   rd   rc   r   )r   r   rQ   r=   r>   X_dfbinner
cat_columnnew_col_idxrfr/   importance_arrayr   r   importance_dataframes                  rC   7test_permutation_importance_equivalence_array_dataframer   -  s   
 
	X	&	&B SQQGGGDAq<<??D /  F
 %%aiiA&6&677J 		1j/""A7<3 
 0 0 2 233Jdl##K"D"j&66666 3t99%%,,S11DJ	A	K	K	KBFF1aLLLI-
		   }-1133G}-1133GWs"""" 2
	   ')=m)L    rE   
input_typer   	dataframec                 V   t          d          d}}t          ||d          \  }}|j        dk    sJ t          ||           }t	          d                              ||          }d}t          ||||d	
          }t          j        ||f          }t          ||j
                   d S )Ng     j@ry   r   r   g    .Aprior)strategyr%   r   )r/   r   )intr   nbytesr   r
   r8   r   r0   zerosr   r9   )	r   rg   rk   r=   r>   rA   r/   rt   r   s	            rC   /test_permutation_importance_large_memmaped_datar   s  s      HHazI
  DAq 8c>>>>1j))A
7
+
+
+
/
/1
5
5C IsAqIaHHHA 8Z$;<<(!-88888rE   c            	         t           j                            d          } d}d}|dz  }|                     dd||f          }t          j        |          }d|d |df         z  |d |df         z   |d |<   ||d df         d||d df         z  z   ||d <   t          d          }|                    ||           t          |||dd	d
          }|j        d         |j        d         z  }|t          j
        dd          k    sJ t          j        |          }	t          |||dd	d
|	          }|j        d         |j        d         z  }
|
t          j
        |d          k    sJ t          j        t          j        d|          t          j        d|          g          }	|                    |||	           t          |||dd	d
|	          }|j        d         |j        d         z  }||z  t          j
        dd          k    sJ d S )Nr   rU   r   g        r'   r   F)fit_interceptneg_mean_absolute_error   r.   r   r/   g{Gz?r.   r   r/   r!   g    _Br    )r0   r1   r2   r3   r   r   r8   r   r;   rK   approxr"   r6   repeat)r<   rg   rk   n_half_samplesxr>   r   pix1_x2_imp_ratio_w_nonewx1_x2_imp_ratio_w_onesx1_x2_imp_ratio_ws               rC   )test_permutation_importance_sample_weightr     sc   
 )


"
"CIJ!^N

3	:677A
AQ122Q7I5JJAo~o>??A-.Q~7I5J1JJAnoo 
	.	.	.BFF1aLLL
 
 
Aqq*Cs
 
 
B  03b6I!6LL!V]1d%;%;;;;; 		A	
		)
 
 
B  03b6I!6LL!V]3I4%P%PPPPP 		29X~66	#~8V8VWXXAFF1aOOO	
		)
 
 
B +A.1DQ1GG55q$9O9OOOOOOOrE   c            
         d } t          j        ddgddgg          }t          j        ddg          }t          j        ddg          }t                      }|                    ||           	 t	          |||d| d           n$# t
          $ r t          j        d           Y nw xY wt          j        t
                    5  t	          |||d| d|           d d d            d S # 1 swxY w Y   d S )	Nc                     dS )Nr    	estimatorr=   r>   s      rC   	my_scorerzJtest_permutation_importance_no_weights_scoring_function.<locals>.my_scorer  s    qrE   r   r   r   ry   r   zpermutation_test raised an error when using a scorer function that does not accept sample_weight even though sample_weight was Noner   )	r0   r   r   r8   r   	TypeErrorrK   failraises)r   r   r>   r   r   s        rC   7test_permutation_importance_no_weights_scoring_functionr     sp      	1a&1a&!""A
!QA
!QA			BFF1aLLL
r1aaVWXXXXX 
 
 
%	
 	
 	
 	
 	

 
y	!	! 
 
11i1TU	
 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s$   /B B&%B&C&&C*-C*z list_single_scorer, multi_scorerr2r   r   r   c                     t          ||                     |                    t          ||                     |                     dS )Nr   )r   predictr   r   s      rC   <lambda>r     sH    q)"3"3A"6"677+=aARARSTAUAU+V+V*V% % rE   c           	      ~   t          ddd          \  }}t                                          ||          }t          |||d|d          }t	          |                                          t	          |           k    sJ | D ]9}||         }t          |||d|d          }t          |j        |j                   :d S )Nr   r+   r   r   r   r   r   )r	   r   r8   r   setkeysr   r9   )	list_single_scorermulti_scorerr   r>   r   multi_importancescorermulti_resultsingle_results	            rC   (test_permutation_importance_multi_metricr     s    0 SRaHHHDAq					1	%	%B-
Aqq,!   $$&&''3/A+B+BBBBB$ M M'/.11f
 
 
 	0-2KLLLLM MrE   c                  @   t          j        dg          j        } t          j        g d          }t                      }|                    | |           d}t          j        t          |          5  t          || |d           ddd           dS # 1 swxY w Y   dS )zjCheck that a proper error message is raised when `max_samples` is not
    set to a valid input value.
    )r    rz   r{   g      @r|   z max_samples must be <= n_samples)matchr%   )r   N)	r0   r   r   r   r8   rK   r   
ValueErrorr   )r=   r>   rA   err_msgs       rC   -test_permutation_importance_max_samples_errorr     s     	&'((*A
A


CGGAqMMM1G	z	1	1	1 9 9sAqa88889 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9s   2BBB)r$   )8numpyr0   rK   joblibr   numpy.testingr   sklearn.composer   sklearn.datasetsr   r   r   r	   sklearn.dummyr
   sklearn.ensembler   r   sklearn.imputer   sklearn.inspectionr   sklearn.linear_modelr   r   sklearn.metricsr   r   r   sklearn.model_selectionr   sklearn.pipeliner   sklearn.preprocessingr   r   r   r   sklearn.utils._testingr   markparametrizerD   rS   rw   r   r   r   r   r   r   r   r   r   r   r   rE   rC   <module>r	     su        # # # # # # ) ) ) ) ) ) - - - - - -            * ) ) ) ) ) J J J J J J J J ( ( ( ( ( ( 5 5 5 5 5 5 E E E E E E E E         
 5 4 4 4 4 4 * * * * * * X X X X X X X X X X X X 5 5 5 5 5 5 Aq6**c
334.99 N  N :9 43 +* NF Aq6**c
33#N #N 43 +*#NL Aq6**c
33L0 L0 L0 43 +*L0^P P P<N N N0  $ c
33% % 43%P <<<00c
33A A 43 10AH '=>>9 9 ?>9,:P :P :Pz
 
 
> &
(	)D2J+KL+, j&&*4*5M*N*N 	
 +, 	
 (M M) (M*9 9 9 9 9rE   