
    &`i)                        d dl Z d dlmZmZmZ d dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZmZ d dlmZmZmZ d dlmZ ed	ej        d
efd            Zed	ej        d
efd            Z	 	 dd	ej        deeef         dedefdZe	j         de	j!        j        d
edeej        egdf         dedeeef         f
d            Z"e G d de                      Z#dS )    N)AnyCallableDict)Dataset)OfflineEvaluator)Policy)SampleBatch convert_ma_batch_to_sample_batch)DeveloperAPIExperimentalAPIoverride)SampleBatchTypebatchindexc                 |    t           j                            | j        d                   }| ||f         | d d |f<   d S )Nr   )nprandompermutationshape)r   r   random_indss      x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/rllib/offline/feature_importance.py_perturb_fnr      s>     )''A77KK./E!!!U(OOO    c                     t          j        | d         j                  }t          ||           t	          |          | d<   | S )Nobsperturbed_obs)r   vstackvaluesr   list)r   r   	obs_batchs      r   _perturb_dfr!      s=    	%,-..I	5!!!!)__E/Lr    policy_state	input_key
output_keyc                    |st           j        }t          j        |          }t          t           j        t	          j        | |         j                  i          }|                    |d          \  }}}|sd}|| |<   | S )a  A custom local function to do batch prediction of a policy.

    Given the policy state the action predictions are computed as a function of
    `input_key` and stored in the `output_key` column.

    Args:
        batch: A sub-batch from the dataset.
        policy_state: The state of the policy to use for the prediction.
        input_key: The key to use for the input to the policy. If not given, the
            default is SampleBatch.OBS.
        output_key: The key to use for the output of the policy. If not given, the
            default is "predicted_actions".

    Returns:
        The modified batch with the predicted actions added as a column.
    Fexplorepredicted_actions)r	   OBSr   
from_stater   r   r   compute_actions_from_input_dict)r   r#   r$   r%   policysample_batchactions_s           r   _compute_actionsr1      s    ,  $O	|,,FORYuY'7'>??	
 L
 ::<QV:WWMGQ )(
E*Lr   dataset
perturb_fn
batch_sizec          	          |                      ||dd|i          }|                     t          |ddd|d          }d }|                     ||d          }|S )	a  A remote function to compute the feature importance of a given index.

    Args:
        dataset: The dataset to use for the computation. The dataset should have `obs`
            and `actions` columns. Each record should be flat d-dimensional array.
        index: The index of the feature to compute the importance for.
        perturb_fn: The function to use for perturbing the dataset at the given index.
        batch_size: The batch size to use for the computation.
        policy_state: The state of the policy to use for the computation.

    Returns:
        The modified dataset that contains a `delta` column which is the absolute
        difference between the expected output and the output due to the perturbation.
    pandasr   )r4   batch_format	fn_kwargsperturbed_actionsr   )r%   r$   r#   c                 R    t          j        | d         | d         z
            | d<   | S )Nref_actionsr9   delta)r   abs)r   s    r   delta_fnz1get_feature_importance_on_index.<locals>.delta_fno   s,     m 4u=P7Q QRRgr   )r4   r7   )map_batchesr1   )	r2   r   r3   r4   r#   perturbed_dsr9   r>   r<   s	            r   get_feature_importance_on_indexrA   G   s    . &&E"	 '  L %00-((
 
	 1 	 	   ))Zh *  E Lr   c                        e Zd Z ee          ddefdededede	e
j        ege
j        f         f fd            Zded	eeef         fd
Z ee          dddeded	eeef         fd            Z xZS )FeatureImportance   g      ?r-   repeatlimit_fractionr3   c                 t    t                                          |           || _        || _        || _        dS )aD  Feature importance in a model inspection technique that can be used for any
        fitted predictor when the data is tablular.

        This implementation is also known as permutation importance that is defined to
        be the variation of the model's prediction when a single feature value is
        randomly shuffled. In RLlib it is implemented as a custom OffPolicyEstimator
        which is used to evaluate RLlib policies without performing environment
        interactions.

        Example usage: In the example below the feature importance module is used to
        evaluate the policy and the each feature's importance is computed after each
        training iteration. The permutation are repeated `self.repeat` times and the
        results are averages across repeats.

        ```python
            config = (
                AlgorithmConfig()
                .offline_data(
                    off_policy_estimation_methods=
                        {
                            "feature_importance": {
                                "type": FeatureImportance,
                                "repeat": 10,
                                "limit_fraction": 0.1,
                            }
                        }
                )
            )

            algorithm = DQN(config=config)
            results = algorithm.train()
        ```

        Args:
            policy: the policy to use for feature importance.
            repeat: number of times to repeat the perturbation.
            perturb_fn: function to perturb the features. By default reshuffle the
                features within the batch.
            limit_fraction: fraction of the dataset to use for feature importance
                This is only used in estimate_on_dataset when the dataset is too large
                to compute feature importance on.
        N)super__init__rE   r3   rF   )selfr-   rE   rF   r3   	__class__s        r   rI   zFeatureImportance.__init__~   s:    d 	   $,r   r   returnc                 x   t          |          }|d         }|j        d         }t          j        | j        |f          | j                            |d          \  }}}t          | j                  D ]}t          |          D ]u}t          j	        |          }t          ||           | j                            |d          \  }	}}t          j        t          j        |	|z
                      ||f<   v                    d          fdt          t                              D             }
|
S )a)  Estimate the feature importance of the policy.

        Given a batch of tabular observations, the importance of each feature is
        computed by perturbing each feature and computing the difference between the
        perturbed policy and the reference policy. The importance is computed for each
        feature and each perturbation is repeated `self.repeat` times.

        Args:
            batch: the batch of data to use for feature importance.

        Returns:
            A dict mapping each feature index string to its importance.
        r   Fr'   )r   r   c                 (    i | ]}d | |         S feature_ .0i
importances     r   
<dictcomp>z.FeatureImportance.estimate.<locals>.<dictcomp>   %    QQQQ>a>>:a=QQQr   )r
   r   r   zerosrE   r-   compute_actionsrangecopydeepcopyr   meanr=   len)rJ   r   r    
n_featuresr;   r0   rrU   copy_obs_batchr9   metricsrV   s              @r   estimatezFeatureImportance.estimate   sP    177%L	_R(
Xt{J788
 K77	57QQQt{## 	T 	TA:&& T T!%y!9!9N!4444*.+*E*E"E +F + +'!1a $&7262Ck2Q+R+R#S#S
1a4  T  __Q''
QQQQ%J:P:PQQQr   .)n_parallelismr2   re   c                     j                                         |                    t           j        |                                z                      }t          d|                                |z            }|                    t          |dd          }|	                    d          d         t          j                 j        d         }t          j         j        |f          t!           j                  D ]}|                                t          d                                |z  |z             fdt!          |          D             }t%          j        |          }	t          j        d |	D                       |<                       d          fd	t!          t-                              D             }
|
S )
at  Estimate the feature importance of the policy given a dataset.

        For each feature in the dataset, the importance is computed by applying
        perturbations to each feature and computing the difference between the
        perturbed prediction and the reference prediction. The importance
        computation for each feature and each perturbation is repeated `self.repeat`
        times. If dataset is large the user can initialize the estimator with a
        `limit_fraction` to limit the dataset to a fraction of the original dataset.

        The dataset should include a column named `obs` where each row is a vector of D
        dimensions. The importance is computed for each dimension of the vector.

        Note (Implementation detail): The computation across features are distributed
        with ray workers since each feature is independent of each other.

        Args:
            dataset: the dataset to use for feature importance.
            n_parallelism: number of parallel workers to use for feature importance.

        Returns:
            A dict mapping each feature index string to its importance.
        rD   r;   )r%   r#   )r4   r8   r   rN   c           	      X    g | ]&}t                               |j                   'S ))r2   r   r3   bsizer#   )rA   remoter3   )rT   rU   bsize_per_taskr#   rJ   shuffled_dss     r   
<listcomp>z9FeatureImportance.estimate_on_dataset.<locals>.<listcomp>  sQ     	 	 	  066'#(!- 7  	 	 	r   c                 8    g | ]}|                     d           S )r<   )r^   )rT   ds     r   rl   z9FeatureImportance.estimate_on_dataset.<locals>.<listcomp>  s"    %N%N%N!affWoo%N%N%Nr   c                 (    i | ]}d | |         S rP   rR   rS   s     r   rW   z9FeatureImportance.estimate_on_dataset.<locals>.<dictcomp>  rX   r   )r-   	get_statelimitintrF   countmaxr?   r1   taker	   r*   r   r   rY   rE   r[   random_shuffleraygetarrayr^   r_   )rJ   r2   re   dsrh   
actions_dsr`   ra   
remote_fnsds_w_fi_scoresrc   rj   rV   r#   rk   s   `          @@@@r   estimate_on_datasetz%FeatureImportance.estimate_on_dataset   s   6 {,,..]]3t2W]]__DEEFF Arxxzz]233^^+ ,  $ 
 

 WWQZZ];?39"=
Xt{J788
t{## 	P 	PA$3355K [%6%6%8%8:%E-$WXXN	 	 	 	 	 	 	 z**	 	 	J !WZ00NH%N%N~%N%N%NOOJqMM__Q''
QQQQ%J:P:PQQQr   )__name__
__module____qualname__r   r   r!   r   rr   floatr   pd	DataFramerI   r   r   strr   rd   r   r~   __classcell__)rK   s   @r   rC   rC   |   s.       X  #BM4- 4-4- 4- 	4-
 blC0",>?4- 4- 4- 4- 4-  4-l!o !$sCx. ! ! ! !F X8;B B BB25B	c3hB B B  B B B B Br   rC   )r"   r"   )$r\   typingr   r   r   numpyr   r6   r   rw   ray.datar   #ray.rllib.offline.offline_evaluatorr   ray.rllib.policyr   ray.rllib.policy.sample_batchr	   r
   ray.rllib.utils.annotationsr   r   r   ray.rllib.utils.typingr   ndarrayrr   r   r   r!   r   r1   ri   datarA   rC   rR   r   r   <module>r      s7    & & & & & & & & & &         



       @ @ @ @ @ @ # # # # # # W W W W W W W W O O O O O O O O O O 2 2 2 2 2 2 0rz 0# 0 0 0 0 r| C     	% %<%sCx.% % 	% % % %P 1X1 1 ",,d23	1
 1 sCx.1 1 1 1h ^ ^ ^ ^ ^( ^ ^ ^ ^ ^r   