
    Pi<                     v   d Z ddlZddlmZmZ ddlmZmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZmZmZ ddlmZ ddlmZ  ee          Z G d de          Z	 	 	 	 	 ddedeeeeef                  dee	         deeeef                  deeeef                  deeeef                  fdZ 	 	 	 	 ddedeeeef                  dee	         deeeef                  deeeeef                  f
dZ!	 	 	 	 ddedeeeef                  dee	         deeeef                  deeeeef                  dee         fdZ"	 	 	 	 	 	 ddedee         deeeee         eeeeee         f         f         f                  dee	         deeeef                  deeeef                  deeeef                  defdZ#	 	 	 	 	 	 ddedee         deeeee         eeeeee         f         f         f                  dee	         deeeef                  deeeef                  deeeef                  fdZ$dS )zList and inspect datasets.    N)MappingSequence)OptionalUnion   )DownloadConfig)DownloadMode)StreamingDownloadManager)DatasetInfo)dataset_module_factoryget_dataset_builder_classload_dataset_builder)
get_logger)Versionc                       e Zd ZdS )SplitsNotFoundErrorN)__name__
__module____qualname__     d/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/datasets/inspect.pyr   r   &   s        Dr   r   path
data_filesdownload_configdownload_moderevisiontokenc                 b     t                     } fd|D             S )a6  Get the meta information about a dataset, returned as a dict mapping config name to DatasetInfoDict.

    Args:
        path (`str`): path to the dataset repository. Can be either:

            - a local path to the dataset directory containing the data files,
                e.g. `'./dataset/squad'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
                e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the main branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.
        **config_kwargs (additional keyword arguments):
            Optional attributes for builder class which will override the attributes if supplied.

    Example:

    ```py
    >>> from datasets import get_dataset_infos
    >>> get_dataset_infos('cornell-movie-review-data/rotten_tomatoes')
    {'default': DatasetInfo(description="Movie Review Dataset.
This is a dataset of containing 5,331 positive and 5,331 negative processed
sentences from Rotten Tomatoes movie reviews...), ...}
    ```
    )r   r   r   r   r   r   c                 :    i | ]}|t          d|d S ))r   config_namer   r   r   r   r   r   )get_dataset_config_info)	.0r!   config_kwargsr   r   r   r   r   r   s	     r   
<dictcomp>z%get_dataset_infos.<locals>.<dictcomp>^   sa         	, 	
#!+'	
 	
 	
 	
  r   )get_dataset_config_names)r   r   r   r   r   r   r$   config_namess   ``````` r   get_dataset_infosr(   *   s}    X ,'#  L          (   r   c                    t          | f||||d|}t          |t          j                            |                     }t          |j                                                  p"|j        	                    d|j
        pd          gS )aG  Get the list of available config names for a particular dataset.

    Args:
        path (`str`): path to the dataset repository. Can be either:

            - a local path to the dataset directory containing the data files,
                e.g. `'./dataset/squad'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
                e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the main branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        **download_kwargs (additional keyword arguments):
            Optional attributes for [`DownloadConfig`] which will override the attributes in `download_config` if supplied,
            for example `token`.

    Example:

    ```py
    >>> from datasets import get_dataset_config_names
    >>> get_dataset_config_names("nyu-mll/glue")
    ['cola',
     'sst2',
     'mrpc',
     'qqp',
     'stsb',
     'mnli',
     'mnli_mismatched',
     'mnli_matched',
     'qnli',
     'rte',
     'wnli',
     'ax']
    ```
    r   r   r   r   dataset_namer!   default)r   r   osr   basenamelistbuilder_configskeysbuilder_kwargsgetDEFAULT_CONFIG_NAME)r   r   r   r   r   download_kwargsdataset_modulebuilder_clss           r   r&   r&   m   s    h ,'#   N ,NIYIYZ^I_I_```K+002233 %))-9X9e\eff8 r   returnc                 $   t          | f||||d|}t          |t          j                            |                     }t          |j                                                  }|rt          |          dk    r|d         nd}	nd}	|j	        p|	S )aW  Get the default config name for a particular dataset.
    Can return None only if the dataset has multiple configurations and no default configuration.

    Args:
        path (`str`): path to the dataset repository. Can be either:

            - a local path to the dataset directory containing the data files,
                e.g. `'./dataset/squad'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
                e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the main branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        **download_kwargs (additional keyword arguments):
            Optional attributes for [`DownloadConfig`] which will override the attributes in `download_config` if supplied,
            for example `token`.

    Returns:
        Optional[str]: the default config name if there is one

    Example:

    ```py
    >>> from datasets import get_dataset_default_config_name
    >>> get_dataset_default_config_name("openbookqa")
    'main'
    ```
    r*   r+   r   r   Nr-   )
r   r   r.   r   r/   r0   r1   r2   lenr5   )
r   r   r   r   r   r6   r7   r8   r1   default_config_names
             r   get_dataset_default_config_namer=      s    Z ,'#   N ,NIYIYZ^I_I_```K;6;;==>>O (474H4HA4M4Moa00SW'*A.AAr   r!   c           
      b    t           f||||||d|}|j        }	|	j        |r|                                nt	                      }|||_        	  fd|                    t          |j        |                    D             |	_        n"# t          $ r}
t          d          |
d}
~
ww xY w|	S )a  Get the meta information (DatasetInfo) about a dataset for a particular config

    Args:
        path (`str`): path to the dataset repository. Can be either:

            - a local path to the dataset directory containing the data files,
                e.g. `'./dataset/squad'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
                e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
        config_name (:obj:`str`, optional): Defining the name of the dataset configuration.
        data_files (:obj:`str` or :obj:`Sequence` or :obj:`Mapping`, optional): Path(s) to source data file(s).
        download_config (:class:`~download.DownloadConfig`, optional): Specific download configuration parameters.
        download_mode (:class:`DownloadMode` or :obj:`str`, default ``REUSE_DATASET_IF_EXISTS``): Download/generate mode.
        revision (:class:`~utils.Version` or :obj:`str`, optional): Version of the dataset to load.
            As datasets have their own git repository on the Datasets Hub, the default version "main" corresponds to their "main" branch.
            You can specify a different version than the default "main" by using a commit SHA or a git tag of the dataset repository.
        token (``str`` or :obj:`bool`, optional): Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If True, or not specified, will get token from `"~/.huggingface"`.
        **config_kwargs (additional keyword arguments): optional attributes for builder class which will override the attributes if supplied.

    )namer   r   r   r   r   Nc                 0    i | ]}|j         |j         d S ))r?   r,   )r?   )r#   split_generatorr   s     r   r%   z+get_dataset_config_info.<locals>.<dictcomp>  s:       #  $/CUY&Z&Z  r   )	base_pathr   z<The split names could not be parsed from the dataset config.)r   infosplitscopyr   r   _split_generatorsr
   rB   	Exceptionr   )r   r!   r   r   r   r   r   r$   builderrC   errs   `          r   r"   r"      s   > #	'#	 	 	 	G <D{4CY/..000IYIY$)O!	o   '.'@'@,w7HZijjj( (  DKK  	o 	o 	o%&deeknn	oKs   :B 
B,B''B,c           
      v    t          | f||||||d|}t          |j                                                  S )ae  Get the list of available splits for a particular config and dataset.

    Args:
        path (`str`): path to the dataset repository. Can be either:

            - a local path to the dataset directory containing the data files,
                e.g. `'./dataset/squad'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
                e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
        config_name (`str`, *optional*):
            Defining the name of the dataset configuration.
        data_files (`str` or `Sequence` or `Mapping`, *optional*):
            Path(s) to source data file(s).
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        revision ([`Version`] or `str`, *optional*):
            Version of the dataset to load.
            As datasets have their own git repository on the Datasets Hub, the default version "main" corresponds to their "main" branch.
            You can specify a different version than the default "main" by using a commit SHA or a git tag of the dataset repository.
        token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.
        **config_kwargs (additional keyword arguments):
            Optional attributes for builder class which will override the attributes if supplied.

    Example:

    ```py
    >>> from datasets import get_dataset_split_names
    >>> get_dataset_split_names('cornell-movie-review-data/rotten_tomatoes')
    ['train', 'validation', 'test']
    ```
    )r!   r   r   r   r   r   )r"   r0   rD   r2   )	r   r!   r   r   r   r   r   r$   rC   s	            r   get_dataset_split_namesrK   '  s[    Z #	'#	 	 	 	D   ""###r   )NNNNN)NNNN)NNNNNN)%__doc__r.   collections.abcr   r   typingr   r   download.download_configr   download.download_managerr	   #download.streaming_download_managerr
   rC   r   loadr   r   r   utils.loggingr   utils.versionr   r   logger
ValueErrorr   strdictr0   boolr(   r&   r=   r"   rK   r   r   r   <module>rZ      s    !   				 - - - - - - - - " " " " " " " " 4 4 4 4 4 4 3 3 3 3 3 3 I I I I I I               
 & % % % % % " " " " " " 
H			 	 	 	 	* 	 	 	 48048<.2(,@ @
@tT3/0@ n-@ E,"345	@
 uS'\*+@ E$)$%@ @ @ @J /3048<37? ?
?uS'\*+? n-? E,"345	?
 tT3/0? ? ? ?H /3048<37;B ;B
;BuS'\*+;B n-;B E,"345	;B
 tT3/0;B c];B ;B ;B ;B@ "&_c048<.2(,7 7
7#7 sHSM73c8TW=FX@Y;Y3ZZ[\7 n-	7
 E,"3457 uS'\*+7 E$)$%7 7 7 7 7x "&_c048<.2(,7$ 7$
7$#7$ sHSM73c8TW=FX@Y;Y3ZZ[\7$ n-	7$
 E,"3457$ uS'\*+7$ E$)$%7$ 7$ 7$ 7$ 7$ 7$r   