§
    ÇPƒiŠ<  ã                   óv  — d Z ddlZddlmZmZ ddlmZmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZmZmZ ddlmZ ddlmZ  ee¦  «        Z G d„ de¦  «        Z	 	 	 	 	 ddedeeeeef                  dee	         deeeef                  deeeef                  deeeef                  fd„Z 	 	 	 	 ddedeeeef                  dee	         deeeef                  deeeeef                  f
d„Z!	 	 	 	 ddedeeeef                  dee	         deeeef                  deeeeef                  dee         fd„Z"	 	 	 	 	 	 ddedee         deeeee         eeeeee         f         f         f                  dee	         deeeef                  deeeef                  deeeef                  defd„Z#	 	 	 	 	 	 ddedee         deeeee         eeeeee         f         f         f                  dee	         deeeef                  deeeef                  deeeef                  fd„Z$dS )zList and inspect datasets.é    N)ÚMappingÚSequence)ÚOptionalÚUnioné   )ÚDownloadConfig)ÚDownloadMode)ÚStreamingDownloadManager)ÚDatasetInfo)Údataset_module_factoryÚget_dataset_builder_classÚload_dataset_builder)Ú
get_logger)ÚVersionc                   ó   — e Zd ZdS )ÚSplitsNotFoundErrorN)Ú__name__Ú
__module__Ú__qualname__© ó    úd/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/datasets/inspect.pyr   r   &   s   € € € € € Ø€Dr   r   ÚpathÚ
data_filesÚdownload_configÚdownload_modeÚrevisionÚtokenc                 ób   ‡ ‡‡‡‡‡‡— t          ‰ ‰‰‰‰‰¬¦  «        }ˆˆˆˆˆ ˆˆfd„|D ¦   «         S )a6  Get the meta information about a dataset, returned as a dict mapping config name to DatasetInfoDict.

    Args:
        path (`str`): path to the dataset repository. Can be either:

            - a local path to the dataset directory containing the data files,
                e.g. `'./dataset/squad'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
                e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the main branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.
        **config_kwargs (additional keyword arguments):
            Optional attributes for builder class which will override the attributes if supplied.

    Example:

    ```py
    >>> from datasets import get_dataset_infos
    >>> get_dataset_infos('cornell-movie-review-data/rotten_tomatoes')
    {'default': DatasetInfo(description="Movie Review Dataset.
This is a dataset of containing 5,331 positive and 5,331 negative processed
sentences from Rotten Tomatoes movie reviews...), ...}
    ```
    )r   r   r   r   r   r   c                 ó:   •— i | ]}|t          d‰|‰‰‰‰‰d œ‰¤Ž“ŒS ))r   Úconfig_namer   r   r   r   r   r   )Úget_dataset_config_info)	Ú.0r!   Úconfig_kwargsr   r   r   r   r   r   s	     €€€€€€€r   ú
<dictcomp>z%get_dataset_infos.<locals>.<dictcomp>^   sa   ø€ ð ð ð ð ð 	Õ,ð 	
ØØ#Ø!Ø+Ø'ØØð	
ð 	
ð ð	
ð 	
ðð ð r   )Úget_dataset_config_names)r   r   r   r   r   r   r$   Úconfig_namess   ``````` r   Úget_dataset_infosr(   *   s}   øøøøøøø€ õX ,ØØØ'Ø#ØØðñ ô €Lðð ð ð ð ð ð ð ð ð ð (ðñ ô ð r   c                 ó  — t          | f||||dœ|¤Ž}t          |t          j                             | ¦  «        ¬¦  «        }t          |j                             ¦   «         ¦  «        p"|j         	                    d|j
        pd¦  «        gS )aG  Get the list of available config names for a particular dataset.

    Args:
        path (`str`): path to the dataset repository. Can be either:

            - a local path to the dataset directory containing the data files,
                e.g. `'./dataset/squad'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
                e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the main branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        **download_kwargs (additional keyword arguments):
            Optional attributes for [`DownloadConfig`] which will override the attributes in `download_config` if supplied,
            for example `token`.

    Example:

    ```py
    >>> from datasets import get_dataset_config_names
    >>> get_dataset_config_names("nyu-mll/glue")
    ['cola',
     'sst2',
     'mrpc',
     'qqp',
     'stsb',
     'mnli',
     'mnli_mismatched',
     'mnli_matched',
     'qnli',
     'rte',
     'wnli',
     'ax']
    ```
    ©r   r   r   r   ©Údataset_namer!   Údefault)r   r   Úosr   ÚbasenameÚlistÚbuilder_configsÚkeysÚbuilder_kwargsÚgetÚDEFAULT_CONFIG_NAME)r   r   r   r   r   Údownload_kwargsÚdataset_moduleÚbuilder_clss           r   r&   r&   m   s¤   € õh ,ØðàØ'Ø#Øðð ð ðð €Nõ ,¨NÍÌ×IYÒIYÐZ^ÑI_ÔI_Ð`Ñ`Ô`€KÝÔ+×0Ò0Ñ2Ô2Ñ3Ô3ð ØÔ%×)Ò)¨-¸Ô9XÐ9eÐ\eÑfÔfð8ð r   Úreturnc                 ó$  — t          | f||||dœ|¤Ž}t          |t          j                             | ¦  «        ¬¦  «        }t          |j                             ¦   «         ¦  «        }|rt          |¦  «        dk    r|d         nd}	nd}	|j	        p|	S )aW  Get the default config name for a particular dataset.
    Can return None only if the dataset has multiple configurations and no default configuration.

    Args:
        path (`str`): path to the dataset repository. Can be either:

            - a local path to the dataset directory containing the data files,
                e.g. `'./dataset/squad'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
                e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the main branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        **download_kwargs (additional keyword arguments):
            Optional attributes for [`DownloadConfig`] which will override the attributes in `download_config` if supplied,
            for example `token`.

    Returns:
        Optional[str]: the default config name if there is one

    Example:

    ```py
    >>> from datasets import get_dataset_default_config_name
    >>> get_dataset_default_config_name("openbookqa")
    'main'
    ```
    r*   r+   r   r   Nr-   )
r   r   r.   r   r/   r0   r1   r2   Úlenr5   )
r   r   r   r   r   r6   r7   r8   r1   Údefault_config_names
             r   Úget_dataset_default_config_namer=   ¯   s»   € õZ ,ØðàØ'Ø#Øðð ð ðð €Nõ ,¨NÍÌ×IYÒIYÐZ^ÑI_ÔI_Ð`Ñ`Ô`€KÝ˜;Ô6×;Ò;Ñ=Ô=Ñ>Ô>€OØð (Ý47¸Ñ4HÔ4HÈAÒ4MÐ4M˜o¨aÔ0Ð0ÐSWÐÐà'ÐØÔ*ÐAÐ.AÐAr   r!   c           
      ób  ‡ — t          ‰ f||||||dœ|¤Ž}|j        }	|	j        €‹|r|                     ¦   «         nt	          ¦   «         }|||_        	 ˆ fd„|                     t          |j        |¬¦  «        ¦  «        D ¦   «         |	_        n"# t          $ r}
t          d¦  «        |
‚d}
~
ww xY w|	S )a‹  Get the meta information (DatasetInfo) about a dataset for a particular config

    Args:
        path (`str`): path to the dataset repository. Can be either:

            - a local path to the dataset directory containing the data files,
                e.g. `'./dataset/squad'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
                e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
        config_name (:obj:`str`, optional): Defining the name of the dataset configuration.
        data_files (:obj:`str` or :obj:`Sequence` or :obj:`Mapping`, optional): Path(s) to source data file(s).
        download_config (:class:`~download.DownloadConfig`, optional): Specific download configuration parameters.
        download_mode (:class:`DownloadMode` or :obj:`str`, default ``REUSE_DATASET_IF_EXISTS``): Download/generate mode.
        revision (:class:`~utils.Version` or :obj:`str`, optional): Version of the dataset to load.
            As datasets have their own git repository on the Datasets Hub, the default version "main" corresponds to their "main" branch.
            You can specify a different version than the default "main" by using a commit SHA or a git tag of the dataset repository.
        token (``str`` or :obj:`bool`, optional): Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If True, or not specified, will get token from `"~/.huggingface"`.
        **config_kwargs (additional keyword arguments): optional attributes for builder class which will override the attributes if supplied.

    )Únamer   r   r   r   r   Nc                 ó0   •— i | ]}|j         |j         ‰d œ“ŒS ))r?   r,   )r?   )r#   Úsplit_generatorr   s     €r   r%   z+get_dataset_config_info.<locals>.<dictcomp>  s:   ø€ ð ð ð à#ð  Ô$¨Ô/CÐUYÐ&ZÐ&Zðð ð r   )Ú	base_pathr   z<The split names could not be parsed from the dataset config.)r   ÚinfoÚsplitsÚcopyr   r   Ú_split_generatorsr
   rB   Ú	Exceptionr   )r   r!   r   r   r   r   r   r$   ÚbuilderrC   Úerrs   `          r   r"   r"   í   s  ø€ õ> #Øð	àØØ'Ø#ØØð	ð 	ð ð	ð 	€Gð Œ<€DØ„{ÐØ4CÐY˜/×.Ò.Ñ0Ô0Ð0ÍÑIYÔIYˆØÐØ$)ˆOÔ!ð	oðð ð ð à'.×'@Ò'@Ý,°wÔ7HÐZiÐjÑjÔjñ(ô (ðñ ô ˆDŒKˆKøõ ð 	oð 	oð 	oÝ%Ð&dÑeÔeÐknÐnøøøøð	oøøøà€Ks   Á:B Â
B,ÂB'Â'B,c           
      óv   — t          | f||||||dœ|¤Ž}t          |j                             ¦   «         ¦  «        S )ae  Get the list of available splits for a particular config and dataset.

    Args:
        path (`str`): path to the dataset repository. Can be either:

            - a local path to the dataset directory containing the data files,
                e.g. `'./dataset/squad'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`huggingface_hub.list_datasets`]),
                e.g. `'rajpurkar/squad'`, `'nyu-mll/glue'` or``'openai/webtext'`
        config_name (`str`, *optional*):
            Defining the name of the dataset configuration.
        data_files (`str` or `Sequence` or `Mapping`, *optional*):
            Path(s) to source data file(s).
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        revision ([`Version`] or `str`, *optional*):
            Version of the dataset to load.
            As datasets have their own git repository on the Datasets Hub, the default version "main" corresponds to their "main" branch.
            You can specify a different version than the default "main" by using a commit SHA or a git tag of the dataset repository.
        token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.
        **config_kwargs (additional keyword arguments):
            Optional attributes for builder class which will override the attributes if supplied.

    Example:

    ```py
    >>> from datasets import get_dataset_split_names
    >>> get_dataset_split_names('cornell-movie-review-data/rotten_tomatoes')
    ['train', 'validation', 'test']
    ```
    )r!   r   r   r   r   r   )r"   r0   rD   r2   )	r   r!   r   r   r   r   r   r$   rC   s	            r   Úget_dataset_split_namesrK   '  s[   € õZ #Øð	àØØ'Ø#ØØð	ð 	ð ð	ð 	€Dõ ”× Ò Ñ"Ô"Ñ#Ô#Ð#r   )NNNNN)NNNN)NNNNNN)%Ú__doc__r.   Úcollections.abcr   r   Útypingr   r   Údownload.download_configr   Údownload.download_managerr	   Ú#download.streaming_download_managerr
   rC   r   Úloadr   r   r   Úutils.loggingr   Úutils.versionr   r   ÚloggerÚ
ValueErrorr   ÚstrÚdictr0   Úboolr(   r&   r=   r"   rK   r   r   r   ú<module>rZ      s¯  ðð  !Ð  à 	€	€	€	Ø -Ð -Ð -Ð -Ð -Ð -Ð -Ð -Ø "Ð "Ð "Ð "Ð "Ð "Ð "Ð "à 4Ð 4Ð 4Ð 4Ð 4Ð 4Ø 3Ð 3Ð 3Ð 3Ð 3Ð 3Ø IÐ IÐ IÐ IÐ IÐ IØ Ð Ð Ð Ð Ð ðð ð ð ð ð ð ð ð ð ð
 &Ð %Ð %Ð %Ð %Ð %Ø "Ð "Ð "Ð "Ð "Ð "ð 
ˆHÑ	Ô	€ð	ð 	ð 	ð 	ð 	˜*ñ 	ô 	ð 	ð 48Ø04Ø8<Ø.2Ø(,ð@ð @Ø
ð@à˜˜t T¨3˜Ô/Ô0ð@ð ˜nÔ-ð@ð ˜E ,°Ð"3Ô4Ô5ð	@ð
 u˜S '˜\Ô*Ô+ð@ð E˜$ ˜)Ô$Ô%ð@ð @ð @ð @ðJ /3Ø04Ø8<Ø37ð?ð ?Ø
ð?àu˜S '˜\Ô*Ô+ð?ð ˜nÔ-ð?ð ˜E ,°Ð"3Ô4Ô5ð	?ð
 ˜˜t T¨3˜Ô/Ô0ð?ð ?ð ?ð ?ðH /3Ø04Ø8<Ø37ð;Bð ;BØ
ð;Bàu˜S '˜\Ô*Ô+ð;Bð ˜nÔ-ð;Bð ˜E ,°Ð"3Ô4Ô5ð	;Bð
 ˜˜t T¨3˜Ô/Ô0ð;Bð ˆc„]ð;Bð ;Bð ;Bð ;Bð@ "&Ø_cØ04Ø8<Ø.2Ø(,ð7ð 7Ø
ð7à˜#”ð7ð ˜˜s H¨S¤M°7¸3ÀÀcÈ8ÐTWÌ=ÐFXÔ@YÐ;YÔ3ZÐZÔ[Ô\ð7ð ˜nÔ-ð	7ð
 ˜E ,°Ð"3Ô4Ô5ð7ð u˜S '˜\Ô*Ô+ð7ð E˜$ ˜)Ô$Ô%ð7ð ð7ð 7ð 7ð 7ðx "&Ø_cØ04Ø8<Ø.2Ø(,ð7$ð 7$Ø
ð7$à˜#”ð7$ð ˜˜s H¨S¤M°7¸3ÀÀcÈ8ÐTWÌ=ÐFXÔ@YÐ;YÔ3ZÐZÔ[Ô\ð7$ð ˜nÔ-ð	7$ð
 ˜E ,°Ð"3Ô4Ô5ð7$ð u˜S '˜\Ô*Ô+ð7$ð E˜$ ˜)Ô$Ô%ð7$ð 7$ð 7$ð 7$ð 7$ð 7$r   