
    Pi                     
   d dl Z d dlZd dlmZ d dlmZmZ ddlmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& ddl'm(Z( ddl)m*Z* dd	l+m,Z,  e(e-          Z. G d
 d          Z/dS )    N)Iterable)OptionalUnion   )!SINGLE_FILE_COMPRESSION_PROTOCOLSArchiveIterableFilesIterable_get_extraction_protocol_get_path_extension!_prepare_path_and_storage_optionsis_relative_pathurl_or_path_join	xbasenamexdirname	xet_parsexexistsxgetsizexglob
xgzip_openxisdirxisfilexjoinxlistdirxnumpy_loadxopenxpandas_read_csvxpandas_read_excelxPathxpyarrow_parquet_read_tablexrelpathxsio_loadmatxsplit	xsplitextxwalkxxml_dom_minidom_parse)
get_logger)
map_nested   )DownloadConfigc            
       *   e Zd ZdZdZ	 	 	 	 ddee         dee         dee         dee         fdZe	d	             Z
d
 ZdedefdZd ZdedefdZd Zdeeej        f         dee         fdZdeeee         f         dee         fdZd Zd ZdS )StreamingDownloadManagera  
    Download manager that uses the "::" separator to navigate through (possibly remote) compressed archives.
    Contrary to the regular `DownloadManager`, the `download` and `extract` methods don't actually download nor extract
    data, but they rather return the path or url that could be opened using the `xopen` function which extends the
    built-in `open` function to stream data from remote files.
    TNdataset_namedata_dirdownload_config	base_pathc                     || _         || _        |pt          j                            d          | _        |pt                      | _        d | _        d| _	        d S )N.F)
_dataset_name	_data_dirospathabspath
_base_pathr)   r.   downloaded_sizerecord_checksums)selfr,   r-   r.   r/   s        /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/datasets/download/streaming_download_manager.py__init__z!StreamingDownloadManager.__init__9   sV     *!#;rws';';.B.2B2B# %    c                     | j         S N)r3   r:   s    r;   
manual_dirz#StreamingDownloadManager.manual_dirG   s
    ~r=   c                 4    t          | j        |d          }|S )aU  Normalize URL(s) of files to stream data from.
        This is the lazy version of `DownloadManager.download` for streaming.

        Args:
            url_or_urls (`str` or `list` or `dict`):
                URL(s) of files to stream data from. Each url is a `str`.

        Returns:
            url(s): (`str` or `list` or `dict`), URL(s) to stream data from matching the given input url_or_urls.

        Example:

        ```py
        >>> downloaded_files = dl_manager.download('https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz')
        ```
        T	map_tuple)r'   _download_singler:   url_or_urlss     r;   downloadz!StreamingDownloadManager.downloadK   s!    " !!6tTTTr=   urlpathreturnc                 l    t          |          }t          |          rt          | j        |          }|S r?   )strr   r   r7   )r:   rI   s     r;   rE   z)StreamingDownloadManager._download_single_   s4    g,,G$$ 	A&t@@Gr=   c                 4    t          | j        |d          }|S )a  Add extraction protocol for given url(s) for streaming.

        This is the lazy version of `DownloadManager.extract` for streaming.

        Args:
            url_or_urls (`str` or `list` or `dict`):
                URL(s) of files to stream data from. Each url is a `str`.

        Returns:
            url(s): (`str` or `list` or `dict`), URL(s) to stream data from matching the given input `url_or_urls`.

        Example:

        ```py
        >>> downloaded_files = dl_manager.download('https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz')
        >>> extracted_files = dl_manager.extract(downloaded_files)
        ```
        TrC   )r'   _extract)r:   rG   urlpathss      r;   extractz StreamingDownloadManager.extractf   s    & dm[DIIIr=   c                    t          |          }t          || j                  }|                    d          d         }t	          |          }|dv s|                    d          rt          d| d          ||S |t          v ret          j	        
                    |                    d          d                   }d|v r|d |                    d                   n|}| d	| d| S | d
| S )Nr.   z::r   )tgztar)z.tar.gzz.tar.bz2z.tar.xzz+Extraction protocol for TAR archives like 'z' is not implemented in streaming mode. Please use `dl_manager.iter_archive` instead.

Example usage:

	url = dl_manager.download(url)
	tar_archive_iterator = dl_manager.iter_archive(url)

	for filename, file in tar_archive_iterator:
		...r1   z://z://::)rL   r
   r.   splitr   endswithNotImplementedErrorr   r4   r5   basenamerindex)r:   rI   protocolr5   	extension
inner_files         r;   rN   z!StreamingDownloadManager._extract|   s'   g,,+GTEYZZZ}}T""1%'--	&&$--8Z*[*[&%g      N:::))'--*=*=a*@AAJAD
ARAR$<j&7&7&<&<$<==XbJ::::::::..W...r=   c                 R    |                      |                     |                    S )a0  Prepare given `url_or_urls` for streaming (add extraction protocol).

        This is the lazy version of `DownloadManager.download_and_extract` for streaming.

        Is equivalent to:

        ```
        urls = dl_manager.extract(dl_manager.download(url_or_urls))
        ```

        Args:
            url_or_urls (`str` or `list` or `dict`):
                URL(s) to stream from data from. Each url is a `str`.

        Returns:
            url(s): (`str` or `list` or `dict`), URL(s) to stream data from matching the given input `url_or_urls`.
        )rP   rH   rF   s     r;   download_and_extractz-StreamingDownloadManager.download_and_extract   s"    $ ||DMM+66777r=   urlpath_or_bufc                     t          |d          rt          j        |          S t          j        || j                  S )aN  Iterate over files within an archive.

        Args:
            urlpath_or_buf (`str` or `io.BufferedReader`):
                Archive path or archive binary file object.

        Yields:
            `tuple[str, io.BufferedReader]`:
                2-tuple (path_within_archive, file_object).
                File object is opened in binary mode.

        Example:

        ```py
        >>> archive = dl_manager.download('https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz')
        >>> files = dl_manager.iter_archive(archive)
        ```
        readrR   )hasattrr   from_buffrom_urlpathr.   )r:   r_   s     r;   iter_archivez%StreamingDownloadManager.iter_archive   sB    ( >6** 	f"+N;;;"/PTPdeeeer=   rO   c                 8    t          j        || j                  S )a  Iterate over files.

        Args:
            urlpaths (`str` or `list` of `str`):
                Root paths.

        Yields:
            str: File URL path.

        Example:

        ```py
        >>> files = dl_manager.download_and_extract('https://huggingface.co/datasets/AI-Lab-Makerere/beans/resolve/main/data/train.zip')
        >>> files = dl_manager.iter_files(files)
        ```
        rR   )r	   from_urlpathsr.   )r:   rO   s     r;   
iter_filesz#StreamingDownloadManager.iter_files   s    " *8TEYZZZZr=   c                     d S r?    r@   s    r;   manage_extracted_filesz/StreamingDownloadManager.manage_extracted_files       r=   c                     d S r?   rj   r@   s    r;   get_recorded_sizes_checksumsz5StreamingDownloadManager.get_recorded_sizes_checksums   rl   r=   )NNNN)__name__
__module____qualname____doc__is_streamingr   rL   r)   r<   propertyrA   rH   rE   rP   rN   r^   r   ioBufferedReaderr   tuplere   listrh   rk   rn   rj   r=   r;   r+   r+   /   s         L '+"&48#'& &sm& 3-& ".1	&
 C=& & & &   X  (       ,/ / / / / /68 8 8(f5b6G1G+H fXV[_ f f f f2[5d3i#8 [Xc] [ [ [ [&      r=   r+   )0ru   r4   collections.abcr   typingr   r   utils.file_utilsr   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   utils.loggingr&   utils.py_utilsr'   r.   r)   ro   loggerr+   rj   r=   r;   <module>r      s   				 				 $ $ $ $ $ $ " " " " " " " "                                                                                                                                   B ' & & & & & ' ' ' ' ' ' + + + + + + 
H		l l l l l l l l l lr=   