
    Pix                         d dl Z d dlZd dlmZ d dlmZ ddlmZ ddlm	Z	m
Z
mZmZmZmZ ddlmZ  ee          Z G d	 d
e j                  Zddee         defdZdee         defdZddededefdZd ZdS )    N)Optional)insecure_hashlib   )config) ExpectedMoreDownloadedFilesErrorExpectedMoreSplitsErrorNonMatchingChecksumErrorNonMatchingSplitsSizesErrorUnexpectedDownloadedFileErrorUnexpectedSplitsError   )
get_loggerc                       e Zd ZdZdZdZdZdS )VerificationModea:  `Enum` that specifies which verification checks to run.

    The default mode is `BASIC_CHECKS`, which will perform only rudimentary checks to avoid slowdowns
    when generating/downloading a dataset for the first time.

    The verification modes:

    |                           | Verification checks                                                           |
    |---------------------------|------------------------------------------------------------------------------ |
    | `ALL_CHECKS`              | Split checks and validity (number of files, checksums) of downloaded files    |
    | `BASIC_CHECKS` (default)  | Same as `ALL_CHECKS` but without checking downloaded files                    |
    | `NO_CHECKS`               | None                                                                          |

    
all_checksbasic_checks	no_checksN)__name__
__module____qualname____doc__
ALL_CHECKSBASIC_CHECKS	NO_CHECKS     m/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/datasets/utils/info_utils.pyr   r      s)          J!LIIIr   r   expected_checksumsrecorded_checksumsc                      t                               d           d S t          t                     t                    z
            dk    r9t	          t          t                     t                    z
                      t          t                    t                     z
            dk    r9t          t          t                    t                     z
                       fd D             }|d|z   nd}t          |          dk    rt          d| d| d          t                               d	|z              d S )
NzUnable to verify checksums.r   c                 8    g | ]}|         |         k    |S r   r   ).0urlr   r   s     r   
<listcomp>z$verify_checksums.<locals>.<listcomp>3   s0    hhh5G5LPbcfPg5g5g5g5g5gr   z for  zChecksums didn't matchz:
zY
Set `verification_mode='no_checks'` to skip checksums verification and ignore this errorz&All the checksums matched successfully)loggerinfolensetr   strr   r	   )r   r   verification_namebad_urlsfor_verification_names   ``   r   verify_checksumsr.   +   sr   !1222
3!""S);%<%<<==AA.s37I3J3JSQcMdMd3d/e/efff
3!""S);%<%<<==AA+C4F0G0G#N`JaJa0a,b,bccchhhhh1hhhH;L;XG&777^`
8}}q&g%: g gg g g
 
 	

 KK8;PPQQQQQr   expected_splitsrecorded_splitsc                      t                               d           d S t          t                     t                    z
            dk    r9t	          t          t                     t                    z
                      t          t                    t                     z
            dk    r9t          t          t                    t                     z
                       fd D             }t          |          dk    rt          t          |                    t                               d           d S )NzUnable to verify splits sizes.r   c                 j    g | ]/}|         j         |         j         k    |         |         d 0S ))expectedrecorded)num_examples)r"   namer/   r0   s     r   r$   z!verify_splits.<locals>.<listcomp>F   sP       4 -1F1SSS %T*8MNNSSSr   z$All the splits matched successfully.)r&   r'   r(   r)   r   r*   r   r
   )r/   r0   
bad_splitss   `` r   verify_splitsr8   >   s5   4555
3#o"6"6677!;;%c#o*>*>_AUAU*U&V&VWWW
3#o"6"6677!;;#CO(<(<s??S?S(S$T$TUUU    #  J
 :)#j//:::
KK677777r   Tpathrecord_checksumreturnc                 F   |r{t          j                    }t          | d          5 t          fdd          D ]}|                    |           |                                }ddd           n# 1 swxY w Y   nd}t          j                            |           |dS )z7Compute the file size and the sha256 checksum of a filerbc                  .                          d          S )Ni   )read)fs   r   <lambda>z(get_size_checksum_dict.<locals>.<lambda>U   s    affWoo r   r   N)	num_byteschecksum)	r   sha256openiterupdate	hexdigestosr9   getsize)r9   r:   mchunkrC   r@   s        @r   get_size_checksum_dictrM   P   s     #%%$ 	%5555s;;    {{}}H	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	%
 ..HEEEs   A A33A7:A7c                 B    | rt           j        r| t           j        k     S dS )zCheck if `dataset_size` is smaller than `config.IN_MEMORY_MAX_SIZE`.

    Args:
        dataset_size (int): Dataset size in bytes.

    Returns:
        bool: Whether `dataset_size` is smaller than `config.IN_MEMORY_MAX_SIZE`.
    F)r   IN_MEMORY_MAX_SIZE)dataset_sizes    r   is_small_datasetrQ   ]   s)      1 f777ur   )N)T)enumrI   typingr   huggingface_hub.utilsr   r%   r   
exceptionsr   r   r	   r
   r   r   loggingr   r   r&   Enumr   dictr.   r8   r*   boolrM   rQ   r   r   r   <module>rZ      s    				       2 2 2 2 2 2                             
H		    ty   *R R$ RT R R R R&88D> 8D 8 8 8 8$
F 
F 
Ft 
Ft 
F 
F 
F 
F    r   