
    Pi$              	       &   d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZ d dlZd dlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ  ee          Z G d dej                  Zdedeee         ef         fdZ  G d de!ee!ee	f         f                   Z"i dg dg dg dg dg dg dg dg dg dg dg dg d g d!g d"g d#g d$g i d%g d&g d'g d(g d)g d*g d+g d,g d-g d.g d/g d0g d1g d2g d3g d4g d5g g g g d6Z#dS )7    N)Counter)groupby)
itemgetter)AnyClassVarOptional)DatasetCardData   )METADATA_CONFIGS_FIELD)Features)DatasetInfoDatasetInfosDict)	_split_re)
get_loggerc                   &     e Zd Zd Zd fd	Z xZS )_NoDuplicateSafeLoaderc                       fd|j         D             }d |D             }t          |          fdD             }|rt          d|           d S )Nc                 0    g | ]\  }}j         |         S  )constructed_objects).0key_node_selfs      k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/datasets/utils/metadata.py
<listcomp>zS_NoDuplicateSafeLoader._check_no_duplicates_on_constructed_node.<locals>.<listcomp>   s%    QQQ{x(2QQQ    c                 Z    g | ](}t          |t                    rt          |          n|)S r   )
isinstancelisttuple)r   keys     r   r   zS_NoDuplicateSafeLoader._check_no_duplicates_on_constructed_node.<locals>.<listcomp>   s1    MMMjd33<c


MMMr   c                 ,    g | ]}|         d k    |S )   r   )r   r"   counters     r   r   zS_NoDuplicateSafeLoader._check_no_duplicates_on_constructed_node.<locals>.<listcomp>   s'    EEE#GCL14D4D#4D4D4Dr   zGot duplicate yaml keys: )valuer   	TypeError)r   nodekeysduplicate_keysr%   s   `   @r   (_check_no_duplicates_on_constructed_nodez?_NoDuplicateSafeLoader._check_no_duplicates_on_constructed_node   s    QQQQdjQQQMMMMM$--EEEEEEE 	JHHHIII	J 	Jr   Fc                 x    t                                          ||          }|                     |           |S )N)deep)superconstruct_mappingr+   )r   r(   r-   mapping	__class__s       r   r/   z(_NoDuplicateSafeLoader.construct_mapping   s8    ''++Dt+<<55d;;;r   )F)__name__
__module____qualname__r+   r/   __classcell__)r1   s   @r   r   r      sO        J J J         r   r   readme_contentreturnc                 d   t          |                                           }|rw|d         dk    rkd|dd          v r_|dd                              d          dz   }d                    |d|                   }|d                    ||dz   d                    fS d d                    |          fS )Nr   z---r$   
)r    
splitlinesindexjoin)r6   full_contentsep_idx	yamlblocks       r   _split_yaml_from_readmer@   $   s    113344L AQ500Ul122>N5N5Nqrr"((//!3IIl1W9566	$))L1$?@@@@<((((r   c            	           e Zd ZU dZeZee         ed<   e	de
fd            Zededee
eef                  dedd fd	            Zed
edd fd            Zd
eddfdZdee         fdZdS )MetadataConfigsz5Should be in format {config_name: {**config_params}}.
FIELD_NAMEmetadata_configc                 D   |                      d          }|t          j        d| d          }t          |t          t
          f          st          |          t          |t                    r|D ]}t          |t
          t          f          r{t          |t                    rut          |          dk    rSd|v rOt          j
        t          |d                   r/t          |                     d          t
          t          f          st          |          d S d S d S )N
data_filesz
                Expected data_files in YAML to be either a string or a list of strings
                or a list of dicts with two keys: 'split' and 'path', but got a  
                Examples of data_files in YAML:

                   data_files: data.csv

                   data_files: data/*.png

                   data_files:
                    - part0/*
                    - part1/*

                   data_files:
                    - split: train
                      path: train/*
                    - split: test
                      path: test/*

                   data_files:
                    - split: train
                      path:
                      - train/part1/*
                      - train/part2/*
                    - split: test
                      path: test/*

                PS: some symbols like dashes '-' are not allowed in split names
                r
   splitpath)gettextwrapdedentr   r    str
ValueErrordictlenrematchr   )rD   yaml_data_filesyaml_error_messageyaml_data_files_items       r   $_raise_if_data_files_field_not_validz4MetadataConfigs._raise_if_data_files_field_not_valid3   sK   )--l;;&!)O^  " "> oc{;; 5 !3444/400 =,; = =(&';c4[II
=%&:DAA
=   455:: '+? ? ? "4H4Q R R !@ *+?+C+CF+K+KcSW[ Y Y !@
 ));<<<] '&D= == =r   parquet_commit_hashexported_parquet_filesdataset_infosr7   c                     fdt          |t          d                    D             r fd                                D              |           S )Nc                     i | ]f\  }}|fd t          |t          d                    D             t                              |t	                                j        pd          dgS )c                 8    g | ]\  }}|fd |D             dS )c                 H    g | ]}|d                               d          S )urlzrefs%2Fconvert%2Fparquet)replace)r   parquet_filerV   s     r   r   zhMetadataConfigs._from_exported_parquet_files_and_dataset_infos.<locals>.<dictcomp>.<listcomp>.<listcomp>r   s@     ! ! ! , )/778RTghh! ! !r   )rG   rH   r   )r   
split_nameparquet_files_for_splitrV   s      r   r   z]MetadataConfigs._from_exported_parquet_files_and_dataset_infos.<locals>.<dictcomp>.<listcomp>o   s`     	 	 	 <
$; ",! ! ! !0G! ! ! 	 	 	r   rG   z0.0.0rF   version)r   r   rL   rI   r   rc   )r   config_nameparquet_files_for_configrX   rV   s      r   
<dictcomp>zRMetadataConfigs._from_exported_parquet_files_and_dataset_infos.<locals>.<dictcomp>m   s     
 
 
 65 	 	 	 	 @GG_aklsatat?u?u	 	 	 }00kmmLLT_X_`` 
 
 
r   configc                 `    i | ])\  }fd |j         D                      d         d*S )c                 N    g | ]!}         d          D ]}|d         |k    |"S )rF   rG   r   )r   r`   	data_filerd   metadata_configss      r   r   z]MetadataConfigs._from_exported_parquet_files_and_dataset_infos.<locals>.<dictcomp>.<listcomp>   sT     # # #&)9+)F|)T# # &$W-;; " <;;;r   rc   rb   )splits)r   dataset_inford   rk   s     @r   rf   zRMetadataConfigs._from_exported_parquet_files_and_dataset_infos.<locals>.<dictcomp>   sw           .K # # # # #*6*=# # #  0<YG      r   )r   r   items)clsrV   rW   rX   rk   s    ` `@r   ._from_exported_parquet_files_and_dataset_infosz>MetadataConfigs._from_exported_parquet_files_and_dataset_infosf   s    
 
 
 
 
 :AAWYcdlYmYm9n9n
 
 
   	        2?1D1D1F1F      s#$$$r   dataset_card_datac                 V   |                     | j                  r|| j                 }t          |t                    st	          d| j         d| d          |D ].}d|vrt	          d| d          |                     |           / | fd|D                       S  |             S )Nz	Expected z to be a list, but got ''rd   zUEach config must include `config_name` field with a string name of a config, but got z. c                     i | ]J}|                                 x                    d           d                                 D             KS )rd   c                 N    i | ]"\  }}||d k    r|nt          j        |          #S )features)r   _from_yaml_list)r   paramr&   s      r   rf   zEMetadataConfigs.from_dataset_card_data.<locals>.<dictcomp>.<dictcomp>   sH     0 0 0(E5 (;(;uuAYZ_A`A`0 0 0r   )copypoprn   )r   rD   rg   s     r   rf   z:MetadataConfigs.from_dataset_card_data.<locals>.<dictcomp>   sr       
 ("1"6"6"8"88JJ}-- 0 0,2LLNN0 0 0  r   )rI   rC   r   r    rM   rU   )ro   rq   rk   rD   rg   s       @r   from_dataset_card_dataz&MetadataConfigs.from_dataset_card_data   s     00 	0@.55 j !hS^!h!hUe!h!h!hiii#3 J J 77$7#27 7 7   88IIII3   
 ,<  	 	 	 suur   Nc                    | r|                                  D ]}|                     |           |                     |          }t          t	          i ||                                                     }|                                D ]\  }}|                    dd            d |                                D             || j        <   d S d S )Nrd   c                      g | ]\  }}d |i|S )rd   r   )r   rd   config_metadatas      r   r   z8MetadataConfigs.to_dataset_card_data.<locals>.<listcomp>   s6     2 2 20K ??2 2 2r   )valuesrU   r{   rN   sortedrn   rz   rC   )r   rq   rD   current_metadata_configstotal_metadata_configsrd   r~   s          r   to_dataset_card_dataz$MetadataConfigs.to_dataset_card_data   s     
	#';;== K K99/JJJJ'+'B'BCT'U'U$%)&1U4L1UPT1U1[1[1]1]*^*^%_%_"0F0L0L0N0N 9 9,_##M488882 24J4P4P4R4R2 2 2do...
	 
	r   c                     d }|                                  D ]N\  }}t          |           dk    s|dk    s|                    d          r||}8t          d| d| d          O|S )Nr$   defaultz&Dataset has several default configs: 'z' and 'z'.)rn   rO   rI   rM   )r   default_config_namerd   rD   s       r   get_default_config_namez'MetadataConfigs.get_default_config_name   s    ",0JJLL 	 	(K4yyA~~	!9!9_=P=PQZ=[=[!9&.*5''$lATll]hlll  	 ": #"r   )r2   r3   r4   __doc__r   rC   r   rL   __annotations__staticmethodrN   rU   classmethodr    r   r   rp   r	   r{   r   r   r   r   r   r   rB   rB   .   s        ?? 6J6660=d 0= 0= 0= \0=d $% $% !%T#s(^ 4$% (	$%
 
$% $% $% [$%L  K\    [0o $    
## 
# 
# 
# 
# 
# 
#r   rB   zimage-classificationtranslationzimage-segmentationz	fill-maskzautomatic-speech-recognitionztoken-classificationzsentence-similarityzaudio-classificationzquestion-answeringsummarizationzzero-shot-classificationztable-to-textzfeature-extractionotherzmultiple-choiceztext-classificationztext-to-imageztext2text-generationzzero-shot-image-classificationztabular-classificationztabular-regressionzimage-to-imageztabular-to-textzunconditional-image-generationztext-retrievalztext-to-speechzobject-detectionzaudio-to-audioztext-generationconversationalztable-question-answeringzvisual-question-answeringzimage-to-textzreinforcement-learning)zvoice-activity-detectionztime-series-forecastingzdocument-question-answering)$rP   rJ   collectionsr   	itertoolsr   operatorr   typingr   r   r   yamlhuggingface_hubr	   rg   r   rv   r   infor   r   namingr   utils.loggingr   r2   logger
SafeLoaderr   rL   r!   r@   rN   rB   known_task_idsr   r   r   <module>r      sc   				                    * * * * * * * * * *  + + + + + + + + + + + +       0 0 0 0 0 0 0 0       & & & & & & 
H		    T_   )C )E(3-:L4M ) ) ) )O# O# O# O# O#d3S#X./ O# O# O#j&B&2& "& 	&
 #B& B& 2& B& "& R& & R& "& R& r&  2!&" R#& &$ B%&& %b'&( b)&* "+&, b-&. r/&0 %b1&2 b3&4 b5&6 7&8 b9&: r;&< b=&> ?&@  A&B RC&D bE& &F !#!#%K& & &r   