
    Pi01                        d dl mZmZ d dlmZmZ d dlZd dlZ	d dl
Z
d dlmZ d dlmZmZmZmZmZmZmZmZmZmZ d dlmZ erd dlZe
j        j                            e          ZddgZ e G d	 d
e
j!                              Z" G d de
j#                  Z$dej%        de&fdZ'defdZ(dej)        de	j*        fdZ+dej%        de&fdZ,e G d d                      Z-e G d d                      Z.defdZ/de	j*        fdZ0dej%        de&fdZ1defdZ2dej)        de	j3        fdZ4defdZ5d Z6de7d e8d!e8de	j3        fd"Z9d#ed e8d!e8fd$Z:d% Z;d& Z<d'e8fd(Z=d)ej%        defd*Z>d2d#efd,Z?d#ede8fd-Z@de&fd.ZAde&fd/ZBde&fd0ZCd1 ZDdS )3    )	dataclassfield)TYPE_CHECKINGOptionalN)Key)
Array2DArray3DArray4DArray5DFeatures	LargeListListValue_ArrayXD_arrow_to_datasets_dtype)cast_table_to_featuresz.h5z.hdf5c                   R    e Zd ZU dZdZee         ed<   dZee	j
                 ed<   dS )
HDF5ConfigzBuilderConfig for HDF5.N
batch_sizefeatures)__name__
__module____qualname____doc__r   r   int__annotations__r   datasetsr        w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/datasets/packaged_modules/hdf5/hdf5.pyr   r       sD         !! $J$$$,0Hhx()00000r   r   c                   .    e Zd ZdZeZd Zd Zd Zd Z	dS )HDF5zXArrowBasedBuilder that converts HDF5 files to Arrow tables using the HF extension types.c                 @    t          j        | j        j                  S )N)r   )r   DatasetInfoconfigr   selfs    r    _infoz
HDF5._info-   s    #T[-ABBBBr   c           	          dd l }| j        j        st          d| j        j                   |                    | j        j                  }g }|                                D ]\  }}| j        j        o|D ]l}t          |d          5 } |j	        |d          5 }	t          |	          | j        _        d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y    |                    t          j        |d|i                     |S )Nr   z=At least one data file must be specified, but got data_files=rbrfiles)name
gen_kwargs)h5pyr%   
data_files
ValueErrordownloaditemsinfor   openFile_recursive_infer_featuresappendr   SplitGenerator)
r'   
dl_managerr/   r0   splits
split_namer,   
first_filefh5s
             r    _split_generatorszHDF5._split_generators0   s   {% 	wu]a]h]suuvvv(()?@@
!+!1!1!3!3 	a 	aJy!)"'  Jj$// O1&TYq#.. O"1J21N1NDI.O O O O O O O O O O O O O O OO O O O O O O O O O O O O O O MM(1zwX]N^___````s6   CB?	3C?CCCCCCc              #      K   |E d {V  d S Nr   )r'   r,   s     r    _generate_shardszHDF5._generate_shardsB   s$      r   c           
   #     K   dd l }| j        j        }t          |          D ]\  }}	 t	          |d          5 } |j        |d          5 }| j        j        t          |          | j        _        t          || j        j                  }|6t                              d| d           	 d d d            d d d            |p| j        p|}	t          t          d||	                    D ]\  }
}t          ||	z   |          }t          || j        j        ||          }|t                              d| d           Ut!          ||
          t#          || j        j                  fV  	 d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   }# t$          $ r6}t                              d| dt)          |           d|             d }~ww xY wd S )	Nr   r*   r+   zFile z contains no data, skipping...zFailed to read file 'z' with error z: )r/   r%   r   	enumerater5   r6   r4   r   r7   _check_dataset_lengthsloggerwarning_writer_batch_sizerangemin_recursive_load_arraysr   r   r1   errortype)r'   r,   r/   batch_size_cfgfile_idxfiler>   r?   num_rowseffective_batch	batch_idxstartendpa_tablees                  r    _generate_tableszHDF5._generate_tablesE   s     /'.. 	 	NHd$%% q"1c** qb9-51J21N1NDI.#9"di>P#Q#Q#+"NN+W4+W+W+WXXX$q q q q q qq q q q q q q +9*_D<S*_W_09%8_:]:]0^0^ q q,Iu"%eo&=x"H"HC'=b$)BTV[]`'a'aH'/ &/[t/[/[/[ \ \ \ ("%h	":":<RS[]a]f]o<p<p"pppppqq q q q q q q q q q q q q q qq q q q q q q q q q q q q q q"    VTVVQVVSTVVWWW'	 	sr   FFA!E;,F7FB+E;/F;E??FE?FFF	FF	F
G)1GGN)
r   r   r   r   r   BUILDER_CONFIG_CLASSr(   r@   rC   rY   r   r   r    r"   r"   (   s_        bb%C C C  $      r   r"   dtypereturnc                 b    | j         dk    rdS | j        t          | j        d                   S dS )NcTr   F)kindsubdtype_is_complex_dtyper[   s    r    ra   ra   f   s6    zSt~! !23335r   c                    | j         j        | j         j        \  }}n| j        dd          }| j         }|t          j        k    rt          d          }nM|t          j        k    rt          d          }n-t                              d| d           t          d          }t          t          ||          t          ||          d          S )N   float32float64zFound complex dtype z0 that is not supported. Converting to float64...realimag)r[   r`   shapenp	complex64r   
complex128rG   rH   r   _create_sized_feature_impl)dsetr[   
data_shape
value_types       r    _create_complex_featuresrr   n   s    z& J/zzZ^

9%%

	"-		9%%

eeeeefff9%%
.z:FF.z:FF	
 	
  r   arrc                 
   t           j        j                            | j                  t           j        j                            | j                  d}t
          j                            |d         |d         gddg          S )Nrg   rh   ri   names)r   r   numpy_to_pyarrow_listarrayrh   ri   paStructArrayfrom_arrays)rs   datas     r    _convert_complex_to_nestedr|      sp    !*EEchOO!*EEchOO D >%%tF|T&\&B6SYJZ%[[[r   c                     | j         dk    S NV)r_   rb   s    r    _is_compound_dtyper      s    :r   c                   8    e Zd ZU ded<   dZej        ed<   d ZdS )_CompoundGroupzh5py.Datasetro   Nr{   c              #      K   | j         j        j        D ].}| j         j        |         }|t          | j        ||          fV  /d S rB   )ro   r[   rv   _CompoundFieldr{   )r'   
field_namefield_dtypes      r    r3   z_CompoundGroup.items   sZ      )// 	Q 	QJ)/*5KnTY
KPPPPPPP	Q 	Qr   )r   r   r   r   r{   rk   ndarrayr3   r   r   r    r   r      sH         
D"*Q Q Q Q Qr   r   c                       e Zd ZU eej                 ed<   eed<   ej        ed<    e	d          Z
eedf         ed<   d Zd	 Zd
S )r   r{   r-   r[   F)init.rj   c                 f    | j         t          | j                   ndf| j        j        z   | _        d S Nr   )r{   lenr[   rj   r&   s    r    __post_init__z_CompoundField.__post_init__   s-    (,	(=c$)nnn1FIYY


r   c                 2    | j         |         | j                 S rB   )r{   r-   )r'   keys     r    __getitem__z_CompoundField.__getitem__   s    y~di((r   N)r   r   r   r   rk   r   r   strr[   r   rj   tupler   r   r   r   r   r    r   r      s         
2:

III8OOO"U...E5c?...Z Z Z) ) ) ) )r   r   c                 >    t          |           }t          |          S rB   )r   r7   )ro   
mock_groups     r    _create_compound_featuresr      s    %%J$Z000r   c                     t          ||           }t          |          }t          ||dt          |                     S )N)r{   r   )r   r   rL   r   )rs   ro   r   r   s       r    _convert_compound_to_nestedr      s;    3///J(..H!*h3s88DDDr   c                 *    | j         rd| j         v rdS dS )NvlenTF)metadatarb   s    r    _is_vlen_dtyper      s"    ~ &EN22t5r   c                     | j         j        d         }|t          t          fv rt	          d          S t          |          }t          |          S )Nr   string)r[   r   r   bytesr   _np_to_pa_to_hf_valuer   )ro   
vlen_dtypeinner_features      r    _create_vlen_featuresr      sG    $V,Jc5\!!X)*55Mr   c                 J    t           j        j                            |           S rB   )r   r   rw   )rs   s    r    _convert_vlen_to_arrayr      s    %@@EEEr   c                     i }|                                  D ]P\  }}t          |          rt          |          }|r|||<   +t          |          rt	          |          }|r|||<   Qt          |          S rB   )r3   	_is_groupr7   _is_dataset_infer_featurer   )h5_objfeatures_dictpathro   r   s        r    r7   r7      s    Mllnn / /
dT?? 	/066H /&.d# 	/%d++H /&.d#M"""r   c                    t          | j                  rt          |           S t          | j                  s| j        j        dk    rt          |           S t          | j                  rt          |           S t          |           S r~   )	ra   r[   rr   r   r_   r   r   r   _create_sized_feature)ro   s    r    r   r      s~    $$ +'---	DJ	'	' +4:?c+A+A(...	
	#	# +$T*** &&&r   r   rU   rV   c                 Z   | ||         }t          | j                  rt          |          S t          | j                  rt	          |          S t          | j                  rt          ||           S | j        j        dk    rt          d| d          t          d | j
        dd          D                       rKt          j        | j                  }t          j        d |D             t          j        |                    S t          j        j                            |          S )NOzObject dtype dataset 'z' is not supported. For variable-length data, please use h5py.vlen_dtype() when creating the HDF5 file. See: https://docs.h5py.org/en/stable/special.html#variable-length-stringsc              3   "   K   | ]
}|d k    V  dS r   Nr   .0dims     r    	<genexpr>z_load_array.<locals>.<genexpr>  s&      22Csax222222r   rd   c                     g | ]}g S r   r   )r   _s     r    
<listcomp>z_load_array.<locals>.<listcomp>  s    ---AR---r   )rN   )r   r[   r   ra   r|   r   r   r_   r1   anyrj   rx   from_numpy_dtypearraylist_r   r   rw   )ro   r   rU   rV   rs   
inner_types         r    _load_arrayr      s5   
uSy/Cdj!! N%c***	4:	&	& N)#...	DJ	'	' N*3555	C		YT Y Y Y
 
 	
 224:abb>22222 	N,TZ88J8-----BHZ4H4HIIII$-HHMMMr   r   c                    i }|                                  D ]z\  }}||vr
t          |          rt          |||         ||          }nAt          |          rt	          ||||          }nt          dt          |                     ||||<   {t          |           rt          j	        
                    |          S |rdg g }
}	}|                                 D ]_\  }}t          |t          j                  rd}|                                }|	                    |           |
                    |           `t          j                            |
|	          }|rt          j        |          n|S d S )NzUnexpected type FTru   )r3   r   rL   r   r   r1   rN   _is_filerx   Tablefrom_pydict
isinstanceChunkedArraycombine_chunksr8   ry   rz   chunked_array)r   r   rU   rV   
batch_dictr   ro   rs   should_chunkkeysvalueskvsarrs                 r    rL   rL     s   Jllnn # #
dxT?? 	>(x~ucJJCC 	>dD%55CC<T

<<===?"Jt 0x##J/// 
@%*BFd$$&& 	 	DAq!R_-- '#$$&&KKNNNMM!~))&)==)5?r%%%4?
@ 
@r   c                 h    | j         dd          }t          | j                  }t          ||          S )Nrd   )rj   r   r[   rn   )ro   
dset_shapevalue_features      r    r   r   /  s0    ABBJ)$*55M%j-@@@r   c           	      v   |j         }t          d | D                       r3t                              d|  d| d| d           t	          |          S t          |           }|dk    r|S |dk    rt	          || d                   S |d	k    r t          |          | |
          S t          d| d          )Nc              3   "   K   | ]
}|d k    V  dS r   r   r   s     r    r   z-_create_sized_feature_impl.<locals>.<genexpr>7  s&      
*
*3!8
*
*
*
*
*
*r   z*HDF5 to Arrow: Found a dataset with shape z and dtype z\ that has a dimension with size 0. Shape information will be lost in the conversion to List(z).r   rd   )length   )rj   r[   Arrayz.D not supported. Maximum 5 dimensions allowed.)r[   r   rG   rH   r   r   _sized_arrayxd	TypeError)r   r   	dtype_strranks       r    rn   rn   5  s   #I

*
*z
*
*
*** # I  I  IPY  I  I  xE  I  I  I	
 	
 	
 M"""z??Dqyy	M*Q-8888	#~d##*IFFFFTTTTUUUr   r   c                 D    t           t          t          t          d|          S )N)         r   )r   r	   r
   r   )r   s    r    r   r   H  s    7w7;;DAAr   numpy_dtypec                 `    t          t          t          j        |                               S )Nrb   )r   r   rx   r   )r   s    r    r   r   L  s'    /0CK0P0PQQRRRRr    c                     |                                  D ]T\  }}||vr
t          |          r$t          |||         | | d          }||c S =t          |          r| | c S Ud S )N/)prefix)r3   r   _first_datasetr   )r   r   r   r   ro   founds         r    r   r   P  s    llnn % %
dxT?? 	%"4$6@R4@R@R@RSSSE  ! 	%$d$$$$$	%% %r   c           	         t          | |          }|d S | |         j        d         }|                                 D ]M\  }}||vr
t          |          r4|j        d         |k    r#t	          d| d|j        d          d|           N|S )Nr   z	Dataset 'z' has length z but expected )r   rj   r3   r   r1   )r   r   
first_pathrR   r   ro   s         r    rF   rF   \  s    11Jtj!'*Hllnn i i
dxt 	iz!}(( !gT!g!g
1!g!g]e!g!ghhhOr   c                 ^    dd l }t          | |j                  pt          | t                    S r   )r/   r   Groupr   r   r/   s     r    r   r   k  s,    KKKfdj))OZ-O-OOr   c                 ^    dd l }t          | |j                  pt          | t                    S r   )r/   r   Datasetr   r   s     r    r   r   q  s,    KKKfdl++Qz&./Q/QQr   c                 4    dd l }t          | |j                  S r   )r/   r   r6   r   s     r    r   r   w  s    KKKfdi(((r   c                 &   t          | t                    rt          d | j        D                       S t          | t                    r| j        dk    pt          | j                  S t          | t                    rt          | j                  S dS )Nc              3   "   K   | ]
}|d k    V  dS r   r   r   s     r    r   z'_has_zero_dimensions.<locals>.<genexpr>  s&      553!8555555r   r   F)	r   r   r   rj   r   r   _has_zero_dimensionsfeaturer   )r   s    r    r   r   }  s    '8$$ 55w}555555	GT	"	" ~"K&:7?&K&KK	GY	'	' #GO444ur   )r   )Edataclassesr   r   typingr   r   numpyrk   pyarrowrx   r   datasets.builderr   datasets.features.featuresr   r	   r
   r   r   r   r   r   r   r   datasets.tabler   r/   utilslogging
get_loggerr   rG   
EXTENSIONSBuilderConfigr   ArrowBasedBuilderr"   r[   boolra   rr   r   ry   r|   r   r   r   r   r   r   r   r   r   r7   r   r   r   r   rL   r   rn   r   r   r   rF   r   r   r   r   r   r   r    <module>r      s   ( ( ( ( ( ( ( ( * * * * * * * *                                             2 1 1 1 1 1  KKK			*	*8	4	4W
 1 1 1 1 1' 1 1 16 6 6 6 68% 6 6 6|RX $    h    2\BJ \2> \ \ \ \bh 4     Q Q Q Q Q Q Q Q 
) 
) 
) 
) 
) 
) 
) 
)1x 1 1 1 1
Ebn E E E E"( t    8    F
 Frx F F F F# # # # #' ' 'NC N N# N"( N N N N4@X @c @ @ @ @ @HA A AV V V&B B B B BSrx SE S S S S	% 	%X 	% 	% 	% 	%X #    P P P P PR4 R R R R) ) ) ) )    r   