
    Pi_                         d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	Z
ddlZddlmZ 	 ddlmZ n# e$ r dZY nw xY wdd	lmZ d
 Zd Zd Z	 ddZd Z G d d          Z G d d          Zd ZdS )zTF-specific utils import.    N)partial)ceil)uuid4)get_context)SharedMemory   )configc                    t          | t                    r| S t          j        rdd l}nt          d          | d         }i }|                                D ]\  }t          |t          j                  r$t          j	        fd| D                       |<   Ct          ||j
                  r%|	                    fd| D                       |<   }t          j        fd| D                       |<   |S )Nr   FCalled a Tensorflow-specific function but Tensorflow is not installed.c                      g | ]
}|         S  r   .0fks     k/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/datasets/utils/tf_utils.py
<listcomp>z)minimal_tf_collate_fn.<locals>.<listcomp>0        8 8 8!1 8 8 8    c                      g | ]
}|         S r   r   r   s     r   r   z)minimal_tf_collate_fn.<locals>.<listcomp>2   r   r   c                      g | ]
}|         S r   r   r   s     r   r   z)minimal_tf_collate_fn.<locals>.<listcomp>4   r   r   )
isinstancedictr	   TF_AVAILABLE
tensorflowImportErroritemsnpndarraystackTensorarray)featurestffirstbatchvr   s        @r   minimal_tf_collate_fnr(   $   s   (D!! d		 dbcccQKEE : :1a$$ 	:x 8 8 8 8x 8 8 899E!HH29%% 	:xx 8 8 8 8x 8 8 899E!HHx 8 8 8 8x 8 8 899E!HHLr   c                 H    t          |           }d|v r|d         |d<   |d= |S )Nlabellabels)r(   )r#   r&   s     r   #minimal_tf_collate_fn_with_renamingr,   8   s3    !(++E%.h'NLr   c                 "   t           j                            |           rt          | j                  S t           j                            |           p=t           j                            |           pt           j                            |           S N)patypesis_listis_numeric_pa_type
value_type
is_integeris_floating
is_decimal)pa_types    r   r2   r2   @   sl    	x   6!'"45558w''h28+?+?+H+HhBHL_L_`gLhLhhr   Fc                 B   t          | t          j                  s|                                 } d}t          | t          j                  r||                                          d}nt          j        t          j        |           dk              r|| d         | d         dz            nBt          | t          j                  r	||          nt          dt          |                       fd
                                D             |rOt          t                                                    d                   }fdt          |          D              |fi ||rQi }	|
                                D ]9\  }
}t          j        |
                   }|                    |          }||	|
<   :n`g }	|
                                D ]I\  }
}t          j        |
                   }|                    |          }|	                    |           J|	S )	NTF   r   zUnexpected type for indices: c                 ,    i | ]\  }}|v s|d v ||S ))r*   	label_idsr+   r   )r   keyvaluecols_to_retains      r   
<dictcomp>z np_get_batch.<locals>.<dictcomp>Y   s?     
 
 
Un$$/O(O(O (O(O(Or   c                 R    g | ]"fd                                  D             #S )c                 (    i | ]\  }}||         S r   r   )r   r=   r>   is      r   r@   z+np_get_batch.<locals>.<listcomp>.<dictcomp>b   s#    @@@JC#uQx@@@r   r   )r   rC   r&   s    @r   r   z np_get_batch.<locals>.<listcomp>b   s7    ]]]Q@@@@%++--@@@]]]r   )r   r   r   numpyintegeritemalldiffRuntimeErrortyper   lenlistvaluesranger"   astypeappend)indicesdatasetr?   
collate_fncollate_fn_argscolumns_to_np_typesreturn_dict
is_batchedactual_size	out_batchcol
cast_dtyper"   r&   s     `          @r   np_get_batchr]   F   s:    grz** "--//J'2:&& L'

	  A%	&	& L
WR[1_45	GRZ	(	( L J4==JJKKK!
 
 
 
#kkmm
 
 
  ^$u||~~..q122]]]]%P[J\J\]]]Ju0000E $	288:: 	# 	#OCHU3Z((ELL,,E"IcNN		# 	288:: 	$ 	$OCHU3Z((ELL,,EU####r   c	           	          t           j        rddlnt          d          t	          d          rj        nUt	          j        j        d          rj        j        j        n)t                     dk    rt          j        d           dt          t           |||d	          fd
                                D                                                     dj                  g          fd            }	j        j                            t                               }
|rQO                    d                    dj                            } fd}|
                    ||          }
n)|r'|
                    |
                                          }
||
                    ||          }
|
                    |	          }
|fd}nfd}|
                    |          S )a  Create a tf.data.Dataset from the underlying Dataset. This is a single-process method - the multiprocess
    equivalent is multiprocess_dataset_to_tf.

    Args:
        dataset (`Dataset`): Dataset to wrap with tf.data.Dataset.
        cols_to_retain (`List[str]`): Dataset column(s) to load in the
            tf.data.Dataset. It is acceptable to include column names that are created by the `collate_fn` and
            that do not exist in the original dataset.
        collate_fn(`Callable`): A function or callable object (such as a `DataCollator`) that will collate
            lists of samples into a batch.
        collate_fn_args (`Dict`): A  `dict` of keyword arguments to be passed to the
            `collate_fn`. Can be empty.
        columns_to_np_types (`Dict[str, np.dtype]`): A `dict` mapping column names to numpy dtypes.
        output_signature (`Dict[str, tf.TensorSpec]`): A `dict` mapping column names to
            `tf.TensorSpec` objects.
        shuffle(`bool`): Shuffle the dataset order when loading. Recommended True for training, False for
            validation/evaluation.
        batch_size (`int`, default `None`): Size of batches to load from the dataset. Defaults to `None`, which implies that
            the dataset won't be batched, but the returned dataset can be batched later with `tf_dataset.batch(batch_size)`.
        drop_remainder(`bool`, default `None`): Drop the last incomplete batch when loading. If not provided,
            defaults to the same setting as shuffle.

    Returns:
        `tf.data.Dataset`
    r   Nr   random_index_shuffleindex_shufflei zto_tf_dataset() can be memory-inefficient on versions of TensorFlow older than 2.9. If you are iterating over a dataset with a very large number of samples, consider upgrading to TF >= 2.9.F)rS   r?   rT   rU   rV   rW   c                 D    g | ]}j                             |          S r   )dtypesas_dtype)r   dtyper$   s     r   r   z!dataset_to_tf.<locals>.<listcomp>   s)    PPP%BIu%%PPPr   )input_signaturec                                          | g          fdt                                                    D             S )N)inpToutc                 (    i | ]\  }}||         S r   r   )r   rC   r=   outputs      r   r@   z9dataset_to_tf.<locals>.fetch_function.<locals>.<dictcomp>   s#    SSS61cVAYSSSr   )py_function	enumeratekeys)rR   rj   rV   	getter_fnr$   touts    @r   fetch_functionz%dataset_to_tf.<locals>.fetch_function   sY    	   
 

 TSSSY7J7O7O7Q7Q-R-RSSSSr      r:   )rd   )r>   c                                          | dk              r"j                            ddj                  }  || t	                    dz
            }| |fS )Nr:   rq   l            )shapemaxvalrd   r9   )indexseed	max_index)
reduce_allrandomuniformint64rL   )staterv   shuffled_indexrS   r_   r$   s      r   scan_random_indexz(dataset_to_tf.<locals>.scan_random_index   sn    }}Ub[)) T 	))U"()SS11EUXY`UaUadeUefffN.((r   )drop_remainderc                 F    fd|                                  D             S )Nc                 Z    i | ]'\  }}|                     ||         j                  (S r   ensure_shapert   r   r=   valoutput_signaturer$   s      r   r@   z8dataset_to_tf.<locals>.ensure_shapes.<locals>.<dictcomp>   s7    nnnxsTWC.>s.C.IJJnnnr   rD   
input_dictr   r$   s    r   ensure_shapesz$dataset_to_tf.<locals>.ensure_shapes   s-    nnnnn[e[k[k[m[mnnnnr   c                 F    fd|                                  D             S )Nc           	      j    i | ]/\  }}|                     ||         j        d d                   0S )r9   Nr   r   s      r   r@   z8dataset_to_tf.<locals>.ensure_shapes.<locals>.<dictcomp>   sB    rrrS[SVX[C.>s.C.I!"".MNNrrrr   rD   r   s    r   r   z$dataset_to_tf.<locals>.ensure_shapes   s-    rrrrr_i_o_o_q_qrrrrr   )r	   r   r   r   hasattrr_   rz   experimentalr`   rL   warningswarnr   r]   rN   function
TensorSpecr|   dataDatasetrO   fillcastscanshufflecardinalityr&   map)rS   r?   rT   rU   rV   r   r   
batch_sizer   rp   
tf_dataset	base_seedr   r   rn   r_   r$   ro   s   `   ``        @@@@r   dataset_to_tfr   v   s   H  dbccc r)** $!6	'	9	9 	$!y5Cw<<*$$M*  
  $%'/  I QPPP3F3M3M3O3OPPPD[["--bh"?"?!@[AAT T T T T T T BAT &&s7||44J B'3GGD"((C(CGDD		) 	) 	) 	) 	) 	) 	)  __Y0ABB

	 B''
(>(>(@(@AA
%%j%PP
//J	o 	o 	o 	o 	o 	o 	o
	s 	s 	s 	s 	s 	s >>-(((r   c                   ,    e Zd Zd Zd Zd Zd Zd ZdS )SharedMemoryContextc                 "    g | _         g | _        d S r.   )created_shmsopened_shmsselfs    r   __init__zSharedMemoryContext.__init__   s    r   c                     t          t          |          ||          }|r| j                            |           n| j                            |           |S )N)sizenamecreate)r   intr   rQ   r   )r   r   r   r   shms        r   get_shmzSharedMemoryContext.get_shm   s\    D		VDDD 	)$$S)))) ##C(((
r   c                     |                      |t          j        |          t          j        |          j        z  |          }t          j        |||j                  S )N)r   r   r   )rd   buffer)r   r   prodrd   itemsizer   buf)r   r   rt   rd   r   r   s         r   	get_arrayzSharedMemoryContext.get_array   sK    ll275>>BHUOO<T+T]clddz%uSW====r   c                     | S r.   r   r   s    r   	__enter__zSharedMemoryContext.__enter__       r   c                     | j         D ]*}|                                 |                                 +| j        D ]}|                                 d S r.   )r   closeunlinkr   )r   exc_type	exc_value	tracebackr   s        r   __exit__zSharedMemoryContext.__exit__   s[    $ 	 	CIIKKKJJLLLL# 	 	CIIKKKK	 	r   N)__name__
__module____qualname__r   r   r   r   r   r   r   r   r   r      s_            > > >      r   r   c                   L    e Zd Zd Zd Zd Zed             Zed             ZdS )NumpyMultiprocessingGeneratorc                 `    | _         | _        | _        | _        d |                                D              _         fd|                                D              _        | _        | _        | _	        |	 _
        |
 _         fd|                                D              _        d S )Nc                 4    g | ]\  }}|t           j        u |S r   )r   str_)r   r[   rd   s      r   r   z:NumpyMultiprocessingGenerator.__init__.<locals>.<listcomp>  s+    eeezsETY]_]dTdTdsTdTdTdr   c                 V    i | ]%\  }}||j         vr|nt          j        d           &S )U1)string_columnsr   rd   )r   r[   rd   r   s      r   r@   z:NumpyMultiprocessingGenerator.__init__.<locals>.<dictcomp>  sH     $
 $
 $
U #T%888bhtnn$
 $
 $
r   c                     i | ]D\  }}||j         vrt          |j        j                  nt          |j        j                  d z   ES r9   )r   r   rt   rank)r   r[   specr   s      r   r@   z:NumpyMultiprocessingGenerator.__init__.<locals>.<dictcomp>%  sb     !
 !
 !
T D4G)G)GTZ_%%%SQUQ[Q`MaMadeMe!
 !
 !
r   )rS   r?   rT   rU   r   r   rV   r   r   r   r   num_workerscolumns_to_ranks)r   rS   r?   rT   rU   rV   r   r   r   r   r   s   `          r   r   z&NumpyMultiprocessingGenerator.__init__  s     ,$.ee5H5N5N5P5Peee$
 $
 $
 $
17799$
 $
 $
  !1$,&!
 !
 !
 !
-3355!
 !
 !
r   c           
   #   T   K   t           j        t          t          t	           j                   j        z                                }                      j         j         j        | j	                  \  }}}t          d          g g }g }fdt          |          D             }fdt          |          D             } j         j         j         j         j         j         j        d}	t%                      5 t          |          D ]t'          t)                                }
d d|
 d d                                        fd j                                        D             }|                    |           |         }|k    r||}nd }|||         |         d	|	}                     j        |d
          }|                                 |                    |           d}|sht          |          D ]T|                             d          st7          d          |                                          |         }t;          d |                                D                       rd
} nt%                      5  fd|                                D             }d |                                D             } j        D ]G}||                             d||         j         d                    !                    d          ||<   H	 d d d            n# 1 swxY w Y   |V  |         "                                 V|h|D ]}|#                                 	 d d d            d S # 1 swxY w Y   d S )Nspawnc                 8    g | ]}                                 S r   Eventr   _ctxs     r   r   z:NumpyMultiprocessingGenerator.__iter__.<locals>.<listcomp>5  s!    FFFaciikkFFFr   c                 8    g | ]}                                 S r   r   r   s     r   r   z:NumpyMultiprocessingGenerator.__iter__.<locals>.<listcomp>6  s!    GGGqsyy{{GGGr   )rS   r?   rT   rU   rV   r   r   dw_r   
   c           	      l    i | ]0\  }}|                      d | d|ft          j        d          1S )r   _shapeTrt   rd   r   r   r   r|   r   r[   r   shm_ctxworker_names      r   r@   z:NumpyMultiprocessingGenerator.__iter__.<locals>.<dictcomp>G  s_     ' ' '!T **k+G+GC+G+G+GPTw^`^fos*tt' ' 'r   )r   rR   extra_batcharray_ready_eventarray_loaded_eventT)targetkwargsdaemonF<   )timeoutzData loading worker timed out!c              3   F   K   | ]}t          j        |d k               V  dS )r   N)r   any)r   rt   s     r   	<genexpr>z9NumpyMultiprocessingGenerator.__iter__.<locals>.<genexpr>e  s0      PP26%!),,PPPPPPr   c           	      v    i | ]5\  }}|                               d | |j        |         d          6S )r   Fr   )r   rV   )r   r[   rt   batch_shm_ctxrC   namesr   s      r   r@   z:NumpyMultiprocessingGenerator.__iter__.<locals>.<dictcomp>s  sl     " " " !+U  !8!8#(8 3 3c 3 3&+&*&>s&C',	 "9 " "" " "r   c                 >    i | ]\  }}|t          j        |          S r   )r   copy)r   r[   arrs      r   r@   z:NumpyMultiprocessingGenerator.__iter__.<locals>.<dictcomp>~  s&    !S!S!SS#rws||!S!S!Sr   Ur:   )$minr   r   r   rL   rS   r   distribute_batchesr   r   r   rO   r?   rT   rU   rV   r   r   r   strr   rQ   r   Processworker_loopstartwaitTimeoutErrorclearr   rN   viewrt   squeezesetjoin)r   r   per_worker_batchesfinal_batchfinal_batch_workershape_arraysworkersarray_ready_eventsarray_loaded_events	base_argsworker_random_idworker_shape_arraysworker_indicesfinal_batch_argworker_kwargsworkerend_signal_receivedarray_shapesarrays
string_colr   r   rC   r   r   r   s   `                   @@@@@@r   __iter__z&NumpyMultiprocessingGenerator.__iter__*  s     $*CS5F5F5X0Y0Y,Z,Z[[>B>U>UL$/4+>T\?
 ?
;K); '""FFFF53E3EFFFGGGGE+4F4FGGG |"1/#3#'#; $ 5"1
 
	 !"" H	g;'' ' '#&uww<< :A::(8::3B3?[)))' ' ' ' '%)%:%@%@%B%B' ' '# ##$7888!3A!6***{/F&1OO&*O#.-#2);A)>*=a*@! !  ! D,<][_``v&&&&"') &1{++ %1 %1A-a055b5AA M*+KLLL&q)//111#/?LPP,:M:M:O:OPPPPP  /3+ -.. -" " " " " " " /;.@.@.B.B" " " "T!SFLLNN!S!S!S*.*=  J &z 2 7 78ZF:<N<TUW<X8Z8Z [ [ c cdf g g #:..              & !LLL'*..0000M * &1R "  OH	 H	 H	 H	 H	 H	 H	 H	 H	 H	 H	 H	 H	 H	 H	 H	 H	 H	s8   
F N*BM<NMNM?NN!$N!c                     | S r.   r   r   s    r   __call__z&NumpyMultiprocessingGenerator.__call__  r   r   c           
          	
 dt           j        d<   t          j        rdd l}nt          d          |j                            g d           
 	f
d}t                      5 	fd|                                D             |D ]} ||           | ||                                           D ]\  }}d|d d <   
	                                 d d d            d S # 1 swxY w Y   d S )	N3TF_CPP_MIN_LOG_LEVELr   r   GPUc           	      4  
 t          | 	
d          }i }t                      5 }                                D ]\  }}||         }|v r0|                    d                              |j        dz             }|j        |         d d <   |                     d| |j        |d          ||<   |||         d d <                                                                     	                                 d d d            d S # 1 swxY w Y   d S )NT)rR   rS   r?   rT   rU   rV   rW   r   )r:   r   r   )
r]   r   r   r   reshapert   r   r   r   r   )rR   r&   
out_arraysr   r[   r\   r"   r   r   rT   rU   r?   rV   rS   r  r   r   s          r   send_batch_to_parentzGNumpyMultiprocessingGenerator.worker_loop.<locals>.send_batch_to_parent  s    -% /$7   E J$&& +- (;'@'@'B'B / /OC!#JEn,, !&

4 0 0 8 8u9L M M+0;L%aaa(&3&=&=&....ek\` '> ' 'JsO */JsOAAA&&!%%'''"'')))"((***%+ + + + + + + + + + + + + + + + + +s   CDDDc           	      l    i | ]0\  }}|                      d | d|ft          j        d          1S )r   r   Fr   r   r   s      r   r@   z=NumpyMultiprocessingGenerator.worker_loop.<locals>.<dictcomp>  s^       C W&&+'C'C'C'C'CD7Z\Zbkp&qq  r   r:   )
osenvironr	   r   r   r   set_visible_devicesr   r   r   )rS   r?   rT   rU   rV   r   r   rR   r   r   r   r   r$   r  r&   r[   r"   r  r   s   ````` `  ```     @@r   r   z)NumpyMultiprocessingGenerator.worker_loop  s    .1
)* 	h#####fggg
	%%b%000	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+B !"" 	$g    !1!7!7!9!9  L
 ! , ,$$U++++&$$[111*0022  
Uaaa!!###	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$s   2A4C33C7:C7c                    t          j        t          |                     }|rt           j                            |           t          |          }|||z  z
  }t          j        ||g          \  }}|st          |          dk    rd }|                    d|          }t          |          }	|	|	|z  z
  }
t          j        ||
g          \  }}|                    d||          }t          j        ||j        d         d          }d |D             }t          t          |                    D ]=}t          j	        ||         ||                             dd          gd          ||<   >|t          |          }nd }|||fS )Nr   r:   r9   )axisc                 8    g | ]}t          j        |d           S r   )r   r   )r   r
  s     r   r   zDNumpyMultiprocessingGenerator.distribute_batches.<locals>.<listcomp>  s$    eeebj;;eeer   )
r   arangerL   rz   r   splitr  rt   rO   concatenate)rS   r   r   r   r   rR   num_samplesincomplete_batch_cutofflast_incomplete_batchnum_batchesfinal_batches_cutofffinal_batchesper_worker_indicesrC   incomplete_batch_worker_idxs                  r   r   z0NumpyMultiprocessingGenerator.distribute_batches  s   )CLL)) 	'Ig&&&'ll #.z1I"J)+'<S;T)U)U&& 	)S!6771<<$(!//"j11'll*kK.GH!#'4H3I!J!J//"k:>>Xgw}Q/?aHHHeeRdeees=))** 	u 	uA$&N4Fq4I=YZK[KcKcdegiKjKj3krs$t$t$tq!! ,*-m*<*<''*.'!#8:UUUr   N)	r   r   r   r   r  r  staticmethodr   r   r   r   r   r   r     s         
  
  
D_ _ _B   E$ E$ \E$N V V \V V Vr   r   c
                    t           j        rddl}
nt          d          t	          | |||||||||	
  
        }|
j        j                            ||          }|r t          t          |           |z            }n,t          t          t          |           |z                      }|                    |
j        j                            |                    S )ao  Create a tf.data.Dataset from the underlying Dataset. This is a multi-process method - the single-process
    equivalent is dataset_to_tf.

    Args:
        dataset (`Dataset`): Dataset to wrap with tf.data.Dataset.
        cols_to_retain (`List[str]`): Dataset column(s) to load in the
            tf.data.Dataset. It is acceptable to include column names that are created by the `collate_fn` and
            that do not exist in the original dataset.
        collate_fn(`Callable`): A function or callable object (such as a `DataCollator`) that will collate
            lists of samples into a batch.
        collate_fn_args (`Dict`): A  `dict` of keyword arguments to be passed to the
            `collate_fn`. Can be empty.
        columns_to_np_types (`Dict[str, np.dtype]`): A `dict` mapping column names to numpy dtypes.
        output_signature (`Dict[str, tf.TensorSpec]`): A `dict` mapping column names to
            `tf.TensorSpec` objects.
        shuffle(`bool`): Shuffle the dataset order when loading. Recommended True for training, False for
            validation/evaluation.
        batch_size (`int`, default `None`): Size of batches to load from the dataset. Defaults to `None`, which implies that
            the dataset won't be batched, but the returned dataset can be batched later with `tf_dataset.batch(batch_size)`.
        drop_remainder(`bool`, default `None`): Drop the last incomplete batch when loading. If not provided,
            defaults to the same setting as shuffle.
        num_workers (`int`): Number of workers to use for loading the dataset. Should be >= 1.

    Returns:
        `tf.data.Dataset`
    r   Nr   )
rS   r?   rT   rU   rV   r   r   r   r   r   )r   )r	   r   r   r   r   r   r   from_generatorr   rL   r   applyr   assert_cardinality)rS   r?   rT   rU   rV   r   r   r   r   r   r$   data_generatorr   dataset_lengths                 r   multiprocess_dataset_to_tfr6    s    L  dbccc2%'/)%  N //Qa/bbJ >S\\Z788T#g,,";<<==BG0CCNSSTTTr   )F)__doc__r  r   	functoolsr   mathr   uuidr   rE   r   pyarrowr/   multiprocessr   multiprocess.shared_memoryr   r    r	   r(   r,   r2   r]   r   r   r   r6  r   r   r   <module>r?     s      				                            $ $ $ $ $ $7777777   LLL        (  i i i ej- - - -`n) n) n)b       @mV mV mV mV mV mV mV mV`=U =U =U =U =Us   3 ==