
    &`i.                        d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZ d dlZd dlmZmZmZmZ d dlmZ d dlmZ  ej        e          ZdZ	 dd	ed
eej        j                 dee         fdZ G d d          Z dS )    N)Counter)Path)CallableDictOptionalUnion)StorageContext_download_from_fs_path_list_at_fs_pathget_fs_and_path)Trial)out_of_band_serialize_dataseta  This could be due to a large number of trials, large logfiles from lots of reported metrics, or throttling from the remote storage if uploading too frequently.
You may want to consider switching the `RunConfig(storage_filesystem)` to a more performant storage backend such as s3fs for a S3 storage path.
You can suppress this error by setting the environment variable TUNE_WARN_SLOW_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a higher value than the current threshold ({threshold}).experiment_pathfsreturnc                     ddl m} t          | |          \  }}t          ||          }|j                            d          }t          j        ||          }|sdS t          |          }t          ||          
                                S )a  Returns file name of most recently created experiment checkpoint.

    Args:
        experiment_path: Local or remote path to the experiment directory
            containing at least one experiment checkpoint file.

    Returns:
        str: The local or remote path to the latest experiment checkpoint file
            based on timestamp. None if no experiment checkpoints were found.
    r   )TuneController)storage_filesystemr   fs_path*N)"ray.tune.execution.tune_controllerr   r   r   CKPT_FILE_TMPLformatfnmatchfiltermaxr   as_posix)r   r   r   experiment_fs_path	filenamespatternmatchingfilenames           w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/tune/execution/experiment_state.py"_find_newest_experiment_checkpointr%   $   s     BAAAAA,_QSTTTB B0BCCCI+22377G~i11H t8}}H"H--66888    c                       e Zd ZdZdddee         deeee	f         dee         fdZ
ed             Zd	efd
Z	 	 ddeg df         dedefdZddZdefdZdS )_ExperimentCheckpointManageraO  Helper class for managing experiment-level checkpoints.

    This class implements the ``checkpoint()`` method used to checkpoint
    experiment state. When called, this will serialize and write to disk
    the state of the trial runner, trial executor, and search algorithm, to
    a specified checkpoint file.

    The checkpoint period is automatically adjusted to
    ``max(10, time_per_checkpoint * 19)``. This means that at most 5% of the
    time (1/20) will be used for writing checkpoints, while 95% of the time
    (19/20) will be used to handle the rest of the training loop.
    N)sync_every_n_trial_checkpointsstoragecheckpoint_periodr)   c                   || _         t          d          | _        d | _        |dk    | _        | j        rd| _        nt          |          | _        || _        t                      | _        d| _	        t          t          j                            dd                    | _        t          t          j                            dd                    | _        d S )	Nz-infauto      $@F:TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S55TUNE_WARN_SLOW_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S30)_storagefloat_last_save_time_last_sync_time_auto_checkpoint_enabled_checkpoint_period_sync_every_n_trial_checkpointsr   &_trial_num_checkpoints_since_last_sync_should_force_sync_uposenvironget_excessive_sync_threshold_slow_sync_threshold)selfr*   r+   r)   s       r$   __init__z%_ExperimentCheckpointManager.__init__K   s      $V}}# ):V(C%( 	?&*D##&+,=&>&>D# 0N,HO		3+0").JNNLc *
 *
&
 %*JNNG %
 %
!!!r&   c                     | j         S N)r7   rA   s    r$   auto_checkpoint_enabledz4_ExperimentCheckpointManager.auto_checkpoint_enabledq   s    ,,r&   
time_takenc                     | j         rBt          d|dz            | _        t                              d|dd| j        dd           d S d S )Nr.      z#Experiment state snapshotting took .2fz+ seconds. Adjusting snapshotting period to z	 seconds.)r7   r   r8   loggerdebug)rA   rG   s     r$   _update_auto_checkpoint_timez9_ExperimentCheckpointManager._update_auto_checkpoint_timeu   s~    ( 		 '*$
R&@&@D#LL:-: : *9: : :    			 		r&   Fsave_fnforcewaitc                      j         j        }|p j        }t          j                    }| j        z
   j        k     r|sdS t          j                    }t                      5   |             ddd           n# 1 swxY w Y    fd}|r}t          j                    } |             t          j                    |z
  }	|	 j        k    r?t          
                    d|	ddt                               j                               j        t          j                     j        z
  nd}
 j         j                            | j         j                  }|rx|
=|
 j        k     r2 j        r+t          
                    d j         d j         d	           t          j                     _         j                                         d
 _        nL|
J|
 j        k    r?t          
                    d|
ddt                               j                              |r
 |             t          j                    |z
  }                     |           t          j                     _        dS )a  Saves execution state to the experiment directory on the storage path.
        This includes an experiment checkpoint file that contains trial statuses
        and the searcher state.

        Overwrites the current session checkpoint, which starts when self
        is instantiated. Throttle depends on self._checkpoint_period.

        Args:
            save_fn: Function to call to actually save data to the driver
                staging path. The files in the driver staging path will be
                uploaded to the storage path.
            force: Forces an experiment checkpoint and launches a sync to storage.
                This happens regardless of checkpoint_period
            wait: Waits for the sync up to complete before returning.
        Nc                      	  j         j                                         d S # t          $ r. t                              d j         j         dd           Y d S w xY w)Nz'Saving experiment state to storage at 'z' failed with exception: T)exc_info)r3   syncerrP   	ExceptionrK   errorr   rE   s   r$   wait_for_synczL_ExperimentCheckpointManager.sync_up_experiment_state.<locals>.wait_for_sync   s    $))+++++   T8T T T!       s   # 4AAzvSaving the experiment state (which holds a global view of trial statuses and is used to restore the experiment) took ~rJ   z1 seconds, which may be a performance bottleneck.
)	thresholdzLExperiment state snapshotting has been triggered multiple times in the last a   seconds and may become a bottleneck. A snapshot is forced if `CheckpointConfig(num_to_keep)` is set, and a trial has checkpointed >= `num_to_keep` times since the last snapshot.
You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.
You can suppress this warning by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (z3). Set it to 0 to completely suppress this warning.FzSaving the experiment state (which holds a global view of trial statuses and is used to restore the experiment) has already taken zh seconds, which may cause consistency issues upon restoration if your driver script ungracefully exits.
)rG   )r3   experiment_driver_staging_pathr;   time	monotonicr5   r8   r   r@   rK   warning_SLOW_SYNC_WARNINGr   r6   rT   sync_upr   r?   r:   clearrM   )rA   rN   rO   rP   driver_staging_pathnowcheckpoint_time_startrW   
start_time	wait_timetime_since_last_synclaunched_synccheckpoint_time_takens   `            r$   sync_up_experiment_statez5_ExperimentCheckpointManager.sync_up_experiment_state   s3   * #mJ33n%%(????F !% 0 0 +,, 	 	GIII	 	 	 	 	 	 	 	 	 	 	 	 	 	 		 	 	 	 	  	))JMOOO((:5I4444X&QX X *004;T0UU	X X   #/ Nt333 	
 ,44!A
 
  (	$0(4+III. J G)-)GG G 6G G G  " $(>#3#3D  7==???).D&& %0(4+DDDX)=MX X
 *004;T0UUX X    	MOOO $ 0 03H H 	))5J)KKK  $~//s   "A99A= A=r   c           
         | j         j        }t          || j         j                  }d |D             }|D ]l}t	          | j         j        |                                          }t	          | j         j        |                                          }t          |||           mt          	                    d| d| j         j        j
         d| j         j         d| j         j                    d S )Nr   c                 f    g | ].}|                     d           s|                     d          ,|/S )z.jsonz.pkl)endswith).0paths     r$   
<listcomp>zK_ExperimentCheckpointManager.sync_down_experiment_state.<locals>.<listcomp>  sP     
 
 
}}W%%
 *.v)>)>

 
 
r&   )r   r   
local_pathzCopied z from:
(fs, path) = (z, z)
-> )r3   r   r   r   r   r   rY   r
   rK   rL   	type_name)rA   r   	filepathsmatchesrelpathr   ro   s          r$   sync_down_experiment_statez7_ExperimentCheckpointManager.sync_down_experiment_state   s(   ]-$DM4TUUU	
 
!
 
 

  	R 	RG4=;WEENNPPG<g hjj  #b'jQQQQQAg A A0:A A}/A A ->A A	
 	
 	
 	
 	
r&   trialc                     | j         sd S | j        |xx         dz  cc<   | j        |         | j         k    r	d| _        d S d S )N   T)r9   r:   r;   )rA   ru   s     r$   on_trial_checkpointz0_ExperimentCheckpointManager.on_trial_checkpoint  sh    3 	F3E:::a?::: 7>34 4 *.D&&&4 4r&   )FF)r   N)__name__
__module____qualname____doc__r   r	   r   intr4   strrB   propertyrF   rM   r   boolrh   rt   r   rx    r&   r$   r(   r(   =   s(        $ 9=$
 $
 $
 .)$
 !eS1	$

 )1$
 $
 $
 $
L - - X-
u 
 
 
 
 	z0 z0"d(#z0 z0 	z0 z0 z0 z0x
 
 
 
0
. 
. 
. 
. 
. 
. 
.r&   r(   rD   )!r   loggingr<   rZ   collectionsr   pathlibr   typingr   r   r   r   
pyarrow.fspyarrowray.train._internal.storager	   r
   r   r   ray.tune.experiment.trialr   +ray.tune.impl.out_of_band_serialize_datasetr   	getLoggerry   rK   r]   r~   r   
FileSystemr%   r(   r   r&   r$   <module>r      s}     				              2 2 2 2 2 2 2 2 2 2 2 2                , + + + + + U U U U U U		8	$	$6  AE9 99&wz'<=9c]9 9 9 92b. b. b. b. b. b. b. b. b. b.r&   