
    &`iw)                        d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZ d dl	Z	d dl
mZ d dlZd dlZd dlmZmZ d dlmZ d dlmZ d Z e            Z eej                   ed	          k    Z eej                   ed
          k     Z ee	j                   ed          k    Zeoe	j                                        Z	 d dlmZ n# e$ r	 d dl mZ Y nw xY werej!        j"        Z"nej!        j#        Z"er
d dl$m%Z%m&Z&m'Z'  e j(        e)          Z*dZ+ ed           G d dej!        j,                              Z- ed           G d de"                      Z. ed           G d dej!        j/                              Z0 ed           G d de                      Z1 ed          dej2        dej2        fd            Z3 ed           G d dej4        j5                              Z6dS )    N)Path)AnyDict)Version)TagKeyrecord_extra_usage_tag)
Checkpoint)	PublicAPIc                  >    	 dd l m}  n# t          $ r dd l} Y nw xY w| S )Nr   )lightning.pytorchpytorchModuleNotFoundErrorpytorch_lightning)pls    x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/ray/train/lightning/_lightning_utils.pyimport_lightningr      sN    '&&&&&&& ' ' '&&&&&&'Is   	 z2.0.0z2.1.0z1.12.0)LightningEnvironment)FullStateDictConfigFullyShardedDataParallelStateDictType
_report_onbeta)	stabilityc                   t     e Zd ZdZ fdZedej        fd            Zede	e
ef         fd            Z xZS )RayDDPStrategya  Subclass of DDPStrategy to ensure compatibility with Ray orchestration.

    For a full list of initialization arguments, please refer to:
    https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.strategies.DDPStrategy.html

    Note that `process_group_backend`, `timeout`, and `start_method` are disabled here,
    please specify these arguments in :class:`~ray.train.torch.TorchConfig` instead.
    c                 n     t                      j        |i | t          t          j        d           d S N1)super__init__r   r   TRAIN_LIGHTNING_RAYDDPSTRATEGYselfargskwargs	__class__s      r   r    zRayDDPStrategy.__init__C   s8    $)&)))vDcJJJJJ    returnc                 H    t           j        j                                        S Nraytraintorch
get_devicer#   s    r   root_devicezRayDDPStrategy.root_deviceG       y))+++r'   c                 8    t          | j        | j                  S N)num_replicasrankdict
world_sizeglobal_rankr0   s    r   distributed_sampler_kwargsz)RayDDPStrategy.distributed_sampler_kwargsK   $    !
 
 
 	
r'   __name__
__module____qualname____doc__r    propertyr.   devicer1   r   strr   r;   __classcell__r&   s   @r   r   r   8   s         K K K K K ,U\ , , , X, 
DcN 
 
 
 X
 
 
 
 
r'   r   c                        e Zd ZdZ fdZedej        fd            Zede	e
ef         fd            Zde	e
ef         f fdZ xZS )RayFSDPStrategya  Subclass of FSDPStrategy to ensure compatibility with Ray orchestration.

    For a full list of initialization arguments, please refer to:
    https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.strategies.FSDPStrategy.html

    .. note::
        It is recommended to upgrade `lightning>=2.1` or above when using FSDP
        with Lightning, since Lightning starts to natively support `state_dict_type`,
        `sharding_strategy`, `auto_wrap_policy` and other FSDP configurations from 2.1.
    c                 n     t                      j        |i | t          t          j        d           d S r   )r   r    r   r   TRAIN_LIGHTNING_RAYFSDPSTRATEGYr"   s      r   r    zRayFSDPStrategy.__init__`   s8    $)&)))vEsKKKKKr'   r(   c                 H    t           j        j                                        S r*   r+   r0   s    r   r1   zRayFSDPStrategy.root_deviced   r2   r'   c                 8    t          | j        | j                  S r4   r7   r0   s    r   r;   z*RayFSDPStrategy.distributed_sampler_kwargsh   r<   r'   c                    | j         
J d            t          rt          rt          rt	          j        | j         t          j        t          dd                    5  | j         	                                }i }t          d          }|                                D ]/\  }}|                    d          r||d         }|||<   *|||<   0|cddd           S # 1 swxY w Y   dS t                                                      S )a  Gathers the full state dict to rank 0 on CPU.

        FSDP checkpointing is broken in Lightning 2.0.x. This subclass patches the
        behavior to perform a full state dict checkpointing, gathering the checkpoint
        shards on rank 0 CPU. Upgrade to `lightning>=2.1` to do sharded state dict
        checkpointing.

        See the note in the class docstring for more details.
        Nz.Failed to get the state dict for a None model!T)offload_to_cpu
rank0_only)modulestate_dict_typestate_dict_configz_forward_module.)model_TORCH_FSDP_AVAILABLE_LIGHTNING_GREATER_EQUAL_2_0_LIGHTNING_LESS_THAN_2_1r   rQ   r   FULL_STATE_DICTr   
state_dictlenitems
startswithr   lightning_module_state_dict)r#   rX   ckpt_state_dict
prefix_lenkvnon_prefixed_keyr&   s          r   r\   z+RayFSDPStrategy.lightning_module_state_dicto   sx    z%%'W%%% "	9,	9 )	9
 *9z - ="5#'D# # #   ' ' "Z2244
"$ !344
&,,.. / /DAq||$677 /+,Z[[>(<=(899-.**&#' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' '( 7766888s   A0CC!C)r>   r?   r@   rA   r    rB   r.   rC   r1   r   rD   r   r;   r\   rE   rF   s   @r   rH   rH   S   s        	 	L L L L L ,U\ , , , X, 
DcN 
 
 
 X
&9T#s(^ &9 &9 &9 &9 &9 &9 &9 &9 &9 &9r'   rH   c                   t     e Zd ZdZ fdZedej        fd            Zede	e
ef         fd            Z xZS )RayDeepSpeedStrategyzSubclass of DeepSpeedStrategy to ensure compatibility with Ray orchestration.

    For a full list of initialization arguments, please refer to:
    https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.strategies.DeepSpeedStrategy.html
    c                 n     t                      j        |i | t          t          j        d           d S r   )r   r    r   r   $TRAIN_LIGHTNING_RAYDEEPSPEEDSTRATEGYr"   s      r   r    zRayDeepSpeedStrategy.__init__   s8    $)&)))vJCPPPPPr'   r(   c                 H    t           j        j                                        S r*   r+   r0   s    r   r1   z RayDeepSpeedStrategy.root_device   r2   r'   c                 8    t          | j        | j                  S r4   r7   r0   s    r   r;   z/RayDeepSpeedStrategy.distributed_sampler_kwargs   r<   r'   r=   rF   s   @r   rc   rc      s         Q Q Q Q Q ,U\ , , , X, 
DcN 
 
 
 X
 
 
 
 
r'   rc   c                   x     e Zd ZdZ fdZdefdZdefdZdefdZdefdZ	dedd	fd
Z
dedd	fdZd Z xZS )RayLightningEnvironmentz9Setup Lightning DDP training environment for Ray cluster.c                 n     t                      j        |i | t          t          j        d           d S r   )r   r    r   r   'TRAIN_LIGHTNING_RAYLIGHTNINGENVIRONMENTr"   s      r   r    z RayLightningEnvironment.__init__   s8    $)&)))vMsSSSSSr'   r(   c                 b    t           j                                                                        S r*   )r,   r-   get_contextget_world_sizer0   s    r   r9   z"RayLightningEnvironment.world_size   "    y$$&&55777r'   c                 b    t           j                                                                        S r*   )r,   r-   rm   get_world_rankr0   s    r   r:   z#RayLightningEnvironment.global_rank   ro   r'   c                 b    t           j                                                                        S r*   )r,   r-   rm   get_local_rankr0   s    r   
local_rankz"RayLightningEnvironment.local_rank   ro   r'   c                 b    t           j                                                                        S r*   )r,   r-   rm   get_node_rankr0   s    r   	node_rankz!RayLightningEnvironment.node_rank   s"    y$$&&44666r'   sizeNc                     d S r*    )r#   rx   s     r   set_world_sizez&RayLightningEnvironment.set_world_size       r'   r6   c                     d S r*   rz   )r#   r6   s     r   set_global_rankz'RayLightningEnvironment.set_global_rank   r|   r'   c                     d S r*   rz   r0   s    r   teardownz RayLightningEnvironment.teardown   s    r'   )r>   r?   r@   rA   r    intr9   r:   rt   rw   r{   r~   r   rE   rF   s   @r   ri   ri      s       CCT T T T T8C 8 8 8 88S 8 8 8 88C 8 8 8 873 7 7 7 73 4    C D          r'   ri   trainerr(   c                     t           t          t          g}t           fd|D                       s2t	          dt           j                   dd |D              d          t           j        dd          }|r5t          |t                    s t	          dt          |           d	          t          t          j        d
            S )z@Prepare the PyTorch Lightning Trainer for distributed execution.c              3   B   K   | ]}t          j        |          V  d S r*   )
isinstancestrategy).0clsr   s     r   	<genexpr>z"prepare_trainer.<locals>.<genexpr>   s0      QQSz'*C00QQQQQQr'   zInvalid strategy class: zJ. To use PyTorch Lightning with Ray, the strategy object should be one of c                     g | ]	}|j         
S rz   )r>   )r   r   s     r   
<listcomp>z#prepare_trainer.<locals>.<listcomp>   s    ======r'   z class or its subclass.cluster_environmentNzoInvalid cluster environment plugin. The expected class is`ray.train.lightning.RayLightningEnvironment` but got !r   )r   rH   rc   anyRuntimeErrortyper   getattrr   ri   r   r   TRAIN_LIGHTNING_PREPARE_TRAINER)r   valid_strategy_classr   s   `  r   prepare_trainerr      s   
 +O=QRQQQQ<PQQQQQ 
tG,<'='=  ==(<===  
 
 	
 "'"24I4PP 
:4$ $ 
 4/004 4 4
 
 	
 6A3GGGNr'   c                   0     e Zd ZdZdZd fdZddZ xZS )RayTrainReportCallbacku$  A simple callback that reports checkpoints to Ray on train epoch end.

    This callback is a subclass of `lightning.pytorch.callbacks.Callback
    <https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.Callback.html#lightning.pytorch.callbacks.Callback>`_.

    It fetches the latest `trainer.callback_metrics` and reports together with
    the checkpoint on each training epoch end.

    Checkpoints will be saved in the following structure::

        checkpoint_00000*/      Ray Train Checkpoint
        └─ checkpoint.ckpt      PyTorch Lightning Checkpoint

    For customized reporting and checkpointing logic, implement your own
    `lightning.pytorch.callbacks.Callback` following this user
    guide: :ref:`Saving and Loading Checkpoints <train-dl-saving-checkpoints>`.
    zcheckpoint.ckptr(   Nc                    t                                                       t          j                                                    }t          j                                                                        }t          j                                                                        | _	        t          t          j                    d| d|                                           | _        t          j                            | j                  r$| j	        dk    rt%          j        | j                   t)          t*          j        d           d S )Nzlightning_checkpoints-job_id=z-name=r   r   )r   r    r,   get_runtime_context
get_job_idr-   rm   get_experiment_namers   rt   r   tempfile
gettempdiras_posixtmpdir_prefixospathisdirshutilrmtreer   r   &TRAIN_LIGHTNING_RAYTRAINREPORTCALLBACK)r#   job_idexperiment_namer&   s      r   r    zRayTrainReportCallback.__init__  s   (**5577)//11EEGG)//11@@BB!!!KFKK/KK
 
 (** 	 7==+,, 	.A1E1EM$,---vLcRRRRRr'   c                 p   t          | j        t          |j                                                            }t          j        |d           |j        }d |                                D             }|j        |d<   |j	        |d<   t          || j
                                                  }|                    |d           t          j        |          }t          j                            ||           |j                                         | j        d	k    rt)          j        |           d S d S )
NT)exist_okc                 >    i | ]\  }}||                                 S rz   )item)r   r_   r`   s      r   
<dictcomp>z=RayTrainReportCallback.on_train_epoch_end.<locals>.<dictcomp>  s&    ;;;41a1affhh;;;r'   epochstepF)weights_only)metrics
checkpointr   )r   r   rD   current_epochr   r   makedirscallback_metricsrZ   global_stepCHECKPOINT_NAMEsave_checkpointr	   from_directoryr,   r-   reportr   barrierrt   r   r   )r#   r   	pl_moduletmpdirr   	ckpt_pathr   s          r   on_train_epoch_endz)RayTrainReportCallback.on_train_epoch_end  s(   d(#g.C*D*DEENNPP
FT**** *;;7==??;;; #0!- !566??AA		>>>  .v66
	Z@@@ 	  """?aM&!!!!!  r'   )r(   N)r>   r?   r@   rA   r   r    r   rE   rF   s   @r   r   r      sh         $ (OS S S S S S" " " " " " " "r'   r   )7loggingr   r   r   pathlibr   typingr   r   r.   packaging.versionr   r,   	ray.trainray._common.usage.usage_libr   r   r	   ray.utilr
   r   r   __version__rU   rV   _TORCH_GREATER_EQUAL_1_12distributedis_availablerT   &lightning.pytorch.plugins.environmentsr   r   &pytorch_lightning.plugins.environments
strategiesFSDPStrategyDDPFullyShardedStrategytorch.distributed.fsdpr   r   r   	getLoggerr>   loggerLIGHTNING_REPORT_STAGE_KEYDDPStrategyr   rH   DeepSpeedStrategyrc   ri   Trainerr   	callbacksCallbackr   rz   r'   r   <module>r      s    				                  % % % % % % 



     F F F F F F F F                     &wr~66''':J:JJ "72>22WWW5E5EE #GE$566''(:K:KK 1Ve6G6T6T6V6V LKKKKKKK L L LKKKKKKKKL   9=-LL=8L           
	8	$	$)  V
 
 
 
 
R]. 
 
 
4 VA9 A9 A9 A9 A9l A9 A9 A9H V
 
 
 
 
2=: 
 
 
. V    2   > VRZ BJ    : V=" =" =" =" ="R\2 =" =" =" =" ="s   C CC