
     `iU                     Z   d Z ddlZddlmZ ddlZddlZddlm	Z	 ddl
mZ ddlmZmZmZ  e            r
ddlZddlmZ  ej        e          Zd Z e            r e            rdd	lmZ ndd
lmZ  G d de          Z G d de          Zdad Zd Zd Zd Z d Z!d Z"ddZ#ddZ$dS )z
Integration with Deepspeed
    N)partialmethod   )dep_version_check)is_accelerate_availableis_torch_availablelogging)nnc                      t           j                            d          d u} | r-	 t          j        d          }dS # t          j        $ r Y dS w xY wd S )N	deepspeedTF)	importlibutil	find_specimportlib_metadatametadataPackageNotFoundError)package_exists_s     w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/integrations/deepspeed.pyis_deepspeed_availabler   $   so    ^--k::$FN  	"+K88A4!6 	 	 	55		 s   ; AA)HfDeepSpeedConfig)objectc                   "     e Zd ZdZ fdZ xZS )r   aJ  
    This object contains a DeepSpeed configuration dictionary and can be quickly queried for things like zero stage.

    A `weakref` of this object is stored in the module's globals to be able to access the config from areas where
    things like the Trainer object is not available (e.g. `from_pretrained` and `_get_resized_embeddings`). Therefore
    it's important that this object remains alive while the program is still running.

    [`Trainer`] uses the `HfTrainerDeepSpeedConfig` subclass instead. That subclass has logic to sync the configuration
    with values of [`TrainingArguments`] by replacing special placeholder values: `"auto"`. Without this special logic
    the DeepSpeed configuration is not modified in any way.

    Args:
        config_file_or_dict (`Union[str, Dict]`): path to DeepSpeed config file or dict.

    c                     t          |            t          d           t          d           t                                          |           d S )N
accelerater   )set_hf_deepspeed_configr   super__init__selfconfig_file_or_dict	__class__s     r   r   zHfDeepSpeedConfig.__init__J   sL    %%%,'''+&&&,-----    )__name__
__module____qualname____doc__r   __classcell__r!   s   @r   r   r   9   sB          . . . . . . . . .r"   r   c                   ^     e Zd ZdZ fdZd Zd ZddZ eed	          Z	dd
Z
d Z xZS )HfTrainerDeepSpeedConfigz
    The `HfTrainerDeepSpeedConfig` object is meant to be created during `TrainingArguments` object creation and has the
    same lifespan as the latter.
    c                 f    t                                          |           d | _        g | _        d S N)r   r   _dtype
mismatchesr   s     r   r   z!HfTrainerDeepSpeedConfig.__init__X   s.    ,---r"   c                 <    | j         t          d          | j         S )Nz8trainer_config_process() wasn't called yet to tell dtype)r-   
ValueError)r   s    r   dtypezHfTrainerDeepSpeedConfig.dtype]   s!    ;WXXX{r"   c                 @    |                      |          }|dS |dk    S )NFauto)	get_value)r   ds_key_longvals      r   is_autoz HfTrainerDeepSpeedConfig.is_autob   s'    nn[));5&= r"   NTc           
         |                      |          \  }}|dS |                    |          dk    r|||<   dS |sdS |                    |          }|.||k    r*| j                            d| d| d| d|            dS dS dS )a  
        A utility method that massages the config file and can optionally verify that the values match.

        1. Replace "auto" values with `TrainingArguments` value.

        2. If it wasn't "auto" and `must_match` is true, then check that DS config matches Trainer
        config values and if mismatched add the entry to `self.mismatched` - will assert during
        `trainer_config_finalize` for one or more mismatches.

        Nr3   z- ds =z vs hf )find_config_nodegetr.   append)r   r5   hf_valhf_key
must_matchconfigds_keyds_vals           r   
fill_matchz#HfTrainerDeepSpeedConfig.fill_matchi   s     ..{;;>F::f''#F6NF 	FF##&F"2"2O""#Y;#Y#Y#Y#Y#Y#YQW#Y#YZZZZZ "2"2r"   F)r?   c                    |j         |j        z  |j        z  }|                     d|j        d|            |                     d|j        d           |                     d|d|            |                     d|j        d           |                     d|j        d	           |                     d
|j        |j        gd           |                     d|j        d           |                     d|j	        d           | 
                    dd           |                     d|j        d	           |j        s|j        r|j        dk    rdnd}nd}|j        r8| j                            di           | j        d<   |j        | j        d         d<   |                     d|j        s|j        o|dk    d           |                     d|dk    d           |                     d|j        d           |                     d|j        p|j        d           |                     d          rt*          j        | _        dS |                     d          rt*          j        | _        dS t*          j        | _        dS ) z
        Adjust the config with `TrainingArguments` values. This stage is run during `TrainingArguments` object
        creation.
        train_micro_batch_size_per_gpuper_device_train_batch_sizegradient_accumulation_stepstrain_batch_sizeztrain_batch_size (calculated)gradient_clippingmax_grad_normzoptimizer.params.lrlearning_ratezoptimizer.params.betaszadam_beta1+adam_beta2zoptimizer.params.epsadam_epsilonzoptimizer.params.weight_decayweight_decayzscheduler.params.warmup_min_lrr   zscheduler.params.warmup_max_lrapexampN
checkpointuse_node_local_storagezfp16.enabledz%fp16|fp16_full_eval+fp16_backend(amp)zamp.enabledzfp16+fp16_backend(apex)zamp.opt_levelfp16_opt_levelzbf16.enabledzbf16|bf16_full_eval)
world_sizerF   rG   rC   rJ   rK   
adam_beta1
adam_beta2rL   rM   	fill_onlyfp16fp16_full_evalfp16_backendsave_on_each_noder@   r;   rR   bf16bf16_full_evalis_truetorchbfloat16r-   is_falsefloat32float16)r   argsauto_find_batch_sizerH   rY   s        r   trainer_config_processz/HfTrainerDeepSpeedConfig.trainer_config_process   s     ?T-MMPTPpp,,)$$		
 	
 	
 	),)	
 	
 	

 	+$$		
 	
 	
 	+T-?QQQ-t/A?SSS$_do.#	
 	
 	

 	.0A>RRR79JN[[[7;;;8$:Lo^^^ 9 	 + 	 %)%6&%@%@66eLLL! 	Y(,b(I(IDK%BFBXDK%&>? 	i.4.ILE4I3	
 	
 	
 	|v'=?XYYY)<>NOOO)Id6ILabbb <<'' 	(.DKKK]]>** 	(-DKKK-DKKKr"   c                 J    g d} fd|D             }t          |          dk    rud}t          |d          rt          |j        d          r|j        j        }nt          |j        d          rt	          |j        j                  }nt          |j        d          r,t          |j        j        d          r|j        j        j        }nMt          |j        d          r8t          |j        j        d          rt	          |j        j        j                  }|t          d	| d
                               d||z              	                                rB                     dt          d|z  |z                                            dd|z                                  d|d                                d|                    |          d           t           j                  dk    r-d                     j                  }t          d| d          dS )z
        This stage is run after we have the model and know num_training_steps.

        Now we can complete the configuration process.
        )$zero_optimization.reduce_bucket_size-zero_optimization.stage3_prefetch_bucket_size4zero_optimization.stage3_param_persistence_thresholdc                 >    g | ]}                     |          |S  )r7   ).0xr   s     r   
<listcomp>zDHfTrainerDeepSpeedConfig.trainer_config_finalize.<locals>.<listcomp>   s)     V V VqdllSToo V V V Vr"   r   Nr@   hidden_sizehidden_sizestext_configzThe model's config file has neither `hidden_size` nor `hidden_sizes` entry, therefore it's not possible to automatically fill out the following `auto` entries in the DeepSpeed config file: zb. You can fix that by replacing `auto` values for these keys with an integer value of your choice.rg   rh   g?ri   
   z scheduler.params.total_num_stepsznum_training_steps (calculated)z!scheduler.params.warmup_num_stepswarmup_steps
z]Please correct the following DeepSpeed config values that mismatch TrainingArguments values:
zF
The easiest method is to set these DeepSpeed config values to 'auto'.)lenhasattrr@   ro   maxrp   rq   r0   rV   is_zero3intrC   get_warmup_stepsr.   join)r   rc   modelnum_training_stepshidden_size_based_keyshidden_size_auto_keysro   r.   s   `       r   trainer_config_finalizez0HfTrainerDeepSpeedConfig.trainer_config_finalize   s   "
 "
 "

 !W V V V,B V V V$%%))Kuh'' 
M5<77 	M"',":KKU\>:: M"%el&?"@"@KKU\=99 MgelF^`m>n>n M"',":"FKKU\=99 MgelF^`n>o>o M"%el&>&K"L"LK" Y5JY Y Y   NNA;Q\C\]]]}} 	Ck)K788   J$   	.-	
 	
 	

 	/!!"455	
 	
 	
 t!##4?33Jq'q q q   $#r"   )NTF)r#   r$   r%   r&   r   r1   r7   rC   r   rV   re   r   r'   r(   s   @r   r*   r*   R   s         
    
  
! ! ![ [ [ [4 jU;;;IH( H( H( H(TC C C C C C Cr"   r*   c                 .    t          j        |           ad S r,   )weakrefref_hf_deepspeed_config_weak_ref)hf_deepspeed_config_objs    r   r   r     s    
 %,K0G$H$H!!!r"   c                  
    d a d S r,   )r   rk   r"   r   unset_hf_deepspeed_configr   !  s     %)!!!r"   c                  p    t           .t                       t                                                      S dS )NF)r   rx   rk   r"   r   is_deepspeed_zero3_enabledr   '  s1    $05R5T5T5`,..77999ur"   c                  V    t           !t                      t                      j        S d S r,   )r   r@   rk   r"   r   deepspeed_configr   .  s(    $05R5T5T5`,..55tr"   c                     t          |dd          |                                }|_        g ddt          j        ffd | |d           S )	z
    Loads state dict into a model specifically for Zero3, since DeepSpeed does not support the `transformers`
    tensor parallelism API.

    Nearly identical code to PyTorch's `_load_from_state_dict`
    	_metadataN Fmodulec                   
 i n                     d d         i           }||d<   ||dg g f}t                      rt          fd|D                       dk    rdd l}t	          |                     d d         d                    

fd|D             }t          |          dk    r`|j                            |d	          5  t          j	        
                                dk    r
 | j        |  d d d            n# 1 swxY w Y   | j                                        D ]\  }}	|	 |	||z   d
z   |           d S )Nassign_to_params_buffersTc                 >    g | ]}|                               |S rk   )
startswith)rl   keyprefixs     r   rn   zC_load_state_dict_into_zero3_model.<locals>.load.<locals>.<listcomp>M  s+    0e0e0ecnn]cNdNd0e0e0e0er"   r   F)r   recursec                 (    g | ]}|v |         S rk   rk   )rl   knamed_parameterss     r   rn   zC_load_state_dict_into_zero3_model.<locals>.load.<locals>.<listcomp>S  s*    aaa1P`K`K` 0 3K`K`K`r"   )modifier_rank.)r;   r   ru   r   dictr   zeroGatheredParametersr^   distributedget_rank_load_from_state_dict_modulesitems)r   
state_dictr   r   local_metadatarc   r   params_to_gathernamechildr   
error_msgsloadr   s     `       @r   r   z/_load_state_dict_into_zero3_model.<locals>.loadF  s   '/X\\&"+r5R5R5M12FND"b*M &'' 	<C0e0e0e0e
0e0e0e,f,fij,j,j  $F$;$;6#2#;X]$;$^$^__aaaaZaaa#$$q(( ^667GWX6YY < <(1133q8844d;;< < < < < < < < < < < < < < < "?0022 	W 	WKD% UJ(;=UVVV	W 	Ws   -DDD)r   )r   F)getattrcopyr   r	   Module)model_to_loadr   r   r   r   s     @@@r   !_load_state_dict_into_zero3_modelr   5  s     z;55H""J'
JW WRY W W W W W W W W4 	D
UCCCCr"   c                     ddl m}m} |j        }d}d|v r'|j        dk    rt          d           ||          }nG|                                rt                              d            	                                }d	|d
<   d}	d|v r ||          }	n;t          ||          r fd}
 |||
          }	n                     |          }	||	fS )zY
    A convenience wrapper that deals with optimizer and lr scheduler configuration.
    r   )
DummyOptimDummySchedulerN	optimizer	adafactorz|--adafactor was passed, but also found `optimizer` configured in the DeepSpeed config. Only one optimizer can be configured.)paramszDetected ZeRO Offload and non-DeepSpeed optimizers: This combination should work as long as the custom optimizer has both CPU and GPU implementation (except LAMB)Tzero_allow_untested_optimizer	schedulerc                 l    t          j                   }d |_        |                    |           }|S )Nr}   r   )r   lr_schedulercreate_scheduler)r   trainer_copyr   r}   trainers      r   _lr_scheduler_callablez5deepspeed_optim_sched.<locals>._lr_scheduler_callable  sC    #y11 -1)+<<'9Y  =     $#r"   )lr_scheduler_callabler   )accelerate.utilsr   r   r@   optimr0   
is_offloadloggerinfocreate_optimizer
isinstancer   )r   hf_deepspeed_configrc   r}   model_parametersr   r   r@   r   r   r   s   `  `       r   deepspeed_optim_schedr   e  sQ    <;;;;;;; 'F If:$$8   J&6777		))++ 	KKV   ,,..	26./Lf%~i00i,, 	p	$ 	$ 	$ 	$ 	$ 	$ *>)KabbbLL"33GYen3ooLl""r"   Fc                    ddl m} | j        }| j        }| j        j        j        j        }|                    |||           |	                    |
                                           |rU|                                st          d          |                    d           |                    d           d\  }}d}	nd| _        |j                            di                               d	d
          }
|
d
k    r4ddl}|                    ||
|                                |j                  }t)          t+          d |                                                    }	t/          | ||||	          \  }}||fS )a  
    Init DeepSpeed, after updating the DeepSpeed configuration with any relevant Trainer's args.

    If `resume_from_checkpoint` was passed then an attempt to resume from a previously saved checkpoint will be made.

    Args:
        trainer: Trainer object
        num_training_steps: per single gpu
        resume_from_checkpoint: path to a checkpoint if to resume from after normal DeepSpeedEngine load
        inference: launch in inference mode (no optimizer and no lr scheduler)
        auto_find_batch_size: whether to ignore the `train_micro_batch_size_per_gpu` argument as it's being
            set automatically by the auto batch size finder

    Returns: optimizer, lr_scheduler

    We may use `deepspeed_init` more than once during the life of Trainer, when we do - it's a temp hack based on:
    https://github.com/deepspeedai/DeepSpeed/issues/1394#issuecomment-937405374 until Deepspeed fixes a bug where it
    can't resume from a checkpoint after it did some stepping https://github.com/deepspeedai/DeepSpeed/issues/1612

    r   )r   zMZeRO inference only makes sense with ZeRO Stage 3 - please adjust your configr   r   )NNNtensor_parallelautotp_size   )r|   tp_sizer1   r@   c                     | j         S r,   )requires_grad)ps    r   <lambda>z deepspeed_init.<locals>.<lambda>  s     r"   )deepspeed.utilsr   r|   rc   acceleratorstatedeepspeed_pluginhf_ds_configr   setLevelget_process_log_levelrx   r0   del_config_sub_treer   r@   r;   r   tp_model_initr1   listfilter
parametersr   )r   r}   	inference	ds_loggerr|   rc   r   r   r   r   deepspeed_tp_sizer   s               r   deepspeed_initr     s   * 433333ME<D!-3DQ //e=OPPP t1133444 
"++-- 	nlmmm 	//<<<//???",	< /6::;LbQQUUVcefggq  ++))//11*1	 ,  E  '@'@%BRBRBTBT U UVV"7($0BDT#
 #
	< l""r"   Tc                 <   dd l }t          |                     | d                    }t          |          dk    rOt                              d|            |                     ||dd          \  }}|t          d|           d S t          d|           )Nr   z/global_step*zAttempting to resume from T)load_module_strictload_optimizer_statesload_lr_scheduler_statesz-[deepspeed] failed to resume from checkpoint z!Can't find a valid checkpoint at )globsortedru   r   r   load_checkpointr0   )deepspeed_enginecheckpoint_pathr   r   deepspeed_checkpoint_dirs	load_pathr   s          r   deepspeed_load_checkpointr     s    
 KKK &tyyO1R1R1R'S'S T T
$%%))BBBCCC'771"&%)	 8 
 
	1 ^_^^___  N_NNOOOr"   r   )T)%r&   r   importlib.metadatar   r   importlib.utilr   r   	functoolsr   dependency_versions_checkr   utilsr   r   r   r^   r	   
get_loggerr#   r   r   accelerate.utils.deepspeedr   DeepSpeedConfigbuiltinsr   r*   r   r   r   r   r   r   r   r   r   rk   r"   r   <module>r      s#     / / / / / /      # # # # # # 9 9 9 9 9 9 H H H H H H H H H H  LLL 
	H	%	%
 
 
  3!7!7!9!9 3OOOOOOO 322222. . . . . . . .2@ @ @ @ @0 @ @ @H !% I I I) ) )    - - -`:# :# :#z@# @# @# @#FP P P P P Pr"   