
    Pi                         d dl Z d dlmZ d dlZd dlmZ d dlmZ 	 	 ddej        j	        de
d	e
d
ede
defdZdeej        j	        ef         defdZdS )    N)Union)LambdaLR)OptimizerInBackwardWrapper      ?	optimizernum_warmup_stepsnum_training_steps
num_cycles
last_epochreturnc                 V    dt           dt          ffd}t          | ||          S )a  
    Create a learning rate schedule that linearly increases the learning rate from
    0.0 to lr over ``num_warmup_steps``, then decreases to 0.0 on a cosine schedule over
    the remaining ``num_training_steps-num_warmup_steps`` (assuming ``num_cycles`` = 0.5).

    This is based on the Hugging Face implementation
    https://github.com/huggingface/transformers/blob/v4.23.1/src/transformers/optimization.py#L104.

    Args:
        optimizer (torch.optim.Optimizer): The optimizer for which to
            schedule the learning rate.
        num_warmup_steps (int): The number of steps for the warmup phase.
        num_training_steps (int): The total number of training steps.
        num_cycles (float): The number of waves in the cosine schedule. Defaults to 0.5
            (decrease from the max value to 0 following a half-cosine).
        last_epoch (int): The index of the last epoch when resuming training. Defaults to -1

    Returns:
        torch.optim.lr_scheduler.LambdaLR with the appropriate schedule.
    current_stepr   c                     | k     r| t          d          z  S | z
  t          dz
            z  }ddt          j        t          j        z  dz  |z            z   z  }t          d|          S )N   r   g      ?g       @g        )maxmathcospi)r   progresscosine_lr_multipler   r
   r	   s      t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchtune/training/lr_schedulers.py	lr_lambdaz2get_cosine_schedule_with_warmup.<locals>.lr_lambda+   s    ***#a)9":"::: !#33s!$448
 8
 
 !$(47Z/#5@AAA
 3*+++    )intfloatr   )r   r	   r
   r   r   r   s    ```  r   get_cosine_schedule_with_warmupr      sO    8, , , , , , , , , , Iy*555r   c                    t          | t                    rMg }|                                                                 D ]#}|                    |d         d                    $n| j        }t          |          dk     rt          dt          |                     |d         d         }|D ]}|d         |k    rt          d          |S )aP  
    Full_finetune_distributed and full_finetune_single_device assume all optimizers have
    the same LR, here to validate whether all the LR are the same and return if True.

    Args:
        optimizer (Union[torch.optim.Optimizer, OptimizerInBackwardWrapper]): A general
            optimizer input that could whether be a general optimizer or an optimizer
            warpper based on optimizer_in_backward.

    Returns:
        lr (float): The learning rate of the input optimizers.

    Raises:
        RuntimeError: If the learning rates of the input optimizer are not the same.
    param_groupsr   r   z,Invalid optimizer param groups with len of: lrz4LR Schedulers are different across all param groups )
isinstancer   
state_dictvaluesappendr   lenRuntimeError)r   r   paramr    groups        r   get_lrr)   =   s    $ )788 .))++2244 	: 	:En 5a 89999	: !-
<1N3|;L;LNN
 
 	

 
a	B W W;"UVVV Ir   )r   r   )r   typingr   torchtorch.optim.lr_schedulerr   torchtune.training.memoryr   optim	Optimizerr   r   r   r)    r   r   <module>r1      s            - - - - - - @ @ @ @ @ @ +6 +6{$+6+6 +6 	+6
 +6 +6 +6 +6 +6\"U[*,FFG"
" " " " " "r   