
    Pi2                        d dl mZ d dlZd dlmZ d dlmZ d dlmZ ddlm	Z	 ddl
mZ dd	lmZ dd
lmZ  G d de          Zdedededededee         dededededededefdZ G d de          Z G d de          Z G d d e          Z G d! d"e          Z G d# d$e          Z G d% d&e          Z G d' d(e          ZdS ))    )OptionalN)Tensor)DTensor)	Optimizer   )_fp32_to_bf16_sr)OptimState4bit)OptimState8bit)OptimStateFp8c                        e Zd Z	 	 d fdZdeddf fdZ fdZedede	d	e
fd
            Zdede	fdZ ej                    dd            Z xZS )	_AdamBasereturnNc                2   d|k    s"t          d                    |                    d|k    s"t          d                    |                    d|d         cxk    rdk     s*n t          d                    |d                             d|d         cxk    rdk     s*n t          d                    |d                             t          |||||	          }
t                                          ||
           || _        || _        |	| _        d S )
N        zInvalid learning rate: {}zInvalid epsilon value: {}r   g      ?z%Invalid beta parameter at index 0: {}r   z%Invalid beta parameter at index 1: {})lrbetasepsweight_decayamsgrad)
ValueErrorformatdictsuper__init__
block_sizebf16_stochastic_roundis_adamw)selfparamsr   r   r   r   r   r   r   r   defaults	__class__s              f/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/optim/adam.pyr   z_AdamBase.__init__   s4    byy8??CCDDDczz8??DDEEEeAh$$$$$$$$DKKERSHUUVVVeAh$$$$$$$$DKKERSHUUVVV%
 
 
 	***$%:"     param_groupc                     t                                          |           | j        d         }t          |d         t                    s+t          j        |d         t
          j                  |d<   d S d S )Nr   )dtype)r   add_param_groupparam_groups
isinstancer   torchtensorfloat32)r   r$   groupr!   s      r"   r(   z_AdamBase.add_param_group5   so    ,,, !"%%+v.. 	I,uT{%-HHHE$KKK	I 	Ir#   c                     t                                          |           | j        D ]}|                    dd           d S )Nr   F)r   __setstate__r)   
setdefault)r   stater.   r!   s      r"   r0   z_AdamBase.__setstate__=   sP    U###& 	/ 	/EY....	/ 	/r#   psignedr   c                     t           N)NotImplementedErrorr3   r4   r   s      r"   _subclass_zerosz_AdamBase._subclass_zerosC   s    !!r#   c           	         t          |t                    r|                                n|}|                                dk    r=|                                | j        z  dk    r|                     ||| j                  }nt          j        |          }t          |t                    r;t          j        ||j	        |j
        d|j        |                                          }|                    |j                  }|S )Ni   r   F)local_tensordevice_mesh
placements	run_checkshapestride)r*   r   to_localnumelr   r9   r+   
zeros_like
from_localr<   r=   r?   r@   todevice)r   r3   r4   local_pouts        r"   _new_bufferz_AdamBase._new_bufferG   s    ",Q"8"8?!**,,,a ==??d""w}}'HA'M'M&&wHHCC"7++C a!! 	$ M<gxxzz  C ffQX
r#   c                 T   d }|5t          j                    5   |            }d d d            n# 1 swxY w Y   t           j        j                                        5  | j        D ]}|d         D ]}|j        |j        }|j        rt          d          | j	        |         }t          |          dk    rjt          j        d          |d<   |                     |d          |d<   |                     |d          |d	<   |d
         r|                     |d          |d<   |dxx         dz  cc<   t          |d         t                    st          d           t          j        t           dd          |                                ||d         |d         |d	         |                    dd           |d         |d         d         |d         d         |d         |d         | j        | j        o|j        t           j        u            	 d d d            n# 1 swxY w Y   |S )Nr   z Sparse gradient is not supportedr   r   stepTexp_avgF
exp_avg_sqr   max_exp_avg_sqr   r   zulr was changed to a non-Tensor object. If you want to update lr, please use optim.param_groups[0]['lr'].fill_(new_lr))	fullgraphdynamicr   r   r   )r+   enable_grad_dynamoutilsdisable_cache_limitr)   grad	is_sparseRuntimeErrorr2   lenr,   rI   r*   r   compilesingle_param_adamdetachgetr   r   r'   bfloat16)r   closurelossr.   r3   rU   r2   s          r"   rK   z_AdamBase.stepe   s   "$$ ! !wyy! ! ! ! ! ! ! ! ! ! ! ! ! ! !
 ] 4466 .	 .	* - -x , ,Av~ 6D~ O*+MNNN JqME 5zzQ(-S(9(9f+/+;+;At+D+Di(.2.>.>q%.H.Hl+ + Q6:6F6Fq%6P6PE"23&MMMQ&MMM%eDk6:: *H   TEM"3tUSSS

fi(l+		"2D99dgq)gq)n-e2Pqw%.7P   =,-.	 .	 .	 .	 .	 .	 .	 .	 .	 .	 .	 .	 .	 .	 .	` s   /33F3HH!$H!r   Nr6   )__name__
__module____qualname__r   r   r(   r0   staticmethodr   boolintr9   rI   r+   no_gradrK   __classcell__r!   s   @r"   r   r      s       ! 
! ! ! ! ! !BI4 ID I I I I I I/ / / / / "6 "4 "S " " " \"V T    < U]__8 8 8 _8 8 8 8 8r#   r   r3   rU   rK   rL   rM   rN   r   beta1beta2r   r   IS_ADAMWBF16_STOCHASTIC_ROUNDc                    |                                  }|                                 }|r|||	z  |z  z
  }n||	|z  z   }d||z  z
  }d||z  z
  }|                                                     |d|z
            }|                                                     |                                d|z
            }|                    |           |                    |           |it	          j        |                                 |          }|                    |           |                                |                                z  |
z   }n,|                                |                                z  |
z   }||||z  z  |z  z
  }|r$|                     t          |                     d S |                     |           d S )Nr   )floatlerpsquarecopy_r+   maximumsqrtr   )r3   rU   rK   rL   rM   rN   r   rj   rk   r   r   rl   rm   p_f32grad_f32bias_correction1bias_correction2exp_avg_f32exp_avg_sq_f32max_exp_avg_sq_f32denoms                        r"   rZ   rZ      s     GGIIEzz||H 3\)E11lU225$;5$; --//&&xU;;K%%'',,X__->->E	JJNMM+^$$$!"]>+?+?+A+A>RR/000#((**-=-B-B-D-DDK$$&&)9)>)>)@)@@CGB+(889EAAE 	 ''(((((	r#   c                   Z     e Zd Z	 	 	 	 	 dddd	 d fd
Zedededefd            Z xZ	S )Adam8bitMbP?g?g+?:0yE>r   F   r   r   r   Nc                    t                                          ||||||||d	  	         t          j                            d           d S )NFr   r   r   ztorchao.optim.Adam8bitr   r   r+   _C_log_api_usage_once
r   r   r   r   r   r   r   r   r   r!   s
            r"   r   zAdam8bit.__init__   `     	!"7 	 
	
 
	
 
	
 	$$%=>>>>>r#   r3   r4   r   c                 D    t          j        | j        ||| j                  S r6   r
   zerosr?   rF   r8   s      r"   r9   zAdam8bit._subclass_zeros       #AGVZJJJr#   r   r   r   r   Fr`   
ra   rb   rc   r   rd   r   re   rf   r9   rh   ri   s   @r"   r~   r~               ? #? ? 
? ? ? ? ? ?2 K6 K4 KS K K K \K K K K Kr#   r~   c                   Z     e Zd Z	 	 	 	 	 dddd	 d fd
Zedededefd            Z xZ	S )Adam4bitr   r   r   r   F   r   r   Nc                    t                                          ||||||||d	  	         t          j                            d           d S )NFr   ztorchao.optim.Adam4bitr   r   s
            r"   r   zAdam4bit.__init__   r   r#   r3   r4   r   c                 D    t          j        | j        ||| j                  S r6   r	   r   r?   rF   r8   s      r"   r9   zAdam4bit._subclass_zeros  r   r#   r   r`   r   ri   s   @r"   r   r      r   r#   r   c                   Z     e Zd Z	 	 	 	 	 dddd	 d fd
Zedededefd            Z xZ	S )AdamFp8r   r   r   r   Fr   r   r   Nc                    t                                          ||||||||d	  	         t          j                            d           d S )NFr   ztorchao.optim.AdamFp8r   r   s
            r"   r   zAdamFp8.__init__  s`     	!"7 	 
	
 
	
 
	
 	$$%<=====r#   r3   r4   r   c                 B    t          j        | j        || j                  S r6   r   r   r?   rF   r8   s      r"   r9   zAdamFp8._subclass_zeros,      "17JAAAr#   r   r`   r   ri   s   @r"   r   r     s         > #> > 
> > > > > >2 B6 B4 BS B B B \B B B B Br#   r   c                   Z     e Zd Z	 	 	 	 	 dddd	 d fd
Zedededefd            Z xZ	S )	AdamW8bitr   r   r   {Gz?Fr   r   r   Nc                    t                                          ||||||||d	  	         t          j                            d           d S )NTr   ztorchao.optim.AdamW8bitr   r   s
            r"   r   zAdamW8bit.__init__2  `     	!"7 	 
	
 
	
 
	
 	$$%>?????r#   r3   r4   r   c                 D    t          j        | j        ||| j                  S r6   r   r8   s      r"   r9   zAdamW8bit._subclass_zerosK  r   r#   r   r   r   r   Fr`   r   ri   s   @r"   r   r   1           @ #@ @ 
@ @ @ @ @ @2 K6 K4 KS K K K \K K K K Kr#   r   c                   Z     e Zd Z	 	 	 	 	 dddd	 d fd
Zedededefd            Z xZ	S )	AdamW4bitr   r   r   r   Fr   r   r   Nc                    t                                          ||||||||d	  	         t          j                            d           d S )NTr   ztorchao.optim.AdamW4bitr   r   s
            r"   r   zAdamW4bit.__init__Q  r   r#   r3   r4   r   c                 D    t          j        | j        ||| j                  S r6   r   r8   s      r"   r9   zAdamW4bit._subclass_zerosj  r   r#   r   r`   r   ri   s   @r"   r   r   P  r   r#   r   c                   Z     e Zd Z	 	 	 	 	 dddd	 d fd
Zedededefd            Z xZ	S )AdamWFp8r   r   r   r   Fr   r   r   Nc                    t                                          ||||||||d	  	         t          j                            d           d S )NTr   ztorchao.optim.AdamWFp8r   r   s
            r"   r   zAdamWFp8.__init__p  s`     	!"7 	 
	
 
	
 
	
 	$$%=>>>>>r#   r3   r4   r   c                 B    t          j        | j        || j                  S r6   r   r8   s      r"   r9   zAdamWFp8._subclass_zeros  r   r#   r   r`   r   ri   s   @r"   r   r   o  s         ? #? ? 
? ? ? ? ? ?2 B6 B4 BS B B B \B B B B Br#   r   c                   4     e Zd Z	 	 	 	 	 d
dd	 d fd	Z xZS )_AdamWr   r   r   r   F)r   r   Nc                v    t                                          ||||||t          d          |d	  	         dS )zAdamW optimizer that supports quantized training (parameter is quantized). This optimizer should
        only be used with torchao's quantized training.infTr   N)r   r   ro   )	r   r   r   r   r   r   r   r   r!   s	           r"   r   z_AdamW.__init__  sQ     	U||"7 	 
	
 
	
 
	
 
	
 
	
r#   r   r`   )ra   rb   rc   r   rh   ri   s   @r"   r   r     sg         
 $
 
 

 
 
 
 
 
 
 
 
 
r#   r   )typingr   r+   r   torch.distributed._tensorr   torch.optimr   quant_utilsr   subclass_4bitr	   subclass_8bitr
   subclass_fp8r   r   ro   re   rZ   r~   r   r   r   r   r   r    r#   r"   <module>r      s                - - - - - - ! ! ! ! ! ! ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ' ' ' ' ' 'K K K K K	 K K K`..
. . 	.
 . V$. 	. . . . 
. .  . . . .bK K K K Ky K K K>K K K K Ky K K K>B B B B Bi B B B>K K K K K	 K K K>K K K K K	 K K K>B B B B By B B B>
 
 
 
 
Y 
 
 
 
 
r#   