
    Pi                     j   d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlmZ ej	        j
        Z
ej	        j        Zej	        j        Zej        ZdedefdZ G d	 d
e          Ze                    e
j        j                  d             Ze                    e
j        j                  d             Ze                    e
j        j                  d             Ze                    e
j        j                  d             Ze                    ej        j        ej        j        ej        j        ej        j        e
j        j        g          d             Ze                    e
j        j                  d             Ze                    e
j        j                  d             Z eeg           dS )    N)Tensor)add_safe_globals)return_and_correct_aliasing)TorchAOBaseTensorinput
block_sizec                    | j         }|                     d|          } |                                                     d                              d          t          j        t                    j        z  }| |                    dd          z  } | 	                    t                                        d          }|                    |          |fS )Ng-q=   )
shapeviewabsamaxcliptorchfinfoDTYPEmaxto)r   r   r   scalecodess        n/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/optim/subclass_fp8.pyquantize_fp8r      s    KEJJr:&&EIIKKR  %%e,,u{5/A/A/EEEEJJr1%%%EHHUOO  $$E::ee##    c                       e Zd ZddgZededefd            ZdedefdZd Ze		 dd            Z
ddZe	dd
efd            Zd ZdS )OptimStateFp8r   r   c                 D    t          j        | |j        |j                  S )Ndevice)r   _make_wrapper_subclassr   r   )clsr   r   s      r   __new__zOptimStateFp8.__new__%   s    ,S%+elSSSSr   c                     |j         t          u sJ |j        dk    sJ || _        || _        |                                |                                z  | _        dS )a  Create quantized FP8 optimizer state.

        Args
            codes: quantized FP8 E4M3FN data. Has the same shape as the original float tensor.
            scale: scale data for block-wise quantization.

        NOTE: To get block-wise scale, the original float tensor is first reshape to (-1, block_size).
        Thus, the last dimension of the original float tensor is not necessarily divisible by block size.
        Given `codes` and `scale`, `block_size` is calculated as `codes.numel() // scale.numel()`.
        r   N)dtyper   ndimr   r   numelr   )selfr   r   s      r   __init__zOptimStateFp8.__init__)   sT     {e####zQ

++--5;;==8r   c                     | j         g fS Ntensor_attrsr'   s    r   __tensor_flatten__z OptimStateFp8.__tensor_flatten__:   s     "$$r   Nc                 :     | g fd| j         D             |R  S )Nc                      g | ]
}|         S  r1   ).0nametensor_data_dicts     r   
<listcomp>z6OptimStateFp8.__tensor_unflatten__.<locals>.<listcomp>B   s    BBBt$BBBr   r+   )r!   r4   tensor_attributes
outer_sizeouter_strides    `   r   __tensor_unflatten__z"OptimStateFp8.__tensor_unflatten__=   sE     s 
BBBB1ABBB
EV
 
 
 	
r   c                    | j                                         }|                    d| j                  | j                            dd          z  }||                    |          }|                    | j         j                  S )Nr
   r   )r   floatr   r   r   r   r   )r'   output_dtype
float_datas      r   
dequantizezOptimStateFp8.dequantizeE   sn    Z%%''
__R99DJOOBPQ<R<RR
##|44Jtz/000r      r   c                     t          j        |t          |          }t          j        |                                |z  |          } | ||          S )N)r$   r   r   )r   zerosr   r&   )r!   r   r   r   r   r   s         r   rA   zOptimStateFp8.zerosM   sK    Ev>>>EKKMMZ7GGGs5%   r   c           
      z    | j         j         d| j         dt          | j                   d| j         d| j         d
S )Nz(block_size=z, shape=z	, device=z, requires_grad=))	__class____name__r   tupler   r   requires_gradr-   s    r   __repr__zOptimStateFp8.__repr__S   sm    ~& d dDO d d4:&&d d15d dNRN`d d d	
r   )NNr*   )r?   N)rE   
__module____qualname__r,   staticmethodr   r"   r(   r.   classmethodr9   r>   intrA   rH   r1   r   r   r   r   "   s        W%LTF T6 T T T \T9f 9V 9 9 9 9"% % % PT
 
 
 [
1 1 1 1 ! !c ! ! ! [!

 
 
 
 
r   r   c                 .   |d         }|d         }t          |t                    rft          |t                    rQ|j        |j        k    sJ |j                            |j                   |j                            |j                   nt          |t                    rMt          ||j                  \  }}|j                            |           |j                            |           n'|                    |                                           |S )Nr   r   )
isinstancer   r   r   copy_r   r   r>   )functypesargskwargsdstsrcr   r   s           r   _rW   Z   s    
q'C
q'C#}%% $*S-*H*H $~////		"""		""""	C	'	' $#C88u		 			#..""###Jr   c                     |                     dd           }t          |d         j                            |          |d         j                            |                    }t          | |||          S )Nr   r   r   )getr   r   r   r   r   )rQ   rR   rS   rT   r   outs         r   rW   rW   o   sp     ZZ$''F
Q''Q'' C 'tT63???r   c                 *    d |D             } | |i |S )Nc                 d    g | ]-}t          |t                    r|                                n|.S r1   )rO   r   r>   )r2   xs     r   r5   z_.<locals>.<listcomp>|   s3    PPPajM::AALLNNNPPPr   r1   rQ   rR   rS   rT   s       r   rW   rW   z   s*    PP4PPPD4    r   c                 f    |\  }}t          |j                            |          |j                  S r*   )r   r   r   r   )rQ   rR   rS   rT   r]   r   s         r   rW   rW      s+    HAue,,ag666r   c           	          |d         }t          |t                    st          dt          |                     t           | |j        g|dd          R i | | |j        g|dd          R i |          S )Nr   z$expecting a OptimStateFp8 but found r   )rO   r   
ValueErrortyper   r   )rQ   rR   rS   rT   r]   s        r   rW   rW      s     	QAa'' KIQIIJJJ QW*tABBx***6**QW*tABBx***6**  r   c                 ~    |d         j                                         o|d         j                                        S )Nr   )r   	is_pinnedr   r^   s       r   rW   rW      s1    7=""$$Ba)@)@)B)BBr   c                    |d d         \  }}}}t          |          dk    r|d         nd}|dk    rt          d          |dk    rt          d          |j        }	t          j        |j        dd                    }
||
z  |	z  dk    s||
z  |	z  dk    r!t          d|j         d|	 d| d	| d
	          t          |j        ||         |j        ||
z  |	z  ||
z  |	z                     S )N   r   r   z+Only support aten.slice along the first dimz#Only support aten.slice with step=1zInvalid start or end for shape=z and block_size=zD. Make sure start and end align with block boundary. Received start=z, end=.)	lenra   r   mathprodr   r   r   r   )rQ   rR   rS   rT   r]   dimstartendstepr   strides              r   rW   rW      sA   bqbAsE3$ii!mm477D axxFGGGqyy>???JYqwqrr{##F 	*$))cFlj-HA-M-M2ag 2 2z 2 2#2 2+.2 2 2
 
 	
 	c		*,sV|z/IIJ  r   )ri   r   r   torch.serializationr   torch.utils._python_dispatchr   torchao.utilsr   opsatenc10d_functional_c10d_functionalfloat8_e4m3fnr   rM   r   r   
implementsrP   defaultrW   _to_copylerpScalarr   all_gather_into_tensorwait_tensordetachrd   slicer1   r   r   <module>r      sq           0 0 0 0 0 0 D D D D D D + + + + + +y~)+9- $ $C $ $ $ $5
 5
 5
 5
 5
% 5
 5
 5
p $*,--  .-( $-/00@ @ 10@ $)*++! ! ,+! $)+,,7 7 -,7
  	.6/7#+$,
 
	 	
 
	 $.011C C 21C
 $*+,,  -,8  - ! ! ! ! !r   