
    Pi                        d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlmZ ddl	m
Z
mZmZmZ ej        j        Zej        j        Zej        j        Zd dlmZ  ed	          d
             Z ed	          d             Z G d de          Ze                    ej        j                  d             Ze                    ej        j                  d             Ze                    ej        j                  d             Ze                    ej        j                  d             Ze                    ej        j        ej        j        ej         j        ej         j        ej!        j        g          d             Ze                    ej"        j                  d             Ze                    ej#        j                  d             Z eeg           dS )    N)Tensor)add_safe_globals)return_and_correct_aliasing)TorchAOBaseTensor   )create_dynamic_mapdequant_with_qmapquantize_8bit_with_qmapscale_tensor)	lru_cache)maxsizec                  "    t          d          S )NTsignedr        o/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/optim/subclass_8bit.pyget_qmap_signedr      s    T****r   c                  "    t          d          S )NFr   r   r   r   r   get_qmap_unsignedr   #   s    U++++r   c            	           e Zd Zg dZededededefd            ZdedededefdZd Z	e
	 dd
            ZddZe
ddedefd            Zd Zd	S )OptimState8bit)codesscaleqmapr   r   r   r   c                 D    t          j        | |j        |j                  S )Ndevice)r   _make_wrapper_subclassshaper   )clsr   r   r   r   s        r   __new__zOptimState8bit.__new__+   s    ,S%+elSSSSr   c                    |j         t          j        u sJ |j        dk    sJ |j         t          j        u sJ || _        || _        || _        || _        |	                                |	                                z  | _
        dS )a  Create quantized 8-bit optimizer state as proposed in https://arxiv.org/abs/2110.02861

        Args
            codes: quantized 8-bit data stored as uint8. Has the same shape as the original float tensor.
            scale: scale data for block-wise quantization.
            qmap: lookup table that maps between quantized value (code) and float value.
            signed: whether the tensor is signed or unsigned.

        NOTE: To get block-wise scale, the original float tensor is first reshape to (-1, block_size).
        Thus, the last dimension of the original float tensor is not necessarily divisible by block size.
        Given `codes` and `scale`, `block_size` is calculated as `codes.numel() // scale.numel()`.
        r   N)dtypetorchuint8ndimfloat32r   r   r   r   numel
block_size)selfr   r   r   r   s        r   __init__zOptimState8bit.__init__/   sy     {ek))))zQzU]****

	++--5;;==8r   c                      | j         | j        gfS N)tensor_attrsr   r,   s    r   __tensor_flatten__z!OptimState8bit.__tensor_flatten__E   s     4;-//r   Nc                 :     | g fd| j         D             |R  S )Nc                      g | ]
}|         S r   r   ).0nametensor_data_dicts     r   
<listcomp>z7OptimState8bit.__tensor_unflatten__.<locals>.<listcomp>M   s    BBBt$BBBr   )r0   )r"   r7   tensor_attributes
outer_sizeouter_strides    `   r   __tensor_unflatten__z#OptimState8bit.__tensor_unflatten__H   sE     s 
BBBB1ABBB
EV
 
 
 	
r   c                 t    t          | j        | j        | j                  }||                    |          }|S r/   )r	   r   r   r   to)r,   output_dtype
float_datas      r   
dequantizezOptimState8bit.dequantizeP   s6    &tz49djII
##|44Jr   T   r+   c                 4   t          j        |t           j        |          }t          j        |                                |z  |          }|rt	                      nt                      }t          j        |t           j        |          } | ||||          S )N)r%   r   r   )r&   zerosr'   r*   r   r   tensorr)   )	r"   r!   r   r+   r   r   r   	qmap_listr   s	            r   rD   zOptimState8bit.zerosV   s    EVDDDEKKMMZ7GGG)/HO%%%5F5H5H	|IU]6JJJs5%v...r   c                     | j         j         d| j         d| j         dt	          | j                   d| j         d| j         dS )Nz(signed=z, block_size=z, shape=z	, device=z, requires_grad=))	__class____name__r   r+   tupler!   r   requires_gradr1   s    r   __repr__zOptimState8bit.__repr__^   s}    ~& d d d d$/ d d4:&&d d15d dNRN`d d d	
r   )NNr/   )TrB   N)rJ   
__module____qualname__r0   staticmethodr   boolr#   r-   r2   classmethodr<   rA   intrD   rM   r   r   r   r   r   (   s       ---LTF T6 T T T T T \T9f 9V 96 94 9 9 9 9,0 0 0 PT
 
 
 [
    / /$ /3 / / / [/
 
 
 
 
r   r   c                 x   |d         }|d         }t          |t                    rvt          |t                    ra|j        |j        k    r|j        |j        k    sJ |j                            |j                   |j                            |j                   nt          |t                    rbt          ||j                  \  }}t          ||j	                  }|j                            |           |j                            |           n'|                    |
                                           |S )Nr   r   )
isinstancer   r   r+   r   copy_r   r   r
   r   rA   )	functypesargskwargsdstsrc
scaled_srcr   r   s	            r   _r^   e   s   
q'C
q'C#~&& $:c>+J+J $zSZ''CNcn,L,L,LL		"""		"""" 
C	(	( $(cn==
E'
CH==		 			#..""###Jr   c                 F   |                     dd           }t          |d         j                            |          |d         j                            |          |d         j                            |          |d         j                  }t          | |||          S )Nr   r   r   )getr   r   r>   r   r   r   r   )rW   rX   rY   rZ   r   outs         r   r^   r^   |   s     ZZ$''F
Q''Q''Qv&&Q	 C 'tT63???r   c                 *    d |D             } | |i |S )Nc                 d    g | ]-}t          |t                    r|                                n|.S r   )rU   r   rA   )r5   xs     r   r8   z_.<locals>.<listcomp>   s3    QQQqjN;;BALLNNNQQQr   r   rW   rX   rY   rZ   s       r   r^   r^      s*    QQDQQQD4    r   c                 ~    |\  }}t          |j                            |          |j        |j        |j                  S r/   )r   r   viewr   r   r   )rW   rX   rY   rZ   rd   r!   s         r   r^   r^      s3    HAu!',,u--qwIIIr   c           	      2   |d         }t          |t                    st          dt          |                     t           | |j        g|dd          R i | | |j        g|dd          R i ||j                                        |j                  S )Nr   z%expecting a OptimState8bit but found r   )	rU   r   
ValueErrortyper   r   r   cloner   )rW   rX   rY   rZ   rd   s        r   r^   r^      s     	QAa(( LJaJJKKK QW*tABBx***6**QW*tABBx***6**			  r   c                     |d         j                                         o=|d         j                                        o|d         j                                        S )Nr   )r   	is_pinnedr   r   re   s       r   r^   r^      sR     	Q!! 	%GM##%%	%GL""$$r   c                    |d d         \  }}}}t          |          dk    r|d         nd}|dk    rt          d          |dk    rt          d          |j        }	t          j        |j        dd                    }
||
z  |	z  dk    s||
z  |	z  dk    r!t          d|j         d|	 d| d	| d
	          t          |j        ||         |j        ||
z  |	z  ||
z  |	z           |j	        
                                |j                  S )N   r   r   z+Only support aten.slice along the first dimz#Only support aten.slice with step=1zInvalid start or end for shape=z and block_size=zD. Make sure start and end align with block boundary. Received start=z, end=.)lenri   r+   mathprodr!   r   r   r   r   rk   r   )rW   rX   rY   rZ   rd   dimstartendstepr+   strides              r   r^   r^      sS   bqbAsE3$ii!mm477D axxFGGGqyy>???JYqwqrr{##F 	*$))cFlj-HA-M-M2ag 2 2z 2 2#2 2+.2 2 2
 
 	
 	c		*,sV|z/IIJ			  r   )$rr   r&   r   torch.serializationr   torch.utils._python_dispatchr   torchao.utilsr   quant_utilsr   r	   r
   r   opsatenc10d_functional_c10d_functional	functoolsr   r   r   r   
implementsrV   defaultr^   _to_copylerpScalarrg   all_gather_into_tensorwait_tensordetachrm   slicer   r   r   <module>r      s           0 0 0 0 0 0 D D D D D D + + + + + +            y~)+9-         1+ + + 1, , ,:
 :
 :
 :
 :
& :
 :
 :
z 4:-..  /., 4=011	@ 	@ 21	@ 49+,,! ! -,! 49,--J J .-J
  	.6/7#+$,
 
 
 
" 4>122  32 4:,--  .-<  .! " " " " "r   