
    Pi"                        d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlmZ ddl	m
Z
mZmZmZ ej        j        Zej        j        Zej        j        Zd dlmZ  ed	          d
             Z ed	          d             Z G d de          Ze                    ej        j                  d             Ze                    ej        j                  d             Ze                    ej        j                  d             Ze                    ej        j                  d             Ze                    ej        j        ej        j        ej         j        ej         j        ej!        j        g          d             Ze                    ej"        j                  d             Ze                    ej#        j                  d             Z eeg           dS )    N)Tensor)add_safe_globals)return_and_correct_aliasing)TorchAOBaseTensor   )create_dynamic_mapdequant_with_qmapquantize_4bit_with_qmapscale_tensor)	lru_cache)maxsizec                  $    t          ddd          S )NT      )r        o/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/optim/subclass_4bit.pyget_qmap_signedr   "   s    dAq)))r   c                  f    t          j        dddd          dd                                          S )Nr   r      cpudevice)torchlinspacetolistr   r   r   get_qmap_unsignedr   '   s0    >!Q5111!""5<<>>>r   c            	           e Zd Zg dZededededefd            ZdedededefdZd Z	e
	 dd
            ZddZe
ddedefd            Zd Zd	S )OptimState4bit)codesscaleqmapr    r!   r"   signedc                 :    t          j        | ||j                  S )Nr   )r   _make_wrapper_subclassr   )clsr    r!   r"   r#   shapes         r   __new__zOptimState4bit.__new__/   s    ,S%MMMMr   c                 6   |j         t          j        u sJ |j        dk    sJ |j        dk    sJ |j         t          j        u sJ || _        || _        || _        || _        || _	        |
                                dz  |
                                z  | _        dS )aA  Create quantized 4-bit optimizer state as proposed in https://arxiv.org/abs/2309.01507

        Args
            codes: quantized and packed 4-bit data stored as uint8.
            scale: scale data for block-wise quantization.
            qmap: lookup table that maps between quantized value (code) and float value.
            signed: whether the tensor is signed or unsigned.
            shape: shape of original float tensor.

        NOTE: To get block-wise scale, the original float tensor is first reshape to (-1, block_size).
        Thus, the last dimension of the original float tensor is not necessarily divisible by block size.
        Given `codes` and `scale`, `block_size` is calculated as `codes.numel() * 2 // scale.numel()`.
        The extra `* 2` is because `codes` is 4-bit data packed in 8-bit storage.
        r      N)dtyper   uint8ndimfloat32r    r!   r"   r#   _shapenumel
block_size)selfr    r!   r"   r#   r'   s         r   __init__zOptimState4bit.__init__3   s     {ek))))zQzQzU]****

	++--!+u{{}}<r   c                 ,    | j         | j        | j        gfS N)tensor_attrsr#   r/   r2   s    r   __tensor_flatten__z!OptimState4bit.__tensor_flatten__M   s     4;"<<<r   Nc                 :     | g fd| j         D             |R  S )Nc                      g | ]
}|         S r   r   ).0nametensor_data_dicts     r   
<listcomp>z7OptimState4bit.__tensor_unflatten__.<locals>.<listcomp>U   s    BBBt$BBBr   )r6   )r&   r=   tensor_attributes
outer_sizeouter_strides    `   r   __tensor_unflatten__z#OptimState4bit.__tensor_unflatten__P   sE     s 
BBBB1ABBB
EV
 
 
 	
r   c                     t          j        | j        dz	  | j        dz  gd          }t          || j        | j                  }||                    |          }|                    | j                  S )Nr      )dim)	r   stackr    r	   r"   r!   toviewr/   )r2   output_dtyper    
float_datas       r   
dequantizezOptimState4bit.dequantizeX   sh    TZ1_dj6.ABKKK&udiDD
##|44Jt{+++r   T   r1   c                 t   t          |t                    r|fn|}t          j        |          }t	          j        |dz  t          j        |          }t	          j        ||z  |          }|rt                      nt                      }t	          j	        |t          j
        |          }	 | |||	||          S )Nr*   )r+   r   r   )
isinstanceintmathprodr   zerosr,   r   r   tensorr.   )
r&   r'   r#   r1   r   n_elemsr    r!   	qmap_listr"   s
             r   rS   zOptimState4bit.zeros_   s    &uc22=)E""GqLFKKKGz1&AAA)/HO%%%5F5H5H	|IU]6JJJs5%vu555r   c                     | j         j         d| j         d| j         dt	          | j                   d| j         d| j         dS )Nz(signed=z, block_size=z, shape=z	, device=z, requires_grad=))	__class____name__r#   r1   tupler'   r   requires_gradr7   s    r   __repr__zOptimState4bit.__repr__j   s}    ~& d d d d$/ d d4:&&d d15d dNRN`d d d	
r   )NNr5   )TrM   N)rZ   
__module____qualname__r6   staticmethodr   boolr(   r3   r8   classmethodrB   rL   rP   rS   r]   r   r   r   r   r   ,   s       ---LNF N6 N N N N N \N=f =V =6 =4 = = = =4= = = PT
 
 
 [
, , , , 6 6$ 63 6 6 6 [6
 
 
 
 
r   r   c                    |d         }|d         }t          |t                    rt          |t                    rq|j        |j        k    r |j        |j        k    r|j        |j        k    sJ |j                            |j                   |j                            |j                   nt          |t                    rt          |	                    d          |j                  \  }}t          ||j                  }|j                            |d d d         dz  |dd d         z             |j                            |           n'|                    |                                           |S )Nr   r   rE   r*   r   )rO   r   r#   r1   r/   r    copy_r!   r   rI   r
   r"   rL   )	functypesargskwargsdstsrc
scaled_srcr!   r    s	            r   _rl   q   sM   
q'C
q'C#~&& $:c>+J+J $J#*$$#.00
cj((() 			"""		"""" 
C	(	( $("s~FF
E'
CH==	sssqE!$Q$K7888	 			#..""###Jr   c                 ^   |                     dd           }t          |d         j                            |          |d         j                            |          |d         j                            |          |d         j        |d         j                  }t          | |||          S )Nr   r   r   )	getr   r    rH   r!   r"   r#   r'   r   )re   rf   rg   rh   r   outs         r   rl   rl      s     ZZ$''F
Q''Q''Qv&&QQ C 'tT63???r   c                 *    d |D             } | |i |S )Nc                 d    g | ]-}t          |t                    r|                                n|.S r   )rO   r   rL   )r;   xs     r   r>   z_.<locals>.<listcomp>   s3    QQQqjN;;BALLNNNQQQr   r   re   rf   rg   rh   s       r   rl   rl      s*    QQDQQQD4    r   c                    |\  }}t          |j                  t          |          k    r,t          |j        |j        |j        |j        |j                  S t          |          dk    rF|d         dk    r:t          |j        |j        |j        |j        |	                                f          S t          |j        j         d          )Nr   r   rE   z4 only supports .view() with same shape or shape=[-1])r[   r'   r   r    r!   r"   r#   r/   lenr0   
ValueErrorrY   rZ   )re   rf   rg   rh   rr   r'   s         r   rl   rl      s    HAuQW~~u%%agqw!(KKK
5zzQ58r>>agqw17799,OOO
;UUU  r   c                    |d         }t          |t                    st          dt          |                      | |j        g|dd          R i |} | |j        g|dd          R i |}|j        d         |                                z  |j                                        z  f|j        dd          z   }t          |||j        	                                |j
        |          S )Nr   z%expecting a OptimState4bit but found r   )rO   r   rv   typer    r!   r/   r0   r"   cloner#   )re   rf   rg   rh   rr   r    r!   r'   s           r   rl   rl      s     	QAa(( LJaJJKKKD.48...v..ED.48...v..E Xa[5;;==(AGMMOO;=LE %%HHHr   c                     |d         j                                         o=|d         j                                        o|d         j                                        S )Nr   )r    	is_pinnedr!   r"   rs   s       r   rl   rl      sR     	Q!! 	%GM##%%	%GL""$$r   c                    |d d         \  }}}}t          |          dk    r|d         nd}|dk    rt          d          |dk    rt          d          |j        }	t          j        |j        dd                    }
||
z  |	z  dk    s||
z  |	z  dk    r!t          d|j         d|	 d| d	| d
	          |j        ||
z  dz  ||
z  dz           }|j        ||
z  |	z  ||
z  |	z           }|j        d         |                                z  |j                                        z  f|j        dd          z   }t          |||j
                                        |j        |          S )Nr   r   r   z+Only support aten.slice along the first dimz#Only support aten.slice with step=1zInvalid start or end for shape=z and block_size=zD. Make sure start and end align with block boundary. Received start=z, end=.r*   )ru   rv   r1   rQ   rR   r'   r    r!   r0   r   r"   ry   r#   )re   rf   rg   rh   rr   rF   startendstepr1   strider    r!   r'   s                 r   rl   rl      s   bqbAsE3$ii!mm477D axxFGGGqyy>???JYqwqrr{##F 	*$))cFlj-HA-M-M2ag 2 2z 2 2#2 2+.2 2 2
 
 	
 GEFNa'#,!*;;<EGEFNj03<:3MMNE WQZ%++--'17==??:<qwqrr{JE%%HHHr   )$rQ   r   r   torch.serializationr   torch.utils._python_dispatchr   torchao.utilsr   quant_utilsr   r	   r
   r   opsatenc10d_functional_c10d_functional	functoolsr   r   r   r   
implementsrd   defaultrl   _to_copylerpScalarrI   all_gather_into_tensorwait_tensordetachr{   slicer   r   r   <module>r      s           0 0 0 0 0 0 D D D D D D + + + + + +            y~)+9-         1* * * 1? ? ?B
 B
 B
 B
 B
& B
 B
 B
J 4:-..  /.4 4=011
@ 
@ 21
@ 49+,,! ! -,! 49,--  .-  	.6/7#+$,
 
I I
 
I$ 4>122  32 4:,--I I .-IB  .! " " " " "r   