
    Pio6                        d dl mZmZ d dlZd dlmc mZ d dlm	Z	 d dl
mZ d dlmZ ddlmZmZ ddlmZ dd	lmZ  G d
 dej        j                  Z G d de          Z G d de          Z G d dej        j                  ZdS )    )AnyOptionalN)TorchAODType)TwoStepQuantizer)get_group_qparams_symmetric   )FakeQuantizeConfigBaseIntxFakeQuantizeConfig)FakeQuantizerBase)_get_qmin_qmaxc                       e Zd ZdZ	 	 	 	 	 	 ddededee         dee         d	ed
ededee         ddf fdZ	de
j        de
j        fdZde
j        j        fdZe	 dde
j        j        dee         fd            Z xZS )FakeQuantizedEmbeddinga  
    General embedding layer with fake quantized weights.

    Specific target dtypes, granularity, schemes etc. are specified
    through separate configs for weights and activations.

    Example usage::

        weight_config = IntxFakeQuantizeConfig(
            dtype=torch.int4,
            group_size=8,
            symmetric=True,
        )
        fq_embedding = FakeQuantizedEmbedding(5, 10, weight_config)
        fq_embedding(torch.LongTensor([3]))
    N       @Fnum_embeddingsembedding_dimpadding_idxmax_norm	norm_typescale_grad_by_freqsparseweight_configreturnc	           	           t                      j        |||||||g|	R i |
 t          j                            d           |t          j        |          | _        d S d | _        d S )Nz/torchao.quantization.qat.FakeQuantizedEmbedding)super__init__torch_C_log_api_usage_oncer   from_configweight_fake_quantizer)selfr   r   r   r   r   r   r   r   argskwargs	__class__s              v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/quantization/qat/embedding.pyr   zFakeQuantizedEmbedding.__init__,   s     	
	
 
	
 
	
 
	
 
	
 
	
 
	
 	$$%VWWW$):)F})U)UD&&&)-D&&&    xc           	          | j         |                      | j                  }n| j        }t          j        ||| j        | j        | j        | j        | j                  S N)	r    weightF	embeddingr   r   r   r   r   )r!   r'   ws      r%   forwardzFakeQuantizedEmbedding.forwardJ   s^    %1**4;77AAA{MN#K
 
 	
r&   c                     t           j                            | j        | j        | j        | j        | j        | j        | j	        | j
        j        | j
        j        	  	        }| j
        j        t          j        d          k    r| j
        |_
        |S )N)devicedtypemeta)r   nn	Embeddingr   r   r   r   r   r   r   r*   r0   r1   )r!   new_embeddings     r%   to_embeddingz#FakeQuantizedEmbedding.to_embeddingY   s    **MN#K;%+# + 

 

 ;f!5!555#';M r&   modc                    t          |j        |j        |j        |j        |j        |j        |j        ||j        j	        |j        j
        
  
        }|j        j	        t          j	        d          k    r|j        |_        |S )N)r   r0   r1   r2   )r   r   r   r   r   r   r   r   r*   r0   r1   r   )clsr7   r   r5   s       r%   from_embeddingz%FakeQuantizedEmbedding.from_embeddingl   s}     /OLM"J':$*"
 
 
 :V 4 444#&:M r&   )NNr   FFNr)   )__name__
__module____qualname____doc__intr   floatboolr	   r   r   Tensorr.   r3   r4   r6   classmethodr:   __classcell__r$   s   @r%   r   r      sS        * &*$(#(:>. .. . c]	.
 5/. . !. .   67. 
. . . . . .<
 
%, 
 
 
 
eh0    &  ;? X   67   [    r&   r   c            	           e Zd ZdZdej        ej        fdedej        dej        ddf fdZ	d	ej
        j        d
ededej
        j        fdZd	ej
        j        d
ededej
        j        fdZdej
        j        fdZ xZS )#Int4WeightOnlyEmbeddingQATQuantizerz
    Quantizer for performing QAT on a model, where embedding layers have
    int4 fake quantized grouped per channel weights.
       
group_sizescale_precisionzero_point_precisionr   Nc                     t                                                       t          j                            d           d| _        || _        || _        || _        d S )Nz<torchao.quantization.qat.Int4WeightOnlyEmbeddingQATQuantizer   )	r   r   r   r   r   	bit_widthrI   rJ   rK   )r!   rI   rJ   rK   r$   s       r%   r   z,Int4WeightOnlyEmbeddingQATQuantizer.__init__   s^     	$$J	
 	
 	
 ),;1E!!!r&   modelr"   r#   c                      ddl m} dt          j        j        dt
          dt          fd}dt          j        j        dt          j        j        f fd} ||||           |S )zP
        Swap `nn.Embedding` modules with `Int4WeightOnlyQATEmbedding`.
        r   ))_replace_with_custom_fn_if_matches_filterchildcur_fqnr   c                 @    t          | t          j        j                  S r)   )
isinstancer   r3   r4   )rR   rS   s     r%   	filter_fnz>Int4WeightOnlyEmbeddingQATQuantizer.prepare.<locals>.filter_fn   s    eUX%7888r&   c                 &   t          | j        | j        | j        | j        | j        | j        | j        j        j	        j
        | j        j        | j        j                  }| j        j        t          j        d          k    r| j        |_        |S )N)r   r   r   r   r   r   r   rI   rJ   rK   r0   r1   r2   )Int4WeightOnlyQATEmbeddingr   r   r   r   r   r   r   rI   rJ   rK   r*   r0   r1   r   )rR   r5   r!   s     r%   replacement_fnzCInt4WeightOnlyEmbeddingQATQuantizer.prepare.<locals>.replacement_fn   s    6$3#1!-/#(#;|? $ 4%)%>|*l(  M& |"el6&:&:::',|$  r&   )torchao.quantization.quant_apirQ   r   r3   ModulestrrA   )r!   rO   r"   r#   rQ   rV   rY   s   `      r%   preparez+Int4WeightOnlyEmbeddingQATQuantizer.prepare   s    	
 	
 	
 	
 	
 	
	9UX_ 	9s 	9t 	9 	9 	9 	9	!%(/ 	!eho 	! 	! 	! 	! 	! 	!0 	21%SSSr&   c                 0    |                      |           |S )z_
        Swap all `Int4WeightOnlyQATEmbedding` modules with `Int4WeightOnlyEmbedding`.
        )_convert_helper)r!   rO   r"   r#   s       r%   convertz+Int4WeightOnlyEmbeddingQATQuantizer.convert   s     	U###r&   modulec                    ddl m} |                                D ]p\  }}t          |t                    r?|j        j        j        }|j        j        j        }|j        j        j	        }t          |j        |j        |j        |j        |j        |j        |j        ||||j        j        |j        j                  }t+          |||           t-          | j                  \  }	}
t1          |j        | j        ||          \  }}|                    |          } ||j        |||	|
t4          j        |          }||_        |                    |          |_        |                    |          |_        [|                     |           rdS )z
        Helper function to recursively swap `Int4WeightOnlyQATEmbedding`
        modules with `Int4WeightOnlyEmbedding`
        r   )8_quantized_decomposed_quantize_per_channel_group_wrapper)r   r   r   r   r   r   r   rI   rJ   rK   r0   output_dtype)	precisionN)torchao._executorch_opsrc   named_childrenrU   rX   r    configrI   rJ   rK   Int4WeightOnlyEmbeddingr   r   r   r   r   r   r   r*   r0   r1   setattrr   rN   r   tor   int8scale
zero_pointr_   )r!   ra   rc   namerR   rI   rJ   rK   quantized_embeddingqminqmaxszpq_weights                 r%   r_   z3Int4WeightOnlyEmbeddingQATQuantizer._convert_helper   s   
	
 	
 	
 	
 	
 	
 "0022 /	, /	,KD%%!;<< .,"8?J
"'"="D"T/6K % '>#(#7"'"5 % 1"^#o','? <)$3)= <.!&!3' ' '#  &9:::  .dn==t5LN-	  B UU/00SSLJ  .6#*,-DD,A,A#)137K1L1L#..$$U++++_/	, /	,r&   )r;   r<   r=   r>   r   float32int32r?   r1   r   r3   r[   r   r]   r`   r_   rD   rE   s   @r%   rG   rG      s(         ',},1K	F FF F $k	F
 
F F F F F F'X_'-0'<?'	' ' ' 'RX_-0<?	   8,eho 8, 8, 8, 8, 8, 8, 8, 8,r&   rG   c                        e Zd ZdZddddddej        ej        fdededee         d	ee	         d
e	de
de
dedej        dej        f fdZdde
fdZd Z xZS )rX   a>  
    This module implements a embedding layer with int4 fake quantized
    grouped per channel weights.

    args:
        group_size: the number of elements in each quantized group for weights
        scale_precision: precision of per group scales
        zero_point_precision: precision of per group zero points
    Nr   F    r   r   r   r   r   r   r   rI   rJ   rK   c           
          t          t          j        |dd|	|
          } t                      j        ||||||||g|R i | d S )NT)r1   rI   is_symmetric
is_dynamicrJ   rK   )r
   r   INT4r   r   )r!   r   r   r   r   r   r   r   rI   rJ   rK   r"   r#   r   r$   s                 r%   r   z#Int4WeightOnlyQATEmbedding.__init__  s     /#!+!5
 
 
 		
 	
 	
 	
 	
 	
 	
 	
 	
r&   Tenabledc                     || j         _        d S r)   )r    r~   )r!   r~   s     r%   enable_fake_quantz,Int4WeightOnlyQATEmbedding.enable_fake_quant<  s    -4"***r&   c                 0    |                      d           d S )NF)r   )r!   s    r%   disable_fake_quantz-Int4WeightOnlyQATEmbedding.disable_fake_quant?  s    u%%%%%r&   )T)r;   r<   r=   r>   r   rv   rw   r?   r   r@   rA   r1   r   r   r   rD   rE   s   @r%   rX   rX     s         &*$(#(',},1K"
 "
"
 "
 c]	"

 5/"
 "
 !"
 "
 "
 "
 $k"
 "
 "
 "
 "
 "
H5 5 5 5 5 5& & & & & & &r&   rX   c                        e Zd ZdZddddddej        ej        dej        f
dededee         d	ee	         d
e	de
de
dedej        dej        dej        dej        f fdZd Z xZS )ri   zg
    This module implements a embedding layer with int4 quantized
    grouped per channel weights.
    Nr   Fry   r   r   r   r   r   r   r   rI   rJ   rK   r0   rd   c                    t                                                       || _        || _        || _        || _        || _        || _        || _        d| _	        || _
        |	| _        |
| _        || _        |                     dt          j        ||ft          j        |                     |                     dt          j        |||z  f|	|                     |                     dt          j        |||z  f|
|                     d S )NrM   r*   )r1   r0   rm   rn   )r   r   r   r   r   r   r   r   r   rN   rI   rJ   rK   rd   register_bufferr   emptyrl   )r!   r   r   r   r   r   r   r   rI   rJ   rK   r0   rd   r$   s                r%   r   z Int4WeightOnlyEmbedding.__init__I  sK    	 -*& ""4 $.$8!( 	K/uz&  	
 	
 	
 	K*!<=%  	
 	
 	
 	K*!<=*  	
 	
 	
 	
 	
r&   c           
         ddl m} t          | j                  \  }} || j        d| j        g| j        | j        t          j	        ||| j
                  }t          j        ||| j        | j        | j        | j        | j                  S )Nr   )dequantize_affiner   )rd   )%torchao.quantization.quant_primitivesr   r   rN   r*   rI   rm   rn   r   rl   rd   r+   r,   r   r   r   r   r   )r!   r'   r   rq   rr   w_dqs         r%   r.   zInt4WeightOnlyEmbedding.forward  s    	
 	
 	
 	
 	
 	
 $DN33
d
 ! K JOJ*	
 	
 	
 {MN#K
 
 	
r&   )r;   r<   r=   r>   r   rv   rw   r?   r   r@   rA   r1   r0   r   r.   rD   rE   s   @r%   ri   ri   C  s         &*$(#(',},1K#$)M7
 7
7
 7
 c]	7

 5/7
 7
 !7
 7
 7
 7
 $k7
 7
 k7
 7
 7
 7
 7
 7
r
 
 
 
 
 
 
r&   ri   )typingr   r   r   torch.nn.functionalr3   
functionalr+   r   r   torchao.quantization.unifiedr   torchao.quantization.utilsr   fake_quantize_configr	   r
   fake_quantizerr   utilsr   r4   r   rG   rX   r[   ri    r&   r%   <module>r      s   !                         > > > > > > 9 9 9 9 9 9 B B B B B B        . - - - - -     
i i i i iUX/ i i ib, , , , ,*: , , ,D3& 3& 3& 3& 3&!7 3& 3& 3&l[
 [
 [
 [
 [
eho [
 [
 [
 [
 [
r&   