
    Pie              	          d dl Z d dlZd dlmZ d dlZd dlmZ d dlmZ  e j	        e
          Ze                    e j                    e j        ej                  Z e j        d          Ze                    e           e                    e           	 ddej        deded	efd
Z G d dej                  Zi fdej        fdZ G d d          ZdS )    N)Optional)quantize_per_channel_groupz4%(asctime)s - %(name)s - %(levelname)s - %(message)sTvals
group_sizenbithas_weight_zerosc           	      n   |dk    r|dk    sJ |rd|dz
  z   }d|dz
  z  dz
  }n
d}d|z  dz
  }| j         \  }}|                     d|          } t          j        | d          \  }	}
t          j        | d          \  }}
||	z
  ||z
  z  }|st          j        |          }n|t          j        |	|z            z
  }|                     ||          } |                    |d          }|                    |d          }t          | |||||rt          j        nt          j	        |          }|sd }|||fS )N      r   )axis)inputscaleszero_points	quant_min	quant_maxdtyper   )
shapereshapetorchminmax
zeros_likeroundr   int8uint8)r   r   r   r   signedqminqmaxnkvmins_vmaxsgroup_scalesgroup_zerosgroup_qvalss                  r/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/torchao/experimental/quant_api.py	_quantizer)      sz    199" tax!dQh1$T	Q:DAq<<J''DyA&&&HE1yA&&&HE1EMdTk2L ?&|44U[)=>>><<1D''2..L%%a,,K,"3ejj  K  k11    c                   *     e Zd Z fdZd Zd Z xZS )UIntxWeightOnlyQuantizedLinearc                 d    t                                                       || _        || _        d S )N)super__init___pack_weights_op
_linear_op)selfpack_weight_op	linear_op	__class__s      r(   r/   z'UIntxWeightOnlyQuantizedLinear.__init__F   s.    
 	 .#r*   c                    || _         || _        t          || j        | j         dd          \  }}}| |z  }t          j        |d          | _        t          j        |d          | _        |                     |                                          	                    d          }t          j        |d          | _
        d S )NTF)r   r   )requires_gradmps)device)r   r   r)   nn	Parameterweight_scalesweight_zerosr0   cputopacked_weights)r2   weightsr   r   weight_qvalsr<   r=   r@   s           r(   quantize_and_pack_weightsz8UIntxWeightOnlyQuantizedLinear.quantize_and_pack_weightsO   s    	$4=T_di$u5
 5
 5
1m\ %}}4\-uMMMLUKKK..|/?/?/A/ABBEEUESS l>OOOr*   c                    |                                 dk    sJ |                                 dk    r-|                     || j        | j        | j        | j                  S |j        dd         }|j        d         }| j        j        d         } |                     |                    d|          | j        | j        | j        | j                  j        g ||R  S )N   r   r   )dimr1   r@   r   r<   r=   r   r   )r2   x
lead_shaper!   r    s        r(   forwardz&UIntxWeightOnlyQuantizedLinear.forward\   s    uuww!||||5577a<<??#"!   WQrT]
GBK$Q'tIIb!O
 
 " "  !" " " 	"r*   )__name__
__module____qualname__r/   rC   rI   __classcell__)r5   s   @r(   r,   r,   E   sY        $ $ $ $ $P P P" " " " " " "r*   r,   modulec           
         |d         }|d         }t          | t          j                  rJ |dk    r|dk    sJ |                                 D ]\  }}t          |t          j                  st	          ||           0|j        J t          t          t          j	        j
        d| d          t          t          j	        j
        d| d          	          }t          | ||           |                    |j        ||           d S )
Nr   r   r
      _pack_weight_bit_linear_fp_act_
bit_weight)r3   r4   )
isinstancer:   Linearnamed_children)_replace_linear_with_quantized_linear_mpsbiasr,   getattrr   opstorchaosetattrrC   weight)rN   kwargsr   r   namechildqlinears          r(   rX   rX   t   s,   %J&>D&"),,,,,199",,.. N Ne%++ 	N5eVDDDD:%%%4&uy'8:S$:S:S:STT!I%'I'I'I'I   G FD'***--elD*MMMMN Nr*   c                   b    e Zd Zddddee         dee         fdZdej        dej        fdZdS )	UIntxWeightOnlyLinearQuantizerN)bitwidth	groupsizere   rf   c                   |dk    rt          d          || _        |t          j        t          j        t          j        fvrt          d          || _        | d}t          	                    d| d           |t          dd          vrt          d	          || _        | d
}t          	                    d| d           |dvrt          d          || _        d S )Nr8   zHOnly device=mps is currently supported in UIntxWeightOnlyLinearQuantizerz[Only precisions float32, float16 & bfloat16 are supported in UIntxWeightOnlyLinearQuantizer   z&bitwidth not specified, defaulting to .r
   r   zDOnly bitwidts 1 to 7 are supported in UIntxWeightOnlyLinearQuantizer   z'groupsize not specified, defaulting to )    @   rj      zQOnly groupsizes 32, 64, 128 & 256 are supported in UIntxWeightOnlyLinearQuantizer)NotImplementedErrorr9   r   float32float16bfloat16
ValueError	precisionloggerwarningrangere   rf   )r2   r9   rs   re   rf   s        r(   r/   z'UIntxWeightOnlyLinearQuantizer.__init__   s    U??%Z   !DKU]EM5>JJJm   'DNHNNOHOOOPPP5A;;&&V   %DMINNQYQQQRRR...c   'DNNNr*   modelreturnc                     |                     | j                                       | j                  }t          || j        | j        d           |S )N)r   r   )r_   )r?   r9   rs   rX   rf   re   )r2   rw   s     r(   quantizez'UIntxWeightOnlyLinearQuantizer.quantize   sZ    %%((881"n 	
 	
 	
 	
 r*   )	rJ   rK   rL   r   intr/   r:   Modulerz    r*   r(   rd   rd      s}         #'#'(' (' ('
 3-(' C=(' (' (' ('T	bi 	BI 	 	 	 	 	 	r*   rd   )T)loggingsystypingr   r   torch.nnr:   $torch.ao.quantization.fx._decomposedr   	getLoggerrJ   rt   setLevelWARNINGStreamHandlerstdouthandler	Formatter	formattersetFormatter
addHandlerTensorr{   boolr)   r|   r,   rX   rd   r}   r*   r(   <module>r      s    



                   
	8	$	$         '


+
+GTUU	   Y      '    TX'2 '2
,'2$''2/2'2FJ'2 '2 '2 '2T+" +" +" +" +"RY +" +" +"^ IK N Nbi N N N N,4 4 4 4 4 4 4 4 4 4r*   