
     `i=                         d dl mZmZmZ ddlmZ erddlmZ ddlm	Z	m
Z
mZ  e
            rd dlZ ej        e          Z G d d	e          ZdS )
    )TYPE_CHECKINGOptionalUnion   )HfQuantizer   )PreTrainedModel)is_accelerate_availableis_torch_availableloggingNc                       e Zd ZdZdZdZdgZ fdZd Zdd	Z		 dddde
ee                  fdZdeeeeef         f         deeeeef         f         fdZddZddZedefd            Zedefd            Z xZS )BitNetHfQuantizerz
    1.58-bit quantization from BitNet quantization method:
    Before loading: it converts the linear layers into BitLinear layers during loading.

    Check out the paper introducing this method: https://huggingface.co/papers/2402.17764
    FT
acceleratec                 J     t                      j        |fi | || _        d S N)super__init__quantization_config)selfr   kwargs	__class__s      |/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/quantizers/quantizer_bitnet.pyr   zBitNetHfQuantizer.__init__-   s1    ,77777#6       c                 <   t                      st          d          |                    dd          s|                    dd          rt          d          t          j                                        st                              d           d S |                    d          }|t                              d           d S |Pt          |t                    r=d	|                                v sd
|                                v rt          d          d S d S d S )NzOLoading a BitNet quantized model requires accelerate (`pip install accelerate`)from_tfF	from_flaxztLoading ternary weights from tf/flax is currently not supported, please make sure the weights are in PyTorch format.zhYou don't have a GPU available to load the model, the inference will be slow because of weight unpacking
device_mapzYou have loaded a BitNet model on CPU and have a CUDA device available, make sure to set your model on a GPU device in order to run your model.cpudiskzYou are attempting to load a BitNet model with a device_map that contains a CPU or disk device.This is not supported. Please remove the CPU or disk device from the device_map.)r
   ImportErrorget
ValueErrortorchcudais_availableloggerwarning_once
isinstancedictvalues)r   argsr   r   s       r   validate_environmentz&BitNetHfQuantizer.validate_environment1   sS   &(( 	qoppp::i'' 	6::k5+I+I 	;  
 z&&(( 	z   FZZ--
I     #*d++ *:K:K:M:M1M1MQW[e[l[l[n[nQnQn g   $# QnQnr   modelr	   c                     |S r    )r   r-   r   s      r   #_process_model_after_weight_loadingz5BitNetHfQuantizer._process_model_after_weight_loadingN   s    r   Nkeep_in_fp32_modulesc                     ddl m} |                     || j        j        |          | _         ||| j        | j        | j                  }d S )Nr   )replace_with_bitnet_linear)modules_to_not_convertr   pre_quantized)integrationsr3   get_modules_to_not_convertr   r4   r5   )r   r-   r1   r   r3   s        r   $_process_model_before_weight_loadingz6BitNetHfQuantizer._process_model_before_weight_loadingQ   sn     	>=====&*&E&E4+BDX'
 '
# +*#'#> $ 8,	
 
 
r   
max_memoryreturnc                 B    d |                                 D             }|S )Nc                      i | ]\  }}||d z  S )g?r/   ).0keyvals      r   
<dictcomp>z7BitNetHfQuantizer.adjust_max_memory.<locals>.<dictcomp>e   s"    III(#sc3:IIIr   )items)r   r9   s     r   adjust_max_memoryz#BitNetHfQuantizer.adjust_max_memoryd   s'    IIj6F6F6H6HIII
r   target_dtypetorch.dtypec                     t           j        }|S r   )r#   int8)r   rC   s     r   adjust_target_dtypez%BitNetHfQuantizer.adjust_target_dtypeh   s    zr   c                     dS )NTr/   )r   safe_serializations     r   is_serializablez!BitNetHfQuantizer.is_serializablel   s    tr   c                 B    | j         j        dk    o| j         j        dk    S )Nautobitlinearonliner   linear_classquantization_moder   s    r   is_trainablezBitNetHfQuantizer.is_trainableo   s+     $1_D G(:hF	
r   c                 B    | j         j        dk    o| j         j        dk    S )zUFlag indicating whether the quantized model can carry out quantization aware trainingrL   rM   rN   rQ   s    r   is_qat_trainablez"BitNetHfQuantizer.is_qat_trainablev   s+     $1_D G(:hF	
r   )r-   r	   r   )rC   rD   r:   rD   )__name__
__module____qualname____doc__ requires_parameters_quantizationrequires_calibrationrequired_packagesr   r,   r0   r   liststrr8   r)   r   intrB   rG   rJ   propertyboolrR   rT   __classcell__)r   s   @r   r   r       sv         (-$%7 7 7 7 7  :    59
 
 
 'tCy1
 
 
 
&DeCHo1E,F 4PSUZ[^`c[cUdPdKe           
d 
 
 
 X
 
$ 
 
 
 X
 
 
 
 
r   r   )typingr   r   r   baser   modeling_utilsr	   utilsr
   r   r   r#   
get_loggerrU   r&   r   r/   r   r   <module>rg      s    2 1 1 1 1 1 1 1 1 1        1000000 H H H H H H H H H H  LLL 
	H	%	%\
 \
 \
 \
 \
 \
 \
 \
 \
 \
r   