
     `i,                        d dl mZ d dlmZ ddlmZ ddlmZmZm	Z	 ddl
mZ ddlmZ erdd	lmZ  e            rd d
lZ e            rd dlmZ ed             Zee_         e	j        e          Z G d de          Zd
S )    )defaultdict)TYPE_CHECKING   )prepare_for_hqq_linear)is_hqq_availableis_torch_availablelogging   )HfQuantizer)get_module_from_name)PreTrainedModelN)	HQQLinearc                 D    t          j        d| j        | j                  S )Nr   )dtypedevice)torchemptycompute_dtyper   selfs    y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/quantizers/quantizer_hqq.pyweightr   %   s    {1D$6t{KKKK    c            	       
    e Zd ZdZdZdZdZdgZ fdZddde	e
         d	e
d
e	e
         fdZddde	e
         de	e
         d
e	e
         fdZddde
d
efdZddddde
ddfdZd Z	 	 ddZddZddZed
efd            Z xZS )HqqHfQuantizerz
    HQQ quantizer base HF class.
    nn.Linear modules are first tagged with quant_config in _process_model_before_weight_loading().
    FThqqc                 "   t                      st          d           t                      j        |fi | d | _        d| _        t          d d                                           dhz
  | _        |	                    dd          s|	                    dd          rt          d          | j        =d|v r|d         | _        n+t          j        | _        t                              d           |	                    d	          }t          |t                     rtd
|                                v sd|                                v rt          d          t%          t'          |                                                    dk    | _        d S d S )NzA valid HQQ version (>=0.2.1) is not available. Please follow the instructions to install it: `https://github.com/mobiusml/hqq/`.Fbiasfrom_tf	from_flaxzwConverting weights from tf/flax weights is currently not supported, please make sure the weights are in PyTorch format.r   zOSetting dtype to torch.float32 as the default value since it was not specified.
device_mapcpudiskzYou are attempting to use an HQQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.r
   )r   ImportErrorsuper__init__r   using_multi_gpur   state_dict_keyshqq_keysget
ValueErrorr   float32loggerinfo
isinstancedictvalueslenset)r   quantization_configkwargsr!   	__class__s       r   r&   zHqqHfQuantizer.__init__9   s   !! 	 T   	,77777
$!$--==??6(J::i'' 	6::k5+I+I 	;  
 :&  #G_

"]
mnnnZZ--
j$'' 	I
))++++v9J9J9L9L/L/L h  
 (+3z/@/@/B/B+C+C'D'Dq'H$$$	I 	Ir   modelr   missing_keysprefixreturnc                 ,    | j         rd |D             S |S )Nc                     g | ]}d |v|	S )r    ).0keys     r   
<listcomp>z6HqqHfQuantizer.update_missing_keys.<locals>.<listcomp>_   s"    IIICHC4G4GC4G4G4Gr   )pre_quantized)r   r7   r8   r9   r5   s        r   update_missing_keysz"HqqHfQuantizer.update_missing_keys[   s)      	 II<IIIIr   expected_keysloaded_keysc                 $   | j         s|S fdt          |          }|                                D ]\  }}||_        t                      } ||           t                      }|D ]0|j        j        d         D ]}	|	v r|                               1||z  }t          d d t          j	        dd          
                                dhz
  }
t                      }|D ]2t          fd|D                       r|                               3||z  }|D ]bdz   |v r|                    dz              n!|                    fd	|
D                        d
z   |v r|                    d
z              ct          |          S )Nc                     |                                  D ]J\  }}t          |t          j        j                  r|                    |j                    ||           Kd S N)named_childrenr/   r   nnLinearaddname)r7   layersrL   module_find_hqq_quantizable_layerss       r   rO   zIHqqHfQuantizer.update_expected_keys.<locals>._find_hqq_quantizable_layersk   sl     % 4 4 6 6 = =ffux88 ,JJv{+++,,VV<<<<= =r   skip_modulesr"   Flinear_layerquant_configr   r   del_origr   c              3       K   | ]}|v V  	d S rG   r=   )r>   _moduler?   s     r   	<genexpr>z6HqqHfQuantizer.update_expected_keys.<locals>.<genexpr>   s'      @@g7c>@@@@@@r   z.weightc                      h | ]
}d z   |z   S ).r=   )r>   _ref_keyrV   s     r   	<setcomp>z6HqqHfQuantizer.update_expected_keys.<locals>.<setcomp>   s"     T T Th3!9 T T Tr   z.bias)rA   r3   named_modulesrL   configr4   rK   r   r   float16r(   anyupdatelist)r   r7   rC   rD   new_keysrL   rN   _valid_modules_skipped_modules_skip_module	_ref_keys_rm_keysrO   rV   r?   s               @@@r   update_expected_keysz#HqqHfQuantizer.update_expected_keysd   s    ! 	!  	= 	= 	= 	= 	= }%% "//11 	 	LD&FKK $$UN;;; 55% 	2 	2G % @ P 2 27**$((1112 	** -
 
 
 /

vh'	 55 	" 	"C@@@@@@@@@ "S!!!H & 	0 	0G"k11Wy01111 T T T T) T T TUUU K//Ww.///H~~r   
param_namec                 f    t          ||          \  }}t          |t          j        j                  S rG   )r   r/   r   rI   rJ   )r   r7   ri   r5   rN   _s         r   param_needs_quantizationz'HqqHfQuantizer.param_needs_quantization   s+    (
;;	 &%(/222r   param_valueztorch.Tensortarget_deviceztorch.devicec                 T   t          ||          \  }|                    dd          d         }t          ||          \  }}	|j        j        d         }
|j        j        d         }t	          fd|D                       r6                    ||                    || j                  idd	
           d S | j        r8t          | d          st          t                    | _        | j        |                             ||i           | j        |         t          fd| j        D                       rdv sj        t#          d d | j        |d          }|                               |j        Ht%          |j        t&          j                  r)t&          j                            |j                  |_        | j        r|                     |          }t3          ||	|           | j        |= d S                     ||idd	
           j        j        j        dk    oj        d u pj        j        j        dk    }|rd                    j                            d          dd                    }d|
v r|
}n||
v r|
|         }t#          || j        |d	          }|j        Ht%          |j        t&          j                  r)t&          j                            |j                  |_        | j        r|                     |          }t3          ||	|           d S d S )NrY   r
   r   rS   rP   c              3   *   K   | ]}|j         v V  d S rG   )rL   )r>   skip_modulerN   s     r   rW   z8HqqHfQuantizer.create_quantized_param.<locals>.<genexpr>   s*      JJk{fk)JJJJJJr   )r   r   FT)strictassign
hqq_paramsc              3       K   | ]}|v V  	d S rG   r=   )r>   krt   s     r   rW   z8HqqHfQuantizer.create_quantized_param.<locals>.<genexpr>   s'      ::q1
?::::::r   r   rQ   metaweight_quant_params)rS   r   r   rT   ) r   rsplitr]   r4   r_   load_state_dicttor   rA   hasattrr   r0   rt   r`   allr)   r   r   r/   r   TensorrI   	Parameterr'   _patch_layer_for_multigpusetattrr   r   typejoinrL   split)r   r7   rm   ri   rn   r5   tensor_namemodule_nameparent_modulenoderS   rP   	hqq_layermodule_is_ready
module_tagmodule_quant_configrt   rN   s                   @@r   create_quantized_paramz%HqqHfQuantizer.create_quantized_param   sm    35*EE ''Q//225+FFt|7G|7G JJJJ\JJJJJ 	""knnMnTTU^clp #    F  	4.. 4"-d"3"3OK(//k0JKKK5J ::::DM::::: 9*@T@TX^XcXk%!%!%"&*("  	 ))*555>-*Y^U\2Z2Z-%*X%7%7	%G%GIN' J $ > >y I IItY777OK0&F 	[9%PTUUU !-.3v= 
K4D6;#5#:f#D 	  	4&+"3"3C"8"8"=>>J$44&2##|++&2:&>#!0"j$  I ~)j.V.V)!&!3!3IN!C!C	# F ::9EE	M433333+	4 	4r   c                 &    d fd_         S )Nc                     t          j        |                    | j                  |                                                                           }| j        
|| j        z  }|S rG   )r   matmulr|   r   
dequantizetr   )r   xouts      r   forward_with_devicezEHqqHfQuantizer._patch_layer_for_multigpu.<locals>.forward_with_device   sP    ,qttDK00$//2C2C2E2E2G2GHHCy$ty Jr   c                      |           S rG   r=   )r   r   r   s    r   <lambda>z:HqqHfQuantizer._patch_layer_for_multigpu.<locals>.<lambda>   s    &9&9)Q&G&G r   )forward)r   r   r   s    `@r   r   z(HqqHfQuantizer._patch_layer_for_multigpu   s0    	 	 	 HGGGG	r   c                 2    t          || j                  }d S )N)r4   )r   r4   r   r7   r5   s      r   $_process_model_before_weight_loadingz3HqqHfQuantizer._process_model_before_weight_loading  s     'u$BZ[[[r   c                 F    d|_         |                                 |_        |S NT)is_hqq_quantizedis_serializableis_hqq_serializabler   s      r   #_process_model_after_weight_loadingz2HqqHfQuantizer._process_model_after_weight_loading
  s#    !%$($8$8$:$:!r   Nc                     dS r   r=   )r   safe_serializations     r   r   zHqqHfQuantizer.is_serializable  s    tr   c                     dS r   r=   r   s    r   is_trainablezHqqHfQuantizer.is_trainable  s    tr   )r7   r   rG   )__name__
__module____qualname____doc__use_keep_in_fp32_modules requires_parameters_quantizationrequires_calibrationrequired_packagesr&   ra   strrB   rh   boolrl   r   r   r   r   r   propertyr   __classcell__)r6   s   @r   r   r   .   s        
  %'+$  I  I  I  I  ID & 6:3i IL 	c       9&97;Cy9OSTWy9	c9 9 9 9v3.? 3S 3_c 3 3 3 3P4 P4 $P4 	P4
 &P4 P4 P4 P4d  \ \ \ \ \   
    d    X    r   r   )collectionsr   typingr   integrationsr   utilsr   r   r	   baser   quantizers_utilsr   modeling_utilsr   r   hqq.core.quantizer   r   r   
get_loggerr   r-   r   r=   r   r   <module>r      sR   $ # # # # #             1 1 1 1 1 1 A A A A A A A A A A       2 2 2 2 2 2  1000000  LLL 
++++++
 L L XL I		H	%	%f f f f f[ f f f f fr   