
     `i.W                        d dl Z d dlmZmZ d dlZd dlmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ  e            rd dlZ ee          Zd	d
dddddde	de
d         ide
d         idZ eed                                                   Z G d de          Z G d d          Z G d de          Z G d de          Z G d de          Z G d d e          Z G d! d"e          Z G d# d$e          Z  G d% d&e          Z! G d' d(e          Z" G d) d*e          Z#eeeeeeee e!e"e"e#d+Z$d, Z%	 	 	 d4d.ee&         d/ee'         d0e&fd1Z(d5d3Z)dS )6    N)
NamedTupleOptional)tqdm   )GGUF_CONFIG_MAPPINGGGUF_TOKENIZER_MAPPING_gguf_parse_value)is_torch_available)is_gguf_available)
get_loggerversiontensor_countkv_count)r   r   r   	file_typequantization_version)r   r   )GGUFgeneral	tokenizertokenizer_config)ignoreconfigr   r   r   c                   8    e Zd ZU ej        ed<   eed<   eed<   dS )
GGUFTensorweightsnamemetadataN)__name__
__module____qualname__npndarray__annotations__strdict     |/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/modeling_gguf_pytorch_utils.pyr   r   7   s0         Z
IIINNNNNr&   r   c                       e Zd ZddZd ZdS )TensorProcessorNc                     |pi | _         d S Nr   )selfr   s     r'   __init__zTensorProcessor.__init__>   s    lr&   c                 $    t          ||i           S r+   r   r-   r   r   kwargss       r'   processzTensorProcessor.processA   s    '4,,,r&   r+   )r   r   r   r.   r3   r%   r&   r'   r)   r)   =   s7        # # # #- - - - -r&   r)   c            	       b     e Zd Zd	 fd	Zd Z	 d	dej        dedee         dej        fdZ	 xZ
S )
LlamaTensorProcessorNc                 L    t                                          |           d S Nr,   superr.   r-   r   	__class__s     r'   r.   zLlamaTensorProcessor.__init__F   $    '''''r&   c                 8   d|v sd|v r| j                             d          }| j                             d          }d ||fv rt          ||i           S d|v r|                     |||          }nd|v r|                     |||          }t          ||i           S )Nz.attn_k.z.attn_q.num_attention_headsnum_key_value_heads)r   getr   _reverse_permute_weights)r-   r   r   r2   	num_headsnum_kv_headss         r'   r3   zLlamaTensorProcessor.processI   s    t!3!3(=>>I;??+@AAL	<000!'4444T!!77IVVt##77LYY'4,,,r&   r   n_headrC   returnc                     |||k    r|}|j         d         |z  dz  } |j        ||dg|j         dd          R  }|                    dd                              |j                   S )Nr      r   )shapereshapeswapaxes)r-   r   rD   rC   dimws         r'   rA   z-LlamaTensorProcessor._reverse_permute_weightsV   s{    
 #,(>(>!FmA&(A-GOFC?W]122->???zz!Q''666r&   r+   )r   r   r   r.   r3   r    r!   intr   rA   __classcell__r;   s   @r'   r5   r5   E   s        ( ( ( ( ( (- - - OS
7 
7z
7+.
7>Fsm
7	
7 
7 
7 
7 
7 
7 
7 
7r&   r5   c                   X     e Zd Zd	 fd	Zd Zdej        deeef         dedefdZ	 xZ
S )
Qwen2MoeTensorProcessorNc                 L    t                                          |           d S r7   r8   r:   s     r'   r.   z Qwen2MoeTensorProcessor.__init__d   r<   r&   c                 
   d|v rU|                     d          }|                     d          }|r)|                     ||||           t          |d i           S d|v rt          j        |d          }t          ||i           S )N_exptensor_key_mappingparsed_parametersffn_gate_inp_shexpr   axis)r@   _split_moe_expert_tensorr   r    expand_dims)r-   r   r   r2   rU   rV   s         r'   r3   zQwen2MoeTensorProcessor.processg   s    T>>!',@!A!A &

+> ? ?! 5--g7H$Pbccc!'44444'' nW1555G'4,,,r&   r   rV   r   rU   c                    ||         }| j                             dd          }t          d|          D ]S}|                    dd| d          }||         }t	          j        t          j        |                    |d         |<   Td S )Nnum_experts<   r   mlp.experts..tensors)r   r@   rangereplacetorch
from_numpyr    copy)	r-   r   rV   r   rU   	w_counteri	temp_name
exp_weights	            r'   rZ   z0Qwen2MoeTensorProcessor._split_moe_expert_tensort   s    
 "$'KOOM266	q)$$ 	\ 	\A^5HA5H5H5HIII J6;6FrwzGZGZ6[6[i(33	\ 	\r&   r+   )r   r   r   r.   r3   r    r!   r$   r#   rZ   rN   rO   s   @r'   rQ   rQ   c   s        ( ( ( ( ( (- - -
\z
\6:39o
\MP
\fj
\ 
\ 
\ 
\ 
\ 
\ 
\ 
\r&   rQ   c                   b     e Zd Zd	 fd	Zd Zdej        dedefdZdej        dedefdZ	 xZ
S )
BloomTensorProcessorNc                 L    t                                          |           d S r7   r8   r:   s     r'   r.   zBloomTensorProcessor.__init__   r<   r&   c                     d|v rM| j         d         }| j         d         }d|v r|                     |||          }n|                     |||          }t          ||i           S )Nattn_qkvrD   hidden_sizeweight)r   _reverse_reshape_weights_reverse_reshape_biasr   )r-   r   r   r2   rB   n_embeds         r'   r3   zBloomTensorProcessor.process   sr    H-Ik-0G477GTT44WiQQ'4,,,r&   r   rD   rt   c                 D   t          j        |dd          \  }}}|                    |||z  |          }|                    |||z  |          }|                    |||z  |          }t          j        |||gd          }|                    |dz  ||z  z  |          S )N   r   rX   r   )r    array_splitrI   stack)r-   r   rD   rt   qkvqkv_weightss           r'   rr   z-BloomTensorProcessor._reverse_reshape_weights   s     .!!4441aIIfg/99IIfg/99IIfg/99h1ayq111""6A:F1B#CWMMMr&   c                 $   t          j        |d          \  }}}|                    |||z            }|                    |||z            }|                    |||z            }t          j        |||gd                                          }|S )Nrv   r   rX   )r    rw   rI   rx   flatten)r-   r   rD   rt   q_biask_biasv_biasqkv_biass           r'   rs   z*BloomTensorProcessor._reverse_reshape_bias   s     "$!;!;6(9::6(9::6(9::8VVV41===EEGGr&   r+   )r   r   r   r.   r3   r    r!   rM   rr   rs   rN   rO   s   @r'   rl   rl      s        ( ( ( ( ( (- - -
N
 
NC 
NRU 
N 
N 
N 
N
RZ 
 
s 
 
 
 
 
 
 
 
r&   rl   c                   &     e Zd Zd fd	Zd Z xZS )T5TensorProcessorNc                 L    t                                          |           d S r7   r8   r:   s     r'   r.   zT5TensorProcessor.__init__   r<   r&   c                     d }|                     d          D ]'}|                                rt          |          } n(t          ||d|i          S )Nr`   bid)splitisdigitrM   r   )r-   r   r   r2   r   chunks         r'   r3   zT5TensorProcessor.process   s]    ZZ__ 	 	E}} %jj '4%666r&   r+   r   r   r   r.   r3   rN   rO   s   @r'   r   r      sL        ( ( ( ( ( (7 7 7 7 7 7 7r&   r   c                   &     e Zd Zd fd	Zd Z xZS )GPT2TensorProcessorNc                 L    t                                          |           d S r7   r8   r:   s     r'   r.   zGPT2TensorProcessor.__init__   r<   r&   c                     d|v sd|v sd|v sd|v r|j         }|dk    rId}|                    di           }t          j        t	          j        |                    |d         |<   d }t          ||i           S )	Nzattn_qkv.weightzffn_down.weightzffn_up.weightzattn_output.weightoutput.weightzlm_head.weightrV   ra   )Tr@   rd   re   r    rf   r   )r-   r   r   r2   rV   s        r'   r3   zGPT2TensorProcessor.process   s     %% D(($&&#t++iG ?"" $D &

+> C C161A"''BRBR1S1Si(.D'4,,,r&   r+   r   rO   s   @r'   r   r      sL        ( ( ( ( ( (- - - - - - -r&   r   c                   &     e Zd Zd fd	Zd Z xZS )MambaTensorProcessorNc                 L    t                                          |           d S r7   r8   r:   s     r'   r.   zMambaTensorProcessor.__init__   r<   r&   c                     d|v rt          j        |d          }d|v rt          j        |           }t          ||i           S )Nzssm_conv1d.weightr   rX   ssm_a)r    r[   logr   r1   s       r'   r3   zMambaTensorProcessor.process   sQ    $&& nW1555Gd?? fgX&&G'4,,,r&   r+   r   rO   s   @r'   r   r      sL        ( ( ( ( ( (	- 	- 	- 	- 	- 	- 	-r&   r   c                   &     e Zd Zd fd	Zd Z xZS )NemotronTensorProcessorNc                 L    t                                          |           d S r7   r8   r:   s     r'   r.   z NemotronTensorProcessor.__init__   r<   r&   c                 6    d|v r|dz
  }t          ||i           S Nznorm.weightr   r0   r1   s       r'   r3   zNemotronTensorProcessor.process   (    D  kG'4,,,r&   r+   r   rO   s   @r'   r   r      sL        ( ( ( ( ( (- - - - - - -r&   r   c                   &     e Zd Zd fd	Zd Z xZS )Gemma2TensorProcessorNc                 L    t                                          |           d S r7   r8   r:   s     r'   r.   zGemma2TensorProcessor.__init__   r<   r&   c                 6    d|v r|dz
  }t          ||i           S r   r0   r1   s       r'   r3   zGemma2TensorProcessor.process   r   r&   r+   r   rO   s   @r'   r   r      sL        ( ( ( ( ( (
- - - - - - -r&   r   c                   &     e Zd Zd fd	Zd Z xZS )Lfm2TensorProcessorNc                 L    t                                          |           d S r7   r8   r:   s     r'   r.   zLfm2TensorProcessor.__init__   r<   r&   c                 X    d|v rt          j        |d          }t          ||i           S )Nzshortconv.conv.weightr   rX   )r    r[   r   r1   s       r'   r3   zLfm2TensorProcessor.process   s3    "d**nW1555G'4,,,r&   r+   r   rO   s   @r'   r   r      sL        ( ( ( ( ( (- - - - - - -r&   r   )llamaqwen2moeqwen3moebloomt5	t5encodergpt2mambanemotrongemma2gemma3lfm2c                 Z    || j         vrg S | j         |         fdj        D             S )Nc                 P    g | ]"}t          j        |         j                  #S r%   r	   partstypes).0_data_indexvalues     r'   
<listcomp>zread_field.<locals>.<listcomp>  s-    cccek+6DDcccr&   )fieldsdata)readerfieldr   s     @r'   
read_fieldr     s?    FM!!	M% EccccX]Xbccccr&    
model_type
num_layers	qual_namec           	         t                      rt                      r	ddlm}m} n)t
                              d           t          d          || j        j	        n|}|| j        j
        n|}|dk    rd}n#|dk    rd	}n|d
k    rd}n|dk    rd}n|dk    rd}d}|                                D ]\  }}||k    r|} n|t          d| d           |||          }	i |                                 }
|
D ]}|dv rd|v rt          j        dd|          }|d}}|                    d          s|                    d          r|                    dd          \  }}d|z   }|	                    |          }|||z   ||z   <   |                                 x}rU|D ]R\  }}t)          |||| | d          }fd|                                D             }                    |           SS )aY  
    GGUF uses this naming convention for their tensors from HF checkpoint:
    `blk.N.BB.weight` and `blk.N.BB.bias`
    where N signifies the block number of a layer, and BB signifies the
    attention/mlp layer components.
    See "Standardized tensor names" in
    https://github.com/ggerganov/ggml/blob/master/docs/gguf.md for details.
    r   )MODEL_ARCH_NAMESget_tensor_name_mapLoading a GGUF checkpoint in PyTorch, requires both PyTorch and GGUF>=0.10.0 to be installed. Please see https://pytorch.org/ and https://github.com/ggerganov/llama.cpp/tree/master/gguf-py for installation instructions.KPlease install torch and gguf>=0.10.0 to load a GGUF checkpoint in PyTorch.Ncoherez	command-r	qwen2_moer   	qwen3_moer   gemma3_textr   umt5r   zUnknown gguf model_type: z in gguf-py. This might because you're using an outdated version of gguf-py package, you can install `gguf` package from source refer to https://github.com/ggerganov/llama.cpp/tree/master/gguf-py#development)r   r   r_   zmlp.experts.\d+.r   z.weightz.biasr`   r   )r   c                 $    i | ]\  }}|v	||S r%   r%   )r   rz   r{   gguf_to_hf_name_maps      r'   
<dictcomp>z+get_gguf_hf_weights_map.<locals>.<dictcomp>e  s*    XXX11DW;W;Wq!;W;W;Wr&   )r   r
   ggufr   r   loggererrorImportErrorr   r   num_hidden_layersitemsNotImplementedError
state_dictresubendswithrsplitget_namenamed_childrenget_gguf_hf_weights_mapupdate)hf_modelr   r   r   r   r   archkeyr   name_mapr   hf_namer   suffix	gguf_namer   childsub_mapr   s                     @r'   r   r     s     i133 i>>>>>>>>>A	
 	
 	
 ghhh/9/A++zJ6@6H22jJX 

	{	"	"

	{	"	"

	}	$	$

	v		
D&,,..  
UJDE  |!U
 U U U
 
 	
 #"444H $$&&J F F111n6O6Of0.'JJGfI&& 	"'*:*:7*C*C 	"">>#q11LD&6\F%%d++	2;g2EI.// "00222~ 0) 	0 	0KD%-eZZcWleiWlWlWlmmmGXXXXXXXG&&w////r&   Fc                 d
  !"# t                      rt                      r	ddlm}m} n)t
                              d           t          d           ||           }|j        }t          |
                                          }d t          D             }t          |d          d         }	t          |d          }
d}d	|	v rd
|
v rd
}nYd|	v sd|	v rOd|d         d<   |
r/d|
d                                         v rd}d|	v rdg|d         d<   nd|	v rdg|d         d<   d}n|	}d|	v rd}nd|	v rd}d|	v r]h d!d"t          !fd|j        D                       }t          "fd|j        D                       }||d         d<   | |d         d<   |	t           vr|t           vrt#          d|	 d           d!d"g}t%          d# |j        D                       p|	|v |d         d$<   |j                                        D ]?\  }#|                    |	|          }|                    d%          }|d         }d%                    |d&d                   }#fd'#j        D             }t1          |          d&k    r|d         }t3          |t4                    r|	|v r|                    |	|          }t                                          D ]N\  }}||v rE|||         v r;||         |         }|d(k    r(||||         |<   ||v r|                    |           O||v r t
                              d)| d*|            A|d         d+         d,k    rd-|d         d+<   |d         d+         d.k    rS|d         d/         }t;          |          |d         d/<   d0|d         d1<   d2 t=          |          D             |d         d3<   d4|d         vrE|d5         }d6|v rt1          |d6                   |d         d4<   nt
                              d7           |ri |d8<   tA          |          }|!                    di           }tD          !                    |	tF                    } ||9          }tI          |j        d:;          D ]}|j%        } ||j        |j&                  }|'                    ||||<          } | j(        }| j%        }||vrK||         }tS          j*        tW          j,        |                    |d8         |<   t1          |          dk    rt
                              d=|            |S )>a  
    Load a GGUF file and return a dictionary of parsed parameters containing tensors, the parsed
    tokenizer and config attributes.

    Args:
        gguf_checkpoint_path (`str`):
            The path the to GGUF file to load
        return_tensors (`bool`, defaults to `False`):
            Whether to read the tensors from the file and return them. Not doing so is faster
            and only loads the metadata in memory.
    r   )
GGUFReader
dequantizer   r   c                     i | ]}|i S r%   r%   )r   rz   s     r'   r   z(load_gguf_checkpoint.<locals>.<dictcomp>  s    EEE1BEEEr&   zgeneral.architecturezgeneral.nameNr   mistralr   r   Tr   is_gated_actr   UMT5EncoderModelarchitecturesT5EncoderModelr   r   r   r   stablelm>   attn_k.biasattn_q.biasattn_v.biasffn_normc              3   4   K   | ]}D ]}||j         v V  d S r+   r   )r   tensor	bias_nameattn_bias_names      r'   	<genexpr>z'load_gguf_checkpoint.<locals>.<genexpr>  s9      mmF^lmmQZyFK/mmmmmmmr&   c              3   *   K   | ]}|j         v V  d S r+   r   )r   r   ffn_norm_names     r'   r   z'load_gguf_checkpoint.<locals>.<genexpr>  s*      #^#^VMV[$@#^#^#^#^#^#^r&   use_qkv_biasuse_parallel_residualzGGUF model with architecture z is not supported yet.falconr   c              3   ,   K   | ]}d |j         k    V  dS )r   Nr   )r   r   s     r'   r   z'load_gguf_checkpoint.<locals>.<genexpr>  s)      HHvOv{*HHHHHHr&   tie_word_embeddingsr`   r   c                 P    g | ]"}t          j        |         j                  #S r%   r   )r   r   r   s     r'   r   z(load_gguf_checkpoint.<locals>.<listcomp>  s-    hhhk"5;{#;U[IIhhhr&   z1Some keys were not parsed and added into account z | r   r   r   r   r?   Fblock_auto_adjust_ff_dimc                 $    g | ]\  }}|d k    |S )r   r%   )r   rh   rC   s      r'   r   z(load_gguf_checkpoint.<locals>.<listcomp>  s.     9
 9
 9
!!\L[\L\L\AL\L\L\r&   full_attn_idxs
vocab_sizer   tokenszCan't find a way to retrieve missing config vocab_size from tokenizer parameters. This will use default value from model config class and cause unexpected behavior.ra   r,   z,Converting and de-quantizing GGUF tensors...)desc)r   r   rU   rV   z0Some keys of the GGUF file were not considered: )-r   r
   r   r   r   r   r   r   r   listkeysGGUF_TO_TRANSFORMERS_MAPPINGr   loweranyra   GGUF_SUPPORTED_ARCHITECTURES
ValueErrorallr   rc   r   joinr   len
isinstancer#   removeinfomax	enumeratewarningr   r@   TENSOR_PROCESSORSr)   r   r   tensor_typer3   r   rd   re   r    rf   )$gguf_checkpoint_pathreturn_tensorsmodel_to_loadr   r   r   r   reader_keysrV   architecture
model_nameupdated_architecturer   r  
exceptionsgguf_keyr   prefix
config_keyr   	parameterparameter_renamesrenamed_config_keygguf_num_key_value_headstokenizer_parametersrU   r   ProcessorClass	processorr   r   r   resultr   r   r   s$                                    @@@r'   load_gguf_checkpointr2  k  s     i133 i/////////A	
 	
 	
 ghhhZ,--F]Fv{{}}%%KEE(DEEEf&<==a@LFN33J ,9
#:#:( 
		!<!<6:(#N3 	(&JqM$7$7$9$999#) l**@R?S!(+O<l**@P?Q!(+O<#'  +\!!*	|	#	#*
 \!!FFF"mmmmfnmmmmm ##^#^#^#^v~#^#^#^ ^ ^6>(#N3CX?X(#$;<777<PXt<t<t]]]]^^^ G$JHHHHHHHfL\fLf h 56
 "=..00 b b%##L2FGGs##qXXeABBi((
hhhh]b]ghhhu::??!HEeS!! 	Fle&;&;MM,0DEEE,H,N,N,P,P 
	1 
	1(I(***z=Nv=V/V/V%6v%>z%J"%++%1GL%i01CD{**&&x000{""KK`H``Y^``aaa "<0H<<4A(#L1"<0F::#4X#>?T#U =@AY=Z=Z(#$9:BG(#$>?9
 9
%./G%H%H9
 9
 9
(#$45 ,X6660=+++8;<PQY<Z8[8[h'55NNe  
  T'))$4]CC"&&x44*..|_MM"N&111	6>0^___ 	T 	TF;D jf.@AAG&&#5"3	 '  F nG;D---%d+D161A"''BRBR1S1Si(..
;!T{TTUUUr&   )NNr   )FN)*r   typingr   r   numpyr    	tqdm.autor   integrationsr   r   r	   utilsr
   utils.import_utilsr   utils.loggingr   rd   r   r   r  r  r  r  r   r)   r5   rQ   rl   r   r   r   r   r   r   r  r   r#   rM   r   r2  r%   r&   r'   <module>r:     s    
			 ' ' ' ' ' ' ' '                   
 & % % % % % 1 1 1 1 1 1 % % % % % %  LLL	H		 !*"
 

 "-F\]]  "5kBC$&<=O&PQ      $t$@$J$O$O$Q$QRR        - - - - - - - -7 7 7 7 7? 7 7 7<\ \ \ \ \o \ \ \<$ $ $ $ $? $ $ $N
7 
7 
7 
7 
7 
7 
7 
7- - - - -/ - - -4- - - - -? - - - - - - - -o - - -	- 	- 	- 	- 	-O 	- 	- 	-- - - - -/ - - - "''!
"!'##   d d d !% $	O OO O 	O O O Odi i i i i ir&   