
    .`i'Q                        d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$m%Z% ddl&m'Z'm(Z( ddl)m*Z* ddl+m,Z, ddl-m.Z.m/Z/ ddl0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7  G d dej8                  Z9 G d dej8                  Z: G d dej8                  Z;e G d d ej8                              Z< G d! d"ej8        e.e/          Z=dS )#zAInference-only Apertus model compatible with HuggingFace weights.    )Iterable)isliceN)nn)ApertusConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)XIELU)EncoderOnlyAttention)RMSNorm)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors)AttentionType   )SupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayerextract_layer_indexis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   V     e Zd Z	 	 	 	 ddededededz  d	ed
ededdf fdZd Z xZ	S )
ApertusMLPNF Thidden_sizeintermediate_size
hidden_actquant_configbiasprefixreduce_resultsreturnc           	         t                                                       t          ||||| d          | _        t	          |||||| d          | _        |dk    rt          d| d          t                      | _        d S )Nz.up_proj
input_sizeoutput_sizer-   r,   r.   z
.down_proj)r3   r4   r-   r,   r/   r.   xieluzUnsupported activation: z". Only xIELU is supported for now.)	super__init__r   up_projr   	down_proj
ValueErrorr   act_fn)	selfr)   r*   r+   r,   r-   r.   r/   	__class__s	           v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/apertus.pyr7   zApertusMLP.__init__L   s     	+")%&&&
 
 
 +(#%)(((
 
 
   3: 3 3 3   gg    c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r8   r;   r9   )r<   x_s      r>   forwardzApertusMLP.forwardm   s>    ||A1KKNN~~a  1r?   )NFr(   T)
__name__
__module____qualname__intstrr   boolr7   rD   __classcell__r=   s   @r>   r'   r'   K   s         37#   	
 )4/    
     B      r?   r'   c                        e Zd Zddddddej        fdedededed	ed
edz  dedede	dz  de
de
ddf fdZdej        dej        dej        fdZded
edz  ddfdZ xZS )ApertusAttention    NFr(   configr)   	num_headsnum_kv_headsmax_position_embeddingsr,   r-   bias_o_projcache_configr.   	attn_typer0   c                    t                                                       t          |
          }|| _        t	                      }|| _        | j        |z  dk    sJ | j        |z  | _        || _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t          d| j        |z            | _	        t          |dd           }|| j        | j        z  }|| _        | j        | j        z  | _        | j	        | j        z  | _        | j        dz  | _        || _        t!          || j        | j        | j        |||
 d          | _        t%          | j        | j        z  ||||
 d          | _        |                     ||	           d }t          |d
d           x}r||         dk    }|r|j        }|t,          j        k    rt0          nt2          } || j        | j        | j        | j	        |	||||
 d	  	        | _        t7          | j        |j                  | _        t7          | j        |j                  | _        d S )Nr   r   head_dimg      	.qkv_proj)r)   	head_sizetotal_num_headstotal_num_kv_headsr-   r,   r.   z.o_projr2   r,   layer_typessliding_attentionz.attn)rR   rU   r,   per_layer_sliding_windowrV   r.   eps)r6   r7   r!   r)   r   r[   rQ   r\   maxrR   getattrrX   q_sizekv_sizescalingrS   r   qkv_projr   o_proj_init_rotary_embsliding_windowr   ENCODER_ONLYr   r   attnr   rms_norm_epsq_normk_norm)r<   rP   r)   rQ   rR   rS   r,   r-   rT   rU   r.   rV   	layer_idxtp_sizerX   rk   r^   
is_slidingattn_clsr=   s                      r>   r7   zApertusAttention.__init__u   s    	'//	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF6:t44'4+??H nt}4(4=8}d*'>$)#m 0#6%'''
 
 
 (+dm;#%%%%
 
 
 	f<@@@!&->>>; 	7$Y/3FFJ 7!'!6 M666 !  	 HNML*%%%3###

 

 

	 dm1DEEEdm1DEEEr?   	positionshidden_statesc                 n   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |                                                    d| j                                                |          }| 	                    |                                                    d| j                                                |          }| 
                    |||          \  }}|                     |||          }|                     |          \  }	}|	S )N)dim)rh   splitre   rf   ro   
contiguousviewrX   view_asrp   
rotary_embrm   ri   )
r<   ru   rv   qkvrC   qkvattn_outputoutputs
             r>   rD   zApertusAttention.forward   s    
 }--Q))T[$,E2)NN1aKK++B>>??GGJJKK++B>>??GGJJy!Q//1ii1a((KK,,	r?   c                     d}|o|                                 dk    }|r|j        dk    rd}t          | j        | j        |j        |          | _        d S )NTggufapertusF)max_positionrope_parametersis_neox_style)get_name
model_typer   rX   rS   r   r~   )r<   rP   r,   r   is_ggufs        r>   rj   z!ApertusAttention._init_rotary_emb   sl    
 D<#8#8#:#:f#D 	"v(I55!M"M5"2'	
 
 
r?   )rE   rF   rG   r   DECODERr   rH   r   rJ   r	   rI   r7   torchTensorrD   rj   rK   rL   s   @r>   rN   rN   t   sn        (,26!+/&.VF VFVF VF 	VF
 VF "%VF )4/VF VF VF "D(VF VF VF 
VF VF VF VF VF VFp< | 
	   

 )4/
 
	
 
 
 
 
 
 
 
r?   rN   c                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZd	ej	        d
ej	        dej	        dz  de
ej	        ej	        f         fdZ xZS )ApertusDecoderLayerNr(   rP   rU   r,   r.   r0   c                    t                                                       |j        | _        t          |dd          }t          |dd          pt          |dd          }|}t	          |d          r|j        }t          |dd          rt          j        }nt          j        }t          || j        |j
        t          |d	|j
                  |||||| d
|          | _        t          | j        |j        |j        |t          |dd          | d          | _        t!          |j        |j                  | _        t!          |j        |j                  | _        d S )NrS   rO   attention_biasFr-   qkv_bias	is_causalTnum_key_value_headsz
.self_attn)rP   r)   rQ   rR   rS   r,   r-   rT   rU   r.   rV   mlp_biasz.mlp)r)   r*   r+   r,   r-   r.   ra   )r6   r7   r)   rd   hasattrr   r   r   rl   rN   num_attention_heads	self_attnr'   r*   r+   mlpr   rn   attention_layernormfeedforward_layernorm)
r<   rP   rU   r,   r.   rS   r   rT   rV   r=   s
            r>   r7   zApertusDecoderLayer.__init__   s    	!-")&2KT"R"R !)95AA 
WFEF
 F
 %6:&& 	-#_N 6;-- 	3%-II%2I)(0 -v/I  %<%#%(((
 
 
 ($6(%U33???
 
 
 $+6+=6CV#W#W#W %,F$7&
 &
 &
"""r?   ru   rv   residualc                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )N)ru   rv   )r   r   r   r   )r<   ru   rv   r   s       r>   rD   zApertusDecoderLayer.forward'  s     $H 44]CCMM&*&>&>}h&W&W#M8-XX #'"<"<]H"U"Ux//h&&r?   )NNr(   )rE   rF   rG   r   r	   r   rI   r7   r   r   tuplerD   rK   rL   s   @r>   r   r      s         ,0267
 7
7
 "D(7
 )4/	7

 7
 
7
 7
 7
 7
 7
 7
r'<' |' ,%	'
 
u|U\)	*' ' ' ' ' ' ' 'r?   r   c                   F    e Zd Zdeddededeej                 f fdZ	de
j        de
j        fd	Z	 dde
j        d
z  de
j        ded
z  de
j        d
z  de
j        ez  ee
j        ee
j                 f         z  f
dZdeeee
j        f                  dee         fdZ xZS )ApertusModelr(   r.   
layer_typevllm_configr.   r   c                   t                                                       |j        j        |j        |j        | _        | _        j        | _        t                      j	        sj
        r5t                      j        r"t          | j        j                  | _        nt                      | _        t!          j        fd| d          \  | _        | _        | _        t                      j        r!t+          j        j                  | _        nt                      | _        t1          t2          df                     | _        t7          ddgj                  | _        d S )	Nr]   c                 "     |           S )N)rP   rU   r,   r.    )r.   rU   rP   r   r,   s    r>   <lambda>z'ApertusModel.__init__.<locals>.<lambda>[  s$    ::))	   r?   z.layers)r.   ra   .rv   r   )r6   r7   model_config	hf_configrU   r,   rP   
vocab_sizer   is_first_ranktie_word_embeddingsis_last_rankr   r)   embed_tokensr    r$   num_hidden_layersstart_layer	end_layerlayersr   rn   normr   rH   aux_hidden_state_layersr#   make_empty_intermediate_tensors)r<   r   r.   r   rU   rP   r,   r=   s      `@@@r>   r7   zApertusModel.__init__=  s    	)3"/"/( +>>' 		1&		1+7>>+F		1 !7")! ! !D !/ 0 0D8C$       %%%	9
 	9
 	9
5$.$+ >>& 	) 28KLLLDII&((DI',S#X'8'8$/Vj)6+=0
 0
,,,r?   	input_idsr0   c                 ,    |                      |          S rA   )r   r<   r   s     r>   embed_input_idszApertusModel.embed_input_idsn  s      +++r?   Nru   intermediate_tensorsinputs_embedsc                    t                      j        r||}n|                     |          }d }n|J |d         }|d         }g }t          t	          | j        | j        | j                            D ]6\  }}	|| j        v r|	                    ||z               |	|||          \  }}7t                      j
        st          ||d          S |                     ||          \  }}
t          |          dk    r||fS |S )Nrv   r   )rv   r   r   )r   r   r   	enumerater   r   r   r   r   appendr   r   r   len)r<   r   ru   r   r   rv   r   aux_hidden_statesidxlayerrC   s              r>   rD   zApertusModel.forwardq  sD    >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7H#4; 0$.AA
 
 	P 	PJC d222!(()ABBB&+eI}h&O&O#M88~~* 	&"/XFF    99]H==q !!A%% "333r?   weightsc                    g d}t          |                                           }|                                 D ]4\  }}|                    d          s|                    d          r|||<   5t	                      }|D ]\  }}d|v rd|v sd|v r| j        ~| j                            |          x}rb||         }	t          |	dt                    }
|	                                dk    r|n|d         } |
|	|           |
                    |           d	|v sd
|v rt          ||          }||D ]i\  }}}||vr|                    ||          }|                    d          r||vr;t          ||           rL||         }	|	j        }
 |
|	||            nW|                    d          r||vr;t          ||           rM||         }	t          |	dt                    }
 |
|	|           |
                    |           |S )N))rY   z.q_projr   )rY   z.k_projr   )rY   z.v_projr   z.betaz.epszrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedweight_loaderr   scale
zero_pointz.bias)dictnamed_parametersnamed_buffersendswithsetr,   get_cache_scalerd   r   ry   addr   replacer"   r   )r<   r   stacked_params_mappingparams_dictnamebufferloaded_paramsloaded_weight
scale_nameparamr   
param_nameweight_nameshard_ids                 r>   load_weightszApertusModel.load_weights  s   "
 "
 "
 4002233 !..00 	+ 	+LD&}}W%% +v)>)> +$*D!"%%%#* 2	$ 2	$D-$,,&$..2IT2Q2Q  ,"/??EEE
 - $J/ '@U V V%2%6%6%8%8A%=%=MM=QRCS  e]333!!*---$,$"6"60{CC<5K 4 41
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E*466 #D) '@U V Ve]333d####r?   rA   )rE   rF   rG   r   r
   rI   typer   Moduler7   r   r   r   r   r   listrD   r   r   r   rK   rL   s   @r>   r   r   ;  s_        &9/
 /
 /
  /
 	/

 O/
 /
 /
 /
 /
 /
b, ,%, , , , , .2# #<$&# <# 2D8	#
 |d*# 
+	+eEL$u|BT4T.U	U# # # #JBHU33D-E$F B3s8 B B B B B B B Br?   r   c                       e Zd Zdg diZdddZdedded	ed
ee	j
                 f fdZdeedf         ddfdZdeedf         fdZdefded	ed
ee	j
                 fdZdej        dej        fdZ	 	 ddej        dej        dedz  dej        dz  dej        ez  f
dZdej        dej        dz  fdZdeeeej        f                  dee         fdZ xZS )ApertusForCausalLMrh   )q_projk_projv_projinput_embeddingsoutput_embeddings)r   lm_headr(   r   r   r.   r   c          	      Z   t                                                       |j        j        }|j        }|| _        |                     |t          |d          |          | _        t                      j
        rt          |j        |j        |t          |d                    | _        |j        r)| j                            | j        j                  | _        t%          |dd          }t'          |j        |          | _        nt+                      | _        | j        j        | _        d S )Nmodelr   r.   r   r   )r,   r.   logit_scaleg      ?)r   )r6   r7   r   r   r,   rP   _init_modelr%   r   r   r   r   r   r)   r   r   tie_weightsr   rd   r   logits_processorr    r   )r<   r   r.   r   rP   r,   r   r=   s          r>   r7   zApertusForCausalLM.__init__  s+    	)3"/%%#00! & 
 

 >>& 	,)!")#FI66	  DL ) Q#|77
8OPP!&-==K$3!% % %D!! *++DL J6 	,,,r?   r   .r0   Nc                     || j         _        d S rA   )r   r   )r<   r   s     r>   set_aux_hidden_state_layersz.ApertusForCausalLM.set_aux_hidden_state_layers  s    -3
***r?   c                 J    t          | j        j                  }d|dz  |dz
  fS )N      )r   r   r   )r<   
num_layerss     r>   "get_eagle3_aux_hidden_state_layersz5ApertusForCausalLM.get_eagle3_aux_hidden_state_layers  s)    *++
:?JN33r?   c                 &    t          |||          S )Nr   )r   )r<   r   r.   r   s       r>   r   zApertusForCausalLM._init_model  s"     #Fz
 
 
 	
r?   r   c                 6    | j                             |          S rA   )r   r   r   s     r>   r   z"ApertusForCausalLM.embed_input_ids  s    z)))444r?   ru   r   r   c                 6    |                      ||||          }|S rA   )r   )r<   r   ru   r   r   model_outputs         r>   rD   zApertusForCausalLM.forward  s)     zzy"6
 
 r?   rv   c                 <    |                      | j        |          }|S rA   )r   r   )r<   rv   logitss      r>   compute_logitsz!ApertusForCausalLM.compute_logits+  s      &&t|]CCr?   r   c                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r   rP   r   r   )r<   r   loaders      r>   r   zApertusForCausalLM.load_weights2  sC    "+/;+JTJ<<PT
 
 
 ""7+++r?   )NN)rE   rF   rG   packed_modules_mappingembedding_modulesr   r
   rI   r   r   r   r7   r   rH   r   r   r   r   r   r   r   rD   r   r   r   r   rK   rL   s   @r>   r   r     s!       (*H*H*HI +&  &9%
 %
 %
  %
 	%

 O%
 %
 %
 %
 %
 %
N4%S/ 4d 4 4 4 44E#s(O 4 4 4 4 &9	
 

 
 O	
 
 
 
5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r?   r   )>__doc__collections.abcr   	itertoolsr   r   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   %vllm.model_executor.layers.activationr   ;vllm.model_executor.layers.attention.encoder_only_attentionr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   vllm.v1.attention.backendr   
interfacesr   r   utilsr   r    r!   r"   r#   r$   r%   r   r'   rN   r   r   r   r   r?   r>   <module>r     sg  4 H G $ $ $ $ $ $              & & & & & & * * * * * * = = = = = = / / / / / / / / O O O O O O O O 7 7 7 7 7 7      9 8 8 8 8 8         
 H G G G G G F F F F F F @ @ @ @ @ @               . - - - - - 3 3 3 3 3 3 0 0 0 0 0 0 0 0                 & & & & & & & &Rv
 v
 v
 v
 v
ry v
 v
 v
rK' K' K' K' K'") K' K' K'\ \ \ \ \ \29 \ \ \~\, \, \, \, \,L* \, \, \, \, \,r?   