
    .`i                     ^   d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZmZ d dlmZ d dlmZmZmZmZmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z(m)Z) d dl*m+Z+ d dl,m-Z- ddl.m/Z/ ddl0m1Z1m2Z2m3Z3m4Z4m5Z5  ee6          Z7 ej8         ej9                    j:                  Z; G d dej<                  Z= G d dej<                  Z> G d dej<                  Z? G d dej<                  Z@ G d  d!ej<                  ZA e	d" #           G d$ d%ej<                              ZB e	d& #           G d' d(ej<                              ZC e	d) #           G d* d+ej<        e/                      ZD G d, d-ej<                  ZEdS ).    )IterableN)nn)Gemma3nTextConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)$get_tensor_model_parallel_world_size)get_forward_context)init_logger)_ACTIVATION_REGISTRY
GeluAndMulGeluAndMulSparse)RMSNorm)ColumnParallelLinearMergedColumnParallelLinearQKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)VocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors)KVSharingFastPrefillMetadata   )SupportsQuant)AutoWeightsLoaderextract_layer_indexis_pp_missing_parametermake_layersmaybe_prefixc                        e Zd ZdZdededededededef fd	Zd
e	j
        de	j
        fdZde	j
        de	j
        fdZde	j
        de	j
        fdZde	j
        de	j
        de	j
        fdZ xZS )Gemma3nAltUpa  Alternating updates (Altup)
    The AltUp module wraps transformer layers. The `predict` step modifies the
    input to the transformer layer, and the `correct` step propagates the output
    of the transformer layer to the sparsely updated dimensions.
    See more in the research paper:
    https://proceedings.neurips.cc/paper_files/paper/2023/file/f2059277ac6ce66e7e5543001afa8bb5-Paper-Conference.pdf
    hidden_sizerms_norm_epsaltup_num_inputsaltup_coef_clipaltup_active_idxquant_configprefixc                    t                                                       || _        || _        || _        t          ||d|| dd          | _        t          ||dz  d|| dd          | _        t          ||d|| dd          | _        t          ||          | _
        t          j        |dz  | j        j        j        	          | _        t!          j        t          j        |t          j        	                    | _        d S )
NFz.correction_coefsbiasr,   r-   return_bias   z.prediction_coefsz.modality_routerr'   epsg      dtype)super__init__r)   r+   r*   r   correction_coefsprediction_coefsmodality_routerr   router_normtorchtensorweightr6   router_input_scaler   	Parameterzerosfloat32correct_output_scale)	selfr'   r(   r)   r*   r+   r,   r-   	__class__s	           v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/gemma3n.pyr8   zGemma3nAltUp.__init__M   sO    	 0 0. 0%///!
 !
 !
 !1a%///!
 !
 !
  0%... 
  
  
 ##
 
 
 #(,T%9%@%F#
 #
 #
 %'LK5=999%
 %
!!!    xreturnc                     |                      |          | j        z  }|                     |          }t          j        |                                                              |          S N)r<   r@   r;   r=   tanhfloattype_as)rE   rI   router_inputsrouteds       rG   _compute_router_modalitiesz'Gemma3nAltUp._compute_router_modalities   sV    ((++d.EE%%m44z&,,..))11!444rH   	correctedc                 l    |                     | j                  | j        z                       |          S rL   )rO   rD   )rE   rS   s     rG   scale_corrected_outputz#Gemma3nAltUp.scale_corrected_output   s0    d7884;TT
')

	rH   hidden_statesc                    |                      || j                           }|                     |          }|                    d| j        | j                                      ddd          }t          j        |                    ddd          |          }|                    ddd          }||z  }|                                S )Nr   r2   r   )	rR   r+   r:   reshaper)   permuter=   matmul
contiguous)rE   rV   
modalities	all_coefsall_coefs_Tpredictionss         rG   predictzGemma3nAltUp.predict   s     44$/0
 

 ))*55	  ''!!
 
 '!Q

	 	 l=#8#8Aq#A#A;OO!))!Q22}$%%'''rH   r`   	activatedc                 D   |                      |          }||| j                 z
  }|                    | j        dd          }|                     |          dz   }|j                            d          }t          j        ||          }||z  }|	                                S )Nr         ?rX   )
rR   r+   repeatr)   r9   T	unsqueezer=   mulr\   )rE   r`   rb   r]   
innovationr^   rS   s          rG   correctzGemma3nAltUp.correct   s     44Y??
T-B!CC
&&t'<aCC
 ))*55;	K))"--	 Ij)44	[ 	##%%%rH   )__name__
__module____qualname____doc__intrN   r   strr8   r=   TensorrR   rU   ra   rj   __classcell__rF   s   @rG   r&   r&   D   s0        1
1
 1
 	1

 1
 1
 )1
 1
 1
 1
 1
 1
 1
f5EL 5U\ 5 5 5 5
     
(U\ (el ( ( ( (0& <&49L&	& & & & & & & &rH   r&   c                   l     e Zd ZdZdddededededz  ded	df fd
Zde	j
        d	e	j
        fdZ xZS )Gemma3nLaurelBlockz Learned Augmented Residual LayerN)r,   r'   laurel_rankr(   r,   r-   rJ   c                    t                                                       t          ||d|| dd          | _        t	          ||d|| dd          | _        t          ||          | _        d S )NFz.linear_leftr/   z.linear_rightr3   )r7   r8   r   linear_leftr   linear_rightr   post_laurel_norm)rE   r'   rv   r(   r,   r-   rF   s         rG   r8   zGemma3nLaurelBlock.__init__   s     	/%***
 
 
 .%+++
 
 
 !(#!
 !
 !
rH   rI   c                     |                      |          }|                     |          }|                     |          }||z   S rL   )rx   ry   rz   )rE   rI   laurel_xnormed_laurel_xs       rG   forwardzGemma3nLaurelBlock.forward   sF    ##A&&$$X..//99?""rH   )rk   rl   rm   rn   ro   rN   r   rp   r8   r=   rq   r~   rr   rs   s   @rG   ru   ru      s        ** 37
 
 

 
 	
 )4/
 
 

 
 
 
 
 
@# #%, # # # # # # # #rH   ru   c                   n     e Zd Z	 	 	 ddedededededz  d	ed
df fdZdej	        d
ej	        fdZ
 xZS )
Gemma3nMLP        N r'   intermediate_sizehidden_activationactivation_sparsityr,   r-   rJ   c                 B   t                                                       t          ||gdz  d|| d          | _        t	          ||d|| d          | _        |dk    rt          d          |dk    rt          |d	
          nt          d	          | _	        d S )Nr2   Fz.gate_up_projr0   r,   r-   z
.down_projgelu_pytorch_tanhzGemma3 uses `gelu_pytorch_tanh` as the hidden activation function. Please set `hidden_act` and `hidden_activation` to `gelu_pytorch_tanh`.r   rM   )r   approximate)r   )
r7   r8   r   gate_up_projr   	down_proj
ValueErrorr   r   act_fn)rE   r'   r   r   r   r,   r-   rF   s          rG   r8   zGemma3nMLP.__init__   s     	6!#%+++
 
 
 +%(((
 
 
  333'   #S(( $7V    /// 	rH   rI   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S rL   )r   r   r   )rE   rI   gate_up_s       rG   r~   zGemma3nMLP.forward  sD    &&q))
KK  ~~a  1rH   )r   Nr   )rk   rl   rm   ro   rp   rN   r   r8   r=   rq   r~   rr   rs   s   @rG   r   r      s         &)26%
 %
%
 %
 	%

 #%
 )4/%
 %
 
%
 %
 %
 %
 %
 %
N %,        rH   r   c                        e Zd Z	 	 	 ddedededededed	edz  d
edz  deddf fdZde	j
        de	j
        de	j
        fdZ xZS )Gemma3nAttentionNr   configr'   	num_headsnum_kv_headshead_dimmax_position_embeddingscache_configr,   r-   rJ   c
                    t                                                       || _        || _        t	                      }
|| _        | j        |
z  dk    sJ | j        |
z  | _        || _        | j        |
k    r| j        |
z  dk    sJ n|
| j        z  dk    sJ t          d| j        |
z            | _	        || _
        | j        | j
        z  | _        | j	        | j
        z  | _        t          || j
        | j        | j        |j        ||	 d          | _        t!          | j        | j
        z  ||j        ||	 d          | _        t%          | j
        |j                  | _        t%          | j
        |j                  | _        t%          | j
        |j        d          | _        t/          |	          }|j        |         }|d	k    }|r|j        nd | _        ||j        v r|j        |         }n%|j                                        }|r
|j        |d
<   |j        |j        z
  }||k    | _        d }| j        rQ| j        dnd}||z
  }|dk    r;d|	v r|	                     d          d         }ntC          d|	 d          | d| d}tE          | j
        ||d          | _#        tI          | j        | j
        d| j	        ||| j        ||	 d	  	        | _%        d S )Nr   r   z	.qkv_projr   z.o_projr3   F)r'   r4   
has_weightsliding_attention
rope_thetar2   z.layers.z0Unexpected prefix format for Gemma3nAttention: 'zc'. The prefix is expected to contain '.layers.' to correctly determine the KV sharing target layer.z.self_attn.attnT)max_positionrope_parametersis_neox_stylerd   z.attn)	r   	head_sizescaler   r   r,   per_layer_sliding_windowkv_sharing_target_layer_namer-   )&r7   r8   r   r'   r
   total_num_headsr   total_num_kv_headsmaxr   r   q_sizekv_sizer   attention_biasqkv_projr   o_projr   r(   q_normk_normv_normr!   layer_typessliding_windowr   copyrope_local_base_freqnum_hidden_layersnum_kv_shared_layersis_kv_sharedsplitr   r   
rotary_embr   attn)rE   r   r'   r   r   r   r   r   r,   r-   tp_size	layer_idx
layer_type
is_slidingr   first_kv_shared_layer_idxr   offsetkv_shared_layer_indexparam_name_before_layersrF   s                       rG   r8   zGemma3nAttention.__init__  s    	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF nt}4(4=8)M #&%'''
 
 
 ( 4=0&%%%%
 
 
 $-V=PQQQ$-V=PQQQ6+>5
 
 
 (//	'	2
#66
7AKf33t ///$4Z@OO %499;;O L060K- $v'BB 	" &)BB'+$ 	{ -9QQqF$=$F!$)) ''/5||J/G/G/J,,$("( ( (   3K/z/zTi/z/z/z,"M0+	
 
 
 nm*%%%)%8)E###

 

 

			rH   	positionsrV   c                    |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                    d| j        | j        f          }|                     |          }|                    dd          }|                    d| j	        | j        f          }| 
                    |          }|                    dd          }|                    d| j	        | j        f          }|                     |          }|                    dd          }|                     |||          \  }}|                     |||          }	|                     |	          \  }
}|
S )NrX   dim)r   r   r   r   	unflattenr   r   r   flattenr   r   r   r   r   r   )rE   r   rV   kwargsqkvr   qkvattn_outputoutputs              rG   r~   zGemma3nAttention.forward  sE    }--Q))T[$,E2)NN1aKKT^T];<<KKNNIIb"KKT.>??KKNNIIb"KKT.>??KKNNIIb"y!Q//1ii1a((KK,,	rH   NNr   )rk   rl   rm   r   ro   r   r   rp   r8   r=   rq   r~   rr   rs   s   @rG   r   r     s        ,026{
 {
!{
 {
 	{

 {
 {
 "%{
 "D({
 )4/{
 {
 
{
 {
 {
 {
 {
 {
z< |
 
       rH   r   c                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZd	ej	        d
ej	        dej	        de
ej	        ej	        f         fdZ xZS )Gemma3nDecoderLayerNr   r   r   r,   r-   rJ   c                    t                                                       t          |t                    sJ |j        | _        |j        sJ t          |j        |j        |j	        |j
        |j        || d          | _        t          ||j        |j        |j        |j        |j        ||| d	  	        | _        t%          |j        |j        t)          |                   |j        ||j        t)          |                   | d          | _        t1          |j        |j        |j        || d          | _        t7          |j        |j        d	|| d
d	          | _        t7          |j        |j        d	|| dd	          | _        t?          |j        |j                  | _         t?          |j        |j                  | _!        t?          |j        |j                  | _"        t?          |j        |j                  | _#        t?          |j        |j                  | _$        tJ          |j                 | _&        d S )Nz.altup)r'   r(   r)   r*   r+   r,   r-   z
.self_attn)	r   r'   r   r   r   r   r   r,   r-   z.mlp)r'   r   r   r,   r   r-   z.laurel)r'   rv   r(   r,   r-   Fz.per_layer_input_gater/   z.per_layer_projectionr4   )'r7   r8   
isinstancer   r+   altup_correct_scaler&   r'   r(   r)   r*   altupr   num_attention_headsnum_key_value_headsr   r   	self_attnr   r   r!   r   activation_sparsity_patternmlpru   rv   laurelr   hidden_size_per_layer_inputper_layer_input_gateper_layer_projectionr   input_layernormpost_attention_layernormpre_feedforward_layernormpost_feedforward_layernormpost_per_layer_input_normr   r   )rE   r   r   r,   r-   rF   s        rG   r8   zGemma3nDecoderLayer.__init__  s    	&"344444 & 7))))!*,#4"2#4%$$$
 
 

 **03_$*$B%%(((

 

 

 *$67J67R7RS$6% & B#F++! ???

 

 

 )**,%%%%
 
 
 %5.%333%
 %
 %
! %5.%333%
 %
 %
!  '# 
  
  
 )0#)
 )
 )
% *1#*
 *
 *
& +2#+
 +
 +
' *1#*
 *
 *
&
 +6+CDrH   r   rV   per_layer_inputc                 V   | j                             |          }|| j                 }|                     |          }|                     |          } | j        d||d|}	|                     |	          }	|	|z   }
|
|z   t          j        t          j	        d                    z  }| 
                    |          }|                     |          }|                     |          }||z   }| j                             ||          }|| j                 }| j                             |          }|                     |          }|                     |          }t          j        ||          }|                     |          }|                     |          }|dd xx         |z  cc<   |S )N)r   rV          @r    )r   ra   r+   r   r   r   r   r=   sqrtr>   r   r   r   rj   rU   r   r   rh   r   r   )rE   r   rV   r   r   r`   active_predictionactive_prediction_normedlaurel_outputr   
attn_gatedattn_laurel	attn_normattn_ffwattn_ffw_normattn_ffw_laurel_gatedcorrected_predictionsfirst_predictions                     rG   r~   zGemma3nDecoderLayer.forward  s    j((77'(=>#'#7#78I#J#J $<== t~ 
2
 
 
 

 ,,T22--
!M1UZS@Q@Q5R5RR 22;??	88I&&77AA +m ; !%
 2 2;@U V V01FG:<<=MNN  445EFF;;'788 9%5GG  445EFF99:JKKabb!!!%55!!!$$rH   r   )rk   rl   rm   r   r   r   rp   r8   r=   rq   tupler~   rr   rs   s   @rG   r   r     s         ,026]E ]E!]E "D(]E )4/	]E
 ]E 
]E ]E ]E ]E ]E ]E~,%<,% |,% 	,% 
u|U\)	*,% ,% ,% ,% ,% ,% ,% ,%rH   r   c                     | j         j        S rL   r   kv_sharing_fast_prefillvllm_configs    rG   <lambda>r   ?      +":"R rH   )	enable_ifc                   d    e Zd ZdZdddededee         def fdZ	d	e
j        d
e
j        fdZde
j        de
j        dz  d
e
j        fdZd	e
j        d
e
j        fdZde
j        d
e
j        fdZ	 	 dd	e
j        de
j        de
j        dz  de
j        dz  d
ee
j        e
j        f         f
dZ xZS )Gemma3nSelfDecoderz:
    Includes altup embedding and self decoder layers
    r   r-   r   r-   decoder_layerslayer_idx_startc          
      &   t                                                       || _        || _        |j        j        | _        |j        t          j	        j
         d          | _        t          j        j
        dz  | j        j        j                  | _        t          j        j        j        z   d          | _        t          j        j        dz  | j        j        j                  | _        t-          j
        j        j        z  ddd d	          | _        t1          j        j        
          | _        t          j        t          j        d                                        | j        j        j                  | _        t          j        j
        dz  | j        j        j                  | _        t?          j         fdtC          d| j        j"                  D                       | _#        d S )Nz.embed_tokens)r,   r-         ?r5   z.per_layer_embed_tokensFTz.per_layer_model_projectionr0   gather_outputr1   r,   r-   r3   r   c                 `    g | ]*}t          j        j        d dd  d|dz
             +S )FTz.altup_projections.r   r  r   r'   .0idxr   r-   r,   s     rG   
<listcomp>z/Gemma3nSelfDecoder.__init__.<locals>.<listcomp>  sg         %&&"& %!-$BBqBB    rH   r   )$r7   r8   r  r  model_config	hf_configr   r,   r   
vocab_sizer'   embed_tokensr=   r>   r?   r6   embed_scalevocab_size_per_layer_inputr   r   embed_tokens_per_layerembed_scale_per_layerr   per_layer_model_projectionr   r(   per_layer_projection_normrsqrttoper_layer_input_scaleper_layer_projection_scaler   
ModuleListranger)   altup_projections)rE   r   r-   r  r  r   r,   rF   s     `  @@rG   r8   zGemma3nSelfDecoder.__init__F  sM    	,.)3"/2%+++	
 
 
 !<##*0
 
 

 '=-$v'II%555	'
 '
 '
# &+\.3#*0&
 &
 &
" +?$v'II%999+
 +
 +
' *1:#*
 *
 *
& &+[c1B1B%C%C%F%F$*&
 &
" +0,##*0+
 +
 +
' "$      !DK$@AA  "
 "
rH   	input_idsrJ   c                     t          j        |dk    || j        j        k               }t          j        ||t          j        |                    }|                     |          | j        z  S )Nr   )r=   logical_andr   r  where
zeros_liker  r  )rE   r  per_layer_inputs_maskper_layer_inputs_tokenss       rG   get_per_layer_input_embeddingsz1Gemma3nSelfDecoder.get_per_layer_input_embeddings  st     !& 1NI(NN!
 !
 #(+!9e.>y.I.I#
 #
 ''(?@@()	
rH   hidden_states_0per_layer_inputsNc                     |                      |          } |j        g |j        d d         | j        j        | j        j        R  }|                     |          }|||z   }|| j        z  }n|}|S )NrX   )r  rY   shaper   r   r   r  r  )rE   r&  r'  r   s       rG   get_per_layer_inputsz'Gemma3nSelfDecoder.get_per_layer_inputs  s    
  $>>OO;3;  
"3B3' 
K) 
 K3 
  
  

  $==>RSS'36FF ::3rH   c                 <    |                      |          | j        z  S rL   )r  r  rE   r  s     rG   embed_input_idsz"Gemma3nSelfDecoder.embed_input_ids  s      ++d.>>>rH   c           	         |g| j         j        z  }t          j        |dz  dd          dz  }t	          d| j         j                  D ]r} | j        |dz
           ||                   ||<   t          j        ||         dz  dd          dz  }||xx         |t          j        |t                    z  z  cc<   st          j        |d          }|S )Nr2   rX   Tr   keepdimr  r   r   )	r   r)   r=   meanr  r  maximumEPSstack)rE   r&  rV   target_magnitudeinew_magnitudes         rG   altup_embedzGemma3nSelfDecoder.altup_embed  s    ()DK,HH :oq&8b$OOOSVVq$+677 	U 	UA<t5a!e<]1=MNNM!
=+q0b$GGG3N  ! 05=PS3T3T TTMr:::rH   r   inputs_embedsc                 j   ||}n|                      |          }|                     ||          }|                     |          }|                    ddd          }t	          | j                  D ](\  }	}
|	| j        z   } |
d|||d d |d d f         d|})|                    ddd          }||fS Nr2   r   r   )r   rV   r   r   )r-  r*  r8  rZ   	enumerater  r  )rE   r  r   r9  r'  r   r&  adjusted_per_layer_inputsrV   r  layerr   s               rG   r~   zGemma3nSelfDecoder.forward  s    $+OO"229==O$($=$=-%
 %
! ((99 &--aA66#D$788 	 	JCd22I!E #+ 9!!!Y/ J  	 MM &--aA66777rH   NN)rk   rl   rm   rn   r	   rp   listr   ro   r8   r=   rq   r%  r*  r-  r8  r   r~   rr   rs   s   @rG   r   r   >  s         	F
 F
 F
  F
 	F

 01F
 F
 F
 F
 F
 F
 F
P
 
 
 
 
 
    ,-  
	       (? ?%, ? ? ? ?5< EL    " .204"8 "8<"8 <"8 |d*	"8
  ,-"8 
u|U\)	*"8 "8 "8 "8 "8 "8 "8 "8rH   r   c                     | j         j        S rL   r   r   s    rG   r   r     r   rH   c            	            e Zd ZdZdddededee         def fdZ	d	e
j        d
e
j        de
j        de
j        fdZ xZS )Gemma3nCrossDecoderz
    Cross-decoder layers
    r   r   r   r-   r  r  c                d    t                                                       || _        || _        d S rL   )r7   r8   r  r  )rE   r   r-   r  r  rF   s        rG   r8   zGemma3nCrossDecoder.__init__  s1     	,.rH   r   rV   r'  rJ   c                     |                     ddd          }t          | j                  D ](\  }}|| j        z   } |d|||d d |d d f         d|})|                     ddd          }|S r;  )rZ   r<  r  r  )rE   r   rV   r'  r   r  r>  r   s           rG   r~   zGemma3nCrossDecoder.forward  s     &--aA66#D$788 	 	JCd22I!E #+ 0Iqqq A  	 MM &--aA66rH   )rk   rl   rm   rn   r	   rp   r@  r   ro   r8   r=   rq   r~   rr   rs   s   @rG   rC  rC    s          	
/ 
/ 
/  
/ 	
/
 01
/ 
/ 
/ 
/ 
/ 
/ 
/< |  ,	 
       rH   rC  c                     | j         j         S rL   r   r   s    rG   r   r     s    k&>&V"V rH   c                   *    e Zd Zdddedef fdZed             Zdej	        dej	        fd	Z
dej	        dej	        fd
Z	 	 ddej	        dej	        dej	        dz  dej	        dz  dej	        f
dZ	 	 ddej	        dej	        dej	        dz  dej	        dz  dej	        f
dZdej	        dej	        fdZ	 	 	 ddej	        dz  dej	        dej	        dz  dedz  dej	        dz  dej	        ez  fdZdeeeej	        f                  dee         fdZ xZS )Gemma3nTextModelr   r   r   r-   c                  	 t                                                       |j        j        |j        |j        	| _        	| _        t          j        	fdt          d| j        j
                  D                       | _        t          j        	fd d          \  | _        | _        | _        j        j        z
  }ddlm}  |d          5  t)          | d	| j        d |         d
          | _        d d d            n# 1 swxY w Y    |d          5  t-          | d| j        |d          |
          | _        d d d            n# 1 swxY w Y   t1          j        j                  | _        j        | _        | j        r|j        j        }tA          | !                                          j"        }tG          j$        |tF          j%        |          | _&        tG          j$        |j        | j        j
        f| j'        j(        j)        |          | _*        tG          j$        || j        j        | j        j+        f| j'        j(        j)        |          | _,        d S d S )Nc                 `    g | ]*}t          j        j        d dd  d|dz
             +S )FTz.altup_unembed_projections.r   r  r  r	  s     rG   r  z-Gemma3nTextModel.__init__.<locals>.<listcomp>   sg         %&&"& %!-$JJqJJ    rH   r   c                 *    t          |           S )Nr   )r   )r-   r   r   r,   s    rG   r   z+Gemma3nTextModel.__init__.<locals>.<lambda>1  s     .l6   rH   z.layersr   r   )set_model_tagself_decoderz.self_decoder)r   r-   r  r  cross_decoderz.cross_decoderr   r6   device)-r7   r8   r  r  r   r,   r   r   r  r  r)   altup_unembed_projectionsr#   r   start_layer	end_layerlayersr   vllm.compilation.backendsrL  r   rM  rC  rN  r   r'   r(   normr   fast_prefill_enabledscheduler_configmax_num_batched_tokensnext
parametersrP  r=   rB   int64r   r  r?   r6   rV   r   r'  )rE   r   r-   r   rL  max_num_tokensrP  r   r   r,   rF   s     `    @@@rG   r8   zGemma3nTextModel.__init__  sr   )3"/"/()+      !DK$@AA  *
 *
&  9D$      %%%9
 9
 9
5$.$+ $v'BB 	" 	<;;;;; ]>** 	 	 2' ///#{+E,E+EF !	! ! !D	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ]?++ 	 	!4' 000#{+D+E+EF 9	" " "D	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 #
 
 
	
 %1$H!$ 	 )9PN$//++,,3F"[ek&  DN "'!3T[5QR'.4" " "D
 %*K"K1K;
 '.4% % %D!!!	 	s$   )DDD*)EE#&E#c                     | j         j        S rL   )rM  r  )rE   s    rG   r  zGemma3nTextModel.embed_tokensn  s     --rH   r  rJ   c                 6    | j                             |          S rL   )rM  r%  r,  s     rG   r%  z/Gemma3nTextModel.get_per_layer_input_embeddingsr  s     ??	JJJrH   c                 6    | j                             |          S rL   )rM  r-  r,  s     rG   r-  z Gemma3nTextModel.embed_input_idsu  s     00;;;rH   Nr   r9  r'  c                    d\  }}t                      j        }| j        r^|\t          |t                    sJ || j        d         j        j        j                 }	t          |	t                    r|	j
        }|	j        }|                    d          }
| j        d |
                             |            | j        d|| j        d |
         ||d|\  }}|4t!          j        |                    d          |j        |j                  }|                                }|                    d          }| j        d |                             ||                    | j        d |                             ||                    | j        d |                             ||                     | j        d| j        d |         | j        d |         | j        d |         d|}||dk    sJ |d |         ||d |         <   n|}|S )Nr?  rX   r   r  r   r9  r'  rO  r   rV   r'  r   )r   attn_metadatarW  r   dictrT  r   r   
layer_namer   logits_indices_paddednum_logits_indicessizer   copy_rM  r=   aranger6   rP  clonerV   r'  rN  )rE   r  r   r9  r'  r   rg  rh  rd  layer_attn_metadata
batch_sizeself_decoder_hidden_statesper_layer_inputs_adjustedrV   num_padded_logits_indicescross_decoder_hidden_statess                   rG   fast_prefill_forwardz%Gemma3nTextModel.fast_prefill_forwardx  s    5?11+--; $ 	L)BmT22222"/B).9# -/KLL L(;(Q%%8%K" ^^A&&
{
{#)))444@Q@Q A
n[j[1'-	A
 A

 A
 A
="$= !($)Lq!!o '% % %! 388:: %:$>$>q$A$A!111288+,	
 	
 	
 	5556<<&'<=	
 	
 	
 	8889??%&;<	
 	
 	
 '9d&8 '
n%?&?%?@,-G.G-GH!23M4M3MN'
 '
 	'
 '
# )%)))) ,,?-?,?@ /0C1C0CDEE 8MrH   c                 R     | j         d||||d|\  }} | j        d|||d|}|S )Nrb  rc  r   )rM  rN  )rE   r  r   r9  r'  r   rV   s          rG   normal_forwardzGemma3nTextModel.normal_forward  sy     +<$*; +
'-	+
 +

 +
 +
'' +* 
'-
 
 	
 
 rH   rV   c           	         t          j        |d         dz  dd          dz  }t          d| j        j                  D ]z} | j        |dz
           |d|f                   |d|f<   t          j        |d|f         dz  dd          dz  }|d|fxx         |t          j        |t                    z  z  cc<   {t          j        |d	          }|S )
N).r   r2   rX   Tr/  r  r   .r   )r=   r1  r  r   r)   rQ  r2  r3  )rE   rV   r5  r6  r7  s        rG   altup_unembedzGemma3nTextModel.altup_unembed  s	    J}V,1r4HHHCO 	 q$+677 		 		A$ID$B1q5$Ic1f%% %M#q&! 
=a0A52tLLLPSS  #q&!!!%5s9 9 & !!!! 
=b999rH   intermediate_tensorsc                     | j         r | j        ||||fi |}n | j        ||||fi |}|                     |          }|                     |          S rL   )rW  rs  ru  rw  rV  rE   r  r   r'  rx  r9  r   rV   s           rG   r~   zGemma3nTextModel.forward  s     $ 	5D5 	 
  MM 0D/ 	 
  M **=99yy'''rH   weightsc                    g d}t          |                                           }t                      }|D ]\  }}|                    d          s/|                    d          s|                    d          sd| }| j        d| j                            |          x}rH||         }t          |dt                    }	|d         } |	||           |                    |           |D ]n\  }
}}||vrd|v r|	                    ||
          }|
                    d	          r||vr@t          ||           rQ||         }|j        }	 |	|||            nk|
                    d	          r||vrAt          ||          }|Ut          ||           rg||         }t          |dt                    }	 |	||           |                    |           |S )
N))r   q_projr   )r   k_projr   )r   v_projr   )r   	gate_projr   )r   up_projr   rT  rQ  rV  zself_decoder.weight_loaderr   r  z.bias)re  named_parametersset
startswithr,   get_cache_scalegetattrr   addreplaceendswithr"   r  r   )rE   r{  stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr  
param_name
shard_nameshard_ids                rG   load_weightszGemma3nTextModel.load_weights  sT   "
 "
 "
 4002233"%%%#* 1	$ 1	$D- OOH--.(CDD. //.
 .t-- ,"/??EEE
 - $J/ '@U V V -a 0e]333!!*---4J 4 40
JT))&$..||J
;;==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E0{CC<*466 #D) '@U V Ve]333d####rH   r?  )NNN)rk   rl   rm   r	   rp   r8   propertyr  r=   rq   r%  r-  rs  ru  rw  r   r~   r   r   r  r  rr   rs   s   @rG   rH  rH    s        BD U U Uz U3 U U U U U Un . . X.K K K K K K< <%, < < < < .204K K<K <K |d*	K
  ,-K 
K K K Kb .204 < < |d*	
  ,- 
   .| 
   4 15;?-1( (<$&( <(  ,-	(
 2D8( |d*( 
+	+( ( ( (8>HU33D-E$F >3s8 > > > > > > > >rH   rH  c                   D    e Zd Zg dddgdZdddedef fd	Zd
ej        dej        fdZ	ddddd
ej        dej        dej        dz  de
dz  dej        dz  dej        e
z  fdZdej        dej        dz  fdZdeeeej        f                  dee         fdZ xZS )Gemma3nForCausalLM)r}  r~  r  r  r  )r   r   r   r   r   r-   c                   |j         j        }t                                                       || _        |j        | _        t          |t          |d                    | _        t          |j
        |j                  | _        d S )Nmodel)r   r-   )soft_cap)r  r  r7   r8   r   r   rH  r$   r  r   r  final_logit_softcappinglogits_processor)rE   r   r-   r   rF   s       rG   r8   zGemma3nForCausalLM.__init__\  s    )3'4%#L,I,I
 
 

 !0(F!
 !
 !
rH   r  rJ   c                 6    | j                             |          S rL   )r  r-  r,  s     rG   r-  z"Gemma3nForCausalLM.embed_input_idsi  s    z)))444rH   Nr'  rx  r9  r   r'  rx  r9  c                ,     | j         ||f|||d|}|S )Nr  )r  rz  s           rG   r~   zGemma3nForCausalLM.forwardl  sD     #

 .!5'
 
 
 
 rH   rV   c                 F    |                      | j        j        |          }|S rL   )r  r  r  )rE   rV   logitss      rG   compute_logitsz!Gemma3nForCausalLM.compute_logits  s#     &&tz'>NNrH   r{  c                 R    t          | g d          }|                    |          S )N)zembed_audio.zembed_vision.zaudio_tower.zvision_tower.)skip_substrs)r    r  )rE   r{  loaders      rG   r  zGemma3nForCausalLM.load_weights  s8    "RRR
 
 
 ""7+++rH   )rk   rl   rm   packed_modules_mappingr	   rp   r8   r=   rq   r-  r   r~   r  r   r   r  r  rr   rs   s   @rG   r  r  O  s       
 
 
 

 
 BD 
 
 
z 
3 
 
 
 
 
 
5 5%, 5 5 5 5 15;?-1  < <
  ,- 2D8 |d* 
+	+   (| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,rH   r  )Fcollections.abcr   r=   r   1transformers.models.gemma3n.configuration_gemma3nr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr   r	   vllm.distributedr
   vllm.forward_contextr   vllm.loggerr   %vllm.model_executor.layers.activationr   r   r   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer    vllm.v1.attention.backends.utilsr   
interfacesr   utilsr    r!   r"   r#   r$   rk   loggerr>   finfominr3  Moduler&   ru   r   r   r   r   rC  rH  r  r   rH   rG   <module>r     s  $ % $ $ $ $ $        O O O O O O * * * * * * = = = = = = / / / / / / / / A A A A A A 4 4 4 4 4 4 # # # # # #         
 9 8 8 8 8 8              H G G G G G F F F F F F @ @ @ @ @ @ V V V V V V        . - - - - - I I I I I I % % % % % %              
X		el;5;==$%%v& v& v& v& v&29 v& v& v&r'# '# '# '# '# '# '# '#T, , , , , , , ,^U U U U Ury U U UpL% L% L% L% L%") L% L% L%` RR  b8 b8 b8 b8 b8 b8 b8 b8L RR  % % % % %") % % %R VV  v v v v vry- v v vr	?, ?, ?, ?, ?, ?, ?, ?, ?, ?,rH   