
    .`imL                     .   d dl mZ d dlmZ d dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z%m&Z& d dl'm(Z(m)Z) d dl*m+Z+ d dl,m-Z- ddl.m/Z/m0Z0 ddl1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7  ee8          Z9 G d dej:                  Z; G d dej:                  Z< G d dej:                  Z=e G d d ej:                              Z> G d! d"ej:        e/e0          Z?dS )#    )Iterable)isliceN)nn)Gemma3TextConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)init_logger)
GeluAndMul)EncoderOnlyAttention)GemmaRMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors)AttentionType   )SupportsLoRA
SupportsPP)AutoWeightsLoaderextract_layer_indexis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   h     e Zd Z	 	 ddededededz  deddf fd	Zd
ej        dej        fdZ	 xZ
S )	Gemma3MLPN hidden_sizeintermediate_sizehidden_activationquant_configprefixreturnc                    t                                                       t          ||gdz  d|| d          | _        t	          ||d|| d          | _        |dk    rt          d          t          d	          | _        d S )
N   Fz.gate_up_projbiasr,   r-   z
.down_projgelu_pytorch_tanhzGemma3 uses `gelu_pytorch_tanh` as the hidden activation function. Please set `hidden_act` and `hidden_activation` to `gelu_pytorch_tanh`.tanh)approximate)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)selfr)   r*   r+   r,   r-   	__class__s         u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/gemma3.pyr7   zGemma3MLP.__init__D   s     	6!#%+++
 
 
 +%(((
 
 
  333'  
 !V444    xc                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r8   r;   r9   )r<   r@   gate_up_s       r>   forwardzGemma3MLP.forwardc   sD    &&q))
KK  ~~a  1r?   )Nr(   )__name__
__module____qualname__intstrr   r7   torchTensorrE   __classcell__r=   s   @r>   r'   r'   C   s         375 55 5 	5
 )4/5 5 
5 5 5 5 5 5> %,        r?   r'   c                        e Zd Z	 	 	 	 ddedededededed	edz  d
edz  dedz  deddf fdZ	de
j        de
j        de
j        fdZ xZS )Gemma3AttentionNr(   configr)   	num_headsnum_kv_headshead_dimmax_position_embeddingscache_configr,   attn_logits_soft_capr-   r.   c                 8   t                                                       || _        || _        t	                      }|| _        | j        |z  dk    sJ | j        |z  | _        || _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t          d| j        |z            | _	        || _
        | j        | j
        z  | _        | j	        | j
        z  | _        |j        dz  | _        t          || j
        | j        | j        |j        ||
 d          | _        t%          | j        | j
        z  ||j        ||
 d          | _        t)          | j
        |j                  | _        t)          | j
        |j                  | _        t1          |
          }|j        |         }|dk    | _        | j        r|j        nd }||j        v r|j        |         }n$|j        }| j        rt;          d	|j        
          }t?          | j
        ||d          | _         tC          |dd          rtD          j#        }ntD          j$        }|tD          j$        k    rtJ          ntL          } || j        | j
        | j        | j	        ||||	||
 d
  
        | _'        d S )Nr   r   g      z	.qkv_projr1   z.o_projepssliding_attentiondefault)	rope_type
rope_thetaT)max_positionrope_parametersis_neox_style	is_causalz.attn)rS   rV   r,   	attn_typelogits_soft_capper_layer_sliding_windowr-   )(r6   r7   rQ   r)   r   total_num_headsrR   total_num_kv_headsmaxrS   rT   q_sizekv_sizequery_pre_attn_scalarscalingr   attention_biasqkv_projr   o_projr   rms_norm_epsq_normk_normr!   layer_types
is_slidingsliding_windowr`   dictrope_local_base_freqr   
rotary_embgetattrr   DECODERENCODER_ONLYr   r   attn)r<   rQ   r)   rR   rS   rT   rU   rV   r,   rW   r-   tp_size	layer_idx
layer_typeru   r`   rc   attn_clsr=   s                     r>   r7   zGemma3Attention.__init__k   s    	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF nt}4(4=83T9)M #&%'''
 
 
 ( 4=0&%%%%
 
 
 #4=f6IJJJ"4=f6IJJJ'//	'	2
$(;;26/K..t ///$4Z@OO %4O "&'F4O# # # #M0+	
 
 
 6;-- 	3%-II%2I M666 !  	 HNML*%%0%3###
 
 
			r?   	positionshidden_statesc                 V   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                    d| j        | j        f          }|                     |          }|                    dd          }|                    d| j	        | j        f          }| 
                    |          }|                    dd          }|                     |||          \  }}|                     |||          }	|                     |	          \  }
}|
S )N)dim)rn   splitri   rj   	unflattenrR   rT   rq   flattenrS   rr   rx   r|   ro   )r<   r   r   kwargsqkvrD   qkvattn_outputoutputs              r>   rE   zGemma3Attention.forward   s    }--Q))T[$,E2)NN1aKKT^T];<<KKNNIIb"KKT.>??KKNNIIb"y!Q//1ii1a((KK,,	r?   )NNNr(   )rF   rG   rH   r   rI   r	   r   floatrJ   r7   rK   rL   rE   rM   rN   s   @r>   rP   rP   j   s        ,026-1g
 g
 g
 g
 	g

 g
 g
 "%g
 "D(g
 )4/g
 $dlg
 g
 
g
 g
 g
 g
 g
 g
R< |
 
       r?   rP   c                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZd	ej	        d
ej	        dej	        dz  de
ej	        ej	        f         fdZ xZS )Gemma3DecoderLayerNr(   rQ   rV   r,   r-   r.   c                 B   t                                                       |j        | _        t          || j        |j        |j        |j        |j        ||d | d
  
        | _        |j        | _        t          | j        |j
        |j        || d          | _        t          |j        |j                  | _        t          |j        |j                  | _        t          |j        |j                  | _        t          |j        |j                  | _        d S )Nz
.self_attn)
rQ   r)   rR   rS   rT   rU   rV   r,   rW   r-   z.mlp)r)   r*   r+   r,   r-   rY   )r6   r7   r)   rP   num_attention_headsnum_key_value_headsrT   rU   	self_attnr'   r*   r+   mlpr   rp   input_layernormpost_attention_layernormpre_feedforward_layernormpost_feedforward_layernorm)r<   rQ   rV   r,   r-   r=   s        r>   r7   zGemma3DecoderLayer.__init__   s?    	!-((03_$*$B%%!%(((
 
 
 "-($6$6%???
 
 
  ,F,>FDWXXX(4F$7)
 )
 )
% *6F$7*
 *
 *
& +7F$7+
 +
 +
'''r?   r   r   residualc                 @   ||}|                      |          }n|                      ||          \  }} | j        d||d|}|                     |          }|                     ||          \  }}|                     |          }|                     |          }||fS )N)r   r    )r   r   r   r   r   r   )r<   r   r   r   r   s        r>   rE   zGemma3DecoderLayer.forward  s     $H 00??MM&*&:&:=(&S&S#M8& 
'
 
 
 

 55mDD"&"@"@8#
 #
x //77FFh&&r?   )NNr(   )rF   rG   rH   r   r	   r   rJ   r7   rK   rL   tuplerE   rM   rN   s   @r>   r   r      s         ,026&
 &
 &
 "D(&
 )4/	&

 &
 
&
 &
 &
 &
 &
 &
P'<' |' ,%	' 
u|U\)	*' ' ' ' ' ' ' 'r?   r   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d	z  d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )Gemma3Modelr(   r-   vllm_configr-   c                6   t                                                       |j        j        |j        |j        | _        | _        t          j        j	        | d          | _
        t          j        fd| d          \  | _        | _        | _        t!          j	        j                  | _        | j        j	        dz  }|                     dt)          j        |          d	
           t-          ddgj	                  | _        d S )Nz.embed_tokensr,   r-   c                 *    t          |           S )Nr   )r   )r-   rV   rQ   r,   s    r>   <lambda>z&Gemma3Model.__init__.<locals>.<lambda>@  s     -l6   r?   z.layersr   rY   g      ?
normalizerF)
persistentr   r   )r6   r7   model_config	hf_configrV   r,   rQ   r   
vocab_sizer)   embed_tokensr$   num_hidden_layersstart_layer	end_layerlayersr   rp   normregister_bufferrK   tensorr#   make_empty_intermediate_tensors)r<   r   r-   r   rV   rQ   r,   r=   s       @@@r>   r7   zGemma3Model.__init__0  sG   )3"/"/(2%+++	
 
 
 9D$      %%%9
 9
 9
5$.$+ !!39LMMM	 [,c1
\5<
+C+CPUVVV/Vj)6+=0
 0
,,,r?   	input_idsr.   c                 <    |                      |          | j        z  S rB   )r   r   r<   r   s     r>   embed_input_idszGemma3Model.embed_input_idsQ  s       ++do==r?   Nr   intermediate_tensorsinputs_embedsc                 l   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||fi |\  }}t                      j        st          ||d          S | 	                    ||          \  }}	|S )Nr   r   )r   r   )
r   is_first_rankr   r   r   r   r   is_last_rankr   r   )
r<   r   r   r   r   r   r   r   layerrD   s
             r>   rE   zGemma3Model.forwardV  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	 	E&+e' ' 	' '#M88 ~~* 	&"/XFF    99]H==qr?   weightsc                    g d}t          |                                           }t                      }|D ]\  }}| j        r7| j                                        dk    r|                    d          r|dz  }| j        d| j                            |          x}rH||         }t          |dt                    }	|d         } |	||           |	                    |           |                    d          rWt          ||          }
|
E|
|v rA||
         }t          |dt                    }	 |	||           |	                    |
           |D ]i\  }}}||vr|                    ||          }|                    d          r||vr;t          ||           rL||         }|j        }	 |	|||            nk|                    d          r||vrt          ||          }|t          ||           r||         }t          |dt                    }	 |	||           |	                    |           	|S )	N))rn   q_projr   )rn   k_projr   )rn   v_projr   )r8   	gate_projr   )r8   up_projr   ggufznorm.weightr   weight_loaderr   )z.k_scalez.v_scalez.q_scalez.prob_scalez.bias)rv   named_parameterssetr,   get_nameendswithget_cache_scalery   r   addr   replacer"   r   )r<   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   remapped_name
param_name
shard_nameshard_ids                 r>   load_weightszGemma3Model.load_weightsv  s   "
 "
 "
 4002233"%%%#* >	$ >	$D- !#%..00F::MM-00 ; " ,"/??EEE
 - $J/ '@U V V -a 0e]333!!*--- }}PQQ  9$ L L ,+1M1M'6E$+0E% %M "M%777!%%m444 5K 4 40
JT))||J
;;==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E0{CC<*466 #D) '@U V Ve]333d####r?   rB   )rF   rG   rH   r
   rJ   r7   rK   rL   r   r   rE   r   r   r   r   rM   rN   s   @r>   r   r   .  s,       AC 
 
 
z 
3 
 
 
 
 
 
B> >%, > > > > .2 <$& < 2D8	
 |d* 
+	+   @KHU33D-E$F K3s8 K K K K K K K Kr?   r   c                   ,    e Zd Zg dddgdZdddedef fd	Zd
ej        dej        fdZ		 	 dd
ej        dej        de
dz  dej        dz  dej        e
z  f
dZdej        dej        dz  fdZdeeeej        f                  dee         fdZ xZS )Gemma3ForCausalLM)r   r   r   r   r   )rn   r8   r(   r   r   r-   c          	         |j         j        }|j        }t                                                       || _        || _        t          |t          |d                    | _        t          |j
        |j        |t          |d                    | _        |j        r)| j                            | j        j                  | _        t!          |j
        |j                  | _        | j        j        | _        d S )Nmodel)r   r-   lm_headr   )soft_cap)r   r   r,   r6   r7   rQ   r   r%   r   r   r   r)   r   tie_word_embeddingstie_weightsr   r   final_logit_softcappinglogits_processorr   )r<   r   r-   rQ   r,   r=   s        r>   r7   zGemma3ForCausalLM.__init__  s    )3"/( #L,I,I
 
 

 &%	22	
 
 
 % 	M<33DJ4KLLDL /(F!
 !
 !
 J6 	,,,r?   r   r.   c                 6    | j                             |          S rB   )r   r   r   s     r>   r   z!Gemma3ForCausalLM.embed_input_ids  s    z)))444r?   Nr   r   r   c                 (     | j         ||||fi |}|S rB   )r   )r<   r   r   r   r   r   r   s          r>   rE   zGemma3ForCausalLM.forward  s6     #
y"6
 
IO
 
 r?   r   c                 <    |                      | j        |          }|S rB   )r   r   )r<   r   logitss      r>   compute_logitsz Gemma3ForCausalLM.compute_logits  s      &&t|]CCr?   r   c                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r    rQ   r   r   )r<   r   loaders      r>   r   zGemma3ForCausalLM.load_weights  sC    "+/;+JTJ<<PT
 
 
 ""7+++r?   )NN)rF   rG   rH   packed_modules_mappingr
   rJ   r7   rK   rL   r   r   rE   r   r   r   r   r   rM   rN   s   @r>   r   r     s{       
 
 
 

 
 BD 
 
 
z 
3 
 
 
 
 
 
65 5%, 5 5 5 5 <@-1 < < 2D8	
 |d* 
+	+   | 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r?   r   )@collections.abcr   	itertoolsr   rK   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   vllm.loggerr   %vllm.model_executor.layers.activationr   ;vllm.model_executor.layers.attention.encoder_only_attentionr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   vllm.v1.attention.backendr   
interfacesr   r   utilsr    r!   r"   r#   r$   r%   rF   loggerModuler'   rP   r   r   r   r   r?   r>   <module>r     sy  $ % $ $ $ $ $              ) ) ) ) ) ) * * * * * * = = = = = = / / / / / / / / O O O O O O O O # # # # # # < < < < < <      > = = = = =         
 H G G G G G F F F F F F @ @ @ @ @ @               . - - - - - 3 3 3 3 3 3 0 0 0 0 0 0 0 0                
X		$ $ $ $ $	 $ $ $N} } } } }bi } } }@A' A' A' A' A' A' A' A'H R R R R R") R R RjD, D, D, D, D,	< D, D, D, D, D,r?   