
    .`i<                     `   d Z ddlmZ ddlmZ ddlmZ ddlmZ ddl	Z	ddl	m
Z
 ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z, ddl-m.Z.m/Z/ ddl0m1Z1m2Z2m3Z3m4Z4m5Z5  ee6          Z7ede8dz  de8dz  de
j9        fd            Z: G d de
j9                  Z; G d  d!e
j9                  Z< G d" d#e
j9                  Z=e G d$ d%e
j9                              Z> G d& d'e
j9        e.e/          Z?dS )(z?Inference-only Gemma model compatible with HuggingFace weights.    )Iterable)cache)islice)AnyN)nn)GemmaConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)init_logger)
GeluAndMul)GemmaRMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)VocabParallelEmbedding)default_weight_loader)IntermediateTensors   )SupportsLoRA
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefix
hidden_acthidden_activationreturnc                     |.| t                               d| |            t          d          S |dk    rt          d          S |dk    rt          d          S t          d|  d          )	Na~  Gemma's activation function was incorrectly set to exact GeLU in the config JSON file when it was initially released. Changing the activation function to approximate GeLU (`gelu_pytorch_tanh`). If you want to use the legacy `%s`, edit the config JSON to set `hidden_activation=%s` instead of `hidden_act`. See https://github.com/huggingface/transformers/pull/29402 for more details.tanh)approximategelu_pytorch_tanhgelunonezActivation function z# is not supported for Gemma models.)loggerwarningr   
ValueError)r#   r$   s     t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/gemma.py_get_gemma_act_fnr0   <   s    
  !NN$    f----	1	1	1f----	f	$	$f----R:RRR
 
 	
    c                   ^     e Zd Z	 	 	 	 ddedededz  dedz  dedz  ded	df fd
Zd Z xZS )GemmaMLPN hidden_sizeintermediate_sizer#   r$   quant_configprefixr%   c                     t                                                       t          ||gdz  d|| d          | _        t	          ||d|| d          | _        t          ||          | _        d S )N   Fz.gate_up_projbiasr7   r8   z
.down_proj)super__init__r   gate_up_projr   	down_projr0   act_fn)selfr5   r6   r#   r$   r7   r8   	__class__s          r/   r>   zGemmaMLP.__init__[   s     	6!#%+++
 
 
 +%(((
 
 
 (
4EFFr1   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r?   rA   r@   )rB   xgate_up_s       r/   forwardzGemmaMLP.forwardu   sD    &&q))
KK  ~~a  1r1   )NNNr4   )	__name__
__module____qualname__intstrr   r>   rI   __classcell__rC   s   @r/   r3   r3   Z   s        
 "&(,26G GG G $J	G
 :G )4/G G 
G G G G G G4      r1   r3   c                        e Zd Z	 	 	 	 ddededededeeef         d	ed
edz  dedz  deddf fdZ	de
j        de
j        de
j        fdZ xZS )GemmaAttention    Nr4   r5   	num_headsnum_kv_headshead_dimrope_parametersmax_position_embeddingscache_configr7   r8   r%   c
           
      $   t                                                       || _        t                      }
|| _        | j        |
z  dk    sJ | j        |
z  | _        || _        | j        |
k    r| j        |
z  dk    sJ n|
| j        z  dk    sJ t          d| j        |
z            | _        || _	        | j        | j	        z  | _
        | j        | j	        z  | _        | j	        dz  | _        t          || j	        | j        | j        d||	 d          | _        t          | j        | j	        z  |d||	 d          | _        t#          | j	        ||d	          | _        t'          | j        | j	        | j        | j        |||	 d
          | _        d S )Nr   r   g      Fz	.qkv_projr;   z.o_projT)max_positionrW   is_neox_stylez.attn)rU   rY   r7   r8   )r=   r>   r5   r   total_num_headsrT   total_num_kv_headsmaxrU   rV   q_sizekv_sizescalingr   qkv_projr   o_projr   
rotary_embr	   attn)rB   r5   rT   rU   rV   rW   rX   rY   r7   r8   tp_sizerC   s              r/   r>   zGemmaAttention.__init__}   s    	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF nt}4(4=8}d*)M #%'''
 
 
 ( 4=0%%%%
 
 
 #M0+	
 
 
 NML*%%###
 
 
			r1   	positionshidden_statesc                 "   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )N)dim)rc   splitr`   ra   re   rf   rd   )
rB   rh   ri   qkvrH   qkvattn_outputoutputs
             r/   rI   zGemmaAttention.forward   s    
 }--Q))T[$,E2)NN1ay!Q//1ii1a((KK,,	r1   )rS   NNr4   )rJ   rK   rL   rM   dictrN   r   r   r   r>   torchTensorrI   rO   rP   s   @r/   rR   rR   |   s        (,+/26@
 @
@
 @
 	@

 @
 c3h@
 "%@
 "D(@
 )4/@
 @
 
@
 @
 @
 @
 @
 @
D
<
 |
 
	
 
 
 
 
 
 
 
r1   rR   c                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZd	ej	        d
ej	        dej	        dz  de
ej	        ej	        f         fdZ xZS )GemmaDecoderLayerNr4   configrY   r7   r8   r%   c                    t                                                       |j        | _        t          | j        |j        |j        |j        |j        |j        ||| d	  	        | _	        t          | j        |j        |j        t          |dd           || d          | _        t          |j        |j                  | _        t          |j        |j                  | _        d S )Nz
.self_attn)	r5   rT   rU   rV   rX   rW   rY   r7   r8   r$   z.mlp)r5   r6   r#   r$   r7   r8   eps)r=   r>   r5   rR   num_attention_headsnum_key_value_headsrV   rX   rW   	self_attnr3   r6   r#   getattrmlpr   rms_norm_epsinput_layernormpost_attention_layernorm)rB   ry   rY   r7   r8   rC   s        r/   r>   zGemmaDecoderLayer.__init__   s     	!-'(03_$*$B"2%%(((

 

 

 ($6(%f.A4HH%???
 
 
  ,F,>FDWXXX(4F$7)
 )
 )
%%%r1   rh   ri   residualc                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )N)rh   ri   )r   r   r   r   )rB   rh   ri   r   s       r/   rI   zGemmaDecoderLayer.forward   s     $H 00??MM&*&:&:=(&S&S#M8' ' 
 
 #'"?"?x"X"Xx//h&&r1   )NNr4   )rJ   rK   rL   r   r   r   rN   r>   ru   rv   tuplerI   rO   rP   s   @r/   rx   rx      s         ,026
 

 "D(
 )4/	

 
 

 
 
 
 
 
B'<' |' ,%	'
 
u|U\)	*' ' ' ' ' ' ' 'r1   rx   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )
GemmaModelr4   r8   vllm_configr8   c                   t                                                       |j        j        |j        |j        | _        t          j        j	                  | _
        t          j        fd| d          \  | _        | _        | _        t!          j	        j                  | _        | j        j	        dz  }|                     dt)          j        |          d           t-          d	d
gj	                  | _        d S )Nc                 *    t          |           S )Nr   )rx   )r8   rY   ry   r7   s    r/   <lambda>z%GemmaModel.__init__.<locals>.<lambda>  s     ,l6   r1   z.layersr   r{   g      ?
normalizerF)
persistentri   r   )r=   r>   model_config	hf_configrY   r7   ry   r   
vocab_sizer5   embed_tokensr!   num_hidden_layersstart_layer	end_layerlayersr   r   normregister_bufferru   tensorr    make_empty_intermediate_tensors)rB   r   r8   r   rY   ry   r7   rC   s       @@@r/   r>   zGemmaModel.__init__  s+   )3"/"/2
 
 9D$      %%%9
 9
 9
5$.$+ !!39LMMM	 [,c1
\5<
+C+CPUVVV/Vj)6+=0
 0
,,,r1   	input_idsr%   c                 ,    |                      |          S rE   )r   rB   r   s     r/   embed_input_idszGemmaModel.embed_input_ids'  s      +++r1   Nrh   intermediate_tensorsinputs_embedsc                 |   t                      j        r'||}n|                     |          }|| j        z  }d }n|d         }|d         }t	          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 
                    ||          \  }}|S )Nri   r   )ri   r   )r   is_first_rankr   r   r   r   r   r   is_last_rankr   r   )	rB   r   rh   r   r   ri   r   layerrH   s	            r/   rI   zGemmaModel.forward*  s     >>' 		8( - $ 4 4Y ? ?T_,MHH0AM+J7HDK)94>JJ 	 	E&+e' '#M88
 ~~* 	&"/XFF    99]H==qr1   weightsc                 (   g d}t          |                                           }t                      }|D ]\  }}|D ]i\  }}}	||vr|                    ||          }|                    d          r||vr;t          ||           rL||         }
|
j        } ||
||	            nU|                    d          r||vrt          ||           r||         }
t          |
dt                    } ||
|           |	                    |           |S )N))rc   q_projro   )rc   k_projrp   )rc   v_projrq   )r?   	gate_projr   )r?   up_projr   z.biasweight_loader)
rt   named_parameterssetreplaceendswithr   r   r   r   add)rB   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_name
shard_nameshard_idparamr   s               r/   load_weightszGemmaModel.load_weightsH  sg   "
 "
 "
 4002233"%%%#* 	$ 	$D-4J 4 40
JT))||J
;;==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E*466 #D) '@U V Ve]333d####r1   rE   )rJ   rK   rL   r   rN   r>   ru   rv   r   r   rI   r   r   r   r   rO   rP   s   @r/   r   r     s       AC 
 
 
z 
3 
 
 
 
 
 
@, ,%, , , , , .2 < < 2D8	
 |d* 
+	+   <$HU33D-E$F $3s8 $ $ $ $ $ $ $ $r1   r   c                   ,    e Zd Zg dddgdZdddedef fd	Zd
ej        dej        fdZ		 	 dd
ej        dej        de
dz  dej        dz  dej        e
z  f
dZdej        dej        dz  fdZdeeeej        f                  dee         fdZ xZS )GemmaForCausalLM)r   r   r   r   r   )rc   r?   r4   r   r   r8   c                8   t                                                       |j        j        }|j        }|| _        |j        sJ || _        t          |t          |d                    | _	        t          |j                  | _        | j	        j        | _        d S )Nmodel)r   r8   )r=   r>   r   r   r7   ry   tie_word_embeddingsr   r"   r   r   r   logits_processorr   )rB   r   r8   ry   r7   rC   s        r/   r>   zGemmaForCausalLM.__init__|  s    )3"/))))(#L,I,I
 
 

 !00A B BJ6 	,,,r1   r   r%   c                 6    | j                             |          S rE   )r   r   r   s     r/   r   z GemmaForCausalLM.embed_input_ids  s    z)))444r1   Nrh   r   r   c                 6    |                      ||||          }|S rE   )r   )rB   r   rh   r   r   ri   s         r/   rI   zGemmaForCausalLM.forward  s)     

y"6
 
 r1   ri   c                 F    |                      | j        j        |          }|S rE   )r   r   r   )rB   ri   logitss      r/   compute_logitszGemmaForCausalLM.compute_logits  s#     &&tz'>NNr1   r   c                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r   ry   r   r   )rB   r   loaders      r/   r   zGemmaForCausalLM.load_weights  sC    "+/;+JTJ<<PT
 
 
 ""7+++r1   )NN)rJ   rK   rL   packed_modules_mappingr   rN   r>   ru   rv   r   r   rI   r   r   r   r   r   rO   rP   s   @r/   r   r   o  s{       
 
 
 

 
 BD 
 
 
z 
3 
 
 
 
 
 
$5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r1   r   )@__doc__collections.abcr   	functoolsr   	itertoolsr   typingr   ru   r   transformersr   vllm.attention.layerr	   vllm.compilation.decoratorsr
   vllm.configr   r   vllm.distributedr   r   vllm.loggerr   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   r   utilsr   r   r    r!   r"   rJ   r,   rN   Moduler0   r3   rR   rx   r   r    r1   r/   <module>r      sl  $ F E $ $ $ $ $ $                          $ $ $ $ $ $ * * * * * * = = = = = = / / / / / / / / O O O O O O O O # # # # # # < < < < < < = = = = = =         
 H G G G G G F F F F F F @ @ @ @ @ @ V V V V V V O O O O O O - - - - - - 0 0 0 0 0 0 0 0              
X		 
d

Tz
 Y
 
 
 
:    ry   DM M M M MRY M M M`6' 6' 6' 6' 6'	 6' 6' 6'r f f f f f f f fR:, :, :, :, :,ry,
 :, :, :, :, :,r1   