
    .`i?                        d dl mZ d dlmZ d dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z%m&Z& d dl'm(Z( ddl)m*Z*m+Z+ ddl,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2  ee3          Z4 G d dej5                  Z6 G d dej5                  Z7 G d dej5                  Z8e G d dej5                              Z9 G d d ej5        e*e+          Z:dS )!    )Iterable)isliceN)nn)Gemma2Config)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)init_logger)
GeluAndMul)GemmaRMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)VocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors   )SupportsLoRA
SupportsPP)AutoWeightsLoaderextract_layer_indexis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   l     e Zd Z	 	 ddedededededz  ded	df fd
Zdej        d	ej        fdZ	 xZ
S )	Gemma2MLPN hidden_sizeintermediate_size
hidden_acthidden_activationquant_configprefixreturnc                 &   t                                                       t          ||gdz  d|| d          | _        t	          ||d|| d          | _        ||cxk    rdk    sn t          d          t          d	          | _        d S )
N   Fz.gate_up_projbiasr*   r+   z
.down_projgelu_pytorch_tanhzGemma2 uses `gelu_pytorch_tanh` as the hidden activation function. Please set `hidden_act` and `hidden_activation` to `gelu_pytorch_tanh`.tanh)approximate)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)selfr&   r'   r(   r)   r*   r+   	__class__s          u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/gemma2.pyr5   zGemma2MLP.__init__>   s     	6!#%+++
 
 
 +%(((
 
 
 /FFFF3FFFFF'  
 !V444    xc                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r6   r9   r7   )r:   r>   gate_up_s       r<   forwardzGemma2MLP.forward^   sD    &&q))
KK  ~~a  1r=   )Nr%   )__name__
__module____qualname__intstrr   r5   torchTensorrC   __classcell__r;   s   @r<   r$   r$   =   s         375 55 5 	5
 5 )4/5 5 
5 5 5 5 5 5@ %,        r=   r$   c                        e Zd Z	 	 	 	 ddedededededed	edz  d
edz  dedz  deddf fdZ	de
j        de
j        de
j        fdZ xZS )Gemma2AttentionNr%   configr&   	num_headsnum_kv_headshead_dimmax_position_embeddingscache_configr*   attn_logits_soft_capr+   r,   c                    t                                                       || _        || _        t	                      }|| _        | j        |z  dk    sJ | j        |z  | _        || _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t          d| j        |z            | _	        || _
        | j        | j
        z  | _        | j	        | j
        z  | _        |j        dz  | _        t          || j
        | j        | j        |j        ||
 d          | _        t%          | j        | j
        z  ||j        ||
 d          | _        t)          | j
        ||j        d          | _        t/          |
          }|j        |         d	k    }|r|j        nd }t5          | j        | j
        | j        | j	        |||	||
 d
	  	        | _        d S )Nr   r   g      z	.qkv_projr/   z.o_projT)max_positionrope_parametersis_neox_stylesliding_attentionz.attn)rQ   rT   r*   logits_soft_capper_layer_sliding_windowr+   )r4   r5   rO   r&   r   total_num_headsrP   total_num_kv_headsmaxrQ   rR   q_sizekv_sizequery_pre_attn_scalarscalingr   attention_biasqkv_projr   o_projr   rX   
rotary_embr   layer_typessliding_windowr   attn)r:   rO   r&   rP   rQ   rR   rS   rT   r*   rU   r+   tp_size	layer_idx
is_slidingri   r;   s                  r<   r5   zGemma2Attention.__init__f   s%    	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF nt}4(4=83T9)M #&%'''
 
 
 ( 4=0&%%%%
 
 
 #M0"2	
 
 
 (//	'	26II
2<F..$NML*%%0%3###

 

 

			r=   	positionshidden_statesc                 "   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )N)dim)re   splitr`   ra   rg   rj   rf   )
r:   rn   ro   qkvrB   qkvattn_outputoutputs
             r<   rC   zGemma2Attention.forward   s    
 }--Q))T[$,E2)NN1ay!Q//1ii1a((KK,,	r=   )NNNr%   )rD   rE   rF   r   rG   r	   r   floatrH   r5   rI   rJ   rC   rK   rL   s   @r<   rN   rN   e   s        ,026-1H
 H
H
 H
 	H

 H
 H
 "%H
 "D(H
 )4/H
 $dlH
 H
 
H
 H
 H
 H
 H
 H
T
<
 |
 
	
 
 
 
 
 
 
 
r=   rN   c                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZd	ej	        d
ej	        dej	        dz  de
ej	        ej	        f         fdZ xZS )Gemma2DecoderLayerNr%   rO   rT   r*   r+   r,   c                 X   t                                                       |j        | _        t          || j        |j        |j        |j        |j        |||j        | d
  
        | _	        |j        | _        t          | j        |j        |j        |j        || d          | _        t          |j        |j                  | _        t          |j        |j                  | _        t          |j        |j                  | _        t          |j        |j                  | _        d S )Nz
.self_attn)
rO   r&   rP   rQ   rR   rS   rT   r*   rU   r+   z.mlp)r&   r'   r(   r)   r*   r+   eps)r4   r5   r&   rN   num_attention_headsnum_key_value_headsrR   rS   attn_logit_softcapping	self_attnr$   r'   r(   r)   mlpr   rms_norm_epsinput_layernormpost_attention_layernormpre_feedforward_layernormpost_feedforward_layernorm)r:   rO   rT   r*   r+   r;   s        r<   r5   zGemma2DecoderLayer.__init__   sH    	!-((03_$*$B%%!'!>(((
 
 
 "-($6($6%???
 
 
  ,F,>FDWXXX(4F$7)
 )
 )
% *6F$7*
 *
 *
& +7F$7+
 +
 +
'''r=   rn   ro   residualc                 N   ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     |          }|                     ||          \  }}|                     |          }|                     |          }||fS )N)rn   ro   )r   r   r   r   r   r   )r:   rn   ro   r   s       r<   rC   zGemma2DecoderLayer.forward   s     $H 00??MM&*&:&:=(&S&S#M8' ' 
 
 55mDD"&"@"@8#
 #
x //77FFh&&r=   )NNr%   )rD   rE   rF   r   r	   r   rH   r5   rI   rJ   tuplerC   rK   rL   s   @r<   r|   r|      s         ,026'
 '
'
 "D('
 )4/	'

 '
 
'
 '
 '
 '
 '
 '
R'<' |' ,%	'
 
u|U\)	*' ' ' ' ' ' ' 'r=   r|   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d	z  d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )Gemma2Modelr%   r+   vllm_configr+   c                *   t                                                       |j        j        |j        |j        | _        | _        t          j        j	                  | _
        t          j        fd| d          \  | _        | _        | _        t!          j	        j                  | _        | j        j	        dz  }|                     dt)          j        |          d           t-          d	d
gj	                  | _        d S )Nc                 *    t          |           S )Nr   )r|   )r+   rT   rO   r*   s    r<   <lambda>z&Gemma2Model.__init__.<locals>.<lambda>  s     -l6   r=   z.layersr   r~   g      ?
normalizerF)
persistentro   r   )r4   r5   model_config	hf_configrT   r*   rO   r   
vocab_sizer&   embed_tokensr!   num_hidden_layersstart_layer	end_layerlayersr   r   normregister_bufferrI   tensorr    make_empty_intermediate_tensors)r:   r   r+   r   rT   rO   r*   r;   s       @@@r<   r5   zGemma2Model.__init__  s3   )3"/"/(2
 
 9D$      %%%9
 9
 9
5$.$+ !!39LMMM	 [,c1
\5<
+C+CPUVVV/Vj)6+=0
 0
,,,r=   	input_idsr,   c                 ,    |                      |          S r@   )r   r:   r   s     r<   embed_input_idszGemma2Model.embed_input_ids!  s      +++r=   Nrn   intermediate_tensorsinputs_embedsc                    t                      j        r'||}n|                     |          }|| j        z  }d }n|J |d         }|d         }t	          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 
                    ||          \  }}|S )Nro   r   )ro   r   )r   is_first_rankr   r   r   r   r   r   is_last_rankr   r   )	r:   r   rn   r   r   ro   r   layerrB   s	            r<   rC   zGemma2Model.forward$  s     >>' 
	8( - $ 4 4Y ? ?T_,MHH'3330AM+J7HDK)94>JJ 	 	E&+e' '#M88
 ~~* 	&"/XFF    99]H==qr=   weightsc                 ,   g d}t          |                                           }t                      }|D ]\\  }}| j        d| j                            |          x}rH||         }t          |dt                    }	|d         } |	||           |                    |           q|D ]i\  }
}}||vr|                    ||
          }|	                    d          r||vr;t          ||           rL||         }|j        }	 |	|||            nj|	                    d          r||vrt          ||          }|t          ||           r||         }t          |dt                    }	 |	||           |                    |           ^|S )N))re   q_projru   )re   k_projrv   )re   v_projrw   )r6   	gate_projr   )r6   up_projr   weight_loaderr   z.bias)dictnamed_parameterssetr*   get_cache_scalegetattrr   addreplaceendswithr   r   r   )r:   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   
param_name
shard_nameshard_ids                r<   load_weightszGemma2Model.load_weightsC  s   "
 "
 "
 4002233"%%%#* %	$ %	$D- ,"/??EEE
 - $J/ '@U V V -a 0e]333!!*---4J 4 40
JT))||J
;;==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E0{CC<*466 #D) '@U V Ve]333d####r=   r@   )rD   rE   rF   r
   rH   r5   rI   rJ   r   r   rC   r   r   r   r   rK   rL   s   @r<   r   r      s        AC 
 
 
z 
3 
 
 
 
 
 
>, ,%, , , , , .2 <$& < 2D8	
 |d* 
+	+   >2HU33D-E$F 23s8 2 2 2 2 2 2 2 2r=   r   c                   ,    e Zd Zg dddgdZdddedef fd	Zd
ej        dej        fdZ		 	 dd
ej        dej        de
dz  dej        dz  dej        e
z  f
dZdej        dej        dz  fdZdeeeej        f                  dee         fdZ xZS )Gemma2ForCausalLM)r   r   r   r   r   )re   r6   r%   r   r   r+   c                F   |j         j        }|j        }t                                                       || _        |j        sJ || _        t          |t          |d                    | _	        t          |j        |j                  | _        | j	        j        | _        d S )Nmodel)r   r+   )soft_cap)r   r   r*   r4   r5   rO   tie_word_embeddingsr   r"   r   r   r   final_logit_softcappinglogits_processorr   )r:   r   r+   rO   r*   r;   s        r<   r5   zGemma2ForCausalLM.__init__  s    )3"/))))( #L,I,I
 
 

 !0(F!
 !
 !
 J6 	,,,r=   r   r,   c                 6    | j                             |          S r@   )r   r   r   s     r<   r   z!Gemma2ForCausalLM.embed_input_ids  s    z)))444r=   Nrn   r   r   c                 6    |                      ||||          }|S r@   )r   )r:   r   rn   r   r   ro   s         r<   rC   zGemma2ForCausalLM.forward  s)     

y"6
 
 r=   ro   c                 F    |                      | j        j        |          }|S r@   )r   r   r   )r:   ro   logitss      r<   compute_logitsz Gemma2ForCausalLM.compute_logits  s#     &&tz'>NNr=   r   c                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r   rO   r   r   )r:   r   loaders      r<   r   zGemma2ForCausalLM.load_weights  sC    "+/;+JTJ<<PT
 
 
 ""7+++r=   )NN)rD   rE   rF   packed_modules_mappingr
   rH   r5   rI   rJ   r   r   rC   r   r   r   r   r   rK   rL   s   @r<   r   r   x  s{       
 
 
 

 
 BD 
 
 
z 
3 
 
 
 
 
 
&5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r=   r   );collections.abcr   	itertoolsr   rI   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   vllm.loggerr   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   
interfacesr   r   utilsr   r   r   r    r!   r"   rD   loggerModuler$   rN   r|   r   r    r=   r<   <module>r      s'  & % $ $ $ $ $              % % % % % % * * * * * * = = = = = = / / / / / / / / O O O O O O O O # # # # # # < < < < < < = = = = = =         
 H G G G G G F F F F F F @ @ @ @ @ @ V V V V V V        . - - - - - 0 0 0 0 0 0 0 0                
X		% % % % %	 % % %PU U U U Ubi U U Up@' @' @' @' @' @' @' @'F t t t t t") t t tn;, ;, ;, ;, ;,	< ;, ;, ;, ;, ;,r=   