
    .`i W              
          d Z ddlmZ ddlmZ ddlmZ ddlZddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
lmZmZ ddlmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZ ddlm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z&m'Z' ddl(m)Z)m*Z* ddl+m,Z, ddl-m.Z.m/Z/ ddl0m1Z1 ddl2m3Z3m4Z4m5Z5 ddl6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=  G d dej>                  Z? G d dej>                  Z@ G d d ej>                  ZA	 	 d-d!ejB        d"ejB        d#e,dz  d$ejB        dz  fd%ZC edd&ddd'eC(           G d) d*ej>                              ZD G d+ d,ej>        e4e5e3          ZEdS ).z?Inference-only Qwen2 model compatible with HuggingFace weights.    )Iterable)islice)AnyN)nn)Qwen2Config)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
SiluAndMul)EncoderOnlyAttention)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors)is_interleavedset_default_rope_theta)AttentionType   )SupportsEagle3SupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayerextract_layer_indexis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   J     e Zd Z	 	 ddededededz  deddf fd	Zd
 Z xZS )Qwen2MLPN hidden_sizeintermediate_size
hidden_actquant_configprefixreturnc                    t                                                       t          ||gdz  d|| d          | _        t	          ||d|| d          | _        |dk    rt          d| d          t                      | _        d S )	N   Fz.gate_up_projbiasr0   r1   z
.down_projsiluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)selfr-   r.   r/   r0   r1   	__class__s         t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/qwen2.pyr9   zQwen2MLP.__init__N   s     	6!#%+++
 
 
 +%(((
 
 
 X:XXX   !ll    c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r:   r=   r;   )r>   xgate_up_s       r@   forwardzQwen2MLP.forwardk   sD    &&q))
KK  ~~a  1rA   )Nr,   )	__name__
__module____qualname__intstrr   r9   rG   __classcell__r?   s   @r@   r+   r+   M   s         37# ## # 	#
 )4/# # 
# # # # # #:      rA   r+   c                        e Zd Zddddej        dddfdededed	eeef         d
ede	dz  de
dz  dededeeef         dz  dededdf fdZdej        dej        dej        fdZ xZS )Qwen2Attentioni   Nr,   Fgư>r-   	num_headsnum_kv_headsrope_parametersmax_positioncache_configr0   r1   	attn_typedual_chunk_attention_configqk_normrms_norm_epsr2   c           
      $   t                                                       || _        t                      }|| _        | j        |z  dk    sJ | j        |z  | _        || _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t          d| j        |z            | _        || j        z  | _	        | j        | j	        z  | _
        | j        | j	        z  | _        | j	        dz  | _        |
| _        || _        t          || j	        | j        | j        d|| d          | _        t#          | j        | j	        z  |d|| d          | _        | j        r6t'          | j	        |	          | _        t'          | j	        |	          | _        t-          | j	        |||

          | _        |	t0          j        k    rt4          nt6          } || j        | j	        | j        f| j        |||	| dd|
rt9          |          |
dni | _        d S )Nr   r   g      Tz	.qkv_projr5   Fz.o_projeps)rT   rS   rW   z.attn)rR   rU   r0   rV   r1   )	layer_idxrW   )r8   r9   r-   r   total_num_headsrQ   total_num_kv_headsmaxrR   head_dimq_sizekv_sizescalingrW   rX   r   qkv_projr   o_projr   q_normk_normr   
rotary_embr   ENCODER_ONLYr   r   r%   attn)r>   r-   rQ   rR   rS   rT   rU   r0   r1   rV   rW   rX   rY   tp_sizeattn_clsr?   s                  r@   r9   zQwen2Attention.__init__s   s    	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF#t';;nt}4(4=8}d*+F()M #%'''
 
 
 ( 4=0%%%%
 
 
 < 	C!$-\BBBDK!$-\BBBDK"M%+(C	
 
 
 M666 !  	
 HNML
 *%%###
 
 +	088/J  
 
 
			rA   	positionshidden_statesc                    |                      |          \  }}|                    | j        | j        | j        gd          \  }}}| j        r|j        d         }|                    || j        | j                  }|                    || j	        | j                  }| 
                    |          }|                     |          }|                    || j                  }|                    || j                  }|                     |||          \  }}|                     |||          }	|                     |	          \  }
}|
S )N)dimr   )re   splitrb   rc   rX   shapeviewrQ   ra   rR   rg   rh   ri   rk   rf   )r>   rn   ro   qkvrF   qkvtotal_tokensattn_outputoutputs              r@   rG   zQwen2Attention.forward   s!   
 }--Q))T[$,E2)NN1a < 	3 71:L|T^T]CCA|T%6FFA AAAA |T[11A|T\22Ay!Q//1ii1a((KK,,	rA   )rH   rI   rJ   r   DECODERrK   dictrL   r   r
   r   boolfloatr9   torchTensorrG   rM   rN   s   @r@   rP   rP   r   sI        &+/26&.=A"V
 V
V
 V
 	V

 c3hV
 V
 "D(V
 )4/V
 V
 V
 &*#s(^d%:V
 V
 V
 
V
 V
 V
 V
 V
 V
p< | 
	       rA   rP   c                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZd	ej	        d
ej	        dej	        dz  de
ej	        ej	        f         fdZ xZS )Qwen2DecoderLayerNr,   configrU   r0   r1   r2   c                 r   t                                                       |j        | _        t          |d           t	          |dd           }t	          |dd          rt
          j        }nt
          j        }t	          |dd          }t          | j        |j	        |j
        |j        |||j        | d||||j        	          | _        t          | j        |j        |j        || d
          | _        t'          |j        |j                  | _        t'          |j        |j                  | _        d S )Ni@B )default_thetarW   	is_causalTrX   Fz
.self_attn)r-   rQ   rT   rR   rU   r0   rS   r1   rV   rW   rX   rY   z.mlp)r-   r.   r/   r0   r1   r[   )r8   r9   r-   r   getattrr   r}   rj   rP   num_attention_headsmax_position_embeddingsnum_key_value_headsrS   rY   	self_attnr+   r.   r/   mlpr   input_layernormpost_attention_layernorm)	r>   r   rU   r0   r1   rW   rV   rX   r?   s	           r@   r9   zQwen2DecoderLayer.__init__   s\    	!-vW====&-14'
 '
# 6;-- 	3%-II%2I &)U33'(073%%"2((((C,
 
 
 ($6(%???
 
 
  'v'9v?RSSS(/F$7)
 )
 )
%%%rA   rn   ro   residualc                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )N)rn   ro   )r   r   r   r   )r>   rn   ro   r   s       r@   rG   zQwen2DecoderLayer.forward  s     $H 00??MM&*&:&:=(&S&S#M8' ' 
 
 #'"?"?x"X"Xx//h&&rA   )NNr,   )rH   rI   rJ   r   r
   r   rL   r9   r   r   tuplerG   rM   rN   s   @r@   r   r      s         ,0262
 2
2
 "D(2
 )4/	2

 2
 
2
 2
 2
 2
 2
 2
h'<' |' ,%	'
 
u|U\)	*' ' ' ' ' ' ' 'rA   r   	input_idsrn   intermediate_tensorsinputs_embedsc                 v   t          j        |                                 d         |                                d         k               |Nt          j        |                                 d         |d                                         d         k               |Ht          j        |                                 d         |                                d         k               |R|Rt          j        |                                d         |d                                         d         k               dS dS dS )zShape invariants for Qwen2Model Model, those are translated to
    runtime assertions for unbacked dynamic shapes and are compiled away for
    backedr   rq   Nro   r   )r   _checksizer   rn   r   r   s       r@   qwen_2_model_invariantsr   5  s(    
L!!!$	(8(8(<<==='NNQ#7#H#M#M#O#OPQ#RR	
 	
 	
  Y^^%%a(M,>,>,@,@,CCDDD
  %9%E  #';O'L'Q'Q'S'STU'VV	
 	
 	
 	
 	
 ! %E%ErA   rq   r   )dynamic_arg_dimsshape_invariantsc                       e Zd Zdeddededeej                 f fdZ	de
j        de
j        fd	Z	 	 dde
j        de
j        ded
z  de
j        d
z  de
j        ez  f
dZdeeee
j        f                  dee         fdZ xZS )
Qwen2Modelr,   )r1   decoder_layer_typevllm_configr1   r   c                   t                                                       |j        j                                        |j        |j        t          |j        j                  r8j	        j
        k    s(J d                    j	        j
                              | _        | _        j        | _        t                      j        sj        r9t                      j        r&t%          j        j        | d          | _        nt+                      | _        t-          j
        fd| d          \  | _        | _        | _        t5          ddgj                  | _        t                      j        r!t9          j        j        	          | _        nt+                      | _        t?          t@          d
f                     | _!        d S )NzSliding window for some but all layers is not supported. This model uses sliding window but `max_window_layers` = {} is less than `num_hidden_layers` = {}. Please open an issue to discuss this feature.z.embed_tokensr0   r1   c                 "     |           S )N)r   rU   r0   r1    )r1   rU   r   r   r0   s    r@   <lambda>z%Qwen2Model.__init__.<locals>.<lambda>  s&    --))	   rA   z.layersr1   ro   r   r[   .)"r8   r9   model_config	hf_configget_text_configrU   r0   r   hf_text_configmax_window_layersnum_hidden_layersformatr   
vocab_sizer   is_first_ranktie_word_embeddingsis_last_rankr   r-   embed_tokensr$   r(   start_layer	end_layerlayersr'   make_empty_intermediate_tensorsr   rY   normr   rK   aux_hidden_state_layers)r>   r   r1   r   rU   r   r0   r?   s      `@@@r@   r9   zQwen2Model.__init__a  s    	)3CCEE"/"/ +2ABB 		+v/GGGG+ ,26,,, ,	 HGG ( +>>' 
	1&
	1+7>>+F
	1 !7!") ///	! ! !D !/ 0 0D8C$       %%%	9
 	9
 	9
5$.$+ 0Wj)6+=0
 0
, >>& 	) 28KLLLDII&((DI',S#X'8'8$$$rA   r   r2   c                 ,    |                      |          S rC   )r   r>   r   s     r@   embed_input_idszQwen2Model.embed_input_ids  s      +++rA   Nrn   r   r   c                    t                      j        r||}n|                     |          }d }n|J |d         }|d         }g }t          t	          | j        | j        | j                            D ]6\  }}	|| j        v r|	                    ||z               |	|||          \  }}7t                      j
        st          ||d          S |                     ||          \  }}
t          |          dk    r||fS |S )Nro   r   )ro   r   r   )r   r   r   	enumerater   r   r   r   r   appendr   r   r   len)r>   r   rn   r   r   ro   r   aux_hidden_statesidxlayerrF   s              r@   rG   zQwen2Model.forward  sD    >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7H#4; 0$.AA
 
 	P 	PJC d222!(()ABBB&+eI}h&O&O#M88~~* 	&"/XFF    99]H==q !!A%% "333rA   weightsc                    g d}t          |                     d                    }t                      }|D ]\  }}d|v r| j        ~| j                            |          x}rb||         }t          |dt                    }	|                                dk    r|n|d         } |	||           |                    |           |D ]\  }
}}||vr|	                    ||
          }|
                    d          r||vr;t          ||           rL|
                    d          rt          ||          }|t||         }t          |dt                    }	|	t          k    r |	||           n |	|||            nq|
                    d          r||vrft          ||          }|zt          ||           r||vr||         }t          |dt                    }	 |	||           |                    |           |S )	N))re   q_projrw   )re   k_projrx   )re   v_projry   )r:   	gate_projr   )r:   up_projr   F)remove_duplicatezrotary_emb.inv_freqweight_loaderr   z.biasscale)r~   named_parameterssetr0   get_cache_scaler   r   rr   addreplaceendswithr&   r   )r>   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   
param_nameweight_nameshard_ids                r@   load_weightszQwen2Model.load_weights  s   "
 "
 "
 400%0HHII"%%%#* 3	$ 3	$D-$,, ,"/??EEE
 - $J/ '@U V V%2%6%6%8%8A%=%=MM=QRCS  e]333!!*---5K #4 #41
Kd**||K<<==)) d+.E.E*466 ==)) !4T;GGD| #D) '@U V V $999!M%7777!M%AAA ==)) d+.E.E0{CC<*466 {**#D) '@U V Ve]333d####rA   NN)rH   rI   rJ   r   r   rL   typer   Moduler9   r   r   r   r   rG   r   r   r   r   rM   rN   s   @r@   r   r   U  s<         .?<9 <9 <9  <9 	<9
 !O<9 <9 <9 <9 <9 <9|, ,%, , , , , <@-1$ $<$ <$ 2D8	$
 |d*$ 
+	+$ $ $ $L?HU33D-E$F ?3s8 ? ? ? ? ? ? ? ?rA   r   c                   h    e Zd Zg dddgdZdddedef fd	Zd
ej        dej        fdZ	de
edf         ddfdZde
edf         fdZ	 	 dd
ej        dej        dedz  dej        dz  dej        ez  f
dZdej        dej        dz  fdZdee
eej        f                  dee         fdZ xZS )Qwen2ForCausalLM)r   r   r   r   r   )re   r:   r,   r   r   r1   c          	      *   t                                                       |j        j                                        }|j        }|| _        || _        t          |t          |d                    | _	        t                      j        rJ|j        r| j	        j        | _        nDt          |j        |j        |t          |d                    | _        nt%                      | _        t'          |j                  | _        | j	        j        | _        d S )Nmodel)r   r1   lm_headr   )r8   r9   r   r   r   r0   r   r   r)   r   r   r   r   r   r   r   r   r-   r$   r   logits_processorr   )r>   r   r1   r   r0   r?   s        r@   r9   zQwen2ForCausalLM.__init__  s   )3CCEE"/(#L,I,I
 
 

 >>& 	,) #z6-%&!-'	::	      *++DL /0A B B J6 	,,,rA   r   r2   c                 6    | j                             |          S rC   )r   r   r   s     r@   r   z Qwen2ForCausalLM.embed_input_ids6  s    z)))444rA   r   .Nc                     || j         _        d S rC   )r   r   )r>   r   s     r@   set_aux_hidden_state_layersz,Qwen2ForCausalLM.set_aux_hidden_state_layers9  s    -3
***rA   c                 J    t          | j        j                  }d|dz  |dz
  fS )Nr4      )r   r   r   )r>   
num_layerss     r@   "get_eagle3_aux_hidden_state_layersz3Qwen2ForCausalLM.get_eagle3_aux_hidden_state_layers<  s)    *++
:?JN33rA   rn   r   r   c                 6    |                      ||||          }|S rC   )r   )r>   r   rn   r   r   ro   s         r@   rG   zQwen2ForCausalLM.forward@  s)     

y"6
 
 rA   ro   c                 <    |                      | j        |          }|S rC   )r   r   )r>   ro   logitss      r@   compute_logitszQwen2ForCausalLM.compute_logitsL  s      &&t|]CCrA   r   c                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r#   r   r   r   )r>   r   loaders      r@   r   zQwen2ForCausalLM.load_weightsS  sC    "+/;+JTJ<<PT
 
 
 ""7+++rA   r   )rH   rI   rJ   packed_modules_mappingr   rL   r9   r   r   r   r   rK   r   r   r   rG   r   r   r   r   rM   rN   s   @r@   r   r   
  s       
 
 
 

 
 BD 
 
 
z 
3 
 
 
 
 
 
>5 5%, 5 5 5 54%S/ 4d 4 4 4 44E#s(O 4 4 4 4 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,rA   r   r   )F__doc__collections.abcr   	itertoolsr   typingr   r   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr	   vllm.configr
   r   vllm.distributedr   r   %vllm.model_executor.layers.activationr   ;vllm.model_executor.layers.attention.encoder_only_attentionr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   vllm.transformers_utils.configr   r   vllm.v1.attention.backendr   
interfacesr    r!   r"   utilsr#   r$   r%   r&   r'   r(   r)   r   r+   rP   r   r   r   r   r   r   rA   r@   <module>r     s   4 F E $ $ $ $ $ $                    $ $ $ $ $ $ * * * * * * = = = = = = / / / / / / / / O O O O O O O O < < < < < <      9 8 8 8 8 8         
 H G G G G G F F F F F F @ @ @ @ @ @               . - - - - - Q Q Q Q Q Q Q Q 3 3 3 3 3 3 @ @ @ @ @ @ @ @ @ @                 " " " " "ry " " "Jt t t t tRY t t tnI' I' I' I' I'	 I' I' I'^ 8<)-	
 
|
|
 .4
 <$&	
 
 
 
@   !  -
 
 
g g g g g g g
 
gTN, N, N, N, N,ry,
N N, N, N, N, N,rA   