
    .`i=0                        d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZ ddlmZmZ ddlmZ ddlmZ ddlmZ ddl m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z)m*Z*m+Z+ ddl,m-Z. ddl,m/Z/ ddl0m1Z1m2Z2m3Z3m4Z4  ee5          Z6 G d dej7                  Z8 G d dej7                  Z9de9iZ: eddddd !           G d" d#e/                      Z; G d$ d%ej7        e*e+e)          Z<dS )&z?Inference-only Qwen3 model compatible with HuggingFace weights.    )Iterable)AnyN)nn)Qwen3Config)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)init_logger)RMSNorm)QKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHead)IntermediateTensors)set_default_rope_theta)AttentionType   )SupportsEagle3SupportsLoRA
SupportsPP)Qwen2MLP)
Qwen2Model)AutoWeightsLoaderPPMissingLayerextract_layer_indexmaybe_prefixc                        e Zd Zdddddddej        df	dededed	ed
ededz  dedede	dz  de
dz  dededeeef         dz  ddf fdZdej        dej        dej        fdZ xZS )Qwen3Attentioni   Ngư>F hidden_size	num_headsnum_kv_headsrope_parametersmax_positionhead_dimrms_norm_epsqkv_biascache_configquant_configprefix	attn_typedual_chunk_attention_configreturnc           
         t                                                       || _        t                      }|| _        | j        |z  dk    sJ | j        |z  | _        || _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t          d| j        |z            | _        |p	|| j        z  | _	        | j        | j	        z  | _
        | j        | j	        z  | _        | j	        dz  | _        || _        t          || j	        | j        | j        ||
| d          | _        t!          | j        | j	        z  |d|
| d          | _        t%          | j	        |||          | _        t)          | j        | j	        | j        f| j        |	|
| d	|d
|rt+          |          |dni | _        t/          | j	        |          | _        t/          | j	        |          | _        d S )Nr   r   g      z	.qkv_proj)biasr.   r/   Fz.o_proj)r)   r(   r1   z.attn)r'   r-   r.   r/   r0   )	layer_idxr1   eps)super__init__r%   r   total_num_headsr&   total_num_kv_headsmaxr'   r*   q_sizekv_sizescalingr1   r   qkv_projr   o_projr   
rotary_embr   r    attnr   q_normk_norm)selfr%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   tp_size	__class__s                  t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/qwen3.pyr9   zQwen3Attention.__init__9   sZ     	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF GK43G$Gnt}4(4=8}d*+F()M #%'''
 
 
 ( 4=0%%%%
 
 
 #M%+(C	
 
 
 NML
 *%%###
 
 +	088/J  
 
 
	  dm>>>dm>>>    	positionshidden_statesc                    |                      |          \  }}|                    | j        | j        | j        gd          \  }}} |j        g |j        d d         |j        d         | j        z  | j        R  }|                     |          }|                    |j                  } |j        g |j        d d         |j        d         | j        z  | j        R  }	|                     |	          }	|	                    |j                  }| 	                    |||          \  }}| 
                    |||          }
|                     |
          \  }}|S )N)dim)r@   splitr=   r>   viewshaper*   rD   rE   rB   rC   rA   )rF   rK   rL   qkv_qkv	q_by_head	k_by_headattn_outputoutputs               rI   forwardzQwen3Attention.forward   sE   
 }--Q))T[$,E2)NN1aAFVAGCRCLV!'"+*FVVVV	KK	**	NN17##AFVAGCRCLV!'"+*FVVVV	KK	**	NN17##y!Q//1ii1a((KK,,	rJ   )__name__
__module____qualname__r   DECODERintdictfloatboolr	   r   strr   r9   torchTensorr\   __classcell__rH   s   @rI   r#   r#   8   sS        &##+/26&.=AN? N?N? N? 	N?
 N? N? *N? N? N? "D(N? )4/N? N? N? &*#s(^d%:N? 
N? N? N? N? N? N?`< | 
	       rJ   r#   c                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZd	ej	        d
ej	        dej	        dz  de
ej	        ej	        f         fdZ xZS )Qwen3DecoderLayerNr$   configr-   r.   r/   r2   c                    t                                                       |j        | _        t          |d           t	          |dd           }t	          |dd          rt
          j        }nt
          j        }t          | j        |j	        |j
        |j        |j        t	          |dd          t	          |dd           |||j        | d	||
          | _        t          | j        |j        |j        || d          | _        t'          |j        |j                  | _        t'          |j        |j                  | _        d S )Ni@B )default_thetar1   	is_causalTattention_biasFr*   z
.self_attn)r%   r&   r)   r'   r+   r,   r*   r-   r.   r(   r/   r0   r1   z.mlp)r%   intermediate_size
hidden_actr.   r/   r6   )r8   r9   r%   r   getattrr   r`   ENCODER_ONLYr#   num_attention_headsmax_position_embeddingsnum_key_value_headsr+   r(   	self_attnQwen3MLPrq   rr   mlpr   input_layernormpost_attention_layernorm)rF   rl   r-   r.   r/   r1   r0   rH   s          rI   r9   zQwen3DecoderLayer.__init__   se    	!-vW====&-14'
 '
# 6;-- 	3%-II%2I'(073,V%5u==VZ66%%"2((((C
 
 
 ($6(%???
 
 
  'v'9v?RSSS(/F$7)
 )
 )
%%%rJ   rK   rL   residualc                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )N)rK   rL   )r{   rx   r|   rz   )rF   rK   rL   r}   s       rI   r\   zQwen3DecoderLayer.forward   s     $H 00??MM&*&:&:=(&S&S#M8' ' 
 
 #'"?"?x"X"Xx//h&&rJ   )NNr$   )r]   r^   r_   r   r	   r   re   r9   rf   rg   tupler\   rh   ri   s   @rI   rk   rk      s         ,0260
 0
0
 "D(0
 )4/	0

 0
 
0
 0
 0
 0
 0
 0
d'<' |' ,%	'
 
u|U\)	*' ' ' ' ' ' ' 'rJ   rk   	attentionrN   )	input_idsrK   intermediate_tensorsinputs_embeds)dynamic_arg_dimsc                   .     e Zd Zdddedef fdZ xZS )
Qwen3Modelr$   r/   vllm_configr/   c                Z    t                                          ||t                     d S )N)r   r/   decoder_layer_type)r8   r9   rk   )rF   r   r/   rH   s      rI   r9   zQwen3Model.__init__   s6    #FGX 	 	
 	
 	
 	
 	
rJ   )r]   r^   r_   r
   re   r9   rh   ri   s   @rI   r   r      s\         BD 
 
 
z 
3 
 
 
 
 
 
 
 
 
 
rJ   r   c                   h    e Zd Zg dddgdZdddedef fd	Zd
eedf         ddfdZ	deedf         fdZ
dej        dej        fdZ	 	 ddej        dej        dedz  dej        dz  dej        ez  f
dZdej        dej        dz  fdZdeeeej        f                  dee         fdZ xZS )Qwen3ForCausalLM)q_projk_projv_proj	gate_projup_proj)r@   gate_up_projr$   r   r   r/   c          	         t                                                       |j        j        }|j        }|| _        || _        t          |t          |d                    | _        t                      j
        rJ|j        r| j        j        | _        nDt          |j        |j        |t          |d                    | _        nt#                      | _        t%          |j                  | _        | j        j        | _        d S )Nmodel)r   r/   lm_head)r.   r/   )r8   r9   model_config	hf_configr.   rl   r   r!   r   r   is_last_ranktie_word_embeddingsembed_tokensr   r   
vocab_sizer%   r   r   logits_processormake_empty_intermediate_tensors)rF   r   r/   rl   r.   rH   s        rI   r9   zQwen3ForCausalLM.__init__
  s    )3"/(#L,I,I
 
 

 >>& 	,) #z6-%&!-'	::	      *++DL /0A B B J6 	,,,rJ   layers.r2   Nc                     || j         _        d S N)r   aux_hidden_state_layers)rF   r   s     rI   set_aux_hidden_state_layersz,Qwen3ForCausalLM.set_aux_hidden_state_layers)  s    -3
***rJ   c                 J    t          | j        j                  }d|dz  |dz
  fS )N      )lenr   r   )rF   
num_layerss     rI   "get_eagle3_aux_hidden_state_layersz3Qwen3ForCausalLM.get_eagle3_aux_hidden_state_layers,  s)    *++
:?JN33rJ   r   c                 6    | j                             |          S r   )r   embed_input_ids)rF   r   s     rI   r   z Qwen3ForCausalLM.embed_input_ids0  s    z)))444rJ   rK   r   r   c                 6    |                      ||||          }|S r   )r   )rF   r   rK   r   r   rL   s         rI   r\   zQwen3ForCausalLM.forward3  s)     

y"6
 
 rJ   rL   c                 <    |                      | j        |          }|S r   )r   r   )rF   rL   logitss      rI   compute_logitszQwen3ForCausalLM.compute_logits?  s      &&t|]CCrJ   weightsc                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r   rl   r   load_weights)rF   r   loaders      rI   r   zQwen3ForCausalLM.load_weightsF  sC    "+/;+JTJ<<PT
 
 
 ""7+++rJ   )NN)r]   r^   r_   packed_modules_mappingr
   re   r9   r   ra   r   r   rf   rg   r   r   r\   r   r   setr   rh   ri   s   @rI   r   r      s       
 
 
 

 
 BD 
 
 
z 
3 
 
 
 
 
 
>4%S/ 4d 4 4 4 44E#s(O 4 4 4 45 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,rJ   r   )=__doc__collections.abcr   typingr   rf   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   vllm.loggerr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   vllm.sequencer   vllm.transformers_utils.configr   vllm.v1.attention.backendr   
interfacesr   r   r   qwen2r   ry   r   utilsr   r   r    r!   r]   loggerModuler#   rk   ALL_DECODER_LAYER_TYPESr   r    rJ   rI   <module>r      s  0 F E $ $ $ $ $ $              $ $ $ $ $ $ * * * * * * = = = = = = / / / / / / / / O O O O O O O O # # # # # # 8 8 8 8 8 8 R R R R R R R R G G G G G G F F F F F F @ @ @ @ @ @ N N N N N N - - - - - - A A A A A A 3 3 3 3 3 3 @ @ @ @ @ @ @ @ @ @ ' ' ' ' ' '       W W W W W W W W W W W W	X		b b b b bRY b b bJG' G' G' G' G'	 G' G' G'V " 
   ! 	 	 	
 
 
 
 
 
 
	 	
N, N, N, N, N,ry,
N N, N, N, N, N,rJ   