
    .`i3                        d Z ddlZddlmZ ddlmZ ddlmZ ddlZddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZmZ ddlmZmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddl m!Z! ddl"m#Z# ddl$m%Z%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z,m-Z- ddl.m/Z/m0Z0m1Z1m2Z2  G d de	j3                  Z4 G d de	j3                  Z5 G d de	j3                  Z6e G d de	j3                              Z7 G d  d!e	j3                  Z8 G d" d#e8e-e,          Z9dS )$z>Inference-only QWen model compatible with HuggingFace weights.    N)Iterable)islice)Any)nn)PretrainedConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )SupportsLoRA
SupportsPP)is_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   j     e Zd ZdZ	 	 	 ddededededz  d	ef
 fd
Zdej	        dej	        fdZ
 xZS )QWenMLPzMLP for the language component of the Qwen model, which contains a
    MergedColumnParallelLinear merging 2 outputs via silu activation.siluN hidden_sizeintermediate_size
hidden_actquant_configprefixc                    t                                                       t          ||gdz  d|| d          | _        t	          ||d|| d          | _        |dk    rt          d| d          t                      | _        d S )	N   Fz.gate_up_projbiasr(   r)   .c_projr#   zUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   c_proj
ValueErrorr   act_fn)selfr%   r&   r'   r(   r)   	__class__s         s/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/qwen.pyr0   zQWenMLP.__init__5   s     	6!#%+++
 
 
 (%%%%
 
 
 X:XXX   !ll    xreturnc                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r1   r4   r2   )r5   r9   gate_up_s       r7   forwardzQWenMLP.forwardR   sB    &&q))
KK  {{1~~1r8   )r#   Nr$   )__name__
__module____qualname____doc__intstrr   r0   torchTensorr?   __classcell__r6   s   @r7   r"   r"   1   s        I I !26# ## # 	#
 )4/# # # # # # #: %,        r8   r"   c                        e Zd Z	 	 	 	 ddedededeeef         dz  dedz  dedz  d	ef fd
Z	de
j        de
j        de
j        fdZ xZS )QWenAttentionNr$   r%   	num_headsmax_position_embeddingsrope_parameterscache_configr(   r)   c           	      *   t                                                       || _        t                      }|| _        | j        |z  dk    sJ | j        |z  | _        || j        z  | _        t          || j        | j        d|| d          | _        t          | j        | j        z  |d|| d          | _
        | j        dz  | _        t          | j        ||          | _        t          | j        | j        | j        ||| d	
          | _        d S )Nr   Tz.c_attnr,   Fr.   g      )max_positionrN   .attn)rO   r(   r)   )r/   r0   r%   r   total_num_headsrL   head_dimr   c_attnr   r2   scalingr   
rotary_embr   attn)
r5   r%   rL   rM   rN   rO   r(   r)    tensor_model_parallel_world_sizer6   s
            r7   r0   zQWenAttention.__init__Z   sN    	&+O+Q+Q((#&FF!KKKK-1QQ#t';;'M %%%%
 
 
 ( 4=0%%%%
 
 
 }d*"M0+
 
 

 NML%%###
 
 
			r8   	positionshidden_statesr:   c                     |                      |          \  }}|                    dd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )N   )chunksdim)rU   chunkrW   rX   r2   )
r5   rZ   r[   qkvr>   qkvattn_outputoutputs
             r7   r?   zQWenAttention.forward   s|    
 ]++Q))1")--1ay!Q//1ii1a((KK,,	r8   )NNNr$   )r@   rA   rB   rD   dictrE   r   r
   r   r0   rF   rG   r?   rH   rI   s   @r7   rK   rK   Y   s         26+/26.
 .
.
 .
 "%	.

 c3h$..
 "D(.
 )4/.
 .
 .
 .
 .
 .
 .
`
<
 |
 
	
 
 
 
 
 
 
 
r8   rK   c            
            e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        dz  de
ej	        ej	        f         fdZ xZS )	QWenBlockNr$   configrO   r(   r)   c           
      |   t                                                       t          |j        |j                  | _        t          |j        |j        |j        |j	        ||| d          | _
        t          |j        |j                  | _        t          |j        |j        dz  || d          | _        d S )NepsrR   )rN   rO   r(   r)   r+   z.mlpr(   r)   )r/   r0   r   r%   layer_norm_epsilonln_1rK   num_attention_headsrM   rN   rX   ln_2r"   r&   mlp)r5   rk   rO   r(   r)   r6   s        r7   r0   zQWenBlock.__init__   s     	F.F4MNNN	!&*"2%%###
 
 
	 F.F4MNNN	$)%???	
 
 
r8   rZ   r[   residualr:   c                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )N)rZ   r[   )rq   rX   rs   rt   )r5   rZ   r[   ru   s       r7   r?   zQWenBlock.forward   s     $H IIm44MM&*iix&H&H#M8		' " 
 
 #'))M8"D"Dx//h&&r8   )NNr$   )r@   rA   rB   r   r
   r   rE   r0   rF   rG   tupler?   rH   rI   s   @r7   rj   rj      s         ,026
 
 
 "D(
 )4/	

 
 
 
 
 
 
:'<' |' ,%	'
 
u|U\)	*' ' ' ' ' ' ' 'r8   rj   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
 xZS )	QWenModelr$   r)   vllm_configr)   c                   t                                                       |j        j        |j        |j        | _        j        | _        t          j        j	                  | _
        t          j        fd| d          \  | _        | _        | _        t!          j	        j                  | _        t'          ddgj	                  | _        d S )Nc                 *    t          |           S )Nrz   )rj   )r)   rO   rk   r(   s    r7   <lambda>z$QWenModel.__init__.<locals>.<lambda>   s    9V\<PVWWW r8   z.hrz   rm   r[   ru   )r/   r0   model_config	hf_configrO   r(   rk   
vocab_sizer   r%   wter   num_hidden_layersstart_layer	end_layerhr   rp   ln_fr   make_empty_intermediate_tensors)r5   r{   r)   rO   rk   r(   r6   s      @@@r7   r0   zQWenModel.__init__   s    )3"/"/ +)
 
 4?$WWWWWW===4
 4
 4
0$.$&
 F.F4MNNN	/Vj)6+=0
 0
,,,r8   	input_idsr:   c                 ,    |                      |          S r<   )r   r5   r   s     r7   embed_input_idszQWenModel.embed_input_ids   s    xx	"""r8   NrZ   intermediate_tensorsinputs_embedsc                 p   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 	                    ||          \  }}|S )Nr[   ru   )r[   ru   )
r   is_first_rankr   r   r   r   r   is_last_rankr   r   )	r5   r   rZ   r   r   r[   ru   layerr>   s	            r7   r?   zQWenModel.forward   s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDFD$4dnEE 	 	E&+e' '#M88
 ~~* 	&"/XFF    99]H==qr8   r<   )r@   rA   rB   r   rE   r0   rF   rG   r   r   r?   rH   rI   s   @r7   ry   ry      s        AC 
 
 
z 
3 
 
 
 
 
 
0# #%, # # # # .2 < < 2D8	
 |d* 
+	+       r8   ry   c            	            e Zd Zdeddededee         ddf fdZd	ej	        dej	        fd
Z
dej	        dej	        dz  fdZdeeeej	        f                  dee         fdZ xZS )QWenBaseModelr$   )r)   transformer_typer{   r)   r   r:   Nc          	         t                                                       |j        j        }|j        }|j        j        }|| _        || _        || _         ||t          |d                    | _        t          |j
        |j        |t          |d                    | _        | j        j        r| j        j        j        | j        _        t!          |j
                  | _        | j        j        | _        d S )Ntransformerr{   r)   lm_headro   )r/   r0   r   r   r(   multimodal_configrk   r    r   r   r   r%   r   tie_word_embeddingsr   weightr   logits_processorr   )r5   r{   r)   r   rk   r(   r   r6   s          r7   r0   zQWenBaseModel.__init__
  s     	)3"/'4F!2(++#L,O,O
 
 
 &%	22	
 
 
 ;* 	>"&"2"6"=DL /0A B B< 	,,,r8   r   c                 6    | j                             |          S r<   )r   r   r   s     r7   r   zQWenBaseModel.embed_input_ids(  s    ##I...r8   r[   c                 <    |                      | j        |          }|S r<   )r   r   )r5   r[   logitss      r7   compute_logitszQWenBaseModel.compute_logits+  s      &&t|]CCr8   weightsc                 2   ddg}t          |                                           }t                      }|D ]\  }}d|v r
|D ]i\  }}}	||vr|                    ||          }|                    d          r||vr;t          ||           rL||         }
|
j        } ||
||	            nU|                    d          r||vrt          ||           r||         }
t          |
dt                    } ||
|           |	                    |           |S )N)r1   w2r   )r1   w1r   zrotary_emb.inv_freqz.biasweight_loader)
rh   named_parameterssetreplaceendswithr   r   getattrr   add)r5   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   s               r7   load_weightszQWenBaseModel.load_weights2  ss    &%"

 4002233"%%%#* 	$ 	$D-$,,5K 4 41
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E*466 #D) '@U V Ve]333d####r8   )r@   rA   rB   ry   r   rE   typer0   rF   rG   r   r   r   rw   r   r   rH   rI   s   @r7   r   r   	  s       
 ,5
 
 
  
 	

 y/
 

 
 
 
 
 
</ /%, / / / /| 
	   $HU33D-E$F $3s8 $ $ $ $ $ $ $ $r8   r   c                        e Zd ZdgddgdZdddedef fd	Z	 	 ddej        dej        de	d
z  dej        d
z  dej        e	z  f
dZ
 xZS )QWenLMHeadModelrU   r   r   )rU   r1   r$   rz   r{   r)   c                    |j         j        }t          |d          r*ddgi}t          dt	          j        |           d          t                                          ||           d S )NvisualarchitecturesQwenVLForConditionalGenerationzThe configuration of this model indicates that it supports vision inputs, but you instantiated the text-only version of this model. Please use the vision model by setting `--hf-overrides 'z'`r   )r   r   hasattrRuntimeErrorjsondumpsr/   r0   )r5   r{   r)   rk   hf_overridesr6   s        r7   r0   zQWenLMHeadModel.__init__b  s    )368$$ 	+.N-OPLA %)J|$<$<A A A   	[@@@@@r8   Nr   rZ   r   r   r:   c                 6    |                      ||||          }|S r<   )r   )r5   r   rZ   r   r   r[   s         r7   r?   zQWenLMHeadModel.forwardo  s+     ((y"6
 
 r8   )NN)r@   rA   rB   packed_modules_mappingr   rE   r0   rF   rG   r   r?   rH   rI   s   @r7   r   r   Y  s        *
  BD A A Az A3 A A A A A A" <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
 
 
 
 
r8   r   ):rC   r   collections.abcr   	itertoolsr   typingr   rF   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr	   vllm.configr
   r   vllm.distributedr   r   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   r   utilsr   r   r   r    Moduler"   rK   rj   ry   r   r    r8   r7   <module>r      s.   E D  $ $ $ $ $ $                    ) ) ) ) ) ) * * * * * * = = = = = = / / / / / / / / O O O O O O O O < < < < < < 8 8 8 8 8 8         
 H G G G G G F F F F F F @ @ @ @ @ @        P O O O O O - - - - - - 0 0 0 0 0 0 0 0           % % % % %bi % % %P; ; ; ; ;BI ; ; ;|2' 2' 2' 2' 2'	 2' 2' 2'j 9 9 9 9 9	 9 9 9xM M M M MBI M M M`         mZ          r8   