
    .`iM                     V   d Z ddlmZ ddlmZ ddlmZ ddlZddlmZ ddl	m
Z
 ddlmZ dd	lmZmZmZ dd
lmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZmZm Z m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z)m*Z* ddl+m,Z, ddl-m.Z. ddl/m0Z0 ddl1m2Z2 ddl3m4Z4m5Z5m6Z6m7Z7m8Z8  ee9          Z: G d dej;                  Z< G d dej;                  Z= G d dej;                  Z> G d  d!ej;                  Z?e G d" d#ej;                              Z@ G d$ d%ej;        e2          ZAdS )&zInference-only Jurassic model.    )Iterable)islice)AnyN)nn)	Attention)support_torch_compile)CacheConfigModelConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size tensor_model_parallel_all_reduce)init_logger)
SiluAndMul)FusedMoE)RMSNorm)ColumnParallelLinearMergedColumnParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors)Step3TextConfig   )
SupportsPP)PPMissingLayeris_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   \     e Zd Z	 	 d
dededz  def fdZdej        dej        fd	Z	 xZ
S )FusedMoEBlockN configquant_configprefixc                    t                                                       t                      | _        | j        |j        k    r t          d| j         d|j         d          t          |j        |j        |j        |j	        d|j
        || d          | _        t          |j        |j        dd | d          | _        d S )	NzTensor parallel size z' is greater than the number of experts .Fz.experts)num_expertstop_khidden_sizeintermediate_sizereduce_resultsrenormalizer*   r+   z.gatebiasr*   r+   )super__init__r   tp_sizemoe_num_experts
ValueErrorr   	moe_top_kr0   moe_intermediate_sizenorm_expert_weightexpertsr   gate)selfr)   r*   r+   	__class__s       y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/step3_text.pyr7   zFusedMoEBlock.__init__6   s     	;==<&000C C C)/)?C C C  
  ."*$: 1%&&&	
 	
 	
 %"###
 
 
			    hidden_statesreturnc                    |j         }|j         d         }|                    d|          }|                     |          \  }}|                     ||          }| j        dk    rt          |          }|                    |          S )N)rD   router_logitsr   )shapeviewr?   r>   r8   r   )r@   rD   
orig_shape
hidden_dimrH   _final_hidden_statess          rB   forwardzFusedMoEBlock.forwardW   s    "(
"(,
%**2z::99]33q"ll'} + 
 
 <!"BCV"W"W"''
333rC   Nr(   )__name__
__module____qualname__r
   r   strr7   torchTensorrO   __classcell__rA   s   @rB   r'   r'   5   s         37	
 

 )4/
 	
 
 
 
 
 
B4U\ 4el 4 4 4 4 4 4 4 4rC   r'   c                   h     e Zd Z	 	 ddededededz  deddf fd	Zd
ej        dej        fdZ	 xZ
S )Step3TextMLPNr(   r0   r1   
hidden_actr*   r+   rE   c                 &   t                                                       t          ||gdz  d|| d          | _        t	          ||d|| d          | _        |dk    rt          d| d          t                      | _        || _	        d S )	N   F.gate_up_projr4   z
.down_projsiluzUnsupported activation: z!. Only silu is supported for now.)
r6   r7   r   gate_up_projr   	down_projr:   r   act_fnr0   )r@   r0   r1   r[   r*   r+   rA   s         rB   r7   zStep3TextMLP.__init__h   s     	6!#%+++
 
 
 +%(((
 
 
 X:XXX   !ll&rC   rD   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r`   rb   ra   )r@   rD   gate_uprM   intermediate_actoutputs         rB   rO   zStep3TextMLP.forward   sF    &&}55
;;w//NN#344	rC   rP   )rQ   rR   rS   intrT   r   r7   rU   rV   rO   rW   rX   s   @rB   rZ   rZ   g   s         37' '' ' 	'
 )4/' ' 
' ' ' ' ' '<U\ el        rC   rZ   c                        e Zd Z	 	 	 	 	 	 ddedededed	eeef         d
edz  dedededz  de	dz  def fdZ
dej        dej        dej        fdZ xZS )Step3TextAttentionN       r(   r0   	num_headsnum_kv_headsnorm_epsrope_parametersshare_q_dimmax_position_embeddinghead_dimcache_configr*   r+   c           	      4   t                                                       || _        t                      }|| _        | j        |z  dk    sJ | j        |z  | _        |dk    rt          d| d          || _        || _        | j        | j        z  | _	        |r|n| j        | _
        t          || j
        | j	        dz  z   d|
| d          | _        t          | j        | j        z  |d|
| d	          | _        t          | j
        |
          | _        t#          | j
        | j        | j        z  d|
| d          | _        t'          | j        ||          | _        | j        dz  }t+          | j        | j        || j        |	| d          | _        d S )Nr   r   z3Step3TextAttention num_kv_heads must be 1, but got r-   r]   F	.qkv_projr4   z.o_projepsz.wq)max_positionrp   g      z.attn)rt   r+   )r6   r7   r0   r   total_num_headsrm   r:   rn   rs   kv_sizeq_sizer   qkv_projr   o_projr   
inter_normr   wqr   
rotary_embr   attn)r@   r0   rm   rn   ro   rp   rq   rr   rs   rt   r*   r+   r8   scalingrA   s                 rB   r7   zStep3TextAttention.__init__   s    	&688(#g-2222-81UlUUU   ) (4=8%0Ckkdm(K$,**%'''
 
 
 ( 4=0%%%%
 
 
 "$+8<<<&KMD00%>>>
 
 
 #M/+
 
 

 -%NM%###
 
 
			rC   	positionsrD   rE   c                    |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |          }|                     |          d         }|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )NrG   )dimr   )	r}   splitr|   r{   r   r   r   r   r~   )
r@   r   rD   qkvrM   qkvattn_outputresiduals
             rB   rO   zStep3TextAttention.forward   s     }--Q))T[$,E2)NN1aOOAGGAJJqMy!Q//1ii1a((kk+..!rC   )Nrk   rl   NNr(   )rQ   rR   rS   rh   floatdictrT   r   r	   r   r7   rU   rV   rO   rW   rX   s   @rB   rj   rj      s        #'&*+/26D
 D
D
 D
 	D

 D
 c3hD
 4ZD
 !$D
 D
 "D(D
 )4/D
 D
 D
 D
 D
 D
 D
L

6;l
	
 
 
 
 
 
 
 
rC   rj   c                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZd	ej	        d
ej	        dej	        dz  de
ej	        ej	        f         fdZ xZS )Step3TextDecoderLayerNr(   r)   rt   r*   r+   rE   c                    t                                                       |j        | _        t          | j        |j        d|||j        |j        |j        |j        |j	        | d          | _
        t          |                    d          d                             d          d                   }t          |dd           }|2d |                                                    d	          D             }nd
 t          d|j                  D             }||v rHt#          ||| d          | _        t'          | j        |j        d|| d          | _        d| _        n-t'          |j        |j        d|| d          | _        d| _        t3          |j        |j                  | _        t3          |j        |j                  | _        d S )Nr   z
.self_attn)r0   rm   rn   rt   r*   ro   rr   rs   rq   rp   r+   zlayers.r-   r   moe_layers_enumc                 ,    g | ]}t          |          S  )rh   .0is     rB   
<listcomp>z2Step3TextDecoderLayer.__init__.<locals>.<listcomp>   s    QQQc!ffQQQrC   ,c                     g | ]}|S r   r   r   s     rB   r   z2Step3TextDecoderLayer.__init__.<locals>.<listcomp>   s    LLLAaLLLrC   z.moe)r)   r*   r+   r_   z.share_expert)r0   r1   r[   r*   r+   Tz.mlpFrw   )r6   r7   r0   rj   num_attention_headsrms_norm_epsrr   rs   rq   rp   	self_attnrh   r   getattrstriprangenum_hidden_layersr'   moerZ   share_expert_dimshare_expertuse_moer1   mlpr   input_layernormpost_attention_layernorm)	r@   r)   rt   r*   r+   	layer_idxr   moe_layers_idxrA   s	           rB   r7   zStep3TextDecoderLayer.__init__   s    	!-+(0%%(#)#@_*"2(((
 
 
 Y//288==a@AA	!&*;TBB&QQo.C.C.E.E.K.KC.P.PQQQNN MLq&2J)K)KLLLN&&$LF  DH !- ,"("9!) ///! ! !D  DLL#"."(":!)   DH !DL&v'9v?RSSS(/F$7)
 )
 )
%%%rC   r   rD   r   c                 h   ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}| j        r0|                     |          }|                     |          }||z   }n|                     |          }||fS )N)r   rD   )r   r   r   r   r   r   r   )r@   r   rD   r   share_output
moe_outputs         rB   rO   zStep3TextDecoderLayer.forward  s     $H 00??MM&*&:&:=(&S&S#M8' ' 
 

 #'"?"?x"X"Xx< 	4,,];;L-00J(:5MM HH]33Mh&&rC   )NNr(   )rQ   rR   rS   r   r	   r   rT   r7   rU   rV   tuplerO   rW   rX   s   @rB   r   r      s         ,0268
 8
8
 "D(8
 )4/	8

 8
 
8
 8
 8
 8
 8
 8
t'<' |' ,%	'
 
u|U\)	*' ' ' ' ' ' ' 'rC   r   c                        e Zd Zddededdf fdZdej        dej        fdZ	 	 ddej        d	ej        d
e	dz  dej        dz  dej        f
dZ
 xZS )Step3TextModelr(   vllm_configr+   rE   Nc                    t                                                       |j        j        |j        |j        j        | _        | _        t                      j	        sj
        r3t                      j        r t          | j        j                  | _        nt                      | _        t!          j        fd| d          \  | _        | _        | _        t                      j        r!t+          j        j                  | _        nt                      | _        t1          dgj                  | _        d S )Nc                 *    t          |           S )N)r)   rt   r*   r+   )r   )r+   rt   r)   r*   s    rB   <lambda>z)Step3TextModel.__init__.<locals>.<lambda>O  s#    0))	   rC   z.layersr+   rw   rD   )r6   r7   model_config	hf_configrt   r*   
vocab_sizer)   r   is_first_ranktie_word_embeddingsis_last_rankr   r0   embed_tokensr!   r$   r   start_layer	end_layerlayersr   r   normr#   make_empty_intermediate_tensors)r@   r   r+   rt   r)   r*   rA   s      @@@rB   r7   zStep3TextModel.__init__;  sT   )3"/"/ +>>' 	1&	1+7>>+F	1 !7"! !D
 !/ 0 0D8C$      %%%	9
 	9
 	9
5$.$+ >>& 	) 28KLLLDII&((DI/Vv10
 0
,,,rC   	input_idsc                 ,    |                      |          S rd   )r   r@   r   s     rB   embed_input_idszStep3TextModel.embed_input_ids`  s      +++rC   r   intermediate_tensorsinputs_embedsc                 p   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 	                    ||          \  }}|S )NrD   r   )rD   r   )
r   r   r   r   r   r   r   r   r   r   )	r@   r   r   r   r   rD   r   layerrM   s	            rB   rO   zStep3TextModel.forwardc  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	P 	PE&+eI}h&O&O#M88~~* 	&%2 (     99]H==qrC   )r(   NN)rQ   rR   rS   r   rT   r7   rU   rV   r   r   rO   rW   rX   s   @rB   r   r   9  s        #
 #
J #
 #
T #
 #
 #
 #
 #
 #
J, ,%, , , , , <@-1 < < 2D8	
 |d* 
       rC   r   c            
            e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  fdZ
dej        dej        fdZdeeeej        f                  dee         fdZ xZS )Step3TextForCausalLMr(   r   r   r+   c                   t                                                       |j        j        }|| _        || _        t          ||          | _        t                      j	        rIt          |j        |j        t          |d                    | _        t          |j                  | _        nt#                      | _        | j        j        | _        d S )N)r   r+   lm_headr   )r6   r7   r   r   r)   r   r   modelr   r   r   r   r0   r%   r   r   logits_processorr!   r   )r@   r   r+   r)   rA   s       rB   r7   zStep3TextForCausalLM.__init__  s     	)3&#FKKK
>>& 	,)!"#FI66  DL
 %4F4E$F$FD!!)++DL J6 	,,,rC   r   rE   c                 6    | j                             |          S rd   )r   r   r   s     rB   r   z$Step3TextForCausalLM.embed_input_ids  s    z)))444rC   Nr   r   r   c                 6    |                      ||||          }|S rd   )r   )r@   r   r   r   r   rD   s         rB   rO   zStep3TextForCausalLM.forward  s)     

y"6
 
 rC   rD   c                 <    |                      | j        |          }|S rd   )r   r   )r@   rD   logitss      rB   compute_logitsz#Step3TextForCausalLM.compute_logits  s    &&t|]CCrC   weightsc           
      F   ddd| j         j        | j         j        | j         j        dz  z   z  fdd| j         j        | j         j        | j         j        dz  z   z  | j         j        | j         j        z   | j         j        | j         j        dz  z   z  fdd| j         j        | j         j        z   | j         j        | j         j        dz  z   z  | j         j        | j         j        dz  z   | j         j        | j         j        dz  z   z  fg}ddg}t          |                                           }t                      }g d	}d
 |D             }|D ]Z\  }|D ]\  }	}
}|
vrt          fd|D                       r'                    |
|	          t          |           rN|         }|j	        } ||||           |
                                n|D ]}|\  }	}
}|
vr                    |
|	          t          |           r4                    d          s                    d          r|vrc|         }|j	        }t          |j        d                   D ]}||         } |||||           |
                                n|D ]\  }	}
}}|
vr                    |
|	          t          |           r3|         }|j        |j                 }t          ||z            }t          ||z            }|                    |j        |||z
            }|                    |           |
                                nQt          |           r|         }t%          |dt&                    } |||           |
                               \|S )Nrv   z.q_projr   r]   z.k_projz.v_proj)r^   z
.gate_projr   )r^   z.up_projr   )).moe.experts.w13_weightz.moe.gate_proj.weightw1)r   z.moe.up_proj.weightw3)z.moe.experts.w2_weightz.moe.down_proj.weightw2c                     g | ]
}|d          S )r   r   )r   datas     rB   r   z5Step3TextForCausalLM.load_weights.<locals>.<listcomp>  s    %P%P%P$d1g%P%P%PrC   c              3       K   | ]}|v V  	d S rd   r   )r   disable_moe_stacked_paramnames     rB   	<genexpr>z4Step3TextForCausalLM.load_weights.<locals>.<genexpr>  s<        1 .5     rC   z.bias_bias)shard_id	expert_idweight_loader)r)   rq   rs   r   named_parameterssetanyreplacer"   r   addendswithr   rI   
output_dimrh   narrowcopy_r   r   )r@   r   qkv_params_mappingstacked_params_mappingparams_dictloaded_paramsexpert_params_mappingdisable_moe_stacked_paramsloaded_weight
param_nameweight_namer   paramr   mappingr   loaded_weight_expert	start_idxend_idxr   	begin_idxparam_slicer   s                         @rB   load_weightsz!Step3TextForCausalLM.load_weights  s    ';*T[-AA-EEG	 ';*T[-AA-EEG(4;+??;*T[-AA-EEG (4;+??;*T[-AA-EEG(4;+?!+CC;*T[-AA-EEG#
8 /,"

 4002233"%%%!
 !
 !
 &Q%P:O%P%P%P"#* J	0 J	0D-5K I0 I01
Kd**    5O      ||K<<*466 #D) % 3e]H===!!$'''4 80 80G8?5JX"$.. <<Z@@D.tT:: !  g..!26--2H2H!k11 '-E$)$7M%*=+>q+A%B%B  	/<Y/G,%!0 %-&/     "%%d+++E ,0 0 "#!&d22$#||KDD24>> %$ +D 1#k%*:;$'	C$8$8	"%gm"4"4&+ll!,i99L' ' $))-888%))$///24>> %$ +D 1(/!?4I) ) &e];;;%))$///rC   r   )rQ   rR   rS   r   rT   r7   rU   rV   r   r   rO   r   r   r   r   r   rW   rX   s   @rB   r   r     sE       
 	
 
 
  
 	
 
 
 
 
 
85 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
 
 
EL U\    vHU33D-E$F v3s8 v v v v v v v vrC   r   )B__doc__collections.abcr   	itertoolsr   typingr   rU   r   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   r   vllm.distributedr   r   r   vllm.loggerr   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.fused_moer   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   (vllm.transformers_utils.configs.step3_vlr   
interfacesr    utilsr!   r"   r#   r$   r%   rQ   loggerModuler'   rZ   rj   r   r   r   r   rC   rB   <module>r     s   % $ $ $ $ $ $ $                    * * * * * * = = = = = = < < < < < < < < < <         
 $ # # # # # < < < < < < 9 9 9 9 9 9 8 8 8 8 8 8            H G G G G G F F F F F F @ @ @ @ @ @        P O O O O O - - - - - - D D D D D D " " " " " "              
X		/4 /4 /4 /4 /4BI /4 /4 /4d# # # # #29 # # #LQ Q Q Q Q Q Q QhU' U' U' U' U'BI U' U' U'p G G G G GRY G G GTf f f f f29j f f f f frC   