
    .`icQ                     6   d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZmZ dd
lmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZmZ ddl m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z'm(Z( ddl)m*Z*m+Z+ ddl,m-Z- ddl.m/Z/m0Z0 ddl1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7  G d dej8                  Z9 G d dej8                  Z: G d dej8                  Z; G d dej8                  Z<e G d  d!ej8                              Z= G d" d#ej8        e0e/          Z>dS )$zInference-only dots1 model.    )Iterable)isliceN)nn)Dots1Config)	Attention)support_torch_compile)CacheConfigModelConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size tensor_model_parallel_all_reduce)
SiluAndMul)SharedFusedMoE)RMSNorm)MergedColumnParallelLinearQKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors   )SupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayeris_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   P     e Zd Z	 	 	 ddededededz  ded	ed
df fdZd Z xZ	S )Dots1MLPNT hidden_sizeintermediate_size
hidden_actquant_configreduce_resultsprefixreturnc           	         t                                                       t          ||gdz  d|| d          | _        t	          ||d||| d          | _        |dk    rt          d| d	          t                      | _        d S )
N   Fz.gate_up_projbiasr-   r/   z
.down_proj)r4   r-   r.   r/   siluUnsupported activation: !. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)selfr*   r+   r,   r-   r.   r/   	__class__s          t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/dots1.pyr9   zDots1MLP.__init__M   s     	6!#%+++
 
 
 +%)(((
 
 
 X:XXX   !ll    c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r:   r=   r;   )r>   xgate_up_s       r@   forwardzDots1MLP.forwardl   sD    &&q))
KK  ~~a  1rA   )NTr)   )
__name__
__module____qualname__intstrr   boolr9   rG   __classcell__r?   s   @r@   r(   r(   L   s         37## ## # 	#
 )4/# # # 
# # # # # #>      rA   r(   c                   \     e Zd Z	 	 d
dededz  def fdZdej        dej        fd	Z	 xZ
S )Dots1MoENr)   configr-   r/   c                    t                                                       t                      | _        |j        | _        |j        | _        |j        dk    rt          d|j         d          t          |j	        |j
        dd | d          | _        |j        dk    r6t          j        t          j        |j
                            | j        _        nd | j        _        |j        7|j        |j        z  }t'          |j	        ||j        |d| d	          | _        nd | _        t+          | j        |j
        |j        |j	        |j        d|j        |d
|j        |j        | d|j        d| j        j                  | _        d S )Nr5   r6   r7   Fz.gater3   noaux_tcz.shared_experts)r*   r+   r,   r-   r.   r/   Tz.expertsg      ?)shared_expertsnum_expertstop_kr*   r+   r.   renormalizer-   use_grouped_topknum_expert_group
topk_groupr/   scoring_funcrouted_scaling_factore_score_correction_bias)r8   r9   r   tp_sizer]   n_shared_expertsr,   r<   r   r*   n_routed_expertsgatetopk_methodr   	Parametertorchemptyr^   moe_intermediate_sizer(   rU   r   num_experts_per_toknorm_topk_probn_groupr[   r\   experts)r>   rR   r-   r/   r+   r?   s        r@   r9   zDots1MoE.__init__t   s    	;==%+%A" & 7&&26+< 2 2 2  
 %####
 
 
	 ++02F3441 1DI-- 15DI-". & <v?V V"*"."3!,)$ 111# # #D #'D%./,*$: -%!#^(&&&,"%$(I$E!
 
 
rA   hidden_statesr0   c                 D   |j         \  }}|                    d|          }|                     |          \  }}|                     ||          \  }}| j        ||z   | j        z  }n
|| j        z  }| j        dk    rt          |          }|                    ||          S )N)rl   router_logitsr   )shapeviewrb   rk   rU   r]   r_   r   )	r>   rl   
num_tokens
hidden_dimro   rF   
shared_out
routed_outfinal_hidden_statess	            r@   rG   zDots1MoE.forward   s    !.!4
J%**2z::99]33q!%'} ". "
 "

J *#-
#:d>X"X",t/I"I<!"BCV"W"W"''
J???rA   )Nr)   )rH   rI   rJ   r   r   rL   r9   re   TensorrG   rN   rO   s   @r@   rQ   rQ   s   s         37	=
 =
=
 )4/=
 	=
 =
 =
 =
 =
 =
~@U\ @el @ @ @ @ @ @ @ @rA   rQ   c                        e Zd Z	 	 	 	 ddededededed	edz  d
edz  deddf fdZde	j
        de	j
        de	j
        fdZ xZS )Dots1Attention    Nr)   r*   	num_headsnum_kv_headsrR   max_position_embeddingscache_configr-   r/   r0   c	           
         t                                                       || _        t                      }	|| _        | j        |	z  dk    sJ | j        |	z  | _        || _        | j        |	k    r| j        |	z  dk    sJ n|	| j        z  dk    sJ t          d| j        |	z            | _        t          |d|| j        z            | _
        | j        | j
        z  | _        | j        | j
        z  | _        | j
        dz  | _        || _        |j        }
t!          || j
        | j        | j        |
|| d          | _        t%          | j        | j
        z  |d|| d          | _        t)          | j
        ||j        	          | _        t/          | j        | j
        | j        | j        ||| d
          | _        t3          | j
        |j                  | _        t3          | j
        |j                  | _        d S )Nr   r   head_dimg      z	.qkv_projr3   Fz.o_proj)max_positionrope_parametersz.attn)r|   r~   r-   r/   eps)r8   r9   r*   r   total_num_headsr{   total_num_kv_headsmaxr|   getattrr   q_sizekv_sizescalingr}   attention_biasr   qkv_projr   o_projr   r   
rotary_embr   attnr   rms_norm_epsq_normk_norm)r>   r*   r{   r|   rR   r}   r~   r-   r/   r_   r   r?   s              r@   r9   zDots1Attention.__init__   s4    	&688(#g-2222-8"."g-- *W499999 T4499994#:g#EFF
K4CW4WXXnt}4(4=8}d*'>$.)M #%'''
 
 
 ( 4=0%%%%
 
 
 #M0"2
 
 

 NML*%%###
 
 
	 dm1DEEEdm1DEEErA   	positionsrl   c                 R   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |                    d| j        | j                                                |j                  }| 	                    |                    d| j
        | j                                                |j                  }|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )Nrn   )dim)r   splitr   r   r   reshaper{   r   rp   r   r|   r   r   r   )
r>   r   rl   qkvrF   qkvattn_outputoutputs
             r@   rG   zDots1Attention.forward  s     }--Q))T[$,E2)NN1aKK		"dndmDDEEMMagVVKK		"d&7GGHHPPG
 
 y!Q//1ii1a((KK,,	rA   )rz   NNr)   )rH   rI   rJ   rK   r   r	   r   rL   r9   re   rw   rG   rN   rO   s   @r@   ry   ry      s        (,+/26CF CFCF CF 	CF
 CF "%CF "D(CF )4/CF CF 
CF CF CF CF CF CFJ6;l	       rA   ry   c                        e Zd Z	 	 ddededededz  dedz  ddf fdZd	e	j
        d
e	j
        de	j
        dz  de	j
        fdZ xZS )Dots1DecoderLayerNrR   r/   model_configr~   r-   r0   c                    t                                                       |j        | _        t          |dd          }t	          |                    d          d                   }|| _        t          | j        |j        |j	        ||||| d          | _
        |j        4||j        k    r)||j        z  dk    rt          ||| d	
          | _        n+t!          |j        |j        |j        || d	          | _        t'          |j        |j                  | _        t'          |j        |j                  | _        |j        | _        d S )Nr}   rz   .)seprn   z
.self_attn)r*   r{   r|   rR   r}   r~   r-   r/   r   z.mlp)rR   r-   r/   )r*   r+   r,   r-   r/   r   )r8   r9   r*   r   rK   r   	layer_idxry   num_attention_headsnum_key_value_heads	self_attnra   first_k_dense_replacemoe_layer_freqrQ   mlpr(   r+   r,   r   r   input_layernormpost_attention_layernormr]   )	r>   rR   r/   r   r~   r-   r}   r   r?   s	           r@   r9   zDots1DecoderLayer.__init__  sy    	!-")&2KT"R"R--b122	"'(03$;%%(((	
 	
 	
 #/V999F11Q66LF  DHH  "."(":!,)   DH  'v'9v?RSSS(/F$7)
 )
 )
% &,%A"""rA   r   rl   residualc                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )N)r   rl   )r   r   r   r   )r>   r   rl   r   s       r@   rG   zDots1DecoderLayer.forwardJ  s     $H 00??MM&*&:&:=(&S&S#M8-XX"&"?"?x"X"Xx//h&&rA   NN)rH   rI   rJ   r   rL   r
   r	   r   r9   re   rw   rG   rN   rO   s   @r@   r   r     s         ,026,B ,B,B ,B "	,B
 "D(,B )4/,B 
,B ,B ,B ,B ,B ,B\'<' |' ,%	'
 
' ' ' ' ' ' ' 'rA   r   c                       e Zd ZdZdddedef fdZdej        dej        fd	Z		 ddej        dej        de
d
z  dej        d
z  dej        e
z  f
dZdeeeeeef                  fdZdeeeej        f                  dee         fdZ xZS )
Dots1ModelFr)   r/   vllm_configr/   c                |   t                                                       |j        j        |j        |j        |j        | _        j        | _        t                      j	        r&t          j        j        | d          | _        nt                      | _        t          j        fd| d          \  | _        | _        | _        t                      j        r!t)          j        j                  | _        nt                      | _        t/          ddgj                  | _        d S )	Nz.embed_tokensr-   r/   c                 ,    t          |           S )N)r   r~   r-   )r   )r/   r~   rR   r   r-   s    r@   <lambda>z%Dots1Model.__init__.<locals>.<lambda>v  s&    ,)))   rA   z.layersr   r   rl   r   )r8   r9   r   	hf_configr~   r-   rR   
vocab_sizer   is_first_rankr   r*   embed_tokensr"   r%   num_hidden_layersstart_layer	end_layerlayersis_last_rankr   r   normr$   make_empty_intermediate_tensors)r>   r   r/   r~   rR   r   r-   r?   s      @@@@r@   r9   zDots1Model.__init___  s]   )3"/"/"/ +>>' 	1 6!") ///	! ! !D !/ 0 0D8C$       %%%
9
 
9
 
9
5$.$+ >>& 	) 28KLLLDII&((DI/Vj)6+=0
 0
,,,rA   	input_idsr0   c                 ,    |                      |          S rC   )r   r>   r   s     r@   embed_input_idszDots1Model.embed_input_ids  s      +++rA   Nr   intermediate_tensorsinputs_embedsc                 p   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 	                    ||          \  }}|S )Nrl   r   )rl   r   )
r   r   r   r   r   r   r   r   r   r   )	r>   r   r   r   r   rl   r   layerrF   s	            r@   rG   zDots1Model.forward  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	 	E&+e' '#M88
 ~~* 	&"/XFF    99]H==qrA   c                 H    t          j        | ddd| j        j                  S )N	gate_projr;   up_proj)ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namerV   )r   make_expert_params_mappingrR   ra   r>   s    r@   get_expert_mappingzDots1Model.get_expert_mapping  s0    8 + +'4
 
 
 	
rA   weightsc           	      J   g d}t          |                                           }t                      }|                                 }|D ]W\  }}d|v r|D ]r\  }}	}
|	|vrd|v r||vr|                    |	|          }|                    d          r||vrDt          ||           rU||         }|j        } ||||
            n|D ]U}|\  }}	}}
|	|vr|                    |	|          }t          ||           r5||         }|j        } |||||
|            nj|                    d          r||vrt          ||          }|t          ||           r||         }t          |dt                    } |||           |                    |           Y|S )N))r   q_projr   )r   k_projr   )r   v_projr   )r:   r   r   )r:   r   r   zrotary_emb.inv_freqzmlp.experts.z.bias)shard_id	expert_idweight_loader)dictnamed_parameterssetr   replaceendswithr#   r   r   r   r   add)r>   r   stacked_params_mappingparams_dictloaded_paramsexpert_params_mappingnameloaded_weight
param_nameweight_namer   paramr   mappingr   s                  r@   load_weightszDots1Model.load_weights  sO   "
 "
 "
 4002233"%%% $ 7 7 9 9#* 2	$ 2	$D-$,,5K .8 .81
Kd**"d**K0G0G||K<<==)) d+.E.E*466 #D) % 3e]H===4 8 8GCJ@JY"$.. <<Z@@D.tT:: ! '-E$)$7M!M%!)"+    E}}W-- !$k2I2I 4T;GGD| .tT:: ! '-E$+0E% %M "M%777d####rA   rC   )rH   rI   rJ   fall_back_to_pt_during_loadr   rL   r9   re   rw   r   r   rG   listtuplerK   r   r   r   r   rN   rO   s   @r@   r   r   [  sP       "'AC '
 '
 '
z '
3 '
 '
 '
 '
 '
 '
R, ,%, , , , , .2 < < 2D8	
 |d* 
+	+   <
DsCc/A)B$C 
 
 
 
?HU33D-E$F ?3s8 ? ? ? ? ? ? ? ?rA   r   c                   X    e Zd Zg dddgdZdddedef fd	Zd
ej        dej        fdZ		 	 dd
ej        dej        de
dz  dej        dz  dej        e
z  f
dZdej        dej        dz  fdZdeeeej        f                  dee         fdZdeeeeeef                  fdZ xZS )Dots1ForCausalLM)r   r   r   r   r   )r   r:   r)   r   r   r/   c          	         t                                                       |j        j        }|j        }|| _        || _        t          |t          |d                    | _        t                      j
        r1t          |j        |j        |t          |d                    | _        nt                      | _        t!          |j                  | _        | j        j        | _        d S )Nmodel)r   r/   lm_headr   )r8   r9   r   r   r-   rR   r   r&   r   r   r   r   r   r*   r   r"   r   logits_processorr   )r>   r   r/   rR   r-   r?   s        r@   r9   zDots1ForCausalLM.__init__  s    )3"/(#L,I,I
 
 

 >>& 	,)!")#FI66	  DLL *++DL /0A B BJ6 	,,,rA   r   r0   c                 6    | j                             |          S rC   )r   r   r   s     r@   r   z Dots1ForCausalLM.embed_input_ids  s    z)))444rA   Nr   r   r   c                 6    |                      ||||          }|S rC   )r   )r>   r   r   r   r   rl   s         r@   rG   zDots1ForCausalLM.forward  s+     

 	
 
 rA   rl   c                 <    |                      | j        |          }|S rC   )r   r   )r>   rl   logitss      r@   compute_logitszDots1ForCausalLM.compute_logits*  s      &&t|]CCrA   r   c                 J    t          |           }|                    |          S rC   )r!   r   )r>   r   loaders      r@   r   zDots1ForCausalLM.load_weights1  s#    "4((""7+++rA   c                 4    | j                                         S rC   )r   r   r   s    r@   r   z#Dots1ForCausalLM.get_expert_mapping5  s    z,,...rA   r   )rH   rI   rJ   packed_modules_mappingr   rL   r9   re   rw   r   r   rG   r   r   r   r   r   r   rK   r   rN   rO   s   @r@   r   r     s       
 
 
 

 
 BD 
 
 
z 
3 
 
 
 
 
 
.5 5%, 5 5 5 5 <@-1 < < 2D8	
 |d* 
+	+   | 
	   ,HU33D-E$F ,3s8 , , , ,/DsCc/A)B$C / / / / / / / /rA   r   )?__doc__collections.abcr   	itertoolsr   re   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   r   vllm.distributedr   r   r   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.fused_moer   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   
interfacesr   r    utilsr!   r"   r#   r$   r%   r&   Moduler(   rQ   ry   r   r   r    rA   r@   <module>r     s  4 " ! $ $ $ $ $ $              $ $ $ $ $ $ * * * * * * = = = = = = < < < < < < < < < <         
 = < < < < < ? ? ? ? ? ? 8 8 8 8 8 8            H G G G G G F F F F F F @ @ @ @ @ @               . - - - - - 0 0 0 0 0 0 0 0               $ $ $ $ $ry $ $ $NP@ P@ P@ P@ P@ry P@ P@ P@fR R R R RRY R R Rj=' =' =' =' ='	 =' =' ='@ U U U U U U U UpB/ B/ B/ B/ B/ry*l B/ B/ B/ B/ B/rA   