
    .`iD                     .   d dl mZ d dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZmZmZ d dlmZ d d	lmZmZmZ d d
lmZ d dlmZ d dlmZ d dlmZm Z  d dl!m"Z"m#Z# d dl$m%Z% ddl&m'Z' ddl(m)Z)m*Z*m+Z+m,Z,m-Z-  G d dej.                  Z/ G d de          Z0 G d dej.                  Z1 G d dej.                  Z2 G d dej.                  Z3 G d dej.                  Z4 G d d ej.                  Z5 G d! d"ej.        e'          Z6dS )#    )Iterable)isliceN)
DbrxConfig)	Attention)CacheConfig
VllmConfig)get_pp_groupget_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)FusedMoE)QKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors   )
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   d     e Zd ZdZ	 d	dedej        dz  f fdZdej        dej        fdZ	 xZ
S )

DbrxRouterzXA Router implementation for DBRX that returns logits for each expert
    per token.
    Nconfigparams_dtypec                     t                                                       t                      | _        |j        j        | _        |j        | _        t          | j        | j        d|d           | _	        d S )NF)biasr"   quant_config)
super__init__r   tp_size
ffn_configmoe_num_expertsnum_total_expertsd_modelr   layer)selfr!   r"   	__class__s      s/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/dbrx.pyr'   zDbrxRouter.__init__4   sm    
 	;==!'!2!B~%L"%
 
 



    hidden_statesreturnc                 6    |                      |          \  }}|S N)r-   )r.   r2   router_logits_s       r0   forwardzDbrxRouter.forwardE   s    ::m44qr1   r5   )__name__
__module____qualname____doc__r   torchdtyper'   Tensorr8   __classcell__r/   s   @r0   r    r    /   s          ,0
 

 kD(
 
 
 
 
 
"U\ el        r1   r    c            	       z     e Zd Z	 	 	 ddededz  dej        dz  def fdZde	j
        d	ej        d
edefdZ xZS )DbrxExpertsN r!   r%   r"   prefixc                    t                                          |j        j        |j        j        |j        |j        j        |dd|t                      |
  
         || _        |j        | _        | j        j        j        | j	        z  | _
        d S )NT)
num_expertstop_khidden_sizeintermediate_sizer"   reduce_resultsrenormalizer%   r(   rE   )r&   r'   r)   r*   	moe_top_kr,   ffn_hidden_sizer   r!   r(   rJ   r.   r!   r%   r"   rE   r/   s        r0   r'   zDbrxExperts.__init__K   s     	)9#-$/?%%8:: 	 	
 	
 	
 ~!%!7!G4<!Wr1   paramloaded_weightweight_name
param_namec                    t                      }|j        }| j        }t          ||z  |dz   |z            }|                    d          r||                    d          rFt          j        |d| j        | j        z  | j        g          }|d d |d d f         |d d d|d d f<   n!|                    d          r
||d d df<   n|}|                    d          r|                    d          rIt          j        |d| j        | j        z  | j        g          }|d d |d d f         |d d |d|z  d d f<   n&|                    d          r
||d d df<   n||d d <   |                    d	          rr|                    d          rTt          j        |d| j        | j        z  | j        g          	                    dd          }|d d d d |f         |d d <   d S ||d d <   d S d S )
Nr   w1weightr   weight_scalev1   w2)
r
   datarJ   sliceendswithr=   reshaper(   r,   	transpose)	r.   rP   rQ   rR   rS   tp_rank
param_data
shard_sizeshards	            r0   weight_loaderzDbrxExperts.weight_loaderc   sq    122Z
+
g
*Wq[J,FGG %% 
	+""8,, 	+ %!/$,>M! ! 2?qqq%{1K
111a
lAAA-..$$^44 +#0
111a4  *
%% 	.""8,, . %!/$,>M! ! ANAAuaaaKA
111j1z>9111<== $$^44 .#0
111a4   -
111%% 	.""8,, . %!/$,>M! ! )Aq//  !.aaaEk :
111 -
111	. 	.r1   NNrD   )r9   r:   r;   r   r   r=   r>   strr'   nn	Parameterr?   re   r@   rA   s   @r0   rC   rC   J   s         37+/X XX )4/X kD(	X
 X X X X X X0-.|-. |-. 	-.
 -. -. -. -. -. -. -. -.r1   rC   c            	       v     e Zd ZdZ	 	 	 ddededz  dej        dz  def fdZ	d	ej
        d
ej
        fdZ xZS )DbrxMoEzA tensor-parallel MoE implementation for DBRX.

    Each expert's weights are sharded across all ranks and a fused MoE
    kernel is used for the forward pass, and finally we reduce the outputs
    across ranks.
    NrD   r!   r%   r"   rE   c                    t                                                       |j        | _        |t          j                    }|| _        t          || j                  | _        t          ||| j        | d          | _	        d S )Nz.experts)r!   r%   r"   rE   )
r&   r'   r,   r=   get_default_dtyper"   r    routerrC   expertsrO   s        r0   r'   zDbrxMoE.__init__   s     	~ 244L( ):;;"%*&&&	
 
 
r1   r2   r3   c                     |j         }|                    d| j                  }|                     |          }|                     ||          }|                    |          S )NrW   )shapeviewr,   rn   ro   )r.   r2   
orig_shaper6   final_hidden_statess        r0   r8   zDbrxMoE.forward   sZ    "(
%**2t|<<M22"ll=-HH"''
333r1   rf   )r9   r:   r;   r<   r   r   r=   r>   rg   r'   r?   r8   r@   rA   s   @r0   rk   rk      s          37+/
 

 )4/
 kD(	

 
 
 
 
 
 
,4U\ 4el 4 4 4 4 4 4 4 4r1   rk   c            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )DbrxAttentionNrD   r!   cache_configr%   rE   c           
         t                                                       |j        | _        |j        | _        | j        | j        z  | _        |j        j        | _        |j        j	        | _	        dt          |j        j                  d}|j        | _        t          | j        | j        | j        | j        d|| d          | _        t!          | j        | j        d|| d          | _        t%          | j        | j        |d          | _        t)                      }|| _        | j        |z  d	k    sJ | j        |z  | _        | j        |k    r| j        |z  d	k    sJ n|| j        z  d	k    sJ t/          d
| j        |z            | _        | j        | j        z  | _        | j        | j        z  | _        | j        dz  | _        t9          | j        | j        | j        | j        ||| d          | _        d S )Ndefault)	rope_type
rope_thetaFz.Wqkv)r$   r%   rE   z	.out_projT)max_positionrope_parametersis_neox_styler   r   g      .attn)num_kv_headsrw   r%   rE   )r&   r'   r,   n_headstotal_num_headshead_dimattn_config
kv_n_headstotal_num_kv_headsclip_qkvintr{   max_seq_lenr|   r   Wqkvr   out_projr   
rotary_embr   r(   	num_headsmaxr   q_sizekv_sizescalingr   attn)r.   r!   rw   r%   rE   r}   tp_world_sizer/   s          r0   r'   zDbrxAttention.__init__   s1    	~%~(<<"("4"?*3"f0;<<
 
 #. &LM #%###
 
 
	 *LL%'''
 
 
 #M*+	
 
 
 =>>$#m3q8888->"m33 *]:a????? !4#::a????4#:m#KLLnt}4(4=8}d*NML*%%###
 
 
			r1   position_idsr2   r3   c                 t   |                      |          \  }}| j        "|                    | j         | j                   |                    | j        | j        | j        gd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }}|S )N)minr   rW   )dim)	r   r   clamp_splitr   r   r   r   r   )	r.   r   r2   qkvr7   qkvattn_outputs	            r0   r8   zDbrxAttention.forward   s    
 =))Q=$JJDM>t}J===))T[$,E2)NN1a|Q221ii1a((==55qr1   rf   r9   r:   r;   r   r   r   rg   r'   r=   r?   r8   r@   rA   s   @r0   rv   rv      s         ,026C
 C
C
 "D(C
 )4/	C

 C
 C
 C
 C
 C
 C
Jl | 
	       r1   rv   c            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )DbrxFusedNormAttentionNrD   r!   rw   r%   rE   c                    t                                                       |j        | _        t          |||| d          | _        t          j        | j                  | _        t          j        | j                  | _        d S )Nr   rE   )	r&   r'   r,   rv   r   rh   	LayerNormnorm_1norm_2r.   r!   rw   r%   rE   r/   s        r0   r'   zDbrxFusedNormAttention.__init__  sx     	~!L,&7G7G7G
 
 
	 l4<00l4<00r1   r   r2   r3   c                     |}|                      |          }|                     ||          }||z   }|}|                     |          }||fS N)r   r2   )r   r   r   )r.   r   r2   residualxs        r0   r8   zDbrxFusedNormAttention.forward  sd    
 !M22II%'  
 
 !1 M22h&&r1   rf   r   rA   s   @r0   r   r     s         ,0261 11 "D(1 )4/	1
 1 1 1 1 1 1'l' |' 
	' ' ' ' ' ' ' 'r1   r   c            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )	DbrxBlockNrD   r!   rw   r%   rE   c                     t                                                       t          |||| d          | _        t	          ||| d          | _        d S )Nz.norm_attn_normr   z.ffn)r&   r'   r   norm_attn_normrk   ffnr   s        r0   r'   zDbrxBlock.__init__1  sf     	4L,&7Q7Q7Q
 
 
 6<6HHHr1   r   r2   r3   c                 n    |                      ||          \  }}|                     |          }||z   }|S r   )r   r   )r.   r   r2   r   s       r0   r8   zDbrxBlock.forward>  sL    
 #'"5"5%' #6 #
 #
x //%0r1   rf   r   rA   s   @r0   r   r   0  s         ,026I II "D(I )4/	I
 I I I I I Il | 
	       r1   r   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )	DbrxModelrD   r   vllm_configrE   c                `   t                                                       |j        j        |j        |j        | _        t          j        j                  | _	        t          j        fd| d          \  | _        | _        | _        t          j        j        d          | _        |                                 D ]G}t'          |d          r5t)          |j        t          j                  r|                    dd            Ht1          dgj                  | _        d S )Nc                 *    t          |           S )Nr   )r   )rE   rw   r!   r%   s    r0   <lambda>z$DbrxModel.__init__.<locals>.<lambda>[  s    9V\<PVWWW r1   z.blocksr   gh㈵>)epsr$   r2   )r&   r'   model_config	hf_configrw   r%   r   
vocab_sizer,   wter   n_layersstart_layer	end_layerblocksrh   r   norm_fmoduleshasattr
isinstancer$   ri   register_parameterr   make_empty_intermediate_tensors)r.   r   rE   modulerw   r!   r%   r/   s       @@@r0   r'   zDbrxModel.__init__M  s4   )3"/"/()N
 
 9DOWWWWWW%%%9
 9
 9
5$.$+
 l6>t<<<llnn 	8 	8Fvv&& 8:fk2<+P+P 8))&$777/Vv~0
 0
,,,r1   	input_idsr3   c                 ,    |                      |          S r5   )r   r.   r   s     r0   embed_input_idszDbrxModel.embed_input_idsg  s    xx	"""r1   Nr   intermediate_tensorsinputs_embedsc                 J   t                      j        r||}n"|                     |          }n|sJ |d         }t          | j        | j        | j                  D ]} |||          }t                      j        st          d|i          S | 	                    |          }|S )Nr2   )
r	   is_first_rankr   r   r   r   r   is_last_rankr   r   )r.   r   r   r   r   r2   blocks          r0   r8   zDbrxModel.forwardj  s     >>' 	B( - $ 4 4Y ? ?''''0AMDK)94>JJ 	? 	?E!E,>>MM~~* 	I&'GHHHM22r1   weightsc                 @   d dD             }t          |                     d                    }t                      }|D ]\\  }}| j        ~| j                            |          x}rb||         }t          |dt                    }	|                                dk    r|n|d         } |	||           |                    |           |	                    d          r|dz   }|D ]O\  }
}||vr
|
                    ||
          }t          ||           r1||         }|j        }	 |	||||            nPt          ||           r	t          ||          }|||         }t          |dt                    }	 |	||           |                    |           ^|S )	Nc                 (    g | ]}|d v rdndd| fS ))rU   rY   w13r[   zmlp. ).0rR   s     r0   
<listcomp>z*DbrxModel.load_weights.<locals>.<listcomp>  sH     !
 !
 !

  %44$${$$!
 !
 !
r1   )rU   rY   r[   F)remove_duplicatere   r   )rU   r[   rY   _weight)dictnamed_parameterssetr%   get_cache_scalegetattrr   r   addr^   replacer   re   r   )r.   r   expert_params_mappingparams_dictloaded_paramsnamerQ   
scale_namerP   re   rS   rR   s               r0   load_weightszDbrxModel.load_weights  s   !
 !

  2!
 !
 !
 400%0HHII"%%%#* %	$ %	$D- ,"/??EEE
 - $J/ '@U V V%2%6%6%8%8A%=%=MM=QRCS  e]333!!*---}}/00 (i'+@ 4 4'
Kd**||K<<*466 #D) % 3e]KFFF +466 0{CC<#D) '@U V Ve]333d####r1   r5   )r9   r:   r;   r   rg   r'   r=   r?   r   r   r8   r   tupler   r   r@   rA   s   @r0   r   r   L  s       AC 
 
 
z 
3 
 
 
 
 
 
4# #%, # # # # .2 < l 2D8	
 |d* 
+	+   ,1HU33D-E$F 13s8 1 1 1 1 1 1 1 1r1   r   c                       e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
dej        dej        d	z  fdZdeeeej        f                  dee         fdZ xZS )DbrxForCausalLMrD   r   r   rE   c          	         t                                                       |j        j        }|j        }|| _        |j        rt          d          || _        t          |t          |d                    | _
        t          |j        |j        |t          |d                    | _        t          |j                  | _        | j
        j        | _        d S )Nz5tie_word_embeddings is not supported for Dbrx models.transformer)r   rE   lm_head)r%   rE   )r&   r'   r   r   r%   r!   tie_word_embeddings
ValueErrorr   r   r   r   r   r,   r   r   logits_processorr   )r.   r   rE   r!   r%   r/   s        r0   r'   zDbrxForCausalLM.__init__  s    )3"/% 	VTUUU($#L,O,O
 
 
 &N%	22	
 
 
 !00A B B< 	,,,r1   r   r3   c                 6    | j                             |          S r5   )r   r   r   s     r0   r   zDbrxForCausalLM.embed_input_ids  s    //	:::r1   N	positionsr   r   c                 6    |                      ||||          }|S r5   )r   )r.   r   r   r   r   r2   s         r0   r8   zDbrxForCausalLM.forward  s+     ((y"6
 
 r1   r2   c                 <    |                      | j        |          }|S r5   )r   r   )r.   r2   logitss      r0   compute_logitszDbrxForCausalLM.compute_logits  s      &&t|]CCr1   r   c                 J    t          |           }|                    |          S r5   )r   r   )r.   r   loaders      r0   r   zDbrxForCausalLM.load_weights  s#    "4((""7+++r1   )NN)r9   r:   r;   r   rg   r'   r=   r?   r   r   r8   r   r   r   r   r   r@   rA   s   @r0   r   r     sN       AC 
 
 
z 
3 
 
 
 
 
 
.; ;%, ; ; ; ; <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r1   r   )7collections.abcr   	itertoolsr   r=   torch.nnrh   transformersr   vllm.attention.layerr   vllm.configr   r   vllm.distributedr	   r
   r   $vllm.model_executor.layers.fused_moer   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   
interfacesr   utilsr   r   r   r   r   Moduler    rC   rk   rv   r   r   r   r   r   r1   r0   <module>r     sv   % $ $ $ $ $              # # # # # # * * * * * * / / / / / / / /         
 : 9 9 9 9 9         
 H G G G G G F F F F F F @ @ @ @ @ @               . - - - - - " " " " " "                    6F. F. F. F. F.( F. F. F.R$4 $4 $4 $4 $4bi $4 $4 $4NR R R R RBI R R Rj' ' ' ' 'RY ' ' 'B    	   8e e e e e	 e e eP0, 0, 0, 0, 0,bi 0, 0, 0, 0, 0,r1   