
    .`i!;                       d Z ddlmZ ddlZddlmZ ddlZddlmZ ddlm	Z	m
Z
 ddlmZmZ ddlmZmZmZ dd	lmZ dd
lmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ ddl,m-Z- g dddgdZ.d&dZ/ G d dej0                  Z1 G d dej0                  Z2 G d  d!ej0                  Z3 G d" d#ej0                  Z4 G d$ d%ej0        e$          Z5dS )'z4Shared Step decoder blocks and the Step1 text model.    )annotationsN)Iterable)nn)	AttentionAttentionType)CacheConfig
VllmConfig)get_pp_groupget_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)
SupportsPP)AutoWeightsLoaderPPMissingLayeris_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefix)IntermediateTensors)q_projk_projv_proj	gate_projup_proj)qkv_projgate_up_projtotal_num_headsintreturntorch.Tensorc                   dt          j        t          j        |                     z  }t          j        dd|z  z  t          j                  }t          j        |t          j        dd|z   t          j                            }|| k    r~t          j        dd|z  z  t          j                  }| |z
  }t          j        ddd|z  z   dt          j                  }t          j	        |t          j        ||          gd          }|S )z+Reference ALiBi slopes used by Step models.   g       )dtype   g      r   dim)
mathfloorlog2torchtensorfloat32powarangeint32cat)r&   closest_power_of_2baseslopes
extra_basenum_remaining_headsextra_powerss          t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/step1.py_get_step_alibi_slopesrA   3   s%   dj?)C)CDDD<	d''(m  D YQ..ekBBB F _,,\$++,-
 
 

 .0BB|'''+	
 
 
 UYz<889
 
 
 M    c                  0     e Zd Z	 	 	 dd fd	ZddZ xZS )StepAttentionN cache_configCacheConfig | Nonequant_configQuantizationConfig | Noneprefixstrc                   t                                                       |j        | _        t                      }|j        | _        | j        |z  dk    sJ | j        |z  | _        | j        | j        z  | _        t          |dt          |dd                    }||dk    rd}|| _	        | j	        |k    r| j	        |z  dk    sJ n|| j	        z  dk    sJ t          d| j	        |z            | _        t          | j        | j        | j        | j	        t          |dd          || d          | _        | j        | j        z  | _        | j        | j        z  | _        t!          | j        | j        z  | j        t          |dd          || d	
          | _        t%                      }|| j        z  }|dz   | j        z  }	t'          | j                  ||	         }
|
                                }
| j        dz  | _        t-          | j        | j        | j        | j        |||
| ddt.          j        
  
        | _        d S )Nr   num_attention_groupsnum_key_value_headsr-   attention_biasF	.qkv_proj)hidden_size	head_sizer&   total_num_kv_headsbiasrH   rJ   z.o_proj
input_sizeoutput_sizerT   rH   rJ   g      z.attnT)num_kv_headsrF   rH   alibi_slopesrJ   use_alibi_sqrt	attn_type)super__init__rQ   r   num_attention_headsr&   	num_headshead_dimgetattrrS   maxrX   r   r$   q_sizekv_sizer   o_projr   rA   tolistscaler   r   DECODERattn)selfconfigrF   rH   rJ   tp_sizerS   tp_rank
head_starthead_endrY   	__class__s              r@   r]   zStepAttention.__init__R   s    	!-688%9#g-2222-8(D,@@$*GF<QST,U,U
 
 %);q)@)@!""4"g--*W499999T4499994#:g#EFF)(m 0#6!1599%'''
 
 
 nt}4(4=8'+dm;(!1599%%%%
 
 
 122t~-
aK4>1-d.BCCJxDWX#**,,]D(
NMJ*%%%####+
 
 
			rB   hidden_statesr)   r(   c                    |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          }|                     |          \  }}|S )Nr.   )r$   splitrc   rd   ri   re   )	rj   rq   qkv_qkvattn_outputoutputs	            r@   forwardzStepAttention.forward   sp     }--Q))T[$,E2)NN1aii1a((KK,,	rB   )NNrE   )rF   rG   rH   rI   rJ   rK   )rq   r)   r(   r)   __name__
__module____qualname__r]   r|   __classcell__rp   s   @r@   rD   rD   Q   si         ,026A
 A
 A
 A
 A
 A
 A
F       rB   rD   c                  0     e Zd Z	 	 	 dd fdZddZ xZS )StepMLPNrE   FrQ   r'   intermediate_sizerH   rI   rJ   rK   rT   boolc                    t                                                       t          |||g||| d          | _        t	          ||||| d          | _        t                      | _        d S )N.gate_up_proj)rV   output_sizesrT   rH   rJ   z
.down_projrU   )r\   r]   r   r%   r   	down_projr   act_fn)rj   rQ   r   rH   rJ   rT   rp   s         r@   r]   zStepMLP.__init__   s     	6"+->?%+++
 
 
 +(#%(((
 
 
 !llrB   xr)   r(   c                    |                      |          \  }}|                     |          }|                     |          \  }}|S N)r%   r   r   )rj   r   rv   s      r@   r|   zStepMLP.forward   sB      ##1KKNN~~a  1rB   )NrE   F)
rQ   r'   r   r'   rH   rI   rJ   rK   rT   r   )r   r)   r(   r)   r}   r   s   @r@   r   r      sa        
 37# # # # # # #2       rB   r   c                  2     e Zd Zdd fdZddZddZ xZS )StepDecoderLayerrE   vllm_configr	   rJ   rK   c                   t                                                       |j        j        }|j        }|j        }|j        | _        t          |||| d          | _        t          | j        |j
        || dt          |dd                    | _        t          | j        |j                  | _        t          | j        |j                  | _        d S )Nz
.self_attn)rk   rF   rH   rJ   z.mlpmlp_biasF)rQ   r   rH   rJ   rT   eps)r\   r]   model_config	hf_configrF   rH   rQ   rD   	self_attnr   r   ra   mlpr   rms_norm_epsinput_layernormpost_attention_layernorm)rj   r   rJ   rk   rF   rH   rp   s         r@   r]   zStepDecoderLayer.__init__   s    )3"/"/!-&%%(((	
 
 
 ($6%???U33
 
 
  '# 
  
  
 )0#)
 )
 )
%%%rB   	positionsr)   rq   residualtorch.Tensor | Noner(   !tuple[torch.Tensor, torch.Tensor]c                    ||}|                      |          }n|                      ||          \  }}|                     |          }|                     ||          \  }}|                     |          }||fS )N)rq   )r   r   r   r   )rj   r   rq   r   s       r@   r|   zStepDecoderLayer.forward   s     $H 00??MM&*&:&:=(&S&S#M8]CC"&"?"?x"X"Xx//h&&rB   weights"Iterable[tuple[str, torch.Tensor]]set[str]c                (   g d}t          |                                           }t                      }|D ]\  }}|D ]i\  }}}	||vr|                    ||          }|                    d          r||vr;t          ||           rL||         }
|
j        } ||
||	            nU|                    d          r||vrt          ||           r||         }
t          |
dt                    } ||
|           |	                    |           |S )N))rP   z.q_projrw   )rP   z.k_projrx   )rP   z.v_projry   )r   z
.gate_projr   )r   z.up_projr-   z.biasweight_loader)
dictnamed_parameterssetreplaceendswithr   r   ra   r   add)rj   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   s               r@   load_weightszStepDecoderLayer.load_weights   sg   "
 "
 "
 4002233"%%%#* 	$ 	$D-5K 4 41
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E*466 #D) '@U V Ve]333d####rB   )rE   r   r	   rJ   rK   )r   r)   rq   r)   r   r   r(   r   r   r   r(   r   )r~   r   r   r]   r|   r   r   r   s   @r@   r   r      sj        
 
 
 
 
 
 
:' ' ' ' " " " " " " " "rB   r   c                  :     e Zd Zddd fdZddZ	 dddZ xZS )StepDecoderModelrE   rJ   r   r	   rJ   rK   c                  t                                                       j        j        }j        }|| _        || _        t                      j        s|j        r5t                      j	        r"t          |j        |j        |          | _        nt                      | _        t          |j        fdt#          |d                    \  | _        | _        | _        t                      j	        r!t+          |j        |j                  | _        nt                      | _        t1          |dd          | _        t5          dd	g|j                  | _        d S )
N)rH   c                &    t          |           S )Nr   rJ   )r   )rJ   r   s    r@   <lambda>z+StepDecoderModel.__init__.<locals>.<lambda>)  s    +FSSS rB   layersr   r   aux_hidden_state_layers rq   r   )r\   r]   r   r   rH   rk   r
   is_first_ranktie_word_embeddingsis_last_rankr   
vocab_sizerQ   embed_tokensr   r   num_hidden_layersr   start_layer	end_layerr   r   r   normra   r   r   make_empty_intermediate_tensorsrj   r   rJ   rk   rH   rp   s    `   r@   r]   zStepDecoderModel.__init__  s^   )3"/(>>' 		1&		1+7>>+F		1 !7!")! ! !D !/ 0 0D8C$SSSS119
 9
 9
5$.$+
 >>& 	) 28KLLLDII&((DI8?-r9
 9
$ 0Wj)0
 0
,,,rB   	input_idsr)   r(   c                ,    |                      |          S r   )r   rj   r   s     r@   embed_input_idsz StepDecoderModel.embed_input_ids9  s      +++rB   Nr   r   intermediate_tensorsIntermediateTensors | Noneinputs_embedsLtorch.Tensor | IntermediateTensors | tuple[torch.Tensor, list[torch.Tensor]]c                   t                      j        r!||}n|J |                     |          }d }n|J |d         }|d         }g }t          | j        | j        | j                           D ]N\  }}	|| j        v r0||                    |           n|                    ||z               |	|||          \  }}Ot                      j	        st          ||d          S |                     ||          \  }}
|r||fS |S )Nrq   r   )rq   r   )r
   r   r   	enumerater   r   r   r   appendr   r   r   )rj   r   r   r   r   rq   r   aux_hidden_statesidxlayerrv   s              r@   r|   zStepDecoderModel.forward<  sX    >>' 
	8( - ,,, $ 4 4Y ? ?HH'3330AM+J7H#DK0@4>0Q$RSS 	P 	PJCd222#%,,];;;;%,,]X-EFFF&+eI}h&O&O#M88~~* 	&"/XFF    99]H==q 	4 "333rB   r   r   r)   r(   r)   r   )
r   r   r   r)   r   r   r   r   r(   r   )r~   r   r   r]   r   r|   r   r   s   @r@   r   r     s}        AC "
 "
 "
 "
 "
 "
 "
 "
H, , , , .2$ $ $ $ $ $ $ $ $rB   r   c                  N     e Zd ZeZddd fdZddZ	 dddZddZd dZ	 xZ
S )!Step1ForCausalLMrE   r   r   r	   rJ   rK   c          	     V   t                                                       |j        j        }|j        }|| _        || _        t          |t          |d                    | _        t                      j
        rt          |j        |j        |t          |d                    | _        t          |dd          r)| j                            | j        j                  | _        t%          |j                  | _        nt)                      | _        d | _        | j        j        | _        d S )Nmodelr   lm_head)rH   rJ   r   T)r\   r]   r   r   rH   rk   r   r   r   r
   r   r   r   rQ   r   ra   tie_weightsr   r   logits_processorr   r   r   s        r@   r]   zStep1ForCausalLM.__init__f  s   )3"/(%#00
 
 


 >>& 	))!")#FI66	  DL v4d;; Q#|77
8OPP$3F4E$F$FD!!)++DL$(D! J6 	,,,rB   r   r)   r(   c                6    | j                             |          S r   )r   r   r   s     r@   r   z Step1ForCausalLM.embed_input_ids  s    z)))444rB   Ntorch.LongTensor | Noner   r   r   r   r   r   c                4    |                      ||||          S )N)r   )r   )rj   r   r   r   r   s        r@   r|   zStep1ForCausalLM.forward  s+     zz '	  
 
 	
rB   rq   c                b    t                      j        sd S |                     | j        |          S r   )r
   r   r   r   )rj   rq   s     r@   compute_logitszStep1ForCausalLM.compute_logits  s0     ~~* 	4$$T\=AAArB   r   r   r   c                J    t          |           }|                    |          S r   )r   r   )rj   r   loaders      r@   r   zStep1ForCausalLM.load_weights  s#    "4((""7+++rB   r   r   r   )
r   r   r   r)   r   r   r   r   r(   r   )rq   r)   r(   r   r   )r~   r   r   STEP_PACKED_MODULES_MAPPINGpacked_modules_mappingr]   r   r|   r   r   r   r   s   @r@   r   r   c  s        8AC 
 
 
 
 
 
 
 
<5 5 5 5 .2
 
 
 
 
B B B B, , , , , , , ,rB   r   )r&   r'   r(   r)   )6__doc__
__future__r   r0   collections.abcr   r3   r   vllm.attention.layerr   r   vllm.configr   r	   vllm.distributedr
   r   r   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   %vllm.model_executor.models.interfacesr    vllm.model_executor.models.utilsr   r   r   r   r   r   vllm.sequencer   r   rA   ModulerD   r   r   r   r   r   rB   r@   <module>r      s!   ; : " " " " " "  $ $ $ $ $ $        9 9 9 9 9 9 9 9 / / / / / / / /         
 = < < < < < 8 8 8 8 8 8         
 H G G G G G F F F F F F        P O O O O O < < < < < <                . - - - - - /.. ),     <L L L L LBI L L L^    bi   BP P P P Pry P P PfL L L L Lry L L L^<, <, <, <, <,ry* <, <, <, <, <,rB   