
    .`iE                     X   d Z ddlZddlmZ ddlmZ ddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZmZ dd
lmZmZmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$m%Z% ddl&m'Z'm(Z( ddl)m*Z* ddl+m,Z,m-Z-m.Z. ddl/m0Z0m1Z1m2Z2m3Z3m4Z4 de5dej6        fdZ7 G d dej8                  Z9 G d dej8                  Z: G d dej8                  Z;e G d  d!ej8                              Z< G d" d#ej8        e,e-e.          Z= G d$ d%e=          Z> G d& d'e=          Z?dS )(zBInference-only BaiChuan model compatible with HuggingFace weights.    N)Iterable)islice)nn)PretrainedConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_groupget_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loaderrow_parallel_weight_loader)IntermediateTensors   )SupportsLoRA
SupportsPPSupportsQuant)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixtotal_num_headsreturnc                    dt          j        t          j        |                     z  }t          j        ddt          j        |          dz
   z   z  t          j                  }t          j        dd|z   t          j                  }t          j        ||          }|| k    rt          j        ddt          j        d|z            dz
   z   z  t          j                  }t          || |z
            }t          j        ddd|z  z   dt          j                  }t          j
        |t          j        ||          gd          }|S )N      )dtyper   )startendstepr)   r   )dim)mathfloorlog2torchtensorfloat32arangeint32powmincat)r$   closest_power_of_2basepowersslopes
extra_basenum_remaining_headsextra_powerss           w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/baichuan.py_get_alibi_slopesrA   F   sU   dj?)C)CDDD<	ty!344q899:;m  D \!Q!335;GGGFYtV$$F_,,\A49Q);%;<<q@AABC-
 
 

 "2D D
 
 |Q!4441EK
 
 
 FEIj,$G$GHaPPPM    c                   F     e Zd Z	 	 d
dededededz  def
 fdZd	 Z xZS )BaiChuanMLPN hidden_sizeintermediate_size
hidden_actquant_configprefixc                    t                                                       t          ||gdz  d|| d          | _        t	          ||d|| d          | _        |dk    rt          d| d          t                      | _        d S )	Nr'   Fz.gate_up_projbiasrI   rJ   z
.down_projsiluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)selfrF   rG   rH   rI   rJ   	__class__s         r@   rP   zBaiChuanMLP.__init___   s     	6!#%+++
 
 
 +%(((
 
 
 X:XXX   !llrB   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)rQ   rT   rR   )rU   xgate_up_s       r@   forwardzBaiChuanMLP.forward|   sD    &&q))
KK  ~~a  1rB   )NrE   )	__name__
__module____qualname__intstrr   rP   r\   __classcell__rV   s   @r@   rD   rD   ^   s         37# ## # 	#
 )4/# # # # # # #:      rB   rD   c                        e Zd ZdZ	 	 	 	 ddedededed	ed
edz  dedz  def fdZ	de
j        de
j        de
j        fdZ xZS )BaiChuanAttentionz=Multi-headed attention from 'Attention Is All You Need' paper    NrE   rF   	num_headsposition_embeddingrope_parametersmax_position_embeddingscache_configrI   rJ   c	           
         t                                                       || _        t                      }	|| _        | j        |	z  dk    sJ | j        |	z  | _        || j        z  | _        || _        || _        t          || j        | j        | j        d|| d          | _
        t          | j        | j        z  |d|| d          | _        | j        dk    rt                      }
|
| j        z  }|
dz   | j        z  }t          | j                  }|||                                         }| j        dz  }t!          | j        | j        |||| d	
          | _        d S t%          | j        | j        |          | _        | j        dz  | _        t!          | j        | j        | j        ||| d	          | _        d S )Nr   Fz.W_packrL   z.o_projALIBIr   g      z.attn)alibi_slopesrI   rJ   )max_positionri   )rk   rI   rJ   )rO   rP   rF   r   r$   rg   head_dimrh   rj   r   W_packr   o_projr   rA   tolistr   attnr   
rotary_embscaling)rU   rF   rg   rh   ri   rj   rk   rI   rJ    tensor_model_parallel_world_sizetp_rank
head_starthead_endrn   rv   rV   s                  r@   rP   zBaiChuanAttention.__init__   s    	&+O+Q+Q((#&FF!KKKK-1QQ#t';;"4'>$ (M  %%%%
 
 
 ( 4=0%%%%
 
 
 "g--466G 4>1J!t~5H,T-ABBL'
8(;<CCEELmT)G!)) '''  DIII '!9 /  DO
  =$.DL!)) '''  DIIIrB   	positionshidden_statesr%   c                    |                      |          \  }}|                    dd          \  }}}| j        dk    r|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )Nr(   )chunksr-   rm   )rq   chunkrh   ru   rt   rr   )
rU   r{   r|   qkvr[   qkvattn_outputoutputs
             r@   r\   zBaiChuanAttention.forward   s    
 ]++Q))1")--1a"g--??9a33DAqii1a((KK,,	rB   )rf   NNrE   )r]   r^   r_   __doc__r`   ra   dictr	   r   rP   r1   Tensorr\   rb   rc   s   @r@   re   re      s        GG (,+/26E EE E  	E
 E "%E "D(E )4/E E E E E E EN< | 
	       rB   re   c                        e Zd Z	 	 	 ddedededz  dedz  def
 fdZd	ej	        d
ej	        dej	        dz  de
ej	        ej	        f         fdZ xZS )BaiChuanDecoderLayerNrE   configrh   rk   rI   rJ   c                    t                                                       |j        | _        t          |dd          }t	          | j        |j        |t          |dd           |||| d          | _        t          | j        |j        |j	        || d          | _
        t          |j        |j                  | _        t          |j        |j                  | _        d S )	Nrj   rf   ri   z
.self_attn)rF   rg   rh   ri   rj   rk   rI   rJ   z.mlp)rF   rG   rH   rI   rJ   eps)rO   rP   rF   getattrre   num_attention_heads	self_attnrD   rG   rH   mlpr   rms_norm_epsinput_layernormpost_attention_layernorm)rU   r   rh   rk   rI   rJ   rj   rV   s          r@   rP   zBaiChuanDecoderLayer.__init__   s     	!-")&2KT"R"R*(01#F,=tDD$;%%(((	
 	
 	
 ($6(%???
 
 
  'v'9v?RSSS(/F$7)
 )
 )
%%%rB   r{   r|   residualr%   c                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )N)r{   r|   )r   r   r   r   )rU   r{   r|   r   s       r@   r\   zBaiChuanDecoderLayer.forward   s     $H 00??MM&*&:&:=(&S&S#M8' ' 
 
 #'"?"?x"X"Xx//h&&rB   )NNrE   )r]   r^   r_   r   ra   r	   r   rP   r1   r   tupler\   rb   rc   s   @r@   r   r      s        
 ,026
 
 
  
 "D(	

 )4/
 
 
 
 
 
 
B'<' |' ,%	'
 
u|U\)	*' ' ' ' ' ' ' 'rB   r   c                        e Zd Z	 	 ddedededdf fdZd	ej        dej        fd
Z	 dd	ej        dej        de	dz  dej        dz  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )BaiChuanModelrE   ROPEvllm_configrJ   rh   r%   Nc                    t                                                       |j        j        |j        |j        | _        j        | _        t          j        j	                  | _
        t          j        fd| d          \  | _        | _        | _        t!          j	        j                  | _        t'          ddgj	                  | _        d S )Nc                 ,    t          |           S )NrJ   )r   )rJ   rk   r   rh   rI   s    r@   <lambda>z(BaiChuanModel.__init__.<locals>.<lambda>+  s#    /*L,v   rB   z.layersr   r   r|   r   )rO   rP   model_config	hf_configrk   rI   r   
vocab_sizer   rF   embed_tokensr"   num_hidden_layersstart_layer	end_layerlayersr   r   normr!   make_empty_intermediate_tensors)rU   r   rJ   rh   rk   r   rI   rV   s      `@@@r@   rP   zBaiChuanModel.__init__  s    	)3"/"/ +2
 
 9D$       %%%9
 9
 9
5$.$+ F.F4GHHH	/Vj)6+=0
 0
,,,rB   	input_idsc                 ,    |                      |          S rX   )r   rU   r   s     r@   embed_input_idszBaiChuanModel.embed_input_ids5  s      +++rB   r{   intermediate_tensorsinputs_embedsc                 p   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 	                    ||          \  }}|S )Nr|   r   )r|   r   )
r   is_first_rankr   r   r   r   r   is_last_rankr   r   )	rU   r   r{   r   r   r|   r   layerr[   s	            r@   r\   zBaiChuanModel.forward8  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	 	E&+e' '#M88
 ~~* 	&%2 (     99]H==qrB   weightsc                 2   ddg}t          |                                           }t                      }|D ]\  }}d|v r
|D ]i\  }}}	||vr|                    ||          }|                    d          r||vr;t          ||           rL||         }
|
j        } ||
||	            nU|                    d          r||vrt          ||           r||         }
t          |
dt                    } ||
|           |	                    |           |S )N)rQ   	gate_projr   )rQ   up_projr   zrotary_emb.inv_freqz.biasweight_loader)
r   named_parameterssetreplaceendswithr    r   r   r   add)rU   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   s               r@   load_weightszBaiChuanModel.load_weightsY  ss    -*"

 4002233"%%%#* 	$ 	$D-$,,5K 4 41
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E*466 #D) '@U V Ve]333d####rB   )rE   r   rX   )r]   r^   r_   r
   ra   rP   r1   r   r   r   r\   r   r   r   r   rb   rc   s   @r@   r   r     s4       
 "(	
 

 
  	

 

 
 
 
 
 
>, ,%, , , , , .2 < < 2D8	
 |d* 
+	+   B#HU33D-E$F #3s8 # # # # # # # #rB   r   c                   T    e Zd ZdgddgdZdddded	ed
ef fdZdej        dej        fdZ		 	 ddej        dej        de
dz  dej        dz  dej        e
z  f
dZdej        dej        dz  fdZdeeeej        f                  dee         fdZdej        dej        fdZ xZS )BaiChuanBaseForCausalLMrq   r   r   )rq   rQ   rE   r   )rJ   rh   r   rJ   rh   c          	         t                                                       |j        j        }|j        }|| _        t                      | _        || _        t          |||          | _	        t          |j        |j        |t          |d                    | _        | j        | j        j        _        | j        j        r| j	        j        j        | j        _        t)          |j                  | _        | j	        j        | _        d S )Nr   rJ   rh   lm_head)rI   rJ   )rO   rP   r   r   rI   r   r   tp_sizer   modelr   r   rF   r#   r   lm_head_weight_loaderweightr   tie_word_embeddingsr   r   logits_processorr   )rU   r   rJ   rh   r   rI   rV   s         r@   rP   z BaiChuanBaseForCausalLM.__init__  s     	)3"/;==("#1
 
 


 &%	22	
 
 
 -1,F);* 	A"&*"9"@DL /0A B BJ6 	,,,rB   r   r%   c                 6    | j                             |          S rX   )r   r   r   s     r@   r   z'BaiChuanBaseForCausalLM.embed_input_ids  s    z)))444rB   Nr{   r   r   c                 6    |                      ||||          }|S rX   )r   )rU   r   r{   r   r   r|   s         r@   r\   zBaiChuanBaseForCausalLM.forward  s)     

y"6
 
 rB   r|   c                 <    |                      | j        |          }|S rX   )r   r   )rU   r|   logitss      r@   compute_logitsz&BaiChuanBaseForCausalLM.compute_logits  s      &&t|]CCrB   r   c                 J    t          |           }|                    |          S rX   )r   r   )rU   r   loaders      r@   r   z$BaiChuanBaseForCausalLM.load_weights  s#    "4((""7+++rB   r   r   c                     | j         j        dk    }|r$t          j        j                            |          }| j        dk    rt          ||           d S t          ||           d S )Ni  r   )	r   r   r1   r   
functional	normalizer   r   r   )rU   r   r   is_baichuan2s       r@   r   z-BaiChuanBaseForCausalLM.lm_head_weight_loader  sm     {-7 	I!H/99-HHM<!&um<<<<<!%77777rB   )NN)r]   r^   r_   packed_modules_mappingr
   ra   rP   r1   r   r   r   r\   r   r   r   r   r   r   	Parameterr   rb   rc   s   @r@   r   r     s       *
  "( 
  
  
   
 	 

   
  
  
  
  
  
D5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , ,82< 8 8 8 8 8 8 8 8 8rB   r   c                   2     e Zd ZdZdddedef fdZ xZS )BaichuanForCausalLMzVBaichuan 13B and Baichuan2 7B/13B.
    NOTE: the class name has a lower case 'c'.
    rE   r   r   rJ   c                    |j         j        }|j        dk    r&t                                          ||d           d S t                                          ||d           d S )Ni   r   r   rm   )r   r   rF   rO   rP   )rU   r   rJ   r   rV   s       r@   rP   zBaichuanForCausalLM.__init__  s    )3%%GG'6       GG'7      rB   r]   r^   r_   r   r
   ra   rP   rb   rc   s   @r@   r   r     sf          BD 	 	 	z 	3 	 	 	 	 	 	 	 	 	 	rB   r   c                   2     e Zd ZdZdddedef fdZ xZS )BaiChuanForCausalLMzABaichuan 7B.
    NOTE: the class name has an upper case 'C'.
    rE   r   r   rJ   c                P    t                                          ||d           d S )Nr   r   )rO   rP   )rU   r   rJ   rV   s      r@   rP   zBaiChuanForCausalLM.__init__  s5    #Fv 	 	
 	
 	
 	
 	
rB   r   rc   s   @r@   r   r     sf          BD 
 
 
z 
3 
 
 
 
 
 
 
 
 
 
rB   r   )@r   r.   collections.abcr   	itertoolsr   r1   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   r   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   
interfacesr   r   r   utilsr   r    r!   r"   r#   r`   r   rA   ModulerD   re   r   r   r   r   r    rB   r@   <module>r      s  , I H  $ $ $ $ $ $              ) ) ) ) ) ) * * * * * * = = = = = = / / / / / / / /         
 = < < < < < 8 8 8 8 8 8         
 H G G G G G F F F F F F @ @ @ @ @ @               . - - - - - ? ? ? ? ? ? ? ? ? ?             s u|    0" " " " "") " " "JU U U U U	 U U Up6' 6' 6' 6' 6'29 6' 6' 6'r g g g g gBI g g gTR8 R8 R8 R8 R8biz= R8 R8 R8j    1   "
 
 
 
 
1 
 
 
 
 
rB   