
    .`iW/                        d dl Z d dlmZ d dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlmZmZmZ d d	lmZ d d
lmZmZmZ d dlmZ d dlmZ d dlm Z  d dl!m"Z" d dl#m$Z$ ddl%m&Z& ddl'm(Z(m)Z)m*Z*m+Z+m,Z, de-de-dej.        fdZ/ G d dej0                  Z1 G d dej0                  Z2 G d dej0                  Z3e G d dej0                              Z4 G d d ej0        e&          Z5dS )!    N)Iterable)islice)	MptConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_groupget_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)VocabParallelEmbedding)default_weight_loader)IntermediateTensors   )
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixtotal_num_headsalibi_bias_maxreturnc                 l   dt          j        t          j        |                     z  }t          j        d|dz   t          j                  }|                    ||z            }dt          j        d|          z  }|| k    r0t          j        |dd d         |d d d         g          d |          }|S )N   r   )dtype      ?)	mathceillog2torcharangefloat32mulpowconcat)r   r   next_power_of_2mslopess        r/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/mpt.py_get_alibi_slopesr1   +   s     49TY%?%?@@@OQ!+5=AAAA	n.//A59Q??"F/))vadd|VCCaC[9::;KO;KLM    c            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )MPTAttentionN configcache_configquant_configprefixc                 t   t                                                       |j        | _        |j        | _        | j        | j        z  | _        |j        j        | _        |j        j        | _        |j        j	        | _	        d|j        v r|j        j
        | _        n| j        | _        |j        j        rJ |j        j        sJ t          | j        | j        | j        z  | j        | j        |j         || d          | _        | j        r<t#          j        | j                  | _        t#          j        | j                  | _        t+          | j        | j        |j         || d          | _        t/                      }| j        |z  dk    sJ | j        |z  | _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t3          d| j        |z            | _        | j        | j        z  | _        | j        | j        z  | _        t;                      }|| j        z  }|dz   | j        z  }t=          | j        | j	                  }	|	||                                         }	| j        | j        z  | _        | j        dz  }
tA          | j        | j        |
|	| j        ||| d	          | _!        d S )
N
kv_n_headsz.Wqkvbiasr8   r9   z	.out_projr   r   g      .attn)alibi_slopesnum_kv_headsr7   r8   r9   )"super__init__d_modeln_headsr   head_dimattn_configclip_qkvqk_lnr   r;   total_num_kv_heads	prefix_lmalibir   no_biasWqkvnn	LayerNormq_lnk_lnr   out_projr   	num_headsmaxr@   q_sizekv_sizer   r1   tolistr   attn)selfr6   r7   r8   r9   tp_world_sizetp_rank
head_starthead_endr?   scaling	__class__s              r0   rB   zMPTAttention.__init__9   s    	~%~(<<*3'-
$0?6---&,&8&CD##&*&:D#%////!'''' &LLD00 #^#%###
 
 
	 : 	3T\22DIT\22DI)LL^#%'''
 
 
 =>>#m3q8888->"m33 *]:a????? !4#::a????4#:m#KLLnt}4(4=8022t~-
aK4>1()=t?RSS#Jx$78??AA(<<-%NM%*%%###	
 	
 	
			r2   position_idshidden_statesr   c                    ~|                      |          \  }}| j        "|                    | j         | j                   |                    | j        | j        | j        gd          \  }}}| j        r*|                     |          }|                     |          }| 	                    |||          }| 
                    |          \  }	}|	S )N)minrT   )dim)rM   rG   clamp_splitrU   rV   rH   rP   rQ   rX   rR   )
rY   r`   ra   qkv_qkvattn_outputoutputs
             r0   forwardzMPTAttention.forward   s    
 =))Q=$JJDM>t}J===))T[$,E2)NN1a: 			!A		!Aii1a((MM+..	r2   NNr5   __name__
__module____qualname__r   r   r   strrB   r'   Tensorro   __classcell__r_   s   @r0   r4   r4   8   s         ,026K
 K
K
 "D(K
 )4/	K

 K
 K
 K
 K
 K
 K
Zl | 
	       r2   r4   c                   \     e Zd Z	 	 d
dededz  def fdZdej        dej        fd	Z	 xZ
S )MPTMLPNr5   r6   r8   r9   c                    t                                                       |j        }|j        }||z  }t	          |||j         || d          | _        t          d          | _        t          |||j         || d          | _
        d S )Nz.up_projr<   geluz
.down_proj)rA   rB   rC   expansion_ratior   rL   up_projr   actr   	down_proj)rY   r6   r8   r9   hidden_sizer}   intermediate_sizer_   s          r0   rB   zMPTMLP.__init__   s     	n 0+k9+^#%&&&
 
 
 f%%*^#%(((
 
 
r2   xr   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r~   r   r   )rY   r   ri   s      r0   ro   zMPTMLP.forward   s>    ||A1HHQKK~~a  1r2   )Nr5   )rr   rs   rt   r   r   ru   rB   r'   rv   ro   rw   rx   s   @r0   rz   rz      s         37	
 

 )4/
 	
 
 
 
 
 
4 %,        r2   rz   c            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )MPTBlockNr5   r6   r7   r8   r9   c                 $   t                                                       |j        }t          j        |          | _        t          |||| d          | _        t          j        |          | _        t          ||| d          | _
        d S )Nr>   r9   z.ffn)rA   rB   rC   rN   rO   norm_1r4   rX   norm_2rz   ffn)rY   r6   r7   r8   r9   r   r_   s         r0   rB   zMPTBlock.__init__   s     	nl;// L,&7G7G7G
 
 
	 l;//&,&GGGr2   r`   ra   r   c                     |                      |          }|                     ||          }||z   }|                     |          }|                     |          }||z   }|S )N)r`   ra   )r   rX   r   r   )rY   r`   ra   r   s       r0   ro   zMPTBlock.forward   so    
 KK&&II%  
 
 &)KK&&HHQKK%)r2   rp   rq   rx   s   @r0   r   r      s         ,026H HH "D(H )4/	H
 H H H H H H l | 
	       r2   r   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )MPTModelr5   r   vllm_configr9   c                   t                                                       |j        j        |j        |j        j        dk    sJ j        dk    sJ t          j	        j
                  | _        t          j        fd| d          \  | _        | _        | _        t#          j        j
                  | _        j        r\|                                 D ]G}t-          |d          r5t/          |j        t"          j                  r|                    dd            Ht7          dgj
                  | _        d S )Nr#   low_precision_layernormc                 *    t          |           S )Nr   )r   )r9   r7   r6   r8   s    r0   <lambda>z#MPTModel.__init__.<locals>.<lambda>   s    8FL,vVVV r2   z.blocksr   r=   ra   )rA   rB   model_config	hf_configr7   r8   embedding_fraction	norm_typer   
vocab_sizerC   wter   n_layersstart_layer	end_layerblocksrN   rO   norm_frL   moduleshasattr
isinstancer=   	Parameterregister_parameterr   make_empty_intermediate_tensors)rY   r   r9   moduler7   r6   r8   r_   s       @@@r0   rB   zMPTModel.__init__   sZ   )3"/"/(C////#<<<<<)N
 
 9DOVVVVVV%%%9
 9
 9
5$.$+
 l6>22> 	<,,.. < <66** <z&+r|/T/T <--fd;;;/Vv~0
 0
,,,r2   	input_idsr   c                 ,    |                      |          S r   )r   rY   r   s     r0   embed_input_idszMPTModel.embed_input_ids   s    xx	"""r2   Nr`   intermediate_tensorsinputs_embedsc                 J   t                      j        r||}n"|                     |          }n|J |d         }t          | j        | j        | j                  D ]} |||          }t                      j        st          d|i          S | 	                    |          }|S )Nra   )
r
   is_first_rankr   r   r   r   r   is_last_rankr   r   )rY   r   r`   r   r   ra   blocks          r0   ro   zMPTModel.forward   s     >>' 	B( - $ 4 4Y ? ?'3330AMDK)94>JJ 	? 	?E!E,>>MM~~* 	I&'GHHHM22r2   weightsc                 L   t          |                     d                    }t                      }|D ]o\  }}|                    d          r||vrt	          ||           r0||         }t          |dt                    } |||           |                    |           p|S )NF)remove_duplicatez.biasweight_loader)dictnamed_parameterssetendswithr   getattrr   add)rY   r   params_dictloaded_paramsnameloaded_weightparamr   s           r0   load_weightszMPTModel.load_weights  s    400%0HHII"%%%#* 		$ 		$D-}}W%% $k*A*A&tT22 %E#E?<QRRMM%///d####r2   r   )rr   rs   rt   r	   ru   rB   r'   rv   r   r   ro   r   tupler   r   rw   rx   s   @r0   r   r      s       AC 
 
 
z 
3 
 
 
 
 
 
:# #%, # # # # .2 < l 2D8	
 |d* 
+	+   .HU33D-E$F 3s8        r2   r   c                       e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
dej        dej        d	z  fdZdeeeej        f                  dee         fdZ xZS )MPTForCausalLMr5   r   r   r9   c                Z   t                                                       |j        j        }|j        }|| _        |j        sJ || _        t          |t          |d                    | _	        | j	        j
        | _        t          |j                  | _        | j	        j        | _        d S )Ntransformer)r   r9   )rA   rB   r   r   r8   r6   tie_word_embeddingsr   r   r   r   lm_headr   r   logits_processorr   )rY   r   r9   r6   r8   r_   s        r0   rB   zMPTForCausalLM.__init__&  s    )3"/))))(##L,O,O
 
 
 '+ /0A B B< 	,,,r2   r   r   c                 6    | j                             |          S r   )r   r   r   s     r0   r   zMPTForCausalLM.embed_input_ids7  s    //	:::r2   N	positionsr   r   c                 6    |                      ||||          }|S r   )r   )rY   r   r   r   r   ra   s         r0   ro   zMPTForCausalLM.forward:  s+     ((y"6
 
 r2   ra   c                 <    |                      | j        |          }|S r   )r   r   )rY   ra   logitss      r0   compute_logitszMPTForCausalLM.compute_logitsF  s      &&t|]CCr2   r   c                 J    t          |           }|                    |          S r   )r   r   )rY   r   loaders      r0   r   zMPTForCausalLM.load_weightsM  s#    "4((""7+++r2   )NN)rr   rs   rt   r	   ru   rB   r'   rv   r   r   ro   r   r   r   r   r   rw   rx   s   @r0   r   r   %  sN       AC 
 
 
z 
3 
 
 
 
 
 
"; ;%, ; ; ; ; <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r2   r   )6r$   collections.abcr   	itertoolsr   r'   torch.nnrN   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr   r	   vllm.distributedr
   r   r   %vllm.model_executor.layers.activationr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   utilsr   r   r   r   r   intrv   r1   Moduler4   rz   r   r   r    r2   r0   <module>r      s  
  $ $ $ $ $ $              " " " " " " * * * * * * = = = = = = / / / / / / / /         
 = < < < < <         
 H G G G G G F F F F F F V V V V V V O O O O O O - - - - - - " " " " " "             


 \
 
 
 
] ] ] ] ]29 ] ] ]@    RY   D    ry   D E E E E Ery E E EP*, *, *, *, *,RY
 *, *, *, *, *,r2   