
    .`iV/                     `   d Z ddlmZ ddlZddlmZ ddlmZ ddlmZ ddl	m
Z
mZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZmZmZmZ ddlmZ ddlmZ ddlmZ  G d dej                  Z  G d de          Z! G d de          Z"e G d de                      Z# G d de          Z$dS )z-Mistral adaptation of the LLaMA architecture.    )IterableN)nn)LlamaConfig)support_torch_compile)CacheConfig
VllmConfig)
SiluAndMul)ColumnParallelLinearMergedColumnParallelLinearRowParallelLinear)QuantizationConfig)LlamaAttentionLlamaDecoderLayerLlamaForCausalLM
LlamaModel)IntermediateTensors)AttentionType   )AutoWeightsLoaderc                   h     e Zd Z	 	 	 	 	 	 ddededededz  d	ed
edz  dedededdf fdZd Z xZ	S )
MistralMLPNF Thidden_sizeintermediate_size
hidden_actquant_configbiasgate_up_proj_biasprefixreduce_results
disable_tpreturnc
           
      *   t                                                       ||n|}t          ||gdz  |||	| d          | _        t	          ||||||	| d          | _        |dk    rt          d| d          t                      | _        d S )	N   z.gate_up_proj)
input_sizeoutput_sizesr   r   r!   r   z
.down_proj)r%   output_sizer   r   r    r!   r   siluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr	   act_fn)selfr   r   r   r   r   r   r   r    r!   	__class__s             v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/mistral.pyr*   zMistralMLP.__init__!   s     	$5$=DDCT6"+,q0"%!+++
 
 
 +(#%)!(((
 
 
 X:XXX   !ll    c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r+   r.   r,   )r/   x_s      r1   forwardzMistralMLP.forwardF   sB      ##1KKNN~~a  1r2   )NFNr   TF)
__name__
__module____qualname__intstrr   boolr*   r7   __classcell__r0   s   @r1   r   r       s         37)-# ## #### ## 	##
 )4/## ##  $;## ## ## ## 
## ## ## ## ## ##J      r2   r   c                        e Zd Zddddddej        fdedededed	ed
edz  dedede	dz  de
de
ddf fdZdej        dej        fdZdej        dej        dej        fdZ xZS )MistralAttentioni    NFr   configr   	num_headsnum_kv_headsmax_position_embeddingsr   r   bias_o_projcache_configr   	attn_typer"   c                     t                                          |||||||||	|
|           t          |dd           }|d u| _        | j        r |J |d         | _        |d         | _        d S d S )N)rB   r   rC   rD   rE   r   r   rF   rG   r   rH   llama_4_scaling original_max_position_embeddingsbeta)r)   r*   getattrdo_llama_4_scaling0llama_4_scaling_original_max_position_embeddingsllama_4_scaling_beta)r/   rB   r   rC   rD   rE   r   r   rF   rG   r   rH   llama_4_scaling_configr0   s                r1   r*   zMistralAttention.__init__N   s     	#%$;%#% 	 	
 	
 	
 GN%tG
 G
 #9"D" 	G)555&'IJ A )?v(FD%%%	G 	Gr2   	positionsc           	          d| j         t          j        dt          j        || j        z            z             z  z   }|                    d          S )Nr   )rP   torchlogfloorrO   	unsqueeze)r/   rR   scalings      r1   _get_llama_4_attn_scalez(MistralAttention._get_llama_4_attn_scaleu   s\    d/%)kDQQ 3
 3
 
 
   $$$r2   hidden_statesc                    |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }}| j        r2|                     |          }||z                      |j                  }| 	                    |||          }	| 
                    |	          \  }
}|
S )NrT   )dim)qkv_projsplitq_sizekv_size
rotary_embrN   rZ   todtypeattno_proj)r/   rR   r[   qkvr6   qkv
attn_scaleattn_outputoutputs              r1   r7   zMistralAttention.forward   s    
 }--Q))T[$,E2)NN1ay!Q//1" 	-55i@@JZ##AG,,Aii1a((KK,,	r2   )r8   r9   r:   r   DECODERr   r;   r   r=   r   r<   r*   rU   TensorrZ   r7   r>   r?   s   @r1   rA   rA   M   sL        (,26!+/&.%G %G%G %G 	%G
 %G "%%G )4/%G %G %G "D(%G %G %G 
%G %G %G %G %G %GN	% 	%%, 	% 	% 	% 	%< | 
	       r2   rA   c                        e Zd Z	 	 ddedededz  ddf fdZ	 ddej        d	ej        d
ej        dz  dej        dz  de	ej        ej        f         f
dZ
 xZS )MistralDecoderLayerr   Nvllm_configr   rB   r"   c           
         t                                          |||t                     t          |                    d          d                   | _        |                     |          }|p|j        j        }t          |dd          o|j
        j                            d          }|r,| j        j        | j        _        | j        j        | j        _        t          |dd          rdt)          j        t-          |j        |j        dd	          t)          j                    t5          |j        |j        dd	                    | _        d S d | _        d S )
N)rr   r   rB   attn_layer_type.)seprT   "enable_quantization_scaling_fusionFfp8ada_rms_norm_t_cond)r%   r'   r   return_bias)r)   r*   rA   r;   r_   	layer_idxget_quant_configmodel_config	hf_configrM   rG   cache_dtype
startswith	self_attnr^   input_layernormquant_scaling_frommlpr+   post_attention_layernormr   
Sequentialr
   r   ada_rms_norm_t_cond_dimGELUr   ry   )r/   rr   r   rB   r   	do_fusionr0   s         r1   r*   zMistralDecoderLayer.__init__   sm    	#,	 	 	
 	
 	
 V\\c\222677,,[99=;3=>
 
 E&2==eDD 	  	U6:n6MD 3?Cx?TD)<60%88 	,')}$%1 & > %	   		!%= & 2 %	  ( (D$$$  (,D$$$r2   rR   r[   residualt_condc                 F   ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}| j        |J |d|                     |          z   z  }|                     |          }||fS )N)rR   r[   r   )r   r   r   ry   r   )r/   rR   r[   r   r   s        r1   r7   zMistralDecoderLayer.forward   s     $H 00??MM&*&:&:=(&S&S#M8-XX #'"?"?x"X"Xx#/%%%)Q1I1I&1Q1Q-QRM//h&&r2   )r   Nr4   )r8   r9   r:   r   r<   r   r*   rU   ro   tupler7   r>   r?   s   @r1   rq   rq      s         %)	), ),), ), d"	),
 
), ), ), ), ), ),` '+' '<' |' ,%	'
 t#' 
u|U\)	*' ' ' ' ' ' ' 'r2   rq   c                        e Zd Zdeddededeej                 f fdZ		 	 dde
j        dz  d	e
j        d
edz  de
j        dz  de
j        dz  de
j        ez  ee
j        ee
j                 f         z  f fdZ xZS )MistralModelr   r   
layer_typerr   r   r   c                P    t                                          |||           d S N)rr   r   r   r)   r*   r/   rr   r   r   r0   s       r1   r*   zMistralModel.__init__   *     	[JWWWWWr2   N	input_idsrR   intermediate_tensorsinputs_embedsr   r"   c                 P    t                                          |||||          S )N)r   )r)   r7   )r/   r   rR   r   r   r   r0   s         r1   r7   zMistralModel.forward   s0     wwy"6f  
 
 	
r2   )NN)r8   r9   r:   rq   r   r<   typer   Moduler*   rU   ro   r   r   listr7   r>   r?   s   @r1   r   r      s        &9X X X  X 	X
 OX X X X X X .2&*

 

<$&

 <

 2D8	


 |d*

 t#

 
+	+eEL$u|BT4T.U	U

 

 

 

 

 

 

 

 

 

r2   r   c                       e Zd ZU i Zeeef         ed<   i dddddddd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(Zd)ed*d+e	d,ed-e
ej                 f fd.Zd)efd+e	d,ed-e
ej                 fd/Zd0eeeej        f                  d1ee         fd2Zd3ed4ej        d1eeej        f         fd5Z xZS )6MistralForCausalLMembedding_moduleslayerszmodel.layers	attentionr   
qscale_actinput_scaleqscale_weightweight_scalezkv_fake_quantizer.qscale_actkv_scalezq_fake_quantizer.qscale_actzattn.q_scalezk_fake_quantizer.qscale_actk_scalezv_fake_quantizer.qscale_actv_scalewqq_projwkk_projwvv_projworf   attention_normr   feed_forwardr   w1	gate_projw2r,   w3up_projr   zmodel.embed_tokenslm_headz
model.norm)ffn_normtok_embeddingsrm   normr   r   rr   r   r   c                P    t                                          |||           d S r   r   r   s       r1   r*   zMistralForCausalLM.__init__  r   r2   c                 &    t          |||          S r   )r   )r/   rr   r   r   s       r1   _init_modelzMistralForCausalLM._init_model  s"     #Fz
 
 
 	
r2   weightsr"   c                      t            j        j        rdgnd           }|                     fd|D                       S )Nzlm_head.)skip_prefixesc              3   J   K   | ]\  }}                     ||          V  d S r4   )maybe_remap_mistral).0nameloaded_weightr/   s      r1   	<genexpr>z2MistralForCausalLM.load_weights.<locals>.<genexpr>$  sK       #
 #
#m $$T=99#
 #
 #
 #
 #
 #
r2   )r   rB   tie_word_embeddingsload_weights)r/   r   loaders   `  r1   r   zMistralForCausalLM.load_weights  sn    "+/;+JTJ<<PT
 
 
 "" #
 #
 #
 #
'.#
 #
 #
 
 
 	
r2   r   r   c                 t    dt           j        dt          dt          f fd} j        }|                    d          }d|v r.|d         dk    r" || j        j         j        j                  }nd|v r<|d         d	k    r0|                                d
k    r || j        j        d
          }nqd|v r.|d         dk    r" || j        j	         j        j                  }n?d|v r;|d         d	k    r/|                                d
k    r || j        j	        d
          }t          |          }t          |          D ]v}||         }||d
z
  k     r||d
z            nd }	|	| d|	 nd }
|
|v r|                    |
||
                   }L||v r&||         |vr|                    |||                   }w||fS )Nwn_headsattn_outc                     j         j        |z  }|                     |||z  dz  d|                              dd                              ||          S )Nr$   r   )rB   head_dimview	transposereshape)r   r   r   attn_inr/   s       r1   permutez7MistralForCausalLM.maybe_remap_mistral.<locals>.permute.  sT    k*W4G w7 2a 7HEE1a(++r2   ru   r   rT   weightr   r   r   )rU   ro   r;   mistral_mappingr_   rB   num_key_value_headsr   numelnum_attention_headslenrangereplace)r/   r   r   r   mappingmodulesnum_modulesiitem	next_itemcombined_items   `          r1   r   z&MistralForCausalLM.maybe_remap_mistral)  s&   
	u| 	c 	S 	 	 	 	 	 	 &**S//
 7??wr{h66#Gt{>@W MM GOO..##%%))#GM4;3RTUVVMMW__!8!8#Gt{>@W MM GOO..##%%))#GM4;3RTUVVM'll{## 		9 		9A1:D*+kAo*=*=A4I5>5Jt11i111PTM''||M7=3IJJWT]$%>%>||D'$-88]""r2   )r8   r9   r:   r   dictr<   __annotations__r   rq   r   r   r   r   r*   r   r   r   rU   ro   setr   r   r>   r?   s   @r1   r   r      s>        (*tCH~***.[ 	m 		
 	'
 	&~ 	&y 	&y 	h 	h 	h 	h 	+ 	 	k  	k!" 	i#$ /.+  O8 &9X X X  X 	X
 OX X X X X X &9	
 

 
 O	
 
 
 

HU33D-E$F 
3s8 
 
 
 
5#5# |5# 
sEL 	!	5# 5# 5# 5# 5# 5# 5# 5#r2   r   )%__doc__collections.abcr   rU   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r   %vllm.model_executor.layers.activationr	   !vllm.model_executor.layers.linearr
   r   r   'vllm.model_executor.layers.quantizationr    vllm.model_executor.models.llamar   r   r   r   vllm.sequencer   vllm.v1.attention.backendr   utilsr   r   r   rA   rq   r   r    r2   r1   <module>r      sM   4 3 $ $ $ $ $ $        $ $ $ $ $ $ = = = = = = / / / / / / / / < < < < < <         
 G F F F F F            . - - - - - 3 3 3 3 3 3 $ $ $ $ $ $* * * * * * * *Z@ @ @ @ @~ @ @ @FC' C' C' C' C'+ C' C' C'L 
 
 
 
 
: 
 
 
.p# p# p# p# p#) p# p# p# p# p#r2   