
    .`iy!                     $   d Z ddlZddlmZ ddlmZ ddlmZ ddlmZm	Z	 ddl
mZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZ ddlmZmZmZ ddlmZ  G d dej                  Z G d de          Z G d de          Z G d de          Z dS )zBInference-only MiniCPM3 model compatible with HuggingFace weights.    N)nn)PretrainedConfig)	Attention)CacheConfig
VllmConfig)$get_tensor_model_parallel_world_size)RMSNorm)ColumnParallelLinearReplicatedLinearRowParallelLinear)QuantizationConfig)get_rope)MiniCPMDecoderLayerMiniCPMForCausalLMMiniCPMModel   )make_layersc                        e Zd Z	 	 	 	 ddededededed	ed
ededededz  dedz  deddf fdZde	j
        de	j
        de	j
        fdZ xZS )MiniCPM3Attention    N confighidden_size	num_headsqk_nope_head_dimqk_rope_head_dim
v_head_dimq_lora_rankkv_lora_rankmax_position_embeddingscache_configquant_configprefixreturnc           
         t                                                       || _        || _        || _        ||z   | _        || _        || _        || _        || _	        t                      }| j	        |z  dk    sJ ||z  | _        | j        dz  | _        |	| _        t          | j        | j        d|          | _        t!          | j        |j                  | _        t'          || j	        | j        z  d|| d          | _        t          | j        | j        | j        z   d|| d          | _        t!          | j        |j                  | _        t'          | j        | j	        | j        | j        z   z  d|| d	          | _        t1          | j	        | j        z  | j        d|| d
          | _        t5          | j        |	|j                  | _        t;          | j        | j        | j        | j        |
|| d          | _        d S )Nr   g      F)biasr"   epsz	.q_b_proj)r&   r"   r#   z.kv_a_proj_with_mqaz
.kv_b_projz.o_proj)max_positionrope_parametersz.attn)num_kv_headsr!   r"   r#   )super__init__r   r   r   qk_head_dimr   r   r   r   r   num_local_headsscalingr    r   q_a_projr	   rms_norm_epsq_a_layernormr
   q_b_projkv_a_proj_with_mqakv_a_layernorm	kv_b_projr   o_projr   r*   
rotary_embr   attn)selfr   r   r   r   r   r   r   r   r    r!   r"   r#   tp_size	__class__s                 w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/minicpm3.pyr-   zMiniCPM3Attention.__init__5   sf    	& 0 0+.>>$&("688~'1,,,,(G3'-'>$(d.U
 
 
 %T%56;NOOO,NT--%'''
 
 
 #3 55%111#
 #
 #
 &d&7V=PQQQ-Nd3doEF%(((
 
 
 (NT_,%%%%
 
 
 #!0"2
 
 

  L-%%###
 
 
			    	positionshidden_statesc                    |                      |          \  }}|                     |          }|                     |          \  }}|                    d| j        | j                  }|                    | j        | j        gd          \  }}| 	                    |          \  }}|                    | j
        | j        gd          \  }}|                    d          }|                     |                                          }|                     |          \  }}|                    d| j        | j        | j        z             }|                    | j        | j        gd          \  }	}
|d d d d | j
        d f         }|                     ||                    d| j        | j        z            |                    d| j                            \  }}|                    d| j        | j                  }|                    dd| j                  }||d| j        d f<   t%          j        |          }|	|dd | j        f<   ||d| j        d f<   |                    d| j        | j        z            }|                    d| j        | j        z            }t$          j        j                            |
d| j        | j        z
  gd                              d| j        | j        z            }
|                     |||
          }|                    d| j        | j                  dd | j        f                             d| j        | j        z            }|                     |          \  }}|S )N)dimr   .r   )value)r1   r3   r4   viewr/   r.   splitr   r   r5   r   	unsqueezer6   
contiguousr7   r   r9   reshapetorch
empty_liker   
functionalpadr:   r8   )r;   r@   rA   q_q_pelatent_cachekv_akvk_nopevk_pekattn_outputoutputs                  r>   forwardzMiniCPM3Attention.forward   sQ   
 }}]++1q!!}}Q1FF2t+T-=>>''40$2GHb'QQ411-@@a$$d&79N%OUW$XXa#--a00""4??#4#455t$$AWWR-t/Dt/VWWHHd3T_E2HNN	AAAqqq$"3"5"556__LLT1D4IIJJLLT233
 

d
 yyT143HIIyyQ 566*.#t$&&
&'Q*0#&&&
&'*.#t$&&
&'IIb$.1AABBFF2t+d.>>??H##4#do56a $ 
 

$r4'$*::
;
; 	
 ii1a((!&&r4+?AQRR"4?""

'"d*T_<
=
= 	 KK,,	r?   )r   NNr   )__name__
__module____qualname__r   intr   r   strr-   rK   Tensorr[   __classcell__)r=   s   @r>   r   r   4   s%        (,+/26Q
 Q
 Q
 Q
 	Q

 Q
 Q
 Q
 Q
 Q
 "%Q
 "D(Q
 )4/Q
 Q
 
Q
 Q
 Q
 Q
 Q
 Q
f/</ |/ 
	/ / / / / / / /r?   r   c                       e Zd Zd ZdS )MiniCPM3DecoderLayerc                 T   t          | j        j        | j        j                  | _        t          | j        | j        | j        j        | j        j        | j        j        | j        j	        | j        j
        | j        j        | j        | j        | j        | j         d          | _        d S )Nr'   z
.self_attn)r   r   r   r   r   r   r   r   r    r!   r"   r#   )r	   r   r   r2   input_layernormr   num_attention_headsr   r   r   r   r   r    r!   r"   r#   	self_attn)r;   s    r>   _init_attn_blockz%MiniCPM3DecoderLayer._init_attn_block   s    &K#)A 
  
  
 +;(k5![9![9{-/1$($@**k---
 
 
r?   N)r\   r]   r^   ri    r?   r>   rd   rd      s#        
 
 
 
 
r?   rd   c            	       2    e Zd Zdedededz  dedz  fdZdS )MiniCPM3Modelr#   r   r!   Nr"   c                 p    t          j        fd| d          \  | _        | _        | _        d S )Nc                 *    t          |           S )Nr#   )rd   )r#   r!   r   r"   s    r>   <lambda>z,MiniCPM3Model._init_layers.<locals>.<lambda>   s     /l6   r?   z.layersro   )r   num_hidden_layersstart_layer	end_layerlayers)r;   r#   r   r!   r"   s     ```r>   _init_layerszMiniCPM3Model._init_layers   s`     9D$      %%%9
 9
 9
5$.$+++r?   )r\   r]   r^   r`   r   r   r   ru   rj   r?   r>   rl   rl      sX        

 !
 "D(	

 )4/
 
 
 
 
 
r?   rl   c                   0    e Zd ZdddgiZdddedefdZd	S )
MiniCPM3ForCausalLMgate_up_proj	gate_projup_projr   ro   vllm_configr#   c                $    t          ||          S )N)r{   r#   )rl   )r;   r{   r#   s      r>   _init_modelzMiniCPM3ForCausalLM._init_model   s    VDDDDr?   N)r\   r]   r^   packed_modules_mappingr   r`   r}   rj   r?   r>   rw   rw      sg        
 EG E E E* Ec E E E E E Er?   rw   )!__doc__rK   r   transformersr   vllm.attention.layerr   vllm.configr   r   vllm.distributedr   $vllm.model_executor.layers.layernormr	   !vllm.model_executor.layers.linearr
   r   r   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   "vllm.model_executor.models.minicpmr   r   r   utilsr   Moduler   rd   rl   rw   rj   r?   r>   <module>r      s  4 I H        ) ) ) ) ) ) * * * * * * / / / / / / / / A A A A A A 8 8 8 8 8 8         
 G F F F F F @ @ @ @ @ @               C C C C C	 C C CL
 
 
 
 
. 
 
 
*
 
 
 
 
L 
 
 
"	E 	E 	E 	E 	E, 	E 	E 	E 	E 	Er?   