
    .`i=                     t   d Z ddlmZ ddlmZ ddlmZ ddlZddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$m%Z%m&Z& ddl'm(Z(m)Z)m*Z* ddl+m,Z,m-Z- ddl.m/Z/m0Z0m1Z1m2Z2m3Z3 ddl4m5Z5 ddl6m7Z7  G d de
          Z8de9de(fdZ: G d dej;                  Z< G d  d!ej;                  Z= G d" d#ej;                  Z> G d$ d%ej        j;                  Z?e G d& d'ej;                              Z@ G d( d)ej;        e,e-          ZAdS )*zInference-only PLaMo3 model.    )Iterable)islice)AnyN)nn)PretrainedConfig)	Attention)support_torch_compile)
VllmConfig)$get_tensor_model_parallel_world_size)get_pp_group)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)DEFAULT_VOCAB_PADDING_SIZEParallelLMHeadVocabParallelEmbedding)LoaderFunctioncomposed_weight_loaderdefault_weight_loader)SupportsLoRA
SupportsPP)AutoWeightsLoaderextract_layer_index'make_empty_intermediate_tensors_factorymake_layersmaybe_prefix)set_weight_attrs)IntermediateTensorsc                       e Zd ZU dZeed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	z           ed
<   eed<   e	ee
f         ed<   eed<   eed<   eed<   d	S )Plamo3Configplamo3
model_typehidden_sizenum_hidden_layersrms_norm_epsnum_attention_headshead_dimnum_key_value_headsNinterleaved_sliding_windowsliding_window_patternrope_parametersrope_local_thetaintermediate_size
vocab_size)__name__
__module____qualname__r'   str__annotations__intfloatlistdictr        u/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/plamo3.pyr%   r%   3   s         JMMM !%S4Z 0000#s(^###OOOOOr>   r%   offsetreturnc                 4     t          t           fd          S )Nc                     | z   S Nr=   )xr@   s    r?   <lambda>z(rms_norm_weight_loader.<locals>.<lambda>L   s    !f* r>   )r   r   r@   s   `r?   rms_norm_weight_loaderrH   I   s$    !  r>   c            	       `     e Zd Z	 	 d
dededz  deddf fdZdej        dej        fd	Z	 xZ
S )DenseMLPN configquant_configprefixrA   c                 B   t                                                       |j        | _        |j        | _        t	          | j        | j        gdz  d| d|d          | _        t                      | _        t          | j        | j        d| d|d          | _	        d S )N   Fz.gate_up_proj)biasrN   rM   return_biasz
.down_proj)
super__init__r(   r2   r   gate_up_projr   actr   	down_proj)selfrL   rM   rN   	__class__s       r?   rT   zDenseMLP.__init__Q   s     	!-!'!96#$q(+++%
 
 
 <<*"(((%
 
 
r>   hidden_statesc                     |                      |          }|                     |          }|                     |          S rD   )rU   rV   rW   )rX   rZ   hs      r?   forwardzDenseMLP.forwardl   s6    m,,HHQKK~~a   r>   )NrK   )r4   r5   r6   r%   r   r7   rT   torchTensorr]   __classcell__rY   s   @r?   rJ   rJ   P   s         37	
 

 )4/
 	

 

 
 
 
 
 
6!U\ !el ! ! ! ! ! ! ! !r>   rJ   c            
       |     e Zd Zdddededdf fdZdej        d	ej        d
ej        dz  dedej        f
dZ	 xZ
S )Plamo3AttentionMixerrK   rN   vllm_configrN   rA   Nc          
      &   t                                                       |j        j        }|j        }|j        | _        t                      }|j        | _        | j        |z  dk    sJ | j        |z  | _	        |j
        | _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t          d| j        |z            | _        |j        | _        | j	        | j        z  | _        | j        | j        z  | _        | j        dz  | _        t%          |j        | j        | j        | j        d|| d          | _        t)          | j        | j        z  |j        d|| d          | _        t-          |          }|j        |         }|dk    }	||j        v r|j        |         }
n|j        }
|	rt3          d	|j        
          }
|j        }t9          |j        d          r9t;          |j        j        t>                    rtA          ||j        j                  }tC          | j        ||
          | _"        tG          | j        |j$                  | _%        tM          | j%        j'        dtQ          d          i           tG          | j        |j$                  | _)        tM          | j)        j'        dtQ          d          i           tU          | j	        | j        | j        | j        |j+        |j,        |         | d          | _-        d S )Nr      g      Fz	.qkv_proj)rQ   rM   rN   z.o_projsliding_attentiondefault)	rope_type
rope_thetamax_model_len)max_positionr0   epsweight_loader      ?rG   z.attn)num_kv_headscache_configper_layer_sliding_windowrN   ).rS   rT   model_config	hf_configrM   r(   r   r+   total_num_heads	num_headsr-   total_num_kv_headsmaxrr   r,   q_sizekv_sizescalingr   qkv_projr   o_projr   layer_typesr0   r<   r1   max_position_embeddingshasattr
isinstancerl   r9   minr   
rotary_embr   r*   q_normr"   weightrH   k_normr   rs   r.   attn)rX   re   rN   kwargsrL   rM   tp_size	layer_idx
layer_type
is_slidingr0   rm   rY   s               r?   rT   zPlamo3AttentionMixer.__init__s   s`   )3"/!-688%9#g-2222-8"("<"g-- *W499999 T4499994#:g#EFFnt}4(4=8}d*)M #%'''
 
 
 ( 4=0%%%%
 
 
 (//	'	2
#66
 ///$4Z@OO %4O "&'F4K# # # 5;+_== 	U*$2CC
 C
 	U |[-E-STTL"M%+
 
 

 dm1DEEEK2HPS2T2T2T U	
 	
 	
 dm1DEEEK2HPS2T2T2T U	
 	
 	
 NML*$1%+%Fy%Q###
 
 
			r>   	positionsrZ   residualr   c                    |                      |          \  }}|                    | j        | j        | j        gd          \  }}}	|j        }
|                    |
d d         |
d         | j        z  | j        fz             }| j                            |                              |
          }|j        }|                    |d d         |d         | j        z  | j        fz             }| j	                            |                              |          }| 
                    |||          \  }}|                     |||	          }|                     |          \  }}|S )N)dim)r~   splitr{   r|   shapereshaper,   r   forward_nativer   r   r   r   )rX   r   rZ   r   r   qkv_qkvq_shapek_shapeattn_outputoutputs                 r?   r]   zPlamo3AttentionMixer.forward   s=    }--Q))T[$,E2)NN1a'IIgcrclgbkT]&BDM%RRSSK&&q))11'::'IIgcrclgbkT]&BDM%RRSSK&&q))11'::y!Q//1ii1a((KK,,	r>   )r4   r5   r6   r
   r7   rT   r^   r_   r   r]   r`   ra   s   @r?   rc   rc   r   s        AC V
 V
 V
z V
3 V
RV V
 V
 V
 V
 V
 V
p< | ,%	
  
       r>   rc   c                        e Zd Z	 ddedededdf fdZdej        d	ej        d
ej        dz  dede	ej        ej        dz  f         f
dZ
 xZS )Plamo3DecoderLayerrK   re   rN   r   rA   Nc                 $   t                                                       |j        j        }|j        }t          || d          | _        t          ||| d          | _        t          |j
        |j                  | _        t          | j        j        dt          d          i           t          |j
        |j                  | _        t          | j        j        dt          d	          i           t          |j
        |j                  | _        t          | j        j        dt          d          i           t          |j
        |j                  | _        t          | j        j        dt          d
          i           d S )Nz.mixerre   rN   z.mlp)rL   rM   rN   rn   rp   rq   rG   g?gWfѷ?)rS   rT   ru   rv   rM   rc   mixerrJ   mlpr   r(   r*   pre_mixer_normr"   r   rH   post_mixer_normpre_mlp_normpost_mlp_norm)rX   re   rN   r   rL   rM   rY   s         r?   rT   zPlamo3DecoderLayer.__init__   s    	)3"/)#$$$
 
 


 ___
 
 
 &f&8f>QRRR&4C@@@A	
 	
 	
  'v'9v?RSSS '4GDDDE	
 	
 	
 $F$6F<OPPP$4C@@@A	
 	
 	
 %V%7V=PQQQ%4NKKKL	
 	
 	
 	
 	
r>   r   rZ   r   c                 P   ||}|                      |          }n|                      ||          \  }}|                     |||          }|                     |          }|                     ||          \  }}|                     |          }|                     |          }||fS Nr   rZ   r   )r   r   r   r   r   r   )rX   r   rZ   r   r   s        r?   r]   zPlamo3DecoderLayer.forward  s     $H //>>MM&*&9&9-&R&R#M8

}x # 
 
 ,,];;"&"3"3M8"L"Lx//**=99h&&r>   rK   )r4   r5   r6   r
   r7   r   rT   r^   r_   tupler]   r`   ra   s   @r?   r   r      s        57"
 "
%"
/2"
CF"
	"
 "
 "
 "
 "
 "
H'<' |' ,%	'
 ' 
u|U\D00	1' ' ' ' ' ' ' 'r>   r   c                        e Zd Zddededdf fdZdej        dej        d	ej        dz  deej        ej        dz  f         fd
Z	 xZ
S )Plamo3DecoderrK   re   rN   rA   Nc                     t                                                       j        j        j        }t          |fd| d          \  | _        | _        | _        d S )Nc                 &    t          |           S )Nrd   )r   )rN   re   s    r?   rF   z(Plamo3Decoder.__init__.<locals>.<lambda>&  s    -k&III r>   .layersrd   )	rS   rT   ru   rv   r)   r    start_layer	end_layerlayers)rX   re   rN   r)   rY   s    `  r?   rT   zPlamo3Decoder.__init__   sj    '4>P8CIIII%%%9
 9
 9
5$.$+++r>   r   rZ   r   c                 r    t          | j        | j        | j                  D ]} ||||          \  }}||fS r   )r   r   r   r   )rX   r   rZ   r   layers        r?   r]   zPlamo3Decoder.forward*  sY     DK)94>JJ 	 	E&+e#+!' ' '#M88
 h&&r>   r   )r4   r5   r6   r
   r7   rT   r^   r_   r   r]   r`   ra   s   @r?   r   r     s        
 
J 
 
T 
 
 
 
 
 
'<' |' ,%	'
 
u|U\D00	1' ' ' ' ' ' ' 'r>   r   c                        e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        f
dZ
 xZS )Plamo3ModelrK   rd   re   rN   c                   t                                                       |j        j        }|| _        |j        | _        |j        | _        |j        | _        t          | j        |j
        |j        | d          | _        t          ddg|j
                  | _        t          || d          | _        t!          |j
        |j                  | _        t'          | j        j        dt+          d	
          i           d S )Nz.embed_tokens)org_num_embeddingsrN   rZ   r   r   rd   rn   rp   rq   rG   )rS   rT   ru   rv   rL   pad_token_idpadding_idxr3   org_vocab_sizer   r(   embed_tokensr   make_empty_intermediate_tensorsr   r   r   r*   normr"   r   rH   )rX   re   rN   rL   rY   s       r?   rT   zPlamo3Model.__init__;  s   )3!. +$/2O%0+++	
 
 
 0Wj)6+=0
 0
, $K68J8J8JKKKF.F4GHHH	I4C@@@A	
 	
 	
 	
 	
r>   	input_idsrA   c                 ,    |                      |          S rD   )r   rX   r   s     r?   embed_input_idszPlamo3Model.embed_input_idsT  s      +++r>   Nr   intermediate_tensorsinputs_embedsc                 @   t                      j        r||}n|                     |          }d }n|J |d         }|d         }|                     |||          \  }}t                      j        st          ||d          S |                     ||          \  }}|S )NrZ   r   r   )rZ   r   )r   is_first_rankr   r   is_last_rankr#   r   )rX   r   r   r   r   rZ   r   r   s           r?   r]   zPlamo3Model.forwardW  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7H"&++}x #. #
 #
x ~~* 	&"/XFF    99]H==qr>   NN)r4   r5   r6   r
   r7   rT   r^   r_   r   r#   r]   r`   ra   s   @r?   r   r   9  s        AC 
 
 
z 
3 
 
 
 
 
 
2, ,%, , , , , <@-1 < < 2D8	
 |d* 
       r>   r   c                       e Zd ZdgdgdZdddededd	f fd
Zdej        dej        fdZ		 	 ddej        dej        de
d	z  dej        d	z  dej        f
dZdej        dej        d	z  fdZdeeeej        f                  fdZ xZS )Plamo3ForCausalLMr~   rU   )r~   rU   rK   rd   re   rN   rA   Nc                   t                                                       |j        j        | _        || _        |j        | _        |j        | _        t          |t          |d                    | _	        | j        j
        | _
        | j        j
        | _        | j
        dz   dz  dz  }t          || j        j        | j        j
        t          | d          | _        | j        j        r)| j                            | j	        j                  | _        t'          | j        | j        j
                  | _        | j	        j        | _        d S )Nmodelr         z.lm_head)r   padding_sizerN   )rS   rT   ru   rv   rL   re   scheduler_configr   r!   r   r3   unpadded_vocab_sizer   r(   r   lm_headtie_word_embeddingstie_weightsr   r   logits_processorr   )rX   re   rN   num_embeddingsrY   s       r?   rT   zPlamo3ForCausalLM.__init__z  s5   !.8&'4 + < #L,I,I
 
 

 +0#';#9 ?R/B6"<%K##{53&&&
 
 
 ;* 	M<33DJ4KLLDL /$dk&<!
 !
 J6 	,,,r>   r   c                 6    | j                             |          S rD   )r   r   r   s     r?   r   z!Plamo3ForCausalLM.embed_input_ids  s    z)))444r>   r   r   r   c                 6    |                      ||||          }|S rD   )r   )rX   r   r   r   r   rZ   s         r?   r]   zPlamo3ForCausalLM.forward  s)     

y"6
 
 r>   rZ   c                 <    |                      | j        |          }|S rD   )r   r   )rX   rZ   logitss      r?   compute_logitsz Plamo3ForCausalLM.compute_logits  s      &&t|]CCr>   weightsc                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r   rL   r   load_weights)rX   r   loaders      r?   r   zPlamo3ForCausalLM.load_weights  sC    "+/;+JTJ<<PT
 
 
 ""7+++r>   r   )r4   r5   r6   packed_modules_mappingr
   r7   rT   r^   r_   r   r#   r]   r   r   r   r   r`   ra   s   @r?   r   r   t  sa       L'( 
 BD 
 
 
z 
3 
 
 
 
 
 
 
@5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 

 
 
 
| 
	   ,HU33D-E$F , , , , , , , ,r>   r   )B__doc__collections.abcr   	itertoolsr   typingr   r^   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr	   vllm.configr
   vllm.distributedr   vllm.distributed.parallel_stater   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   r   -vllm.model_executor.model_loader.weight_utilsr   r   r   %vllm.model_executor.models.interfacesr   r    vllm.model_executor.models.utilsr   r   r   r    r!   vllm.model_executor.utilsr"   vllm.sequencer#   r%   r:   rH   ModulerJ   rc   r   r   r   r   r=   r>   r?   <module>r      s   # " $ $ $ $ $ $                    ) ) ) ) ) ) * * * * * * = = = = = = " " " " " " A A A A A A 8 8 8 8 8 8 < < < < < < 8 8 8 8 8 8         
 H G G G G G F F F F F F @ @ @ @ @ @         
         
 K J J J J J J J              7 6 6 6 6 6 - - - - - -    #   ,5 ^    ! ! ! ! !ry ! ! !Dm m m m m29 m m m`:' :' :' :' :' :' :' :'z' ' ' ' 'EHO ' ' '4 7 7 7 7 7") 7 7 7tA, A, A, A, A,	< A, A, A, A, A,r>   