
    .`iE                     P   d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ	 ddl
mZ ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddl m!Z! ddl"m#Z# ddl$m%Z%m&Z& ddl'm(Z(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1m2Z2 ddl3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9  ee:          Z; G d dej<                  Z= G d dej<                  Z> G d dej<                  Z? edd ddd!"           G d# d$ej<                              Z@ G d% d&ej<        e1e2          ZAdS )'zAInference-only SeedOss model compatible with HuggingFace weights.    )Iterable)isliceN)nn)PretrainedConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)init_logger)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors)set_default_rope_theta)AttentionType   )SupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayeris_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   J     e Zd Z	 	 ddededededz  deddf fd	Zd
 Z xZS )
SeedOssMLPN hidden_sizeintermediate_size
hidden_actquant_configprefixreturnc                    t                                                       t          ||gdz  d|| d          | _        t	          ||d|| d          | _        |dk    rt          d| d          t                      | _        d S )	N   Fz.gate_up_projbiasr,   r-   z
.down_projsiluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)selfr)   r*   r+   r,   r-   	__class__s         w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/seed_oss.pyr5   zSeedOssMLP.__init__J   s     	6!#%+++
 
 
 +%(((
 
 
 X:XXX   !ll    c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r6   r9   r7   )r:   xgate_up_s       r<   forwardzSeedOssMLP.forwardg   sD    &&q))
KK  ~~a  1r=   )Nr(   )	__name__
__module____qualname__intstrr   r5   rC   __classcell__r;   s   @r<   r'   r'   I   s         37# ## # 	#
 )4/# # 
# # # # # #:      r=   r'   c                        e Zd Zddddej        fdededededed	ed
edz  dedz  de	de	ddf fdZ
dej        dej        dej        fdZ xZS )SeedOssAttentioni   Nr(   r)   	num_headsnum_kv_headshead_dimrope_parametersmax_positioncache_configr,   r-   	attn_typer.   c                 $   t                                                       || _        t                      }|| _        | j        |z  dk    sJ | j        |z  | _        || _        || _        | j        |k    r| j        |z  dk    sJ n|| j        z  dk    sJ t          d| j        |z            | _	        | j        | j        z  | _
        | j	        | j        z  | _        | j        dz  | _        t          || j        | j        | j        d||	 d          | _        t          | j        | j        z  |d||	 d          | _        t#          | j        ||	          | _        t'          | j        | j        | j        | j	        |||
|	 d
          | _        d S )Nr   r   g      Tz	.qkv_projr1   Fz.o_proj)rQ   rP   z.attn)rN   rR   r,   rS   r-   )r4   r5   r)   r   total_num_headsrM   total_num_kv_headsrO   maxrN   q_sizekv_sizescalingr   qkv_projr   o_projr   
rotary_embr   attn)r:   r)   rM   rN   rO   rP   rQ   rR   r,   r-   rS   tp_sizer;   s               r<   r5   zSeedOssAttention.__init__o   s    	&688(#g-2222-8". "g-- *W499999 T4499994#:g#EFFnt}4(4=8}d*)M #%'''
 
 
 ( 4=0%%%%
 
 
 #M%+
 
 

 NML*%%###	
 	
 	
			r=   	positionshidden_statesc                 "   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )N)dim)r[   splitrX   rY   r]   r^   r\   )
r:   r`   ra   qkvrB   qkvattn_outputoutputs
             r<   rC   zSeedOssAttention.forward   s    
 }--Q))T[$,E2)NN1ay!Q//1ii1a((KK,,	r=   )rD   rE   rF   r   DECODERrG   dictr	   r   rH   r5   torchTensorrC   rI   rJ   s   @r<   rL   rL   n   s        &+/26&.A
 A
A
 A
 	A

 A
 A
 A
 "D(A
 )4/A
 A
 A
 
A
 A
 A
 A
 A
 A
F
<
 |
 
	
 
 
 
 
 
 
 
r=   rL   c                        e Zd Z	 	 	 ddededz  dedz  deddf
 fdZd	ej	        d
ej	        dej	        dz  de
ej	        ej	        f         fdZ xZS )SeedOssDecoderLayerNr(   configrR   r,   r-   r.   c                 *   t                                                       |j        | _        t          |d           t	          |dd          rt
          j        }nt
          j        }t          | j        |j	        |j
        |j        |j        |||j        | d|
  
        | _        t          | j        |j        |j        || d          | _        t'          |j        |j        	          | _        t'          |j        |j        	          | _        d S )
Ni@B )default_theta	is_causalTz
.self_attn)
r)   rM   rQ   rN   rO   rR   r,   rP   r-   rS   z.mlp)r)   r*   r+   r,   r-   eps)r4   r5   r)   r   getattrr   rl   ENCODER_ONLYrL   num_attention_headsmax_position_embeddingsnum_key_value_headsrO   rP   	self_attnr'   r*   r+   mlpr   rms_norm_epsinput_layernormpost_attention_layernorm)r:   rr   rR   r,   r-   rS   r;   s         r<   r5   zSeedOssDecoderLayer.__init__   s*    	!-vW==== 6;-- 	3%-II%2I)(073_%%"2(((
 
 
 ($6(%???
 
 
  'v'9v?RSSS(/F$7)
 )
 )
%%%r=   r`   ra   residualc                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS )N)r`   ra   )r   r}   r   r~   )r:   r`   ra   r   s       r<   rC   zSeedOssDecoderLayer.forward   s     $H 00??MM&*&:&:=(&S&S#M8' ' 
 
 #'"?"?x"X"Xx//h&&r=   )NNr(   )rD   rE   rF   SeedOssConfigr	   r   rH   r5   rn   ro   tuplerC   rI   rJ   s   @r<   rq   rq      s         ,026*
 *
*
 "D(*
 )4/	*

 *
 
*
 *
 *
 *
 *
 *
X'<' |' ,%	'
 
u|U\)	*' ' ' ' ' ' ' 'r=   rq   rc   )	input_idsr`   intermediate_tensorsinputs_embeds)dynamic_arg_dimsc                       e Zd Zdeddededeej                 f fdZ	de
j        de
j        fd	Z	 	 dde
j        de
j        ded
z  de
j        d
z  de
j        ez  f
dZdeeee
j        f                  dee         fdZ xZS )SeedOssModelr(   )r-   decoder_layer_typevllm_configr-   r   c                b   t                                                       |j        j        |j        |j        j        Ht          d          r8j        j	        k    s(J d
                    j        j	                              | _        | _        j        | _        t                      j        sj        r9t                      j        r&t#          j        j        | d          | _        nt)                      | _        pt*          t-          j	        fd| d          \  | _        | _        | _        t5          dd	gj                  | _        t                      j        r"t9          j        j        
          | _        d S t)                      | _        d S )Nmax_window_layerszSliding window for some but all layers is not supported. This model uses sliding window but `max_window_layers` = {} is less than `num_hidden_layers` = {}. Please open an issue to discuss this feature.z.embed_tokensr,   r-   c                 "     |           S )N)rr   rR   r,   r-    )r-   rR   rr   r   r,   s    r<   <lambda>z'SeedOssModel.__init__.<locals>.<lambda>;  s&    --))	   r=   z.layersr-   ra   r   rv   )r4   r5   model_config	hf_configrR   r,   sliding_windowhasattrr   num_hidden_layersformatrr   
vocab_sizer   is_first_ranktie_word_embeddingsis_last_rankr   r)   embed_tokensr!   rq   r$   start_layer	end_layerlayersr#   make_empty_intermediate_tensorsr   r   norm)r:   r   r-   r   rR   rr   r,   r;   s      `@@@r<   r5   zSeedOssModel.__init__  s    	)3"/"/ &2w'8
 8
2 +v/GGGG+ ,26,,, ,	 HGG ( +>>' 
	1&
	1+7>>+F
	1 !7!") ///	! ! !D !/ 0 0D 0F3F8C$       %%%	9
 	9
 	9
5$.$+ 0Wj)6+=0
 0
, >>& 	) 28KLLLDIII&((DIIIr=   r   r.   c                 ,    |                      |          S r?   )r   r:   r   s     r<   embed_input_idszSeedOssModel.embed_input_idsL  s      +++r=   Nr`   r   r   c                 p   t                      j        r||}n|                     |          }d }n|J |d         }|d         }t          | j        | j        | j                  D ]} ||||          \  }}t                      j        st          ||d          S | 	                    ||          \  }}|S )Nra   r   )ra   r   )
r   r   r   r   r   r   r   r   r   r   )	r:   r   r`   r   r   ra   r   layerrB   s	            r<   rC   zSeedOssModel.forwardO  s     >>' 		8( - $ 4 4Y ? ?HH'3330AM+J7HDK)94>JJ 	 	E&+e' '#M88
 ~~* 	&"/XFF    99]H==qr=   weightsc                 p   g d}t          |                     d                    }t                      }|D ]|\  }}d|v r| j        ~| j                            |          x}rb||         }t          |dt                    }	|                                dk    r|n|d         } |	||           |                    |           |D ]i\  }
}}||vr|	                    ||
          }|
                    d          r||vr;t          ||           rL||         }|j        }	 |	|||            nk|
                    d          r||vrt          ||          }|+t          ||           r=||         }t          |dt                    }	 |	||           |                    |           ~|S )N))r[   q_projrg   )r[   k_projrh   )r[   v_projri   )r6   	gate_projr   )r6   up_projr   F)remove_duplicatezrotary_emb.inv_freqweight_loaderr   z.bias)rm   named_parameterssetr,   get_cache_scalerx   r   rd   addreplaceendswithr"   r   r   )r:   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   
param_nameweight_nameshard_ids                r<   load_weightszSeedOssModel.load_weightsm  s)   "
 "
 "
 400%0HHII"%%%#* )	$ )	$D-$,, ,"/??EEE
 - $J/ '@U V V%2%6%6%8%8A%=%=MM=QRCS  e]333!!*---5K 4 41
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H=== ==)) d+.E.E0{CC<*466 #D) '@U V Ve]333d####r=   NN)rD   rE   rF   rq   r
   rH   typer   Moduler5   rn   ro   r   r   rC   r   r   r   r   rI   rJ   s   @r<   r   r     s;        .A>) >) >)  >) 	>)
 !O>) >) >) >) >) >)@, ,%, , , , , <@-1 < < 2D8	
 |d* 
+	+   <5HU33D-E$F 53s8 5 5 5 5 5 5 5 5r=   r   c                   ,    e Zd Zg dddgdZdddedef fd	Zd
ej        dej        fdZ		 	 dd
ej        dej        de
dz  dej        dz  dej        e
z  f
dZdej        dej        dz  fdZdeeeej        f                  dee         fdZ xZS )SeedOssForCausalLM)r   r   r   r   r   )r[   r6   r(   r   r   r-   c          	         t                                                       |j        j        }|j        }|| _        || _        t          |t          |d                    | _        t                      j
        rJ|j        r| j        j        | _        nDt          |j        |j        |t          |d                    | _        nt#                      | _        t%          |j                  | _        | j        j        | _        d S )Nmodel)r   r-   lm_headr   )r4   r5   r   r   r,   rr   r   r%   r   r   r   r   r   r   r   r   r)   r!   r   logits_processorr   )r:   r   r-   rr   r,   r;   s        r<   r5   zSeedOssForCausalLM.__init__  s    )3"/(!#L,I,I
 
 

 >>& 	,) #z6-%&!-'	::	      *++DL /0A B B J6 	,,,r=   r   r.   c                 6    | j                             |          S r?   )r   r   r   s     r<   r   z"SeedOssForCausalLM.embed_input_ids  s    z)))444r=   Nr`   r   r   c                 6    |                      ||||          }|S r?   )r   )r:   r   r`   r   r   ra   s         r<   rC   zSeedOssForCausalLM.forward  s)     

y"6
 
 r=   ra   c                 <    |                      | j        |          }|S r?   )r   r   )r:   ra   logitss      r<   compute_logitsz!SeedOssForCausalLM.compute_logits  s      &&t|]CCr=   r   c                 l    t          | | j        j        rdgnd           }|                    |          S )Nzlm_head.)skip_prefixes)r    rr   r   r   )r:   r   loaders      r<   r   zSeedOssForCausalLM.load_weights  sC    "+/;+JTJ<<PT
 
 
 ""7+++r=   r   )rD   rE   rF   packed_modules_mappingr
   rH   r5   rn   ro   r   r   rC   r   r   r   r   r   rI   rJ   s   @r<   r   r     s{       
 
 
 

 
 BD 
 
 
z 
3 
 
 
 
 
 
>5 5%, 5 5 5 5 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r=   r   )B__doc__collections.abcr   	itertoolsr   rn   r   transformersr   r   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   vllm.loggerr   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   vllm.transformers_utils.configr   vllm.v1.attention.backendr   
interfacesr   r   utilsr    r!   r"   r#   r$   r%   rD   loggerr   r'   rL   rq   r   r   r   r=   r<   <module>r      s  0 H G $ $ $ $ $ $              : : : : : : * * * * * * = = = = = = / / / / / / / / O O O O O O O O # # # # # # < < < < < < 8 8 8 8 8 8         
 H G G G G G F F F F F F @ @ @ @ @ @               . - - - - - A A A A A A 3 3 3 3 3 3 0 0 0 0 0 0 0 0                
X		" " " " " " " "JN N N N Nry N N NbA' A' A' A' A'") A' A' A'H  !	   W W W W W29 W W WtG, G, G, G, G,L* G, G, G, G, G,r=   