
    .`ip                     f   d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZmZmZmZmZmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7 ddl8m9Z9m:Z:m;Z; ddl<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZC  G d dejD                  ZE G d dej        jD                  ZF G d d ej        jD                  ZGe G d! d"ejD                              ZH G d# d$ejD        e;e9e:          ZIdS )%    )IterableN)nn)GptOssConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_dp_groupget_ep_groupget_pcp_groupget_pp_groupget_tensor_model_parallel_rank$get_tensor_model_parallel_world_size tensor_model_parallel_all_gather)FusedMoE)FusedMoEParallelConfig)RMSNorm)QKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)rocm_unquantized_gemm)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)sequence_parallel_chunk)current_platform)IntermediateTensors)cdiv)AttentionType   )SupportsEagle3SupportsLoRA
SupportsPP)AutoWeightsLoaderWeightsMapperextract_layer_indexis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )OAIAttentionN configquant_configcache_configprefixc                    t                                                       t          |          | _        |j        | _        |j        | _        |j        | _        |j        | _        t          | j        |j	        t          j        |j        d         d|j        d         |j        d         |j        d         |j        d         |j                            dd          d	d
          | _        t                      }t          j                            t          j        |j        |z  d                    | _        | j        | j        z  |z  | _        | j        | j        z  |z  | _        | j        dz  | _        t/          | j        | j        | j        | j        || d          | _        t3          | j        | j        z  | j        || d          | _        |j        |z  | _        |j        |z  | _        | j        dz  dk    r|j        nd }t=          | j        | j        | j        | j        |||t>          j         | d| j        
  
        | _!        d S )N
rope_thetayarnfactor original_max_position_embeddings	beta_fast	beta_slowtruncateT)r5   	rope_typer7   r8   r9   r:   r;   )max_positiondtyperope_parametersis_neox_styleF)requires_gradg      	.qkv_proj)hidden_size	head_sizetotal_num_headstotal_num_kv_headsr1   r3   z.o_proj)
input_sizeoutput_sizer1   r3      r   .attn)num_kv_headsr2   r1   per_layer_sliding_window	attn_typer3   sinks)"super__init__r(   	layer_idxhead_dimnum_attention_headsnum_key_value_headsrC   r   max_position_embeddingstorchfloat32r?   get
rotary_embr   r   	ParameteremptyrN   q_sizekv_sizescalingr   qkv_projr   o_projnum_local_attention_headsnum_local_key_value_headssliding_windowr   r!   DECODERattn)selfr0   r1   r2   r3   tp_sizerc   	__class__s          v/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/gpt_oss.pyrP   zOAIAttention.__init__6   sb    	,V44#)#= #)#= !-"M7-$4\B# 0:4:4J65 $3K@#3K@"266z4HH
 
 
 
 
$ 788X''K2g=USSS
 

 .>'I/$-?7J}d*)(m 4#7%'''
 
 
 (/$-?(%%%%	
 
 
 *0)Cw)N&)/)Cw)N& 37.12D2I2I..t*ML7%%%3#+###*
 
 
			    hidden_states	positionsreturnc                 J   |                      |          \  }}|                    | j        | j        | j        gd          \  }}}|                     |||          \  }}|                                }|                     |||          }|                     |          \  }	}|	S )N)dim)r_   splitr\   r]   rY   
contiguousre   r`   )
rf   rk   rl   qkv_qkvattn_outputoutputs
             ri   forwardzOAIAttention.forward   s     }--Q))T[$,E2)NN1ay!Q//1LLNNii1a((KK,,	rj   )NNr/   )__name__
__module____qualname__r   r   r   strrP   rV   Tensorrz   __classcell__rh   s   @ri   r.   r.   5   s         37+/J
 J
J
 )4/J
 "D(	J

 J
 J
 J
 J
 J
 J
X	"\	6;l			 	 	 	 	 	 	 	rj   r.   c                   T     e Zd Z	 d	dededef fdZdej        dej        fdZ	 xZ
S )
MLPBlockr/   vllm_configrQ   r3   c                 D   t                                                       |j        j        }|j        }|j        }|j        | _        || _        |j	        | _
        |j        | _        |j        | _        t          j                    rt          j                    nd| _        t$          j                            |j        |j	                  | _        |j        | j        z  dk    sJ t/          |j	        |j        |j        |j        dd|| dddd| j                  | _        d S )Nr"   r   Tz.expertsF	swigluoai)num_expertstop_krC   intermediate_sizereduce_resultsrenormalizer1   r3   apply_router_weight_on_inputhas_bias
activationis_sequence_parallel)rO   rP   model_config	hf_configr1   parallel_configuse_sequence_parallel_moer   rQ   num_local_expertsr   rC   num_experts_per_tokexperts_per_tokendistis_initializedget_world_size
world_sizerV   r   Linearrouterr   r   experts)rf   r   rQ   r3   r0   r1   r   rh   s          ri   rP   zMLPBlock.__init__   s    	)3"/%5$3$M!"!3!-!'!;373F3H3HO$-///ahoof&8&:RSS'$/9Q>>>>0,*$6%&&&)."!%!:
 
 
rj   xrm   c                    |j         d         }| j        rt          |          }t          j                    r8t          | |d d d | j        f         | j        j        | j        j	                  }n|                     |          }| 
                    ||          }| j        r,t          |                                d          }|d |         }|S )Nr   )rk   router_logits)shaper   r   r   is_rocmr   rC   r   weightbiasr   r   rr   )rf   r   
num_tokensgs       ri   rz   zMLPBlock.forward   s    WQZ
$ 	+'**A#%% 	%a-T---.0BDKDT AA AALLqL::$ 	0CCA+:+Arj   r/   )r{   r|   r}   r	   intr~   rP   rV   r   rz   r   r   s   @ri   r   r      s        
 	"
 "
"
 "
 	"
 "
 "
 "
 "
 "
H %,        rj   r   c                   v     e Zd Z	 ddededef fdZdej        dej        dej        d	z  d
ej        fdZ	 xZ
S )TransformerBlockr/   r   r1   r3   c                 v   t                                                       |j        j        }|j        }t          |          | _        t          || d||          | _        t          || j        | d          | _
        t          |j        d          | _        t          |j        d          | _        d S )NrJ   )r3   r1   r2   z.mlpr3   h㈵>eps)rO   rP   r   r   r2   r(   rQ   r.   re   r   mlpr   rC   input_layernormpost_attention_layernorm)rf   r   r1   r3   r0   r2   rh   s         ri   rP   zTransformerBlock.__init__   s     	)3"/,V44 ###%%	
 
 
	 K6PPP&v'9tDDD(/0B(M(M(M%%%rj   rk   rl   residualNrm   c                     ||}|                      |          }n|                      ||          \  }}|                     ||          }|                     ||          \  }}|                     |          }||fS N)r   re   r   r   )rf   rk   rl   r   ry   s        ri   rz   zTransformerBlock.forward   s     $H 00??MM&*&:&:=(&S&S#M8		-;; #'"?"?x"X"Xx-((xrj   r   )r{   r|   r}   r	   r   r~   rP   rV   r   rz   r   r   s   @ri   r   r      s        
 	N NN )N 	N N N N N N, |  <  ,%	 
 
               rj   r   c                       e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        f
dZ
dededededeeeej        f                  deeedf                  dee         fdZdededededeeeej        f                  deeedf                  dee         fdZdeeeej        f                  dee         fdZ xZS )GptOssModelr/   r   r   r3   c                J    t                                                       j        j         _        j         _        j         _         j        j         j        _        t           j        j	         j        j                   _
        t           j        j         fd| d          \   _         _         _        t!           j        j        d           _        t%          ddg j        j                   _        t)          t*          df                      _        d S )	Nc                 2    t          | j                  S )N)r3   r1   )r   r1   )r3   rf   r   s    ri   <lambda>z&GptOssModel.__init__.<locals>.<lambda>  s#    +!.   rj   z.layersr   r   r   rk   r   .)rO   rP   r   r   r0   r1   r   rC   r   
vocab_size	embeddingr+   num_hidden_layersstart_layer	end_layerlayersr   normr*   make_empty_intermediate_tensorstupler   aux_hidden_state_layersrf   r   r3   rh   s   `` ri   rP   zGptOssModel.__init__   s    	!.8'4*:"&+"9/K"K#
 
 9DK)    
 %%%9
 9
 9
5$.$+ DK3>>>	/Vj)4;+B0
 0
, (-S#X'8'8$$$rj   	input_idsrm   c                 ,    |                      |          S r   )r   rf   r   s     ri   embed_input_idszGptOssModel.embed_input_ids  s    ~~i(((rj   Nrl   intermediate_tensorsinputs_embedsc                    t                      j        r||}n|                     |          }d }n|J |d         }|d         }g }t          | j        | j                  D ]D}| j        |         }	|| j        v r|                    ||n||z               |	|||          \  }}Et                      j	        st          ||d          S |                     ||          \  }}
t          |          dk    r||fS |S )Nrk   r   )rk   r   r   )r   is_first_rankr   ranger   r   r   r   appendis_last_rankr   r   len)rf   r   rl   r   r   r   r   aux_hidden_statesilayerrt   s              ri   rz   zGptOssModel.forward  s3    >>' 
	8(!((33HH'333$_5A+J7Ht'88 	8 	8AKNED000!((h.>ALQQQ%9h77KAxx~~* 	S&'Q'QRRRyyH%%1 !!A%%'''rj   ep_rank_endep_rank_startheads_per_rank
head_startweightsstacked_params_mapping.c                 	   t          |                                           }t                      }d}	| j        j        }
| j        j        }t          j        t                      t                      j        t                      j        t                      j        t                      j                  \  }}| j        j        }||	z  }t          ||          }||	z  }||z  }t!          |dz   |z  |          }|D ]\  }}t#          ||           rd|v rh|
r|||df         }n|d d d|z  d|z  df         }||         }t%          |dt&                    } ||||d d            |                    |           d	|v re|
r|||df         }n|d||	z  ||	z  f         }||         }t%          |dt&                    } ||||d d            |                    |           d
|v r|                    |d|z  d                                          }|
r|||df         }n|d d d|z  d|z  df         }||         }t%          |dt&                    } ||||d d            |                    |           d|v r|                    |d|dz                                            }|
r|||df         }n|d|dz  |dz  f         }||         }t%          |dt&                    } ||||d d            |                    |           d|v rh|
r|||df         }n|d d d|z  d|z  f         }||         }t%          |dt&                    } ||||d d            |                    |           d|v rn||         }t%          |dt&                    }|
r|||df         }n|dk    r|                                  ||||d d            |                    |           d|v rP||         }|                    d||          }|j                            |           |                    |           M|D ]e\  }}}||vr|                    ||          }||         }t%          |dt&                    }|t&          k    r |||           n ||||            n0||vr||         }t%          |dt&                    } |||           |                    |           |S )N    rg   dp_sizedp_rankpcp_sizepcp_rankr"   .w13_weight_scale.rI   weight_loader)weight_nameshard_id	expert_id.w2_weight_scale.w13_weightro   
.w2_weight	.w13_bias.w2_biasr   rN   )dictnamed_parameterssetr   enable_expert_parallelr0   r   r   flatten_tp_across_dp_and_pcpr   r
   r   rank_in_groupr   r   r    minr)   getattrr   addviewrr   zero_narrowdatacopy_replace)rf   r   r   r   r   r   r   params_dictloaded_paramsmxfp4_blockuse_epr   rg   tp_rankr   intermediate_size_block per_rank_intermediate_size_blockper_rank_intermediate_sizetp_rank_starttp_rank_endnamer   narrow_weightparamr   
param_namer   r   s                               ri   _load_weights_mxfp4zGptOssModel._load_weights_mxfp45  s    4002233"%%%%<k3 2N8:: NN- NN0"__/"__2
 
 
 !K9"3{"B+/0G+Q+Q(%E%S"  "<<7Q;*DDFWXX# W	$ W	$LD&&tT22 "d** X$*=+Dc+I$JMM$*111a-.?!k/.QSV+V$WM#D) '@U V V! $!"    !!$'''#t++ $*=+Dc+I$JMM$*]k9K;<VVV%M $D) '@U V V! $!"    !!$'''$&&  %6!6 *,,   X$*=+Dc+I$JMM$*111a-.?!k/.QSV+V$WM#D) '@U V V! $!"    !!$'''%%  %6!%; *,,   W$*=+Dc+I$JMM$*30B[TUEU0U+U$VM#D) '@U V V! $!"    !!$'''$$  S$*=+Dc+I$JMM$*111a-.?!k/.Q+Q$RM#D) '@U V V! $!"    !!$'''t###D) '@U V V '#M+$=s$BCFF !||6tdd    !!$'''D#D) &a^ L L
  ///!!$'''5K - -1
Kd**||K<<#D) '@U V V $999!M%0000!M%::: {**#D) '@U V VeV,,,d####rj   c                    t          |                                           }t                      }| j        j        }	t          j        t                      t                      j	        t                      j
        t                      j	        t                      j
                  \  }
}| j        j        }t          ||
          }||z  }t          |dz   |z  |          }|D ]\  }}t!          ||           rd|v r|	r|||df         }n|d d d d d|z  d|z  f         }|                    ddd                                          }||         }|                    |           |                    |           d|v r}|	r|||df         }n|d d ||d d f         }|                    ddd                                          }||         }|                    |           |                    |           d|v rW|	r|||df         }n|d d d|z  d|z  f         }||         }|                    |           |                    |           yd	|v r]|	r|||df         }n|dk    r|                                 ||         }|                    |           |                    |           d
|v rP||         }|                    d||          }|j                            |           |                    |           .|D ]e\  }}}||vr|                    ||          }||         }t3          |dt4                    }|t4          k    r |||           n ||||            n0||vr||         }t3          |dt4                    } |||           |                    |           |S )Nr   r"   r   .rI   r   r   r   r   rN   r   )r   r   r   r   r   r   r   r   r
   r   r   r   r0   r   r    r   r)   permuterr   r   r   r   r   r   r   r   r   )rf   r   r   r   r   r   r   r   r   r   rg   r   r   r  r  r  r  r   r  r  r  r   r   r   s                           ri   _load_weights_otherzGptOssModel._load_weights_other  se    4002233"%%%%< 2N8:: NN- NN0"__/"__2
 
 
 !K9%)*;W%E%E""<<7Q;*DDFWXX# P	$ P	$LD&&tT22 $$  V$*=+Dc+I$JMM$*111aaa]1BQ_1T+T$UM - 5 5aA > > I I K K#D)M***!!$'''%% L$*=+Dc+I$JMM$*111mK.G+J$KM - 5 5aA > > I I K K#D)M***!!$'''$$  S$*=+Dc+I$JMM$*111a-.?!k/.Q+Q$RM#D)M***!!$'''t## '#M+$=s$BCFF !||#D)F###!!$'''D#D) &a^ L L
  ///!!$'''5K - -1
Kd**||K<<#D) '@U V V $999!M%0000!M%::: {**#D) '@U V VeV,,,d####rj   c                    g d}t                      }t                      }| j        j        |z  }||z  }t	                      j        }t	                      j        }| j        j        }	|	|z  }
||
z  }|dz   |
z  }t          | j        d          r| j        j	        d         nd }|dk    r| 
                    ||||||          S |                     ||||||          S )N))rB   z.q_projru   )rB   z.k_projrv   )rB   z.v_projrw   r"   quantization_configquant_methodmxfp4)r   r   r0   rS   r   r   rankr   hasattrr  r	  r  )rf   r   r   r   rg   r   r   ep_sizeep_rankr   experts_per_rankr   r   r  s                 ri   load_weightszGptOssModel.load_weightsc  s   "
 "
 "
 122688 8GC~-
..+..%k3&'1"22{&66 t{$9::DK+N;; 	
 7""++&   ++&  rj   NN)r{   r|   r}   r	   r~   rP   rV   r   r   r   rz   r   r   r   listr   r	  r  r  r   r   s   @ri   r   r      s.        	9 9 9  9 	9 9 9 9 9 9<) )%, ) ) ) ) <@-1 < < 2D8	
 |d* 
   B{{ { 	{
 { %U\ 123{ !%U38_ 5{ 
S{ { { {zoo o 	o
 o %U\ 123o !%U38_ 5o 
So o o ob,HU33D-E$F ,3s8 , , , , , , , ,rj   r   c                       e Zd ZU dZeed<   dg diZ eddiddd	d
ddd	ddd	          Z	 d%de	de
f fdZdeedf         ddfdZdeedf         fdZdej        dej        fdZ	 	 d&dej        dej        dedz  dej        dz  dej        f
dZd ej        dej        fd!Zdeee
e
ee
f                  fd"Zd#eee
ej        f                  dee
         fd$Z xZS )'GptOssForCausalLMTis_3d_moe_weightr_   )q_projk_projv_projz.self_attn.z.attn.z.embedding.weightr   r   r   r   r   r   )	z.embed_tokens.weightz.gate_up_proj_blocksz.down_proj_blocksz.gate_up_proj_scalesz.down_proj_scalesz.gate_up_projz
.down_projz.gate_up_proj_biasz.down_proj_bias)orig_to_new_substrorig_to_new_suffixr/   r   r3   c                    t                                                       || _        |j        j        | _        t          |t          |d                    | _        t          | j        j
        | j        j        t          |d                    | _        t          | j        j
                  | _        | j        j        | _        d S )Nmodel)r   r3   lm_headr   )rO   rP   r   r   r   r0   r   r,   r"  r   r   rC   r#  r   logits_processorr   r   s      ri   rP   zGptOssForCausalLM.__init__  s    
 	&!.8 #00
 
 

 &K"K#	22
 
 

 !00F G GJ6 	,,,rj   r   .rm   Nc                     || j         _        d S r   )r"  r   )rf   r   s     ri   set_aux_hidden_state_layersz-GptOssForCausalLM.set_aux_hidden_state_layers  s    -3
***rj   c                 J    t          | j        j                  }d|dz  |dz
  fS )NrI      )r   r"  r   )rf   
num_layerss     ri   "get_eagle3_aux_hidden_state_layersz4GptOssForCausalLM.get_eagle3_aux_hidden_state_layers  s)    *++
:?JN33rj   r   c                 6    | j                             |          S r   )r"  r   r   s     ri   r   z!GptOssForCausalLM.embed_input_ids  s    z)))444rj   rl   r   r   c                 2    |                      ||||          S r   )r"  )rf   r   rl   r   r   s        ri   rz   zGptOssForCausalLM.forward  s     zz)Y0DmTTTrj   rk   c                 <    |                      | j        |          }|S r   )r$  r#  )rf   rk   logitss      ri   compute_logitsz GptOssForCausalLM.compute_logits  s    &&t|]CCrj   c                 J    t          j        | ddd| j        j        d          S )N	gate_proj	down_projup_projr   )ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namer   num_redundant_experts)r   make_expert_params_mappingr0   r   )rf   s    ri   get_expert_mappingz$GptOssForCausalLM.get_expert_mapping  s5     2 + +'5"#
 
 
 	
rj   r   c                 z    t          | | j        j        rdgnd           }|                    || j                  S )Nzlm_head.)skip_prefixes)mapper)r&   r0   tie_word_embeddingsr  hf_to_vllm_mapper)rf   r   loaders      ri   r  zGptOssForCausalLM.load_weights  sK    "+/;+JTJ<<PT
 
 
 ""743I"JJJrj   r   r  )r{   r|   r}   r  bool__annotations__packed_modules_mappingr'   r>  r	   r~   rP   r   r   r&  r*  rV   r   r   r   rz   r/  r  r9  r   r   r  r   r   s   @ri   r  r    s<        !d!!!(*H*H*HI%8
 %8$1!-$7!3*&"-)
 
	  . 
 

 
 
 
 
 
 
.4%S/ 4d 4 4 4 44E#s(O 4 4 4 45 5%, 5 5 5 5 <@-1U U<U <U 2D8	U
 |d*U 
U U U UEL U\    

DsCc/A)B$C 

 

 

 

KHU33D-E$F K3s8 K K K K K K K Krj   r  )Jcollections.abcr   rV   torch.distributeddistributedr   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr   r	   vllm.distributedr
   r   r   r   r   r   r   $vllm.model_executor.layers.fused_moer   +vllm.model_executor.layers.fused_moe.configr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr    vllm.model_executor.layers.utilsr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr    vllm.model_executor.models.utilsr   vllm.platformsr   vllm.sequencer   vllm.utils.math_utilsr    vllm.v1.attention.backendr!   
interfacesr#   r$   r%   utilsr&   r'   r(   r)   r*   r+   r,   Moduler.   r   r   r   r   rj   ri   <module>r^     s   % $ $ $ $ $                    % % % % % % * * * * * * = = = = = = / / / / / / / /                  : 9 9 9 9 9 N N N N N N 8 8 8 8 8 8 R R R R R R R R G G G G G G F F F F F F @ @ @ @ @ @ B B B B B B        P O O O O O D D D D D D + + + + + + - - - - - - & & & & & & 3 3 3 3 3 3 @ @ @ @ @ @ @ @ @ @                 V V V V V29 V V Vr5 5 5 5 5ux 5 5 5p(  (  (  (  ( ux (  (  ( V ] ] ] ] ]") ] ] ]@WK WK WK WK WK	:~| WK WK WK WK WKrj   