
    .`i1                        d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z)m*Z*m+Z+m,Z,m-Z-  G d dej.                  Z/ G d dej.                  Z0 G d dej.                  Z1e G d dej.                              Z2 G d dej.        e'          Z3dS ) zBInference-only GPT-NeoX model compatible with HuggingFace weights.    )Iterable)isliceN)nn)GPTNeoXConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )GPTNeoXAttentionN configcache_configquant_configprefixc           	         t                                                       |j        | _        |j        | _        | j        | j        z  | _        t          |dd          | _        t                      }| j        |z  dk    sJ | j        |z  | _	        t          |j        | j        | j        | j        || d          | _        t          |j        |j        | j        || d          | _        t          |dd          }t          | j        ||j        	          | _        | j        d
z  }t#          | j	        | j        |||| d          | _        d S )Nattention_biasTr   z.query_key_value)biasr$   r%   z.densemax_position_embeddingsi    )max_positionrope_parametersg      z.attn)r#   r$   r%   )super__init__num_attention_headstotal_num_headshidden_size	head_sizegetattrr(   r   	num_headsr   query_key_valuer   denser   r+   
rotary_embr   attn)	selfr"   r#   r$   r%    tensor_model_parallel_world_sizer)   scaling	__class__s	           w/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/gpt_neox.pyr-   zGPTNeoXAttention.__init__<   s    	%9!-)T-AAF$4d;;	+O+Q+Q(#&FF!KKKK-1QQ0N %... 
  
  
 '%$$$
 
 

 #*&2KT"R"R"N0"2
 
 

 .$&NN%%###
 
 
			    position_idshidden_statesreturnc                     |                      |          \  }}|                    dd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }	}|	S )N   )chunksdim)r4   chunkr6   r7   r5   )
r8   r>   r?   qkv_qkvattn_outputoutputs
             r<   forwardzGPTNeoXAttention.forwardl   s~    
 %%m44Q))1")--1a|Q221ii1a((JJ{++	r=   NNr!   __name__
__module____qualname__r   r	   r   strr-   torchTensorrN   __classcell__r;   s   @r<   r    r    ;   s         ,026.
 .
.
 "D(.
 )4/	.

 .
 .
 .
 .
 .
 .
`
l
 |
 
	
 
 
 
 
 
 
 
r=   r    c                   >     e Zd Z	 	 ddededz  def fdZd Z xZS )	
GPTNeoXMLPNr!   r"   r$   r%   c                    t                                                       t          |j        |j        || d          | _        t          |j        |j        || d          | _        t          |j	                  | _
        d S )Nz.dense_h_to_4hr$   r%   z.dense_4h_to_h)r,   r-   r   r0   intermediate_sizedense_h_to_4hr   dense_4h_to_hr   
hidden_actact)r8   r"   r$   r%   r;   s       r<   r-   zGPTNeoXMLP.__init__z   s     	1$%,,,	
 
 
 /$%,,,	
 
 
 f/00r=   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)r^   ra   r_   )r8   r?   rH   s      r<   rN   zGPTNeoXMLP.forward   sI    --m<<q//--m<<qr=   )Nr!   )	rQ   rR   rS   r   r   rT   r-   rN   rW   rX   s   @r<   rZ   rZ   y   sz         37	1 11 )4/1 	1 1 1 1 1 1*      r=   rZ   c            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )GPTNeoXLayerNr!   r"   r#   r$   r%   c                 ^   t                                                       |j        | _        t          j        |j        |j                  | _        t          j        |j        |j                  | _        t          |||| d          | _
        t          ||| d          | _        d S )Nepsz
.attentionr%   z.mlp)r,   r-   use_parallel_residualr   	LayerNormr0   layer_norm_epsinput_layernormpost_attention_layernormr    	attentionrZ   mlp)r8   r"   r#   r$   r%   r;   s        r<   r-   zGPTNeoXLayer.__init__   s     	%+%A"!|F$9 
  
  
 )+F$9)
 )
 )
% *L,&7L7L7L
 
 
 flf???KKKr=   r>   r?   r@   c                 :   |                      |          }|                     ||          }| j        r3|                     |          }|                     |          }||z   |z   }n4||z   }|                     |          }|                     |          }||z   }|S )N)r>   r?   )rm   ro   rj   rn   rp   )r8   r>   r?   
attn_inputrL   	mlp_input
mlp_outputs          r<   rN   zGPTNeoXLayer.forward   s    
 ))-88
nn%$ % 
 

 % 	5 55mDDI),,J&4}DMM
 &5K55kBBI),,J&4Mr=   rO   rP   rX   s   @r<   re   re      s         ,026L LL "D(L )4/	L
 L L L L L L(l | 
	       r=   re   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )GPTNeoXModelr!   ri   vllm_configr%   c                   t                                                       |j        j        |j        |j        | _        t          j        j	                  | _
        t          j        fd| d          \  | _        | _        | _        t!          j        j	        j                  | _        t)          dgj	                  | _        d S )Nc                 *    t          |           S )Nri   )re   )r%   r#   r"   r$   s    r<   <lambda>z'GPTNeoXModel.__init__.<locals>.<lambda>   s    <l6   r=   z.layersri   rg   r?   )r,   r-   model_config	hf_configr#   r$   r"   r   
vocab_sizer0   embed_inr   num_hidden_layersstart_layer	end_layerlayersr   rk   rl   final_layer_normr   make_empty_intermediate_tensors)r8   rw   r%   r#   r"   r$   r;   s      @@@r<   r-   zGPTNeoXModel.__init__   s    )3"/"/.
 
 9D$      %%%9
 9
 9
5$.$+ !#F$9!
 !
 !
 0Wv10
 0
,,,r=   	input_idsr@   c                 ,    |                      |          S rc   )r~   r8   r   s     r<   embed_input_idszGPTNeoXModel.embed_input_ids   s    }}Y'''r=   Nr>   intermediate_tensorsinputs_embedsc                 B   t                      j        r||}n|                     |          }n|d         }t          | j        | j        | j                  D ]} |||          }t                      j        st          d|i          S | 	                    |          }|S )Nr?   )
r   is_first_rankr   r   r   r   r   is_last_rankr   r   )r8   r   r>   r   r   r?   layers          r<   rN   zGPTNeoXModel.forward   s     >>' 	B( - $ 4 4Y ? ?0AMDK)94>JJ 	? 	?E!E,>>MM~~* 	I&'GHHH--m<<r=   weightsc                 R   t          |                                           }t                      }|D ]\  }}d|v sd|v sd|v rd|v sd|v rt          ||           r,||         }d|v rt	          |dd           }| j        j        }|f|j        }	|                    |	d |         |dd	fz   |	|d
z   d          z             }|	                    ||d
z             }|
                    |	          }t	          |dt                    }
 |
||           |                    |           |S )Nzattention.biaszattention.masked_biaszrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedr4   
output_dimrB   rC   r   weight_loader)dictnamed_parameterssetr   r2   r"   r.   shapeview	transposereshaper   add)r8   r   params_dictloaded_paramsnameloaded_weightparamr   r3   loaded_weight_shaper   s              r<   load_weightszGPTNeoXModel.load_weights   s   4002233"%%%#* "	$ "	$D- D((*d22(D00&$..2IT2Q2Q &tT22 %E D((
 %UL$??
 K;	)*7*='$1$6$6+KZK8$a,--j1n.>.>?@% %M
 %2$;$;J
UV$W$WM$1$9$9:M$N$NM#E?<QRRMM%///d####r=   rc   )rQ   rR   rS   r
   rT   r-   rU   rV   r   r   rN   r   tupler   r   rW   rX   s   @r<   rv   rv      s       AC 
 
 
z 
3 
 
 
 
 
 
6( (%, ( ( ( ( .2 < l 2D8	
 |d* 
+	+   *&HU33D-E$F &3s8 & & & & & & & &r=   rv   c                       e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
dej        dej        d	z  fdZdeeeej        f                  dee         fdZ xZS )GPTNeoXForCausalLMr!   ri   rw   r%   c          	         t                                                       |j        j        }|j        }|| _        || _        t          |t          |d                    | _        t          |j
        |j        |t          |d                    | _        | j        j        r| j        j        j        | j        _        t!          |j
                  | _        | j        j        | _        d S )Ngpt_neox)rw   r%   	embed_outr\   )r,   r-   r{   r|   r$   r"   rv   r   r   r   r}   r0   r   tie_word_embeddingsr~   weightr   logits_processorr   )r8   rw   r%   r"   r$   r;   s        r<   r-   zGPTNeoXForCausalLM.__init__&  s    )3"/($#L,L,L
 
 
 (%44	
 
 
 ;* 	B$(M$:$ADN! /0A B BM9 	,,,r=   r   r@   c                 6    | j                             |          S rc   )r   r   r   s     r<   r   z"GPTNeoXForCausalLM.embed_input_ids<  s    },,Y777r=   N	positionsr   r   c                 6    |                      ||||          }|S rc   )r   )r8   r   r   r   r   r?   s         r<   rN   zGPTNeoXForCausalLM.forward?  s)     y"6
 
 r=   r?   c                 <    |                      | j        |          }|S rc   )r   r   )r8   r?   logitss      r<   compute_logitsz!GPTNeoXForCausalLM.compute_logitsK  s      &&t~}EEr=   r   c                 J    t          |           }|                    |          S rc   )r   r   )r8   r   loaders      r<   r   zGPTNeoXForCausalLM.load_weightsR  s#    "4((""7+++r=   )NN)rQ   rR   rS   r
   rT   r-   rU   rV   r   r   rN   r   r   r   r   r   rW   rX   s   @r<   r   r   %  sN       AC 
 
 
z 
3 
 
 
 
 
 
,8 8%, 8 8 8 8 <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r=   r   )4__doc__collections.abcr   	itertoolsr   rU   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   %vllm.model_executor.layers.activationr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   utilsr   r   r   r   r   Moduler    rZ   re   rv   r    r=   r<   <module>r      s  ( I H $ $ $ $ $ $              & & & & & & * * * * * * = = = = = = / / / / / / / / O O O O O O O O < < < < < <         
 H G G G G G F F F F F F @ @ @ @ @ @        P O O O O O - - - - - - " " " " " "             ; ; ; ; ;ry ; ; ;|       :. . . . .29 . . .b Z Z Z Z Z29 Z Z Zz/, /, /, /, /,J /, /, /, /, /,r=   