
    .`i1                        d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z! ddl"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z*m+Z+m,Z,m-Z-m.Z.  G d dej/                  Z0 G d dej/                  Z1 G d dej/                  Z2e G d dej/                              Z3 G d dej/        e(          Z4dS ) z?Inference-only GPT-J model compatible with HuggingFace weights.    )Iterable)isliceN)nn)
GPTJConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors   )
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )GPTJAttentionN configcache_configquant_configprefixc           	         t                                                       |j        | _        |j        | _        | j        | j        z  | _        t          |j        | j        | j        d|| d          | _        t          |j        |j        d|| d          | _	        t                      }| j        |z  dk    sJ | j        |z  | _        | j        dz  }t          |dd          sJ |j        d	z  dk    sJ t          |d
i           }|j        | j        z  |d<   t          |dd          }t          | j        ||d          | _        t!          | j        | j        |||| d          | _        d S )NFz	.qkv_projbiasr%   r&   z	.out_projr   g      rotaryT   rope_parameterspartial_rotary_factormax_position_embeddingsi    )max_positionr,   is_neox_style.attn)r$   r%   r&   )super__init__num_attention_headstotal_num_headshidden_size	head_sizer   qkv_projr   out_projr   	num_headsgetattr
rotary_dimr   
rotary_embr   attn)
selfr#   r$   r%   r&   tp_world_sizescalingr,   r.   	__class__s
            t/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/gpt_j.pyr3   zGPTJAttention.__init__?   s    	%9!-)T-AA)N %'''
 
 
 *%'''
 
 
 =>>#m3q8888->.$&vx..... 1$))))!&*;R@@393Dt~3U/0")&2KT"R"R"N0+	
 
 
 NN%%###
 
 
			    position_idshidden_statesreturnc                     |                      |          \  }}|                    dd          \  }}}|                     |||          \  }}|                     |||          }|                     |          \  }}|S )N   )chunksdim)r8   chunkr=   r>   r9   )	r?   rE   rF   qkv_qkvattn_outputs	            rC   forwardzGPTJAttention.forwardt   s}    
 }--Q))1")--1a|Q221ii1a(({33QrD   NNr"   __name__
__module____qualname__r   r	   r   strr3   torchTensorrT   __classcell__rB   s   @rC   r!   r!   >   s         ,0263
 3
3
 "D(3
 )4/	3

 3
 3
 3
 3
 3
 3
j
l
 |
 
	
 
 
 
 
 
 
 
rD   r!   c            	       `     e Zd Z	 	 ddedededz  def fdZdej	        d	ej	        fd
Z
 xZS )GPTJMLPNr"   intermediate_sizer#   r%   r&   c                     t                                                       |j        }t          |||| d          | _        t          |||| d          | _        t          |j                  | _	        d S )Nz.fc_in)r%   r&   z.fc_out)
r2   r3   n_embdr   fc_inr   fc_outr   activation_functionact)r?   ra   r#   r%   r&   r6   rB   s         rC   r3   zGPTJMLP.__init__   s     	m)%$$$	
 
 

 (%%%%	
 
 
 f899rD   rF   rG   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)rd   rg   re   )r?   rF   rO   s      rC   rT   zGPTJMLP.forward   sE    ::m44q//;;}55qrD   )Nr"   )rW   rX   rY   intr   r   rZ   r3   r[   r\   rT   r]   r^   s   @rC   r`   r`      s        
 37: :: : )4/	:
 : : : : : :.U\ el        rD   r`   c            	       v     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        d
ej	        fdZ
 xZS )	GPTJBlockNr"   r#   r$   r%   r&   c                 .   t                                                       |j        
d|j        z  n|j        }t	          j        |j        |j                  | _        t          |||| d          | _	        t          |||| d          | _        d S )N   epsr1   r&   z.mlp)r2   r3   n_innerrc   r   	LayerNormlayer_norm_epsilonln_1r!   r>   r`   mlp)r?   r#   r$   r%   r&   	inner_dimrB   s         rC   r3   zGPTJBlock.__init__   s     	)/)?A%%V^	LF4MNNN	!L,&7G7G7G
 
 
	 9flf???SSSrD   rE   rF   rG   c                     |}|                      |          }|                     ||          }|                     |          }||z   |z   }|S )N)rE   rF   )ru   r>   rv   )r?   rE   rF   residualrS   
mlp_outputs         rC   rT   zGPTJBlock.forward   s`    
 !		-00ii%'   
 
 XXm,,
#j08;rD   rU   rV   r^   s   @rC   rl   rl      s         ,026T TT "D(T )4/	T
 T T T T T Tl | 
	       rD   rl   c                        e Zd Zdddedef fdZdej        dej        fdZ	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )	GPTJModelr"   rq   vllm_configr&   c                   t                                                       |j        j        |j        |j        | _        | _        j        | _        t          j
        | j                  | _        t          j        fd| d          \  | _        | _        | _        t#          j        | j        j                  | _        t+          dgj                  | _        d S )Nc                 *    t          |           S )Nrq   )rl   )r&   r$   r#   r%   s    rC   <lambda>z$GPTJModel.__init__.<locals>.<lambda>   s    9V\<PVWWW rD   z.hrq   ro   rF   )r2   r3   model_config	hf_configr$   r%   r#   rc   	embed_dimr   
vocab_sizewter   n_layerstart_layer	end_layerhr   rs   rt   ln_fr   make_empty_intermediate_tensors)r?   r}   r&   r$   r#   r%   rB   s      @@@rC   r3   zGPTJModel.__init__   s    )3"/"/()N
 
 4?NWWWWWW===4
 4
 4
0$.$&
 LV5NOOO	/Vv}0
 0
,,,rD   	input_idsrG   c                 ,    |                      |          S ri   )r   r?   r   s     rC   embed_input_idszGPTJModel.embed_input_ids   s    xx	"""rD   NrE   intermediate_tensorsinputs_embedsc                 B   t                      j        r||}n|                     |          }n|d         }t          | j        | j        | j                  D ]} |||          }t                      j        st          d|i          S | 	                    |          }|S )NrF   )
r   is_first_rankr   r   r   r   r   is_last_rankr   r   )r?   r   rE   r   r   rF   layers          rC   rT   zGPTJModel.forward   s     >>' 	B( - $ 4 4Y ? ?0AMDFD$4dnEE 	? 	?E!E,>>MM~~* 	I&'GHHH		-00rD   weightsc                 t   g d}t          |                                           }t                      }|D ]\  }}d|v sd|v r| j        ~| j                            |          x}rb||         }t          |dt                    }	|                                dk    r|n|d         } |	||           |                    |           |D ]i\  }
}}||vr|	                    ||
          }|
                    d          r||vr;t          ||           rL||         }|j        }	 |	|||            nkt          ||          }||
                    d          r||vr/t          ||           rA||         }t          |dt                    }	 |	||           |                    |           |S )N))r8   q_projrP   )r8   k_projrQ   )r8   v_projrR   )gate_up_proj	gate_projr   )r   up_projr   z	attn.biaszattn.masked_biasweight_loaderr   z.bias)dictnamed_parameterssetr%   get_cache_scaler;   r   rL   addreplaceendswithr   r   r   )r?   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   
param_nameweight_nameshard_ids                rC   load_weightszGPTJModel.load_weights   s-   "
 "
 "
 4002233"%%%#* *	$ *	$D-d""&8D&@&@ ,"/??EEE
 - $J/ '@U V V%2%6%6%8%8A%=%=MM=QRCS  e]333!!*---5K 4 41
Kd**||K<<==)) d+.E.E*466 #D) % 3e]H===0{CC<==)) d+.E.E*466 #D) '@U V Ve]333d####rD   ri   )rW   rX   rY   r
   rZ   r3   r[   r\   r   r   rT   r   tupler   r   r]   r^   s   @rC   r|   r|      s       AC 
 
 
z 
3 
 
 
 
 
 
0# #%, # # # # .2 < l 2D8	
 |d* 
+	+   *6HU33D-E$F 63s8 6 6 6 6 6 6 6 6rD   r|   c                       e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
dej        dej        d	z  fdZdeeeej        f                  dee         fdZ xZS )GPTJForCausalLMr"   rq   r}   r&   c          
         t                                                       |j        j        }|j        }|| _        || _        |j        rJ t          |t          |d                    | _	        t          |j        |j        d|t          |d                    | _        t          |j                  | _        | j	        j        | _        d S )Ntransformer)r}   r&   Tlm_headr(   )r2   r3   r   r   r%   r#   tie_word_embeddingsr|   r   r   r   r   rc   r   r   logits_processorr   )r?   r}   r&   r#   r%   rB   s        rC   r3   zGPTJForCausalLM.__init__,  s    )3"/(----$#L,O,O
 
 
 &M%	22
 
 
 !00A B B< 	,,,rD   r   rG   c                 6    | j                             |          S ri   )r   r   r   s     rC   r   zGPTJForCausalLM.embed_input_idsB  s    //	:::rD   N	positionsr   r   c                 6    |                      ||||          }|S ri   )r   )r?   r   r   r   r   rF   s         rC   rT   zGPTJForCausalLM.forwardE  s+     ((y"6
 
 rD   rF   c                 R    |                      | j        || j        j                  }|S ri   )r   r   r)   )r?   rF   logitss      rC   compute_logitszGPTJForCausalLM.compute_logitsQ  s'     &&t|]DLDUVVrD   r   c                 J    t          |           }|                    |          S ri   )r   r   )r?   r   loaders      rC   r   zGPTJForCausalLM.load_weightsX  s#    "4((""7+++rD   )NN)rW   rX   rY   r
   rZ   r3   r[   r\   r   r   rT   r   r   r   r   r   r]   r^   s   @rC   r   r   +  sN       AC 
 
 
z 
3 
 
 
 
 
 
,; ;%, ; ; ; ; <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,rD   r   )5__doc__collections.abcr   	itertoolsr   r[   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributedr   r   %vllm.model_executor.layers.activationr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   
interfacesr   utilsr   r   r   r   r   Moduler!   r`   rl   r|   r    rD   rC   <module>r      s  ( F E $ $ $ $ $ $              # # # # # # * * * * * * = = = = = = / / / / / / / / O O O O O O O O < < < < < <         
 H G G G G G F F F F F F @ @ @ @ @ @               . - - - - - " " " " " "             @ @ @ @ @BI @ @ @F    bi   >    	   @ g g g g g	 g g gT/, /, /, /, /,bi /, /, /, /, /,rD   