
    .`i^6                     f   d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z'm(Z( ddl)m*Z*m+Z+m,Z,m-Z-m.Z.  G d dej/                  Z0 G d dej/                  Z1 G d dej/                  Z2e G d dej/                              Z3 G d dej/        e(          Z4 G d  d!ej/        e'          Z5d"ee6e7ej8        f                  d#ee6e7ej8        f                  fd$Z9dS )%z?Inference-only GPT-2 model compatible with HuggingFace weights.    )Iterable)isliceN)nn)
GPT2Config)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)DispatchPooler)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )SupportsCrossEncoding
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc            	       h     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        fd
Z
 xZS )GPT2AttentionN configcache_configquant_configprefixc           	         t                                                       |j        | _        |j        }t	                      }||z  dk    sJ ||z  | _        | j        |z  | _        | j        dz  | _        t          | j        | j        |d|| d          | _	        t          | j        | j        d|| d          | _        t          | j        | j        | j        ||| d          | _        d S )	Nr   g      Tz.c_attnbiasr%   r&   .c_proj.attn)scaler$   r%   r&   )super__init__hidden_sizenum_attention_headsr   	num_headshead_dimr,   r   c_attnr   c_projr   attn)selfr#   r$   r%   r&   total_num_heads tensor_model_parallel_world_size	__class__s          s/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/gpt2.pyr.   zGPT2Attention.__init__@   s$    	!- 4+O+Q+Q(!AAQFFFF(,LL(O;]D(
'M%%%%
 
 
 (%%%%
 
 
 NM*%%###
 
 
			    hidden_statesreturnc                     |                      |          \  }}|                    dd          \  }}}|                     |||          }|                     |          \  }}|S )N   )chunksdim)r3   chunkr5   r4   )r6   r<   qkv_qkvattn_outputs           r:   forwardzGPT2Attention.forwardh   sd     ]++Q))1")--1aii1a(([11Qr;   NNr"   __name__
__module____qualname__r   r	   r   strr.   torchTensorrJ   __classcell__r9   s   @r:   r!   r!   ?   s         ,026&
 &
&
 "D(&
 )4/	&

 &
 &
 &
 &
 &
 &
P| 
       r;   r!   c            	       `     e Zd Z	 	 ddedededz  def fdZdej	        d	ej	        fd
Z
 xZS )GPT2MLPNr"   intermediate_sizer#   r%   r&   c                     t                                                       |j        }t          ||d|| d          | _        t          ||d|| d          | _        t          |j                  | _	        d S )NTz.c_fcr(   r*   )
r-   r.   r/   r   c_fcr   r4   r   activation_functionact)r6   rW   r#   r%   r&   r/   r9   s         r:   r.   zGPT2MLP.__init__t   s     	((%###
 
 
	 (%%%%
 
 
 f899r;   r<   r=   c                     |                      |          \  }}|                     |          }|                     |          \  }}|S N)rY   r[   r4   )r6   r<   rE   s      r:   rJ   zGPT2MLP.forward   sE    99]33q//;;}55qr;   )Nr"   )rM   rN   rO   intr   r   rP   r.   rQ   rR   rJ   rS   rT   s   @r:   rV   rV   s   s        
 37: :: : )4/	:
 : : : : : :2U\ el        r;   rV   c            	       h     e Zd Z	 	 	 ddededz  dedz  def fdZdej	        d	ej	        fd
Z
 xZS )	GPT2BlockNr"   r#   r$   r%   r&   c                 h   t                                                       |j        }|j        |j        nd|z  }t	          j        ||j                  | _        t          |||| d          | _	        t	          j        ||j                  | _
        t          |||| d          | _        d S )N   epsr+   r&   z.mlp)r-   r.   r/   n_innerr   	LayerNormlayer_norm_epsilonln_1r!   r5   ln_2rV   mlp)r6   r#   r$   r%   r&   r/   	inner_dimr9   s          r:   r.   zGPT2Block.__init__   s     	(&,n&@FNNa+o	L&2KLLL	!L,&7G7G7G
 
 
	 L&2KLLL	9flf???SSSr;   r<   r=   c                     |}|                      |          }|                     |          }||z   }|}|                     |          }|                     |          }||z   }|S )N)r<   )ri   r5   rj   rk   )r6   r<   residualrI   feed_forward_hidden_statess        r:   rJ   zGPT2Block.forward   sp     !		-00iimi<<#h. 		-00%)XXm%<%<" #==r;   rK   rL   rT   s   @r:   r`   r`      s         ,026T TT "D(T )4/	T
 T T T T T T$| 
       r;   r`   c                        e Zd Zdddedef fdZdej        dej        fdZdej        d	ej        d
e	dz  dej        dz  dej        e	z  f
dZ
deeeej        f                  dee         fdZ xZS )	GPT2Modelr"   re   vllm_configr&   c                T   t                                                       |j        j        |j        |j        | _        j        rJ j        rJ j	        rJ j
        | _        t          j        | j        | d          | _        t          j        j        | j                  | _        t'          j        fd| d          \  | _        | _        | _        t          j        | j        j                  | _        t7          dgj                  | _        d S )Nz.wter%   r&   c                 *    t          |           S )Nre   )r`   )r&   r$   r#   r%   s    r:   <lambda>z$GPT2Model.__init__.<locals>.<lambda>   s    9V\<PVWWW r;   z.hre   rc   r<   )r-   r.   model_config	hf_configr$   r%   r#   add_cross_attentionscale_attn_by_inverse_layer_idxreorder_and_upcast_attnr/   	embed_dimr   
vocab_sizewter   	Embeddingmax_position_embeddingswper   num_hidden_layersstart_layer	end_layerhrg   rh   ln_fr   n_embdmake_empty_intermediate_tensors)r6   rr   r&   r$   r#   r%   r9   s      @@@r:   r.   zGPT2Model.__init__   s>   )3"/"/----99991111+)N%???	
 
 
 < >OO3>$WWWWWW===4
 4
 4
0$.$&
 LV5NOOO	/Vv}0
 0
,,,r;   	input_idsr=   c                 ,    |                      |          S r]   )r~   r6   r   s     r:   embed_input_idszGPT2Model.embed_input_ids   s    xx	"""r;   position_idsintermediate_tensorsNinputs_embedsc                 v   t                      j        r2||                     |          }|                     |          }||z   }n|J |d         }t	          | j        | j        | j                  D ]} ||          }t                      j        st          d|i          S | 
                    |          }|S )Nr<   )r   is_first_rankr   r   r   r   r   r   is_last_rankr   r   )r6   r   r   r   r   position_embedsr<   layers           r:   rJ   zGPT2Model.forward   s     >>' 	B$ $ 4 4Y ? ?"hh|44O)O;MM'3330AMDFD$4dnEE 	1 	1E!E-00MM~~* 	I&'GHHH		-00r;   weightsc                    t          |                     d                    }t                      }|D ]\  }}d|v sd|v rt          ||           r||         }dD ]1}||vr|                    d          s|                                }2t          |dt                    } |||           |                    |           |S )NF)remove_duplicatez
.attn.biasz.attn.masked_bias)r3   r4   rY   z.weightweight_loader)	dictnamed_parameterssetr   endswithtgetattrr   add)	r6   r   params_dictloaded_paramsnameloaded_weightparamconv1d_weight_namer   s	            r:   load_weightszGPT2Model.load_weights   s   400%0HHII"%%%#* 	$ 	$D-t##':d'B'B &tT22 %E 'C 2 2"%T11}}Y//  - 1 1#E?<QRRMM%///d####r;   )rM   rN   rO   r
   rP   r.   rQ   rR   r   r   rJ   r   tupler   r   rS   rT   s   @r:   rq   rq      s       AC 
 
 
z 
3 
 
 
 
 
 
:# #%, # # # #< l 2D8	
 |d* 
+	+   2HU33D-E$F 3s8        r;   rq   c                       e Zd Zdddedef fdZdej        dej        fdZ	 	 ddej        d
ej        de	d	z  dej        d	z  dej        e	z  f
dZ
dej        dej        d	z  fdZdeeeej        f                  dee         fdZ xZS )GPT2LMHeadModelr"   re   rr   r&   c                   t                                                       |j        j        }|j        }|| _        || _        t          |t          |d                    | _        t          | j        j
        | j        j        || d          | _        | j        j        r)| j                            | j        j                  | _        t!          |j
                  | _        | j        j        | _        d S )Ntransformerrr   r&   z.lm_headrt   )r-   r.   rw   rx   r%   r#   rq   r   r   r   r}   r/   lm_headtie_word_embeddingstie_weightsr~   r   logits_processorr   )r6   rr   r&   r#   r%   r9   s        r:   r.   zGPT2LMHeadModel.__init__  s    )3"/($#L,O,O
 
 
 &K"K#%&&&	
 
 
 ;* 	J<33D4D4HIIDL /0A B B< 	,,,r;   r   r=   c                 6    | j                             |          S r]   r   r   r   s     r:   r   zGPT2LMHeadModel.embed_input_ids(      //	:::r;   N	positionsr   r   c                 6    |                      ||||          }|S r]   r   r6   r   r   r   r   r<   s         r:   rJ   zGPT2LMHeadModel.forward+  s+     ((y"6
 
 r;   r<   c                 <    |                      | j        |          }|S r]   )r   r   )r6   r<   logitss      r:   compute_logitszGPT2LMHeadModel.compute_logits7  s      &&t|]CCr;   r   c                 h    t          |           }t          |          }|                    |          S r]   )r   _add_transformer_prefixr   r6   r   loaders      r:   r   zGPT2LMHeadModel.load_weights>  s0    "4(()'22""7+++r;   NN)rM   rN   rO   r
   rP   r.   rQ   rR   r   r   rJ   r   r   r   r   r   rS   rT   s   @r:   r   r     sN       AC 
 
 
z 
3 
 
 
 
 
 
.; ;%, ; ; ; ; <@-1
 
<
 <
 2D8	

 |d*
 
+	+
 
 
 
| 
	   ,HU33D-E$F ,3s8 , , , , , , , ,r;   r   c                        e Zd ZdZdZdddedef fdZdej	        d	ej	        fd
Z
deeeej	        f                  fdZ	 	 ddej	        dej	        dedz  dej	        dz  d	ej	        f
dZ xZS )GPT2ForSequenceClassificationaq  GPT2 Model for sequence classification.

    This class expands GPT2Model with pooling and score functions - last token
    is being used for classification.

    Attributes:
        transformer: An instance of GPT2Model used for forward operations.
        score: A layer for calculating logits.
        _pooler: An instance of Pooler used for pooling operations.
    Tr"   re   rr   r&   c                j   t                                                       |j        j        }t	          |t          |d                    | _        t          j        |j	        |j
        d|j        j                  | _        |j        j        }|J t          j        || j                  | _        d S )Ngpt2r   F)r)   dtype)
classifier)r-   r.   rw   rx   rq   r   r   r   Linearr   
num_labels
head_dtypescorepooler_configr   for_seq_clspooler)r6   rr   r&   r#   r   r9   s        r:   r.   z&GPT2ForSequenceClassification.__init__R  s    )3$#L,H,H
 
 
 YM*5	
 
 

 $0>((($04:VVVr;   r   r=   c                 6    | j                             |          S r]   r   r   s     r:   r   z-GPT2ForSequenceClassification.embed_input_idsd  r   r;   r   c                 J    t          |           }|                    |          S r]   )r   r   r   s      r:   r   z*GPT2ForSequenceClassification.load_weightsg  s#    "4((""7+++r;   Nr   r   r   c                 8    |                      ||||          }|S )N)r   r   r   r   r   r   s         r:   rJ   z%GPT2ForSequenceClassification.forwardk  s2     (("'!5	 ) 
 
 r;   r   )rM   rN   rO   __doc__is_pooling_modelr
   rP   r.   rQ   rR   r   r   r   r   r   rJ   rS   rT   s   @r:   r   r   D  s*       	 	 AC W W Wz W3 W W W W W W$; ;%, ; ; ; ;,HU33D-E$F , , , , <@-1 < < 2D8	
 |d* 
       r;   r   r   r=   c              #      K   | D ]:\  }}|                     d          s|                     d          sd|z   }||fV  ;d S )Nztransformer.r   )
startswith)r   r   tensors      r:   r   r   {  si          f~.. 	)ty7Q7Q 	)!D(DFl r;   ):r   collections.abcr   	itertoolsr   rQ   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr	   r
   vllm.distributed.parallel_stater   r   %vllm.model_executor.layers.activationr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   !vllm.model_executor.layers.poolerr   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   r   utilsr   r   r   r   r   Moduler!   rV   r`   rq   r   r   r   rP   rR   r    r;   r:   <module>r      sg  * F E $ $ $ $ $ $              # # # # # # * * * * * * = = = = = = / / / / / / / /        = < < < < <         
 H G G G G G < < < < < < F F F F F F        P O O O O O - - - - - - 9 9 9 9 9 9 9 9             1 1 1 1 1BI 1 1 1h    bi   B" " " " "	 " " "J S S S S S	 S S Sl1, 1, 1, 1, 1,bi 1, 1, 1,h4 4 4 4 4BI/D 4 4 4neC-./eC%&'     r;   