
    .`i)@                     ,   d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% ddl&m'Z' ddl(m)Z)m*Z* ddl+m,Z,m-Z-m.Z.  G d dej/                  Z0 G d dej/                  Z1 G d dej/                  Z2 G d dej/                  Z3 G d d ej/                  Z4e	 e*d!"           G d# d$ej/                                          Z5 G d% d&e          Z6 e*d!"           G d' d(ej/        e'                      Z7 G d) d*ej/                  Z8 e)d+           e*d,-           G d. d/ej/                                          Z9dS )0    )IterableN)nn)ModernBertConfig)ACT2FN)support_torch_compile)ModelConfig
VllmConfig)$get_tensor_model_parallel_world_size)EncoderOnlyAttention)QKVParallelLinearRowParallelLinear)DispatchPooler)LambdaPoolerActivation)EmbeddingPoolerHeadSequencePoolerget_seq_pooling_method)pooler_for_token_classify)get_rope)VocabParallelEmbedding)default_weight_loader)IntermediateTensors   )SupportsCrossEncoding)	attn_typedefault_pooling_type)AutoWeightsLoaderWeightsMappermaybe_prefixc                        e Zd Zdef fdZdej        dej        fdZ	 d	dej        dej        dz  dej        fdZ xZ	S )
ModernBertEmbeddingsconfigc                 (   t                                                       || _        t          |j        |j                  | _        t          |dd           pt          |dd           pd}t          j	        |j        ||j
                  | _        d S )Nnorm_epslayer_norm_epsh㈵>epsbias)super__init__r!   r   
vocab_sizehidden_sizetok_embeddingsgetattrr   	LayerNorm	norm_biasnorm)selfr!   r'   	__class__s      y/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/vllm/model_executor/models/modernbert.pyr*   zModernBertEmbeddings.__init__$   s    4v1
 
 FJ-- v/66 	
 L!36CSTTT			    	input_idsreturnc                 ,    |                      |          S N)r-   r2   r6   s     r4   embed_input_idsz$ModernBertEmbeddings.embed_input_ids1   s    ""9---r5   Ninputs_embedsc                     ||                      |          S |                     |          }|                      |          }|S r9   )r1   r-   )r2   r6   r<   
embeddingss       r4   forwardzModernBertEmbeddings.forward4   sF    
 $99]+++ //	::M=11Jr5   r9   )
__name__
__module____qualname__r   r*   torchTensorr;   r?   __classcell__r3   s   @r4   r    r    #   s        U/ U U U U U U. .%, . . . . .2
 
<
 |d*
 
	
 
 
 
 
 
 
 
r5   r    c                   h     e Zd Z	 ddededz  def fdZdej        dej        d	ej        fd
Z	 xZ
S )ModernBertAttentionN r!   layer_idprefixc                    t                                                       || _        |j        | _        t	                      }|| _        |j        | _        |j        | _        | j        |z  dk    sJ |j        |j        z  | _	        | j	        | j        z  | _
        | j	        dz  | _        t          |j        | j	        | j        |j        | d          | _        t          |dd           x}r(||         }|j        |         }d }|dk    r
|j        dz  }n<d }||j        z  dk    r |j        dz  }|j        |j        n|j        }	n|j        }	d|	d	}t+          | j	        |j        |t.          j        
          | _        t5          | j        | j	        | j        | d|          | _        t9          |j        |j        |j        | d          | _        d S )Nr   g      z.Wqkvr(   rK   layer_typessliding_attention   default)	rope_type
rope_theta)	head_sizemax_positionrope_parametersdtype.attn)rK   per_layer_sliding_window.Wo)r)   r*   r!   r,   r
   rJ   deterministic_flash_attnnum_attention_heads	num_headshead_dimall_head_sizescalingr   attention_biasWqkvr.   rV   local_attentionglobal_attn_every_n_layerslocal_rope_thetaglobal_rope_thetar   max_position_embeddingsrC   float16
rotary_embr   attnr   Wo)r2   r!   rJ   rK   tp_sizerN   
layer_typerV   sliding_windowrS   r3   s             r4   r*   zModernBertAttention.__init__B   s    	!-688 (.(G%3~'1,,,,*f.HH!]T^;}d*%MN&###
 
 
	 "&->>>; 	Q$X.J$4Z@O)-N000!'!71!< "N&;;q@@!'!71!< .: ++1 
 $5
,5ZPPO"m7+-	
 
 
 )NML%%%%3
 
 
	 $&>>>	
 
 
r5   hidden_statesposition_idsr7   c                    |                      |          \  }}|                    | j        gdz  d          \  }}}|                     |||          \  }}|                     |||          }|}|                     |          \  }}|S )N   dim)rb   splitr_   ri   rj   rk   )	r2   ro   rp   qkv_qkvattn_outputss	            r4   r?   zModernBertAttention.forward   s    
 =))Q))T/014")==1a|Q221yyAq))$77=11qr5   )NrI   )r@   rA   rB   r   intstrr*   rC   rD   r?   rE   rF   s   @r4   rH   rH   A   s        SU=
 =
&=
25*=
MP=
 =
 =
 =
 =
 =
~| l 
	       r5   rH   c                   N     e Zd Zddedef fdZdej        dej        fdZ xZ	S )	ModernBertMLPrI   r!   rK   c                 P   t                                                       || _        t          j        |j        t          |j                  dz  |j                  | _	        t          j
                    | _        t          |j        |j        |j        | d          | _        d S )NrP   r(   rZ   rM   )r)   r*   r!   r   Linearr,   r}   intermediate_sizemlp_biasWiGELUactr   rk   )r2   r!   rK   r3   s      r4   r*   zModernBertMLP.__init__   s    )F$< = = A
 
 
 799#$>>>	
 
 
r5   ro   r7   c                     |                      |                              dd          \  }}|                     |                     |          |z            d         S )NrP   rs   rt   r   )r   chunkrk   r   )r2   ro   inputgates       r4   r?   zModernBertMLP.forward   sP    ggm,,221"2==twwtxx-..q11r5   rI   )
r@   rA   rB   r   r~   r*   rC   rD   r?   rE   rF   s   @r4   r   r      sw        
 
/ 
 
 
 
 
 
 
2U\ 2el 2 2 2 2 2 2 2 2r5   r   c                   h     e Zd Z	 ddedededz  f fdZdej        dej        d	ej        fd
Z	 xZ
S )ModernBertLayerrI   Nr!   rK   rJ   c                    t                                                       || _        |dk    rt          j                    | _        n+t          j        |j        |j        |j	                  | _        t          ||| d          | _        t          j        |j        |j        |j	                  | _        t          || d          | _        d S )Nr   r&   rX   r!   rJ   rK   z.mlprK   )r)   r*   r!   r   Identity	attn_normr/   r,   r#   r0   rH   rj   mlp_normr   mlp)r2   r!   rK   rJ   r3   s       r4   r*   zModernBertLayer.__init__   s     	q==[]]DNN\"f>N  DN (H5E5E5E
 
 
	 FO&:J
 
 
 !6@@@r5   ro   rp   r7   c                     |                      |                     |          |          }||z   }|                     |                     |                    }||z   }|S )Nro   rp   )rj   r   r   r   )r2   ro   rp   r|   
mlp_outputs        r4   r?   zModernBertLayer.forward   sf    
 yy..77l ! 
 
 &4XXdmmM::;;
%
2r5   )rI   N)r@   rA   rB   r   r~   r}   r*   rC   rD   r?   rE   rF   s   @r4   r   r      s        QUA A&A03ADG$JA A A A A A&| l 
	       r5   r   c                   \     e Zd Zd	dedef fdZdej        dej        dej        fdZ xZ	S )
ModernBertEncoderLayerrI   vllm_configrK   c                     t                                                       |j        j        t	          j        fdt          j                  D                       | _        d S )Nc           	      >    g | ]}t          | d |           S )z.layers.r   )r   ).0rJ   r!   rK   s     r4   
<listcomp>z3ModernBertEncoderLayer.__init__.<locals>.<listcomp>   sQ          !%$88h88    r5   )	r)   r*   model_config	hf_configr   
ModuleListrangenum_hidden_layerslayersr2   r   rK   r!   r3   s     `@r4   r*   zModernBertEncoderLayer.__init__   su    )3m     !&f&> ? ?  	
 	
r5   ro   rp   r7   c                 R    t          | j                  D ]\  }} |||          }|S r9   )	enumerater   )r2   ro   rp   ilayers        r4   r?   zModernBertEncoderLayer.forward   s:    
 "$+.. 	? 	?HAu!E->>MMr5   r   )
r@   rA   rB   r	   r~   r*   rC   rD   r?   rE   rF   s   @r4   r   r      s        
 
J 
 
 
 
 
 
 
| l 
	       r5   r   CLS)seq_pooling_typec                       e Zd Z eddi          Z	 ddedef fdZdej	        d	ej	        fd
Z
deeeej	        f                  d	ee         fdZ	 	 ddej	        dej	        dedz  dej	        dz  d	ej	        f
dZ xZS )ModernBertModelzlayers.zencoder_layer.layers.)orig_to_new_prefixrI   r   rK   c                    t                                                       |j        j        }|| _        t          |          | _        t          || d          | _        t          j
        |j        |j        |j                  | _        d S )Nz.encoder_layerr   r&   )r)   r*   r   r   r!   r    r>   r   encoder_layerr   r/   r,   r#   r0   
final_normr   s       r4   r*   zModernBertModel.__init__   s    
 	)3.v6636 9 9 9
 
 
 ,FO&:J
 
 
r5   r6   r7   c                 6    | j                             |          S r9   )r>   r;   r:   s     r4   r;   zModernBertModel.embed_input_ids   s    ..y999r5   weightsc                 Z   | j                             |          }t          |                                           }t	                      }|D ]^\  }}|                    d          r||vr||         }t          |dt                    } |||           |                    |           _|S )Nz.biasweight_loader)	hf_to_vllm_mapperapplydictnamed_parameterssetendswithr.   r   add)r2   r   params_dictloaded_paramsnameloaded_weightparamr   s           r4   load_weightszModernBertModel.load_weights   s    (..w774002233"%%%#* 	$ 	$D-}}W%% $k*A*A%E#E?<QRRMM%///d####r5   N	positionsintermediate_tensorsr<   c                     ||}n|                      ||          }|                     ||          }|                     |          }|S )N)r6   r<   r   )r>   r   r   )r2   r6   r   r   r<   ro   outputsnorm_outputss           r4   r?   zModernBertModel.forward  si     $)MM OO#= ,  M $$'" % 
 
 w//r5   r   NN)r@   rA   rB   r   r   r	   r~   r*   rC   rD   r;   r   tupler   r   r   r?   rE   rF   s   @r4   r   r      s<        &%'>?   
 

 
 
 
 
 
 
 : :%, : : : :HU33D-E$F 3s8    " <@-1 < < 2D8	
 |d* 
       r5   r   c                   $     e Zd Zdef fdZ xZS )ModernBertPoolerr   c                 $    |j         }|J |j        }|j                                        }t	                                          t          |          t          j                               |j	        }t          j
        |j        |j        |j        |           _        t          j                     _        t          j        |j        |j        |j        |           _        t)          | fdt+           fd                     _        d S )N)poolingheadrW   )r'   r(   rW   c                 .                         |           S r9   )densexr2   s    r4   <lambda>z+ModernBertPooler.__init__.<locals>.<lambda>>  s    

1 r5   c                 T                                             |                     S r9   )r1   r   r   s    r4   r   z+ModernBertPooler.__init__.<locals>.<lambda>?  s    		$((1++8N8N r5   )
head_dtype	projector
activation)pooler_configr   classifier_poolingupperr)   r*   r   r   r   r   r   r,   classifier_biasr   r   r   r/   r#   r0   r1   r   r   r   )r2   r   r   r!   hf_pooling_typer   r3   s   `     r4   r*   zModernBertPooler.__init__  s   $2(((#/#9 399;;
 	*?;; 	 	
 	
 	
 ",
Y"	
 
 

 799L!	
 
 
	 (!-----.N.N.N.NOO
 
 
			r5   )r@   rA   rB   r   r*   rE   rF   s   @r4   r   r     sD        $
[ $
 $
 $
 $
 $
 $
 $
 $
 $
 $
r5   r   c                        e Zd ZdZdddedef fdZdej        dej        fd	Z	d
e
eeej        f                  fdZ	 	 ddej        dz  dej        dedz  dej        dz  dej        f
dZ xZS )#ModernBertForSequenceClassificationTrI   r   r   rK   c                   t                                                       |j        j        }|| _        t          |t          |d                    | _        t          j	        |j
        |j        |j        j                  | _        |j        j        }|J t          |j                  | _        t#          j        || j        | j                  | _        d S )N
modernbertr   rK   r   )r   
classifier)r)   r*   r   r   r!   r   r   modelr   r   r,   
num_labelsr   r   r   r   r   r   for_seq_clspoolerr2   r   rK   r!   r   r3   s        r4   r*   z,ModernBertForSequenceClassification.__init__G  s    )3$#L,N,N
 
 

 )*5
 
 
 $0>((('(@AA$0L
 
 
r5   r6   r7   c                 6    | j                             |          S r9   r   r;   r:   s     r4   r;   z3ModernBertForSequenceClassification.embed_input_idsa      z)))444r5   r   c                    g fd}| j                              |                       t          |                                           }D ]\  }}|                    d          r*||         }t          |dt                    } |||           |                    d          rE|d|t          d          dz   d          z            }t          |dt                    } |||           d S )Nc               3      K   D ]M\  } }|                      d          r| t          d          d          |fV  6                    | |f           Nd S )Nzmodel.)
startswithlenappend)r   weightself_weightsr   s     r4   weight_filterzGModernBertForSequenceClassification.load_weights.<locals>.weight_filterg  sv       ' 8 8f??8,, 8s8}}/77777 ''v7777	8 8r5   r   r   r   zpooling.r   )r   r   r   r   r   r.   r   r   )	r2   r   r   r   r   r   r   r   r   s	    `      @r4   r   z0ModernBertForSequenceClassification.load_weightsd  s   	8 	8 	8 	8 	8 	8 	
0004002233#/ 	4 	4D-|,, 4#D) '@U V Ve]333v&& 4#Jc&kkAo6G6G1H$HI '@U V Ve]333	4 	4r5   Nr   r   r<   c                 2    |                      |||          S )N)r6   r<   r   )r   )r2   r6   r   r   r<   s        r4   r?   z+ModernBertForSequenceClassification.forward|  s(     zz'  
 
 	
r5   r   )r@   rA   rB   is_pooling_modelr	   r~   r*   rC   rD   r;   r   r   r   
LongTensorr   r?   rE   rF   s   @r4   r   r   C  s       AC 
 
 
z 
3 
 
 
 
 
 
45 5%, 5 5 5 54HU33D-E$F 4 4 4 48 <@-1
 
#d*
 <
 2D8	

 |d*
 

 
 
 
 
 
 
 
r5   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )ModernBertPredictionHeadc           	      X   t                                                       || _        t          j        |j        |j        |j                  | _        t          |j	                 | _
        t          j        |j        t          |dd          t          |dd                    | _        d S )Nr   r#   r%   r0   Tr&   )r)   r*   r!   r   r   r,   r   r   r   classifier_activationr   r/   r.   r1   )r2   r!   r3   s     r4   r*   z!ModernBertPredictionHead.__init__  s    Y 29O
 
 

 &67L
D11d33
 
 
			r5   ro   r7   c                 x    |                      |                     |                     |                              S r9   )r1   r   r   )r2   ro   s     r4   r?   z ModernBertPredictionHead.forward  s,    yy$**]";";<<===r5   )r@   rA   rB   r*   rC   rD   r?   rE   rF   s   @r4   r   r     s^        
 
 
 
 
>U\ >el > > > > > > > >r5   r   encoder_onlyALL)tok_pooling_typec                        e Zd ZdZdddedef fdZdej        dej        fd	Z	d
e
eeej        f                  fdZ	 	 ddej        dz  dej        dedz  dej        dz  dej        f
dZ xZS ) ModernBertForTokenClassificationTrI   r   r   rK   c                   t                                                       |j        j        }|j        j        | _        |j        | _        t          |t          |d                    | _        t          |          | _
        t          j        |j        |j        | j                  | _        |j        j        }|J t!          |          | _        d S )Nr   r   r   )r)   r*   r   r   r   r   r   r   r   r   r   r   r   r,   r   r   r   r   r   s        r4   r*   z)ModernBertForTokenClassification.__init__  s    )3%2= +$#L,N,N
 
 

 -V44	) 1
 
 
 $0>(((/>>r5   r6   r7   c                 6    | j                             |          S r9   r   r:   s     r4   r;   z0ModernBertForTokenClassification.embed_input_ids  r   r5   r   c                 T    t          | dg          }|                    |          }|S )Ndrop)skip_prefixes)r   r   )r2   r   loaderr   s       r4   r   z-ModernBertForTokenClassification.load_weights  s/    "4x@@@++G44r5   Nr   r   r<   c                     |                      ||||          }|                     |          }|                    | j                  }|                     |          S )N)r6   r   r<   r   )r   r   tor   r   )r2   r6   r   r   r<   ro   s         r4   r?   z(ModernBertForTokenClassification.forward  sb     

'!5	 # 
 
 		-00%((99}---r5   r   )r@   rA   rB   r   r	   r~   r*   rC   rD   r;   r   r   r   r   r?   rE   rF   s   @r4   r  r    s        AC ? ? ?z ?3 ? ? ? ? ? ?$5 5%, 5 5 5 5HU33D-E$F     <@-1. .<$&. <. 2D8	.
 |d*. 
. . . . . . . .r5   r  ):collections.abcr   rC   r   transformersr   transformers.activationsr   vllm.compilation.decoratorsr   vllm.configr   r	   vllm.distributedr
   ;vllm.model_executor.layers.attention.encoder_only_attentionr   !vllm.model_executor.layers.linearr   r   !vllm.model_executor.layers.poolerr   -vllm.model_executor.layers.pooler.activationsr   )vllm.model_executor.layers.pooler.seqwiser   r   r   )vllm.model_executor.layers.pooler.tokwiser   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   interfaces_baser   r   utilsr   r   r   Moduler    rH   r   r   r   r   r   r   r   r   r5   r4   <module>r      s   % $ $ $ $ $        ) ) ) ) ) ) + + + + + + = = = = = = / / / / / / / / A A A A A A      S R R R R R R R < < < < < < P P P P P P         
 P O O O O O @ @ @ @ @ @ V V V V V V O O O O O O - - - - - - - - - - - - < < < < < < < < A A A A A A A A A A    29   <K K K K K") K K K\2 2 2 2 2BI 2 2 2(    bi   D    RY   2 u---8 8 8 8 8bi 8 8 .- 8v%
 %
 %
 %
 %
~ %
 %
 %
P u---C
 C
 C
 C
 C
")5J C
 C
 .-C
L> > > > >ry > > >$ >u---,. ,. ,. ,. ,.ry ,. ,. .- ,. ,. ,.r5   