
     `i
                        d Z ddlZddlmZmZ ddlZddlmZ ddlmZm	Z	m
Z
 ddlmZmZ ddlmZmZmZmZmZmZmZ dd	lmZ dd
lmZmZ ddlmZmZ ddlmZ  ej         e!          Z"e G d de                      Z# G d dej$                  Z% G d dej$                  Z& G d dej$                  Z' G d dej$                  Z( G d dej$                  Z) G d dej$                  Z* G d dej$                  Z+ G d dej$                  Z,e G d  d!e#                      Z- G d" d#e#          Z. G d$ d%ej$                  Z/ ed&'           G d( d)e#                      Z0e G d* d+e#                      Z1e G d, d-e#                      Z2 G d. d/ej$                  Z3e G d0 d1e#                      Z4d2 Z5g d3Z6dS )4zPyTorch MPNet model.    N)OptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FNgelu)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )MPNetConfigc                   $    e Zd ZU eed<   dZd ZdS )MPNetPreTrainedModelconfigmpnetc                    t          |t          j                  rT|j        j                            d| j        j                   |j         |j        j        	                                 dS dS t          |t          j
                  r_|j        j                            d| j        j                   |j        +|j        j        |j                 	                                 dS dS t          |t          j                  r?|j        j        	                                 |j        j                            d           dS t          |t                    r |j        j        	                                 dS dS )zInitialize the weightsg        )meanstdNg      ?)
isinstancer   Linearweightdatanormal_r   initializer_rangebiaszero_	Embeddingpadding_idx	LayerNormfill_MPNetLMHead)selfmodules     |/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/mpnet/modeling_mpnet.py_init_weightsz"MPNetPreTrainedModel._init_weights1   sX   fbi(( 	% M&&CT[5R&SSS{& &&((((( '&-- 	%M&&CT[5R&SSS!-"6#56<<>>>>> .--- 	%K""$$$M$$S))))),, 	%K""$$$$$	% 	%    N)__name__
__module____qualname__r   __annotations__base_model_prefixr1    r2   r0   r   r   ,   s7         % % % % %r2   r   c                   ,     e Zd Z fdZddZd Z xZS )MPNetEmbeddingsc                    t                                                       d| _        t          j        |j        |j        | j                  | _        t          j        |j        |j        | j                  | _	        t          j
        |j        |j                  | _
        t          j        |j                  | _        |                     dt!          j        |j                                      d          d           d S )Nr   )r*   epsposition_ids)r   F)
persistent)super__init__r*   r   r)   
vocab_sizehidden_sizeword_embeddingsmax_position_embeddingsposition_embeddingsr+   layer_norm_epsDropouthidden_dropout_probdropoutregister_buffertorcharangeexpandr.   r   	__class__s     r0   rB   zMPNetEmbeddings.__init__E   s    !|F,=v?Q_c_oppp#%<*F,>DL\$
 $
 $
  f&8f>STTTz&"<==EL)GHHOOPWXXej 	 	
 	
 	
 	
 	
r2   Nc                    |-|t          || j                  }n|                     |          }||                                }n|                                d d         }|d         }|| j        d d d |f         }||                     |          }|                     |          }||z   }|                     |          }|                     |          }|S )Nr?   r   )	"create_position_ids_from_input_idsr*   &create_position_ids_from_inputs_embedssizer>   rE   rG   r+   rK   )	r.   	input_idsr>   inputs_embedskwargsinput_shape
seq_lengthrG   
embeddingss	            r0   forwardzMPNetEmbeddings.forwardS   s    $A)TM]^^#JJ=YY #..**KK',,..ss3K ^
,QQQ^<L  00;;M"66|DD"%88
^^J//
\\*--
r2   c                    |                                 dd         }|d         }t          j        | j        dz   || j        z   dz   t          j        |j                  }|                    d                              |          S )z
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: torch.Tensor

        Returns: torch.Tensor
        Nr?   r   )dtypedevicer   )rU   rM   rN   r*   longr_   	unsqueezerO   )r.   rW   rY   sequence_lengthr>   s        r0   rT   z6MPNetEmbeddings.create_position_ids_from_inputs_embedsm   s     $((**3B3/%a.|q /D4D"Dq"HPUPZcpcw
 
 
 %%a((//<<<r2   )NNN)r3   r4   r5   rB   r\   rT   __classcell__rQ   s   @r0   r:   r:   D   s[        
 
 
 
 
   4= = = = = = =r2   r:   c                   .     e Zd Z fdZ	 	 	 	 ddZ xZS )MPNetSelfAttentionc                    t                                                       |j        |j        z  dk    r0t	          |d          s t          d|j         d|j         d          |j        | _        t          |j        |j        z            | _        | j        | j        z  | _        t          j
        |j        | j                  | _        t          j
        |j        | j                  | _        t          j
        |j        | j                  | _        t          j
        |j        |j                  | _        t          j        |j                  | _        d S )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ())rA   rB   rD   num_attention_headshasattr
ValueErrorintattention_head_sizeall_head_sizer   r"   qkvorI   attention_probs_dropout_probrK   rP   s     r0   rB   zMPNetSelfAttention.__init__   s2    ::a??PVXhHiHi?8F$6 8 8 48 8 8  
 $*#= #&v'9F<V'V#W#W !58PP6-t/ABB6-t/ABB6-t/ABB6-v/ABBz&"EFFr2   NFc                 
   |j         \  }}}	|                     |                              |d| j        | j                                      dd          }
|                     |                              |d| j        | j                                      dd          }|                     |                              |d| j        | j                                      dd          }t          j	        |
|                    dd                    }|t          j        | j                  z  }|||z  }|||z   }t          j                            |d          }|                     |          }|||z  }t          j	        ||          }|                    dddd                                          }|                                d d         | j        fz   } |j        | }|                     |          }|r||fn|f}|S )Nr?   r      dimr   r	   )shaperp   viewrj   rn   	transposerq   rr   rM   matmulmathsqrtr   
functionalsoftmaxrK   permute
contiguousrU   ro   rs   )r.   hidden_statesattention_mask	head_maskposition_biasoutput_attentionsrX   
batch_sizerZ   _rp   rq   rr   attention_scoresattention_probscnew_c_shapers   outputss                      r0   r\   zMPNetSelfAttention.forward   s    %2$7!
JFF=!!T*b$":D<TUUYq!__ 	
 FF=!!T*b$":D<TUUYq!__ 	
 FF=!!T*b$":D<TUUYq!__ 	
 !<1;;r2+>+>??+di8P.Q.QQ $-%/.@ -//0@b/II,,77 -	9OL!,,IIaAq!!,,..ffhhssmt'9&;;AFK FF1II*;E1o&&!r2   NNNFr3   r4   r5   rB   r\   rc   rd   s   @r0   rf   rf      s_        G G G G G, 6 6 6 6 6 6 6 6r2   rf   c                   4     e Zd Z fdZd Z	 	 	 	 ddZ xZS )MPNetAttentionc                    t                                                       t          |          | _        t	          j        |j        |j                  | _        t	          j        |j	                  | _
        t                      | _        d S Nr<   )rA   rB   rf   attnr   r+   rD   rH   rI   rJ   rK   setpruned_headsrP   s     r0   rB   zMPNetAttention.__init__   sj    &v..	f&8f>STTTz&"<==EEr2   c                    t          |          dk    rd S t          || j        j        | j        j        | j                  \  }}t          | j        j        |          | j        _        t          | j        j        |          | j        _        t          | j        j	        |          | j        _	        t          | j        j
        |d          | j        _
        | j        j        t          |          z
  | j        _        | j        j        | j        j        z  | j        _        | j                            |          | _        d S )Nr   r   rx   )lenr   r   rj   rn   r   r   rp   rq   rr   rs   ro   union)r.   headsindexs      r0   prune_headszMPNetAttention.prune_heads   s    u::??F7490$)2OQUQb
 
u )e<<	(e<<	(e<<	(eCCC	(,	(EE

(R	%"&)"?$)B_"_	 -33E::r2   NFc                     |                      |||||          }|                     |                     |d                   |z             }|f|dd          z   }	|	S )N)r   r   r   )r   r+   rK   )
r.   r   r   r   r   r   rX   self_outputsattention_outputr   s
             r0   r\   zMPNetAttention.forward   so     yy/ ! 
 
  >>$,,|A*G*G-*WXX#%QRR(88r2   r   )r3   r4   r5   rB   r   r\   rc   rd   s   @r0   r   r      si        " " " " "; ; ;&        r2   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )MPNetIntermediatec                    t                                                       t          j        |j        |j                  | _        t          |j        t                    rt          |j                 | _        d S |j        | _        d S N)rA   rB   r   r"   rD   intermediate_sizedenser!   
hidden_actstrr
   intermediate_act_fnrP   s     r0   rB   zMPNetIntermediate.__init__   sn    Yv163KLL
f'-- 	9'-f.?'@D$$$'-'8D$$$r2   r   returnc                 Z    |                      |          }|                     |          }|S r   )r   r   )r.   r   s     r0   r\   zMPNetIntermediate.forward  s,    

=1100??r2   r3   r4   r5   rB   rM   Tensorr\   rc   rd   s   @r0   r   r      s^        9 9 9 9 9U\ el        r2   r   c                   P     e Zd Z fdZdej        dej        dej        fdZ xZS )MPNetOutputc                    t                                                       t          j        |j        |j                  | _        t          j        |j        |j                  | _        t          j	        |j
                  | _        d S r   )rA   rB   r   r"   r   rD   r   r+   rH   rI   rJ   rK   rP   s     r0   rB   zMPNetOutput.__init__  sf    Yv79KLL
f&8f>STTTz&"<==r2   r   input_tensorr   c                     |                      |          }|                     |          }|                     ||z             }|S r   )r   rK   r+   )r.   r   r   s      r0   r\   zMPNetOutput.forward  s@    

=11]33}|'CDDr2   r   rd   s   @r0   r   r     si        > > > > >U\  RWR^        r2   r   c                   .     e Zd Z fdZ	 	 	 	 ddZ xZS )
MPNetLayerc                     t                                                       t          |          | _        t	          |          | _        t          |          | _        d S r   )rA   rB   r   	attentionr   intermediater   outputrP   s     r0   rB   zMPNetLayer.__init__  sK    '//-f55!&))r2   NFc                     |                      |||||          }|d         }|dd          }	|                     |          }
|                     |
|          }|f|	z   }	|	S )N)r   r   r   r   )r   r   r   )r.   r   r   r   r   r   rX   self_attention_outputsr   r   intermediate_outputlayer_outputs               r0   r\   zMPNetLayer.forward   s     "&'/ "0 "
 "
 2!4(,"//0@AA{{#68HII/G+r2   r   r   rd   s   @r0   r   r     sZ        * * * * *        r2   r   c                        e Zd Z fdZ	 	 	 	 	 ddej        deej                 deej                 deded	efd
ZddZ	e
dd            Z xZS )MPNetEncoderc                 &   t                                                       | _        j        | _        t          j        fdt          j                  D                       | _	        t          j
        j        | j                  | _        d S )Nc                 .    g | ]}t                    S r8   )r   ).0r   r   s     r0   
<listcomp>z)MPNetEncoder.__init__.<locals>.<listcomp>>  s!    #`#`#`1Jv$6$6#`#`#`r2   )rA   rB   r   rj   n_headsr   
ModuleListrangenum_hidden_layerslayerr)   relative_attention_num_bucketsrelative_attention_biasrP   s    `r0   rB   zMPNetEncoder.__init__:  s}    1]#`#`#`#`fF^@_@_#`#`#`aa
')|F4Y[_[g'h'h$$$r2   NFr   r   r   r   output_hidden_statesreturn_dictc                 L   |                      |          }|rdnd }	|rdnd }
t          | j                  D ]7\  }}|r|	|fz   }	 |||||         |fd|i|}|d         }|r|
|d         fz   }
8|r|	|fz   }	|st          d ||	|
fD                       S t	          ||	|
          S )Nr8   r   r   r   c              3      K   | ]}||V  	d S r   r8   )r   rr   s     r0   	<genexpr>z'MPNetEncoder.forward.<locals>.<genexpr>d  s(      hhqZ[ZgZgZgZgZghhr2   )last_hidden_stater   
attentions)compute_position_bias	enumerater   tupler   )r.   r   r   r   r   r   r   rX   r   all_hidden_statesall_attentionsilayer_modulelayer_outputss                 r0   r\   zMPNetEncoder.forwardA  s3    22=AA"6@BBD0:d(44 	F 	FOA|# I$58H$H!(L!	 
 #4  M *!,M  F!/=3C2E!E   	E 1]4D D 	ihh]4E~$Vhhhhhh++%
 
 
 	
r2       c                    |                     d          |                     d          |                     d          }}}||d d d d d f         }|d d d d d f         }nTt          j        |t          j                  d d d f         }t          j        |t          j                  d d d f         }||z
  }	|                     |	|          }
|
                    |j                  }
|                     |
          }|                    g d          	                    d          }|
                    |d||f                                          }|S )Nr   r   )r^   )num_buckets)rv   r   r   r?   )rU   rM   rN   r`   relative_position_buckettor_   r   r   ra   rO   r   )r.   xr>   r   bszqlenklencontext_positionmemory_positionrelative_position	rp_bucketvaluess               r0   r   z"MPNetEncoder.compute_position_biask  sG   &&))QVVAYYq		4T#+AAAqqq$J7*111dAAA:6OO$|D
CCCAAAtGL#l4uzBBB47KO+.>>112CQ\1]]	LL**	--i88			**44Q77Rt455@@BBr2      c                     d}|  }|dz  }||dk                          t          j                  |z  z  }t          j        |          }|dz  }||k     }|t          j        |                                |z            t          j        ||z            z  ||z
  z                       t          j                  z   }t          j        |t          j        ||dz
                      }|t          j	        |||          z  }|S )Nr   rv   r   )
r   rM   r`   abslogfloatr~   min	full_likewhere)r   r   max_distanceretn	max_exactis_smallval_if_larges           r0   r   z%MPNetEncoder.relative_position_bucket}  s    Azz%*%%33IaLL1$	y= Iaggii)+,,txy8P/Q/QQU`clUlm
"UZ.. yu|[[\_/]/]^^u{8Q555
r2   )NNFFF)Nr   )r   r   )r3   r4   r5   rB   rM   r   r   boolr\   r   staticmethodr   rc   rd   s   @r0   r   r   9  s        i i i i i 26,0"'%*!(
 (
|(
 !.(
 EL)	(

  (
 #(
 (
 (
 (
 (
T   $    \    r2   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )MPNetPoolerc                     t                                                       t          j        |j        |j                  | _        t          j                    | _        d S r   )rA   rB   r   r"   rD   r   Tanh
activationrP   s     r0   rB   zMPNetPooler.__init__  sC    Yv163EFF
'))r2   r   r   c                 r    |d d df         }|                      |          }|                     |          }|S Nr   )r   r   )r.   r   first_token_tensorpooled_outputs       r0   r\   zMPNetPooler.forward  s@     +111a40

#56666r2   r   rd   s   @r0   r   r     s^        $ $ $ $ $
U\ el        r2   r   c                   8    e Zd Zd fd	Zd Zd Zd Ze	 	 	 	 	 	 	 	 ddee	j
                 dee	j                 d	ee	j
                 d
ee	j                 dee	j                 dee         dee         dee         deee	j                 ef         fd            Z xZS )
MPNetModelTc                     t                                          |           || _        t          |          | _        t          |          | _        |rt          |          nd| _        | 	                                 dS )zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)
rA   rB   r   r:   r[   r   encoderr   pooler	post_init)r.   r   add_pooling_layerrQ   s      r0   rB   zMPNetModel.__init__  ss    
 	   )&11#F++->Hk&)))D 	r2   c                     | j         j        S r   r[   rE   r.   s    r0   get_input_embeddingszMPNetModel.get_input_embeddings  s    ..r2   c                     || j         _        d S r   r	  )r.   values     r0   set_input_embeddingszMPNetModel.set_input_embeddings  s    */'''r2   c                     |                                 D ]/\  }}| j        j        |         j                            |           0dS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr  r   r   r   )r.   heads_to_pruner   r   s       r0   _prune_headszMPNetModel._prune_heads  sU    
 +0022 	C 	CLE5Lu%/;;EBBBB	C 	Cr2   NrV   r   r>   r   rW   r   r   r   r   c	                    ||n| j         j        }||n| j         j        }||n| j         j        }||t	          d          |+|                     ||           |                                }
n.||                                d d         }
nt	          d          ||j        n|j        }|t          j	        |
|          }| 
                    ||
          }|                     || j         j                  }|                     |||          }|                     ||||||          }|d         }| j        |                     |          nd }|s||f|dd          z   S t!          |||j        |j        	          S )
NzDYou cannot specify both input_ids and inputs_embeds at the same timer?   z5You have to specify either input_ids or inputs_embeds)r_   )rV   r>   rW   )r   r   r   r   r   r   r   )r   pooler_outputr   r   )r   r   r   use_return_dictrl   %warn_if_padding_and_no_attention_maskrU   r_   rM   onesget_extended_attention_maskget_head_maskr   r[   r  r  r   r   r   )r.   rV   r   r>   r   rW   r   r   r   rX   rY   r_   extended_attention_maskembedding_outputencoder_outputssequence_outputr   s                    r0   r\   zMPNetModel.forward  s    2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B] ]%>cddd"66y.QQQ#..**KK&',,..ss3KKTUUU%.%:!!@T!"ZFCCCN040P0PQ_al0m0m&&y$+2OPP	??Y\iv?ww,,2/!5# ' 
 
 *!,8<8OO444UY 	J#]3oabb6III)-')7&1	
 
 
 	
r2   )T)NNNNNNNN)r3   r4   r5   rB   r  r  r  r   r   rM   
LongTensorFloatTensorr   r   r   r   r   r\   rc   rd   s   @r0   r  r    sJ            / / /0 0 0C C C  156:371559,0/3&*7
 7
E,-7
 !!237
 u/0	7

 E-.7
   127
 $D>7
 'tn7
 d^7
 
uU\"$>>	?7
 7
 7
 ^7
 7
 7
 7
 7
r2   r  c                   R    e Zd ZdgZ fdZd Zd Ze	 	 	 	 	 	 	 	 	 ddee	j
                 dee	j                 dee	j
                 d	ee	j                 d
ee	j                 dee	j
                 dee         dee         dee         deee	j                 ef         fd            Z xZS )MPNetForMaskedLMzlm_head.decoderc                     t                                          |           t          |d          | _        t	          |          | _        |                                  d S NF)r  )rA   rB   r  r   r-   lm_headr  rP   s     r0   rB   zMPNetForMaskedLM.__init__  sV       %@@@
"6** 	r2   c                     | j         j        S r   )r$  decoderr
  s    r0   get_output_embeddingsz&MPNetForMaskedLM.get_output_embeddings  s    |##r2   c                 @    || j         _        |j        | j         _        d S r   )r$  r&  r'   )r.   new_embeddingss     r0   set_output_embeddingsz&MPNetForMaskedLM.set_output_embeddings  s    -*/r2   NrV   r   r>   r   rW   labelsr   r   r   r   c
           
         |	|	n| j         j        }	|                     ||||||||	          }
|
d         }|                     |          }d}|Kt	                      } ||                    d| j         j                  |                    d                    }|	s|f|
dd         z   }||f|z   n|S t          |||
j        |
j	                  S )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        Nr   r>   r   rW   r   r   r   r   r?   rv   losslogitsr   r   )
r   r  r   r$  r   r{   rC   r   r   r   )r.   rV   r   r>   r   rW   r+  r   r   r   r   r  prediction_scoresmasked_lm_lossloss_fctr   s                   r0   r\   zMPNetForMaskedLM.forward  s   & &1%<kk$+B]**)%'/!5#  	
 	
 "!* LL99'))H%X&7&<&<RAW&X&XZ`ZeZefhZiZijjN 	Z')GABBK7F3A3M^%..SYY$!/)	
 
 
 	
r2   	NNNNNNNNN)r3   r4   r5   _tied_weights_keysrB   r'  r*  r   r   rM   r  r  r   r   r   r   r   r\   rc   rd   s   @r0   r!  r!    sP       +,    $ $ $0 0 0  156:371559-1,0/3&*0
 0
E,-0
 !!230
 u/0	0

 E-.0
   120
 )*0
 $D>0
 'tn0
 d^0
 
uU\"N2	30
 0
 0
 ^0
 0
 0
 0
 0
r2   r!  c                   .     e Zd ZdZ fdZd Zd Z xZS )r-   z5MPNet Head for masked and permuted language modeling.c                    t                                                       t          j        |j        |j                  | _        t          j        |j        |j                  | _        t          j        |j        |j	        d          | _
        t          j        t          j        |j	                            | _        | j        | j
        _        d S )Nr<   F)r'   )rA   rB   r   r"   rD   r   r+   rH   
layer_normrC   r&  	ParameterrM   zerosr'   rP   s     r0   rB   zMPNetLMHead.__init__F  s    Yv163EFF
,v'9v?TUUUy!3V5FUSSSLV->!?!?@@	 !Ir2   c                 (    | j         | j        _         d S r   )r'   r&  r
  s    r0   _tie_weightszMPNetLMHead._tie_weightsQ  s     Ir2   c                     |                      |          }t          |          }|                     |          }|                     |          }|S r   )r   r   r8  r&  r.   featuresrX   r   s       r0   r\   zMPNetLMHead.forwardT  sE    JJx  GGOOA LLOOr2   )r3   r4   r5   __doc__rB   r<  r\   rc   rd   s   @r0   r-   r-   C  s\        ??	& 	& 	& 	& 	&& & &      r2   r-   z
    MPNet Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    )custom_introc                   @    e Zd Z fdZe	 	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 deej                 deej                 d	ee	         d
ee	         dee	         de
eej                 ef         fd            Z xZS )MPNetForSequenceClassificationc                     t                                          |           |j        | _        t          |d          | _        t          |          | _        |                                  d S r#  )rA   rB   
num_labelsr  r   MPNetClassificationHead
classifierr  rP   s     r0   rB   z'MPNetForSequenceClassification.__init__f  s`        +%@@@
1&99 	r2   NrV   r   r>   r   rW   r+  r   r   r   r   c
           
         |	|	n| j         j        }	|                     ||||||||	          }
|
d         }|                     |          }d}|Z| j         j        f| j        dk    rd| j         _        nN| j        dk    r7|j        t          j        k    s|j        t          j	        k    rd| j         _        nd| j         _        | j         j        dk    rWt                      }| j        dk    r1 ||                                |                                          }n |||          }n| j         j        dk    rGt                      } ||                    d| j                  |                    d                    }n*| j         j        dk    rt                      } |||          }|	s|f|
d	d         z   }||f|z   n|S t          |||
j        |
j        
          S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr-  r   r   
regressionsingle_label_classificationmulti_label_classificationr?   rv   r.  )r   r  r   rG  problem_typerE  r^   rM   r`   rm   r   squeezer   r{   r   r   r   r   r.   rV   r   r>   r   rW   r+  r   r   r   r   r  r0  r/  r3  r   s                   r0   r\   z&MPNetForSequenceClassification.forwardp  s   ( &1%<kk$+B]**)%'/!5#  	
 	
 "!*11{'/?a''/;DK,,_q((flej.H.HFL\a\eLeLe/LDK,,/KDK,{'<77"99?a''#8FNN$4$4fnn6F6FGGDD#8FF33DD)-JJJ+--xB @ @&++b//RR)-III,..x// 	FY,F)-)9TGf$$vE'!/)	
 
 
 	
r2   r4  )r3   r4   r5   rB   r   r   rM   r  r  r   r   r   r   r   r\   rc   rd   s   @r0   rC  rC  _  s=             156:371559-1,0/3&*A
 A
E,-A
 !!23A
 u/0	A

 E-.A
   12A
 )*A
 $D>A
 'tnA
 d^A
 
uU\"$<<	=A
 A
 A
 ^A
 A
 A
 A
 A
r2   rC  c                   @    e Zd Z fdZe	 	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 deej                 deej                 d	ee	         d
ee	         dee	         de
eej                 ef         fd            Z xZS )MPNetForMultipleChoicec                    t                                          |           t          |          | _        t	          j        |j                  | _        t	          j        |j	        d          | _
        |                                  d S )Nr   )rA   rB   r  r   r   rI   rJ   rK   r"   rD   rG  r  rP   s     r0   rB   zMPNetForMultipleChoice.__init__  sl       ''
z&"<==)F$6:: 	r2   NrV   r   r>   r   rW   r+  r   r   r   r   c
           
      ^   |	|	n| j         j        }	||j        d         n|j        d         }
|)|                    d|                    d                    nd}|)|                    d|                    d                    nd}|)|                    d|                    d                    nd}|=|                    d|                    d          |                    d                    nd}|                     ||||||||	          }|d         }|                     |          }|                     |          }|                    d|
          }d}|t                      } |||          }|	s|f|dd         z   }||f|z   n|S t          |||j
        |j                  S )a  
        input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        Nr   r?   rw   )r>   r   r   rW   r   r   r   rv   r.  )r   r  rz   r{   rU   r   rK   rG  r   r   r   r   )r.   rV   r   r>   r   rW   r+  r   r   r   num_choicesflat_input_idsflat_position_idsflat_attention_maskflat_inputs_embedsr   r   r0  reshaped_logitsr/  r3  r   s                         r0   r\   zMPNetForMultipleChoice.forward  s   H &1%<kk$+B],5,Aioa((}GZ[\G]CLCXINN2,>,>???^bLXLdL--b,2C2CB2G2GHHHjnR`Rln11"n6I6I"6M6MNNNrv ( r=#5#5b#9#9=;M;Mb;Q;QRRR 	 ***.,/!5#  	
 	
  
]33// ++b+66'))H8OV44D 	F%''!""+5F)-)9TGf$$vE("!/)	
 
 
 	
r2   r4  )r3   r4   r5   rB   r   r   rM   r  r  r   r   r   r   r   r\   rc   rd   s   @r0   rP  rP    s=             156:371559-1,0/3&*M
 M
E,-M
 !!23M
 u/0	M

 E-.M
   12M
 )*M
 $D>M
 'tnM
 d^M
 
uU\"$==	>M
 M
 M
 ^M
 M
 M
 M
 M
r2   rP  c                   @    e Zd Z fdZe	 	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 deej                 deej                 d	ee	         d
ee	         dee	         de
eej                 ef         fd            Z xZS )MPNetForTokenClassificationc                 :   t                                          |           |j        | _        t          |d          | _        t          j        |j                  | _        t          j	        |j
        |j                  | _        |                                  d S r#  )rA   rB   rE  r  r   r   rI   rJ   rK   r"   rD   rG  r  rP   s     r0   rB   z$MPNetForTokenClassification.__init__  s~        +%@@@
z&"<==)F$68IJJ 	r2   NrV   r   r>   r   rW   r+  r   r   r   r   c
           
         |	|	n| j         j        }	|                     ||||||||	          }
|
d         }|                     |          }|                     |          }d}|Ft                      } ||                    d| j                  |                    d                    }|	s|f|
dd         z   }||f|z   n|S t          |||
j	        |
j
                  S )z
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        Nr-  r   r?   rv   r.  )r   r  r   rK   rG  r   r{   rE  r   r   r   rN  s                   r0   r\   z#MPNetForTokenClassification.forward  s   $ &1%<kk$+B]**)%'/!5#  	
 	
 "!*,,7711'))H8FKKDO<<fkk"ooNND 	FY,F)-)9TGf$$vE$!/)	
 
 
 	
r2   r4  )r3   r4   r5   rB   r   r   rM   r  r  r   r   r   r   r   r\   rc   rd   s   @r0   rZ  rZ    s*       	 	 	 	 	  156:371559-1,0/3&*1
 1
E,-1
 !!231
 u/0	1

 E-.1
   121
 )*1
 $D>1
 'tn1
 d^1
 
uU\"$99	:1
 1
 1
 ^1
 1
 1
 1
 1
r2   rZ  c                   (     e Zd ZdZ fdZd Z xZS )rF  z-Head for sentence-level classification tasks.c                    t                                                       t          j        |j        |j                  | _        t          j        |j                  | _        t          j        |j        |j	                  | _
        d S r   )rA   rB   r   r"   rD   r   rI   rJ   rK   rE  out_projrP   s     r0   rB   z MPNetClassificationHead.__init__W  sc    Yv163EFF
z&"<==	&"4f6GHHr2   c                     |d d dd d f         }|                      |          }|                     |          }t          j        |          }|                      |          }|                     |          }|S r   )rK   r   rM   tanhr_  r>  s       r0   r\   zMPNetClassificationHead.forward]  sj    QQQ111WLLOOJJqMMJqMMLLOOMM!r2   )r3   r4   r5   r@  rB   r\   rc   rd   s   @r0   rF  rF  T  sR        77I I I I I      r2   rF  c                   \    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 deej                 deej                 d	eej                 d
ee	         dee	         dee	         de
eej                 ef         fd            Z xZS )MPNetForQuestionAnsweringc                     t                                          |           |j        | _        t          |d          | _        t          j        |j        |j                  | _        | 	                                 d S r#  )
rA   rB   rE  r  r   r   r"   rD   
qa_outputsr  rP   s     r0   rB   z"MPNetForQuestionAnswering.__init__i  sj        +%@@@
)F$68IJJ 	r2   NrV   r   r>   r   rW   start_positionsend_positionsr   r   r   r   c           
         |
|
n| j         j        }
|                     |||||||	|
          }|d         }|                     |          }|                    dd          \  }}|                    d                                          }|                    d                                          }d }||t          |                                          dk    r|                    d          }t          |                                          dk    r|                    d          }|                    d          }|	                    d|          }|	                    d|          }t          |          } |||          } |||          }||z   dz  }|
s||f|dd          z   }||f|z   n|S t          ||||j        |j                  S )	Nr-  r   r   r?   rx   )ignore_indexrv   )r/  start_logits
end_logitsr   r   )r   r  r   re  splitrM  r   r   rU   clampr   r   r   r   )r.   rV   r   r>   r   rW   rf  rg  r   r   r   r   r  r0  rj  rk  
total_lossignored_indexr3  
start_lossend_lossr   s                         r0   r\   z!MPNetForQuestionAnswering.forwards  s    &1%<kk$+B]**)%'/!5#  	
 	
 "!*11#)<<r<#:#: j#++B//::<<''++6688

&=+D?''))**Q.."1"9"9""="==%%''((1,, - 5 5b 9 9(--a00M-33A}EEO)//=AAM']CCCH!,@@Jx
M::H$x/14J 	R"J/'!""+=F/9/EZMF**6Q+%!!/)
 
 
 	
r2   )
NNNNNNNNNN)r3   r4   r5   rB   r   r   rM   r  r  r   r   r   r   r   r\   rc   rd   s   @r0   rc  rc  g  s?             156:3715596:48,0/3&*<
 <
E,-<
 !!23<
 u/0	<

 E-.<
   12<
 "%"23<
   01<
 $D><
 'tn<
 d^<
 
uU\"$@@	A<
 <
 <
 ^<
 <
 <
 <
 <
r2   rc  c                     |                      |                                          }t          j        |d                              |          |z  }|                                |z   S )z
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`. :param torch.Tensor x: :return torch.Tensor:
    r   rx   )nerm   rM   cumsumtype_asr`   )rV   r*   maskincremental_indicess       r0   rS   rS     s`     <<$$((**D,t333;;DAADH##%%33r2   )r!  rP  rc  rC  rZ  r   r  r   )7r@  r~   typingr   r   rM   r   torch.nnr   r   r   activationsr
   r   modeling_outputsr   r   r   r   r   r   r   modeling_utilsr   pytorch_utilsr   r   utilsr   r   configuration_mpnetr   
get_loggerr3   loggerr   Moduler:   rf   r   r   r   r   r   r   r  r!  r-   rC  rP  rZ  rF  rc  rS   __all__r8   r2   r0   <module>r     s       " " " " " " " "        A A A A A A A A A A ' ' ' ' ' ' ' '                  . - - - - - Q Q Q Q Q Q Q Q , , , , , , , , , , , , , , 
	H	%	% % % % % %? % % %.8= 8= 8= 8= 8=bi 8= 8= 8=vJ J J J J J J JZ+ + + + +RY + + +^    	        ")          @V V V V V29 V V Vt    ")    V
 V
 V
 V
 V
% V
 V
 V
rD
 D
 D
 D
 D
+ D
 D
 D
N    ")   8   M
 M
 M
 M
 M
%9 M
 M
 M
` Y
 Y
 Y
 Y
 Y
1 Y
 Y
 Y
x >
 >
 >
 >
 >
"6 >
 >
 >
B    bi   & H
 H
 H
 H
 H
 4 H
 H
 H
V4 4 4	 	 	r2   