
     `i                     b   d Z ddlZddlmZ ddlmZ ddlmZmZ ddl	Z	ddl	m
Z
 ddlmZmZmZ dd	lmZmZ  e            rdd
lmZ ddlmZ ddlmZ ddlmZmZmZmZmZmZmZm Z m!Z! ddl"m#Z# ddl$m%Z% ddlm&Z& ddl'm(Z(  e&j)        e*          Z+d Z,d Z-d Z. G d de
j/                  Z0 G d de
j/                  Z1 G d de
j/                  Z2 G d de
j/                  Z3 G d de
j/                  Z4 G d  d!e
j/                  Z5 G d" d#e          Z6 G d$ d%e
j/                  Z7 G d& d'e
j/                  Z8 G d( d)e
j/                  Z9 G d* d+e
j/                  Z: G d, d-e
j/                  Z; G d. d/e
j/                  Z< G d0 d1e
j/                  Z=e G d2 d3e#                      Z>e ed45           G d6 d7e                                  Z?e G d8 d9e>                      Z@ ed:5           G d; d<e>                      ZAe G d= d>e>                      ZB ed?5           G d@ dAe>                      ZC edB5           G dC dDe>                      ZDe G dE dFe>                      ZEe G dG dHe>                      ZFe G dI dJe>                      ZGg dKZHdS )LzPyTorch FNet model.    N)	dataclass)partial)OptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )auto_docstringis_scipy_available)linalg)ACT2FN)GradientCheckpointingLayer)	BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputModelOutputMultipleChoiceModelOutputNextSentencePredictorOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)apply_chunking_to_forward)logging   )
FNetConfigc                     | j         d         }|d|d|f         }|                     t          j                  } t          j        d| ||          S )z4Applies 2D matrix multiplication to 3D input arrays.r   Nzbij,jk,ni->bnk)shapetypetorch	complex64einsum)xmatrix_dim_onematrix_dim_two
seq_lengths       z/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/fnet/modeling_fnet.py_two_dim_matmulr*   7   sN    J#KZK*$<=N	uA<(!^^LLL    c                 $    t          | ||          S N)r*   )r%   r&   r'   s      r)   two_dim_matmulr.   @   s    1nn===r+   c                     | }t          t          | j                  dd                   D ]#}t          j                            ||          }$|S )z
    Applies n-dimensional Fast Fourier Transform (FFT) to input array.

    Args:
        x: Input n-dimensional array.

    Returns:
        n-dimensional Fourier transform of input n-dimensional array.
    r   N)axis)reversedrangendimr"   fft)r%   outr0   s      r)   fftnr6   E   sO     Cqvqrr*++ , ,immCdm++Jr+   c                   *     e Zd ZdZ fdZddZ xZS )FNetEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                    t                                                       t          j        |j        |j        |j                  | _        t          j        |j        |j                  | _	        t          j        |j
        |j                  | _        t          j        |j        |j                  | _        t          j        |j        |j                  | _        t          j        |j                  | _        |                     dt)          j        |j                                      d          d           |                     dt)          j        | j                                        t(          j                  d           d S )	N)padding_idxepsposition_ids)r   F)
persistenttoken_type_idsdtype)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsLinear
projectionDropouthidden_dropout_probdropoutregister_bufferr"   arangeexpandzerosr=   sizelongselfconfig	__class__s     r)   rD   zFNetEmbeddings.__init__X   sP   !|F,=v?Q_e_rsss#%<0NPVPb#c#c %'\&2H&J\%]%]" f&8f>STTT)F$68JKKz&"<== 	EL)GHHOOPWXXej 	 	
 	
 	
 	ek$*;*@*@*B*B%*UUUbg 	 	
 	
 	
 	
 	
r+   Nc                    ||                                 }n|                                 d d         }|d         }|| j        d d d |f         }|mt          | d          r2| j        d d d |f         }|                    |d         |          }|}n+t          j        |t
          j        | j        j                  }|| 	                    |          }| 
                    |          }	||	z   }
|                     |          }|
|z  }
|                     |
          }
|                     |
          }
|                     |
          }
|
S )Nr>   r   r@   r   rB   device)rY   r=   hasattrr@   rW   r"   rX   rZ   ra   rI   rM   rK   rN   rQ   rT   )r\   	input_idsr@   r=   inputs_embedsinput_shaper(   buffered_token_type_ids buffered_token_type_ids_expandedrM   
embeddingsrK   s               r)   forwardzFNetEmbeddings.forwardn   se    #..**KK',,..ss3K ^
,QQQ^<L
 !t-.. m*.*=aaa*n*M'3J3Q3QR]^_R`bl3m3m0!A!&[
SWSdSk!l!l!l  00;;M $ : :> J J"%::
"66|DD))
^^J//
__Z00
\\*--
r+   )NNNN)__name__
__module____qualname____doc__rD   ri   __classcell__r^   s   @r)   r8   r8   U   sR        QQ
 
 
 
 
,! ! ! ! ! ! ! !r+   r8   c                   *     e Zd Z fdZd Zd Z xZS )FNetBasicFourierTransformc                 r    t                                                       |                     |           d S r-   )rC   rD   _init_fourier_transformr[   s     r)   rD   z"FNetBasicFourierTransform.__init__   s3    $$V,,,,,r+   c                 l   |j         s't          t          j        j        d          | _        d S |j        dk    rt                      r|                     dt          j	        t          j        |j                  t          j                             |                     dt          j	        t          j        |j                  t          j                             t          t          | j        | j                  | _        d S t%          j        d           t          | _        d S t          | _        d S )	N)r      dim   dft_mat_hiddenrA   dft_mat_seq)r&   r'   zpSciPy is needed for DFT matrix calculation and is not found. Using TPU optimized fast fourier transform instead.)use_tpu_fourier_optimizationsr   r"   r4   r6   fourier_transformrJ   r   rU   tensorr   dftrG   r#   tpu_short_seq_lengthr.   rz   ry   r   warning)r\   r]   s     r)   rs   z1FNetBasicFourierTransform._init_fourier_transform   s*   3 	*%,UY^%H%H%HD"""+t33!## .$$$el6:f>P3Q3QY^Yh&i&i&i   $$!5<
6;V0W0W_d_n#o#o#o   *1"43CTXTg* * *&&& *   *.&&&%)D"""r+   c                 <    |                      |          j        }|fS r-   )r|   real)r\   hidden_statesoutputss      r)   ri   z!FNetBasicFourierTransform.forward   s"     ((77<zr+   )rj   rk   rl   rD   rs   ri   rn   ro   s   @r)   rq   rq      sV        - - - - -* * *.      r+   rq   c                   $     e Zd Z fdZd Z xZS )FNetBasicOutputc                     t                                                       t          j        |j        |j                  | _        d S Nr;   )rC   rD   r   rN   rG   rO   r[   s     r)   rD   zFNetBasicOutput.__init__   s9    f&8f>STTTr+   c                 6    |                      ||z             }|S r-   )rN   r\   r   input_tensors      r)   ri   zFNetBasicOutput.forward   s    |m'CDDr+   rj   rk   rl   rD   ri   rn   ro   s   @r)   r   r      sL        U U U U U      r+   r   c                   $     e Zd Z fdZd Z xZS )FNetFourierTransformc                     t                                                       t          |          | _        t	          |          | _        d S r-   )rC   rD   rq   r\   r   outputr[   s     r)   rD   zFNetFourierTransform.__init__   s;    -f55	%f--r+   c                 n    |                      |          }|                     |d         |          }|f}|S Nr   )r\   r   )r\   r   self_outputsfourier_outputr   s        r)   ri   zFNetFourierTransform.forward   s7    yy//\!_mDD!#r+   r   ro   s   @r)   r   r      sG        . . . . .
      r+   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )FNetIntermediatec                    t                                                       t          j        |j        |j                  | _        t          |j        t                    rt          |j                 | _        d S |j        | _        d S r-   )rC   rD   r   rP   rG   intermediate_sizedense
isinstance
hidden_actstrr   intermediate_act_fnr[   s     r)   rD   zFNetIntermediate.__init__   sn    Yv163KLL
f'-- 	9'-f.?'@D$$$'-'8D$$$r+   r   returnc                 Z    |                      |          }|                     |          }|S r-   )r   r   r\   r   s     r)   ri   zFNetIntermediate.forward   s,    

=1100??r+   rj   rk   rl   rD   r"   Tensorri   rn   ro   s   @r)   r   r      s^        9 9 9 9 9U\ el        r+   r   c                   P     e Zd Z fdZdej        dej        dej        fdZ xZS )
FNetOutputc                    t                                                       t          j        |j        |j                  | _        t          j        |j        |j                  | _        t          j	        |j
                  | _        d S r   )rC   rD   r   rP   r   rG   r   rN   rO   rR   rS   rT   r[   s     r)   rD   zFNetOutput.__init__   sf    Yv79KLL
f&8f>STTTz&"<==r+   r   r   r   c                     |                      |          }|                     |          }|                     ||z             }|S r-   )r   rT   rN   r   s      r)   ri   zFNetOutput.forward   s@    

=11]33}|'CDDr+   r   ro   s   @r)   r   r      si        > > > > >U\  RWR^        r+   r   c                   *     e Zd Z fdZd Zd Z xZS )	FNetLayerc                     t                                                       |j        | _        d| _        t	          |          | _        t          |          | _        t          |          | _	        d S Nr   )
rC   rD   chunk_size_feed_forwardseq_len_dimr   fourierr   intermediater   r   r[   s     r)   rD   zFNetLayer.__init__   s^    '-'E$+F33,V44 ((r+   c                     |                      |          }|d         }t          | j        | j        | j        |          }|f}|S r   )r   r   feed_forward_chunkr   r   )r\   r   self_fourier_outputsr   layer_outputr   s         r)   ri   zFNetLayer.forward   sN    #||M::-a00#T%A4CSUc
 
  /r+   c                 \    |                      |          }|                     ||          }|S r-   )r   r   )r\   r   intermediate_outputr   s       r)   r   zFNetLayer.feed_forward_chunk  s0    "//??{{#6GGr+   )rj   rk   rl   rD   ri   r   rn   ro   s   @r)   r   r      sV        ) ) ) ) )
 
 
      r+   r   c                   &     e Zd Z fdZddZ xZS )FNetEncoderc                     t                                                       | _        t          j        fdt          j                  D                       | _        d| _        d S )Nc                 .    g | ]}t                    S  )r   ).0_r]   s     r)   
<listcomp>z(FNetEncoder.__init__.<locals>.<listcomp>  s!    #_#_#_!If$5$5#_#_#_r+   F)	rC   rD   r]   r   
ModuleListr2   num_hidden_layerslayergradient_checkpointingr[   s    `r)   rD   zFNetEncoder.__init__
  s`    ]#_#_#_#_uVE]?^?^#_#_#_``
&+###r+   FTc                     |rdnd }t          | j                  D ] \  }}|r||fz   } ||          }|d         }!|r||fz   }|st          d ||fD                       S t          ||          S )Nr   r   c              3      K   | ]}||V  	d S r-   r   )r   vs     r)   	<genexpr>z&FNetEncoder.forward.<locals>.<genexpr>  s"      XXq!-----XXr+   )last_hidden_stater   )	enumerater   tupler   )r\   r   output_hidden_statesreturn_dictall_hidden_statesilayer_modulelayer_outputss           r)   ri   zFNetEncoder.forward  s    "6@BBD(44 	- 	-OA|# I$58H$H!(L77M)!,MM 	E 1]4D D 	YXX]4E$FXXXXXXN_````r+   )FTr   ro   s   @r)   r   r   	  sT        , , , , ,a a a a a a a ar+   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )
FNetPoolerc                     t                                                       t          j        |j        |j                  | _        t          j                    | _        d S r-   )rC   rD   r   rP   rG   r   Tanh
activationr[   s     r)   rD   zFNetPooler.__init__&  sC    Yv163EFF
'))r+   r   r   c                 r    |d d df         }|                      |          }|                     |          }|S r   )r   r   )r\   r   first_token_tensorpooled_outputs       r)   ri   zFNetPooler.forward+  s@     +111a40

#56666r+   r   ro   s   @r)   r   r   %  s^        $ $ $ $ $
U\ el        r+   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )FNetPredictionHeadTransformc                 V   t                                                       t          j        |j        |j                  | _        t          |j        t                    rt          |j                 | _
        n|j        | _
        t          j        |j        |j                  | _        d S r   )rC   rD   r   rP   rG   r   r   r   r   r   transform_act_fnrN   rO   r[   s     r)   rD   z$FNetPredictionHeadTransform.__init__6  s    Yv163EFF
f'-- 	6$*6+<$=D!!$*$5D!f&8f>STTTr+   r   r   c                     |                      |          }|                     |          }|                     |          }|S r-   )r   r   rN   r   s     r)   ri   z#FNetPredictionHeadTransform.forward?  s=    

=11--m<<}55r+   r   ro   s   @r)   r   r   5  sc        U U U U UU\ el        r+   r   c                   ,     e Zd Z fdZd ZddZ xZS )FNetLMPredictionHeadc                 :   t                                                       t          |          | _        t	          j        |j        |j                  | _        t	          j	        t          j        |j                            | _        | j        | j        _        d S r-   )rC   rD   r   	transformr   rP   rG   rF   decoder	Parameterr"   rX   biasr[   s     r)   rD   zFNetLMPredictionHead.__init__G  ss    4V<< y!3V5FGGLV->!?!?@@	 Ir+   c                 Z    |                      |          }|                     |          }|S r-   )r   r   r   s     r)   ri   zFNetLMPredictionHead.forwardR  s*    }55]33r+   r   Nc                     | j         j        j        j        dk    r| j        | j         _        d S | j         j        | _        d S )Nmeta)r   r   ra   r!   r\   s    r)   _tie_weightsz!FNetLMPredictionHead._tie_weightsW  s<    <#(F22 $	DL )DIIIr+   )r   N)rj   rk   rl   rD   ri   r   rn   ro   s   @r)   r   r   F  s[        	& 	& 	& 	& 	&  
* * * * * * * *r+   r   c                   $     e Zd Z fdZd Z xZS )FNetOnlyMLMHeadc                 p    t                                                       t          |          | _        d S r-   )rC   rD   r   predictionsr[   s     r)   rD   zFNetOnlyMLMHead.__init__a  s/    /77r+   c                 0    |                      |          }|S r-   )r   )r\   sequence_outputprediction_scoress      r)   ri   zFNetOnlyMLMHead.forwarde  s     ,,_==  r+   r   ro   s   @r)   r   r   `  sG        8 8 8 8 8! ! ! ! ! ! !r+   r   c                   $     e Zd Z fdZd Z xZS )FNetOnlyNSPHeadc                     t                                                       t          j        |j        d          | _        d S Nru   )rC   rD   r   rP   rG   seq_relationshipr[   s     r)   rD   zFNetOnlyNSPHead.__init__l  s6     "	&*<a @ @r+   c                 0    |                      |          }|S r-   )r   )r\   r   seq_relationship_scores      r)   ri   zFNetOnlyNSPHead.forwardp  s    !%!6!6}!E!E%%r+   r   ro   s   @r)   r   r   k  sL        A A A A A& & & & & & &r+   r   c                   $     e Zd Z fdZd Z xZS )FNetPreTrainingHeadsc                     t                                                       t          |          | _        t	          j        |j        d          | _        d S r   )rC   rD   r   r   r   rP   rG   r   r[   s     r)   rD   zFNetPreTrainingHeads.__init__w  sF    /77 "	&*<a @ @r+   c                 ^    |                      |          }|                     |          }||fS r-   )r   r   )r\   r   r   r   r   s        r)   ri   zFNetPreTrainingHeads.forward|  s6     ,,_==!%!6!6}!E!E "888r+   r   ro   s   @r)   r   r   v  sL        A A A A A
9 9 9 9 9 9 9r+   r   c                   (    e Zd ZU eed<   dZdZd ZdS )FNetPreTrainedModelr]   fnetTc                    t          |t          j                  rT|j        j                            d| j        j                   |j         |j        j        	                                 dS dS t          |t          j
                  r_|j        j                            d| j        j                   |j        +|j        j        |j                 	                                 dS dS t          |t          j                  r?|j        j        	                                 |j        j                            d           dS dS )zInitialize the weightsg        )meanstdNg      ?)r   r   rP   weightdatanormal_r]   initializer_ranger   zero_rE   r:   rN   fill_)r\   modules     r)   _init_weightsz!FNetPreTrainedModel._init_weights  s)   fbi(( 	* M&&CT[5R&SSS{& &&((((( '&-- 	*M&&CT[5R&SSS!-"6#56<<>>>>> .--- 	*K""$$$M$$S)))))	* 	*r+   N)rj   rk   rl   r   __annotations__base_model_prefixsupports_gradient_checkpointingr  r   r+   r)   r   r     s=         &*#* * * * *r+   r   z0
    Output type of [`FNetForPreTraining`].
    )custom_introc                       e Zd ZU dZdZeej                 ed<   dZ	eej                 ed<   dZ
eej                 ed<   dZeeej                          ed<   dS )FNetForPreTrainingOutputa  
    loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
        Total loss as the sum of the masked language modeling loss and the next sequence prediction
        (classification) loss.
    prediction_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    seq_relationship_logits (`torch.FloatTensor` of shape `(batch_size, 2)`):
        Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation
        before SoftMax).
    Nlossprediction_logitsseq_relationship_logitsr   )rj   rk   rl   rm   r  r   r"   FloatTensorr  r  r  r   r   r   r+   r)   r  r    s         	 	 )-D(5$
%,,,59x 12999;?Xe&78???8<M8E%"345<<<<<r+   r  c                        e Zd ZdZd fd	Zd Zd Ze	 	 	 	 	 	 ddee	j
                 dee	j
                 d	ee	j
                 d
ee	j                 dee         dee         deeef         fd            Z xZS )	FNetModelz

    The model can behave as an encoder, following the architecture described in [FNet: Mixing Tokens with Fourier
    Transforms](https://huggingface.co/papers/2105.03824) by James Lee-Thorp, Joshua Ainslie, Ilya Eckstein, Santiago Ontanon.

    Tc                     t                                          |           || _        t          |          | _        t          |          | _        |rt          |          nd| _        | 	                                 dS )zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)
rC   rD   r]   r8   rh   r   encoderr   pooler	post_init)r\   r]   add_pooling_layerr^   s      r)   rD   zFNetModel.__init__  ss    
 	   (00"6**,=Gj(((4 	r+   c                     | j         j        S r-   rh   rI   r   s    r)   get_input_embeddingszFNetModel.get_input_embeddings  s    ..r+   c                     || j         _        d S r-   r  )r\   values     r)   set_input_embeddingszFNetModel.set_input_embeddings  s    */'''r+   Nrc   r@   r=   rd   r   r   r   c                 X   ||n| j         j        }||n| j         j        }||t          d          ||                                }|\  }}	n3|"|                                d d         }|\  }}	nt          d          | j         j        r%|	dk    r| j         j        |	k    rt          d          ||j        n|j        }
|gt          | j	        d          r1| j	        j
        d d d |	f         }|                    ||	          }|}n!t          j        |t          j        |
          }| 	                    ||||          }|                     |||	          }|d
         }| j        |                     |          nd }|s||f|dd          z   S t#          |||j                  S )NzDYou cannot specify both input_ids and inputs_embeds at the same timer>   z5You have to specify either input_ids or inputs_embedsrx   zThe `tpu_short_seq_length` in FNetConfig should be set equal to the sequence length being passed to the model when using TPU optimizations.r@   r`   )rc   r=   r@   rd   )r   r   r   r   )r   pooler_outputr   )r]   r   use_return_dict
ValueErrorrY   r{   r   ra   rb   rh   r@   rW   r"   rX   rZ   r  r  r   r   )r\   rc   r@   r=   rd   r   r   re   
batch_sizer(   ra   rf   rg   embedding_outputencoder_outputsr   r  s                    r)   ri   zFNetModel.forward  s&    %9$D  $+Jj 	 &1%<kk$+B] ]%>cddd"#..**K%0"J

&',,..ss3K%0"J

TUUU K5	d""0J>>;  
 &/%:!!@T!t(899 [*./*HKZK*X'3J3Q3QR\^h3i3i0!A!&[
SY!Z!Z!Z??%)'	 + 
 
 ,,!5# ' 
 

 *!,8<8OO444UY 	J#]3oabb6III)-')7
 
 
 	
r+   )T)NNNNNN)rj   rk   rl   rm   rD   r  r  r   r   r"   
LongTensorr  boolr   r   r   ri   rn   ro   s   @r)   r  r    s$              / / /0 0 0  15593759/3&*C
 C
E,-C
 !!12C
 u/0	C

   12C
 'tnC
 d^C
 
uo%	&C
 C
 C
 ^C
 C
 C
 C
 C
r+   r  z
    FNet Model with two heads on top as done during the pretraining: a `masked language modeling` head and a `next
    sentence prediction (classification)` head.
    c                   ,    e Zd ZddgZ fdZd Zd Ze	 	 	 	 	 	 	 	 ddee	j
                 dee	j
                 d	ee	j
                 d
ee	j
                 dee	j
                 dee	j
                 dee         dee         deeef         fd            Z xZS )FNetForPreTrainingcls.predictions.decoder.biascls.predictions.decoder.weightc                     t                                          |           t          |          | _        t	          |          | _        |                                  d S r-   )rC   rD   r  r   r   clsr  r[   s     r)   rD   zFNetForPreTraining.__init__!  sQ       f%%	'// 	r+   c                 $    | j         j        j        S r-   r,  r   r   r   s    r)   get_output_embeddingsz(FNetForPreTraining.get_output_embeddings*      x#++r+   c                 T    || j         j        _        |j        | j         j        _        d S r-   r,  r   r   r   r\   new_embeddingss     r)   set_output_embeddingsz(FNetForPreTraining.set_output_embeddings-  %    '5$$2$7!!!r+   Nrc   r@   r=   rd   labelsnext_sentence_labelr   r   r   c	                    ||n| j         j        }|                     ||||||          }	|	dd         \  }
}|                     |
|          \  }}d}||t	                      } ||                    d| j         j                  |                    d                    } ||                    dd          |                    d                    }||z   }|s||f|	dd         z   }||f|z   n|S t          ||||	j                  S )aH  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        next_sentence_label (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
            (see `input_ids` docstring) Indices should be in `[0, 1]`:

            - 0 indicates sequence B is a continuation of sequence A,
            - 1 indicates sequence B is a random sequence.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, FNetForPreTraining
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("google/fnet-base")
        >>> model = FNetForPreTraining.from_pretrained("google/fnet-base")
        >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
        >>> outputs = model(**inputs)
        >>> prediction_logits = outputs.prediction_logits
        >>> seq_relationship_logits = outputs.seq_relationship_logits
        ```Nr@   r=   rd   r   r   ru   r>   )r  r  r  r   )	r]   r   r   r,  r	   viewrF   r  r   )r\   rc   r@   r=   rd   r7  r8  r   r   r   r   r   r   r   
total_lossloss_fctmasked_lm_lossnext_sentence_lossr   s                      r)   ri   zFNetForPreTraining.forward1  s_   J &1%<kk$+B])))%'!5#  
 
 *1!&48HH_m4\4\11
"5"A'))H%X&7&<&<RAW&X&XZ`ZeZefhZiZijjN!)*@*E*Eb!*L*LNaNfNfgiNjNj!k!k'*<<J 	R')?@7122;NF/9/EZMF**6Q'/$:!/	
 
 
 	
r+   NNNNNNNN)rj   rk   rl   _tied_weights_keysrD   r/  r5  r   r   r"   r   r&  r   r   r  ri   rn   ro   s   @r)   r(  r(    sJ        9:Z[    , , ,8 8 8  -115/304)-6:/3&*B
 B
EL)B
 !.B
 u|,	B

  -B
 &B
 &el3B
 'tnB
 d^B
 
u..	/B
 B
 B
 ^B
 B
 B
 B
 B
r+   r(  c                       e Zd ZddgZ fdZd Zd Ze	 	 	 	 	 	 	 ddee	j
                 dee	j
                 d	ee	j
                 d
ee	j
                 dee	j
                 dee         dee         deeef         fd            Z xZS )FNetForMaskedLMr)  r*  c                     t                                          |           t          |          | _        t	          |          | _        |                                  d S r-   )rC   rD   r  r   r   r,  r  r[   s     r)   rD   zFNetForMaskedLM.__init__{  Q       f%%	"6** 	r+   c                 $    | j         j        j        S r-   r.  r   s    r)   r/  z%FNetForMaskedLM.get_output_embeddings  r0  r+   c                 T    || j         j        _        |j        | j         j        _        d S r-   r2  r3  s     r)   r5  z%FNetForMaskedLM.set_output_embeddings  r6  r+   Nrc   r@   r=   rd   r7  r   r   r   c                    ||n| j         j        }|                     ||||||          }|d         }	|                     |	          }
d}|Kt	                      } ||
                    d| j         j                  |                    d                    }|s|
f|dd         z   }||f|z   n|S t          ||
|j                  S )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
        Nr:  r   r>   ru   r  logitsr   )	r]   r   r   r,  r	   r;  rF   r   r   )r\   rc   r@   r=   rd   r7  r   r   r   r   r   r>  r=  r   s                 r)   ri   zFNetForMaskedLM.forward  s    " &1%<kk$+B])))%'!5#  
 
 "!* HH_55'))H%X&7&<&<RAW&X&XZ`ZeZefhZiZijjN 	Z')GABBK7F3A3M^%..SYY>:K[b[pqqqqr+   NNNNNNN)rj   rk   rl   rA  rD   r/  r5  r   r   r"   r   r&  r   r   r   ri   rn   ro   s   @r)   rC  rC  w  s2       8:Z[    , , ,8 8 8  -115/304)-/3&*'r 'rEL)'r !.'r u|,	'r
  -'r &'r 'tn'r d^'r 
un$	%'r 'r 'r ^'r 'r 'r 'r 'rr+   rC  zT
    FNet Model with a `next sentence prediction (classification)` head on top.
    c                        e Zd Z fdZe	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 deej                 dee         d	ee         d
e	e
ef         fd            Z xZS )FNetForNextSentencePredictionc                     t                                          |           t          |          | _        t	          |          | _        |                                  d S r-   )rC   rD   r  r   r   r,  r  r[   s     r)   rD   z&FNetForNextSentencePrediction.__init__  rE  r+   Nrc   r@   r=   rd   r7  r   r   r   c                    d|v r/t          j        dt                     |                    d          }||n| j        j        }|                     ||||||          }	|	d         }
|                     |
          }d}|At                      } ||	                    dd          |	                    d                    }|s|f|	dd         z   }||f|z   n|S t          |||	j                  S )	a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
            (see `input_ids` docstring). Indices should be in `[0, 1]`:

            - 0 indicates sequence B is a continuation of sequence A,
            - 1 indicates sequence B is a random sequence.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, FNetForNextSentencePrediction
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("google/fnet-base")
        >>> model = FNetForNextSentencePrediction.from_pretrained("google/fnet-base")
        >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
        >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
        >>> encoding = tokenizer(prompt, next_sentence, return_tensors="pt")
        >>> outputs = model(**encoding, labels=torch.LongTensor([1]))
        >>> logits = outputs.logits
        >>> assert logits[0, 0] < logits[0, 1]  # next sentence was random
        ```r8  zoThe `next_sentence_label` argument is deprecated and will be removed in a future version, use `labels` instead.Nr:  r   r>   ru   rI  )warningswarnFutureWarningpopr]   r   r   r,  r	   r;  r   r   )r\   rc   r@   r=   rd   r7  r   r   kwargsr   r   seq_relationship_scoresr?  r=  r   s                  r)   ri   z%FNetForNextSentencePrediction.forward  s:   H !F**M%  
 ZZ 566F%0%<kk$+B])))%'!5#  
 
  
"&((="9"9!'))H!)*A*F*Fr1*M*Mv{{[]!_!_ 	b-/'!""+=F7I7U')F22[aa*#*!/
 
 
 	
r+   rK  )rj   rk   rl   rD   r   r   r"   r   r&  r   r   r   ri   rn   ro   s   @r)   rM  rM    s	             -115/304)-/3&*G
 G
EL)G
 !.G
 u|,	G

  -G
 &G
 'tnG
 d^G
 
u11	2G
 G
 G
 ^G
 G
 G
 G
 G
r+   rM  z
    FNet Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    c                        e Zd Z fdZe	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 deej                 dee         d	ee         d
e	e
ef         fd            Z xZS )FNetForSequenceClassificationc                 6   t                                          |           |j        | _        t          |          | _        t          j        |j                  | _        t          j	        |j
        |j                  | _        |                                  d S r-   rC   rD   
num_labelsr  r   r   rR   rS   rT   rP   rG   
classifierr  r[   s     r)   rD   z&FNetForSequenceClassification.__init__  sy        +f%%	z&"<==)F$68IJJ 	r+   Nrc   r@   r=   rd   r7  r   r   r   c                    ||n| j         j        }|                     ||||||          }|d         }	|                     |	          }	|                     |	          }
d}|Z| j         j        f| j        dk    rd| j         _        nN| j        dk    r7|j        t          j	        k    s|j        t          j
        k    rd| j         _        nd| j         _        | j         j        dk    rWt                      }| j        dk    r1 ||
                                |                                          }n ||
|          }n| j         j        dk    rGt                      } ||
                    d| j                  |                    d                    }n*| j         j        dk    rt                      } ||
|          }|s|
f|dd         z   }||f|z   n|S t!          ||
|j        	          S )
a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr:  r   
regressionsingle_label_classificationmulti_label_classificationr>   ru   rI  )r]   r   r   rT   r[  problem_typerZ  rB   r"   rZ   intr
   squeezer	   r;  r   r   r   )r\   rc   r@   r=   rd   r7  r   r   r   r   rJ  r  r=  r   s                 r)   ri   z%FNetForSequenceClassification.forward"  s   " &1%<kk$+B])))%'!5#  
 
  
]33//{'/?a''/;DK,,_q((flej.H.HFL\a\eLeLe/LDK,,/KDK,{'<77"99?a''#8FNN$4$4fnn6F6FGGDD#8FF33DD)-JJJ+--xB @ @&++b//RR)-III,..x// 	FY,F)-)9TGf$$vE'T&PWPeffffr+   rK  )rj   rk   rl   rD   r   r   r"   r   r&  r   r   r   ri   rn   ro   s   @r)   rW  rW    s	       	 	 	 	 	  -115/304)-/3&*9g 9gEL)9g !.9g u|,	9g
  -9g &9g 'tn9g d^9g 
u..	/9g 9g 9g ^9g 9g 9g 9g 9gr+   rW  c                        e Zd Z fdZe	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 deej                 dee         d	ee         d
e	e
ef         fd            Z xZS )FNetForMultipleChoicec                    t                                          |           t          |          | _        t	          j        |j                  | _        t	          j        |j	        d          | _
        |                                  d S r   )rC   rD   r  r   r   rR   rS   rT   rP   rG   r[  r  r[   s     r)   rD   zFNetForMultipleChoice.__init__a  sl       f%%	z&"<==)F$6:: 	r+   Nrc   r@   r=   rd   r7  r   r   r   c                 N   ||n| j         j        }||j        d         n|j        d         }|)|                    d|                    d                    nd}|)|                    d|                    d                    nd}|)|                    d|                    d                    nd}|=|                    d|                    d          |                    d                    nd}|                     ||||||          }	|	d         }
|                     |
          }
|                     |
          }|                    d|          }d}|t                      } |||          }|s|f|	dd         z   }||f|z   n|S t          |||	j
                  S )a[  
        input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        token_type_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        Nr   r>   r:  ru   rI  )r]   r   r    r;  rY   r   rT   r[  r	   r   r   )r\   rc   r@   r=   rd   r7  r   r   num_choicesr   r   rJ  reshaped_logitsr  r=  r   s                   r)   ri   zFNetForMultipleChoice.forwardk  s   R &1%<kk$+B],5,Aioa((}GZ[\G]>G>SINN2y~~b'9'9:::Y]	M[Mg,,R1D1DR1H1HIIImqGSG_|((\->->r-B-BCCCei ( r=#5#5b#9#9=;M;Mb;Q;QRRR 	 )))%'!5#  
 
  
]33// ++b+66'))H8OV44D 	F%''!""+5F)-)9TGf$$vE(d?ZaZoppppr+   rK  )rj   rk   rl   rD   r   r   r"   r   r&  r   r   r   ri   rn   ro   s   @r)   rd  rd  _  s             -115/304)-/3&*Lq LqEL)Lq !.Lq u|,	Lq
  -Lq &Lq 'tnLq d^Lq 
u//	0Lq Lq Lq ^Lq Lq Lq Lq Lqr+   rd  c                        e Zd Z fdZe	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 deej                 dee         d	ee         d
e	e
ef         fd            Z xZS )FNetForTokenClassificationc                 6   t                                          |           |j        | _        t          |          | _        t          j        |j                  | _        t          j	        |j
        |j                  | _        |                                  d S r-   rY  r[   s     r)   rD   z#FNetForTokenClassification.__init__  sy        +f%%	z&"<==)F$68IJJ 	r+   Nrc   r@   r=   rd   r7  r   r   r   c                    ||n| j         j        }|                     ||||||          }|d         }	|                     |	          }	|                     |	          }
d}|Ft                      } ||
                    d| j                  |                    d                    }|s|
f|dd         z   }||f|z   n|S t          ||
|j	                  S )z
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        Nr:  r   r>   ru   rI  )
r]   r   r   rT   r[  r	   r;  rZ  r   r   )r\   rc   r@   r=   rd   r7  r   r   r   r   rJ  r  r=  r   s                 r)   ri   z"FNetForTokenClassification.forward  s     &1%<kk$+B])))%'!5#  
 
 "!*,,7711'))H8FKKDO<<fkk"ooNND 	FY,F)-)9TGf$$vE$$vWMbccccr+   rK  )rj   rk   rl   rD   r   r   r"   r   r&  r   r   r   ri   rn   ro   s   @r)   rk  rk    s	       
 
 
 
 
  -115/304)-/3&*(d (dEL)(d !.(d u|,	(d
  -(d &(d 'tn(d d^(d 
u++	,(d (d (d ^(d (d (d (d (dr+   rk  c                       e Zd Z fdZe	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 deej                 deej                 d	ee         d
ee         de	e
ef         fd            Z xZS )FNetForQuestionAnsweringc                     t                                          |           |j        | _        t          |          | _        t          j        |j        |j                  | _        | 	                                 d S r-   )
rC   rD   rZ  r  r   r   rP   rG   
qa_outputsr  r[   s     r)   rD   z!FNetForQuestionAnswering.__init__  se        +f%%	)F$68IJJ 	r+   Nrc   r@   r=   rd   start_positionsend_positionsr   r   r   c	                    ||n| j         j        }|                     ||||||          }	|	d         }
|                     |
          }|                    dd          \  }}|                    d                                          }|                    d                                          }d }||t          |                                          dk    r|                    d          }t          |                                          dk    r|                    d          }|                    d          }|	                    d|          }|	                    d|          }t          |          } |||          } |||          }||z   dz  }|s||f|	dd          z   }||f|z   n|S t          ||||	j                  S )	Nr:  r   r   r>   rv   )ignore_indexru   )r  start_logits
end_logitsr   )r]   r   r   rq  splitrb  
contiguouslenrY   clampr	   r   r   )r\   rc   r@   r=   rd   rr  rs  r   r   r   r   rJ  rv  rw  r<  ignored_indexr=  
start_lossend_lossr   s                       r)   ri   z FNetForQuestionAnswering.forward  s    &1%<kk$+B])))%'!5#  
 
 "!*11#)<<r<#:#: j#++B//::<<''++6688

&=+D?''))**Q.."1"9"9""="==%%''((1,, - 5 5b 9 9(--a00M-33A}EEO)//=AAM']CCCH!,@@Jx
M::H$x/14J 	R"J/'!""+=F/9/EZMF**6Q+,:]d]r
 
 
 	
r+   r@  )rj   rk   rl   rD   r   r   r"   r   r&  r   r   r   ri   rn   ro   s   @r)   ro  ro    s       	 	 	 	 	  -115/3042604/3&*4
 4
EL)4
 !.4
 u|,	4

  -4
 "%,/4
  -4
 'tn4
 d^4
 
u22	34
 4
 4
 ^4
 4
 4
 4
 4
r+   ro  )
rC  rd  rM  r(  ro  rW  rk  r   r  r   )Irm   rP  dataclassesr   	functoolsr   typingr   r   r"   r   torch.nnr   r	   r
   utilsr   r   scipyr   activationsr   modeling_layersr   modeling_outputsr   r   r   r   r   r   r   r   r   modeling_utilsr   pytorch_utilsr   r   configuration_fnetr   
get_loggerrj   loggerr*   r.   r6   Moduler8   rq   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r(  rC  rM  rW  rd  rk  ro  __all__r   r+   r)   <module>r     sX      ! ! ! ! ! !       " " " " " " " "        A A A A A A A A A A 7 7 7 7 7 7 7 7   ! ! ! ! ! ! 9 9 9 9 9 9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 . - - - - - 6 6 6 6 6 6       * * * * * * 
	H	%	%M M M> > >
   : : : : :RY : : :z# # # # #	 # # #L    bi   
 
 
 
 
29 
 
 
    ry               *   6a a a a a") a a a8            ")   "* * * * *29 * * *4! ! ! ! !bi ! ! !& & & & &bi & & &	9 	9 	9 	9 	929 	9 	9 	9 * * * * */ * * *.   
= = = = ={ = =  =$ b
 b
 b
 b
 b
# b
 b
 b
J   V
 V
 V
 V
 V
, V
 V
 V
r ;r ;r ;r ;r ;r) ;r ;r ;r|   
R
 R
 R
 R
 R
$7 R
 R
 
R
j   Fg Fg Fg Fg Fg$7 Fg Fg FgR Xq Xq Xq Xq Xq/ Xq Xq Xqv 6d 6d 6d 6d 6d!4 6d 6d 6dr A
 A
 A
 A
 A
2 A
 A
 A
H  r+   