
     `i                        d Z ddlZddlmZ ddlmZmZ ddlZddl	Z	ddl	m
Z
 ddlmZmZmZ ddlmZ dd	lmZmZmZmZmZmZ dd
lmZ ddlmZmZmZ ddlmZ  ej         e!          Z"dZ#d Z$ G d de
j%                  Z& G d de
j%                  Z'de	j(        de)de)de	j(        fdZ* G d de
j%                  Z+ G d de
j%                  Z, G d de
j%                  Z- G d d e
j%                  Z.	 dId#e	j(        d$e)d%e)d&e/d'e/de	j(        fd(Z0 G d) d*e
j%                  Z1 G d+ d,e
j%                  Z2e G d- d.e                      Z3 G d/ d0e
j%                  Z4e ed12           G d3 d4e                                  Z5 ed52           G d6 d7e3                      Z6e G d8 d9e3                      Z7 ed:2           G d; d<e3                      Z8e G d= d>e3                      Z9 ed?2           G d@ dAe3                      Z:e G dB dCe3                      Z;e G dD dEe3                      Z<e G dF dGe3                      Z=g dHZ>dS )Jz!PyTorch Funnel Transformer model.    N)	dataclass)OptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)ModelOutputauto_docstringlogging   )FunnelConfigg    .Ac                    	 ddl }ddl}ddl}n)# t          $ r t                              d            w xY wt          j                            |          }t          	                    d|            |j
                            |          }g }g }	|D ]j\  }
}t          	                    d|
 d|            |j
                            ||
          }|                    |
           |	                    |           kddd	d
ddddddddddd}t          ||	          D ]_\  }
}|
                    d          }
t!          d |
D                       r1t          	                    dd                    |
                      e|
d         dk    rr| }d}|
dd         D ]h}t%          |t&                    s|                    d|          rt+          |                    d|                                          d                   }||j        k     rMd}||j        |         k    r&||j        |         z  }|dz  }||j        |         k    &|j        |         |         }||j        z  }|j        |         }|dk    rt%          |t8                    r	|j        } nm||v rt=          |||                   }	 t=          ||          },# t>          $ r1 tA          dd                    |
           |j!                   d}Y  nw xY w|sstE          |j!                  tE          |j!                  k    r|#                    |j!                  }|dk    r |j$        |          }tK          j&        |          |_'        a| S )z'Load tf checkpoints in a pytorch model.r   NzLoading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see https://www.tensorflow.org/install/ for installation instructions.z&Converting TensorFlow checkpoint from zLoading TF weight z with shape k_headq_headv_head	post_projlinear_1linear_2	attentionffnweightbiasword_embeddings
embeddings)kqvolayer_1layer_2rel_attnffkernelgammabetalookup_tableword_embeddinginput/c              3      K   | ]}|d v V  	dS ))adam_vadam_mAdamWeightDecayOptimizerAdamWeightDecayOptimizer_1global_stepN ).0ns     ~/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/funnel/modeling_funnel.py	<genexpr>z,load_tf_weights_in_funnel.<locals>.<genexpr>\   s<       
 
 nn
 
 
 
 
 
    z	Skipping 	generatorFr   z	layer_\d+zlayer_(\d+)rTr-   )(renumpy
tensorflowImportErrorloggererrorospathabspathinfotrainlist_variablesload_variableappendzipsplitanyjoin
isinstanceFunnelPositionwiseFFN	fullmatchintsearchgroupsnum_hidden_layersblock_sizesblockslayersFunnelRelMultiheadAttentionr_kernelgetattrAttributeErrorprintshapelenreshape	transposetorch
from_numpydata)modelconfigtf_checkpoint_pathrB   nptftf_path	init_varsnamesarraysnamerc   array
_layer_mappointerskippedm_namelayer_index	block_idxs                      r=   load_tf_weights_in_funnelr{   .   s   
			   Q	
 	
 	
 	 goo011G
KKBBBCCC''00IEF   eBBB5BBCCC&&w55Te  + J" 5&)) +3 +3ezz#  
 

 
 
 
 
 	 KK4CHHTNN445557k!!122h 	 	Fg'<== ",,|]cBdBd !"))NF"C"C"J"J"L"LQ"OPP!999 !I%);I)FFF#v'9)'DD!Q	 &);I)FFF &nY7DGG6#;;K%n[9GG3:g7R#S#S!*:%%!':f+=>>%gv66GG%   6chhtnn66DDD"GEE  	37=!!S%5%555gm44!!$U++ +E22GLLs    &5:K7LLc                   v     e Zd Zdeddf fdZ	 ddeej                 deej                 dej        fdZ xZ	S )	FunnelEmbeddingsrk   returnNc                 $   t                                                       t          j        |j        |j        |j                  | _        t          j        |j	        |j
                  | _        t          j        |j                  | _        d S )N)padding_idxeps)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idr#   	LayerNormd_modellayer_norm_eps
layer_normDropouthidden_dropoutdropoutselfrk   	__class__s     r=   r   zFunnelEmbeddings.__init__   so    !|F,=v?Q_e_rsss,v~6;PQQQz&"788r?   	input_idsinputs_embedsc                     ||                      |          }|                     |          }|                     |          }|S N)r#   r   r   )r   r   r   r$   s       r=   forwardzFunnelEmbeddings.forward   sE       00;;M__]33
\\*--
r?   NN)
__name__
__module____qualname__r   r   r   rg   Tensorr   __classcell__r   s   @r=   r}   r}      s        9| 9 9 9 9 9 9 9 ae !%,/GOPUP\G]	       r?   r}   c                       e Zd ZU dZdZeed<   deddf fdZ	 	 d"de	j
        d	ee	j
                 d
ee	j
                 dee	j
                 fdZd
e	j
        de	j
        fdZdede	j        de	j        deee	j
                 eee	j
                          f         fdZde	j
        defdZd#de	j
        dedede	j
        fdZdee	j
        ee	j
                 ee	j
                 f         deeee         ee         f         de	j
        fdZ	 d$dee	j
        ee	j
                 ee	j
                 f         dedede	j
        fdZdee	j
                 dee	j
        ee	j
                 f         fd Zdee	j
                 dee	j
                 fd!Z xZS )%FunnelAttentionStructurez>
    Contains helpers for `FunnelRelMultiheadAttention `.
       cls_token_type_idrk   r~   Nc                     t                                                       || _        t          j        |j                  | _        t          j        |j                  | _        d | _        d S r   )	r   r   rk   r   r   r   sin_dropoutcos_dropoutpooling_multr   s     r=   r   z!FunnelAttentionStructure.__init__   sZ    :f&;<<:f&;<< !r?   r   attention_masktoken_type_idsc                 Z   d| _         |                    d          x| _        }|                     ||j        |j                  }||                     |          nd}| j        j        r;t          j
                            |                    |dz
  |dz
  g          d          nd}||||fS )zCReturns the attention inputs associated to the inputs of the model.r   N)r   r   r   r   )r   sizeseq_lenget_position_embedsdtypedevicetoken_type_ids_to_matrk   separate_clsr   
functionalpadnew_ones)r   r   r   r   r   position_embedstoken_type_matcls_masks           r=   init_attention_inputsz.FunnelAttentionStructure.init_attention_inputs   s     !.!3!3A!6!66w227M<OQ^QeffGUGa33NCCCgk {'BMm44gk7Q;5OPPR^___ 	
  JJr?   c                     |dddddf         |dddf         k    }|| j         k    }|dddddf         |dddf         z  }||z  S )z-Convert `token_type_ids` to `token_type_mat`.N)r   )r   r   r   cls_idscls_mats        r=   r   z.FunnelAttentionStructure.token_type_ids_to_mat   sn    '111d
3~aaag7NN D$::!!!QQQ*%4(88''r?   r   r   r   c                    | j         j        }| j         j        dk    rKt          j        d|dt          j        |                              |          }t          j        d|dz  dt          j        |                              |          }dd||dz  z  z  z  }|dddf         |d         z  }t          j        |          }	|                     |	          }
t          j	        |          }| 
                    |          }t          j        |
|
gd	
          }t          j        ||	gd	
          }t          j        ||gd	
          }t          j        |	 |gd	
          }||||fS t          j        d|dz  dt          j        |                              |          }dd||dz  z  z  z  }t          j        | dz  |dz  dt          j        |                              |          }|dz  }|dddf         |d         z  }|                     t          j        |                    }	| 
                    t          j	        |                    }t          j        |	|gd	
          }t          j        d|t          j        |                              |          }|}g }t          d| j         j                  D ]}|dk    rd}n|                     ||          }d|dz
  z  }|                     |||d          }|dddf         |z   }|                    |                    d          |          }t          j        |d|          }|}d|z  }|                     ||          }|dddf         |z   }|                    |                    d          |          }t          j        |d|          }|                    ||g           |S )a  
        Create and cache inputs related to relative position encoding. Those are very different depending on whether we
        are using the factorized or the relative shift attention:

        For the factorized attention, it returns the matrices (phi, pi, psi, omega) used in the paper, appendix A.2.2,
        final formula.

        For the relative shift attention, it returns all possible vectors R used in the paper, appendix A.2.1, final
        formula.

        Paper link: https://huggingface.co/papers/2006.03236
        
factorizedr         ?r   r   r   r   i'  Ndim)shift)rk   r   attention_typerg   arangeint64tosinr   cosr   catrange
num_blocksstride_pool_posrelative_posexpandr   gatherrO   )r   r   r   r   r   pos_seqfreq_seqinv_freqsinusoid	sin_embedsin_embed_d	cos_embedcos_embed_dphipsipiomega
rel_pos_idzero_offset	pos_embedpos
pooled_posposition_embeds_listblock_indexposition_embeds_poolingstriderel_posposition_embeds_no_poolings                               r=   r   z,FunnelAttentionStructure.get_position_embeds   s    +%;%55 l1gs%+fUUUXXY^__G|Aw!|STZ[[[^^_deeHEh'Q,&?@AHqqq$w'(4.8H	(++I**955K	(++I**955K)[+6B???C)Y	2;;;CK52>>>BI	z952>>>ES%(( |Aw!|STZ[[[^^_deeHEh'Q,&?@AHwhlGaKEK`fgggjjkpqqJ!A+K!!!!T'*Xd^;H((8)<)<==I((8)<)<==I	9i"8bAAAI,q'VLLLOOPUVVCJ#% $Q(>?? c c !##.2++!%!5!5c;!G!GJ ;?3F"//VZq/QQG%aaag.<G%nnW\\!__gFFG.3l9a.Q.Q+ !K++C88!!!!T'*[8!..a'BB-2\)Q-P-P*$++-GI`,abbbb''r?   pos_idr   c                     | j         j        r]|                    d|z   dz   g          }| j         j        r
|dd         n	|dd         }t	          j        ||ddd         gd          S |ddd         S )ze
        Pool `pos_id` while keeping the cls token separate (if `config.separate_cls=True`).
        r   r   r   Nr   )rk   r   
new_tensortruncate_seqrg   r   )r   r   r   cls_pospooled_pos_ids        r=   r   z(FunnelAttentionStructure.stride_pool_pos  s     ;# 		
 ''1k>):Q)>(?@@G,0K,DTF1R4LL&QRQSQS*M9g}SSqS'9:A>>>##A#;r?   r   r   r   r   c                     ||}|d         |d         z
  }|t          |          z  }|||z  z   }|d         |d         z
  }t          j        ||dz
  | t          j        |j                  S )zV
        Build the relative positional vector between `pos` and `pooled_pos`.
        Nr   r   r   r   )rd   rg   r   longr   )	r   r   r   r   r   	ref_point
num_removemax_distmin_dists	            r=   r   z%FunnelAttentionStructure.relative_pos$  sy     JqMCF*	S__,
zF22a=3r7*|HhlVG5:VYV`aaaar?   tensoraxisc                 f    |dS t          t          t          f          rD ]}                     ||          }|S t          |t          t          f          r% t	          |           fd|D                       S |j        z   j        j        r j        j        rt          ddd          nt          ddd          }t          d          gz  |gz   } j        j        rCt          d          gz  t          dd          gz   }t          j        ||         |g          }||         S )zT
        Perform pooling by stride slicing the tensor along the given axis.
        Nc              3   D   K   | ]}                     |          V  d S r   )stride_pool)r;   xr   r   s     r=   r>   z7FunnelAttentionStructure.stride_pool.<locals>.<genexpr>E  s3      JJa 0 0D 9 9JJJJJJr?   r   r   r   )r   )rT   listtupler   typendimrk   r   r   slicerg   r   )r   r   r   ax
axis_slice	enc_slice	cls_slices   ` `    r=   r   z$FunnelAttentionStructure.stride_pool2  s[    >4 dT5M** 	 6 6))&"55M fudm,, 	K4<<JJJJJ6JJJJJJ 	 #'+":qt{?WqE$A]bcgimop]q]q 	 4[[MD(J<7	;# 	Gt,dA/??IYy 16:FFFFi  r?   meanmodec                     dS t          t          t          f          r' t                     fdD                       S  j        j        r@ j        j        rddddf         n}t          j        ddddf         |gd          j	        }|dk    rddddddf         n|dk    rdddddddf         dfdk    r$t          j                            d	
          nedk    r$t          j                            d	
          n;dk    r&t          j                             d	
           nt          d          |dk    rddddddf         S |dk    rdddf         S S )z3Apply 1D pooling to a tensor of size [B x T (x H)].Nc              3   H   K   | ]}                                V  dS ))r  r   N)pool_tensor)r;   r   r  r   r   r   s     r=   r>   z7FunnelAttentionStructure.pool_tensor.<locals>.<genexpr>\  s9      ccWX 0 0d6 0 R Rccccccr?   r   r   r   r   r
   r  T)r   	ceil_modemaxminz0The supported modes are 'mean', 'max' and 'min'.r   )rT   r   r   r   rk   r   r   rg   r   r   r   r   
avg_pool2d
max_pool2dNotImplementedError)r   r   r  r   suffixr   s   ````  r=   r  z$FunnelAttentionStructure.pool_tensorS  s    >4 fudm,, 	d4<<ccccccc\bcccccc;# 	?'+{'?KVAAAssF^^VFYqqq"1"uv6A>>>F{199AAAtQQQ,-FFQYYAAAtQQQM*F!6>>]--ffVW[-\\FFU]]]--ffVW[-\\FFU]]m..wvY].^^^FF%&XYYY199!!!Q1*%%QYY!!!Q$<r?   attention_inputsc                    |\  }}}}| j         j        r| j         j        dk    r)|                     |dd         d          |dd         z   }|                     |d          }|                     |d          }|                     || j         j                  }n| xj        dz  c_        | j         j        dk    r|                     |d          }|                     |ddg          }|                     |ddg          }|                     |d          }|                     || j         j                  }||||f}||fS )zTPool `output` and the proper parts of `attention_inputs` before the attention layer.r   Nr   r   r   r  r	  )rk   pool_q_onlyr   r   r  pooling_typer   )r   outputr  r   r   r   r   s          r=   pre_attention_poolingz.FunnelAttentionStructure.pre_attention_poolingy  se    EUA;" 	M{)\99"&"2"2?2A23F"J"J_]^]_]_M`"`!--na@@N''!44H%%f4;3K%LLFF"{)\99"&"2"2?A"F"F!--nq!fEEN''1a&99H!--n5-IIN%%f4;3K%LLF+^^XV'''r?   c                 P   |\  }}}}| j         j        r| xj        dz  c_        | j         j        dk    r)|dd         |                     |dd         d          z   }|                     |d          }|                     |d          }|                     |d          }||||f}|S )zFPool the proper parts of `attention_inputs` after the attention layer.r   r   Nr   r   r	  r  )rk   r  r   r   r   r  )r   r  r   r   r   r   s         r=   post_attention_poolingz/FunnelAttentionStructure.post_attention_pooling  s    DTA;" 	J"{)\99"1"1""58H8HYZY[Y[I\^_8`8`"`!--na@@N''!44H!--n5-IIN+^^XVr?   r   Nr   )r  r   )r   r   r   __doc__r   rW   __annotations__r   r   rg   r   r   r   r   r   r   r   r   r   r   r   r   r   strr  r  r  r   r   s   @r=   r   r      s          s!| ! ! ! ! ! ! ! 2615	K K|K !.K !.	K
 
u|	K K K K((EL (U\ ( ( ( (N(N(#(;N(8=N(	uU\"Del);$<<	=N( N( N( N(`el     b b bc bSV b_d_k b b b b!elE%,$7el9KKL! CsT#Y./! 
	! ! ! !D wx$ $EL%*=tEL?QQR$Z]$ps$	$ $ $ $L((-el(;(	u|U5<00	1( ( ( (, uU\7J  uUZUaOb                r?   r   positional_attncontext_lenr   r~   c                     | j         \  }}}}t          j        | ||||g          } | d d d d |d d d f         } t          j        | |||||z
  g          } | dd |f         } | S )N.)rc   rg   re   )r  r  r   
batch_sizen_headr   max_rel_lens          r=   _relative_shift_gatherr!    s    />/D,J mOj&+W^5_``O%aaaEFFAAAo6OmOj&'S^afSf5ghhO%c<K<&78Or?   c                        e Zd Zdededdf fdZddZddZ	 dd	ej	        d
ej	        dej	        de
ej	                 dede
ej	        df         fdZ xZS )r^   rk   r   r~   Nc                    t                                                       || _        || _        |j        |j        |j        }}}t          j        |j	                  | _	        t          j        |j
                  | _
        t          j        |||z  d          | _        t          j        |||z            | _        t          j        |||z            | _        t          j        t!          j        ||g                    | _        t          j        t!          j        ||g                    | _        t          j        t!          j        |||g                    | _        t          j        t!          j        ||g                    | _        t          j        t!          j        d||g                    | _        t          j        ||z  |          | _        t          j        ||j                  | _        d|dz  z  | _        d S )NF)r"   r   r   r   g      ?)r   r   rk   r   r   r  d_headr   r   r   attention_dropoutLinearr   r   r   	Parameterrg   zerosr_w_biasr_r_biasr_   r_s_bias	seg_embedr   r   r   r   scale)r   rk   r   r   r  r$  r   s         r=   r   z$FunnelRelMultiheadAttention.__init__  s   &"(.&- j)>??!#F,D!E!Ei&uEEEi&99i&99U[&&1A%B%BCCU[&&1A%B%BCCU['661J%K%KLLU[&&1A%B%BCCek1ff2E&F&FGG6F?G<<,wF4IJJJFCK(


r?   c                 :   | j         j        dk    r|\  }}}}| j        | j        z  }	| j        }
t          j        d||	z   |
          }||dddf         z  }||dddf         z  }t          j        d||          t          j        d||          z   }n|j        d         |k    rdnd}|| j                 |dz
           }| j        | j        z  }| j        }
t          j        d||
          }t          j        d||z   |          }t          |||          }|||z  }|S )	z5Relative attention score for the positional encodingsr   zbinh,dnh->bindNzbind,jd->bnijr   r   ztd,dnh->tnhzbinh,tnh->bnit)
rk   r   r*  r-  r_   rg   einsumrc   r   r!  )r   r   r   r  r   r   r   r   r   uw_rq_r_attentionq_r_attention_1q_r_attention_2r  r   rA   r'   r_heads                      r=   relative_positional_attentionz9FunnelRelMultiheadAttention.relative_positional_attention  sQ    ;%55 #2CS%
*A-C "L)96A:sKKM+c!!!T'l:O+bDk9O $l?OSQQTYT`%U U OO  aK77AAQE   01%!)<A
*A-C \-C88F#l+;VaZPPO4_kSXYYOx'Or?   c                    |dS |j         \  }}}| j        | j        z  }t          j        d||z   | j                  }|dddf                             ||j         d         ||g          }t          j        |dd          \  }	}
t          j        ||
                    |j                   |	                    |j                             }|||z  }|S )z/Relative attention score for the token_type_idsNr   zbind,snd->bnisr   r   r   r   )	rc   r+  r-  rg   r/  r,  r   rQ   where)r   r   r   r   r  r   r  r+  token_type_biasdiff_token_typesame_token_typetoken_type_attns               r=   relative_token_type_attentionz9FunnelRelMultiheadAttention.relative_token_type_attention  s    !1+9+?(
G[ =4:-  ,'7(9JDN[['4077V\RS_V]_j8kll+0;r+R+R+R(+O22>3GHH/J`J`aoauJvJv
 
 x'Or?   Fquerykeyvaluer  output_attentions.c                 "   |\  }}}}	|j         \  }
}}|j         d         }| j        j        | j        j        }}|                     |                              |
|||          }|                     |                              |
|||          }|                     |                              |
|||          }|| j        z  }| j	        | j        z  }t          j        d||z   |          }|                     ||||	          }|                     |||	          }||z   |z   }|j        }|                                }|-|t           d|d d d d f                                         z
  z  z
  }t          j        |d|          }|                     |          }t          j        d||          }|                     |                    |
|||z                      }|                     |          }|                     ||z             }|r||fn|fS )Nr   zbind,bjnd->bnijr   )r   r   zbnij,bjnd->bind)rc   rk   r  r$  r   viewr   r   r-  r)  rg   r/  r6  r=  r   floatINFsoftmaxr%  r   re   r   r   )r   r>  r?  r@  r  rA  r   r   r   r   r  r   _r  r  r$  r   r   r   r)  content_scorer  r<  
attn_scorer   	attn_probattn_vecattn_outr  s                                r=   r   z#FunnelRelMultiheadAttention.forward  s!    EUA!&
GQil+T[-? U##((WffMMS!!&&z;OOU##(([&&QQ$*$=4:-%68I6RR<<_fVackll<<^VU]^^ #_4F
  %%''
%#cQ41N1T1T1V1V-V&WWJM*"EBBB	**955	 < 19fEE >>("2"2:wQW"X"XYY&&x00!122&7F	""fYFr?   r   F)r   r   r   r   rW   r   r6  r=  rg   r   r   boolr   r   r   s   @r=   r^   r^     s        )| )# )$ ) ) ) ) ) ).( ( ( (T   < #(3G 3G|3G \3G |	3G
  -3G  3G 
u|S 	!3G 3G 3G 3G 3G 3G 3G 3Gr?   r^   c                   L     e Zd Zdeddf fdZdej        dej        fdZ xZS )rU   rk   r~   Nc                    t                                                       t          j        |j        |j                  | _        t          |j                 | _	        t          j
        |j                  | _        t          j        |j        |j                  | _        t          j
        |j                  | _        t          j        |j        |j                  | _        d S r   )r   r   r   r&  r   d_innerr   r   
hidden_actactivation_functionr   activation_dropoutr   r   r   r   r   r   r   s     r=   r   zFunnelPositionwiseFFN.__init__>  s    	&.&.AA#)&*;#< "$*V-F"G"G	&.&.AAz&"788,v~v7LMMr?   hiddenc                    |                      |          }|                     |          }|                     |          }|                     |          }|                     |          }|                     ||z             S r   )r   rS  rT  r   r   r   )r   rU  hs      r=   r   zFunnelPositionwiseFFN.forwardG  so    MM&!!$$Q''##A&&MM!LLOOvz***r?   )	r   r   r   r   r   rg   r   r   r   r   s   @r=   rU   rU   =  sy        N| N N N N N N N+el +u| + + + + + + + +r?   rU   c                   j     e Zd Zdededdf fdZ	 ddej        dej        d	ej        d
ede	f
dZ
 xZS )FunnelLayerrk   r   r~   Nc                     t                                                       t          ||          | _        t	          |          | _        d S r   )r   r   r^   r   rU   r    )r   rk   r   r   s      r=   r   zFunnelLayer.__init__Q  s=    4V[II(00r?   Fr>  r?  r@  rA  c                     |                      |||||          }|                     |d                   }|r
||d         fn|fS )NrA  r   r   )r   r    )r   r>  r?  r@  r  rA  attnr  s           r=   r   zFunnelLayer.forwardV  sQ     ~~eS%1AUf~gg$q'""$5DQ  F9Dr?   rM  )r   r   r   r   rW   r   rg   r   rN  r   r   r   r   s   @r=   rY  rY  P  s        1| 1# 1$ 1 1 1 1 1 1 #(
E 
E|
E \
E |	
E  
E 

E 
E 
E 
E 
E 
E 
E 
Er?   rY  c                        e Zd Zdeddf fdZ	 	 	 	 	 ddej        deej                 d	eej                 d
ededede	e
ef         fdZ xZS )FunnelEncoderrk   r~   Nc                     t                                                       | _        t                    | _        t          j        fdt          j                  D                       | _	        d S )Nc                 t    g | ]3\  }t          j        fd t          |          D                       4S )c                 0    g | ]}t                    S r:   rY  )r;   rG  r   rk   s     r=   
<listcomp>z5FunnelEncoder.__init__.<locals>.<listcomp>.<listcomp>j  s#    [[[A{6;??[[[r?   )r   
ModuleListr   )r;   
block_sizer   rk   s     @r=   rd  z*FunnelEncoder.__init__.<locals>.<listcomp>i  sX       +K [[[[[zIZIZ[[[\\  r?   )
r   r   rk   r   attention_structurer   re  	enumerater[   r\   r   s    `r=   r   zFunnelEncoder.__init__d  sy    #;F#C#C m   /89K/L/L  
 
r?   FTr   r   r   rA  output_hidden_statesreturn_dictc           
         |                     |          }| j                            |||          }|}|r|fnd }	|rdnd }
t          | j                  D ]\  }}|                    d          | j        j        rdndk    }|o|dk    }|r| j                            ||          \  }}t          |          D ]\  }}t          | j        j
        |                   D ]x}|dk    o|dk    o|}|r|}| j        j        r|n|x}}n|x}x}} ||||||          }|d         }|r| j                            |          }|r|
|dd          z   }
|r|	|fz   }	y|st          d ||	|
fD                       S t          ||	|
          S )	Nr   r   r:   r   r   r   r\  c              3      K   | ]}||V  	d S r   r:   r;   r'   s     r=   r>   z(FunnelEncoder.forward.<locals>.<genexpr>  (      aaqSTS`S`S`S`S`aar?   last_hidden_statehidden_states
attentions)type_asrg  r   rh  r\   r   rk   r   r  r   block_repeatsr  r  r   r   )r   r   r   r   rA  ri  rj  r  rU  all_hidden_statesall_attentionsr   blockpooling_flagpooled_hiddenry   layerrepeat_index
do_poolingr>  r?  r@  layer_outputs                          r=   r   zFunnelEncoder.forwardo  sC    (//>>3II)) J 
 

 0DN],,$0:d"+DK"8"8 	J 	JK!;;q>>$+2J-QQQPQRL';K!OL 262J2`2`,3 3// '0&6&6 J J"U$)$+*CK*P$Q$Q J JL".!"3!\+:J!\P\J! 5 -040G&Zff]Zee.444e#(5U<L`q#r#r#rL)!_F! m+/+C+Z+Z[k+l+l(( K)7,qrr:J)J+ J,=	,I)JJ$  	baaV->$OaaaaaaGXesttttr?   NNFFTr   r   r   r   r   rg   r   r   rN  r   r   r   r   r   r   s   @r=   r_  r_  c  s        	
| 	
 	
 	
 	
 	
 	
 	
 2615"'%* 0u 0u|0u !.0u !.	0u
  0u #0u 0u 
uo%	&0u 0u 0u 0u 0u 0u 0u 0ur?   r_  TFr   r   
target_lenr   r   c           	      J   |dk    r| S |r| ddddf         }| ddddf         } t          j        | |d          }|rU|r)t          j                            |ddd|dz
  ddf          }|ddd|dz
  f         }t          j        ||gd          }n|ddd|f         }|S )z{
    Upsample tensor `x` to match `target_len` by repeating the tokens `stride` time on the sequence length dimension.
    r   N)repeatsr   r   r   )rg   repeat_interleaver   r   r   r   )r   r   r  r   r   clsr  s          r=   upsampler    s     {{ 2A2haaaeH$QA>>>F ( 	L]&&v1a!Q/JKKF+Z!^++,C=a000;J;'Mr?   c                        e Zd Zdeddf fdZ	 	 	 	 	 ddej        dej        d	eej                 d
eej                 dededede	e
ef         fdZ xZS )FunnelDecoderrk   r~   Nc                     t                                                       | _        t                    | _        t          j        fdt          j                  D                       | _	        d S )Nc                 0    g | ]}t          d           S )r   rc  )r;   rG  rk   s     r=   rd  z*FunnelDecoder.__init__.<locals>.<listcomp>  s#    $f$f$f[%;%;$f$f$fr?   )
r   r   rk   r   rg  r   re  r   num_decoder_layersr]   r   s    `r=   r   zFunnelDecoder.__init__  sf    #;F#C#C m$f$f$f$fU6KdEeEe$f$f$fggr?   FTfinal_hiddenfirst_block_hiddenr   r   rA  ri  rj  c                    t          |dt          | j        j                  dz
  z  |j        d         | j        j        | j        j                  }||z   }	|r|	fnd }
|rdnd }| j                            |	||          }| j	        D ]1} ||	|	|	||          }|d         }	|r||dd          z   }|r|
|	fz   }
2|st          d |	|
|fD                       S t          |	|
|	          S )
Nr   r   )r   r  r   r   r:   rl  r\  r   c              3      K   | ]}||V  	d S r   r:   rn  s     r=   r>   z(FunnelDecoder.forward.<locals>.<genexpr>  ro  r?   rp  )r  rd   rk   r[   rc   r   r   rg  r   r]   r   r   )r   r  r  r   r   rA  ri  rj  upsampled_hiddenrU  rv  rw  r  r{  r~  s                  r=   r   zFunnelDecoder.forward  sW    $T[4559:)/211
 
 
 "$66)=GVII40:d3II)) J 
 
 [ 	B 	BE 59I]noooL!!_F  C!/,qrr2B!B# B$5	$A! 	baaV->$OaaaaaaGXesttttr?   r  r  r   s   @r=   r  r    s        h| h h h h h h h 2615"'%* 'u 'ul'u "L'u !.	'u
 !.'u  'u #'u 'u 
uo%	&'u 'u 'u 'u 'u 'u 'u 'ur?   r  c                   P     e Zd ZdZdeddf fdZdej        dej        fdZ xZ	S )FunnelDiscriminatorPredictionszEPrediction module for the discriminator, made up of two dense layers.rk   r~   Nc                     t                                                       || _        t          j        |j        |j                  | _        t          j        |j        d          | _        d S r  )r   r   rk   r   r&  r   densedense_predictionr   s     r=   r   z'FunnelDiscriminatorPredictions.__init__  sS    Yv~v~>>
 "	&.! < <r?   discriminator_hidden_statesc                     |                      |          }t          | j        j                 |          }|                     |                              d          }|S )Nr   )r  r   rk   rR  r  squeeze)r   r  rr  logitss       r=   r   z&FunnelDiscriminatorPredictions.forward  sQ    

#>??t{56}EE&&}55==bAAr?   )
r   r   r   r  r   r   rg   r   r   r   r   s   @r=   r  r    sw        OO=| = = = = = = =5< EL        r?   r  c                   (    e Zd ZU eed<   eZdZd ZdS )FunnelPreTrainedModelrk   funnelc                    |j         j        }|                    d          dk    rt          |dd           u| j        j        7|j        j        \  }}t          j	        dt          ||z             z            }n| j        j        }t          j                            |j        |           t          |dd           't          j                            |j        d           d S d S |dk    rt          j                            |j        | j        j        	           t          j                            |j        | j        j        	           t          j                            |j        | j        j        	           t          j                            |j        | j        j        	           t          j                            |j        | j        j        	           d S |d
k    r| j        j        dn| j        j        }t          j                            |j        j        |           |j        j        7|j        j        j        |j        j                                                  d S d S d S )Nr&  r   r!   r   )stdr"   g        r^   )br}   )r   r   findr`   rk   initializer_stdr!   rc   rm   sqrtrD  r   initnormal_	constant_r"   uniform_r)  initializer_ranger*  r_   r+  r,  r#   r   ri   zero_)r   module	classnamefan_outfan_inr  s         r=   _init_weightsz#FunnelPreTrainedModel._init_weights  s   $-	>>(##r))vx..:;.6&,m&9OGV'#fw.>(?(?"?@@CC+5C3777vvt,,8!!&+s33333 98777GV_0MNNNGV_0MNNNGV_0MNNNGV_0MNNNGV-1NOOOOO,,,4<##$+B]CGOOF29sOCCC%1=&-263I3UV\\^^^^^	 -, >=r?   N)	r   r   r   r   r  r{   load_tf_weightsbase_model_prefixr  r:   r?   r=   r  r    sA         /O _ _ _ _ _r?   r  c                   P     e Zd Zdededdf fdZdej        dej        fdZ xZ	S )FunnelClassificationHeadrk   n_labelsr~   Nc                 
   t                                                       t          j        |j        |j                  | _        t          j        |j                  | _        t          j        |j        |          | _	        d S r   )
r   r   r   r&  r   linear_hiddenr   r   r   
linear_out)r   rk   r  r   s      r=   r   z!FunnelClassificationHead.__init__  s^    Yv~v~FFz&"788)FNH==r?   rU  c                     |                      |          }t          j        |          }|                     |          }|                     |          S r   )r  rg   tanhr   r  )r   rU  s     r=   r   z FunnelClassificationHead.forward  sG    ##F++F##f%%v&&&r?   )
r   r   r   r   rW   r   rg   r   r   r   r   s   @r=   r  r    sx        >| >s >t > > > > > >'el 'u| ' ' ' ' ' ' ' 'r?   r  z2
    Output type of [`FunnelForPreTraining`].
    )custom_introc                       e Zd ZU dZdZeej                 ed<   dZ	eej                 ed<   dZ
eeej                          ed<   dZeeej                          ed<   dS )FunnelForPreTrainingOutputa1  
    loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
        Total loss of the ELECTRA-style objective.
    logits (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
        Prediction scores of the head (scores for each token before SoftMax).
    Nlossr  rr  rs  )r   r   r   r  r  r   rg   FloatTensorr  r  rr  r   rs  r:   r?   r=   r  r  &  s           )-D(5$
%,,,*.FHU&'...8<M8E%"345<<<59Ju01299999r?   r  z
    The base Funnel Transformer Model transformer outputting raw hidden-states without upsampling head (also called
    decoder) or any task-specific head on top.
    c                   d    e Zd Zdeddf fdZdej        fdZdej        ddfdZe		 	 	 	 	 	 	 	 	 dde
ej                 d	e
ej                 d
e
ej                 de
ej                 de
ej                 de
ej                 de
e         de
e         de
e         deeef         fd            Z xZS )FunnelBaseModelrk   r~   Nc                     t                                          |           t          |          | _        t	          |          | _        |                                  d S r   )r   r   r}   r$   r_  encoder	post_initr   s     r=   r   zFunnelBaseModel.__init__A  sQ       *622$V,, 	r?   c                     | j         j        S r   r$   r#   r   s    r=   get_input_embeddingsz$FunnelBaseModel.get_input_embeddingsJ      ..r?   new_embeddingsc                     || j         _        d S r   r  r   r  s     r=   set_input_embeddingsz$FunnelBaseModel.set_input_embeddingsM      *8'''r?   r   r   r   position_ids	head_maskr   rA  ri  rj  c
                 <   ||n| j         j        }||n| j         j        }|	|	n| j         j        }	||t	          d          |+|                     ||           |                                }
n.||                                d d         }
nt	          d          ||j        n|j        }|t          j	        |
|          }|!t          j
        |
t          j        |          }|                     ||          }|                     ||||||	          }|S )NDYou cannot specify both input_ids and inputs_embeds at the same timer   5You have to specify either input_ids or inputs_embedsr   r   r   r   r   rA  ri  rj  )rk   rA  ri  use_return_dict
ValueError%warn_if_padding_and_no_attention_maskr   r   rg   onesr(  r   r$   r  )r   r   r   r   r  r  r   rA  ri  rj  input_shaper   encoder_outputss                r=   r   zFunnelBaseModel.forwardP  sX    2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B] ]%>cddd"66y.QQQ#..**KK&',,..ss3KKTUUU%.%:!!@T!"ZFCCCN!"[EJvVVVN 	OO,,))/!5# ' 
 
 r?   	NNNNNNNNNr   r   r   r   r   r   r   r  r  r   r   rg   r   rN  r   r   r   r   r   r   s   @r=   r  r  :  sp       |       /bl / / / /92< 9D 9 9 9 9  -11515/3,004,0/3&*/ /EL)/ !./ !.	/
 u|,/ EL)/  -/ $D>/ 'tn/ d^/ 
uo%	&/ / / ^/ / / / /r?   r  c                   ,    e Zd Zdeddf fdZdej        fdZdej        ddfdZe		 	 	 	 	 	 	 dde
ej                 d	e
ej                 d
e
ej                 de
ej                 de
e         de
e         de
e         deeef         fd            Z xZS )FunnelModelrk   r~   Nc                     t                                          |           || _        t          |          | _        t          |          | _        t          |          | _        | 	                                 d S r   )
r   r   rk   r}   r$   r_  r  r  decoderr  r   s     r=   r   zFunnelModel.__init__  sg       *622$V,,$V,, 	r?   c                     | j         j        S r   r  r  s    r=   r  z FunnelModel.get_input_embeddings  r  r?   r  c                     || j         _        d S r   r  r  s     r=   r  z FunnelModel.set_input_embeddings  r  r?   r   r   r   r   rA  ri  rj  c           	         ||n| j         j        }||n| j         j        }||n| j         j        }||t	          d          |+|                     ||           |                                }n.||                                d d         }nt	          d          ||j        n|j        }	|t          j	        ||	          }|!t          j
        |t          j        |	          }|                     ||          }|                     ||||d|          }
|                     |
d	         |
d
         | j         j        d	                  |||||          }|sEd	}|d	         f}|r|d
z  }||
d
         ||         z   fz   }|r|d
z  }||
d         ||         z   fz   }|S t!          |d	         |r|
j        |j        z   nd |r|
j        |j        z   nd           S )Nr  r   r  r  r   r  Tr  r   r   )r  r  r   r   rA  ri  rj  r   rp  )rk   rA  ri  r  r  r  r   r   rg   r  r(  r   r$   r  r  r[   r   rr  rs  )r   r   r   r   r   rA  ri  rj  r  r   r  decoder_outputsidxoutputss                 r=   r   zFunnelModel.forward  sh    2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B] ]%>cddd"66y.QQQ#..**KK&',,..ss3KKTUUU%.%:!!@T!"ZFCCCN!"[EJvVVVN 	OO,,))/!%# ' 
 
 ,,(+.q1$+2I!2LM))/!5# ' 
 
  		C&q)+G# Qq!_Q%7/#:N%N$PP  Qq!_Q%7/#:N%N$PPN-a0#?8?;XXXTeo2_5OOOko
 
 
 	
r?   )NNNNNNNr  r   s   @r=   r  r    sY       |       /bl / / / /92< 9D 9 9 9 9  -1151504,0/3&*H
 H
EL)H
 !.H
 !.	H

  -H
 $D>H
 'tnH
 d^H
 
uo%	&H
 H
 H
 ^H
 H
 H
 H
 H
r?   r  z
    Funnel Transformer model with a binary classification head on top as used during pretraining for identifying
    generated tokens.
    c                       e Zd Zdeddf fdZe	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 d	eej                 d
ee	         dee	         dee	         de
eef         fd            Z xZS )FunnelForPreTrainingrk   r~   Nc                     t                                          |           t          |          | _        t	          |          | _        |                                  d S r   )r   r   r  r  r  discriminator_predictionsr  r   s     r=   r   zFunnelForPreTraining.__init__  sP       !&)))G)O)O&r?   r   r   r   r   labelsrA  ri  rj  c	           	         ||n| j         j        }|                     |||||||          }	|	d         }
|                     |
          }d}|t	          j                    }|s|                    d|
j        d                   dk    }|                    d|
j        d                   |         }||         } |||                                          }n= ||                    d|
j        d                   |                                          }|s|f|	dd         z   }||f|z   n|S t          |||	j
        |	j                  S )a"  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the ELECTRA-style loss. Input should be a sequence of tokens (see `input_ids`
            docstring) Indices should be in `[0, 1]`:

            - 0 indicates the token is an original token,
            - 1 indicates the token was replaced.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, FunnelForPreTraining
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("funnel-transformer/small")
        >>> model = FunnelForPreTraining.from_pretrained("funnel-transformer/small")

        >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
        >>> logits = model(**inputs).logits
        ```Nr   r   r   rA  ri  rj  r   r   r   r  r  rr  rs  )rk   r  r  r  r   r   rC  rc   rD  r  rr  rs  )r   r   r   r   r   r  rA  ri  rj  r  discriminator_sequence_outputr  r  loss_fctactive_lossactive_logitsactive_labelsr  s                     r=   r   zFunnelForPreTraining.forward  s   @ &1%<kk$+B]&*kk))'/!5# '2 '
 '
# )DA(F%//0MNN+--H),11"6S6YZ[6\]]abb &B0M0STU0V W WXc d &{ 3x}/B/B/D/DEExB0M0STU0V W WY_YeYeYgYghh 	FY!<QRR!@@F)-)9TGf$$vE)5C2=	
 
 
 	
r?   NNNNNNNN)r   r   r   r   r   r   r   rg   r   rN  r   r   r  r   r   r   s   @r=   r  r    s.       |         -1151504)-,0/3&*B
 B
EL)B
 !.B
 !.	B

  -B
 &B
 $D>B
 'tnB
 d^B
 
u00	1B
 B
 B
 ^B
 B
 B
 B
 B
r?   r  c                   N    e Zd ZdgZdeddf fdZdej        fdZdej	        ddfdZ
e	 	 	 	 	 	 	 	 dd	eej                 d
eej                 deej                 deej                 deej                 dee         dee         dee         deeef         fd            Z xZS )FunnelForMaskedLMzlm_head.weightrk   r~   Nc                     t                                          |           t          |          | _        t	          j        |j        |j                  | _        | 	                                 d S r   )
r   r   r  r  r   r&  r   r   lm_headr  r   s     r=   r   zFunnelForMaskedLM.__init__:  sZ       !&))y1BCC 	r?   c                     | j         S r   r  r  s    r=   get_output_embeddingsz'FunnelForMaskedLM.get_output_embeddingsC  s
    |r?   r  c                     || _         d S r   r  r  s     r=   set_output_embeddingsz'FunnelForMaskedLM.set_output_embeddingsF  s    %r?   r   r   r   r   r  rA  ri  rj  c	           	         ||n| j         j        }|                     |||||||          }	|	d         }
|                     |
          }d}|Kt	                      } ||                    d| j         j                  |                    d                    }|s|f|	dd         z   }||f|z   n|S t          |||	j        |	j	                  S )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        Nr  r   r   r   r  )
rk   r  r  r  r   rC  r   r   rr  rs  )r   r   r   r   r   r  rA  ri  rj  r  rq  prediction_logitsmasked_lm_lossr  r  s                  r=   r   zFunnelForMaskedLM.forwardI  s   $ &1%<kk$+B]++))'/!5#  
 
 $AJ LL):;;'))H%X&7&<&<RAW&X&XZ`ZeZefhZiZijjN 	Z')GABBK7F3A3M^%..SYY$!/)	
 
 
 	
r?   r  )r   r   r   _tied_weights_keysr   r   r   r&  r  r   r  r   r   rg   r   rN  r   r   r   r   r   r   s   @r=   r  r  6  se       *+|       ry    &BL &T & & & &  -1151504)-,0/3&*.
 .
EL).
 !..
 !.	.

  -.
 &.
 $D>.
 'tn.
 d^.
 
un$	%.
 .
 .
 ^.
 .
 .
 .
 .
r?   r  z
    Funnel Transformer Model with a sequence classification/regression head on top (two linear layer on top of the
    first timestep of the last hidden state) e.g. for GLUE tasks.
    c                       e Zd Zdeddf fdZe	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 d	eej                 d
ee	         dee	         dee	         de
eef         fd            Z xZS )FunnelForSequenceClassificationrk   r~   Nc                     t                                          |           |j        | _        || _        t	          |          | _        t          ||j                  | _        |                                  d S r   )	r   r   
num_labelsrk   r  r  r  
classifierr  r   s     r=   r   z(FunnelForSequenceClassification.__init__  se        +%f--266;LMMr?   r   r   r   r   r  rA  ri  rj  c	           	         ||n| j         j        }|                     |||||||          }	|	d         }
|
dddf         }|                     |          }d}|Z| j         j        f| j        dk    rd| j         _        nN| j        dk    r7|j        t          j        k    s|j        t          j	        k    rd| j         _        nd| j         _        | j         j        dk    rWt                      }| j        dk    r1 ||                                |                                          }n |||          }n| j         j        dk    rGt                      } ||                    d| j                  |                    d                    }n*| j         j        dk    rt                      } |||          }|s|f|	dd         z   }||f|z   n|S t          |||	j        |	j        	          S )
a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr  r   r   
regressionsingle_label_classificationmulti_label_classificationr   r  )rk   r  r  r  problem_typer  r   rg   r   rW   r	   r  r   rC  r   r   rr  rs  )r   r   r   r   r   r  rA  ri  rj  r  rq  pooled_outputr  r  r  r  s                   r=   r   z'FunnelForSequenceClassification.forward  s   $ &1%<kk$+B]++))'/!5#  
 
 $AJ)!!!Q$///{'/?a''/;DK,,_q((flej.H.HFL\a\eLeLe/LDK,,/KDK,{'<77"99?a''#8FNN$4$4fnn6F6FGGDD#8FF33DD)-JJJ+--xB @ @&++b//RR)-III,..x// 	FY,F)-)9TGf$$vE'!/)	
 
 
 	
r?   r  )r   r   r   r   r   r   r   rg   r   rN  r   r   r   r   r   r   s   @r=   r  r  {  s.       |         -1151504)-,0/3&*A
 A
EL)A
 !.A
 !.	A

  -A
 &A
 $D>A
 'tnA
 d^A
 
u..	/A
 A
 A
 ^A
 A
 A
 A
 A
r?   r  c                       e Zd Zdeddf fdZe	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 d	eej                 d
ee	         dee	         dee	         de
eef         fd            Z xZS )FunnelForMultipleChoicerk   r~   Nc                     t                                          |           t          |          | _        t	          |d          | _        |                                  d S r  )r   r   r  r  r  r  r  r   s     r=   r   z FunnelForMultipleChoice.__init__  sQ       %f--261==r?   r   r   r   r   r  rA  ri  rj  c	           	      J   ||n| j         j        }||j        d         n|j        d         }	|)|                    d|                    d                    nd}|)|                    d|                    d                    nd}|)|                    d|                    d                    nd}|=|                    d|                    d          |                    d                    nd}|                     |||||||          }
|
d         }|dddf         }|                     |          }|                    d|	          }d}|t                      } |||          }|s|f|
dd         z   }||f|z   n|S t          |||
j	        |
j
                  S )aJ  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        Nr   r   r  r   r  )rk   r  rc   rC  r   r  r  r   r   rr  rs  )r   r   r   r   r   r  rA  ri  rj  num_choicesr  rq  r  r  reshaped_logitsr  r  r  s                     r=   r   zFunnelForMultipleChoice.forward  s   $ &1%<kk$+B],5,Aioa((}GZ[\G]>G>SINN2y~~b'9'9:::Y]	M[Mg,,R1D1DR1H1HIIImqM[Mg,,R1D1DR1H1HIIImq ( r=#5#5b#9#9=;M;Mb;Q;QRRR 	 ++))'/!5#  
 
 $AJ)!!!Q$/// ++b+66'))H8OV44D 	F%''!""+5F)-)9TGf$$vE("!/)	
 
 
 	
r?   r  )r   r   r   r   r   r   r   rg   r   rN  r   r   r   r   r   r   s   @r=   r  r    s       |         -1151504)-,0/3&*:
 :
EL):
 !.:
 !.	:

  -:
 &:
 $D>:
 'tn:
 d^:
 
u//	0:
 :
 :
 ^:
 :
 :
 :
 :
r?   r  c                       e Zd Zdeddf fdZe	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 d	eej                 d
ee	         dee	         dee	         de
eef         fd            Z xZS )FunnelForTokenClassificationrk   r~   Nc                 6   t                                          |           |j        | _        t          |          | _        t          j        |j                  | _        t          j	        |j
        |j                  | _        |                                  d S r   )r   r   r  r  r  r   r   r   r   r&  r   r  r  r   s     r=   r   z%FunnelForTokenClassification.__init__  sy        +!&))z&"788)F$68IJJ 	r?   r   r   r   r   r  rA  ri  rj  c	           	         ||n| j         j        }|                     |||||||          }	|	d         }
|                     |
          }
|                     |
          }d}|Ft                      } ||                    d| j                  |                    d                    }|s|f|	dd         z   }||f|z   n|S t          |||	j	        |	j
                  S )z
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        Nr  r   r   r   r  )rk   r  r  r   r  r   rC  r  r   rr  rs  )r   r   r   r   r   r  rA  ri  rj  r  rq  r  r  r  r  s                  r=   r   z$FunnelForTokenClassification.forward&  s     &1%<kk$+B]++))'/!5#  
 
 $AJ LL):;;!233'))H8FKKDO<<fkk"ooNND 	FY,F)-)9TGf$$vE$!/)	
 
 
 	
r?   r  )r   r   r   r   r   r   r   rg   r   rN  r   r   r   r   r   r   s   @r=   r  r    s       	| 	 	 	 	 	 	 	  -1151504)-,0/3&*-
 -
EL)-
 !.-
 !.	-

  --
 &-
 $D>-
 'tn-
 d^-
 
u++	,-
 -
 -
 ^-
 -
 -
 -
 -
r?   r  c                   4    e Zd Zdeddf fdZe	 	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 d	eej                 d
eej                 dee	         dee	         dee	         de
eef         fd            Z xZS )FunnelForQuestionAnsweringrk   r~   Nc                     t                                          |           |j        | _        t          |          | _        t          j        |j        |j                  | _        | 	                                 d S r   )
r   r   r  r  r  r   r&  r   
qa_outputsr  r   s     r=   r   z#FunnelForQuestionAnswering.__init__Y  se        +!&)))F$68IJJ 	r?   r   r   r   r   start_positionsend_positionsrA  ri  rj  c
           	         |	|	n| j         j        }	|                     |||||||	          }
|
d         }|                     |          }|                    dd          \  }}|                    d                                          }|                    d                                          }d }||t          |                                          dk    r|	                    d          }t          |                                          dk    r|                    d          }|                    d          }|
                    d|          }|
                    d|          }t          |          } |||          } |||          }||z   dz  }|	s||f|
dd          z   }||f|z   n|S t          ||||
j        |
j                  S )	Nr  r   r   r   r   )ignore_indexr   )r  start_logits
end_logitsrr  rs  )rk   r  r  r  rQ   r  
contiguousrd   r   squezeclampr   r   rr  rs  )r   r   r   r   r   r  r  rA  ri  rj  r  rq  r  r  r   
total_lossignored_indexr  
start_lossend_lossr  s                        r=   r   z"FunnelForQuestionAnswering.forwardc  s    &1%<kk$+B]++))'/!5#  
 
 $AJ!233#)<<r<#:#: j#++B//::<<''++6688

&=+D?''))**Q.."1"8"8"<"<=%%''((1,, - 5 5b 9 9(--a00M-33A}EEO)//=AAM']CCCH!,@@Jx
M::H$x/14J 	R"J/'!""+=F/9/EZMF**6Q+%!!/)
 
 
 	
r?   r  )r   r   r   r   r   r   r   rg   r   rN  r   r   r   r   r   r   s   @r=   r  r  W  s0       |         -11515042604,0/3&*:
 :
EL):
 !.:
 !.	:

  -:
 "%,/:
  -:
 $D>:
 'tn:
 d^:
 
u22	3:
 :
 :
 ^:
 :
 :
 :
 :
r?   r  )
r  r  r  r  r  r  r  r  r  r{   )TF)?r  rH   dataclassesr   typingr   r   rC   rm   rg   r   torch.nnr   r   r	   activationsr   modeling_outputsr   r   r   r   r   r   modeling_utilsr   utilsr   r   r   configuration_funnelr   
get_loggerr   rF   rE  r{   Moduler}   r   r   rW   r!  r^   rU   rY  r_  rN  r  r  r  r  r  r  r  r  r  r  r  r  r  r  __all__r:   r?   r=   <module>r3     s   ( ' 				 ! ! ! ! ! ! " " " " " " " "            A A A A A A A A A A ! ! ! ! ! !                . - - - - - 9 9 9 9 9 9 9 9 9 9 . . . . . . 
	H	%	% 
W W Wt    ry   "A  A  A  A  A ry A  A  A HEL s SV [`[g     MG MG MG MG MG") MG MG MG`+ + + + +BI + + +&E E E E E") E E E&<u <u <u <u <uBI <u <u <u@ di | .1AE\`
\   ,.u .u .u .u .uBI .u .u .ub    RY     _ _ _ _ _O _ _ _<' ' ' ' 'ry ' ' '   
: : : : : : :  :   @ @ @ @ @+ @ @ @F Z
 Z
 Z
 Z
 Z
' Z
 Z
 Z
z   L
 L
 L
 L
 L
0 L
 L
 L
^ A
 A
 A
 A
 A
- A
 A
 A
H   M
 M
 M
 M
 M
&; M
 M
 M
` D
 D
 D
 D
 D
3 D
 D
 D
N :
 :
 :
 :
 :
#8 :
 :
 :
z F
 F
 F
 F
 F
!6 F
 F
 F
R  r?   