
     `iG                      d Z ddlmZ ddlZddlmZ ddlZddlZ	ddl
mZ ddlmZmZmZmZmZmZmZmZ ddlmZmZmZmZmZmZmZmZmZmZm Z m!Z! dd	l"m#Z#m$Z$m%Z% dd
l&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z, ddl-m.Z.  e+j/        e0          Z1dZ2dZ3dZ4dZ5dZ6dZ7dZ8dZ9dZ:dZ;dZ<dZ=dZ> G d d          Z? G d dej@        jA                  ZB G d dej@        jC                  ZD G d  d!ej@        jA                  ZEeDeEd"ZF G d# d$ej@        jA                  ZG G d% d&ej@        jA                  ZH G d' d(ej@        jA                  ZI G d) d*ej@        jA                  ZJ G d+ d,ej@        jA                  ZK G d- d.ej@        jA                  ZL G d/ d0ej@        jA                  ZM G d1 d2ej@        jA                  ZN G d3 d4ej@        jA                  ZO G d5 d6ej@        jA                  ZP G d7 d8ej@        jA                  ZQ G d9 d:ej@        jA                  ZR G d; d<ej@        jA                  ZS G d= d>ej@        jA                  ZT G d? d@ej@        jA                  ZU G dA dBej@        jA                  ZVe  G dC dDej@        jA                              ZW G dE dFe          ZXe G dG dHe'                      ZYdIZZdJZ[ e)dKeZ           G dL dMeX                      Z\ e)dNeZ           G dO dPeXe?                      Z] e)dQeZ           G dR dSeXe                      Z^ G dT dUej@        jA                  Z_ e)dVeZ           G dW dXeXe                      Z` e)dYeZ           G dZ d[eXe                      Za e)d\eZ           G d] d^eXe                      Zb e)d_eZ           G d` daeXe                      Zc e)dbeZ           G dc ddeXe                      Zdg deZedS )fzTF 2.0 MobileBERT model.    )annotationsN)	dataclass   )get_tf_activation)TFBaseModelOutputTFBaseModelOutputWithPoolingTFMaskedLMOutputTFMultipleChoiceModelOutputTFNextSentencePredictorOutputTFQuestionAnsweringModelOutputTFSequenceClassifierOutputTFTokenClassifierOutput)TFMaskedLanguageModelingLossTFModelInputTypeTFMultipleChoiceLossTFNextSentencePredictionLossTFPreTrainedModelTFQuestionAnsweringLossTFSequenceClassificationLossTFTokenClassificationLossget_initializerkeraskeras_serializableunpack_inputs)check_embeddings_within_bounds
shape_liststable_softmax)ModelOutputadd_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )MobileBertConfigzgoogle/mobilebert-uncasedr%   z"vumichien/mobilebert-finetuned-nerzK['I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', 'I-LOC']gQ?z%vumichien/mobilebert-uncased-squad-v2z'a nice puppet'gףp=
@      zvumichien/emo-mobilebertz'others'z4.72c                      e Zd ZdZddZdS )	TFMobileBertPreTrainingLossz
    Loss function suitable for BERT-like pretraining, that is, the task of pretraining a language model by combining
    NSP + MLM. .. note:: Any label of -100 will be ignored (along with the corresponding logits) in the loss
    computation.
    labels	tf.Tensorlogitsreturnc                   t           j                            dt           j        j        j                  } |t
          j                            |d                   |d                   }t          j        |d         dk    |j	                  }||z  }t          j
        |          t          j
        |          z  } |t
          j                            |d                   |d	                   }t          j        |d         dk    |j	                  }	||	z  }
t          j
        |
          t          j
        |	          z  }t          j        ||z   d
          S )NT)from_logits	reductionr*   r   )y_truey_predidtypenext_sentence_labelr$   )r$   )r   lossesSparseCategoricalCrossentropy	ReductionNONEtfnnrelucastr4   
reduce_sumreshape)selfr*   r,   loss_fnunmasked_lm_losseslm_loss_maskmasked_lm_lossesreduced_masked_lm_lossunmasked_ns_lossns_loss_maskmasked_ns_lossreduced_masked_ns_losss               /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/mobilebert/modeling_tf_mobilebert.pyhf_compute_lossz+TFMobileBertPreTrainingLoss.hf_compute_loss^   s>   ,<<Y^YeYoYt<uu %WBEJJvh7G,H,HQWXYQZ[[[ wvh/47?Q?WXXX-<!#/?!@!@2=Q]C^C^!^ #7"%**V<Q5R*S*S\bcd\efffwv&;<DL\Lbccc)L8!#~!>!>|A\A\!\z03II4PPP    N)r*   r+   r,   r+   r-   r+   )__name__
__module____qualname____doc__rK    rL   rJ   r)   r)   W   s8         Q Q Q Q Q QrL   r)   c                  ,     e Zd Z fdZd ZddZ xZS )TFMobileBertIntermediatec                    t                      j        di | t          j                            |j        d          | _        t          |j        t                    rt          |j                  | _        n|j        | _        || _        d S )NdensenamerQ   )super__init__r   layersDenseintermediate_sizerU   
isinstance
hidden_actstrr   intermediate_act_fnconfigr@   ra   kwargs	__class__s      rJ   rY   z!TFMobileBertIntermediate.__init__t   s    ""6"""\''(@w'OO
f'-- 	9'89J'K'KD$$'-'8D$rL   c                Z    |                      |          }|                     |          }|S N)rU   r`   r@   hidden_statess     rJ   callzTFMobileBertIntermediate.call   s,    

=1100??rL   Nc                   | j         rd S d| _         t          | dd           `t          j        | j        j                  5  | j                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S NTrU   )	builtgetattrr:   
name_scoperU   rW   buildra   true_hidden_sizer@   input_shapes     rJ   ro   zTFMobileBertIntermediate.build   s    : 	F
4$''3tz// M M
  $dk.J!KLLLM M M M M M M M M M M M M M M M M M 43    (A55A9<A9rf   rM   rN   rO   rY   ri   ro   __classcell__rd   s   @rJ   rS   rS   s   sc        	 	 	 	 	  M M M M M M M MrL   rS   c                  *     e Zd Z fdZd fd	Z xZS )TFLayerNormc                H    || _          t                      j        |i | d S rf   )	feat_sizerX   rY   )r@   rz   argsrc   rd   s       rJ   rY   zTFLayerNorm.__init__   s,    "$)&)))))rL   Nc                Z    t                                          d d | j        g           d S rf   )rX   ro   rz   r@   rr   rd   s     rJ   ro   zTFLayerNorm.build   s(    tT4>233333rL   rf   )rM   rN   rO   rY   ro   ru   rv   s   @rJ   rx   rx      sV        * * * * *4 4 4 4 4 4 4 4 4 4rL   rx   c                  2     e Zd Zd fd	Z fdZddZ xZS )	TFNoNormNc                H     t                      j        di | || _        d S )NrQ   )rX   rY   rz   )r@   rz   epsilonrc   rd   s       rJ   rY   zTFNoNorm.__init__   s+    ""6""""rL   c                    |                      d| j        gd          | _        |                      d| j        gd          | _        t	                                          |           d S )Nbiaszeros)shapeinitializerweightones)
add_weightrz   r   r   rX   ro   r}   s     rJ   ro   zTFNoNorm.build   s]    OOF4>2BPWOXX	ooht~6FTZo[[k"""""rL   inputsr+   c                &    || j         z  | j        z   S rf   )r   r   )r@   r   s     rJ   ri   zTFNoNorm.call   s    #di//rL   rf   )r   r+   )rM   rN   rO   rY   ro   ri   ru   rv   s   @rJ   r   r      sj        # # # # # ## # # # #
0 0 0 0 0 0 0 0rL   r   )
layer_normno_normc                  2     e Zd ZdZ fdZddZddZ xZS )	TFMobileBertEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                    t                      j        di | |j        | _        |j        | _        || _        |j        | _        |j        | _        |j        | _        t          j	        
                    |j        d          | _        t          |j                 |j        |j        d          | _        t          j	                            |j                  | _        | j        | j        rdndz  | _        d S )	Nembedding_transformationrV   	LayerNormr   rW   )rater   r$   rQ   )rX   rY   trigram_inputembedding_sizera   hidden_sizemax_position_embeddingsinitializer_ranger   rZ   r[   r   NORM2FNnormalization_typelayer_norm_epsr   Dropouthidden_dropout_probdropoutembedded_input_sizerb   s      rJ   rY   zTFMobileBertEmbeddings.__init__   s    ""6"""#1$3!-'-'E$!'!9(-(:(:6;MTn(:(o(o% !!:;(=K
 
 
 |++1K+LL#'#6t?Q:X!!WX#Y   rL   Nc                D   t          j        d          5  |                     d| j        j        | j        gt          | j                            | _        d d d            n# 1 swxY w Y   t          j        d          5  |                     d| j        j	        | j
        gt          | j                            | _        d d d            n# 1 swxY w Y   t          j        d          5  |                     d| j        | j
        gt          | j                            | _        d d d            n# 1 swxY w Y   | j        rd S d| _        t          | d	d           Xt          j        | j        j                  5  | j                            d d | j        g           d d d            n# 1 swxY w Y   t          | d
d           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )Nword_embeddingsr   )r   )rW   r   r   token_type_embeddings
embeddingsposition_embeddingsTr   r   )r:   rn   r   ra   
vocab_sizer   r   r   r   type_vocab_sizer   r   r   r   rl   rm   r   rW   ro   r   r   rq   s     rJ   ro   zTFMobileBertEmbeddings.build   sc   ],-- 	 	//{-t/BC+d>TUUU *  DK	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ]233 	 	)-!{2D4DE+d>TUUU *9 * *D&	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ]011 	 	'+!3T5EF+d>TUUU (7 ( (D$	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 : 	F
43T::Ft<ABB \ \-33T4AY4Z[[[\ \ \ \ \ \ \ \ \ \ \ \ \ \ \4d++7t~233 + +$$T***+ + + + + + + + + + + + + + + + + + 87s[   AA##A'*A'ACCC/=D88D<?D<#F11F58F5+HHHFc           
     ,   ||J |5t          || j        j                   t          j        | j        |          }t          |          dd         }|t          j        |d          }| j        rWt          j	        t          j
        |ddddf         d          |t          j
        |ddddf         d          gd	
          }| j        s| j        | j        k    r|                     |          }|0t          j        t          j        d|d                   d
          }t          j        | j        |          }t          j        | j        |          }||z   |z   }	|                     |	          }	|                     |	|          }	|	S )z
        Applies embedding based on inputs tensor.

        Returns:
            final_embeddings (`tf.Tensor`): output embedding tensor.
        N)paramsindicesr   )dimsvaluer$   )r   r   )r   r$   r   )r   )r$   r   r      axis)startlimit)r   )r   training)r   ra   r   r:   gatherr   r   fillr   concatpadr   r   r   expand_dimsranger   r   r   r   )
r@   	input_idsposition_idstoken_type_idsinputs_embedsr   rr   position_embedstoken_type_embedsfinal_embeddingss
             rJ   ri   zTFMobileBertEmbeddings.call   s    %-*?*?@ *9dk6LMMMIT[)LLLM //4!W+Q???N 	 IF=ABB/1IJJ!F=CRC02JKK
   M  	I!48H!H!H 99-HHM>"(+b/*R*R*RYZ[[[L)4+C\ZZZIT-GQ_```(?:=NN>>1A>BB<</?(<SSrL   rf   )NNNNF)rM   rN   rO   rP   rY   ro   ri   ru   rv   s   @rJ   r   r      sl        QQZ Z Z Z Z&+ + + +@/  /  /  /  /  /  /  / rL   r   c                  6     e Zd Z fdZd Z	 ddZddZ xZS )	TFMobileBertSelfAttentionc                6    t                      j        di | |j        |j        z  dk    rt	          d|j         d|j                   |j        | _        |j        | _        |j        |j        z  dk    sJ t          |j        |j        z            | _        | j        | j        z  | _	        t          j                            | j	        t          |j                  d          | _        t          j                            | j	        t          |j                  d          | _        t          j                            | j	        t          |j                  d          | _        t          j                            |j                  | _        || _        d S )	Nr   zThe hidden size (z6) is not a multiple of the number of attention heads (querykernel_initializerrW   keyr   rQ   )rX   rY   r   num_attention_heads
ValueErroroutput_attentionsintrp   attention_head_sizeall_head_sizer   rZ   r[   r   r   r   r   r   r   attention_probs_dropout_probr   ra   rb   s      rJ   rY   z"TFMobileBertSelfAttention.__init__  s   ""6""" ::a??7F$6 7 7 47 7  
 $*#= !'!9!F$>>!CCCC#&v'>A['[#\#\ !58PP\''?6C[3\3\cj ( 
 

 <%%?6C[3\3\ch & 
 
 \''?6C[3\3\cj ( 
 

 |++F,OPPrL   c                x    t          j        ||d| j        | j        f          }t          j        |g d          S )Nr   r   r   r$   r   perm)r:   r?   r   r   	transpose)r@   x
batch_sizes      rJ   transpose_for_scoresz.TFMobileBertSelfAttention.transpose_for_scores,  s;    Jq:r4+CTE]^__|ALLL1111rL   Fc                H   t          |          d         }|                     |          }	|                     |          }
|                     |          }|                     |	|          }|                     |
|          }|                     ||          }t          j        ||d          }t          j        t          |          d         |j                  }|t
          j	        
                    |          z  }| t          j        ||j                  }||z   }t          |d          }|                     ||          }|||z  }t          j        ||          }t          j        |g d	          }t          j        ||d| j        f          }|r||fn|f}|S )
Nr   T)transpose_br   r3   r   r   r   r   )r   r   r   r   r   r:   matmulr=   r4   mathsqrtr   r   r   r?   r   )r@   query_tensor
key_tensorvalue_tensorattention_mask	head_maskr   r   r   mixed_query_layermixed_key_layermixed_value_layerquery_layer	key_layervalue_layerattention_scoresdkattention_probscontext_layeroutputss                       rJ   ri   zTFMobileBertSelfAttention.call1  s     //2
 JJ|44((:.. JJ|44//0A:NN--ozJJ	//0A:NN 9
 
 
 WZ	**2.6F6LMMM+bgll2.>.>>%W^;K;QRRRN/.@ ))9CCC ,,,JJ  -	9O	/;??]FFF
JD,>?
 
 7H]=/22mM]rL   Nc                   | j         rd S d| _         t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j	        j                  5  | j	                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           xt          j        | j
        j                  5  | j
                            d d | j        j        r| j        j        n| j        j        g           d d d            d S # 1 swxY w Y   d S d S )NTr   r   r   )rl   rm   r:   rn   r   rW   ro   ra   rp   r   r   use_bottleneck_attentionr   rq   s     rJ   ro   zTFMobileBertSelfAttention.build^  sH   : 	F
4$''3tz// M M
  $dk.J!KLLLM M M M M M M M M M M M M M M4%%1tx}-- K KdDK,HIJJJK K K K K K K K K K K K K K K4$''3tz// 	 	
  ;?544![4  	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 43s7    (A44A8;A8.(C""C&)C&A E))E-0E-Frf   )rM   rN   rO   rY   r   ri   ro   ru   rv   s   @rJ   r   r     sw            62 2 2 ns+ + + +Z       rL   r   c                  .     e Zd Z fdZddZddZ xZS )TFMobileBertSelfOutputc                    t                      j        di | |j        | _        t          j                            |j        t          |j                  d          | _	        t          |j                 |j        |j        d          | _        | j        s)t          j                            |j                  | _        || _        d S )NrU   r   r   r   rQ   )rX   rY   use_bottleneckr   rZ   r[   rp   r   r   rU   r   r   r   r   r   r   r   ra   rb   s      rJ   rY   zTFMobileBertSelfOutput.__init__v  s    ""6"""$3\''#H`8a8aho ( 
 

 !!:;#V-B
 
 
 " 	L <//0JKKDLrL   Fc                    |                      |          }| j        s|                     ||          }|                     ||z             }|S Nr   )rU   r   r   r   )r@   rh   residual_tensorr   s       rJ   ri   zTFMobileBertSelfOutput.call  sP    

=11" 	K LLLJJM}'FGGrL   Nc                   | j         rd S d| _         t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           St          j        | j	        j                  5  | j	                            d            d d d            d S # 1 swxY w Y   d S d S NTrU   r   
rl   rm   r:   rn   rU   rW   ro   ra   rp   r   rq   s     rJ   ro   zTFMobileBertSelfOutput.build  o   : 	F
4$''3tz// M M
  $dk.J!KLLLM M M M M M M M M M M M M M M4d++7t~233 + +$$T***+ + + + + + + + + + + + + + + + + + 87$    (A44A8;A8.CCCr   rf   rt   rv   s   @rJ   r   r   u  s`               	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+rL   r   c                  6     e Zd Z fdZd Z	 ddZddZ xZS )	TFMobileBertAttentionc                     t                      j        di | t          |d          | _        t	          |d          | _        d S )Nr@   rV   outputrQ   )rX   rY   r   r@   r   mobilebert_outputrb   s      rJ   rY   zTFMobileBertAttention.__init__  sO    ""6"""-f6BBB	!7X!N!N!NrL   c                    t           rf   NotImplementedError)r@   headss     rJ   prune_headsz!TFMobileBertAttention.prune_heads  s    !!rL   Fc	           	         |                      |||||||          }	|                     |	d         ||          }
|
f|	dd          z   }|S )Nr   r   r$   )r@   r  )r@   r   r   r   layer_inputr   r   r   r   self_outputsattention_outputr   s               rJ   ri   zTFMobileBertAttention.call  sj     yy*lNIO`ks ! 
 
  11,q/;Ya1bb#%QRR(88rL   Nc                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr@   r  )rl   rm   r:   rn   r@   rW   ro   r  rq   s     rJ   ro   zTFMobileBertAttention.build  sT   : 	F
4&&2ty~.. & &	%%%& & & & & & & & & & & & & & &4,d33?t5:;; 3 3&,,T2223 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 @?$    A''A+.A+!C		CCr   rf   )rM   rN   rO   rY   r  ri   ro   ru   rv   s   @rJ   r   r     sy        O O O O O
" " "    &	3 	3 	3 	3 	3 	3 	3 	3rL   r   c                  .     e Zd Z fdZddZddZ xZS )TFOutputBottleneckc                H    t                      j        di | t          j                            |j        d          | _        t          |j                 |j        |j	        d          | _
        t          j                            |j                  | _        || _        d S NrU   rV   r   r   rQ   )rX   rY   r   rZ   r[   r   rU   r   r   r   r   r   r   r   ra   rb   s      rJ   rY   zTFOutputBottleneck.__init__  s    ""6"""\''(:'II
 !:;(=K
 
 
 |++F,FGGrL   Fc                    |                      |          }|                     ||          }|                     ||z             }|S r   )rU   r   r   )r@   rh   r   r   layer_outputss        rJ   ri   zTFOutputBottleneck.call  sD    

=11]XFF}'FGGrL   Nc                   | j         rd S d| _         t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           St          j        | j	        j                  5  | j	                            d            d d d            d S # 1 swxY w Y   d S d S r   r   rq   s     rJ   ro   zTFOutputBottleneck.build  r   r   r   rf   rt   rv   s   @rJ   r  r    s`               	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+rL   r  c                  .     e Zd Z fdZddZddZ xZS )TFMobileBertOutputc                    t                      j        di | |j        | _        t          j                            |j        t          |j                  d          | _	        t          |j                 |j        |j        d          | _        | j        s*t          j                            |j                  | _        nt#          |d          | _        || _        d S )NrU   r   r   r   
bottleneckrV   rQ   )rX   rY   r   r   rZ   r[   rp   r   r   rU   r   r   r   r   r   r   r   r  r  ra   rb   s      rJ   rY   zTFMobileBertOutput.__init__  s    ""6"""$3\''#H`8a8aho ( 
 

 !!:;#V-B
 
 
 " 	L <//0JKKDLL0lKKKDOrL   Fc                    |                      |          }| j        s0|                     ||          }|                     ||z             }n.|                     ||z             }|                     ||          }|S r   )rU   r   r   r   r  )r@   rh   residual_tensor_1residual_tensor_2r   s        rJ   ri   zTFMobileBertOutput.call  s~    

=11" 	N LLLJJM NN=;L+LMMMM NN=;L+LMMM OOM;LMMMrL   Nc                   | j         rd S d| _         t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j	        j                  5  | j	                            d            d d d            n# 1 swxY w Y   t          | dd           St          j        | j
        j                  5  | j
                            d            d d d            d S # 1 swxY w Y   d S d S )NTrU   r   r  )rl   rm   r:   rn   rU   rW   ro   ra   r\   r   r  rq   s     rJ   ro   zTFMobileBertOutput.build  s   : 	F
4$''3tz// N N
  $dk.K!LMMMN N N N N N N N N N N N N N N4d++7t~233 + +$$T***+ + + + + + + + + + + + + + +4t,,8t344 , ,%%d+++, , , , , , , , , , , , , , , , , , 98s6    (A44A8;A8.CCCD77D;>D;r   rf   rt   rv   s   @rJ   r  r    s`               , , , , , , , ,rL   r  c                  ,     e Zd Z fdZd ZddZ xZS )TFBottleneckLayerc                     t                      j        di | t          j                            |j        d          | _        t          |j                 |j        |j	        d          | _
        || _        d S r  )rX   rY   r   rZ   r[   intra_bottleneck_sizerU   r   r   r   r   ra   rb   s      rJ   rY   zTFBottleneckLayer.__init__  sw    ""6"""\''(D7'SS
 !:;(&2Gk
 
 
 rL   c                Z    |                      |          }|                     |          }|S rf   rU   r   )r@   r   rh   s      rJ   ri   zTFBottleneckLayer.call  s*    

6**}55rL   Nc                   | j         rd S d| _         t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           St          j        | j	        j                  5  | j	                            d            d d d            d S # 1 swxY w Y   d S d S r   
rl   rm   r:   rn   rU   rW   ro   ra   r   r   rq   s     rJ   ro   zTFBottleneckLayer.build  o   : 	F
4$''3tz// H H
  $dk.E!FGGGH H H H H H H H H H H H H H H4d++7t~233 + +$$T***+ + + + + + + + + + + + + + + + + + 87r   rf   rt   rv   s   @rJ   r  r    [              
	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+rL   r  c                  ,     e Zd Z fdZd ZddZ xZS )TFBottleneckc                     t                      j        di | |j        | _        |j        | _        t	          |d          | _        | j        rt	          |d          | _        d S d S )NinputrV   	attentionrQ   )rX   rY   key_query_shared_bottleneckr   r  bottleneck_inputr*  rb   s      rJ   rY   zTFBottleneck.__init__  s{    ""6"""+1+M((.(G% 1&w G G G+ 	I.vKHHHDNNN	I 	IrL   c                    |                      |          }| j        r|fdz  S | j        r|                     |          }||||fS ||||fS )N   )r,  r   r+  r*  )r@   rh   bottlenecked_hidden_statesshared_attention_inputs       rJ   ri   zTFBottleneck.call&  sq    " &*%:%:=%I%I"( 	].0144- 	]%)^^M%B%B"*,BMSmnn!=-A[\\rL   Nc                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr,  r*  )rl   rm   r:   rn   r,  rW   ro   r*  rq   s     rJ   ro   zTFBottleneck.build@  sV   : 	F
4+T22>t49:: 2 2%++D1112 2 2 2 2 2 2 2 2 2 2 2 2 2 24d++7t~233 + +$$T***+ + + + + + + + + + + + + + + + + + 87r  rf   rt   rv   s   @rJ   r'  r'    sc        I I I I I] ] ]4	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+rL   r'  c                  ,     e Zd Z fdZd ZddZ xZS )TFFFNOutputc                     t                      j        di | t          j                            |j        d          | _        t          |j                 |j        |j	        d          | _
        || _        d S r  )rX   rY   r   rZ   r[   rp   rU   r   r   r   r   ra   rb   s      rJ   rY   zTFFFNOutput.__init__M  sw    ""6"""\''(?g'NN
 !:;#V-B
 
 
 rL   c                `    |                      |          }|                     ||z             }|S rf   r!  )r@   rh   r   s      rJ   ri   zTFFFNOutput.callU  s/    

=11}'FGGrL   Nc                   | j         rd S d| _         t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           St          j        | j	        j                  5  | j	                            d            d d d            d S # 1 swxY w Y   d S d S r   )
rl   rm   r:   rn   rU   rW   ro   ra   r\   r   rq   s     rJ   ro   zTFFFNOutput.buildZ  so   : 	F
4$''3tz// N N
  $dk.K!LMMMN N N N N N N N N N N N N N N4d++7t~233 + +$$T***+ + + + + + + + + + + + + + + + + + 87r   rf   rt   rv   s   @rJ   r3  r3  L  r%  rL   r3  c                  ,     e Zd Z fdZd ZddZ xZS )
TFFFNLayerc                     t                      j        di | t          |d          | _        t	          |d          | _        d S )NintermediaterV   r   rQ   )rX   rY   rS   r:  r3  r  rb   s      rJ   rY   zTFFFNLayer.__init__g  sP    ""6"""4V.QQQ!,V(!C!C!CrL   c                \    |                      |          }|                     ||          }|S rf   )r:  r  )r@   rh   intermediate_outputr  s       rJ   ri   zTFFFNLayer.calll  s2    "//>>../BMRRrL   Nc                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr:  r  )rl   rm   r:   rn   r:  rW   ro   r  rq   s     rJ   ro   zTFFFNLayer.buildq  sY   : 	F
4..:t0566 . .!''---. . . . . . . . . . . . . . .4,d33?t5:;; 3 3&,,T2223 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 @?r  rf   rt   rv   s   @rJ   r8  r8  f  s`        D D D D D
  
	3 	3 	3 	3 	3 	3 	3 	3rL   r8  c                  .     e Zd Z fdZddZddZ xZS )TFMobileBertLayerc                    t                      j        di | j        | _        j        | _        t	          d          | _        t          d          | _        t          d          | _	        | j        rt          d          | _        j        dk    r*fdt          j        dz
            D             | _        d S d S )	Nr*  rV   r:  r   r  r$   c                8    g | ]}t          d |           S )zffn.rV   )r8  .0ira   s     rJ   
<listcomp>z.TFMobileBertLayer.__init__.<locals>.<listcomp>  s,    ppp
6
q

;;;ppprL   rQ   )rX   rY   r   num_feedforward_networksr   r*  rS   r:  r  r  r'  r  r   ffnrb   s    ` rJ   rY   zTFMobileBertLayer.__init__~  s    ""6"""$3(.(G%.vKHHH4V.QQQ!3F!J!J!J 	F*6EEEDO*Q..ppppU6KjmnKnEoEopppDHHH /.rL   Fc           
        | j         r|                     |          \  }}}}	n|gdz  \  }}}}	|                     ||||	||||          }
|
d         }|f}| j        dk    r+t	          | j                  D ]\  }} ||          }||fz  }|                     |          }|                     ||||          }|f|
dd          z   t          j	        d          ||||	||fz   |z   }|S )Nr.  r   r   r$   )
r   r  r*  rF  	enumeraterG  r:  r  r:   constant)r@   rh   r   r   r   r   r   r   r   r  attention_outputsr
  srD  
ffn_moduler<  layer_outputr   s                     rJ   ri   zTFMobileBertLayer.call  s_    	VBF//R_B`B`?L*lKKCP/TUBU?L*lK NN + 	
 	
 -Q/(A--!*48!4!4 ) ):#-:.>#?#? &(("//0@AA--.ACSUbmu-vv O#$ A #
  	 rL   Nc                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j	        j                  5  | j	                            d            d d d            n# 1 swxY w Y   t          | dd           P| j
        D ]J}t          j        |j                  5  |                    d            d d d            n# 1 swxY w Y   Id S d S )NTr*  r:  r  r  rG  )rl   rm   r:   rn   r*  rW   ro   r:  r  r  rG  r@   rr   layers      rJ   ro   zTFMobileBertLayer.build  s   : 	F
4d++7t~233 + +$$T***+ + + + + + + + + + + + + + +4..:t0566 . .!''---. . . . . . . . . . . . . . .4,d33?t5:;; 3 3&,,T2223 3 3 3 3 3 3 3 3 3 3 3 3 3 34t,,8t344 , ,%%d+++, , , , , , , , , , , , , , ,4%%1 & &]5:.. & &KK%%%& & & & & & & & & & & & & & & 21& &sZ    A''A+.A+!CCCD))D-0D-#F

FFG**G.	1G.	r   rf   rt   rv   s   @rJ   r?  r?  }  sf        q q q q q+ + + +Z& & & & & & & &rL   r?  c                  0     e Zd Z fdZ	 ddZddZ xZS )TFMobileBertEncoderc                     t                      j        di | j        | _        j        | _        fdt	          j                  D             | _        d S )Nc                8    g | ]}t          d |           S )zlayer_._rV   )r?  rB  s     rJ   rE  z0TFMobileBertEncoder.__init__.<locals>.<listcomp>  s-    nnn'^^^DDDnnnrL   rQ   )rX   rY   r   output_hidden_statesr   num_hidden_layersrQ  rb   s    ` rJ   rY   zTFMobileBertEncoder.__init__  sb    ""6"""!'!9$*$?!nnnneTZTlNmNmnnn


rL   Fc                &   |rdnd }|rdnd }	t          | j                  D ]9\  }
}|r||fz   } |||||
         ||          }|d         }|r|	|d         fz   }	:|r||fz   }|st          d |||	fD                       S t          |||	          S )NrQ   r   r   r$   c              3     K   | ]}||V  	d S rf   rQ   )rC  vs     rJ   	<genexpr>z+TFMobileBertEncoder.call.<locals>.<genexpr>  s(      hhqZ[ZgZgZgZgZghhrL   )last_hidden_staterh   
attentions)rI  rQ  tupler   )r@   rh   r   r   r   rV  return_dictr   all_hidden_statesall_attentionsrD  layer_moduler  s                rJ   ri   zTFMobileBertEncoder.call  s    #7@BBD0:d(44 	F 	FOA|# I$58H$H!(L~y|=NYa  M *!,M  F!/=3C2E!E   	E 1]4D D 	ihh]4E~$Vhhhhhh +;LYg
 
 
 	
rL   Nc                    | j         rd S d| _         t          | dd           P| j        D ]J}t          j        |j                  5  |                    d            d d d            n# 1 swxY w Y   Id S d S )NTrQ  )rl   rm   rQ  r:   rn   rW   ro   rP  s      rJ   ro   zTFMobileBertEncoder.build  s    : 	F
4$''3 & &]5:.. & &KK%%%& & & & & & & & & & & & & & & 43& &s   A&&A*	-A*	r   rf   rt   rv   s   @rJ   rS  rS    sk        o o o o o !
 !
 !
 !
F& & & & & & & &rL   rS  c                  ,     e Zd Z fdZd ZddZ xZS )TFMobileBertPoolerc                     t                      j        di | |j        | _        | j        r?t          j                            |j        t          |j	                  dd          | _
        || _        d S )NtanhrU   )r   
activationrW   rQ   )rX   rY   classifier_activationdo_activater   rZ   r[   r   r   r   rU   ra   rb   s      rJ   rY   zTFMobileBertPooler.__init__  s{    ""6"""!7 	++"#263K#L#L!	 ,  DJ rL   c                Z    |d d df         }| j         s|S |                     |          }|S Nr   )rj  rU   )r@   rh   first_token_tensorpooled_outputs       rJ   ri   zTFMobileBertPooler.call  s@     +111a40 	!%% JJ'9::M  rL   Nc                   | j         rd S d| _         t          | dd           `t          j        | j        j                  5  | j                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S rk   )	rl   rm   r:   rn   rU   rW   ro   ra   r   rq   s     rJ   ro   zTFMobileBertPooler.build  s    : 	F
4$''3tz// H H
  $dk.E!FGGGH H H H H H H H H H H H H H H H H H 43rs   rf   rt   rv   s   @rJ   re  re    sc        
 
 
 
 
! ! !H H H H H H H HrL   re  c                  ,     e Zd Z fdZd ZddZ xZS )#TFMobileBertPredictionHeadTransformc                    t                      j        di | t          j                            |j        t          |j                  d          | _        t          |j
        t                    rt          |j
                  | _        n|j
        | _        t          d         |j        |j        d          | _        || _        d S )NrU   r   r   r   r   rQ   )rX   rY   r   rZ   r[   r   r   r   rU   r]   r^   r_   r   transform_act_fnr   r   r   ra   rb   s      rJ   rY   z,TFMobileBertPredictionHeadTransform.__init__#  s    ""6"""\''?6C[3\3\cj ( 
 

 f'-- 	6$5f6G$H$HD!!$*$5D! .v/A6K`grsssrL   c                    |                      |          }|                     |          }|                     |          }|S rf   )rU   rs  r   rg   s     rJ   ri   z(TFMobileBertPredictionHeadTransform.call/  s=    

=11--m<<}55rL   Nc                   | j         rd S d| _         t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           St          j        | j	        j                  5  | j	                            d            d d d            d S # 1 swxY w Y   d S d S r   r#  rq   s     rJ   ro   z)TFMobileBertPredictionHeadTransform.build5  r$  r   rf   rt   rv   s   @rJ   rq  rq  "  s[        
 
 
 
 
  	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+rL   rq  c                  D     e Zd Z fdZd	dZd Zd Zd Zd Zd Z	 xZ
S )
TFMobileBertLMPredictionHeadc                t     t                      j        di | t          |d          | _        || _        d S )N	transformrV   rQ   )rX   rY   rq  ry  ra   rb   s      rJ   rY   z%TFMobileBertLMPredictionHead.__init__B  s?    ""6"""<V+VVVrL   Nc                *   |                      | j        j        fddd          | _        |                      | j        j        | j        j        z
  | j        j        fddd          | _        |                      | j        j        | j        j        fddd          | _        | j        rd S d| _        t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )Nr   Tr   )r   r   	trainablerW   zdense/weightzdecoder/weightry  )r   ra   r   r   r   r   rU   decoderrl   rm   r:   rn   ry  rW   ro   rq   s     rJ   ro   z"TFMobileBertLMPredictionHead.buildG  s_   OO4;+A*CQXdhouOvv	__;*T[-GGI_`	 % 
 

 ;)4;+EF!	 ' 
 
 : 	F
4d++7t~233 + +$$T***+ + + + + + + + + + + + + + + + + + 87s   DD
D
c                    | S rf   rQ   r@   s    rJ   get_output_embeddingsz2TFMobileBertLMPredictionHead.get_output_embeddings]  s    rL   c                R    || _         t          |          d         | j        _        d S rl  )r|  r   ra   r   r@   r   s     rJ   set_output_embeddingsz2TFMobileBertLMPredictionHead.set_output_embeddings`  s&    !+E!2!21!5rL   c                    d| j         iS )Nr   )r   r~  s    rJ   get_biasz%TFMobileBertLMPredictionHead.get_biasd  s    	""rL   c                j    |d         | _         t          |d                   d         | j        _        d S )Nr   r   )r   r   ra   r   r  s     rJ   set_biasz%TFMobileBertLMPredictionHead.set_biasg  s.    &M	!+E&M!:!:1!=rL   c                    |                      |          }t          j        |t          j        t          j        | j                  | j        gd                    }|| j        z   }|S )Nr   r   )ry  r:   r   r   r   r|  rU   r   rg   s     rJ   ri   z!TFMobileBertLMPredictionHead.callk  s[    }55	-BL<V<VX\Xb;cjk1l1l1lmm%	1rL   rf   )rM   rN   rO   rY   ro   r  r  r  r  ri   ru   rv   s   @rJ   rw  rw  A  s            
+ + + +,  6 6 6# # #> > >      rL   rw  c                  ,     e Zd Z fdZd ZddZ xZS )TFMobileBertMLMHeadc                f     t                      j        di | t          |d          | _        d S )NpredictionsrV   rQ   )rX   rY   rw  r  rb   s      rJ   rY   zTFMobileBertMLMHead.__init__s  s;    ""6"""7]SSSrL   c                0    |                      |          }|S rf   r  )r@   sequence_outputprediction_scoress      rJ   ri   zTFMobileBertMLMHead.callw  s     ,,_==  rL   Nc                    | j         rd S d| _         t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr  )rl   rm   r:   rn   r  rW   ro   rq   s     rJ   ro   zTFMobileBertMLMHead.build{  s    : 	F
4--9t/455 - - &&t,,,- - - - - - - - - - - - - - - - - - :9    A((A,/A,rf   rt   rv   s   @rJ   r  r  r  s`        T T T T T! ! !- - - - - - - -rL   r  c                  j     e Zd ZeZd
 fd	Zd Zd Zd Ze		 	 	 	 	 	 	 	 	 	 dd            Z
dd	Z xZS )TFMobileBertMainLayerTc                4    t                      j        di | || _        |j        | _        |j        | _        |j        | _        |j        | _        t          |d          | _	        t          |d          | _        |rt          |d          nd | _        d S )Nr   rV   encoderpoolerrQ   )rX   rY   ra   rW  r   rV  use_return_dictr_  r   r   rS  r  re  r  )r@   ra   add_pooling_layerrc   rd   s       rJ   rY   zTFMobileBertMainLayer.__init__  s    ""6"""!'!9!'!9$*$?!!10lKKK*6	BBBCT^(h????Z^rL   c                    | j         S rf   )r   r~  s    rJ   get_input_embeddingsz*TFMobileBertMainLayer.get_input_embeddings  s
    rL   c                \    || j         _        t          |          d         | j         _        d S rl  )r   r   r   r   r  s     rJ   set_input_embeddingsz*TFMobileBertMainLayer.set_input_embeddings  s)    !&%/%6%6q%9"""rL   c                    t           )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        r  )r@   heads_to_prunes     rJ   _prune_headsz"TFMobileBertMainLayer._prune_heads  s
    
 "!rL   NFc           	     r   ||t          d          |t          |          }n)|t          |          d d         }nt          d          |t          j        |d          }|t          j        |d          }|                     |||||
          }t          j        ||d         dd|d         f          }t          j        ||j                  }t          j        d|j                  }t          j        d	|j                  }t          j	        t          j
        ||          |          }|t          d g| j        z  }|                     ||||||	|
          }|d         }| j        |                     |          nd }|	s||f|dd          z   S t          |||j        |j        
          S )NzDYou cannot specify both input_ids and inputs_embeds at the same timer   z5You have to specify either input_ids or inputs_embedsr$   r   r   r3   g      ?g     )r\  pooler_outputrh   r]  )r   r   r:   r   r   r?   r=   r4   rJ  multiplysubtractr  rW  r  r  r   rh   r]  )r@   r   r   r   r   r   r   r   rV  r_  r   rr   embedding_outputextended_attention_maskone_cstten_thousand_cstencoder_outputsr  rn  s                      rJ   ri   zTFMobileBertMainLayer.call  s     ]%>cddd"$Y//KK&$]33CRC8KKTUUU!W[!44N!W[!44N??9lNTalt?uu #%*^k!naQRT_`aTb=c"d"d #%'*AIYI_"`"`"`+c)9)?@@@;x7G7MNNN"$+bk'CZ.[.[]m"n"n  %%!77I,,#  ' 
 
 *!,8<8OO444UY 	$  #$ $
 ,-')7&1	
 
 
 	
rL   c                r   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr   r  r  )	rl   rm   r:   rn   r   rW   ro   r  r  rq   s     rJ   ro   zTFMobileBertMainLayer.build  s   : 	F
4t,,8t344 , ,%%d+++, , , , , , , , , , , , , , ,4D))5t|011 ) )""4((() ) ) ) ) ) ) ) ) ) ) ) ) ) )44((4t{/00 ( (!!$'''( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( 546    A''A+.A+!CCCD**D.1D.)T
NNNNNNNNNFrf   )rM   rN   rO   r%   config_classrY   r  r  r  r   ri   ro   ru   rv   s   @rJ   r  r    s        #L_ _ _ _ _ _  : : :" " "  !Q
 Q
 Q
 ]Q
f( ( ( ( ( ( ( (rL   r  c                      e Zd ZdZeZdZdS )TFMobileBertPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    
mobilebertN)rM   rN   rO   rP   r%   r  base_model_prefixrQ   rL   rJ   r  r    s'         
 $L$rL   r  c                  Z    e Zd ZU dZdZded<   dZded<   dZded<   dZded<   dZ	ded	<   dS )
 TFMobileBertForPreTrainingOutputaE  
    Output type of [`TFMobileBertForPreTraining`].

    Args:
        prediction_logits (`tf.Tensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        seq_relationship_logits (`tf.Tensor` of shape `(batch_size, 2)`):
            Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation
            before SoftMax).
        hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
            `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    Nztf.Tensor | Nonelossprediction_logitsseq_relationship_logitsztuple[tf.Tensor] | Nonerh   r]  )
rM   rN   rO   rP   r  __annotations__r  r  rh   r]  rQ   rL   rJ   r  r    su          , "D!!!!*.....044444-1M1111*.J......rL   r  a	  

    This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a [keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it
    as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matter related to general usage and
    behavior.

    <Tip>

    TensorFlow models and layers in `transformers` accept two formats as input:

    - having all inputs as keyword arguments (like PyTorch models), or
    - having all inputs as a list, tuple or dict in the first positional argument.

    The reason the second format is supported is that Keras methods prefer this format when passing inputs to models
    and layers. Because of this support, when using methods like `model.fit()` things should "just work" for you - just
    pass your inputs and labels in any format that `model.fit()` supports! If, however, you want to use the second
    format outside of Keras methods like `fit()` and `predict()`, such as when creating your own layers or models with
    the Keras `Functional` API, there are three possibilities you can use to gather all the input Tensors in the first
    positional argument:

    - a single Tensor with `input_ids` only and nothing else: `model(input_ids)`
    - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
    `model([input_ids, attention_mask])` or `model([input_ids, attention_mask, token_type_ids])`
    - a dictionary with one or several input Tensors associated to the input names given in the docstring:
    `model({"input_ids": input_ids, "token_type_ids": token_type_ids})`

    Note that when creating models and layers with
    [subclassing](https://keras.io/guides/making_new_layers_and_models_via_subclassing/) then you don't need to worry
    about any of this, as you can just pass inputs like you would to any other Python function!

    </Tip>

    Parameters:
        config ([`MobileBertConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        input_ids (`Numpy array` or `tf.Tensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.__call__`] and
            [`PreTrainedTokenizer.encode`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`Numpy array` or `tf.Tensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`Numpy array` or `tf.Tensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`Numpy array` or `tf.Tensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`Numpy array` or `tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`tf.Tensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the
            config will be used instead.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
            used instead.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in
            eager mode, in graph mode the value will always be set to True.
        training (`bool`, *optional*, defaults to `False`):
            Whether or not to use the model in training mode (some modules like dropout modules have different
            behaviors between training and evaluation).
zdThe bare MobileBert Model transformer outputting raw hidden-states without any specific head on top.c                       e Zd Z fdZe ee                    d                     ee	e
e          	 	 	 	 	 	 	 	 	 	 ddd                                    ZddZ xZS )TFMobileBertModelc                n     t                      j        |g|R i | t          |d          | _        d S )Nr  rV   )rX   rY   r  r  r@   ra   r   rc   rd   s       rJ   rY   zTFMobileBertModel.__init__  sB    3&333F333/\JJJrL   batch_size, sequence_length
checkpointoutput_typer  NFr   TFModelInputType | Noner   np.ndarray | tf.Tensor | Noner   r   r   r   r   bool | NonerV  r_  r   r-   $tuple | TFBaseModelOutputWithPoolingc                D    |                      |||||||||	|

  
        }|S )N)
r   r   r   r   r   r   r   rV  r_  r   )r  )r@   r   r   r   r   r   r   r   rV  r_  r   r   s               rJ   ri   zTFMobileBertModel.call  sA    ( //))%'/!5# " 
 
 rL   c                    | j         rd S d| _         t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr  )rl   rm   r:   rn   r  rW   ro   rq   s     rJ   ro   zTFMobileBertModel.build  s    : 	F
4t,,8t344 , ,%%d+++, , , , , , , , , , , , , , , , , , 98r  r  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  rV  r  r_  r  r   r  r-   r  rf   )rM   rN   rO   rY   r   r!   MOBILEBERT_INPUTS_DOCSTRINGformatr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOCri   ro   ru   rv   s   @rJ   r  r    s        
K K K K K **+F+M+MNk+l+lmm&0$   .28<8<6:377;)-,0#' %     nm ]8, , , , , , , ,rL   r  z
    MobileBert Model with two heads on top as done during the pretraining: a `masked language modeling` head and a
    `next sentence prediction (classification)` head.
    c                       e Zd Z fdZd Zd Ze ee	                    d                     e
ee          	 	 	 	 	 	 	 	 	 	 	 	 ddd                                    ZddZd Z xZS )TFMobileBertForPreTrainingc                     t                      j        |g|R i | t          |d          | _        t	          |d          | _        t          |d          | _        d S )Nr  rV   predictions___clsseq_relationship___cls)rX   rY   r  r  r  r  TFMobileBertOnlyNSPHeadseq_relationshipr  s       rJ   rY   z#TFMobileBertForPreTraining.__init__  sp    3&333F333/\JJJ.v<OPPP 7E] ^ ^ ^rL   c                    | j         j         S rf   r  r~  s    rJ   get_lm_headz&TFMobileBertForPreTraining.get_lm_head      ++rL   c                    t          j        dt                     | j        dz   | j        j        z   dz   | j        j        j        z   S NzMThe method get_prefix_bias_name is deprecated. Please use `get_bias` instead./)warningswarnFutureWarningrW   r  r~  s    rJ   get_prefix_bias_namez/TFMobileBertForPreTraining.get_prefix_bias_name  s@    egtuuuy3!1!66<t?O?[?```rL   r  r  r  NFr   r  r   r  r   r   r   r   r   r  rV  r_  r*   r5   r   r-   (tuple | TFMobileBertForPreTrainingOutputc                p   |                      |||||||||	|
  
        }|dd         \  }}|                     |          }|                     |          }d}|
$|"d|
i}||d<   |                     |||f          }|	s||f|dd         z   }||f|z   n|S t	          ||||j        |j                  S )a9  
        Return:

        Examples:

        ```python
        >>> import tensorflow as tf
        >>> from transformers import AutoTokenizer, TFMobileBertForPreTraining

        >>> tokenizer = AutoTokenizer.from_pretrained("google/mobilebert-uncased")
        >>> model = TFMobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")
        >>> input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :]  # Batch size 1
        >>> outputs = model(input_ids)
        >>> prediction_scores, seq_relationship_scores = outputs[:2]
        ```	r   r   r   r   r   r   rV  r_  r   Nr   r*   r5   r*   r,   )r  r  r  rh   r]  )r  r  r  rK   r  rh   r]  )r@   r   r   r   r   r   r   r   rV  r_  r*   r5   r   r   r  rn  r  seq_relationship_score
total_lossd_labelsr   s                        rJ   ri   zTFMobileBertForPreTraining.call  s    B //))%'/!5# " 
 
 *1!& ,,_==!%!6!6}!E!E
"5"A &)H.AH*+--XGXZpFq-rrJ 	R')?@7122;NF/9/EZMF**6Q//$:!/)
 
 
 	
rL   c                r   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr  r  r  )	rl   rm   r:   rn   r  rW   ro   r  r  rq   s     rJ   ro   z TFMobileBertForPreTraining.build  s   : 	F
4t,,8t344 , ,%%d+++, , , , , , , , , , , , , , ,4--9t/455 - - &&t,,,- - - - - - - - - - - - - - -4+T22>t49:: 2 2%++D1112 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ?>r  c                    |dk    r|dfS |fS Nzcls.predictions.decoder.weightz,mobilebert.embeddings.word_embeddings.weightrQ   r@   	tf_weights     rJ   tf_to_pt_weight_renamez1TFMobileBertForPreTraining.tf_to_pt_weight_rename-  "    888LLL<rL   NNNNNNNNNNNF)r   r  r   r  r   r  r   r  r   r  r   r  r   r  rV  r  r_  r  r*   r  r5   r  r   r  r-   r  rf   )rM   rN   rO   rY   r  r  r   r!   r  r  r#   r  r  ri   ro   r  ru   rv   s   @rJ   r  r    s       _ _ _ _ _, , ,a a a **+F+M+MNk+l+lmm+KZijjj .28<8<6:377;)-,0#'04=A %?
 ?
 ?
 ?
 kj nm ]?
B2 2 2 2             rL   r  z8MobileBert Model with a `language modeling` head on top.c            	           e Zd Zg dZ fdZd Zd Ze ee	
                    d                     eeeedd          	 	 	 	 	 	 	 	 	 	 	 ddd                                    Zd dZd Z xZS )!TFMobileBertForMaskedLM)r  r  cls.seq_relationshipc                     t                      j        |g|R i | t          |dd          | _        t	          |d          | _        d S )NFr  r  rW   r  rV   )rX   rY   r  r  r  r  r  s       rJ   rY   z TFMobileBertForMaskedLM.__init__=  s]    3&333F333/%Vbccc.v<OPPPrL   c                    | j         j         S rf   r  r~  s    rJ   r  z#TFMobileBertForMaskedLM.get_lm_headC  r  rL   c                    t          j        dt                     | j        dz   | j        j        z   dz   | j        j        j        z   S r  )r  r  r  rW   mlmr  r~  s    rJ   r  z,TFMobileBertForMaskedLM.get_prefix_bias_nameF  s=    egtuuuy3.4tx7K7PPPrL   r  z'paris'g=
ףp=?r  r  r  expected_outputexpected_lossNFr   r  r   r  r   r   r   r   r   r  rV  r_  r*   r   r-   tuple | TFMaskedLMOutputc                    |                      |||||||||	|
  
        }|d         }|                     ||          }|
dn|                     |
|          }|	s|f|dd         z   }||f|z   n|S t          |||j        |j                  S )az  
        labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels
        r  r   r   Nr   r  r,   rh   r]  )r  r  rK   r	   rh   r]  )r@   r   r   r   r   r   r   r   rV  r_  r*   r   r   r  r  r  r   s                    rJ   ri   zTFMobileBertForMaskedLM.callJ  s    : //))%'/!5# " 
 
 "!* ,,_x,PP~tt4+?+?HY+Z+Z 	F')GABBK7F)-)9TGf$$vE$!/)	
 
 
 	
rL   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr  r  )rl   rm   r:   rn   r  rW   ro   r  rq   s     rJ   ro   zTFMobileBertForMaskedLM.build  sU   : 	F
4t,,8t344 , ,%%d+++, , , , , , , , , , , , , , ,4--9t/455 - - &&t,,,- - - - - - - - - - - - - - - - - - :9r  c                    |dk    r|dfS |fS r  rQ   r  s     rJ   r  z.TFMobileBertForMaskedLM.tf_to_pt_weight_rename  r  rL   NNNNNNNNNNF)r   r  r   r  r   r  r   r  r   r  r   r  r   r  rV  r  r_  r  r*   r  r   r  r-   r  rf   )rM   rN   rO   "_keys_to_ignore_on_load_unexpectedrY   r  r  r   r!   r  r  r   r  r	   r  ri   ro   r  ru   rv   s   @rJ   r  r  4  s1       * * *&Q Q Q Q Q, , ,Q Q Q **+F+M+MNk+l+lmm&$$!   .28<8<6:377;)-,0#'04 %.
 .
 .
 .
  nm ].
`	- 	- 	- 	-             rL   r  c                  ,     e Zd Z fdZd ZddZ xZS )r  c                     t                      j        di | t          j                            dd          | _        || _        d S )Nr   r  rV   rQ   )rX   rY   r   rZ   r[   r  ra   rb   s      rJ   rY   z TFMobileBertOnlyNSPHead.__init__  sI    ""6""" % 2 21;M 2 N NrL   c                0    |                      |          }|S rf   )r  )r@   rn  r  s      rJ   ri   zTFMobileBertOnlyNSPHead.call  s    !%!6!6}!E!E%%rL   Nc                   | j         rd S d| _         t          | dd           `t          j        | j        j                  5  | j                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S )NTr  )	rl   rm   r:   rn   r  rW   ro   ra   r   rq   s     rJ   ro   zTFMobileBertOnlyNSPHead.build  s    : 	F
4+T22>t49:: S S%++T49P,QRRRS S S S S S S S S S S S S S S S S S ?>rs   rf   rt   rv   s   @rJ   r  r    sc            
& & &S S S S S S S SrL   r  zPMobileBert Model with a `next sentence prediction (classification)` head on top.c                       e Zd ZddgZ fdZe ee                    d                     e	e
e          	 	 	 	 	 	 	 	 	 	 	 ddd                                    ZddZ xZS )%TFMobileBertForNextSentencePredictionr  cls.predictionsc                     t                      j        |g|R i | t          |d          | _        t	          |d          | _        d S )Nr  rV   r  )rX   rY   r  r  r  clsr  s       rJ   rY   z.TFMobileBertForNextSentencePrediction.__init__  sW    3&333F333/\JJJ*68PQQQrL   r  r  NFr   r  r   r  r   r   r   r   r   r  rV  r_  r5   r   r-   %tuple | TFNextSentencePredictorOutputc                   |                      |||||||||	|
  
        }|d         }|                     |          }|
dn|                     |
|          }|	s|f|dd         z   }||f|z   n|S t          |||j        |j                  S )a  
        Return:

        Examples:

        ```python
        >>> import tensorflow as tf
        >>> from transformers import AutoTokenizer, TFMobileBertForNextSentencePrediction

        >>> tokenizer = AutoTokenizer.from_pretrained("google/mobilebert-uncased")
        >>> model = TFMobileBertForNextSentencePrediction.from_pretrained("google/mobilebert-uncased")

        >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
        >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
        >>> encoding = tokenizer(prompt, next_sentence, return_tensors="tf")

        >>> logits = model(encoding["input_ids"], token_type_ids=encoding["token_type_ids"])[0]
        ```r  r$   Nr  r   r  )r  r  rK   r   rh   r]  )r@   r   r   r   r   r   r   r   rV  r_  r5   r   r   rn  seq_relationship_scoresnext_sentence_lossr   s                    rJ   ri   z*TFMobileBertForNextSentencePrediction.call  s    F //))%'/!5# " 
 
  
"&((="9"9 #* D%%-@I`%aa 	  	b-/'!""+=F7I7U')F22[aa,#*!/)	
 
 
 	
rL   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr  r  )rl   rm   r:   rn   r  rW   ro   r  rq   s     rJ   ro   z+TFMobileBertForNextSentencePrediction.build  sP   : 	F
4t,,8t344 , ,%%d+++, , , , , , , , , , , , , , ,4%%1tx}-- % %t$$$% % % % % % % % % % % % % % % % % % 21r  r  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  rV  r  r_  r  r5   r  r   r  r-   r  rf   )rM   rN   rO   r  rY   r   r!   r  r  r#   r   r  ri   ro   ru   rv   s   @rJ   r  r    s         +?@R)S&R R R R R **+F+M+MNk+l+lmm+HWfggg .28<8<6:377;)-,0#'=A %>
 >
 >
 >
 hg nm ]>
@	% 	% 	% 	% 	% 	% 	% 	%rL   r  z
    MobileBert Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    c            	           e Zd Zg dZdgZ fdZe ee	                    d                     e
eeeee          	 	 	 	 	 	 	 	 	 	 	 ddd                                    ZddZ xZS )%TFMobileBertForSequenceClassificationr  r  r  r  r   c                    t                      j        |g|R i | |j        | _        t          |d          | _        |j        |j        n|j        }t          j        	                    |          | _
        t          j                            |j        t          |j                  d          | _        || _        d S )Nr  rV   
classifierr   rX   rY   
num_labelsr  r  classifier_dropoutr   r   rZ   r   r   r[   r   r   r  ra   r@   ra   r   rc   r  rd   s        rJ   rY   z.TFMobileBertForSequenceClassification.__init__  s    3&333F333 +/\JJJ)/)B)NF%%TZTn 	 |++,>??,,,/&BZ2[2[bn - 
 
 rL   r  r  NFr   r  r   r  r   r   r   r   r   r  rV  r_  r*   r   r-   "tuple | TFSequenceClassifierOutputc                J   |                      |||||||||	|
  
        }|d         }|                     ||          }|                     |          }|
dn|                     |
|          }|	s|f|dd         z   }||f|z   n|S t	          |||j        |j                  S )a  
        labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        r  r$   r   Nr   r  )r  r   r  rK   r   rh   r]  )r@   r   r   r   r   r   r   r   rV  r_  r*   r   r   rn  r,   r  r   s                    rJ   ri   z*TFMobileBertForSequenceClassification.call$  s    : //))%'/!5# " 
 
  
]XFF//~tt4+?+?+O+O 	FY,F)-)9TGf$$vE)!/)	
 
 
 	
rL   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           `t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            d S # 1 swxY w Y   d S d S NTr  r  
rl   rm   r:   rn   r  rW   ro   r  ra   r   rq   s     rJ   ro   z+TFMobileBertForSequenceClassification.build_  s   : 	F
4t,,8t344 , ,%%d+++, , , , , , , , , , , , , , ,4t,,8t344 M M%%tT4;3J&KLLLM M M M M M M M M M M M M M M M M M 98$    A''A+.A+!(CCCr  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  rV  r  r_  r  r*   r  r   r  r-   r  rf   )rM   rN   rO   r  _keys_to_ignore_on_load_missingrY   r   r!   r  r  r   '_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATIONr   r  _SEQ_CLASS_EXPECTED_OUTPUT_SEQ_CLASS_EXPECTED_LOSSri   ro   ru   rv   s   @rJ   r  r    s       * * *& (2l#     **+F+M+MNk+l+lmm:.$2.   .28<8<6:377;)-,0#'04 %0
 0
 0
 0
  nm ]0
d	M 	M 	M 	M 	M 	M 	M 	MrL   r  z
    MobileBert Model with a span classification head on top for extractive question-answering tasks like SQuAD (a
    linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
    c                       e Zd Zg dZ fdZe ee                    d                     e	e
eeeeee          	 	 	 	 	 	 	 	 	 	 	 	 ddd                                    ZddZ xZS ) TFMobileBertForQuestionAnsweringr  r  r  r  r  c                    t                      j        |g|R i | |j        | _        t          |dd          | _        t
          j                            |j        t          |j	                  d          | _
        || _        d S )NFr  r  
qa_outputsr   )rX   rY   r  r  r  r   rZ   r[   r   r   r"  ra   r  s       rJ   rY   z)TFMobileBertForQuestionAnswering.__init__|  s    3&333F333 +/%Vbccc,,,/&BZ2[2[bn - 
 
 rL   r  )r  r  r  qa_target_start_indexqa_target_end_indexr  r  NFr   r  r   r  r   r   r   r   r   r  rV  r_  start_positionsend_positionsr   r-   &tuple | TFQuestionAnsweringModelOutputc                   |                      |||||||||	|
  
        }|d         }|                     |          }t          j        |dd          \  }}t          j        |d          }t          j        |d          }d}|
||
|d}|                     |||f          }|	s||f|dd         z   }||f|z   n|S t          ||||j        |j                  S )	a  
        start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the start of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        end_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the end of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        r  r   r   r   r   N)start_positionend_position)r  start_logits
end_logitsrh   r]  )	r  r"  r:   splitsqueezerK   r   rh   r]  )r@   r   r   r   r   r   r   r   rV  r_  r%  r&  r   r   r  r,   r+  r,  r  r*   r   s                        rJ   ri   z%TFMobileBertForQuestionAnswering.call  s4   H //))%'/!5# " 
 
 "!*11#%8FAB#?#?#? jz,R888Z
444
&=+D(7WWF''z0JKKD 	F"J/'!""+=F)-)9TGf$$vE-%!!/)
 
 
 	
rL   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           `t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            d S # 1 swxY w Y   d S d S )NTr  r"  )
rl   rm   r:   rn   r  rW   ro   r"  ra   r   rq   s     rJ   ro   z&TFMobileBertForQuestionAnswering.build  r  r  r  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  rV  r  r_  r  r%  r  r&  r  r   r  r-   r'  rf   )rM   rN   rO   r  rY   r   r!   r  r  r   _CHECKPOINT_FOR_QAr   r  _QA_TARGET_START_INDEX_QA_TARGET_END_INDEX_QA_EXPECTED_OUTPUT_QA_EXPECTED_LOSSri   ro   ru   rv   s   @rJ   r  r  k  s       * * *&     **+F+M+MNk+l+lmm%2$40+'   .28<8<6:377;)-,0#'9=7; %;
 ;
 ;
 ;
  nm ];
z	M 	M 	M 	M 	M 	M 	M 	MrL   r  z
    MobileBert Model with a multiple choice classification head on top (a linear layer on top of the pooled output and
    a softmax) e.g. for RocStories/SWAG tasks.
    c                       e Zd Zg dZdgZ fdZe ee	                    d                     e
eee          	 	 	 	 	 	 	 	 	 	 	 ddd                                    ZddZ xZS )TFMobileBertForMultipleChoicer  r   c                @    t                      j        |g|R i | t          |d          | _        t          j                            |j                  | _        t          j        	                    dt          |j                  d          | _        || _        d S )Nr  rV   r$   r  r   )rX   rY   r  r  r   rZ   r   r   r   r[   r   r   r  ra   r  s       rJ   rY   z&TFMobileBertForMultipleChoice.__init__  s    3&333F333/\JJJ|++F,FGG,,,/&2J"K"KR^ - 
 
 rL   z(batch_size, num_choices, sequence_lengthr  NFr   r  r   r  r   r   r   r   r   r  rV  r_  r*   r   r-   #tuple | TFMultipleChoiceModelOutputc                \   |+t          |          d         }t          |          d         }n*t          |          d         }t          |          d         }|t          j        |d|f          nd}|t          j        |d|f          nd}|t          j        |d|f          nd}|t          j        |d|f          nd}|+t          j        |d|t          |          d         f          nd}|                     |||||||||	|
  
        }|d         }|                     ||          }|                     |          }t          j        |d|f          }|
dn|                     |
|          }|	s|f|dd         z   }||f|z   n|S t          |||j        |j	                  S )	a5  
        labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
            where `num_choices` is the size of the second dimension of the input tensors. (See `input_ids` above)
        Nr$   r   r   r   )r_  r   r   r  )
r   r:   r?   r  r   r  rK   r
   rh   r]  )r@   r   r   r   r   r   r   r   rV  r_  r*   r   num_choices
seq_lengthflat_input_idsflat_attention_maskflat_token_type_idsflat_position_idsflat_inputs_embedsr   rn  r,   reshaped_logitsr  r   s                            rJ   ri   z"TFMobileBertForMultipleChoice.call  s   8  $Y//2K#I..q1JJ$]33A6K#M2215JDMDYIJ/?@@@_cN\Nhbj"j9IJJJnrN\Nhbj"j9IJJJnrJVJbBJ|b*5EFFFhl ( J}r:z-7P7PQR7S&TUUU 	
 // # " 
 
  
]XFF//*Vb+->??~tt4+?+?+X+X 	F%''!""+5F)-)9TGf$$vE*"!/)	
 
 
 	
rL   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           `t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            d S # 1 swxY w Y   d S d S r  r  rq   s     rJ   ro   z#TFMobileBertForMultipleChoice.build?  r  r  r  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  rV  r  r_  r  r*   r  r   r  r-   r8  rf   )rM   rN   rO   r  r  rY   r   r!   r  r  r   r  r
   r  ri   ro   ru   rv   s   @rJ   r6  r6    s       * * *& (2l#     **#**+UVV   &/$   .28<8<6:377;)-,0#'04 %?
 ?
 ?
 ?
   ]?
B	M 	M 	M 	M 	M 	M 	M 	MrL   r6  z
    MobileBert Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g.
    for Named-Entity-Recognition (NER) tasks.
    c            	           e Zd Zg dZdgZ fdZe ee	                    d                     e
eeeee          	 	 	 	 	 	 	 	 	 	 	 ddd                                    ZddZ xZS )"TFMobileBertForTokenClassificationr   r   c                    t                      j        |g|R i | |j        | _        t          |dd          | _        |j        |j        n|j        }t          j        	                    |          | _
        t          j                            |j        t          |j                  d          | _        || _        d S )NFr  r  r  r   r  r  s        rJ   rY   z+TFMobileBertForTokenClassification.__init__]  s    3&333F333 +/%Vbccc)/)B)NF%%TZTn 	 |++,>??,,,/&BZ2[2[bn - 
 
 rL   r  r  NFr   r  r   r  r   r   r   r   r   r  rV  r_  r*   r   r-   tuple | TFTokenClassifierOutputc                J   |                      |||||||||	|
  
        }|d         }|                     ||          }|                     |          }|
dn|                     |
|          }|	s|f|dd         z   }||f|z   n|S t	          |||j        |j                  S )z
        labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        r  r   r   Nr   r  )r  r   r  rK   r   rh   r]  )r@   r   r   r   r   r   r   r   rV  r_  r*   r   r   r  r,   r  r   s                    rJ   ri   z'TFMobileBertForTokenClassification.callk  s    6 //))%'/!5# " 
 
 "!*,,,JJ11~tt4+?+?+O+O 	FY,F)-)9TGf$$vE&!/)	
 
 
 	
rL   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           `t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            d S # 1 swxY w Y   d S d S r  r  rq   s     rJ   ro   z(TFMobileBertForTokenClassification.build  r  r  r  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  rV  r  r_  r  r*   r  r   r  r-   rF  rf   )rM   rN   rO   r  r  rY   r   r!   r  r  r   $_CHECKPOINT_FOR_TOKEN_CLASSIFICATIONr   r  _TOKEN_CLASS_EXPECTED_OUTPUT_TOKEN_CLASS_EXPECTED_LOSSri   ro   ru   rv   s   @rJ   rD  rD  K  s       * * *& (2l#     **+F+M+MNk+l+lmm7+$40   .28<8<6:377;)-,0#'04 %.
 .
 .
 .
  nm ].
`	M 	M 	M 	M 	M 	M 	M 	MrL   rD  )
r  r6  r  r  r  r  rD  r  r  r  )frP   
__future__r   r  dataclassesr   numpynp
tensorflowr:   activations_tfr   modeling_tf_outputsr   r   r	   r
   r   r   r   r   modeling_tf_utilsr   r   r   r   r   r   r   r   r   r   r   r   tf_utilsr   r   r   utilsr   r   r    r!   r"   r#   configuration_mobilebertr%   
get_loggerrM   loggerr  r  rI  rJ  rK  r0  r3  r4  r1  r2  r  r  r  r)   rZ   LayerrS   LayerNormalizationrx   r   r   r   r   r   r   r  r  r  r'  r3  r8  r?  rS  re  rq  rw  r  r  r  r  MOBILEBERT_START_DOCSTRINGr  r  r  r  r  r  r  r  r6  rD  __all__rQ   rL   rJ   <module>r]     s 
      " " " " " "  ! ! ! ! ! !         / / / / / /	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	                            S R R R R R R R R R                7 6 6 6 6 6 
	H	%	%1 $ (L $l !  = '     +E '' ! Q Q Q Q Q Q Q Q8M M M M Mu|1 M M M64 4 4 4 4%,1 4 4 40 0 0 0 0u|! 0 0 0 %
:
:e  e  e  e  e U\/ e  e  e Pb b b b b 2 b b bJ+ + + + +U\/ + + +B%3 %3 %3 %3 %3EL. %3 %3 %3P+ + + + ++ + + +8&, &, &, &, &,+ &, &, &,R+ + + + +* + + +4,+ ,+ ,+ ,+ ,+5<% ,+ ,+ ,+^+ + + + +%,$ + + +43 3 3 3 3# 3 3 3.N& N& N& N& N&* N& N& N&b1& 1& 1& 1& 1&%,, 1& 1& 1&hH H H H H+ H H H@+ + + + +%,*< + + +>. . . . .5<#5 . . .b- - - - -%,, - - -$ ~( ~( ~( ~( ~(EL. ~( ~( ~(B% % % % %"3 % % % / / / / /{ / / /<( T5 p j ., ., ., ., .,3 ., .,	 .,b   d  d  d  d  d !<>Y d  d  d N TVpqq]  ]  ]  ]  ] 9;W ]  ]  rq] @S S S S Sel0 S S S& Z V% V% V% V% V%,GIe V% V%	 V%r   \M \M \M \M \M,GIe \M \M \M~   eM eM eM eM eM'BD[ eM eM eMP   gM gM gM gM gM$?AU gM gM gMT   [M [M [M [M [M)DF_ [M [M [M|  rL   