
     `i                       d Z ddlmZ ddlZddlZddlmZ ddlZ	ddl
mZ ddlmZmZmZmZ ddlmZmZmZmZmZmZ dd	lmZmZ dd
lmZmZmZmZm Z m!Z! ddl"m#Z#  e j$        e%          Z&dZ'dZ(g dZ)dZ*dZ+e G d de                      Z, G d dej-        j.                  Z/ G d dej-        j.                  Z0 G d dej-        j.                  Z1 G d dej-        j.                  Z2 G d dej-        j.                  Z3 G d dej-        j.                  Z4 G d d ej-        j.                  Z5 G d! d"ej-        j.                  Z6 G d# d$ej-        j.                  Z7e G d% d&ej-        j.                              Z8 G d' d(e          Z9d)Z:d*Z; ed+e:           G d, d-e9                      Z< G d. d/ej-        j.                  Z= G d0 d1ej-        j.                  Z> G d2 d3ej-        j.                  Z? ed4e:           G d5 d6e9                      Z@ ed7e:           G d8 d9e9e                      ZA ed:e:           G d; d<e9                      ZBg d=ZCdS )>zTensorFlow DeiT model.    )annotationsN)	dataclass   )get_tf_activation)TFBaseModelOutputTFBaseModelOutputWithPoolingTFImageClassifierOutputTFMaskedImageModelingOutput)TFPreTrainedModelTFSequenceClassificationLossget_initializerkeraskeras_serializableunpack_inputs)
shape_liststable_softmax)ModelOutputadd_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )
DeiTConfigr   z(facebook/deit-base-distilled-patch16-224)r      i   ztabby, tabby catc                  Z    e Zd ZU dZdZded<   dZded<   dZded<   dZded<   dZ	ded	<   dS )
-TFDeiTForImageClassificationWithTeacherOutputa  
    Output type of [`DeiTForImageClassificationWithTeacher`].

    Args:
        logits (`tf.Tensor` of shape `(batch_size, config.num_labels)`):
            Prediction scores as the average of the cls_logits and distillation logits.
        cls_logits (`tf.Tensor` of shape `(batch_size, config.num_labels)`):
            Prediction scores of the classification head (i.e. the linear layer on top of the final hidden state of the
            class token).
        distillation_logits (`tf.Tensor` of shape `(batch_size, config.num_labels)`):
            Prediction scores of the distillation head (i.e. the linear layer on top of the final hidden state of the
            distillation token).
        hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
            `(batch_size, sequence_length, hidden_size)`. Hidden-states of the model at the output of each layer plus
            the initial embedding outputs.
        attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`. Attentions weights after the attention softmax, used to compute the weighted average in
            the self-attention heads.
    Ntf.Tensor | Nonelogits
cls_logitsdistillation_logitsztuple[tf.Tensor] | Nonehidden_states
attentions)
__name__
__module____qualname____doc__r   __annotations__r    r!   r"   r#        }/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/deit/modeling_tf_deit.pyr   r   B   st          ,  $F#####'J'''',00000-1M1111*.J......r*   r   c                  F     e Zd ZdZdd fd	ZddZddZ	 	 	 dddZ xZS )TFDeiTEmbeddingszv
    Construct the CLS token, distillation token, position and patch embeddings. Optionally, also the mask token.
    Fconfigr   use_mask_tokenboolreturnNonec                     t                      j        di | || _        || _        t	          |d          | _        t          j                            |j	        d          | _
        d S )Npatch_embeddings)r.   namedropoutr5   r)   )super__init__r.   r/   TFDeiTPatchEmbeddingsr4   r   layersDropouthidden_dropout_probr6   )selfr.   r/   kwargs	__class__s       r+   r9   zTFDeiTEmbeddings.__init__f   sh    ""6""", 5VJ\ ] ] ]|++F,FY+WWr*   Nc                "   |                      dd| j        j        ft          j                                        dd          | _        |                      dd| j        j        ft          j                                        dd          | _        d | _        | j	        rG|                      dd| j        j        ft          j                                        dd          | _        | j
        j        }|                      d|dz   | j        j        ft          j                                        dd          | _        | j        rd S d| _        t          | d	d           Pt          j        | j
        j                  5  | j
                            d            d d d            n# 1 swxY w Y   t          | d
d           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )Nr   T	cls_token)shapeinitializer	trainabler5   distillation_token
mask_token   position_embeddingsr4   r6   )
add_weightr.   hidden_sizer   initializerszerosrB   rF   rG   r/   r4   num_patchesrI   builtgetattrtf
name_scoper5   buildr6   )r>   input_shaperN   s      r+   rS   zTFDeiTEmbeddings.buildm   s   a01*0022	 ) 
 
 #'//a01*0022%	 #2 #
 #
  	"oo!T[45!.4466!	 .  DO +7#'??kAot{'>?*0022&	 $3 $
 $
  : 	F
4+T22>t49:: 2 2%++D1112 2 2 2 2 2 2 2 2 2 2 2 2 2 24D))5t|011 ) )""4((() ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) 65s$   9F  F$'F$HH	H
embeddings	tf.Tensorheightintwidthc           
     d   |j         d         dz
  }| j        j         d         dz
  }||k    r||k    r| j        S | j        d d dd d f         }| j        d d dd d f         }| j        d d dd d d f         }|j         d         }	|| j        j        z  }
|| j        j        z  }|
dz   |dz   }}
t	          j        |dt          t          j        |                    t          t          j        |                    |	f          }t          j	        
                    |t          |
          t          |          fd          }t	          j        |g d	          }t	          j        |dd|	f          }t	          j        t	          j        |d
          t	          j        |d
          |gd
          S )Nr   rH   r   g?bicubic)sizemethodr   rH   r   r   permaxis)rC   rI   r.   
patch_sizerQ   reshaperX   mathsqrtimageresize	transposeconcatexpand_dims)r>   rU   rW   rY   rN   num_positionsclass_pos_embeddist_pos_embedpatch_pos_embeddimh0w0s               r+   interpolate_pos_encodingz)TFDeiTEmbeddings.interpolate_pos_encoding   s    &q)A-06q9A=-''FeOO++2111a7;1!!!Q':2111abb!!!8<r"t{--dk,, c28B*aTY}%=%=!>!>DImD\D\@]@]_bc
 
 (///R#b''@R[d/ee,\\\JJJ*_q"clCCy^O!444bn^Z[6\6\6\^mnuv
 
 
 	
r*   pixel_valuesbool_masked_posr   trainingrt   c                D   |j         \  }}}}|                     |          }t          |          \  }	}
}|\t          j        | j        |	|
dg          }t          j        |d          }t          j        ||j                  }|d|z
  z  ||z  z   }t          j	        | j
        |	d          }t          j	        | j        |	d          }t          j        |||fd          }| j        }|r|                     |||          }||z   }|                     ||          }|S )	Nr   r[   rb   dtypeg      ?r   )repeatsrc   rw   )rC   r4   r   rQ   tilerG   rl   castrz   repeatrB   rF   rk   rI   rt   r6   )r>   ru   rv   rw   rt   _rW   rY   rU   
batch_size
seq_lengthmask_tokensmask
cls_tokensdistillation_tokensposition_embeddings                   r+   callzTFDeiTEmbeddings.call   s@    +065!**<88
$.z$:$:!
J&'$/J
A3NOOK>/;;;D74{'8999D#sTz2[45GGJYt~zJJJ
 i(?Z[\\\Y
,?LSTUUU
!5# 	Z!%!>!>z6SX!Y!Y"44
\\*x\@@
r*   F)r.   r   r/   r0   r1   r2   N)rU   rV   rW   rX   rY   rX   r1   rV   )NFF)
ru   rV   rv   r   rw   r0   rt   r0   r1   rV   )	r$   r%   r&   r'   r9   rS   rt   r   __classcell__r@   s   @r+   r-   r-   a   s         X X X X X X X%) %) %) %)N
 
 
 
< -1).        r*   r-   c                  4     e Zd ZdZd fdZdd	ZddZ xZS )r:   z
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
    Transformer.
    r.   r   r1   r2   c                    t                      j        di | |j        |j        }}|j        |j        }}t          |t          j        j	                  r|n||f}t          |t          j        j	                  r|n||f}|d         |d         z  |d         |d         z  z  }|| _        || _        || _        || _
        t          j                            |||d          | _        d S )Nr   r   
projection)kernel_sizestridesr5   r)   )r8   r9   
image_sizerd   num_channelsrK   
isinstancecollectionsabcIterablerN   r   r;   Conv2Dr   )	r>   r.   r?   r   rd   r   rK   rN   r@   s	           r+   r9   zTFDeiTPatchEmbeddings.__init__   s    ""6"""!'!2F4EJ
$*$79Kk#-j+/:R#S#SqZZZdfpYq
#-j+/:R#S#SqZZZdfpYq
!!}
15*Q-:VW=:XY$$(&,--Z, . 
 
r*   ru   rV   c                   t          |          \  }}}}t          j                    r|| j        k    rt	          d          |                     |          }t          |          \  }}}}t          j        ||||z  |f          }|S )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)r   rQ   executing_eagerlyr   
ValueErrorr   re   )r>   ru   r   rW   rY   r   xs          r+   r   zTFDeiTPatchEmbeddings.call   s    2<\2J2J/
FE<!! 	ld6G&G&Gw   OOL))2<Q--/
FE<Jq:v~|DEEr*   Nc                    | j         rd S d| _         t          | dd           \t          j        | j        j                  5  | j                            d d d | j        g           d d d            d S # 1 swxY w Y   d S d S )NTr   )rO   rP   rQ   rR   r   r5   rS   r   r>   rT   s     r+   rS   zTFDeiTPatchEmbeddings.build   s    : 	F
4t,,8t344 M M%%tT49J&KLLLM M M M M M M M M M M M M M M M M M 98s    $A11A58A5r.   r   r1   r2   )ru   rV   r1   rV   r   r$   r%   r&   r'   r9   r   rS   r   r   s   @r+   r:   r:      sw         
 
 
 
 
 
"
 
 
 
M M M M M M M Mr*   r:   c                  <     e Zd Zd fdZdd	Z	 dddZddZ xZS )TFDeiTSelfAttentionr.   r   c                4    t                      j        d
i | |j        |j        z  dk    r t	          d|j         d|j         d          |j        | _        t          |j        |j        z            | _        | j        | j        z  | _        t          j	        | j                  | _
        t          j                            | j        t          |j                  d          | _        t          j                            | j        t          |j                  d          | _        t          j                            | j        t          |j                  d          | _        t          j                            |j        	          | _        || _        d S )Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()queryunitskernel_initializerr5   keyvaluerater)   )r8   r9   rK   num_attention_headsr   rX   attention_head_sizeall_head_sizerf   rg   sqrt_att_head_sizer   r;   Denser   initializer_ranger   r   r   r<   attention_probs_dropout_probr6   r.   r>   r.   r?   r@   s      r+   r9   zTFDeiTSelfAttention.__init__   s   ""6""" ::a??EF$6 E E'-'AE E E  
 $*#= #&v'9F<V'V#W#W !58PP"&)D,D"E"E\''$Ia9b9bip ( 
 

 <%%$Ia9b9bin & 
 
 \''$Ia9b9bip ( 
 

 |++1T+UUr*   tensorrV   r   rX   r1   c                z    t          j        ||d| j        | j        f          }t          j        |g d          S )Nr[   r   rC   r   rH   r   r   r`   )rQ   re   r   r   rj   )r>   r   r   s      r+   transpose_for_scoresz(TFDeiTSelfAttention.transpose_for_scores  sA    6*b$BZ\`\t1uvvv |F6666r*   Fr"   	head_maskoutput_attentionsr0   rw   tuple[tf.Tensor]c                   t          |          d         }|                     |          }|                     |          }|                     |          }|                     ||          }	|                     ||          }
|                     ||          }t          j        |	|
d          }t          j        | j        |j	                  }t          j
        ||          }t          |d          }|                     ||          }|t          j        ||          }t          j        ||          }t          j        |g d	
          }t          j        ||d| j        f          }|r||fn|f}|S )Nr   inputsT)transpose_bry   r[   )r   rc   r   rw   r   r`   r   )r   r   r   r   r   rQ   matmulr~   r   rz   divider   r6   multiplyrj   re   r   )r>   r"   r   r   rw   r   mixed_query_layermixed_key_layermixed_value_layerquery_layer	key_layervalue_layerattention_scoresdkattention_probsattention_outputoutputss                    r+   r   zTFDeiTSelfAttention.call  s     ..q1
 JJmJ<<((-(88 JJmJ<<//0A:NN--ozJJ	//0A:NN 9[)NNNWT,4D4JKKK9%5r:: )0@rJJJ ,,o,QQ   k/9EEO9_kBB<(8|||LLL :-=jRTVZVhEijjj9Jc#_55QaPcr*   Nc                   | j         rd S d| _         t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j	        j                  5  | j	                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           `t          j        | j
        j                  5  | j
                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S )NTr   r   r   )rO   rP   rQ   rR   r   r5   rS   r.   rK   r   r   r   s     r+   rS   zTFDeiTSelfAttention.buildG  s:   : 	F
4$''3tz// H H
  $dk.E!FGGGH H H H H H H H H H H H H H H4%%1tx}-- F FdDK,CDEEEF F F F F F F F F F F F F F F4$''3tz// H H
  $dk.E!FGGGH H H H H H H H H H H H H H H H H H 43s6    (A44A8;A8.(C""C&)C&(EEEr.   r   )r   rV   r   rX   r1   rV   r   
r"   rV   r   rV   r   r0   rw   r0   r1   r   r   )r$   r%   r&   r9   r   r   rS   r   r   s   @r+   r   r      s             47 7 7 7 ' ' ' ' 'RH H H H H H H Hr*   r   c                  6     e Zd ZdZd fdZdddZddZ xZS )TFDeiTSelfOutputz
    The residual connection is defined in TFDeiTLayer instead of here (as is the case with other models), due to the
    layernorm applied before each block.
    r.   r   c                    t                      j        di | t          j                            |j        t          |j                  d          | _        t          j        	                    |j
                  | _        || _        d S Ndenser   r   r)   r8   r9   r   r;   r   rK   r   r   r   r<   r=   r6   r.   r   s      r+   r9   zTFDeiTSelfOutput.__init__]  ~    ""6"""\''$Ia9b9bip ( 
 

 |++1K+LLr*   Fr"   rV   input_tensorrw   r0   r1   c                `    |                      |          }|                     ||          }|S Nr   r   r   r6   r>   r"   r   rw   s       r+   r   zTFDeiTSelfOutput.callf  s0    

-
88MHMMr*   Nc                   | j         rd S d| _         t          | dd           `t          j        | j        j                  5  | j                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S NTr   	rO   rP   rQ   rR   r   r5   rS   r.   rK   r   s     r+   rS   zTFDeiTSelfOutput.buildl      : 	F
4$''3tz// H H
  $dk.E!FGGGH H H H H H H H H H H H H H H H H H 43    (A55A9<A9r   r   r"   rV   r   rV   rw   r0   r1   rV   r   r   r   s   @r+   r   r   W  s|         
         H H H H H H H Hr*   r   c                  :     e Zd Zd fdZd Z	 dddZddZ xZS )TFDeiTAttentionr.   r   c                     t                      j        di | t          |d          | _        t	          |d          | _        d S )N	attentionr7   outputr)   )r8   r9   r   self_attentionr   dense_outputr   s      r+   r9   zTFDeiTAttention.__init__w  sP    ""6"""1&{KKK,V(CCCr*   c                    t           r   NotImplementedError)r>   headss     r+   prune_headszTFDeiTAttention.prune_heads}  s    !!r*   Fr   rV   r   r   r0   rw   r1   r   c                    |                      ||||          }|                     |d         ||          }|f|dd          z   }|S )Nr"   r   r   rw   r   r"   r   rw   r   )r   r   )r>   r   r   r   rw   self_outputsr   r   s           r+   r   zTFDeiTAttention.call  sn     **&)O`ks + 
 
  ,,&q/x - 
 
 $%QRR(88r*   Nc                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr   r   )rO   rP   rQ   rR   r   r5   rS   r   r   s     r+   rS   zTFDeiTAttention.build  sX   : 	F
4)400<t2788 0 0#))$///0 0 0 0 0 0 0 0 0 0 0 0 0 0 04..:t0566 . .!''---. . . . . . . . . . . . . . . . . . ;:$    A''A+.A+!C		CCr   r   )
r   rV   r   rV   r   r0   rw   r0   r1   r   r   )r$   r%   r&   r9   r   r   rS   r   r   s   @r+   r   r   v  s        D D D D D D" " "     "	. 	. 	. 	. 	. 	. 	. 	.r*   r   c                  0     e Zd Zd
 fdZddZdd	Z xZS )TFDeiTIntermediater.   r   c                D    t                      j        di | t          j                            |j        t          |j                  d          | _        t          |j
        t                    rt          |j
                  | _        n|j
        | _        || _        d S )Nr   r   r)   )r8   r9   r   r;   r   intermediate_sizer   r   r   r   
hidden_actstrr   intermediate_act_fnr.   r   s      r+   r9   zTFDeiTIntermediate.__init__  s    ""6"""\''*vOg?h?hov ( 
 

 f'-- 	9'89J'K'KD$$'-'8D$r*   r"   rV   r1   c                \    |                      |          }|                     |          }|S )Nr   )r   r   )r>   r"   s     r+   r   zTFDeiTIntermediate.call  s.    

-
8800??r*   Nc                   | j         rd S d| _         t          | dd           `t          j        | j        j                  5  | j                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S r   r   r   s     r+   rS   zTFDeiTIntermediate.build  r   r   r   r"   rV   r1   rV   r   r$   r%   r&   r9   r   rS   r   r   s   @r+   r   r     sm                H H H H H H H Hr*   r   c                  2     e Zd Zd fdZdddZddZ xZS )TFDeiTOutputr.   r   c                    t                      j        di | t          j                            |j        t          |j                  d          | _        t          j        	                    |j
                  | _        || _        d S r   r   r   s      r+   r9   zTFDeiTOutput.__init__  r   r*   Fr"   rV   r   rw   r0   r1   c                j    |                      |          }|                     ||          }||z   }|S r   r   r   s       r+   r   zTFDeiTOutput.call  s:    

-
88MHMM%4r*   Nc                   | j         rd S d| _         t          | dd           `t          j        | j        j                  5  | j                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S r   )	rO   rP   rQ   rR   r   r5   rS   r.   r   r   s     r+   rS   zTFDeiTOutput.build  s    : 	F
4$''3tz// N N
  $dk.K!LMMMN N N N N N N N N N N N N N N N N N 43r   r   r   r   r   r  r   s   @r+   r  r    sr                 N N N N N N N Nr*   r  c                  8     e Zd ZdZd fdZ	 dddZddZ xZS )TFDeiTLayerz?This corresponds to the Block class in the timm implementation.r.   r   c                x    t                      j        di | t          |d          | _        t	          |d          | _        t          |d          | _        t          j	        
                    |j        d          | _        t          j	        
                    |j        d          | _        || _        d S )	Nr   r7   intermediater   layernorm_beforeepsilonr5   layernorm_afterr)   )r8   r9   r   r   r   r
  r  deit_outputr   r;   LayerNormalizationlayer_norm_epsr  r  r.   r   s      r+   r9   zTFDeiTLayer.__init__  s    ""6"""(kBBB.vNKKK'X>>> % ? ?H]dv ? w w$|>>vG\ct>uur*   Fr"   rV   r   r   r0   rw   r1   r   c                $   |                      |                     ||          |||          }|d         }||z   }|                     ||          }|                     ||          }|                     |||          }|f|dd          z   }	|	S )Nr   )r   r   r   rw   r   )r"   rw   r   r   )r   r  r  r
  r  )
r>   r"   r   r   rw   attention_outputsr   layer_outputintermediate_outputr   s
             r+   r   zTFDeiTLayer.call  s     !NN..mh.WW/ + 
 
 -Q/ )=8 ++=8+TT"//lU]/^^ ''-MT\ ( 
 
  /$5abb$99r*   Nc                *   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j	        j                  5  | j	                            d d | j
        j        g           d d d            n# 1 swxY w Y   t          | dd           `t          j        | j        j                  5  | j                            d d | j
        j        g           d d d            d S # 1 swxY w Y   d S d S )NTr   r
  r  r  r  )rO   rP   rQ   rR   r   r5   rS   r
  r  r  r.   rK   r  r   s     r+   rS   zTFDeiTLayer.build  sP   : 	F
4d++7t~233 + +$$T***+ + + + + + + + + + + + + + +4..:t0566 . .!''---. . . . . . . . . . . . . . .4--9t/455 - - &&t,,,- - - - - - - - - - - - - - -4+T22>t49:: S S%++T49P,QRRRS S S S S S S S S S S S S S S4*D11=t3899 R R$**D$8O+PQQQR R R R R R R R R R R R R R R R R R >=sZ    A''A+.A+!CCCD))D-0D-#(FFF(HH
H
r   r   r   r   r   r   s   @r+   r  r    s~        II	 	 	 	 	 	      @R R R R R R R Rr*   r  c                  4     e Zd Zd fdZ	 dddZddZ xZS )TFDeiTEncoderr.   r   c                     t                      j        di | fdt          j                  D             | _        d S )Nc                8    g | ]}t          d |           S )zlayer_._r7   )r  ).0ir.   s     r+   
<listcomp>z*TFDeiTEncoder.__init__.<locals>.<listcomp>  s,    hhh1k&~!~~>>>hhhr*   r)   )r8   r9   rangenum_hidden_layerslayerr   s    ` r+   r9   zTFDeiTEncoder.__init__  sK    ""6"""hhhhfNfHgHghhh


r*   Fr"   rV   r   r   r0   output_hidden_statesreturn_dictrw   r1   $TFBaseModelOutput | tuple[tf.Tensor]c                $   |rdnd }|rdnd }t          | j                  D ]8\  }	}
|r||fz   } |
|||	         ||          }|d         }|r||d         fz   }9|r||fz   }|st          d |||fD                       S t          |||          S )Nr)   r   r   r   c              3     K   | ]}||V  	d S r   r)   )r  vs     r+   	<genexpr>z%TFDeiTEncoder.call.<locals>.<genexpr>@  s(      hhqZ[ZgZgZgZgZghhr*   )last_hidden_stater"   r#   )	enumerater   tupler   )r>   r"   r   r   r!  r"  rw   all_hidden_statesall_attentionsr  layer_modulelayer_outputss               r+   r   zTFDeiTEncoder.call   s    #7@BBD0:d(44 	F 	FOA|# I$58H$H!(L+#A,"3!	  M *!,M  F!/=3C2E!E   	E 1]4D D 	ihh]4E~$Vhhhhhh +;LYg
 
 
 	
r*   Nc                    | j         rd S d| _         t          | dd           P| j        D ]J}t          j        |j                  5  |                    d            d d d            n# 1 swxY w Y   Id S d S )NTr   )rO   rP   r   rQ   rR   r5   rS   )r>   rT   r   s      r+   rS   zTFDeiTEncoder.buildF  s    : 	F
4$''3 & &]5:.. & &KK%%%& & & & & & & & & & & & & & & 43& &s   A&&A*	-A*	r   r   )r"   rV   r   rV   r   r0   r!  r0   r"  r0   rw   r0   r1   r#  r   r  r   s   @r+   r  r    sv        i i i i i i $
 $
 $
 $
 $
L& & & & & & & &r*   r  c                  n     e Zd ZeZ	 dd fd
ZddZd Zd Ze		 	 	 	 	 	 	 	 d d!d            Z
d"dZ xZS )#TFDeiTMainLayerTFr.   r   add_pooling_layerr0   r/   r1   r2   c                ,    t                      j        di | || _        t          ||d          | _        t          |d          | _        t          j        	                    |j
        d          | _        |rt          |d          nd | _        d S )	NrU   )r/   r5   encoderr7   	layernormr  poolerr)   )r8   r9   r.   r-   rU   r  r4  r   r;   r  r  r5  TFDeiTPoolerr6  r>   r.   r2  r/   r?   r@   s        r+   r9   zTFDeiTMainLayer.__init__T  s     	""6"""*6.Wcddd$V)<<<88AV]h8ii=NXl69999TXr*   r:   c                    | j         j        S r   )rU   r4   )r>   s    r+   get_input_embeddingsz$TFDeiTMainLayer.get_input_embeddings`  s    //r*   c                    t           )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        r   )r>   heads_to_prunes     r+   _prune_headszTFDeiTMainLayer._prune_headsc  s
    
 "!r*   c                8    |t           d g| j        j        z  }|S r   )r   r.   r  )r>   r   s     r+   get_head_maskzTFDeiTMainLayer.get_head_maskj  s%     %%!>>Ir*   Nru   r   rv   r   r   bool | Noner!  r"  rt   rw   4TFBaseModelOutputWithPooling | tuple[tf.Tensor, ...]c	                (   ||n| j         j        }||n| j         j        }||n| j         j        }|t	          d          t          j        |d          }|                     |          }|                     ||||          }	| 	                    |	|||||          }
|
d         }| 
                    ||          }| j        |                     ||          nd }|s|||fn|f}||
dd          z   S t          |||
j        |
j                  S )	Nz You have to specify pixel_valuesr_   )rv   rw   rt   )r   r   r!  r"  rw   r   r|   r   )r(  pooler_outputr"   r#   )r.   r   r!  use_return_dictr   rQ   rj   r?  rU   r4  r5  r6  r   r"   r#   )r>   ru   rv   r   r   r!  r"  rt   rw   embedding_outputencoder_outputssequence_outputpooled_outputhead_outputss                 r+   r   zTFDeiTMainLayer.callr  sx    2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B]?@@@ |L,?? &&y11	??+%=	 + 
 
 ,,/!5# ' 
 
 *!,..8.LLKO;KbOhGGGhl 	6?L?XO];;_n^pL/!"""555+-')7&1	
 
 
 	
r*   c                N   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j        j                  5  | j                            d d | j	        j
        g           d d d            n# 1 swxY w Y   t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTrU   r4  r5  r6  )rO   rP   rQ   rR   rU   r5   rS   r4  r5  r.   rK   r6  r   s     r+   rS   zTFDeiTMainLayer.build  s   : 	F
4t,,8t344 , ,%%d+++, , , , , , , , , , , , , , ,4D))5t|011 ) )""4((() ) ) ) ) ) ) ) ) ) ) ) ) ) )4d++7t~233 L L$$dD$+2I%JKKKL L L L L L L L L L L L L L L44((4t{/00 ( (!!$'''( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( 54sH    A''A+.A+!CCC(D66D:=D:0FFFTFr.   r   r2  r0   r/   r0   r1   r2   )r1   r:   NNNNNNFF)ru   r   rv   r   r   r   r   r@  r!  r@  r"  r@  rt   r0   rw   r0   r1   rA  r   )r$   r%   r&   r   config_classr9   r:  r=  r?  r   r   rS   r   r   s   @r+   r1  r1  P  s        L Z_
Y 
Y 
Y 
Y 
Y 
Y 
Y0 0 0 0" " "    *.,0&*)-,0#').;
 ;
 ;
 ;
 ];
z( ( ( ( ( ( ( (r*   r1  c                      e Zd ZdZeZdZdZdS )TFDeiTPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    deitru   N)r$   r%   r&   r'   r   rN  base_model_prefixmain_input_namer)   r*   r+   rP  rP    s*         
 L$OOOr*   rP  aR  
    This model is a TensorFlow
    [keras.layers.Layer](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer). Use it as a regular
    TensorFlow Module and refer to the TensorFlow documentation for all matter related to general usage and behavior.

    Parameters:
        config ([`DeiTConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        pixel_values (`tf.Tensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`DeiTImageProcessor.__call__`] for details.

        head_mask (`tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        interpolate_pos_encoding (`bool`, *optional*, defaults to `False`):
            Whether to interpolate the pre-trained position encodings.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
z^The bare DeiT Model transformer outputting raw hidden-states without any specific head on top.c            	           e Zd Z	 dd fd
Ze ee           eee	e
de          	 	 	 	 	 	 	 	 ddd                                    ZddZ xZS ) TFDeiTModelTFr.   r   r2  r0   r/   r1   r2   c                l     t                      j        |fi | t          |||d          | _        d S )NrQ  r2  r/   r5   )r8   r9   r1  rQ  r8  s        r+   r9   zTFDeiTModel.__init__  sI     	**6***#&7]c
 
 
			r*   vision)
checkpointoutput_typerN  modalityexpected_outputNru   r   rv   r   r   r@  r!  r"  rt   rw   $tuple | TFBaseModelOutputWithPoolingc	           
     @    |                      ||||||||          }	|	S )N)ru   rv   r   r   r!  r"  rt   rw   )rQ  )
r>   ru   rv   r   r   r!  r"  rt   rw   r   s
             r+   r   zTFDeiTModel.call   s;    ( ))%+/!5#%=  	
 	
 r*   c                    | j         rd S d| _         t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTrQ  )rO   rP   rQ   rR   rQ  r5   rS   r   s     r+   rS   zTFDeiTModel.build   s    : 	F
4&&2ty~.. & &	%%%& & & & & & & & & & & & & & & & & & 32s    A((A,/A,rK  rL  rM  )ru   r   rv   r   r   r   r   r@  r!  r@  r"  r@  rt   r0   rw   r0   r1   r]  r   )r$   r%   r&   r9   r   r   DEIT_INPUTS_DOCSTRINGr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPEr   rS   r   r   s   @r+   rU  rU    s         Z_
 
 
 
 
 
 
 **+@AA&0$.   *.,0&*)-,0#').     BA ].& & & & & & & &r*   rU  c                  0     e Zd Zd
 fdZddZdd	Z xZS )r7  r.   r   c                     t                      j        di | t          j                            |j        t          |j                  |j        d          | _	        || _
        d S )Nr   )r   r   
activationr5   r)   )r8   r9   r   r;   r   pooler_output_sizer   r   
pooler_actr   r.   r   s      r+   r9   zTFDeiTPooler.__init__+  sh    ""6"""\''+.v/GHH(	 ( 
 

 r*   r"   rV   r1   c                J    |d d df         }|                      |          }|S )Nr   r   )r   )r>   r"   first_token_tensorrH  s       r+   r   zTFDeiTPooler.call6  s1     +111a40

*<
==r*   Nc                   | j         rd S d| _         t          | dd           `t          j        | j        j                  5  | j                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S r   r   r   s     r+   rS   zTFDeiTPooler.build>  r   r   r   r   r   r  r   s   @r+   r7  r7  *  sm        	 	 	 	 	 	   H H H H H H H Hr*   r7  c                  ,     e Zd ZdZd
 fdZdd	Z xZS )TFDeitPixelShufflez0TF layer implementation of torch.nn.PixelShuffleupscale_factorrX   r1   r2   c                     t                      j        di | t          |t                    r|dk     rt	          d|           || _        d S )NrH   z1upscale_factor must be an integer value >= 2 got r)   )r8   r9   r   rX   r   rn  )r>   rn  r?   r@   s      r+   r9   zTFDeitPixelShuffle.__init__J  sc    ""6""".#.. 	c.12D2DaQ_aabbb,r*   r   rV   c                r   |}t          |          \  }}}}| j        dz  t          |z            t          j        fdt                    D             g          }t          j        |t          j        ||dg          d          }t          j        	                    || j        d          }|S )NrH   c                D    g | ]}t                    D ]
}||z  z   S r)   )r  )r  r  jblock_size_squaredoutput_depths      r+   r  z+TFDeitPixelShuffle.call.<locals>.<listcomp>Z  s<    iiiQUZ[gUhUhiiPQa!(((iiiir*   r   r[   )paramsindices
batch_dimsNHWC)
block_sizedata_format)
r   rn  rX   rQ   constantr  gatherr}   nndepth_to_space)	r>   r   r"   r   r   num_input_channelspermutationrs  rt  s	          @@r+   r   zTFDeitPixelShuffle.callP  s    /9-/H/H,
Aq,!0!3-0BBCC
 kiiiii%8J2K2Kiiij
 
 	V`bcUd@e@ertuuu,,]tGZhn,oor*   )rn  rX   r1   r2   )r   rV   r1   rV   )r$   r%   r&   r'   r9   r   r   r   s   @r+   rm  rm  G  sW        ::- - - - - -       r*   rm  c                  2     e Zd Zd fdZdddZddZ xZS )TFDeitDecoderr.   r   r1   r2   c                     t                      j        di | t          j                            |j        dz  |j        z  dd          | _        t          |j        d          | _	        || _
        d S )NrH   r   0)filtersr   r5   1r7   r)   )r8   r9   r   r;   r   encoder_strider   conv2drm  pixel_shuffler.   r   s      r+   r9   zTFDeitDecoder.__init__b  s|    ""6"""l)))1,v/BBPQX[ * 
 
 00ECPPPr*   Fr   rV   rw   r0   c                ^    |}|                      |          }|                     |          }|S r   )r  r  )r>   r   rw   r"   s       r+   r   zTFDeitDecoder.callj  s1    M22**=99r*   Nc                   | j         rd S d| _         t          | dd           ^t          j        | j        j                  5  | j                            d d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           St          j        | j	        j                  5  | j	                            d            d d d            d S # 1 swxY w Y   d S d S )NTr  r  )
rO   rP   rQ   rR   r  r5   rS   r.   rK   r  r   s     r+   rS   zTFDeitDecoder.buildp  st   : 	F
44((4t{/00 O O!!4tT[5L"MNNNO O O O O O O O O O O O O O O4$//;t1677 / /"((.../ / / / / / / / / / / / / / / / / / <;s$    )A55A9<A9/CCCr   r   )r   rV   rw   r0   r1   rV   r   r  r   s   @r+   r  r  a  sj                 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/r*   r  z~DeiT Model with a decoder on top for masked image modeling, as proposed in [SimMIM](https://huggingface.co/papers/2111.09886).c                       e Zd Zd fdZe ee           eee	          	 	 	 	 	 	 	 	 ddd                                    Z
ddZ xZS )TFDeiTForMaskedImageModelingr.   r   r1   r2   c                    t                                          |           t          |ddd          | _        t	          |d          | _        d S )NFTrQ  rW  decoderr7   )r8   r9   r1  rQ  r  r  r>   r.   r@   s     r+   r9   z%TFDeiTForMaskedImageModeling.__init__  sM       #FeTX_efff	$V)<<<r*   rZ  rN  NFru   r   rv   r   r   r@  r!  r"  rt   r0   rw   #tuple | TFMaskedImageModelingOutputc	           
        ||n| j         j        }|                     ||||||||          }	|	d         }
|
ddddf         }
t          |
          \  }}}t	          |dz            x}}t          j        |
||||f          }
|                     |
|          }t          j        |d          }d}|U| j         j	        | j         j
        z  }t          j        |d||f          }t          j        || j         j
        d          }t          j        || j         j
        d	          }t          j        |d          }t          j        |t
          j                  }t          j                            t          j        |d
          t          j        |d
                    }t          j        |d          }t          j        ||z            }t          j        |          dz   | j         j        z  }||z  }t          j        |d          }|s|f|	dd         z   }||f|z   n|S t)          |||	j        |	j                  S )a  
        bool_masked_pos (`tf.Tensor` of type bool and shape `(batch_size, num_patches)`):
            Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).

        Returns:

        Examples:
        ```python
        >>> from transformers import AutoImageProcessor, TFDeiTForMaskedImageModeling
        >>> import tensorflow as tf
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> image_processor = AutoImageProcessor.from_pretrained("facebook/deit-base-distilled-patch16-224")
        >>> model = TFDeiTForMaskedImageModeling.from_pretrained("facebook/deit-base-distilled-patch16-224")

        >>> num_patches = (model.config.image_size // model.config.patch_size) ** 2
        >>> pixel_values = image_processor(images=image, return_tensors="tf").pixel_values
        >>> # create random boolean mask of shape (batch_size, num_patches)
        >>> bool_masked_pos = tf.cast(tf.random.uniform((1, num_patches), minval=0, maxval=2, dtype=tf.int32), tf.bool)

        >>> outputs = model(pixel_values, bool_masked_pos=bool_masked_pos)
        >>> loss, reconstructed_pixel_values = outputs.loss, outputs.reconstruction
        >>> list(reconstructed_pixel_values.shape)
        [1, 3, 224, 224]
        ```N)rv   r   r   r!  r"  rt   rw   r   r   r[   g      ?r|   )r   r   r   rH   rH   )r   rH   r   r   gh㈵>)r   )lossreconstructionr"   r#   )r.   rD  rQ  r   rX   rQ   re   r  rj   r   rd   r   rl   r~   float32r   lossesmean_absolute_error
reduce_sumr   r
   r"   r#   )r>   ru   rv   r   r   r!  r"  rt   rw   r   rG  r   sequence_lengthr   rW   rY   reconstructed_pixel_valuesmasked_im_lossr]   r   reconstruction_loss
total_lossnum_masked_pixelsr   s                           r+   r   z!TFDeiTForMaskedImageModeling.call  se   V &1%<kk$+B]))+/!5#%=  	
 	
 "!* *!!!QrT'24>4O4O1
O\_c1222*_z65R^6_`` &*\\/H\%U%U" &(\2Ll%[%["&;)T[-CCD j2tT:JKKO9_dk.DaHHD9T4;#91==D>$**D74,,D"',"B"B\<887FF# #
 #%.1Da"H"H':T'ABBJ!#t!4!4t!;t{?W W'*;;NZ==N 	Z02WQRR[@F3A3M^%..SYY*5!/)	
 
 
 	
r*   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTrQ  r  )rO   rP   rQ   rR   rQ  r5   rS   r  r   s     r+   rS   z"TFDeiTForMaskedImageModeling.build  sP   : 	F
4&&2ty~.. & &	%%%& & & & & & & & & & & & & & &4D))5t|011 ) )""4((() ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) 65r   r   rM  )ru   r   rv   r   r   r   r   r@  r!  r@  r"  r@  rt   r0   rw   r0   r1   r  r   )r$   r%   r&   r9   r   r   r`  r   r
   rb  r   rS   r   r   s   @r+   r  r  |  s        = = = = = = **+@AA+FUdeee *.,0&*)-,0#').a
 a
 a
 a
 fe BA ]a
F	) 	) 	) 	) 	) 	) 	) 	)r*   r  z
    DeiT Model transformer with an image classification head on top (a linear layer on top of the final hidden state of
    the [CLS] token) e.g. for ImageNet.
    c                       e Zd Zd fdZe ee           eee	          	 	 	 	 	 	 	 	 ddd                                    Z
ddZ xZS )TFDeiTForImageClassificationr.   r   c                L   t                                          |           |j        | _        t          |dd          | _        |j        dk    r&t
          j                            |j        d          n t
          j                            dd          | _	        || _
        d S )NFrQ  r2  r5   r   
classifierr7   linear)r8   r9   
num_labelsr1  rQ  r   r;   r   
Activationr  r.   r  s     r+   r9   z%TFDeiTForImageClassification.__init__  s        +#Fe&QQQ	
  1$$ Lv0|DDD(((EE 	
 r*   r  NFru   r   r   labelsr   r@  r!  r"  rt   r0   rw   r1   #tf.Tensor | TFImageClassifierOutputc	           	     P   ||n| j         j        }|                     |||||||          }	|	d         }
|                     |
dddddf                   }|dn|                     ||          }|s|f|	dd         z   }||f|z   n|S t          |||	j        |	j                  S )a  
        labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

        Returns:

        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, TFDeiTForImageClassification
        >>> import tensorflow as tf
        >>> from PIL import Image
        >>> import requests

        >>> keras.utils.set_random_seed(3)  # doctest: +IGNORE_RESULT
        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> # note: we are loading a TFDeiTForImageClassificationWithTeacher from the hub here,
        >>> # so the head will be randomly initialized, hence the predictions will be random
        >>> image_processor = AutoImageProcessor.from_pretrained("facebook/deit-base-distilled-patch16-224")
        >>> model = TFDeiTForImageClassification.from_pretrained("facebook/deit-base-distilled-patch16-224")

        >>> inputs = image_processor(images=image, return_tensors="tf")
        >>> outputs = model(**inputs)
        >>> logits = outputs.logits
        >>> # model predicts one of the 1000 ImageNet classes
        >>> predicted_class_idx = tf.math.argmax(logits, axis=-1)[0]
        >>> print("Predicted class:", model.config.id2label[int(predicted_class_idx)])
        Predicted class: little blue heron, Egretta caerulea
        ```Nr   r   r!  r"  rt   rw   r   r   )r  r   r"   r#   )r.   rD  rQ  r  hf_compute_lossr	   r"   r#   )r>   ru   r   r  r   r!  r"  rt   rw   r   rG  r   r  r   s                 r+   r   z!TFDeiTForImageClassification.call  s    ^ &1%<kk$+B]))/!5#%=  
 
 "!*Aqqq!9:: ~tt4+?+?+O+O 	FY,F)-)9TGf$$vE&!/)	
 
 
 	
r*   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           `t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            d S # 1 swxY w Y   d S d S )NTrQ  r  )
rO   rP   rQ   rR   rQ  r5   rS   r  r.   rK   r   s     r+   rS   z"TFDeiTForImageClassification.build]  sp   : 	F
4&&2ty~.. & &	%%%& & & & & & & & & & & & & & &4t,,8t344 M M%%tT4;3J&KLLLM M M M M M M M M M M M M M M M M M 98s$    A''A+.A+!(CCCr   rM  )ru   r   r   r   r  r   r   r@  r!  r@  r"  r@  rt   r0   rw   r0   r1   r  r   )r$   r%   r&   r9   r   r   r`  r   r	   rb  r   rS   r   r   s   @r+   r  r    s              **+@AA+BQ`aaa *.&*#')-,0#').H
 H
 H
 H
 ba BA ]H
T	M 	M 	M 	M 	M 	M 	M 	Mr*   r  a  
    DeiT Model transformer with image classification heads on top (a linear layer on top of the final hidden state of
    the [CLS] token and a linear layer on top of the final hidden state of the distillation token) e.g. for ImageNet.

    .. warning::

            This model supports inference-only. Fine-tuning with distillation (i.e. with a teacher) is not yet
            supported.
    c                       e Zd Zd fdZe ee           eee	e
e          	 	 	 	 	 	 	 ddd                                    ZddZ xZS )'TFDeiTForImageClassificationWithTeacherr.   r   r1   r2   c                   t                                          |           |j        | _        t          |dd          | _        |j        dk    r&t
          j                            |j        d          n t
          j                            dd          | _	        |j        dk    r&t
          j                            |j        d          n t
          j                            dd          | _
        || _        d S )	NFrQ  r  r   cls_classifierr7   r  distillation_classifier)r8   r9   r  r1  rQ  r   r;   r   r  r  r  r.   r  s     r+   r9   z0TFDeiTForImageClassificationWithTeacher.__init__v  s        +#Fe&QQQ	
  1$$ Lv07GHHH((8H(II 	  1$$ Lv07PQQQ((8Q(RR 	$
 r*   )rY  rZ  rN  r\  NFru   r   r   r   r@  r!  r"  rt   r0   rw   5tuple | TFDeiTForImageClassificationWithTeacherOutputc           	     f   ||n| j         j        }|                     |||||||          }|d         }	|                     |	d d dd d f                   }
|                     |	d d dd d f                   }|
|z   dz  }|s||
|f|dd          z   }|S t          ||
||j        |j                  S )Nr  r   r   rH   )r   r    r!   r"   r#   )r.   rD  rQ  r  r  r   r"   r#   )r>   ru   r   r   r!  r"  rt   rw   r   rG  r    r!   r   r   s                 r+   r   z,TFDeiTForImageClassificationWithTeacher.call  s   $ &1%<kk$+B]))/!5#%=  
 
 "!*((Aqqq)ABB
"::?111aQRQRQR7;STT 22a7 	j*=>LFM<! 3!/)
 
 
 	
r*   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            n# 1 swxY w Y   t          | dd           `t          j        | j
        j                  5  | j
                            d d | j        j	        g           d d d            d S # 1 swxY w Y   d S d S )NTrQ  r  r  )rO   rP   rQ   rR   rQ  r5   rS   r  r.   rK   r  r   s     r+   rS   z-TFDeiTForImageClassificationWithTeacher.build  s&   : 	F
4&&2ty~.. & &	%%%& & & & & & & & & & & & & & &4)400<t2788 Q Q#))4t{7N*OPPPQ Q Q Q Q Q Q Q Q Q Q Q Q Q Q42D99Et;@AA Z Z,22D$@W3XYYYZ Z Z Z Z Z Z Z Z Z Z Z Z Z Z Z Z Z FEs6    A''A+.A+!(CCC(EEEr   )NNNNNFF)ru   r   r   r   r   r@  r!  r@  r"  r@  rt   r0   rw   r0   r1   r  r   )r$   r%   r&   r9   r   r   r`  r   _IMAGE_CLASS_CHECKPOINTr   rb  _IMAGE_CLASS_EXPECTED_OUTPUTr   rS   r   r   s   @r+   r  r  i  s             & **+@AA*A$4	   *.&*)-,0#').(
 (
 (
 (
  BA ](
TZ Z Z Z Z Z Z Zr*   r  )r  r  r  rU  rP  )Dr'   
__future__r   collections.abcr   rf   dataclassesr   
tensorflowrQ   activations_tfr   modeling_tf_outputsr   r   r	   r
   modeling_tf_utilsr   r   r   r   r   r   tf_utilsr   r   utilsr   r   r   r   r   r   configuration_deitr   
get_loggerr$   loggerrb  ra  rc  r  r  r   r;   Layerr-   r:   r   r   r   r   r  r  r  r1  rP  DEIT_START_DOCSTRINGr`  rU  r7  rm  r  r  r  r  __all__r)   r*   r+   <module>r     sm     " " " " " "      ! ! ! ! ! !     / / / / / /                           3 2 2 2 2 2 2 2                + * * * * * 
	H	%	%  A &  E 1  / / / / /K / / /<j j j j ju|) j j jZ*M *M *M *M *MEL. *M *M *M\WH WH WH WH WH%,, WH WH WHvH H H H Hu|) H H H>$. $. $. $. $.el( $. $. $.PH H H H H+ H H H<N N N N N5<% N N N4@R @R @R @R @R%,$ @R @R @RH3& 3& 3& 3& 3&EL& 3& 3& 3&l n( n( n( n( n(el( n( n( n(d% % % % %- % % %	  2 d 0& 0& 0& 0& 0&' 0& 0&	 0&hH H H H H5<% H H H:    +   4/ / / / /EL& / / /6 ; 
v) v) v) v) v)#8 v) v) 
v)r   eM eM eM eM eM#8:V eM eM eMP   RZ RZ RZ RZ RZ.C RZ RZ RZj  r*   