
     `iY+                   <   d Z ddlmZ ddlZddlZddlZddlmZ ddl	m
Z
mZmZmZ ddlmZmZmZmZmZmZmZmZ ddlmZ dd	lmZmZmZ d
dlmZ dZg dg dgZ g dg dg dgg dg dg dggZ!dZ" G d dej#        j$                  Z% G d dej#        j$                  Z& G d dej#        j$                  Z' G d dej#        j$                  Z( G d dej#        j$                  Z) G d  d!ej#        j$                  Z* G d" d#ej#        j$                  Z+ G d$ d%ej#        j$                  Z, G d& d'ej#        j$                  Z-e G d( d)ej#        j$                              Z. G d* d+e          Z/d,Z0d-Z1 ed.e0           G d/ d0e/                      Z2 G d1 d2ej#        j$                  Z3 ed3e0           G d4 d5e/e                      Z4 ed6e0           G d7 d8e/e                      Z5 ed9e0           G d: d;e/e                      Z6g d<Z7dS )=zTF 2.0 LayoutLMv3 model.    )annotationsN   )get_tf_activation)TFBaseModelOutputTFQuestionAnsweringModelOutputTFSequenceClassifierOutputTFTokenClassifierOutput)TFPreTrainedModelTFQuestionAnsweringLossTFSequenceClassificationLossTFTokenClassificationLossget_initializerkeraskeras_serializableunpack_inputs)check_embeddings_within_bounds)add_start_docstrings%add_start_docstrings_to_model_forwardreplace_return_docstrings   )LayoutLMv3Configr   )      r   )r      r   )r   r   r      )   r   r      )	   
         )            )            )            g    חc                  4     e Zd ZdZd fdZddZdd
Z xZS )TFLayoutLMv3PatchEmbeddingsz$LayoutLMv3 image (patch) embeddings.configr   c                    t                      j        d	i | t          |j        t          j        j                  r|j        n|j        |j        f}t          j        	                    |j
        ||dddt          |j                  d          | _        |j
        | _
        |j        dz  |d         |d         z  z  | _        || _        d S )
Nvalidchannels_lastTproj)filterskernel_sizestridespaddingdata_formatuse_biaskernel_initializernamer   r   r    )super__init__
isinstance
patch_sizecollectionsabcIterabler   layersConv2Dhidden_sizer   initializer_ranger4   
input_sizenum_patchesr0   )selfr0   kwargspatch_sizes	__class__s       /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.pyr?   z$TFLayoutLMv3PatchEmbeddings.__init__A   s    ""6""" &+[_-EFF8F#V%67 	
 L''&#'.v/GHH ( 	
 	
	 "-"-q0k!n{ST~6UV    pixel_values	tf.Tensorreturnc                    t          j        |g d          }|                     |          }t          j        |d| j        | j        f          }|S )N)r   r   r   r   perm)tf	transposer4   reshaperJ   rG   )rK   rQ   
embeddingss      rO   callz TFLayoutLMv3PatchEmbeddings.callV   sQ     |L|||DDDYY|,,
Z
R1A4CS,TUU
rP   Nc                
   | j         rd S d| _         t          | dd           at          j        | j        j                  5  | j                            d d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S )NTr4   )	builtgetattrrX   
name_scoper4   r<   buildr0   num_channelsrK   input_shapes     rO   ra   z!TFLayoutLMv3PatchEmbeddings.build_   s    : 	F
4&&2ty~.. N N	tT4;3K LMMMN N N N N N N N N N N N N N N N N N 32s    )A66A:=A:r0   r   rQ   rR   rS   rR   N__name__
__module____qualname____doc__r?   r\   ra   __classcell__rN   s   @rO   r/   r/   >   ss        ..     *   N N N N N N N NrP   r/   c                  b     e Zd ZdZd fdZddZdd
ZddZddZ	 	 	 	 	 	 dddZ	ddZ
 xZS ) TFLayoutLMv3TextEmbeddingszm
    LayoutLMv3 text embeddings. Same as `RobertaEmbeddings` but with added spatial (layout) embeddings.
    r0   r   c                    t                      j        di | t          j                            |j        |j        t          |j                  d          | _	        t          j                            |j
        |j        t          |j                  d          | _        t          j                            |j        d          | _        t          j                            |j                  | _        |j        | _        t          j                            |j        |j        t          |j                  d          | _        t          j                            |j        |j        t          |j                  d          | _        t          j                            |j        |j        t          |j                  d          | _        t          j                            |j        |j        t          |j                  d	          | _        t          j                            |j        |j        t          |j                  d
          | _        |j        | _        || _        d S )Nword_embeddings)embeddings_initializerr<   token_type_embeddings	LayerNormepsilonr<   position_embeddingsx_position_embeddingsy_position_embeddingsh_position_embeddingsw_position_embeddingsr=   )r>   r?   r   rE   	Embedding
vocab_sizerG   r   rH   rr   type_vocab_sizert   LayerNormalizationlayer_norm_epsru   Dropouthidden_dropout_probdropoutpad_token_idpadding_token_indexmax_position_embeddingsrx   max_2d_position_embeddingscoordinate_sizery   rz   
shape_sizer{   r|   max_2d_positionsr0   rK   r0   rL   rN   s      rO   r?   z#TFLayoutLMv3TextEmbeddings.__init__m   s-   ""6"""$|55#263K#L#L"	  6  
  
 &+\%;%;"#263K#L#L(	 &< &
 &
" 88AV]h8ii|++F,FGG#)#6 #(<#9#9*#263K#L#L&	 $: $
 $
  &+\%;%;-"#263K#L#L(	 &< &
 &
" &+\%;%;-"#263K#L#L(	 &< &
 &
" &+\%;%;-#263K#L#L(	 &< &
 &
" &+\%;%;-#263K#L#L(	 &< &
 &
" !' ArP   bboxrR   rS   c           	     ,   	 |d d d d df         }|d d d d df         }|d d d d df         }|d d d d df         }n"# t           $ r}t          d          |d }~ww xY w	 |                     |          }|                     |          }|                     |          }	|                     |          }
n+# t           $ r}t          d| j         d          |d }~ww xY w| j        dz
  }|                     t          j        |d d d d df         |d d d d df         z
  d|                    }|                     t          j        |d d d d df         |d d d d df         z
  d|                    }t          j        |||	|
||gd	          }|S )
Nr   r   r   r   z9Bounding box is not of shape (batch_size, seq_length, 4).z0The `bbox` coordinate values should be within 0-z range.rW   axis)	
IndexErrorry   rz   r   r{   rX   clip_by_valuer|   concat)rK   r   left_position_idsupper_position_idsright_position_idslower_position_ids	exceptionleft_position_embeddingsupper_position_embeddingsright_position_embeddingslower_position_embeddingsmax_position_idr{   r|   spatial_position_embeddingss                  rO   %calculate_spatial_position_embeddingsz@TFLayoutLMv3TextEmbeddings.calculate_spatial_position_embeddings   s1   	i $QQQ1W!%aaaAg!%aaaAg!%aaaAg 	i 	i 	iXYY_hh	i	'+'A'ABS'T'T$(,(B(BCU(V(V%(,(B(BCU(V(V%(,(B(BCU(V(V%% 	 	 	a4CXaaa 	
 /!3 $ : :T!!!QQQ']T!!!QQQ']:AOO!
 !
 !% : :T!!!QQQ']T!!!QQQ']:AOO!
 !

 ')i()))%% 
'
 
'
 
'
# +*s.   <? 
A	AA"AB7 7
CCCinputs_embdsc                   t          j        |          }|d         }| j        dz   }| j        |z   dz   }t          j        ||t           j                  }|d         }t          j        |d|f          }t          j        ||df          }|S )z
        We are provided embeddings directly. We cannot infer which are padded, so just generate sequential position
        ids.
        r   dtyper   )rX   shaper   rangeint32rZ   tile)rK   r   rd   sequence_lengthstart_index	end_indexposition_ids
batch_sizes           rO   &create_position_ids_from_inputs_embedszATFLayoutLMv3TextEmbeddings.create_position_ids_from_inputs_embeds   s    
 h|,,%a..2,>B	xYbhGGG ^
z,O0DEEw|j!_==rP   	input_idsc                    t          j        t          j        || j                  |j                  }t          j        |d          |z  }|| j        z   }|S )z}
        Replace non-padding symbols with their position numbers. Position numbers begin at padding_token_index + 1.
        r   r   )rX   cast	not_equalr   r   cumsum)rK   r   maskr   s       rO   "create_position_ids_from_input_idsz=TFLayoutLMv3TextEmbeddings.create_position_ids_from_input_ids   sR     wr|It/GHH)/ZZyA...5#d&>>rP   inputs_embedsc                Z    ||                      |          S |                     |          S rg   )r   r   )rK   r   r   s      rO   create_position_idsz.TFLayoutLMv3TextEmbeddings.create_position_ids   s0    >>}MMM::9EEErP   NFtf.Tensor | Nonetoken_type_idsr   trainingboolc                ,   ||                      ||          }|t          j        |          }nt          j        |          d d         }|t          j        ||j                  }|/t          || j        j                   |                     |          }|                     |          }||z   }	| 	                    |          }
|	|
z  }	| 
                    |          }|	|z  }	|                     |	          }	|                     |	|          }	|	S )NrW   r   r   )r   rX   r   zerosr   r   rr   	input_dimrt   rx   r   ru   r   )rK   r   r   r   r   r   r   rd   rt   r[   rx   r   s               rO   r\   zTFLayoutLMv3TextEmbeddings.call   s$    33I}MML (9--KK(=11#2#6K!Xk9KLLLN *9d6J6TUUU 00;;M $ : :> J J"%::
"66|DD))
&*&P&PQU&V&V#11
^^J//
\\*x\@@
rP   c                V   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j        j                  5  | j                            d d | j	        j
        g           d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | d	d           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )
NTrr   rt   ru   rx   ry   rz   r{   r|   )r^   r_   rX   r`   rr   r<   ra   rt   ru   r0   rG   rx   ry   rz   r{   r|   rc   s     rO   ra   z TFLayoutLMv3TextEmbeddings.build  s   : 	F
4*D11=t3899 1 1$**40001 1 1 1 1 1 1 1 1 1 1 1 1 1 140$77Ct9>?? 7 7*006667 7 7 7 7 7 7 7 7 7 7 7 7 7 74d++7t~233 L L$$dD$+2I%JKKKL L L L L L L L L L L L L L L4.55At7<== 5 5(..t4445 5 5 5 5 5 5 5 5 5 5 5 5 5 540$77Ct9>?? 7 7*006667 7 7 7 7 7 7 7 7 7 7 7 7 7 740$77Ct9>?? 7 7*006667 7 7 7 7 7 7 7 7 7 7 7 7 7 740$77Ct9>?? 7 7*006667 7 7 7 7 7 7 7 7 7 7 7 7 7 740$77Ct9>?? 7 7*006667 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 DCs    A''A+.A+!CCC(D66D:=D:0FFFG88G<?G<2II IJ::J>J>4LL #L re   )r   rR   rS   rR   )r   rR   rS   rR   )r   rR   rS   rR   )r   rR   r   rR   rS   rR   )NNNNNF)r   r   r   r   r   r   r   r   r   r   r   r   rS   rR   rg   )ri   rj   rk   rl   r?   r   r   r   r   r\   ra   rm   rn   s   @rO   rp   rp   h   s         0 0 0 0 0 0d'+ '+ '+ '+R      F F F F '+!%+/)-*.# # # # #J7 7 7 7 7 7 7 7rP   rp   c                  J     e Zd Zd fdZddZdddZ	 	 	 dd dZd!dZ xZS )"TFLayoutLMv3SelfAttentionr0   r   c                b    t                      j        d	i | |j        |j        z  dk    r t	          d|j         d|j         d          |j        | _        t          |j        |j        z            | _        | j        | j        z  | _        t          j	        | j                  | _
        t          j                            | j        t          |j                  d          | _        t          j                            | j        t          |j                  d          | _        t          j                            | j        t          |j                  d          | _        t          j                            |j                  | _        |j        | _        |j        | _        || _        d S )
Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()queryr;   r<   keyvaluer=   )r>   r?   rG   num_attention_heads
ValueErrorintattention_head_sizeall_head_sizemathsqrtattention_score_normaliserr   rE   Denser   rH   r   r   r   r   attention_probs_dropout_probr   has_relative_attention_biashas_spatial_attention_biasr0   r   s      rO   r?   z"TFLayoutLMv3SelfAttention.__init__*  s   ""6""" ::a??8F$6 8 8 48 8 8  
 $*#= #&v'9F<V'V#W#W !58PP*.)D4L*M*M'\''.v/GHH ( 
 


 <%%.v/GHH & 
 

 \''.v/GHH ( 
 

 |++F,OPP+1+M(*0*K'rP   xrR   c                    t          j        |          }|d         |d         | j        | j        f}t          j        ||          }t          j        |g d          S )Nr   r   r   r   r   r   rU   )rX   r   r   r   rZ   rY   )rK   r   r   	new_shapes       rO   transpose_for_scoresz.TFLayoutLMv3SelfAttention.transpose_for_scoresL  s\    !H!H$$	
	 Jq)$$|ALLL1111rP       attention_scoresalphafloat | intc                    ||z  }t          j        t          j        |d          d          }||z
  |z  }t           j                            |d          S )a  
        https://huggingface.co/papers/2105.13290 Section 2.4 Stabilization of training: Precision Bottleneck Relaxation
        (PB-Relax). A replacement of the original keras.layers.Softmax(axis=-1)(attention_scores). Seems the new
        attention_probs will result in a slower speed and a little bias. Can use
        tf.debugging.assert_near(standard_attention_probs, cogview_attention_probs, atol=1e-08) for comparison. The
        smaller atol (e.g., 1e-08), the better.
        rW   r   )rX   expand_dims
reduce_maxr   softmax)rK   r   r   scaled_attention_scores	max_valuenew_attention_scoress         rO   cogview_attentionz+TFLayoutLMv3SelfAttention.cogview_attentionW  s]     #3U":N2=1Hr#R#R#RY[\\\	 7) CuLw3"===rP   NFhidden_statesattention_maskr   	head_maskoutput_attentionsr   rel_pos
rel_2d_posr   rS   .tuple[tf.Tensor] | tuple[tf.Tensor, tf.Tensor]c                $   |                      |                     |                    }|                      |                     |                    }	|                      |                     |                    }
|
| j        z  }t          j        |g d          }t          j        ||          }| j        r| j	        r|||z   | j        z  z  }n| j        r||| j        z  z  }|||z  }| 
                    |          }|                     ||          }|||z  }t          j        ||	          }t          j        |g d          }t          j        |          }t          j        ||d         |d         | j        f          }|r||fn|f}|S )N)r   r   r   r   rU   r   r   r   r   )r   r   r   r   r   rX   rY   matmulr   r   r   r   r   rZ   r   )rK   r   r   r   r   r   r   r   	key_layervalue_layerquery_layernormalised_query_layertransposed_key_layerr   attention_probscontext_layerr   outputss                     rO   r\   zTFLayoutLMv3SelfAttention.calld  s    --dhh}.E.EFF	//

=0I0IJJ//

=0I0IJJ "-t/N!N!|LLL 
  
  
 9%;=QRR+ 	J0O 	J:!59X XX- 	J$*I II%. 001ABB,,,JJ  -	9O	/;??
 
 
 ''
E!HeAh0BC
 
 7H]=/22mM]rP   c                   | j         rd S d| _         t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j	        j                  5  | j	                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           `t          j        | j
        j                  5  | j
                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S )NTr   r   r   )r^   r_   rX   r`   r   r<   ra   r0   rG   r   r   rc   s     rO   ra   zTFLayoutLMv3SelfAttention.build  s:   : 	F
4$''3tz// H H
  $dk.E!FGGGH H H H H H H H H H H H H H H4%%1tx}-- F FdDK,CDEEEF F F F F F F F F F F F F F F4$''3tz// H H
  $dk.E!FGGGH H H H H H H H H H H H H H H H H H 43s6    (A44A8;A8.(C""C&)C&(EEEre   )r   rR   )r   )r   rR   r   r   NNFr   rR   r   r   r   r   r   r   r   r   r   r   r   r   rS   r   rg   )	ri   rj   rk   r?   r   r   r\   ra   rm   rn   s   @rO   r   r   )  s                   D	2 	2 	2 	2> > > > >& %)'+3 3 3 3 3jH H H H H H H HrP   r   c                  2     e Zd Zd fdZdddZddZ xZS )TFLayoutLMv3SelfOutputr0   r   c                n    t                      j        di | t          j                            |j        t          |j                  d          | _        t          j        	                    |j
        d          | _        t          j                            |j                  | _        || _        d S Ndenseunitsr;   r<   ru   rv   )rater=   r>   r?   r   rE   r   rG   r   rH   r   r   r   ru   r   r   r   r0   r   s      rO   r?   zTFLayoutLMv3SelfOutput.__init__      ""6"""\''$Ia9b9bip ( 
 

 88AV]h8ii|++1K+LLrP   Fr   rR   input_tensorr   r   rS   c                    |                      |          }|                     ||          }|                     ||z             }|S Ninputs)r  r   r   r   ru   rK   r   r  r   s       rO   r\   zTFLayoutLMv3SelfOutput.call  H    

-
88MHMMml.JKKrP   Nc                   | j         rd S d| _         t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           `t          j        | j	        j                  5  | j	                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S NTr   ru   )
r^   r_   rX   r`   r   r<   ra   r0   rG   ru   rc   s     rO   ra   zTFLayoutLMv3SelfOutput.build  s   : 	F
4$''3tz// H H
  $dk.E!FGGGH H H H H H H H H H H H H H H4d++7t~233 L L$$dD$+2I%JKKKL L L L L L L L L L L L L L L L L L 87$    (A44A8;A8.(C##C'*C're   Fr   rR   r  rR   r   r   rS   rR   rg   ri   rj   rk   r?   r\   ra   rm   rn   s   @rO   r   r     r                 	L 	L 	L 	L 	L 	L 	L 	LrP   r   c                  8     e Zd Zd fdZ	 	 	 dddZddZ xZS )TFLayoutLMv3Attentionr0   r   c                     t                      j        di | t          |d          | _        t	          |d          | _        d S )NrK   r<   outputr=   )r>   r?   r   self_attentionr   self_outputr   s      rO   r?   zTFLayoutLMv3Attention.__init__  sP    ""6"""7VLLL1&xHHHrP   NFr   rR   r   r   r   r   r   r   r   r   rS   r   c           	         |                      |||||||          }|                     |d         ||          }	|	f|dd          z   }
|
S )Nr   r   r   )r  r  )rK   r   r   r   r   r   r   r   self_outputsattention_outputr   s              rO   r\   zTFLayoutLMv3Attention.call  sp     ** + 
 
  ++LO]U]+^^#%QRR(88rP   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr  r  )r^   r_   rX   r`   r  r<   ra   r  rc   s     rO   ra   zTFLayoutLMv3Attention.build  sX   : 	F
4)400<t2788 0 0#))$///0 0 0 0 0 0 0 0 0 0 0 0 0 0 04--9t/455 - - &&t,,,- - - - - - - - - - - - - - - - - - :9$    A''A+.A+!C		CCre   r   r   rg   r  rn   s   @rO   r  r    s{        I I I I I I %)'+    .	- 	- 	- 	- 	- 	- 	- 	-rP   r  c                  0     e Zd Zd
 fdZddZdd	Z xZS )TFLayoutLMv3Intermediater0   r   c                D    t                      j        di | t          j                            |j        t          |j                  d          | _        t          |j
        t                    rt          |j
                  | _        n|j
        | _        || _        d S )Nr   r   r=   )r>   r?   r   rE   r   intermediate_sizer   rH   r   r@   
hidden_actstrr   intermediate_act_fnr0   r   s      rO   r?   z!TFLayoutLMv3Intermediate.__init__  s    ""6"""\''*vOg?h?hov ( 
 

 f'-- 	9'89J'K'KD$$'-'8D$rP   r   rR   rS   c                \    |                      |          }|                     |          }|S )Nr  )r   r%  )rK   r   s     rO   r\   zTFLayoutLMv3Intermediate.call  s.    

-
8800??rP   Nc                   | j         rd S d| _         t          | dd           `t          j        | j        j                  5  | j                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S )NTr   )	r^   r_   rX   r`   r   r<   ra   r0   rG   rc   s     rO   ra   zTFLayoutLMv3Intermediate.build  s    : 	F
4$''3tz// H H
  $dk.E!FGGGH H H H H H H H H H H H H H H H H H 43s    (A55A9<A9re   )r   rR   rS   rR   rg   r  rn   s   @rO   r   r     sm                H H H H H H H HrP   r   c                  2     e Zd Zd fdZdddZddZ xZS )TFLayoutLMv3Outputr0   r   c                n    t                      j        di | t          j                            |j        t          |j                  d          | _        t          j        	                    |j
        d          | _        t          j                            |j                  | _        || _        d S r   r  r   s      rO   r?   zTFLayoutLMv3Output.__init__  r  rP   Fr   rR   r  r   r   rS   c                    |                      |          }|                     ||          }|                     ||z             }|S r  r	  r
  s       rO   r\   zTFLayoutLMv3Output.call  r  rP   Nc                   | j         rd S d| _         t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           `t          j        | j	        j                  5  | j	                            d d | j        j
        g           d d d            d S # 1 swxY w Y   d S d S r  )r^   r_   rX   r`   r   r<   ra   r0   r"  ru   rG   rc   s     rO   ra   zTFLayoutLMv3Output.build!  s   : 	F
4$''3tz// N N
  $dk.K!LMMMN N N N N N N N N N N N N N N4d++7t~233 L L$$dD$+2I%JKKKL L L L L L L L L L L L L L L L L L 87r  re   r  r  rg   r  rn   s   @rO   r)  r)    r  rP   r)  c                  8     e Zd Zd fdZ	 	 	 dddZddZ xZS )TFLayoutLMv3Layerr0   r   c                     t                      j        di | t          |d          | _        t	          |d          | _        t          |d          | _        d S )N	attentionr  intermediater  r=   )r>   r?   r  r0  r   r1  r)  bert_outputr   s      rO   r?   zTFLayoutLMv3Layer.__init__.  sd    ""6""".vKHHH4V.QQQ-f8DDDrP   NFr   rR   r   r   r   r   r   r   r   r   rS   r   c           	         |                      |||||||          }|d         }	|dd          }
|                     |	          }|                     ||	|          }|f|
z   }
|
S )N)r   r   r   r   r   r   r   )r0  r1  r2  )rK   r   r   r   r   r   r   r   self_attention_outputsr  r   intermediate_outputlayer_outputs                rO   r\   zTFLayoutLMv3Layer.call4  s     "&/! "0 "
 "
 2!4(,"//0@AA''(;=MX`'aa/G+rP   c                r   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr0  r1  r2  )	r^   r_   rX   r`   r0  r<   ra   r1  r2  rc   s     rO   ra   zTFLayoutLMv3Layer.buildN  s   : 	F
4d++7t~233 + +$$T***+ + + + + + + + + + + + + + +4..:t0566 . .!''---. . . . . . . . . . . . . . .4--9t/455 - - &&t,,,- - - - - - - - - - - - - - - - - - :9s6    A''A+.A+!CCCD**D.1D.re   r   r   rg   r  rn   s   @rO   r.  r.  -  s{        E E E E E E %)'+    4- - - - - - - -rP   r.  c                  b     e Zd Zd! fdZd"d	Zd#dZd$dZd%dZ	 	 	 	 	 	 	 	 d&d'dZd(d Z	 xZ
S ))TFLayoutLMv3Encoderr0   r   c                    t                      j        di | | _        fdt          j                  D             | _        j        | _        j        | _        | j        rWj        | _        j	        | _	        t          j                            j        t          j                  dd          | _        | j        rj        | _        j        | _        t          j                            j        t          j                  dd          | _        t          j                            j        t          j                  dd          | _        d S d S )Nc                8    g | ]}t          d |           S )zlayer.r  )r.  ).0ir0   s     rO   
<listcomp>z0TFLayoutLMv3Encoder.__init__.<locals>.<listcomp>a  s-    lllq'\a\\BBBlllrP   Frel_pos_bias)r   r;   r:   r<   rel_pos_x_biasrel_pos_y_biasr=   )r>   r?   r0   r   num_hidden_layerslayerr   r   rel_pos_binsmax_rel_posr   rE   r   r   r   rH   r?  max_rel_2d_posrel_2d_pos_binsr@  rA  r   s    ` rO   r?   zTFLayoutLMv3Encoder.__init__^  sf   ""6"""llllERXRjLkLklll
+1+M(*0*K'+ 	 & 3D%1D % 2 20#263K#L#L#	 !3 ! !D * 	"("7D#)#9D "',"4"40#263K#L#L%	 #5 # #D #(,"4"40#263K#L#L%	 #5 # #D	 	rP   relative_positionsrR   num_bucketsr   max_distancec                   |dz  }t          j        |          }|dz  }||k     }t           j                            t          j        |t           j                  |z            }t          j        ||z            }||z  ||z
  z  }	||	z   }
t          j        |
|j                  }
t          j        |
|dz
            }
t          j        |dk    |j                  |z  t          j        |||
          z   S )Nr   r   r   )	rX   absr   logr   float32r   minimumwhere)rK   rH  rI  rJ  bucketsmax_exact_bucketsis_smallbuckets_log_ratiodistance_log_ratiobuckets_big_offsetbuckets_bigs              rO   relative_position_bucketz,TFLayoutLMv3Encoder.relative_position_bucket  s    "Q&&+,, (1,.. GKK(D(DGX(XYY!Xl5F&FGG 22kDU6UV 	 (*<<gk7=99jkAo>>*Q.>>LPRPXg{Q
 Q
 
 	
rP   dense_layerkeras.layers.Denser   c                @   t          j        |d          t          j        |d          z
  }|                     |||          }t          j        ||| j                  } ||          }t          j        |g d          }t          j        || j                  }|S )Nr   rW   )depthr   )r   r   r   r   r   )rX   r   rX  one_hotcompute_dtyperY   r   )	rK   rY  r   rI  rJ  rel_pos_matrixr   rel_pos_one_hot	embeddings	            rO   _cal_pos_embz TFLayoutLMv3Encoder._cal_pos_emb  s     2>>>P\ceAfAfAff//\ZZ*WKtGYZZZK00	LLLL99	GIT-?@@@	rP   c                P    |                      | j        || j        | j                  S rg   )rc  r?  rD  rE  )rK   r   s     rO   _cal_1d_pos_embz#TFLayoutLMv3Encoder._cal_1d_pos_emb  s&      !2L$BSUYUefffrP   r   c                    |d d d d df         }|d d d d df         }|                      | j        || j        | j                  }|                      | j        || j        | j                  }||z   }|S )Nr   r   )rc  r@  rG  rF  rA  )rK   r   position_coord_xposition_coord_y	rel_pos_x	rel_pos_yr   s          rO   _cal_2d_pos_embz#TFLayoutLMv3Encoder._cal_2d_pos_emb  s    111a=111a=%% 	
 
	 %% 	
 
	 *
rP   NFTr   r   r   r   r   r   output_hidden_statesreturn_dictr   rS   kTFBaseModelOutput | tuple[tf.Tensor] | tuple[tf.Tensor, tf.Tensor] | tuple[tf.Tensor, tf.Tensor, tf.Tensor]c
           
        |rdnd }
|rdnd }| j         r|                     |          nd }| j        r|                     |          nd }t	          | j                  D ]A\  }}|r|
|fz   }
|||         nd } ||||||||	          }|d         }|r||d         fz   }B|r|
|fz   }
|rt          ||
|          S t          d ||
|fD                       S )Nr=   )r   r   r   r   r   last_hidden_stater   
attentionsc              3     K   | ]}||V  	d S rg   r=   )r<  r   s     rO   	<genexpr>z+TFLayoutLMv3Encoder.call.<locals>.<genexpr>  s1        ^c^o^o^o^o^o rP   )r   re  r   rk  	enumeraterC  r   tuple)rK   r   r   r   r   r   rl  rm  r   r   all_hidden_statesall_self_attentionsr   r   r=  layer_modulelayer_head_masklayer_outputss                     rO   r\   zTFLayoutLMv3Encoder.call  sx    #7@BBD$5?bb48<8Xb$&&|444^b373R\T))$///X\
(44 	P 	POA|# I$58H$H!.7.CillO(L!%!  M *!,M  P&9]1=M<O&O# 	E 1]4D D 		$"//.      $13DFY#Z     rP   c                b   | j         rd S d| _         t          | dd           Xt          j        | j        j                  5  | j                            d d | j        g           d d d            n# 1 swxY w Y   t          | dd           Xt          j        | j        j                  5  | j                            d d | j	        g           d d d            n# 1 swxY w Y   t          | dd           Xt          j        | j
        j                  5  | j
                            d d | j	        g           d d d            n# 1 swxY w Y   t          | dd           P| j        D ]J}t          j        |j                  5  |                    d            d d d            n# 1 swxY w Y   Id S d S )NTr?  r@  rA  rC  )r^   r_   rX   r`   r?  r<   ra   rD  r@  rG  rA  rC  )rK   rd   rC  s      rO   ra   zTFLayoutLMv3Encoder.build  s   : 	F
4..:t0566 I I!''tT5F(GHHHI I I I I I I I I I I I I I I4)400<t2788 N N#))4t7K*LMMMN N N N N N N N N N N N N N N4)400<t2788 N N#))4t7K*LMMMN N N N N N N N N N N N N N N4$''3 & &]5:.. & &KK%%%& & & & & & & & & & & & & & & 43& &sH    #A//A36A3)#CCC#EEE?F!!F%	(F%	re   )rH  rR   rI  r   rJ  r   )rY  rZ  r   rR   rI  r   rJ  r   )r   rR   )r   rR   )NNNFFTNF)r   rR   r   r   r   r   r   r   r   r   rl  r   rm  r   r   r   r   r   rS   rn  rg   )ri   rj   rk   r?   rX  rc  re  rk  r\   ra   rm   rn   s   @rO   r9  r9  ]  s                   D
 
 
 
2    g g g g   * "&+/&*"'%* )-2 2 2 2 2h& & & & & & & &rP   r9  c                       e Zd ZeZd. fdZd/dZd0dZd1dZd Z	d2d3dZ
d4dZd5dZd6dZd7dZe	 	 	 	 	 	 	 	 	 	 	 	 d8d9d-            Z xZS ):TFLayoutLMv3MainLayerr0   r   c                J    t                      j        di | || _        |j        rt	          |d          | _        |j        rt          |d          | _        t          j
                            |j        d          | _        t          j
                            |j        d          | _        |j        s|j        r'|j        |j        z  }|                     ||f           t          j
                            dd	          | _        t/          |d
          | _        d S )Nr[   r  patch_embedru   rv   r   )
image_sizegư>normencoderr=   )r>   r?   r0   
text_embedrp   r[   visual_embedr/   r  r   rE   r   r   ru   r   r   r   r   r   rI   rA   init_visual_bboxr  r9  r  )rK   r0   rL   r  rN   s       rO   r?   zTFLayoutLMv3MainLayer.__init__	  s   ""6""" 	T8lSSSDO 		S:6VVVD"\<<VEZal<mmDN <//0JQZ/[[DL1 KV5V K#.&2CC
%%*j1I%JJJ7767RRDI*6	BBBrP   Nc                   | j         j        r| j         j        | j         j        z  }|                     dd| j         j        fddt          j        d          | _        |                     d||z  dz   | j         j        fddt          j        d          | _	        | j
        rd S d| _
        t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | d	d           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | d
d           ]t          j        | j        j                  5  | j                            d d | j         j        g           d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           `t          j        | j        j                  5  | j                            d d | j         j        g           d d d            d S # 1 swxY w Y   d S d S )Nr   r   T	cls_token)r   initializer	trainabler   r<   	pos_embedr  r[   r  ru   r   r  )r0   r  rI   rA   
add_weightrG   rX   rN  r  r  r^   r_   r`   r  r<   ra   r[   r  ru   r   r  )rK   rd   r  s      rO   ra   zTFLayoutLMv3MainLayer.build  st   ;# 	/4;3IIJ!__!T[45#j  -  DN "__*z1A5t{7NO#j  -  DN : 	F
4D))5t|011 ) )""4((() ) ) ) ) ) ) ) ) ) ) ) ) ) )4t,,8t344 , ,%%d+++, , , , , , , , , , , , , , ,4--9t/455 - - &&t,,,- - - - - - - - - - - - - - -4d++7t~233 L L$$dD$+2I%JKKKL L L L L L L L L L L L L L L4D))5t|011 ) )""4((() ) ) ) ) ) ) ) ) ) ) ) ) ) )4&&2ty~.. G G	tT[-D EFFFG G G G G G G G G G G G G G G G G G 32sl   C>>DD8EE#&E#G  GG:(H..H25H2(JJJ	(K>>LLrS   keras.layers.Layerc                    | j         j        S rg   )r[   rr   )rK   s    rO   get_input_embeddingsz*TFLayoutLMv3MainLayer.get_input_embeddingsF  s    ..rP   r   tf.Variablec                (    || j         j        _        d S rg   )r[   rr   weight)rK   r   s     rO   set_input_embeddingsz*TFLayoutLMv3MainLayer.set_input_embeddingsI  s    16'...rP   c                    t           )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        )NotImplementedError)rK   heads_to_prunes     rO   _prune_headsz"TFLayoutLMv3MainLayer._prune_headsM  s
    
 "!rP     r  tuple[int, int]max_lenr   c                   |\  }}t          j        d||dz   z  |          |z  }t          j        |d          }t          j        ||dg          }t          j        d||dz   z  |          |z  }t          j        |d          }t          j        |d|g          }t          j        |d d d df         |d d         |d d dd f         |dd          gd          }t          j        |ddg          }t          j        dd|dz
  |dz
  ggt           j                  }t          j        ||gd          | _	        d S )Nr   r   r   rW   r   r   )
rX   r   r   r   stackrZ   constantr   r   visual_bbox)	rK   r  r  heightwidthvisual_bbox_xvisual_bbox_yr  cls_token_boxs	            rO   r  z&TFLayoutLMv3MainLayer.init_visual_bboxT  s`    #Guqy$97CCuL}1===qz::Gvz$:GDDN}1===6{;;h111crc6"M#2#$6aaae8Lm\]\^\^N_`
 
 
 jr1g66aGaK1%E$FbhWWW9m[%AJJJrP   r   r   tf.DTypec                    t          j        | j        d          }t          j        ||ddg          }t          j        ||          }|S )Nr   r   r   r   )rX   r   r  r   r   )rK   r   r   r  s       rO   calculate_visual_bboxz+TFLayoutLMv3MainLayer.calculate_visual_bboxk  sK    nT%5A>>>gkJ1+=>>gk777rP   rQ   rR   c                .   |                      |          }t          j        |          d         }t          j        | j        |ddg          }t          j        ||gd          }t          | dd           
|| j        z  }|                     |          }|S )Nr   r   r   r  )	r  rX   r   r   r  r   r_   r  r  )rK   rQ   r[   r   
cls_tokenss        rO   embed_imagez!TFLayoutLMv3MainLayer.embed_imageq  s    %%l33
 Xj))!,
WT^j!Q-?@@
Y
J7a@@@
 4d++7$.(JYYz**
rP   r   c                L   t          |j                  }|dk    rt          j        |d          }nK|dk    r-t          j        |d          }t          j        |d          }nt	          d|j         d          t          j        || j                  }d|z
  t          z  }|S )Nr   r   r   r   z&Wrong shape for attention_mask (shape ).g      ?)lenr   rX   r   r   r   r_  LARGE_NEGATIVE)rK   r   n_dimsextended_attention_masks       rO   get_extended_attention_maskz1TFLayoutLMv3MainLayer.get_extended_attention_mask  s     ^)** Q;;&(n^!&L&L&L##q[[ ')n^!&L&L&L#&(n5LST&U&U&U##^nFZ^^^___ #%'*A4CU"V"V#&)@#@N"R&&rP   r   r   "tf.Tensor | list[tf.Tensor | None]c                   |d g| j         j        z  S t          j        |          }|dk    r}t          j        |d          }t          j        |d          }t          j        |d          }t          j        |d          }t          j        || j         j        ddddg          }ng|dk    rCt          j        |d          }t          j        |d          }t          j        |d          }n|dk    rt          d|j         d          t          j        |          dk    s J d	t          j        |           d
            t          j        || j	                  }|S )Nr   r   r   rW   r   r   z!Wrong shape for head_mask (shape r  zGot head_mask rank of z, but require 5.)
r0   rB  rX   rankr   r   r   r   r   r_  )rK   r   r  s      rO   get_head_maskz#TFLayoutLMv3MainLayer.get_head_mask  sk   6DK999##Q;;yq999Iyq999Iyr:::Iyr:::IDK91aAF II q[[yq999Iyr:::Iyr:::IIq[[TTTTUUUwy!!Q&&&(eASAS(e(e(e&&&GIt'9::	rP   Fr   r   r   r   r   r   bool | Nonerl  rm  r   r   rn  c           
        |	|	n| j         j        }	|
|
n| j         j        }
||n| j         j        }|%t	          j        |          }|d         }|d         }nS|%t	          j        |          }|d         }|d         }n,|t	          j        |          d         }nt          d          ||j        }n*||j        }n ||j        }n||j        }nt          j        }||j|t	          j	        ||f|          }|t	          j
        ||f|          }|t	          j
        ||df|          }|                     ||||||          }d }d }||                     |          }t	          j	        |t	          j        |          d         f|          }||}nt	          j        ||gd          }| j         j        r3|                     ||          }||}nt	          j        ||gd          }| j         j        s| j         j        rt	          j        dt	          j        |          d         |          }t	          j        |d          }t	          j        ||dg          }||[t	          j        t	          j        d||          d          }t	          j        ||dg          }t	          j        ||gd          }n|}|||}nt	          j        ||gd          }|                     |          }|                     ||          }nv| j         j        s| j         j        r^| j         j        rDt	          j        t	          j        d||          d          }t	          j        ||dg          }|}| j         j        r|}|                     |          }|                     |          }|                     ||||||	|
|	          }|d         }|s|f|dd          z   S t3          ||j        |j        
          S )Nr   r   zEYou have to specify either input_ids or inputs_embeds or pixel_valuesr   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   rl  rm  rp  )r0   r   rl  rm  rX   r   r   r   r   onesr   r[   r  r   r   r  r   r   r   r   ru   r   r  r  r  r   r   rr  )rK   r   r   r   r   r   r   r   rQ   r   rl  rm  r   rd   r   
seq_length	int_dtypeembedding_output
final_bboxfinal_position_idsvisual_embeddingsvisual_attention_maskr  visual_position_idsr  encoder_outputssequence_outputs                              rO   r\   zTFLayoutLMv3MainLayer.call  s   , 2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+BY (9--K$QJ$QJJ&(=11K$QJ$QJJ%,//2JJdeee  !II
II'&,II'&,III M$=%!#*j)A!S!S!S%!#:z*B)!T!T!T|xZ ;9MMM##)-+!  /     
!# $ 0 0 > > %'GZBS9T9TUV9W,X`i$j$j$j!%!6!#N<Q+RYZ![![![ {5 H"88YOO<!,JJ!#D++>Q!G!G!GJ {6 
=$+:` 
=&(hq"(;L2M2Ma2PXa&b&b&b#&(n5Hq&Q&Q&Q#&(g.AJPQ?&S&S#(M,E#%>"(1jPY2Z2Z2Zab#c#c#cL#%7<*a#I#IL)+LBU3V]^)_)_)_&&)<&  ]%:#4  #%9.>@Q-RYZ#[#[#[ #~~.>??#||,<x|PP[4 	"8^ 	"{6 2!~bhq*I.V.V.V]^___!w|j!_EE%1"{5 "!
"&"B"B>"R"R &&y11	,,+2/!5# ' 	
 	
 *!, 	<#%(;;; -)7&1
 
 
 	
rP   re   rg   )rS   r  )r   r  )r  )r  r  r  r   )r   r   r   r  rf   )r   rR   rS   rR   )r   r   rS   r  NNNNNNNNNNNFr   r   r   r   r   r   r   r   r   r   r   r   r   r   rQ   r   r   r  rl  r  rm  r  r   r   rS   rn  )ri   rj   rk   r   config_classr?   ra   r  r  r  r  r  r  r  r  r   r\   rm   rn   s   @rO   r~  r~    sZ       #LC C C C C C*&G &G &G &GP/ / / /7 7 7 7" " "K K K K K.      ' ' ' '6   2  '+!%+/+/)-&**.)-)-,0#'[
 [
 [
 [
 ][
 [
 [
 [
 [
rP   r~  c                  :     e Zd ZdZeZdZe fd            Z xZ	S )TFLayoutLMv3PreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    
layoutlmv3c                v    t                      j        }t          j        dt          j        d          |d<   |S )N)NNr   r   r  )r>   input_signaturerX   
TensorSpecr   )rK   sigrN   s     rO   r  z+TFLayoutLMv3PreTrainedModel.input_signature\  s0    gg%mORXFKKKF
rP   )
ri   rj   rk   rl   r   r  base_model_prefixpropertyr  rm   rn   s   @rO   r  r  S  sY         
 $L$    X    rP   r  a	  
    This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a [keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it
    as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matter related to general usage and
    behavior.

    <Tip>

    TensorFlow models and layers in `transformers` accept two formats as input:

    - having all inputs as keyword arguments (like PyTorch models), or
    - having all inputs as a list, tuple or dict in the first positional argument.

    The reason the second format is supported is that Keras methods prefer this format when passing inputs to models
    and layers. Because of this support, when using methods like `model.fit()` things should "just work" for you - just
    pass your inputs and labels in any format that `model.fit()` supports! If, however, you want to use the second
    format outside of Keras methods like `fit()` and `predict()`, such as when creating your own layers or models with
    the Keras `Functional` API, there are three possibilities you can use to gather all the input Tensors in the first
    positional argument:

    - a single Tensor with `input_ids` only and nothing else: `model(input_ids)`
    - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
    `model([input_ids, attention_mask])` or `model([input_ids, attention_mask, token_type_ids])`
    - a dictionary with one or several input Tensors associated to the input names given in the docstring:
    `model({"input_ids": input_ids, "token_type_ids": token_type_ids})`

    Note that when creating models and layers with
    [subclassing](https://keras.io/guides/making_new_layers_and_models_via_subclassing/) then you don't need to worry
    about any of this, as you can just pass inputs like you would to any other Python function!

    </Tip>

    Parameters:
        config ([`LayoutLMv3Config`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~TFPreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        input_ids (`Numpy array` or `tf.Tensor` of shape `(batch_size, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS]
            token. See `pixel_values` for `patch_sequence_length`.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)

        bbox (`Numpy array` or `tf.Tensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner.

            Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS]
            token. See `pixel_values` for `patch_sequence_length`.

        pixel_values (`tf.Tensor` of shape `(batch_size, num_channels, height, width)`):
            Batch of document images. Each image is divided into patches of shape `(num_channels, config.patch_size,
            config.patch_size)` and the total number of patches (=`patch_sequence_length`) equals to `((height /
            config.patch_size) * (width / config.patch_size))`.

        attention_mask (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS]
            token. See `pixel_values` for `patch_sequence_length`.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`Numpy array` or `tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS]
            token. See `pixel_values` for `patch_sequence_length`.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`Numpy array` or `tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS]
            token. See `pixel_values` for `patch_sequence_length`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
zdThe bare LayoutLMv3 Model transformer outputting raw hidden-states without any specific head on top.c                       e Zd ZdgZ fdZe ee           ee	e
          	 	 	 	 	 	 	 	 	 	 	 	 ddd                                    ZddZ xZS )TFLayoutLMv3Modelr   c                n     t                      j        |g|R i | t          |d          | _        d S )Nr  r  )r>   r?   r~  r  )rK   r0   r  rL   rN   s       rO   r?   zTFLayoutLMv3Model.__init__  sB    3&333F333/\JJJrP   output_typer  NFr   r   r   r   r   r   r   rQ   r   r  rl  rm  r   r   rS   rn  c                H    |                      |||||||||	|
||          }|S )a  
        Returns:

        Examples:

        ```python
        >>> from transformers import AutoProcessor, TFAutoModel
        >>> from datasets import load_dataset

        >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
        >>> model = TFAutoModel.from_pretrained("microsoft/layoutlmv3-base")

        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
        >>> example = dataset[0]
        >>> image = example["image"]
        >>> words = example["tokens"]
        >>> boxes = example["bboxes"]

        >>> encoding = processor(image, words, boxes=boxes, return_tensors="tf")

        >>> outputs = model(**encoding)
        >>> last_hidden_states = outputs.last_hidden_state
        ```)r   r   r   r   r   r   r   rQ   r   rl  rm  r   )r  )rK   r   r   r   r   r   r   r   rQ   r   rl  rm  r   r   s                 rO   r\   zTFLayoutLMv3Model.call  sH    T //))%'%/!5# " 
 
 rP   c                    | j         rd S d| _         t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr  )r^   r_   rX   r`   r  r<   ra   rc   s     rO   ra   zTFLayoutLMv3Model.build   s    : 	F
4t,,8t344 , ,%%d+++, , , , , , , , , , , , , , , , , , 98s    A((A,/A,r  r  rg   )ri   rj   rk   "_keys_to_ignore_on_load_unexpectedr?   r   r   LAYOUTLMV3_INPUTS_DOCSTRINGr   r   _CONFIG_FOR_DOCr\   ra   rm   rn   s   @rO   r  r    s         +:):&K K K K K **+FGG+<?[[[ '+!%+/+/)-&**.)-)-,0#'6 6 6 6 \[ HG ]6p, , , , , , , ,rP   r  c                  6     e Zd ZdZd fdZdddZddZ xZS )TFLayoutLMv3ClassificationHeadz\
    Head for sentence-level classification tasks. Reference: RobertaClassificationHead
    r0   r   c                    t                      j        di | t          j                            |j        dt          |j                  d          | _        |j	        |j	        n|j
        }t          j                            |d          | _        t          j                            |j        t          |j                  d          | _        || _        d S )	Ntanhr   )
activationr;   r<   r   r  out_projr   r=   )r>   r?   r   rE   r   rG   r   rH   r   classifier_dropoutr   r   r   
num_labelsr  r0   )rK   r0   rL   r  rN   s       rO   r?   z'TFLayoutLMv3ClassificationHead.__init__.  s    ""6"""\''.v/GHH	 ( 
 

 *0)B)NF%%TZTn 	 |++ , 
 
 **.v/GHH + 
 

 rP   Fr  rR   r   r   rS   c                    |                      ||          }|                     |          }|                      ||          }|                     |          }|S )Nr   )r   r   r  )rK   r  r   r   s       rO   r\   z#TFLayoutLMv3ClassificationHead.callD  sS    ,,v,99**W%%,,w,::--((rP   Nc                   | j         rd S d| _         t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j	        j                  5  | j	                            d            d d d            n# 1 swxY w Y   t          | dd           `t          j        | j
        j                  5  | j
                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S )NTr   r   r  )r^   r_   rX   r`   r   r<   ra   r0   rG   r   r  rc   s     rO   ra   z$TFLayoutLMv3ClassificationHead.buildK  s!   : 	F
4$''3tz// H H
  $dk.E!FGGGH H H H H H H H H H H H H H H4D))5t|011 ) )""4((() ) ) ) ) ) ) ) ) ) ) ) ) ) )4T**6t}122 K K##T41H$IJJJK K K K K K K K K K K K K K K K K K 76s6    (A44A8;A8.CCC(EEEre   r  )r  rR   r   r   rS   rR   rg   rh   rn   s   @rO   r  r  )  s|              ,    K K K K K K K KrP   r  a
  
    LayoutLMv3 Model with a sequence classification head on top (a linear layer on top of the final hidden state of the
    [CLS] token) e.g. for document image classification tasks such as the
    [RVL-CDIP](https://www.cs.cmu.edu/~aharley/rvl-cdip/) dataset.
    c                       e Zd ZdgZd fdZe ee           ee	e
          	 	 	 	 	 	 	 	 	 	 	 	 	 ddd                                    ZddZ xZS )%TFLayoutLMv3ForSequenceClassificationr   r0   r   c                     t                      j        |fi | || _        t          |d          | _        t          |d          | _        d S )Nr  r  
classifier)r>   r?   r0   r~  r  r  r  r   s      rO   r?   z.TFLayoutLMv3ForSequenceClassification.__init__f  sU    **6***/\JJJ8lSSSrP   r  NFr   r   r   r   r   r   labelsr   r  rl  rm  r   rQ   r   rS   TFSequenceClassifierOutput | tuple[tf.Tensor] | tuple[tf.Tensor, tf.Tensor] | tuple[tf.Tensor, tf.Tensor, tf.Tensor] | tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]c                ^   |
|
n| j         j        }
|                     ||||||||	|
|||          }|d         dddddf         }|                     ||          }|dn|                     ||          }|
s|f|dd         z   }||f|z   n|S t          |||j        |j                  S )a  
        Returns:

        Examples:

        ```python
        >>> from transformers import AutoProcessor, TFAutoModelForSequenceClassification
        >>> from datasets import load_dataset
        >>> import tensorflow as tf

        >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
        >>> model = TFAutoModelForSequenceClassification.from_pretrained("microsoft/layoutlmv3-base")

        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
        >>> example = dataset[0]
        >>> image = example["image"]
        >>> words = example["tokens"]
        >>> boxes = example["bboxes"]

        >>> encoding = processor(image, words, boxes=boxes, return_tensors="tf")
        >>> sequence_label = tf.convert_to_tensor([1])

        >>> outputs = model(**encoding, labels=sequence_label)
        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```Nr   r   r   r   r   r   rl  rm  r   rQ   r   r   r   r   losslogitsr   rr  )r0   use_return_dictr  r  hf_compute_lossr   r   rr  )rK   r   r   r   r   r   r   r  r   rl  rm  r   rQ   r   r   r  r  r  r  s                      rO   r\   z*TFLayoutLMv3ForSequenceClassification.calll  s   h &1%<kk$+B]//))%'/!5#% " 
 
 "!*QQQ111W-8DD~tt4+?+?+O+O 	FY,F)-)9TGf$$vE)!/)	
 
 
 	
rP   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr  r  )r^   r_   rX   r`   r  r<   ra   r  rc   s     rO   ra   z+TFLayoutLMv3ForSequenceClassification.build  S   : 	F
4t,,8t344 , ,%%d+++, , , , , , , , , , , , , , ,4t,,8t344 , ,%%d+++, , , , , , , , , , , , , , , , , , 98r  re   NNNNNNNNNNNNF)r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r  rl  r  rm  r  r   r   rQ   r   r   r  rS   r  rg   )ri   rj   rk   r  r?   r   r   r  r   r   r  r\   ra   rm   rn   s   @rO   r  r  Z  s         +:):&T T T T T T **+FGG+ETcddd '++/+/)-&**.#')-,0#'!%)- %O
 O
 O
 O
 ed HG ]O
b	, 	, 	, 	, 	, 	, 	, 	,rP   r  a  
    LayoutLMv3 Model with a token classification head on top (a linear layer on top of the final hidden states) e.g.
    for sequence labeling (information extraction) tasks such as [FUNSD](https://guillaumejaume.github.io/FUNSD/),
    [SROIE](https://rrc.cvc.uab.es/?ch=13), [CORD](https://github.com/clovaai/cord) and
    [Kleister-NDA](https://github.com/applicaai/kleister-nda).
    c                       e Zd ZdgZd fdZe ee           ee	e
          	 	 	 	 	 	 	 	 	 	 	 	 	 ddd                                    ZddZ xZS )"TFLayoutLMv3ForTokenClassificationr   r0   r   c                    t                      j        |fi | |j        | _        t          |d          | _        t
          j                            |j        d          | _	        |j        dk     r?t
          j        
                    |j        t          |j                  d          | _        nt          |d          | _        || _        d S )Nr  r  r   r   r  r   )r>   r?   r  r~  r  r   rE   r   r   r   r   r   rH   r  r  r0   r   s      rO   r?   z+TFLayoutLMv3ForTokenClassification.__init__  s    **6*** +/\JJJ|++F,FY+WWr!!#l00!#263K#L#L! 1  DOO =V,WWWDOrP   r  NFr   r   r   r   r   r   r   r  r   r  rl  rm  rQ   r   rS   TFTokenClassifierOutput | tuple[tf.Tensor] | tuple[tf.Tensor, tf.Tensor] | tuple[tf.Tensor, tf.Tensor, tf.Tensor] | tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]c                   ||n| j         j        }|                     ||||||||	|
|||          }|t          j        |          }nt          j        |          dd         }|d         }|d         ddd|f         }|                     ||          }|                     |          }|dn|                     ||          }|s|f|dd         z   }||f|z   n|S t          |||j	        |j
                  S )aO  
        labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

        Returns:

        Examples:

        ```python
        >>> from transformers import AutoProcessor, TFAutoModelForTokenClassification
        >>> from datasets import load_dataset

        >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
        >>> model = TFAutoModelForTokenClassification.from_pretrained("microsoft/layoutlmv3-base", num_labels=7)

        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
        >>> example = dataset[0]
        >>> image = example["image"]
        >>> words = example["tokens"]
        >>> boxes = example["bboxes"]
        >>> word_labels = example["ner_tags"]

        >>> encoding = processor(image, words, boxes=boxes, word_labels=word_labels, return_tensors="tf")

        >>> outputs = model(**encoding)
        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```N)r   r   r   r   r   r   r   rl  rm  rQ   r   rW   r   r   r   r  )r0   r  r  rX   r   r   r  r  r	   r   rr  )rK   r   r   r   r   r   r   r   r  r   rl  rm  rQ   r   r   rd   r  r  r  r  r  s                        rO   r\   z'TFLayoutLMv3ForTokenClassification.call  sH   l &1%<kk$+B]//))%'/!5#% " 
 
  (9--KK(=11#2#6K ^
!!*QQQ^4,,,JJ11~tt4+?+?+O+O 	FY,F)-)9TGf$$vE&!/)	
 
 
 	
rP   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           `t          j        | j        j                  5  | j                            d d | j	        j
        g           d d d            d S # 1 swxY w Y   d S d S )NTr  r   r  )r^   r_   rX   r`   r  r<   ra   r   r  r0   rG   rc   s     rO   ra   z(TFLayoutLMv3ForTokenClassification.buildG  s   : 	F
4t,,8t344 , ,%%d+++, , , , , , , , , , , , , , ,4D))5t|011 ) )""4((() ) ) ) ) ) ) ) ) ) ) ) ) ) )4t,,8t344 M M%%tT4;3J&KLLLM M M M M M M M M M M M M M M M M M 98s6    A''A+.A+!CCC(D77D;>D;re   r  )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r  rl  r  rm  r  rQ   r   r   r  rS   r  rg   )ri   rj   rk   r  r?   r   r   r  r   r	   r  r\   ra   rm   rn   s   @rO   r  r    s         +:):&       **+FGG+BQ`aaa '+!%+/+/)-&**.#')-,0#')- %Y
 Y
 Y
 Y
 ba HG ]Y
vM M M M M M M MrP   r  a  
    LayoutLMv3 Model with a span classification head on top for extractive question-answering tasks such as
    [DocVQA](https://rrc.cvc.uab.es/?ch=17) (a linear layer on top of the text part of the hidden-states output to
    compute `span start logits` and `span end logits`).
    c                       e Zd ZdgZd fdZe ee           ee	e
          	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddd                                    ZddZ xZS )  TFLayoutLMv3ForQuestionAnsweringr   r0   r   c                     t                      j        |fi | |j        | _        t          |d          | _        t          |d          | _        d S )Nr  r  
qa_outputs)r>   r?   r  r~  r  r  r  r   s      rO   r?   z)TFLayoutLMv3ForQuestionAnswering.__init__b  sX    **6*** +/\JJJ8lSSSrP   r  NFr   r   r   r   r   r   start_positionsend_positionsr   r  rl  r   rQ   rm  r   r   rS   TFQuestionAnsweringModelOutput | tuple[tf.Tensor] | tuple[tf.Tensor, tf.Tensor] | tuple[tf.Tensor, tf.Tensor, tf.Tensor] | tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]c                   ||n| j         j        }|                     |||||||	|
||||          }|d         }|                     ||          }t	          j        |dd          \  }}t	          j        |d          }t	          j        |d          }d}| |||d	}|                     |||f
          }|s||f|dd         z   }||f|z   n|S t          ||||j	        |j
                  S )aS  
        start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the start of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        end_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the end of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.

        Returns:

        Examples:

        ```python
        >>> from transformers import AutoProcessor, TFAutoModelForQuestionAnswering
        >>> from datasets import load_dataset
        >>> import tensorflow as tf

        >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
        >>> model = TFAutoModelForQuestionAnswering.from_pretrained("microsoft/layoutlmv3-base")

        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
        >>> example = dataset[0]
        >>> image = example["image"]
        >>> question = "what's his name?"
        >>> words = example["tokens"]
        >>> boxes = example["bboxes"]

        >>> encoding = processor(image, question, words, boxes=boxes, return_tensors="tf")
        >>> start_positions = tf.convert_to_tensor([1])
        >>> end_positions = tf.convert_to_tensor([3])

        >>> outputs = model(**encoding, start_positions=start_positions, end_positions=end_positions)
        >>> loss = outputs.loss
        >>> start_scores = outputs.start_logits
        >>> end_scores = outputs.end_logits
        ```Nr  r   r   r   rW   )r   num_or_size_splitsr   )inputr   )start_positionend_position)r  r   )r  start_logits
end_logitsr   rr  )r0   r  r  r  rX   splitsqueezer  r   r   rr  )rK   r   r   r   r   r   r   r  r  r   rl  r   rQ   rm  r   r   r  r  r  r  r  r  r  s                          rO   r\   z%TFLayoutLMv3ForQuestionAnswering.callj  sU   B &1%<kk$+B]//))%'/!5#% " 
 
 "!*8DD#%8&QUW#X#X#X jz2>>>Zjr:::
&=+D(7WWF''j7Q'RRD 	F"J/'!""+=F)-)9TGf$$vE-%!!/)
 
 
 	
rP   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr  r  )r^   r_   rX   r`   r  r<   ra   r  rc   s     rO   ra   z&TFLayoutLMv3ForQuestionAnswering.build  r  r  re   )NNNNNNNNNNNNNF)r   r   r   r   r   r   r   r   r   r   r   r   r  r   r  r   r   r  rl  r  r   r   rQ   r   rm  r  r   r   rS   r  rg   )ri   rj   rk   r  r?   r   r   r  r   r   r  r\   ra   rm   rn   s   @rO   r  r  V  s         +:):&T T T T T T **+FGG+IXghhh '++/+/)-&**.,0*.)-,0!%)-#'f
 f
 f
 f
 ih HG ]f
P	, 	, 	, 	, 	, 	, 	, 	,rP   r  )r  r  r  r  r  )8rl   
__future__r   rB   r   
tensorflowrX   activations_tfr   modeling_tf_outputsr   r   r   r	   modeling_tf_utilsr
   r   r   r   r   r   r   r   tf_utilsr   utilsr   r   r   configuration_layoutlmv3r   r  _DUMMY_INPUT_IDS_DUMMY_BBOXr  rE   Layerr/   rp   r   r   r  r   r)  r.  r9  r~  r  LAYOUTLMV3_START_DOCSTRINGr  r  r  r  r  r  __all__r=   rP   rO   <module>r     s     " " " " " "          / / / / / /           	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 7 6 6 6 6 6 k k k k k k k k k k 6 6 6 6 6 6 % IIII  \\<<<1''')9)9)9: 'N 'N 'N 'N 'N%,"4 'N 'N 'NT~7 ~7 ~7 ~7 ~7!3 ~7 ~7 ~7B|H |H |H |H |H 2 |H |H |H@L L L L LU\/ L L L<&- &- &- &- &-EL. &- &- &-TH H H H Hu|1 H H H<L L L L L+ L L L<-- -- -- -- --* -- -- --`e& e& e& e& e&%,, e& e& e&P J
 J
 J
 J
 J
EL. J
 J
 J
Z
    "3    ' RJ Z j I, I, I, I, I,3 I, I,	 I,X.K .K .K .K .KU\%7 .K .K .Kb 
  g, g, g, g, g,,GIe g, g, g,T   ~M ~M ~M ~M ~M)DF_ ~M ~M ~MB 
  @, @, @, @, @,'BD[ @, @, @,F  rP   