
     `i                     h   d Z ddlmZmZmZ ddlZddlmZ ddlmZm	Z	m
Z
 ddlmZ ddlmZ dd	lmZmZmZmZmZmZ dd
lmZmZ ddlmZmZmZ ddlmZmZm Z  ddl!m"Z"  e j#        e$          Z%ej&        Z' G d dej(                  Z)	 	 dAdej(        dej*        dej*        dej*        deej*                 de+de+deej*                 fdZ, G d dej(                  Z- G d dej(                  Z. G d d ej(                  Z/ G d! d"ej(                  Z0 G d# d$ej(                  Z1 G d% d&e          Z2 G d' d(ej(                  Z3 G d) d*ej(                  Z4 G d+ d,ej(                  Z5 G d- d.ej(                  Z6 G d/ d0ej(                  Z7e G d1 d2e                      Z8e G d3 d4e8                      Z9e G d5 d6e8                      Z: ed78           G d9 d:e8                      Z; ed;8           G d< d=e8                      Z<e G d> d?e8                      Z=g d@Z>dS )BzPyTorch LayoutLM model.    )CallableOptionalUnionN)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)GradientCheckpointingLayer)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringcan_return_tuplelogging   )LayoutLMConfigc                   4     e Zd ZdZ fdZ	 	 	 	 	 ddZ xZS )LayoutLMEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                 N   t                                                       t          j        |j        |j        |j                  | _        t          j        |j        |j                  | _	        t          j        |j
        |j                  | _        t          j        |j
        |j                  | _        t          j        |j
        |j                  | _        t          j        |j
        |j                  | _        t          j        |j        |j                  | _        t#          |j        |j                  | _        t          j        |j                  | _        |                     dt1          j        |j                                      d          d           d S )N)padding_idxepsposition_ids)r   F)
persistent)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingsmax_2d_position_embeddingsx_position_embeddingsy_position_embeddingsh_position_embeddingsw_position_embeddingstype_vocab_sizetoken_type_embeddingsLayoutLMLayerNormlayer_norm_eps	LayerNormDropouthidden_dropout_probdropoutregister_buffertorcharangeexpandselfconfig	__class__s     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/layoutlm/modeling_layoutlm.pyr'   zLayoutLMEmbeddings.__init__0   sV   !|F,=v?Q_e_rsss#%<0NPVPb#c#c %'\&2SU[Ug%h%h"%'\&2SU[Ug%h%h"%'\&2SU[Ug%h%h"%'\&2SU[Ug%h%h"%'\&2H&J\%]%]"*6+=6CXYYYz&"<==EL)GHHOOPWXXej 	 	
 	
 	
 	
 	
    Nc                 :   ||                                 }n|                                 d d         }|d         }||j        n|j        }|| j        d d d |f         }|!t          j        |t          j        |          }||                     |          }|}	|                     |          }
	 |                     |d d d d df                   }| 	                    |d d d d df                   }|                     |d d d d df                   }| 	                    |d d d d df                   }n"# t          $ r}t          d          |d }~ww xY w|                     |d d d d df         |d d d d df         z
            }|                     |d d d d df         |d d d d df         z
            }|                     |          }|	|
z   |z   |z   |z   |z   |z   |z   |z   }|                     |          }|                     |          }|S )Nr$   r   dtypedevicer      r
   z:The `bbox`coordinate values should be within 0-1000 range.)sizerI   r#   r=   zeroslongr,   r.   r0   r1   
IndexErrorr2   r3   r5   r8   r;   )rA   	input_idsbboxtoken_type_idsr#   inputs_embedsinput_shape
seq_lengthrI   words_embeddingsr.   left_position_embeddingsupper_position_embeddingsright_position_embeddingslower_position_embeddingser2   r3   r5   
embeddingss                       rD   forwardzLayoutLMEmbeddings.forwardA   s     #..**KK',,..ss3K ^
%.%:!!@T,QQQ^<L!"[EJvVVVN  00;;M("66|DD	b'+'A'A$qqq!!!Qw-'P'P$(,(B(B4111a=(Q(Q%(,(B(B4111a=(Q(Q%(,(B(B4111a=(Q(Q%% 	b 	b 	bYZZ`aa	b !% : :4111a=4PQPQPQSTSTSTVWPW=;X Y Y $ : :4111a=4PQPQPQSTSTSTVWPW=;X Y Y $ : :> J J !"&' (( (	(
 (( $$ $$ $$ 	 ^^J//
\\*--
s   3BD< <
EEE)NNNNN)__name__
__module____qualname____doc__r'   r\   __classcell__rC   s   @rD   r   r   -   sc        QQ
 
 
 
 
& 5 5 5 5 5 5 5 5rE   r           modulequerykeyvalueattention_maskscalingr;   	head_maskc                 8   t          j        ||                    dd                    |z  }	|$|d d d d d d d |j        d         f         }
|	|
z   }	t          j                            |	dt           j                                      |j	                  }	t          j        
                    |	|| j                  }	||	|                    dddd          z  }	t          j        |	|          }|                    dd                                          }||	fS )NrJ   r
   r$   )dimrH   )ptrainingr   )r=   matmul	transposeshaper   
functionalsoftmaxfloat32torH   r;   ro   view
contiguous)rd   re   rf   rg   rh   ri   r;   rj   kwargsattn_weightscausal_maskattn_outputs               rD   eager_attention_forwardr}   z   s    <s}}Q':':;;gEL!$QQQ111o	"o%=>#k1=((2U](SSVVW\WbccL=((6?([[L#innQAq&A&AA,|U33K''1--88::K$$rE   c                        e Zd Z fdZ	 	 	 d
dej        deej                 deej                 dee         de	ej                 f
d	Z
 xZS )LayoutLMSelfAttentionc                    t                                                       |j        |j        z  dk    r0t	          |d          s t          d|j         d|j         d          || _        |j        | _        t          |j        |j        z            | _        | j        | j        z  | _	        t          j        |j        | j	                  | _        t          j        |j        | j	                  | _        t          j        |j        | j	                  | _        t          j        |j                  | _        |j        | _        | j        dz  | _        d S )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()g      )r&   r'   r*   num_attention_headshasattr
ValueErrorrB   intattention_head_sizeall_head_sizer   Linearre   rf   rg   r9   attention_probs_dropout_probr;   attention_dropoutri   r@   s     rD   r'   zLayoutLMSelfAttention.__init__   s:    ::a??PVXhHiHi?8F$6 8 8 48 8 8  
 #)#= #&v'9F<V'V#W#W !58PPYv143EFF
9V/1CDDYv143EFF
z&"EFF!'!D/5rE   NFhidden_statesrh   rj   output_attentionsreturnc                    |j         d d         }g |d| j        R }|                     |                              |                              dd          }|                     |                              |                              dd          }	|                     |                              |                              dd          }
t          }| j        j	        dk    rt          | j        j	                 } || ||	|
|f| j        sdn| j        | j        |d|\  }} |j        g |dR                                  }|r||fn|f}|S )Nr$   r   rJ   eagerrc   )r;   ri   rj   )rr   r   re   rw   rq   rf   rg   r}   rB   _attn_implementationr   ro   r   ri   reshaperx   )rA   r   rh   rj   r   ry   rS   hidden_shapequery_states
key_statesvalue_statesattention_interfacer|   rz   outputss                  rD   r\   zLayoutLMSelfAttention.forward   s    $)#2#.CCbC$*BCCzz-0055lCCMMaQRSSXXm,,11,??II!QOO
zz-0055lCCMMaQRSS(?;+w66"9$+:Z"[$7$7
%
  $}HCC$2HL
%
 
%
 
%
 
%
!\ *k);;;;;;FFHH1BV;--rE   NNF)r]   r^   r_   r'   r=   Tensorr   FloatTensorbooltupler\   ra   rb   s   @rD   r   r      s        6 6 6 6 60 7;15,1! !|! !!23! E-.	!
 $D>! 
u|	! ! ! ! ! ! ! !rE   r   c                   P     e Zd Z fdZdej        dej        dej        fdZ xZS )LayoutLMSelfOutputc                    t                                                       t          j        |j        |j                  | _        t          j        |j        |j                  | _        t          j        |j	                  | _
        d S Nr!   )r&   r'   r   r   r*   denser8   r7   r9   r:   r;   r@   s     rD   r'   zLayoutLMSelfOutput.__init__   sf    Yv163EFF
f&8f>STTTz&"<==rE   r   input_tensorr   c                     |                      |          }|                     |          }|                     ||z             }|S Nr   r;   r8   rA   r   r   s      rD   r\   zLayoutLMSelfOutput.forward   @    

=11]33}|'CDDrE   r]   r^   r_   r'   r=   r   r\   ra   rb   s   @rD   r   r      i        > > > > >U\  RWR^        rE   r   c                        e Zd Z fdZd Z	 	 	 ddej        deej                 deej                 dee	         d	e
ej                 f
d
Z xZS )LayoutLMAttentionc                     t                                                       t          |          | _        t	          |          | _        t                      | _        d S r   )r&   r'   r   rA   r   outputsetpruned_headsr@   s     rD   r'   zLayoutLMAttention.__init__   sI    )&11	(00EErE   c                    t          |          dk    rd S t          || j        j        | j        j        | j                  \  }}t          | j        j        |          | j        _        t          | j        j        |          | j        _        t          | j        j	        |          | j        _	        t          | j
        j        |d          | j
        _        | j        j        t          |          z
  | j        _        | j        j        | j        j        z  | j        _        | j                            |          | _        d S )Nr   r   rm   )lenr   rA   r   r   r   r   re   rf   rg   r   r   r   union)rA   headsindexs      rD   prune_headszLayoutLMAttention.prune_heads   s    u::??F7490$)2OQUQb
 
u
 -TY_eDD	*49=%@@	,TY_eDD	.t{/@%QOOO )-	(EE

(R	%"&)"?$)B_"_	 -33E::rE   NFr   rh   rj   r   r   c                 ~     | j         |f|||d|}|                     |d         |          }|f|dd          z   }|S N)rh   rj   r   r   r   )rA   r   )	rA   r   rh   rj   r   ry   self_outputsattention_outputr   s	            rD   r\   zLayoutLMAttention.forward   sl     !ty
)/	
 

 
 
  ;;|AFF#%QRR(88rE   r   )r]   r^   r_   r'   r   r=   r   r   r   r   r   r\   ra   rb   s   @rD   r   r      s        " " " " "; ; ;* 7;15,1 | !!23 E-.	
 $D> 
u|	       rE   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )LayoutLMIntermediatec                    t                                                       t          j        |j        |j                  | _        t          |j        t                    rt          |j                 | _        d S |j        | _        d S r   )r&   r'   r   r   r*   intermediate_sizer   
isinstance
hidden_actstrr   intermediate_act_fnr@   s     rD   r'   zLayoutLMIntermediate.__init__  sn    Yv163KLL
f'-- 	9'-f.?'@D$$$'-'8D$$$rE   r   r   c                 Z    |                      |          }|                     |          }|S r   )r   r   rA   r   s     rD   r\   zLayoutLMIntermediate.forward  s,    

=1100??rE   r   rb   s   @rD   r   r     s^        9 9 9 9 9U\ el        rE   r   c                   P     e Zd Z fdZdej        dej        dej        fdZ xZS )LayoutLMOutputc                    t                                                       t          j        |j        |j                  | _        t          j        |j        |j                  | _        t          j	        |j
                  | _        d S r   )r&   r'   r   r   r   r*   r   r8   r7   r9   r:   r;   r@   s     rD   r'   zLayoutLMOutput.__init__  sf    Yv79KLL
f&8f>STTTz&"<==rE   r   r   r   c                     |                      |          }|                     |          }|                     ||z             }|S r   r   r   s      rD   r\   zLayoutLMOutput.forward%  r   rE   r   rb   s   @rD   r   r     r   rE   r   c                        e Zd Z fdZ	 	 	 ddej        deej                 deej                 dee         de	ej                 f
d	Z
d
 Z xZS )LayoutLMLayerc                     t                                                       |j        | _        d| _        t	          |          | _        t          |          | _        t          |          | _	        d S )Nr   )
r&   r'   chunk_size_feed_forwardseq_len_dimr   	attentionr   intermediater   r   r@   s     rD   r'   zLayoutLMLayer.__init__.  s^    '-'E$*622088$V,,rE   NFr   rh   rj   r   r   c                      | j         |f|||d|}|d         }|dd          }t          | j        | j        | j        |          }	|	f|z   }|S r   )r   r   feed_forward_chunkr   r   )
rA   r   rh   rj   r   ry   self_attention_outputsr   r   layer_outputs
             rD   r\   zLayoutLMLayer.forward6  s     "0"
)/	"
 "

 "
 "
 2!4(,0#T%A4CSUe
 
  /G+rE   c                 \    |                      |          }|                     ||          }|S r   )r   r   )rA   r   intermediate_outputr   s       rD   r   z LayoutLMLayer.feed_forward_chunkO  s2    "//0@AA{{#68HIIrE   r   )r]   r^   r_   r'   r=   r   r   r   r   r   r\   r   ra   rb   s   @rD   r   r   -  s        - - - - - 7;15,1 | !!23 E-.	
 $D> 
u|	   2      rE   r   c                        e Zd Z fdZe	 	 	 	 	 ddej        deej                 deej                 dee	         d	ee	         d
ee	         de
eej                 ef         fd            Z xZS )LayoutLMEncoderc                     t                                                       | _        t          j        fdt          j                  D                       | _        d| _        d S )Nc                 .    g | ]}t                    S  )r   ).0irB   s     rD   
<listcomp>z,LayoutLMEncoder.__init__.<locals>.<listcomp>Z  s!    #c#c#caM&$9$9#c#c#crE   F)	r&   r'   rB   r   
ModuleListrangenum_hidden_layerslayergradient_checkpointingr@   s    `rD   r'   zLayoutLMEncoder.__init__W  s`    ]#c#c#c#c5IaCbCb#c#c#cdd
&+###rE   NFTr   rh   rj   r   output_hidden_statesreturn_dictr   c           	          |rdnd }|rdnd }	t          | j                  D ]<\  }
}|r||fz   }|||
         nd } |d||||d|}|d         }|r|	|d         fz   }	=|r||fz   }t          |||	          S )Nr   )r   rh   rj   r   r   r   )last_hidden_stater   
attentions)	enumerater   r   )rA   r   rh   rj   r   r   r   ry   all_hidden_statesall_self_attentionsr   layer_modulelayer_head_masklayer_outputss                 rD   r\   zLayoutLMEncoder.forward]  s     #7@BBD$5?bb4(44 	P 	POA|# I$58H$H!.7.CillO(L +-)"3	 
  M *!,M  P&9]1=M<O&O# 	E 1]4D D++*
 
 
 	
rE   )NNFFT)r]   r^   r_   r'   r   r=   r   r   r   r   r   r   r   r\   ra   rb   s   @rD   r   r   V  s        , , , , ,  7;15,1/4&*&
 &
|&
 !!23&
 E-.	&

 $D>&
 'tn&
 d^&
 
uU\"O3	4&
 &
 &
 &
 &
 &
 &
 &
rE   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )LayoutLMPoolerc                     t                                                       t          j        |j        |j                  | _        t          j                    | _        d S r   )r&   r'   r   r   r*   r   Tanh
activationr@   s     rD   r'   zLayoutLMPooler.__init__  sC    Yv163EFF
'))rE   r   r   c                 r    |d d df         }|                      |          }|                     |          }|S )Nr   )r   r   )rA   r   first_token_tensorpooled_outputs       rD   r\   zLayoutLMPooler.forward  s@     +111a40

#56666rE   r   rb   s   @rD   r   r     s^        $ $ $ $ $
U\ el        rE   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )LayoutLMPredictionHeadTransformc                 V   t                                                       t          j        |j        |j                  | _        t          |j        t                    rt          |j                 | _
        n|j        | _
        t          j        |j        |j                  | _        d S r   )r&   r'   r   r   r*   r   r   r   r   r   transform_act_fnr8   r7   r@   s     rD   r'   z(LayoutLMPredictionHeadTransform.__init__  s    Yv163EFF
f'-- 	6$*6+<$=D!!$*$5D!f&8f>STTTrE   r   r   c                     |                      |          }|                     |          }|                     |          }|S r   )r   r   r8   r   s     rD   r\   z'LayoutLMPredictionHeadTransform.forward  s=    

=11--m<<}55rE   r   rb   s   @rD   r   r     sc        U U U U UU\ el        rE   r   c                   *     e Zd Z fdZd Zd Z xZS )LayoutLMLMPredictionHeadc                 >   t                                                       t          |          | _        t	          j        |j        |j        d          | _        t	          j	        t          j        |j                            | _        | j        | j        _        d S )NF)bias)r&   r'   r   	transformr   r   r*   r)   decoder	Parameterr=   rL   r   r@   s     rD   r'   z!LayoutLMLMPredictionHead.__init__  sz    8@@ y!3V5FUSSSLV->!?!?@@	 !IrE   c                 (    | j         | j        _         d S r   )r   r  rA   s    rD   _tie_weightsz%LayoutLMLMPredictionHead._tie_weights  s     IrE   c                 Z    |                      |          }|                     |          }|S r   )r   r  r   s     rD   r\   z LayoutLMLMPredictionHead.forward  s*    }55]33rE   )r]   r^   r_   r'   r  r\   ra   rb   s   @rD   r   r     sV        & & & & && & &      rE   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )LayoutLMOnlyMLMHeadc                 p    t                                                       t          |          | _        d S r   )r&   r'   r   predictionsr@   s     rD   r'   zLayoutLMOnlyMLMHead.__init__  s/    3F;;rE   sequence_outputr   c                 0    |                      |          }|S r   )r
  )rA   r  prediction_scoress      rD   r\   zLayoutLMOnlyMLMHead.forward  s     ,,_==  rE   r   rb   s   @rD   r  r    s^        < < < < <!u| ! ! ! ! ! ! ! ! !rE   r  c                   (    e Zd ZU eed<   dZdZd ZdS )LayoutLMPreTrainedModelrB   layoutlmTc                    t          |t          j                  rT|j        j                            d| j        j                   |j         |j        j        	                                 dS dS t          |t          j
                  r_|j        j                            d| j        j                   |j        +|j        j        |j                 	                                 dS dS t          |t                    r?|j        j        	                                 |j        j                            d           dS t          |t                    r |j        j        	                                 dS dS )zInitialize the weightsrc   )meanstdN      ?)r   r   r   weightdatanormal_rB   initializer_ranger   zero_r(   r    r6   fill_r   )rA   rd   s     rD   _init_weightsz%LayoutLMPreTrainedModel._init_weights  sX   fbi(( 	% M&&CT[5R&SSS{& &&((((( '&-- 	%M&&CT[5R&SSS!-"6#56<<>>>>> .- 122 	%K""$$$M$$S))))) 899 	%K""$$$$$	% 	%rE   N)r]   r^   r_   r   __annotations__base_model_prefixsupports_gradient_checkpointingr  r   rE   rD   r  r    s=         "&*#% % % % %rE   r  c                   h    e Zd Z fdZd Zd Zd Zee	 	 	 	 	 	 	 	 	 	 dde	e
j                 de	e
j                 de	e
j                 d	e	e
j                 d
e	e
j                 de	e
j                 de	e
j                 de	e         de	e         de	e         deeef         fd                        Z xZS )LayoutLMModelc                     t                                          |           || _        t          |          | _        t          |          | _        t          |          | _        | 	                                 d S r   )
r&   r'   rB   r   r[   r   encoderr   pooler	post_initr@   s     rD   r'   zLayoutLMModel.__init__  sg       ,V44&v..$V,, 	rE   c                     | j         j        S r   r[   r,   r  s    rD   get_input_embeddingsz"LayoutLMModel.get_input_embeddings  s    ..rE   c                     || j         _        d S r   r&  )rA   rg   s     rD   set_input_embeddingsz"LayoutLMModel.set_input_embeddings  s    */'''rE   c                     |                                 D ]/\  }}| j        j        |         j                            |           0dS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr"  r   r   r   )rA   heads_to_pruner   r   s       rD   _prune_headszLayoutLMModel._prune_heads  sU    
 +0022 	C 	CLE5Lu%/;;EBBBB	C 	CrE   NrO   rP   rh   rQ   r#   rj   rR   r   r   r   r   c                 "   ||n| j         j        }|	|	n| j         j        }	|
|
n| j         j        }
||t	          d          |+|                     ||           |                                }n.||                                dd         }nt	          d          ||j        n|j        }|t          j	        ||          }|!t          j
        |t          j        |          }|$t          j
        |dz   t          j        |          }|                    d                              d	          }|                    | j        
          }d|z
  t          j        | j                  j        z  }||                                dk    rr|                    d                              d                              d                              d          }|                    | j         j        dddd          }nS|                                d	k    r;|                    d                              d                              d          }|                    t)          |                                           j        
          }ndg| j         j        z  }|                     |||||          }|                     |||||	d          }|d         }|                     |          }t3          |||j        |j                  S )a  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, LayoutLMModel
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
        >>> model = LayoutLMModel.from_pretrained("microsoft/layoutlm-base-uncased")

        >>> words = ["Hello", "world"]
        >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

        >>> token_boxes = []
        >>> for word, box in zip(words, normalized_word_boxes):
        ...     word_tokens = tokenizer.tokenize(word)
        ...     token_boxes.extend([box] * len(word_tokens))
        >>> # add bounding boxes of cls + sep tokens
        >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

        >>> encoding = tokenizer(" ".join(words), return_tensors="pt")
        >>> input_ids = encoding["input_ids"]
        >>> attention_mask = encoding["attention_mask"]
        >>> token_type_ids = encoding["token_type_ids"]
        >>> bbox = torch.tensor([token_boxes])

        >>> outputs = model(
        ...     input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids
        ... )

        >>> last_hidden_states = outputs.last_hidden_state
        ```NzDYou cannot specify both input_ids and inputs_embeds at the same timer$   z5You have to specify either input_ids or inputs_embeds)rI   rG   )   r   rJ   )rH   r  r   )rO   rP   r#   rQ   rR   T)rj   r   r   r   )r   pooler_outputr   r   )rB   r   r   use_return_dictr   %warn_if_padding_and_no_attention_maskrK   rI   r=   onesrL   rM   	unsqueezerv   rH   finfominrm   r?   r   next
parametersr[   r"  r#  r   r   r   )rA   rO   rP   rh   rQ   r#   rj   rR   r   r   r   rS   rI   extended_attention_maskembedding_outputencoder_outputsr  r   s                     rD   r\   zLayoutLMModel.forward   s"   j 2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B] ]%>cddd"66y.QQQ#..**KK&',,..ss3KKTUUU%.%:!!@T!"ZFCCCN!"[EJvVVVN<;{T1FSSSD"0":":1"="="G"G"J"J"9"<"<4:"<"N"N#&)@#@EKPTPZD[D[D_"_ }}!##%//22<<Q??II"MMWWXZ[[	%,,T[-JBPRTVXZ[[		A%%%//22<<R@@JJ2NN	!40A0A+B+B+HIIII!>>I??%)' + 
 
 ,,#/!5 ' 
 
 *!,O44)-')7&1	
 
 
 	
rE   )
NNNNNNNNNN)r]   r^   r_   r'   r'  r)  r-  r   r   r   r=   
LongTensorr   r   r   r   r   r\   ra   rb   s   @rD   r   r     s       	 	 	 	 	/ / /0 0 0C C C  15+/6:59371559,0/3&*s
 s
E,-s
 u'(s
 !!23	s

 !!12s
 u/0s
 E-.s
   12s
 $D>s
 'tns
 d^s
 
u00	1s
 s
 s
 ^ s
 s
 s
 s
 s
rE   r   c                       e Zd ZddgZ fdZd Zd Zd Zee		 	 	 	 	 	 	 	 	 	 	 dde
ej                 d	e
ej                 d
e
ej                 de
ej                 de
ej                 de
ej                 de
ej                 de
ej                 de
e         de
e         de
e         deeef         fd                        Z xZS )LayoutLMForMaskedLMzcls.predictions.decoder.biaszcls.predictions.decoder.weightc                     t                                          |           t          |          | _        t	          |          | _        |                                  d S r   )r&   r'   r   r  r  clsr$  r@   s     rD   r'   zLayoutLMForMaskedLM.__init__|  sQ       %f--&v.. 	rE   c                 $    | j         j        j        S r   r  r[   r,   r  s    rD   r'  z(LayoutLMForMaskedLM.get_input_embeddings      }'77rE   c                 $    | j         j        j        S r   )r@  r
  r  r  s    rD   get_output_embeddingsz)LayoutLMForMaskedLM.get_output_embeddings  s    x#++rE   c                 T    || j         j        _        |j        | j         j        _        d S r   )r@  r
  r  r   )rA   new_embeddingss     rD   set_output_embeddingsz)LayoutLMForMaskedLM.set_output_embeddings  s%    '5$$2$7!!!rE   NrO   rP   rh   rQ   r#   rj   rR   labelsr   r   r   r   c                 r   ||n| j         j        }|                     ||||||||	|
d
  
        }|d         }|                     |          }d}|Kt	                      } ||                    d| j         j                  |                    d                    }t          |||j        |j	                  S )a2	  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, LayoutLMForMaskedLM
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
        >>> model = LayoutLMForMaskedLM.from_pretrained("microsoft/layoutlm-base-uncased")

        >>> words = ["Hello", "[MASK]"]
        >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

        >>> token_boxes = []
        >>> for word, box in zip(words, normalized_word_boxes):
        ...     word_tokens = tokenizer.tokenize(word)
        ...     token_boxes.extend([box] * len(word_tokens))
        >>> # add bounding boxes of cls + sep tokens
        >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

        >>> encoding = tokenizer(" ".join(words), return_tensors="pt")
        >>> input_ids = encoding["input_ids"]
        >>> attention_mask = encoding["attention_mask"]
        >>> token_type_ids = encoding["token_type_ids"]
        >>> bbox = torch.tensor([token_boxes])

        >>> labels = tokenizer("Hello world", return_tensors="pt")["input_ids"]

        >>> outputs = model(
        ...     input_ids=input_ids,
        ...     bbox=bbox,
        ...     attention_mask=attention_mask,
        ...     token_type_ids=token_type_ids,
        ...     labels=labels,
        ... )

        >>> loss = outputs.loss
        ```NT)rh   rQ   r#   rj   rR   r   r   r   r   r$   losslogitsr   r   )
rB   r1  r  r@  r   rw   r)   r   r   r   )rA   rO   rP   rh   rQ   r#   rj   rR   rI  r   r   r   r   r  r  masked_lm_lossloss_fcts                    rD   r\   zLayoutLMForMaskedLM.forward  s    @ &1%<kk$+B]--))%'/!5   
 
 "!* HH_55'))H%X!&&r4;+ABBB N
 $!/)	
 
 
 	
rE   NNNNNNNNNNN)r]   r^   r_   _tied_weights_keysr'   r'  rE  rH  r   r   r   r=   r<  r   r   r   r   r   r\   ra   rb   s   @rD   r>  r>  x  s       8:Z[    8 8 8, , ,8 8 8  15+/6:59371559-1,0/3&*]
 ]
E,-]
 u'(]
 !!23	]

 !!12]
 u/0]
 E-.]
   12]
 )*]
 $D>]
 'tn]
 d^]
 
un$	%]
 ]
 ]
 ^ ]
 ]
 ]
 ]
 ]
rE   r>  z
    LayoutLM Model with a sequence classification head on top (a linear layer on top of the pooled output) e.g. for
    document image classification tasks such as the [RVL-CDIP](https://www.cs.cmu.edu/~aharley/rvl-cdip/) dataset.
    )custom_introc                   x    e Zd Z fdZd Zee	 	 	 	 	 	 	 	 	 	 	 ddeej	                 deej	                 deej
                 deej	                 deej	                 d	eej
                 d
eej
                 deej	                 dee         dee         dee         deeef         fd                        Z xZS )!LayoutLMForSequenceClassificationc                 6   t                                          |           |j        | _        t          |          | _        t          j        |j                  | _        t          j	        |j
        |j                  | _        |                                  d S r   r&   r'   
num_labelsr   r  r   r9   r:   r;   r   r*   
classifierr$  r@   s     rD   r'   z*LayoutLMForSequenceClassification.__init__  y        +%f--z&"<==)F$68IJJ 	rE   c                 $    | j         j        j        S r   rB  r  s    rD   r'  z6LayoutLMForSequenceClassification.get_input_embeddings  rC  rE   NrO   rP   rh   rQ   r#   rj   rR   rI  r   r   r   r   c                    ||n| j         j        }|                     ||||||||	|
d
  
        }|d         }|                     |          }|                     |          }d}|Z| j         j        f| j        dk    rd| j         _        nN| j        dk    r7|j        t          j	        k    s|j        t          j
        k    rd| j         _        nd| j         _        | j         j        dk    rWt                      }| j        dk    r1 ||                                |                                          }n |||          }n| j         j        dk    rGt                      } ||                    d| j                  |                    d                    }n*| j         j        dk    rt                      } |||          }t!          |||j        |j        	          S )
aB	  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, LayoutLMForSequenceClassification
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
        >>> model = LayoutLMForSequenceClassification.from_pretrained("microsoft/layoutlm-base-uncased")

        >>> words = ["Hello", "world"]
        >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

        >>> token_boxes = []
        >>> for word, box in zip(words, normalized_word_boxes):
        ...     word_tokens = tokenizer.tokenize(word)
        ...     token_boxes.extend([box] * len(word_tokens))
        >>> # add bounding boxes of cls + sep tokens
        >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

        >>> encoding = tokenizer(" ".join(words), return_tensors="pt")
        >>> input_ids = encoding["input_ids"]
        >>> attention_mask = encoding["attention_mask"]
        >>> token_type_ids = encoding["token_type_ids"]
        >>> bbox = torch.tensor([token_boxes])
        >>> sequence_label = torch.tensor([1])

        >>> outputs = model(
        ...     input_ids=input_ids,
        ...     bbox=bbox,
        ...     attention_mask=attention_mask,
        ...     token_type_ids=token_type_ids,
        ...     labels=sequence_label,
        ... )

        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```NT
rO   rP   rh   rQ   r#   rj   rR   r   r   r   r   
regressionsingle_label_classificationmulti_label_classificationr$   rK  )rB   r1  r  r;   rX  problem_typerW  rH   r=   rM   r   r	   squeezer   rw   r   r   r   r   )rA   rO   rP   rh   rQ   r#   rj   rR   rI  r   r   r   r   r   rM  rL  rO  s                    rD   r\   z)LayoutLMForSequenceClassification.forward  s   @ &1%<kk$+B]--))%'/!5   
 
  
]33//{'/?a''/;DK,,_q((flej.H.HFL\a\eLeLe/LDK,,/KDK,{'<77"99?a''#8FNN$4$4fnn6F6FGGDD#8FF33DD)-JJJ+--xB @ @&++b//RR)-III,..x//'!/)	
 
 
 	
rE   rP  )r]   r^   r_   r'   r'  r   r   r   r=   r<  r   r   r   r   r   r\   ra   rb   s   @rD   rT  rT    s|           8 8 8  15+/6:59371559-1,0/3&*n
 n
E,-n
 u'(n
 !!23	n

 !!12n
 u/0n
 E-.n
   12n
 )*n
 $D>n
 'tnn
 d^n
 
u..	/n
 n
 n
 ^ n
 n
 n
 n
 n
rE   rT  a3  
    LayoutLM Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
    sequence labeling (information extraction) tasks such as the [FUNSD](https://guillaumejaume.github.io/FUNSD/)
    dataset and the [SROIE](https://rrc.cvc.uab.es/?ch=13) dataset.
    c                   x    e Zd Z fdZd Zee	 	 	 	 	 	 	 	 	 	 	 ddeej	                 deej	                 deej
                 deej	                 deej	                 d	eej
                 d
eej
                 deej	                 dee         dee         dee         deeef         fd                        Z xZS )LayoutLMForTokenClassificationc                 6   t                                          |           |j        | _        t          |          | _        t          j        |j                  | _        t          j	        |j
        |j                  | _        |                                  d S r   rV  r@   s     rD   r'   z'LayoutLMForTokenClassification.__init__  rY  rE   c                 $    | j         j        j        S r   rB  r  s    rD   r'  z3LayoutLMForTokenClassification.get_input_embeddings  rC  rE   NrO   rP   rh   rQ   r#   rj   rR   rI  r   r   r   r   c                    ||n| j         j        }|                     ||||||||	|
d
  
        }|d         }|                     |          }|                     |          }d}|Ft                      } ||                    d| j                  |                    d                    }t          |||j	        |j
                  S )a  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, LayoutLMForTokenClassification
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
        >>> model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-base-uncased")

        >>> words = ["Hello", "world"]
        >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

        >>> token_boxes = []
        >>> for word, box in zip(words, normalized_word_boxes):
        ...     word_tokens = tokenizer.tokenize(word)
        ...     token_boxes.extend([box] * len(word_tokens))
        >>> # add bounding boxes of cls + sep tokens
        >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

        >>> encoding = tokenizer(" ".join(words), return_tensors="pt")
        >>> input_ids = encoding["input_ids"]
        >>> attention_mask = encoding["attention_mask"]
        >>> token_type_ids = encoding["token_type_ids"]
        >>> bbox = torch.tensor([token_boxes])
        >>> token_labels = torch.tensor([1, 1, 0, 0]).unsqueeze(0)  # batch size of 1

        >>> outputs = model(
        ...     input_ids=input_ids,
        ...     bbox=bbox,
        ...     attention_mask=attention_mask,
        ...     token_type_ids=token_type_ids,
        ...     labels=token_labels,
        ... )

        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```NTr\  r   r$   rK  )rB   r1  r  r;   rX  r   rw   rW  r   r   r   )rA   rO   rP   rh   rQ   r#   rj   rR   rI  r   r   r   r   r  rM  rL  rO  s                    rD   r\   z&LayoutLMForTokenClassification.forward  s    | &1%<kk$+B]--))%'/!5   
 
 "!*,,7711'))H8FKKDO<<fkk"ooNND$!/)	
 
 
 	
rE   rP  )r]   r^   r_   r'   r'  r   r   r   r=   r<  r   r   r   r   r   r\   ra   rb   s   @rD   rc  rc  x  s|           8 8 8  15+/6:59371559-1,0/3&*Z
 Z
E,-Z
 u'(Z
 !!23	Z

 !!12Z
 u/0Z
 E-.Z
   12Z
 )*Z
 $D>Z
 'tnZ
 d^Z
 
u++	,Z
 Z
 Z
 ^ Z
 Z
 Z
 Z
 Z
rE   rc  c                       e Zd Zd fd	Zd Zee	 	 	 	 	 	 	 	 	 	 	 	 ddeej	                 deej	                 deej
                 deej	                 d	eej	                 d
eej
                 deej
                 deej	                 deej	                 dee         dee         dee         deeef         fd                        Z xZS )LayoutLMForQuestionAnsweringTc                     t                                          |           |j        | _        t          |          | _        t          j        |j        |j                  | _        | 	                                 dS )z
        has_visual_segment_embedding (`bool`, *optional*, defaults to `True`):
            Whether or not to add visual segment embeddings.
        N)
r&   r'   rW  r   r  r   r   r*   
qa_outputsr$  )rA   rB   has_visual_segment_embeddingrC   s      rD   r'   z%LayoutLMForQuestionAnswering.__init__  sg    
 	    +%f--)F$68IJJ 	rE   c                 $    | j         j        j        S r   rB  r  s    rD   r'  z1LayoutLMForQuestionAnswering.get_input_embeddings  rC  rE   NrO   rP   rh   rQ   r#   rj   rR   start_positionsend_positionsr   r   r   r   c                 |   ||n| j         j        }|                     ||||||||
|d
  
        }|d         }|                     |          }|                    dd          \  }}|                    d                                          }|                    d                                          }d}||	t          |                                          dk    r|                    d          }t          |	                                          dk    r|	                    d          }	|                    d          }|	                    d|          }|		                    d|          }	t          |          } |||          } |||	          }||z   d	z  }t          ||||j        |j        
          S )a4	  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.

        Example:

        In the example below, we prepare a question + context pair for the LayoutLM model. It will give us a prediction
        of what it thinks the answer is (the span of the answer within the texts parsed from the image).

        ```python
        >>> from transformers import AutoTokenizer, LayoutLMForQuestionAnswering
        >>> from datasets import load_dataset
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("impira/layoutlm-document-qa", add_prefix_space=True)
        >>> model = LayoutLMForQuestionAnswering.from_pretrained("impira/layoutlm-document-qa", revision="1e3ebac")

        >>> dataset = load_dataset("nielsr/funsd", split="train")
        >>> example = dataset[0]
        >>> question = "what's his name?"
        >>> words = example["words"]
        >>> boxes = example["bboxes"]

        >>> encoding = tokenizer(
        ...     question.split(), words, is_split_into_words=True, return_token_type_ids=True, return_tensors="pt"
        ... )
        >>> bbox = []
        >>> for i, s, w in zip(encoding.input_ids[0], encoding.sequence_ids(0), encoding.word_ids(0)):
        ...     if s == 1:
        ...         bbox.append(boxes[w])
        ...     elif i == tokenizer.sep_token_id:
        ...         bbox.append([1000] * 4)
        ...     else:
        ...         bbox.append([0] * 4)
        >>> encoding["bbox"] = torch.tensor([bbox])

        >>> word_ids = encoding.word_ids(0)
        >>> outputs = model(**encoding)
        >>> loss = outputs.loss
        >>> start_scores = outputs.start_logits
        >>> end_scores = outputs.end_logits
        >>> start, end = word_ids[start_scores.argmax(-1)], word_ids[end_scores.argmax(-1)]
        >>> print(" ".join(words[start : end + 1]))
        M. Hamann P. Harper, P. Martinez
        ```NTr\  r   r   r$   r   )ignore_indexrJ   )rL  start_logits
end_logitsr   r   )rB   r1  r  rj  splitra  rx   r   rK   clampr   r   r   r   )rA   rO   rP   rh   rQ   r#   rj   rR   rm  rn  r   r   r   r   r  rM  rq  rr  
total_lossignored_indexrO  
start_lossend_losss                          rD   r\   z$LayoutLMForQuestionAnswering.forward  s   D &1%<kk$+B]--))%'/!5   
 
 "!*11#)<<r<#:#: j#++B//::<<''++6688

&=+D?''))**Q.."1"9"9""="==%%''((1,, - 5 5b 9 9(--a00M-33A}EEO)//=AAM']CCCH!,@@Jx
M::H$x/14J+%!!/)
 
 
 	
rE   )T)NNNNNNNNNNNN)r]   r^   r_   r'   r'  r   r   r   r=   r<  r   r   r   r   r   r\   ra   rb   s   @rD   rh  rh    s            8 8 8  15+/6:593715596:48,0/3&*m
 m
E,-m
 u'(m
 !!23	m

 !!12m
 u/0m
 E-.m
   12m
 "%"23m
   01m
 $D>m
 'tnm
 d^m
 
u22	3m
 m
 m
 ^ m
 m
 m
 m
 m
rE   rh  )r>  rT  rc  rh  r   r  )rc   N)?r`   typingr   r   r   r=   r   torch.nnr   r   r	   activationsr   modeling_layersr   modeling_outputsr   r   r   r   r   r   modeling_utilsr   r   pytorch_utilsr   r   r   utilsr   r   r   configuration_layoutlmr   
get_loggerr]   loggerr8   r6   Moduler   r   floatr}   r   r   r   r   r   r   r   r   r   r   r  r  r   r>  rT  rc  rh  __all__r   rE   rD   <module>r     s     , , , , , , , , , ,        A A A A A A A A A A ! ! ! ! ! ! 9 9 9 9 9 9                G F F F F F F F l l l l l l l l l l > > > > > > > > > > 2 2 2 2 2 2 
	H	%	% L I I I I I I I Ih (,% %I%<% 
% <	%
 U\*% % % %% % % %87 7 7 7 7BI 7 7 7v       * * * * *	 * * *\    29        RY   % % % % %. % % %R.
 .
 .
 .
 .
bi .
 .
 .
d    RY        bi   $    ry   0! ! ! ! !") ! ! ! % % % % %o % % %0 O
 O
 O
 O
 O
+ O
 O
 O
d u
 u
 u
 u
 u
1 u
 u
 u
p   ~
 ~
 ~
 ~
 ~
(? ~
 ~
 ~
B   j
 j
 j
 j
 j
%< j
 j
 j
Z A
 A
 A
 A
 A
#: A
 A
 A
H  rE   