
     `io                     d   d Z ddlZddlmZmZmZ ddlZddlmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZ dd	lmZmZmZmZmZmZ dd
lmZmZ ddlmZmZmZ ddlmZm Z m!Z! ddl"m#Z#  e!j$        e%          Z& G d dej'                  Z(dBdZ) G d dej'                  Z* G d dej'                  Z+ G d dej'                  Z, G d dej'                  Z- G d dej'                  Z. G d dej'                  Z/ G d dej'                  Z0 G d  d!ej'                  Z1	 	 dCd#ej'        d$ej2        d%ej2        d&ej2        d'eej2                 d(e3d)e3d*eej2                 fd+Z4 G d, d-ej'                  Z5 G d. d/ej'                  Z6 G d0 d1e          Z7 G d2 d3ej'                  Z8e G d4 d5e                      Z9e G d6 d7e9                      Z:e G d8 d9e9                      Z; ed:;           G d< d=e9                      Z< ed>;           G d? d@e9                      Z=g dAZ>dS )DzPyTorch MarkupLM model.    N)CallableOptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)GradientCheckpointingLayer)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringcan_return_tuplelogging   )MarkupLMConfigc                   *     e Zd ZdZ fdZddZ xZS )XPathEmbeddingszConstruct the embeddings from xpath tags and subscripts.

    We drop tree-id in this version, as its info can be covered by xpath.
    c                    t                                                       j        | _        t          j        j        | j        z  j                  | _        t          j        j	                  | _
        t          j                    | _        t          j        j        | j        z  dj        z            | _        t          j        dj        z  j                  | _        t          j        fdt!          | j                  D                       | _        t          j        fdt!          | j                  D                       | _        d S )N   c                 N    g | ]!}t          j        j        j                  "S  )r   	Embeddingmax_xpath_tag_unit_embeddingsxpath_unit_hidden_size.0_configs     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/markuplm/modeling_markuplm.py
<listcomp>z,XPathEmbeddings.__init__.<locals>.<listcomp>>   s;        VA6C`aa      c                 N    g | ]!}t          j        j        j                  "S r"   )r   r#   max_xpath_subs_unit_embeddingsr%   r&   s     r*   r+   z,XPathEmbeddings.__init__.<locals>.<listcomp>E   s;        VBFDabb  r,   )super__init__	max_depthr   Linearr%   hidden_sizexpath_unitseq2_embeddingsDropouthidden_dropout_probdropoutReLU
activationxpath_unitseq2_inner	inner2emb
ModuleListrangexpath_tag_sub_embeddingsxpath_subs_sub_embeddingsselfr)   	__class__s    `r*   r0   zXPathEmbeddings.__init__1   s?   ))+63PSWSa3acicu)v)v&z&"<=='))$&If.Kdn.\^_bhbt^t$u$u!1v'9#96;MNN(*   t~..  )
 )
% *,   t~..  *
 *
&&&r,   Nc           	         g }g }t          | j                  D ]n}|                     | j        |         |d d d d |f                              |                     | j        |         |d d d d |f                              ot          j        |d          }t          j        |d          }||z   }|                     |                     | 	                    | 
                    |                                        }|S )Ndim)r=   r1   appendr>   r?   torchcatr;   r7   r9   r:   )rA   xpath_tags_seqxpath_subs_seqxpath_tags_embeddingsxpath_subs_embeddingsixpath_embeddingss          r*   forwardzXPathEmbeddings.forwardK   s'    " "t~&& 	e 	eA!(()I)Fq)I.YZYZYZ\]\]\]_`Y`Ja)b)bccc!(()J)G)J>Z[Z[Z[]^]^]^`aZaKb)c)cdddd %	*?R H H H %	*?R H H H03HH>>$,,ttG`G`aqGrGr7s7s*t*tuur,   )NN)__name__
__module____qualname____doc__r0   rP   __classcell__rB   s   @r*   r   r   +   sV         

 
 
 
 
4               r,   r   c                     |                      |                                          }t          j        |d                              |          |z   |z  }|                                |z   S )a  
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`.

    Args:
        x: torch.Tensor x:

    Returns: torch.Tensor
    r   rE   )neintrH   cumsumtype_aslong)	input_idspadding_idxpast_key_values_lengthmaskincremental_indicess        r*   "create_position_ids_from_input_idsrb   ^   sg     <<$$((**D <!444<<TBBE[[_cc##%%33r,   c                   >     e Zd ZdZ fdZd Z	 	 	 	 	 	 	 ddZ xZS )MarkupLMEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                    t                                                       || _        t          j        |j        |j        |j                  | _        t          j        |j	        |j                  | _
        |j        | _        t          |          | _        t          j        |j        |j                  | _        t          j        |j        |j                  | _        t          j        |j                  | _        |                     dt-          j        |j	                                      d          d           |j        | _        t          j        |j	        |j        | j                  | _
        d S )N)r^   epsposition_ids)r   rD   F)
persistent)r/   r0   r)   r   r#   
vocab_sizer3   pad_token_idword_embeddingsmax_position_embeddingsposition_embeddingsr1   r   rO   type_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsr5   r6   r7   register_bufferrH   arangeexpandr^   r@   s     r*   r0   zMarkupLMEmbeddings.__init__q   s?   !|F,=v?Q_e_rsss#%<0NPVPb#c#c ) / 7 7%'\&2H&J\%]%]"f&8f>STTTz&"<==EL)GHHOOPWXXej 	 	
 	
 	
 ".#%<*F,>DL\$
 $
 $
   r,   c                    |                                 dd         }|d         }t          j        | j        dz   || j        z   dz   t          j        |j                  }|                    d                              |          S )z
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: torch.Tensor

        Returns: torch.Tensor
        NrD   r   dtypedevicer   )sizerH   rt   r^   r\   ry   	unsqueezeru   )rA   inputs_embedsinput_shapesequence_lengthrh   s        r*   &create_position_ids_from_inputs_embedsz9MarkupLMEmbeddings.create_position_ids_from_inputs_embeds   s     $((**3B3/%a.|q /D4D"Dq"HPUPZcpcw
 
 
 %%a((//<<<r,   Nr   c                    ||                                 }n|                                 d d         }||j        n|j        }	|.|t          || j        |          }n|                     |          }|!t          j        |t
          j        |	          }||                     |          }|Q| j	        j
        t          j        t          t          |          | j        gz             t
          j        |	          z  }|Q| j	        j        t          j        t          t          |          | j        gz             t
          j        |	          z  }|}
|                     |          }|                     |          }|                     ||          }|
|z   |z   |z   }|                     |          }|                     |          }|S )NrD   rw   )rz   ry   rb   r^   r   rH   zerosr\   rl   r)   
tag_pad_idonestuplelistr1   subs_pad_idrn   rp   rO   rq   r7   )rA   r]   rJ   rK   token_type_idsrh   r|   r_   r}   ry   words_embeddingsrn   rp   rO   
embeddingss                  r*   rP   zMarkupLMEmbeddings.forward   s     #..**KK',,..ss3K%.%:!!@T$A)TM]_uvv#JJ=YY!"[EJvVVVN  00;;M !![3ejd;''4>*::;;5:V\7 7 7 N !![4uzd;''4>*::;;5:V\8 8 8 N )"66|DD $ : :> J J00PP%(;;>SSVff
^^J//
\\*--
r,   )NNNNNNr   )rQ   rR   rS   rT   r0   r   rP   rU   rV   s   @r*   rd   rd   n   sx        QQ
 
 
 
 
2= = =&  2 2 2 2 2 2 2 2r,   rd   c                   P     e Zd Z fdZdej        dej        dej        fdZ xZS )MarkupLMSelfOutputc                    t                                                       t          j        |j        |j                  | _        t          j        |j        |j                  | _        t          j        |j	                  | _
        d S Nrf   )r/   r0   r   r2   r3   denserq   rr   r5   r6   r7   r@   s     r*   r0   zMarkupLMSelfOutput.__init__   sf    Yv163EFF
f&8f>STTTz&"<==r,   hidden_statesinput_tensorreturnc                     |                      |          }|                     |          }|                     ||z             }|S Nr   r7   rq   rA   r   r   s      r*   rP   zMarkupLMSelfOutput.forward   @    

=11]33}|'CDDr,   rQ   rR   rS   r0   rH   TensorrP   rU   rV   s   @r*   r   r      i        > > > > >U\  RWR^        r,   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )MarkupLMIntermediatec                    t                                                       t          j        |j        |j                  | _        t          |j        t                    rt          |j                 | _        d S |j        | _        d S r   )r/   r0   r   r2   r3   intermediate_sizer   
isinstance
hidden_actstrr   intermediate_act_fnr@   s     r*   r0   zMarkupLMIntermediate.__init__   sn    Yv163KLL
f'-- 	9'-f.?'@D$$$'-'8D$$$r,   r   r   c                 Z    |                      |          }|                     |          }|S r   )r   r   rA   r   s     r*   rP   zMarkupLMIntermediate.forward   s,    

=1100??r,   r   rV   s   @r*   r   r      s^        9 9 9 9 9U\ el        r,   r   c                   P     e Zd Z fdZdej        dej        dej        fdZ xZS )MarkupLMOutputc                    t                                                       t          j        |j        |j                  | _        t          j        |j        |j                  | _        t          j	        |j
                  | _        d S r   )r/   r0   r   r2   r   r3   r   rq   rr   r5   r6   r7   r@   s     r*   r0   zMarkupLMOutput.__init__   sf    Yv79KLL
f&8f>STTTz&"<==r,   r   r   r   c                     |                      |          }|                     |          }|                     ||z             }|S r   r   r   s      r*   rP   zMarkupLMOutput.forward   r   r,   r   rV   s   @r*   r   r      r   r,   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )MarkupLMPoolerc                     t                                                       t          j        |j        |j                  | _        t          j                    | _        d S r   )r/   r0   r   r2   r3   r   Tanhr9   r@   s     r*   r0   zMarkupLMPooler.__init__   sC    Yv163EFF
'))r,   r   r   c                 r    |d d df         }|                      |          }|                     |          }|S )Nr   )r   r9   )rA   r   first_token_tensorpooled_outputs       r*   rP   zMarkupLMPooler.forward  s@     +111a40

#56666r,   r   rV   s   @r*   r   r      s^        $ $ $ $ $
U\ el        r,   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )MarkupLMPredictionHeadTransformc                 V   t                                                       t          j        |j        |j                  | _        t          |j        t                    rt          |j                 | _
        n|j        | _
        t          j        |j        |j                  | _        d S r   )r/   r0   r   r2   r3   r   r   r   r   r   transform_act_fnrq   rr   r@   s     r*   r0   z(MarkupLMPredictionHeadTransform.__init__  s    Yv163EFF
f'-- 	6$*6+<$=D!!$*$5D!f&8f>STTTr,   r   r   c                     |                      |          }|                     |          }|                     |          }|S r   )r   r   rq   r   s     r*   rP   z'MarkupLMPredictionHeadTransform.forward  s=    

=11--m<<}55r,   r   rV   s   @r*   r   r     sc        U U U U UU\ el        r,   r   c                   *     e Zd Z fdZd Zd Z xZS )MarkupLMLMPredictionHeadc                 >   t                                                       t          |          | _        t	          j        |j        |j        d          | _        t	          j	        t          j        |j                            | _        | j        | j        _        d S )NF)bias)r/   r0   r   	transformr   r2   r3   rj   decoder	ParameterrH   r   r   r@   s     r*   r0   z!MarkupLMLMPredictionHead.__init__"  sz    8@@ y!3V5FUSSSLV->!?!?@@	 !Ir,   c                 (    | j         | j        _         d S r   )r   r   rA   s    r*   _tie_weightsz%MarkupLMLMPredictionHead._tie_weights/  s     Ir,   c                 Z    |                      |          }|                     |          }|S r   )r   r   r   s     r*   rP   z MarkupLMLMPredictionHead.forward2  s*    }55]33r,   )rQ   rR   rS   r0   r   rP   rU   rV   s   @r*   r   r   !  sV        & & & & && & &      r,   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )MarkupLMOnlyMLMHeadc                 p    t                                                       t          |          | _        d S r   )r/   r0   r   predictionsr@   s     r*   r0   zMarkupLMOnlyMLMHead.__init__:  s/    3F;;r,   sequence_outputr   c                 0    |                      |          }|S r   )r   )rA   r   prediction_scoress      r*   rP   zMarkupLMOnlyMLMHead.forward>  s     ,,_==  r,   r   rV   s   @r*   r   r   9  s^        < < < < <!u| ! ! ! ! ! ! ! ! !r,   r           modulequerykeyvalueattention_maskscalingr7   	head_maskc                 8   t          j        ||                    dd                    |z  }	|$|d d d d d d d |j        d         f         }
|	|
z   }	t          j                            |	dt           j                                      |j	                  }	t          j        
                    |	|| j                  }	||	|                    dddd          z  }	t          j        |	|          }|                    dd                                          }||	fS )N   r
   rD   )rF   rx   )ptrainingr   )rH   matmul	transposeshaper   
functionalsoftmaxfloat32torx   r7   r   view
contiguous)r   r   r   r   r   r   r7   r   kwargsattn_weightscausal_maskattn_outputs               r*   eager_attention_forwardr   D  s    <s}}Q':':;;gEL!$QQQ111o	"o%=>#k1=((2U](SSVVW\WbccL=((6?([[L#innQAq&A&AA,|U33K''1--88::K$$r,   c                        e Zd Z fdZ	 	 	 d
dej        deej                 deej                 dee         de	ej                 f
d	Z
 xZS )MarkupLMSelfAttentionc                    t                                                       |j        |j        z  dk    r0t	          |d          s t          d|j         d|j         d          || _        |j        | _        t          |j        |j        z            | _        | j        | j        z  | _	        t          j        |j        | j	                  | _        t          j        |j        | j	                  | _        t          j        |j        | j	                  | _        t          j        |j                  | _        |j        | _        | j        dz  | _        d S )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()g      )r/   r0   r3   num_attention_headshasattr
ValueErrorr)   rY   attention_head_sizeall_head_sizer   r2   r   r   r   r5   attention_probs_dropout_probr7   attention_dropoutr   r@   s     r*   r0   zMarkupLMSelfAttention.__init__a  s:    ::a??PVXhHiHi?8F$6 8 8 48 8 8  
 #)#= #&v'9F<V'V#W#W !58PPYv143EFF
9V/1CDDYv143EFF
z&"EFF!'!D/5r,   NFr   r   r   output_attentionsr   c                    |j         d d         }g |d| j        R }|                     |                              |                              dd          }|                     |                              |                              dd          }	|                     |                              |                              dd          }
t          }| j        j	        dk    rt          | j        j	                 } || ||	|
|f| j        sdn| j        | j        |d|\  }} |j        g |dR                                  }|r||fn|f}|S )NrD   r   r   eagerr   )r7   r   r   )r   r   r   r   r   r   r   r   r)   _attn_implementationr   r   r   r   reshaper   )rA   r   r   r   r   r   r}   hidden_shapequery_states
key_statesvalue_statesattention_interfacer   r   outputss                  r*   rP   zMarkupLMSelfAttention.forwardv  s    $)#2#.CCbC$*BCCzz-0055lCCMMaQRSSXXm,,11,??II!QOO
zz-0055lCCMMaQRSS(?;+w66"9$+:Z"[$7$7
%
  $}HCC$2HL
%
 
%
 
%
 
%
!\ *k);;;;;;FFHH1BV;--r,   NNF)rQ   rR   rS   r0   rH   r   r   FloatTensorboolr   rP   rU   rV   s   @r*   r   r   `  s        6 6 6 6 60 7;15,1! !|! !!23! E-.	!
 $D>! 
u|	! ! ! ! ! ! ! !r,   r   c                        e Zd Z fdZd Z	 	 	 ddej        deej                 deej                 dee	         d	e
ej                 f
d
Z xZS )MarkupLMAttentionc                     t                                                       t          |          | _        t	          |          | _        t                      | _        d S r   )r/   r0   r   rA   r   outputsetpruned_headsr@   s     r*   r0   zMarkupLMAttention.__init__  sI    )&11	(00EEr,   c                    t          |          dk    rd S t          || j        j        | j        j        | j                  \  }}t          | j        j        |          | j        _        t          | j        j        |          | j        _        t          | j        j	        |          | j        _	        t          | j
        j        |d          | j
        _        | j        j        t          |          z
  | j        _        | j        j        | j        j        z  | j        _        | j                            |          | _        d S )Nr   r   rE   )lenr   rA   r   r   r   r   r   r   r   r   r   r   union)rA   headsindexs      r*   prune_headszMarkupLMAttention.prune_heads  s    u::??F7490$)2OQUQb
 
u
 -TY_eDD	*49=%@@	,TY_eDD	.t{/@%QOOO )-	(EE

(R	%"&)"?$)B_"_	 -33E::r,   NFr   r   r   r   r   c                 ~     | j         |f|||d|}|                     |d         |          }|f|dd          z   }|S N)r   r   r   r   r   )rA   r   )	rA   r   r   r   r   r   self_outputsattention_outputr   s	            r*   rP   zMarkupLMAttention.forward  sl     !ty
)/	
 

 
 
  ;;|AFF#%QRR(88r,   r   )rQ   rR   rS   r0   r  rH   r   r   r   r   r   rP   rU   rV   s   @r*   r   r     s        " " " " "; ; ;* 7;15,1 | !!23 E-.	
 $D> 
u|	       r,   r   c                        e Zd Z fdZ	 	 	 ddej        deej                 deej                 dee         de	ej                 f
d	Z
d
 Z xZS )MarkupLMLayerc                     t                                                       |j        | _        d| _        t	          |          | _        t          |          | _        t          |          | _	        d S )Nr   )
r/   r0   chunk_size_feed_forwardseq_len_dimr   	attentionr   intermediater   r   r@   s     r*   r0   zMarkupLMLayer.__init__  s^    '-'E$*622088$V,,r,   NFr   r   r   r   r   c                      | j         |f|||d|}|d         }|dd          }t          | j        | j        | j        |          }	|	f|z   }|S r  )r  r   feed_forward_chunkr  r  )
rA   r   r   r   r   r   self_attention_outputsr
  r   layer_outputs
             r*   rP   zMarkupLMLayer.forward  s     "0"
)/	"
 "

 "
 "
 2!4(,0#T%A4CSUe
 
  /G+r,   c                 \    |                      |          }|                     ||          }|S r   )r  r   )rA   r
  intermediate_outputr  s       r*   r  z MarkupLMLayer.feed_forward_chunk  s2    "//0@AA{{#68HIIr,   r   )rQ   rR   rS   r0   rH   r   r   r   r   r   rP   r  rU   rV   s   @r*   r  r    s        - - - - - 7;15,1 | !!23 E-.	
 $D> 
u|	   2      r,   r  c                        e Zd Z fdZe	 	 	 	 	 ddej        deej                 deej                 dee	         d	ee	         d
ee	         de
eej                 ef         fd            Z xZS )MarkupLMEncoderc                     t                                                       | _        t          j        fdt          j                  D                       | _        d| _        d S )Nc                 .    g | ]}t                    S r"   )r  )r'   rN   r)   s     r*   r+   z,MarkupLMEncoder.__init__.<locals>.<listcomp>  s!    #c#c#caM&$9$9#c#c#cr,   F)	r/   r0   r)   r   r<   r=   num_hidden_layerslayergradient_checkpointingr@   s    `r*   r0   zMarkupLMEncoder.__init__  s`    ]#c#c#c#c5IaCbCb#c#c#cdd
&+###r,   NFTr   r   r   r   output_hidden_statesreturn_dictr   c           	          |rdnd }|rdnd }	t          | j                  D ]<\  }
}|r||fz   }|||
         nd } |d||||d|}|d         }|r|	|d         fz   }	=|r||fz   }t          |||	          S )Nr"   )r   r   r   r   r   r   )last_hidden_stater   
attentions)	enumerater  r   )rA   r   r   r   r   r  r   r   all_hidden_statesall_self_attentionsrN   layer_modulelayer_head_masklayer_outputss                 r*   rP   zMarkupLMEncoder.forward  s     #7@BBD$5?bb4(44 	P 	POA|# I$58H$H!.7.CillO(L +-)"3	 
  M *!,M  P&9]1=M<O&O# 	E 1]4D D++*
 
 
 	
r,   )NNFFT)rQ   rR   rS   r0   r   rH   r   r   r   r   r   r   r   rP   rU   rV   s   @r*   r  r    s        , , , , ,  7;15,1/4&*&
 &
|&
 !!23&
 E-.	&

 $D>&
 'tn&
 d^&
 
uU\"O3	4&
 &
 &
 &
 &
 &
 &
 &
r,   r  c                   p     e Zd ZU eed<   dZd Zedee	e
ej        f                  f fd            Z xZS )MarkupLMPreTrainedModelr)   markuplmc                    t          |t          j                  rT|j        j                            d| j        j                   |j         |j        j        	                                 dS dS t          |t          j
                  r_|j        j                            d| j        j                   |j        +|j        j        |j                 	                                 dS dS t          |t          j                  r?|j        j        	                                 |j        j                            d           dS t          |t                    r |j        j        	                                 dS dS )zInitialize the weightsr   )meanstdN      ?)r   r   r2   weightdatanormal_r)   initializer_ranger   zero_r#   r^   rq   fill_r   )rA   r   s     r*   _init_weightsz%MarkupLMPreTrainedModel._init_weights)  sY   fbi(( 	% M&&CT[5R&SSS{& &&((((( '&-- 	%M&&CT[5R&SSS!-"6#56<<>>>>> .--- 	%K""$$$M$$S))))) 899 	%K""$$$$$	% 	%r,   pretrained_model_name_or_pathc                 >     t                      j        |g|R i |S r   )r/   from_pretrained)clsr8  
model_argsr   rB   s       r*   r:  z'MarkupLMPreTrainedModel.from_pretrained;  s,    &uww&'D\z\\\U[\\\r,   )rQ   rR   rS   r   __annotations__base_model_prefixr7  classmethodr   r   r   osPathLiker:  rU   rV   s   @r*   r+  r+  #  s         "% % %$ ]HU3PRP[K[E\<] ] ] ] ] ] [] ] ] ] ]r,   r+  c                       e Zd Zd fd	Zd Zd Zd Zee	 	 	 	 	 	 	 	 	 	 	 dde	e
j                 de	e
j                 d	e	e
j                 d
e	e
j                 de	e
j                 de	e
j                 de	e
j                 de	e
j                 de	e         de	e         de	e         deeef         fd                        Z xZS )MarkupLMModelTc                     t                                          |           || _        t          |          | _        t          |          | _        |rt          |          nd| _        | 	                                 dS )zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)
r/   r0   r)   rd   r   r  encoderr   pooler	post_init)rA   r)   add_pooling_layerrB   s      r*   r0   zMarkupLMModel.__init__C  ss    
 	   ,V44&v..0AKnV,,,t 	r,   c                     | j         j        S r   r   rl   r   s    r*   get_input_embeddingsz"MarkupLMModel.get_input_embeddingsS  s    ..r,   c                     || j         _        d S r   rJ  )rA   r   s     r*   set_input_embeddingsz"MarkupLMModel.set_input_embeddingsV  s    */'''r,   c                     |                                 D ]/\  }}| j        j        |         j                            |           0dS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsrE  r  r  r  )rA   heads_to_pruner  r  s       r*   _prune_headszMarkupLMModel._prune_headsY  sU    
 +0022 	C 	CLE5Lu%/;;EBBBB	C 	Cr,   Nr]   rJ   rK   r   r   rh   r   r|   r   r  r   r   c                    |	|	n| j         j        }	|
|
n| j         j        }
||n| j         j        }||t	          d          |+|                     ||           |                                }n.||                                dd         }nt	          d          ||j        n|j        }|t          j	        ||          }|!t          j
        |t          j        |          }|                    d                              d          }|                    | j        	          }d
|z
  dz  }||                                dk    rr|                    d                              d                              d                              d          }|                    | j         j        dddd          }nS|                                dk    r;|                    d                              d                              d          }|                    t%          |                                           j        	          }ndg| j         j        z  }|                     ||||||          }|                     ||||	|
d          }|d         }| j        |                     |          nd}t/          |||j        |j                  S )a  
        xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Tag IDs for each token in the input sequence, padded up to config.max_depth.
        xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Subscript IDs for each token in the input sequence, padded up to config.max_depth.

        Examples:

        ```python
        >>> from transformers import AutoProcessor, MarkupLMModel

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
        >>> model = MarkupLMModel.from_pretrained("microsoft/markuplm-base")

        >>> html_string = "<html> <head> <title>Page Title</title> </head> </html>"

        >>> encoding = processor(html_string, return_tensors="pt")

        >>> outputs = model(**encoding)
        >>> last_hidden_states = outputs.last_hidden_state
        >>> list(last_hidden_states.shape)
        [1, 4, 768]
        ```NzDYou cannot specify both input_ids and inputs_embeds at the same timerD   z5You have to specify either input_ids or inputs_embeds)ry   rw   r   r   )rx   r0  g     r   )r]   rJ   rK   rh   r   r|   T)r   r   r  r   )r"  pooler_outputr   r#  )r)   r   r  use_return_dictr   %warn_if_padding_and_no_attention_maskrz   ry   rH   r   r   r\   r{   r   rx   rF   ru   r  next
parametersr   rE  rF  r   r   r#  )rA   r]   rJ   rK   r   r   rh   r   r|   r   r  r   r}   ry   extended_attention_maskembedding_outputencoder_outputsr   r   s                      r*   rP   zMarkupLMModel.forwarda  s   N 2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B] ]%>cddd"66y.QQQ#..**KK&',,..ss3KKTUUU%.%:!!@T!"ZFCCCN!"[EJvVVVN"0":":1"="="G"G"J"J"9"<"<4:"<"N"N#&)@#@H"L }}!##%//22<<Q??II"MMWWXZ[[	%,,T[-JBPRTVXZ[[		A%%%//22<<R@@JJ2NN	!40A0A+B+B+HIIII!>>I??))%)' + 
 
 ,,#/!5 ' 
 
 *!,8<8OO444UY)-')7&1	
 
 
 	
r,   )T)NNNNNNNNNNN)rQ   rR   rS   r0   rK  rM  rQ  r   r   r   rH   
LongTensorr   r   r   r   r   rP   rU   rV   s   @r*   rC  rC  @  s             / / /0 0 0C C C  1559596:59371559,0/3&*c
 c
E,-c
 !!12c
 !!12	c

 !!23c
 !!12c
 u/0c
 E-.c
   12c
 $D>c
 'tnc
 d^c
 
u00	1c
 c
 c
 ^ c
 c
 c
 c
 c
r,   rC  c            !           e Zd Z fdZee	 	 	 	 	 	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 deej                 deej                 d	eej                 d
eej                 deej                 deej                 dee	         dee	         dee	         de
eej                 ef         fd                        Z xZS )MarkupLMForQuestionAnsweringc                     t                                          |           |j        | _        t          |d          | _        t          j        |j        |j                  | _        | 	                                 d S NF)rH  )
r/   r0   
num_labelsrC  r,  r   r2   r3   
qa_outputsrG  r@   s     r*   r0   z%MarkupLMForQuestionAnswering.__init__  sj        +%fFFF)F$68IJJ 	r,   Nr]   rJ   rK   r   r   rh   r   r|   start_positionsend_positionsr   r  r   r   c                 ~   ||n| j         j        }|                     ||||||||||d          }|d         }|                     |          }|                    dd          \  }}|                    d                                          }|                    d                                          }d}|	|
t          |	                                          dk    r|	                    d          }	t          |
                                          dk    r|
                    d          }
|                    d          }|		                    d|           |
	                    d|           t          |          } |||	          } |||
          }||z   d	z  }t          ||||j        |j        
          S )ae  
        xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Tag IDs for each token in the input sequence, padded up to config.max_depth.
        xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Subscript IDs for each token in the input sequence, padded up to config.max_depth.

        Examples:

        ```python
        >>> from transformers import AutoProcessor, MarkupLMForQuestionAnswering
        >>> import torch

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base-finetuned-websrc")
        >>> model = MarkupLMForQuestionAnswering.from_pretrained("microsoft/markuplm-base-finetuned-websrc")

        >>> html_string = "<html> <head> <title>My name is Niels</title> </head> </html>"
        >>> question = "What's his name?"

        >>> encoding = processor(html_string, questions=question, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**encoding)

        >>> answer_start_index = outputs.start_logits.argmax()
        >>> answer_end_index = outputs.end_logits.argmax()

        >>> predict_answer_tokens = encoding.input_ids[0, answer_start_index : answer_end_index + 1]
        >>> processor.decode(predict_answer_tokens).strip()
        'Niels'
        ```NT
rJ   rK   r   r   rh   r   r|   r   r  r   r   r   rD   rE   )ignore_indexr   )lossstart_logits
end_logitsr   r#  )r)   rT  r,  ra  splitsqueezer   r  rz   clamp_r   r   r   r#  )rA   r]   rJ   rK   r   r   rh   r   r|   rb  rc  r   r  r   r   r   logitsrh  ri  
total_lossignored_indexloss_fct
start_lossend_losss                           r*   rP   z$MarkupLMForQuestionAnswering.forward  s   ` &1%<kk$+B]--))))%'/!5   
 
 "!*11#)<<r<#:#: j#++B//::<<''++6688

&=+D?''))**Q.."1"9"9""="==%%''((1,, - 5 5b 9 9(--a00M""1m444  M222']CCCH!,@@Jx
M::H$x/14J+%!!/)
 
 
 	
r,   )NNNNNNNNNNNNN)rQ   rR   rS   r0   r   r   r   rH   r   r   r   r   r   rP   rU   rV   s   @r*   r]  r]    s             -115151515/3,0042604,0/3&*\
 \
EL)\
 !.\
 !.	\

 !.\
 !.\
 u|,\
 EL)\
  -\
 "%,/\
  -\
 $D>\
 'tn\
 d^\
 
uU\"$@@	A\
 \
 \
 ^ \
 \
 \
 \
 \
r,   r]  zC
    MarkupLM Model with a `token_classification` head on top.
    )custom_introc                       e Zd Z fdZee	 	 	 	 	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 deej                 deej                 d	eej                 d
eej                 deej                 dee	         dee	         dee	         de
eej                 ef         fd                        Z xZS )MarkupLMForTokenClassificationc                 Z   t                                          |           |j        | _        t          |d          | _        |j        |j        n|j        }t          j        |          | _	        t          j
        |j        |j                  | _        |                                  d S r_  )r/   r0   r`  rC  r,  classifier_dropoutr6   r   r5   r7   r2   r3   
classifierrG  rA   r)   rw  rB   s      r*   r0   z'MarkupLMForTokenClassification.__init__>  s        +%fFFF)/)B)NF%%TZTn 	 z"455)F$68IJJ 	r,   Nr]   rJ   rK   r   r   rh   r   r|   labelsr   r  r   r   c                 t   ||n| j         j        }|                     |||||||||
|d          }|d         }|                     |          }d}|	Kt	                      } ||                    d| j         j                  |	                    d                    }t          |||j        |j	                  S )a  
        xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Tag IDs for each token in the input sequence, padded up to config.max_depth.
        xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Subscript IDs for each token in the input sequence, padded up to config.max_depth.
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

        Examples:

        ```python
        >>> from transformers import AutoProcessor, AutoModelForTokenClassification
        >>> import torch

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
        >>> processor.parse_html = False
        >>> model = AutoModelForTokenClassification.from_pretrained("microsoft/markuplm-base", num_labels=7)

        >>> nodes = ["hello", "world"]
        >>> xpaths = ["/html/body/div/li[1]/div/span", "/html/body/div/li[1]/div/span"]
        >>> node_labels = [1, 2]
        >>> encoding = processor(nodes=nodes, xpaths=xpaths, node_labels=node_labels, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**encoding)

        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```NTre  r   rD   rg  rm  r   r#  )
r)   rT  r,  rx  r   r   r`  r   r   r#  )rA   r]   rJ   rK   r   r   rh   r   r|   rz  r   r  r   r   r   r   rg  rp  s                     r*   rP   z&MarkupLMForTokenClassification.forwardL  s    \ &1%<kk$+B]--))))%'/!5   
 
 "!* OOO<<'))H8!&&r4;+ABBB D
 %$!/)	
 
 
 	
r,   NNNNNNNNNNNN)rQ   rR   rS   r0   r   r   r   rH   r   r   r   r   r   rP   rU   rV   s   @r*   ru  ru  7  s             -115151515/3,004)-,0/3&*L
 L
EL)L
 !.L
 !.	L

 !.L
 !.L
 u|,L
 EL)L
  -L
 &L
 $D>L
 'tnL
 d^L
 
uU\"N2	3L
 L
 L
 ^ L
 L
 L
 L
 L
r,   ru  z
    MarkupLM Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    c                       e Zd Z fdZee	 	 	 	 	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 deej                 deej                 d	eej                 d
eej                 deej                 dee	         dee	         dee	         de
eej                 ef         fd                        Z xZS )!MarkupLMForSequenceClassificationc                 d   t                                          |           |j        | _        || _        t	          |          | _        |j        |j        n|j        }t          j	        |          | _
        t          j        |j        |j                  | _        |                                  d S r   )r/   r0   r`  r)   rC  r,  rw  r6   r   r5   r7   r2   r3   rx  rG  ry  s      r*   r0   z*MarkupLMForSequenceClassification.__init__  s        +%f--)/)B)NF%%TZTn 	 z"455)F$68IJJ 	r,   Nr]   rJ   rK   r   r   rh   r   r|   rz  r   r  r   r   c                    ||n| j         j        }|                     |||||||||
|d          }|d         }|                     |          }|                     |          }d}|	Z| j         j        f| j        dk    rd| j         _        nN| j        dk    r7|	j        t          j	        k    s|	j        t          j
        k    rd| j         _        nd| j         _        | j         j        dk    rWt                      }| j        dk    r1 ||                                |	                                          }n |||	          }n| j         j        dk    rGt                      } ||                    d| j                  |	                    d                    }n*| j         j        dk    rt                      } |||	          }t!          |||j        |j        	          S )
a  
        xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Tag IDs for each token in the input sequence, padded up to config.max_depth.
        xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Subscript IDs for each token in the input sequence, padded up to config.max_depth.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

        Examples:

        ```python
        >>> from transformers import AutoProcessor, AutoModelForSequenceClassification
        >>> import torch

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
        >>> model = AutoModelForSequenceClassification.from_pretrained("microsoft/markuplm-base", num_labels=7)

        >>> html_string = "<html> <head> <title>Page Title</title> </head> </html>"
        >>> encoding = processor(html_string, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**encoding)

        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```NTre  r   
regressionsingle_label_classificationmulti_label_classificationrD   r|  )r)   rT  r,  r7   rx  problem_typer`  rx   rH   r\   rY   r	   rk  r   r   r   r   r   r#  )rA   r]   rJ   rK   r   r   rh   r   r|   rz  r   r  r   r   r   rm  rg  rp  s                     r*   rP   z)MarkupLMForSequenceClassification.forward  s   Z &1%<kk$+B]--))))%'/!5   
 
  
]33//{'/?a''/;DK,,_q((flej.H.HFL\a\eLeLe/LDK,,/KDK,{'<77"99?a''#8FNN$4$4fnn6F6FGGDD#8FF33DD)-JJJ+--xB @ @&++b//RR)-III,..x//'!/)	
 
 
 	
r,   r}  )rQ   rR   rS   r0   r   r   r   rH   r   r   r   r   r   rP   rU   rV   s   @r*   r  r    s             -115151515/3,004)-,0/3&*\
 \
EL)\
 !.\
 !.	\

 !.\
 !.\
 u|,\
 EL)\
  -\
 &\
 $D>\
 'tn\
 d^\
 
uU\"$<<	=\
 \
 \
 ^ \
 \
 \
 \
 \
r,   r  )r]  r  ru  rC  r+  )r   )r   N)?rT   r@  typingr   r   r   rH   r   torch.nnr   r   r	   activationsr   modeling_layersr   modeling_outputsr   r   r   r   r   r   modeling_utilsr   r   pytorch_utilsr   r   r   utilsr   r   r   configuration_markuplmr   
get_loggerrQ   loggerModuler   rb   rd   r   r   r   r   r   r   r   r   floatr   r   r   r  r  r+  rC  r]  ru  r  __all__r"   r,   r*   <module>r     s     				 , , , , , , , , , ,        A A A A A A A A A A ! ! ! ! ! ! 9 9 9 9 9 9                G F F F F F F F l l l l l l l l l l > > > > > > > > > > 2 2 2 2 2 2 
	H	%	%/  /  /  /  / bi /  /  / f4 4 4 4 _ _ _ _ _ _ _ _F           29        RY       RY        bi   $    ry   0! ! ! ! !") ! ! !$ (,% %I%<% 
% <	%
 U\*% % % %% % % %87 7 7 7 7BI 7 7 7v* * * * *	 * * *\% % % % %. % % %R.
 .
 .
 .
 .
bi .
 .
 .
b ] ] ] ] ]o ] ] ]8 E
 E
 E
 E
 E
+ E
 E
 E
P j
 j
 j
 j
 j
#: j
 j
 j
Z   
^
 ^
 ^
 ^
 ^
%< ^
 ^
 
^
B   o
 o
 o
 o
 o
(? o
 o
 o
d  r,   