
     `i]                        d Z ddlmZ ddlmZmZmZ ddlZddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
lmZmZmZ ddlmZmZ ddlmZmZmZ ddlmZmZmZ ddlmZ  ej         e!          Z" G d dej#                  Z$	 	 d<dej#        dej%        dej%        dej%        deej%                 de&de&deej%                 fdZ' G d dej#                  Z( G d dej#                  Z) G d  d!ej#                  Z* G d" d#ej#                  Z+ G d$ d%ej#                  Z, G d& d'e          Z- G d( d)ej#                  Z.e G d* d+e                      Z/e G d, d-e/                      Z0 G d. d/ej#                  Z1 G d0 d1ej#                  Z2e G d2 d3e/                      Z3e ed45           G d6 d7e                                  Z4 ed85           G d9 d:e/                      Z5g d;Z6dS )=zPyTorch Splinter model.    )	dataclass)CallableOptionalUnionN)nn)CrossEntropyLoss   )ACT2FN)GradientCheckpointingLayer)BaseModelOutputModelOutputQuestionAnsweringModelOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringcan_return_tuplelogging   )SplinterConfigc                        e Zd ZdZ fdZ	 	 	 	 d
deej                 deej                 deej                 deej                 de	f
d	Z
 xZS )SplinterEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                 d   t                                                       t          j        |j        |j        |j                  | _        t          j        |j        |j                  | _	        t          j        |j
        |j                  | _        t          j        |j        |j                  | _        t          j        |j                  | _        |                     dt%          j        |j                                      d          d           t+          |dd          | _        d S )	N)padding_idxepsposition_ids)r   F)
persistentposition_embedding_typeabsolute)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutregister_buffertorcharangeexpandgetattrr"   selfconfig	__class__s     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/splinter/modeling_splinter.pyr%   zSplinterEmbeddings.__init__+   s   !|F,=v?Q_e_rsss#%<0NPVPb#c#c %'\&2H&J\%]%]" f&8f>STTTz&"<== 	EL)GHHOOPWXXej 	 	
 	
 	
 (/v7PR\']']$$$    N	input_idstoken_type_idsr   inputs_embedsreturnc                     ||                                 }n|                                 d d         }|d         }|| j        d d d |f         }|+t          j        |t          j        | j        j                  }||                     |          }|                     |          }||z   }| j        dk    r| 	                    |          }	||	z  }| 
                    |          }|                     |          }|S )Nr    r   dtypedevicer#   )sizer   r5   zeroslongrF   r*   r.   r"   r,   r/   r3   )
r:   r?   r@   r   rA   input_shape
seq_lengthr.   
embeddingsr,   s
             r=   forwardzSplinterEmbeddings.forward<   s     #..**KK',,..ss3K ^
,QQQ^<L!"[EJtO`OghhhN  00;;M $ : :> J J"%::
':55"&":":<"H"H--J^^J//
\\*--
r>   )NNNN)__name__
__module____qualname____doc__r%   r   r5   
LongTensorFloatTensortuplerM   __classcell__r<   s   @r=   r   r   (   s        QQ^ ^ ^ ^ ^& 15593759 E,- !!12 u/0	
   12 
       r>   r           modulequerykeyvalueattention_maskscalingr3   	head_maskc                 8   t          j        ||                    dd                    |z  }	|$|d d d d d d d |j        d         f         }
|	|
z   }	t          j                            |	dt           j                                      |j	                  }	t          j        
                    |	|| j                  }	||	|                    dddd          z  }	t          j        |	|          }|                    dd                                          }||	fS )N   r	   r    )dimrE   )ptrainingr   )r5   matmul	transposeshaper   
functionalsoftmaxfloat32torE   r3   rd   view
contiguous)rX   rY   rZ   r[   r\   r]   r3   r^   kwargsattn_weightscausal_maskattn_outputs               r=   eager_attention_forwardrr   ^   s    <s}}Q':':;;gEL!$QQQ111o	"o%=>#k1=((2U](SSVVW\WbccL=((6?([[L#innQAq&A&AA,|U33K''1--88::K$$r>   c                        e Zd Z fdZ	 	 	 d
dej        deej                 deej                 dee         de	ej                 f
d	Z
 xZS )SplinterSelfAttentionc                    t                                                       |j        |j        z  dk    r0t	          |d          s t          d|j         d|j         d          || _        |j        | _        t          |j        |j        z            | _        | j        | j        z  | _	        t          j        |j        | j	                  | _        t          j        |j        | j	                  | _        t          j        |j        | j	                  | _        t          j        |j                  | _        |j        | _        | j        dz  | _        d S )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()g      )r$   r%   r(   num_attention_headshasattr
ValueErrorr;   intattention_head_sizeall_head_sizer   LinearrY   rZ   r[   r1   attention_probs_dropout_probr3   attention_dropoutr]   r9   s     r=   r%   zSplinterSelfAttention.__init__{   s:    ::a??PVXhHiHi?8F$6 8 8 48 8 8  
 #)#= #&v'9F<V'V#W#W !58PPYv143EFF
9V/1CDDYv143EFF
z&"EFF!'!D/5r>   NFhidden_statesr\   r^   output_attentionsrB   c                    |j         d d         }g |d| j        R }|                     |                              |                              dd          }|                     |                              |                              dd          }	|                     |                              |                              dd          }
t          }| j        j	        dk    rt          | j        j	                 } || ||	|
|f| j        sdn| j        | j        |d|\  }} |j        g |dR                                  }|r||fn|f}|S )Nr    r   r`   eagerrW   )r3   r]   r^   )rg   r|   rY   rl   rf   rZ   r[   rr   r;   _attn_implementationr   rd   r   r]   reshaperm   )r:   r   r\   r^   r   rn   rJ   hidden_shapequery_states
key_statesvalue_statesattention_interfacerq   ro   outputss                  r=   rM   zSplinterSelfAttention.forward   s    $)#2#.CCbC$*BCCzz-0055lCCMMaQRSSXXm,,11,??II!QOO
zz-0055lCCMMaQRSS(?;+w66"9$+:Z"[$7$7
%
  $}HCC$2HL
%
 
%
 
%
 
%
!\ *k);;;;;;FFHH1BV;--r>   NNF)rN   rO   rP   r%   r5   Tensorr   rS   boolrT   rM   rU   rV   s   @r=   rt   rt   z   s        6 6 6 6 60 7;15,1! !|! !!23! E-.	!
 $D>! 
u|	! ! ! ! ! ! ! !r>   rt   c                   P     e Zd Z fdZdej        dej        dej        fdZ xZS )SplinterSelfOutputc                    t                                                       t          j        |j        |j                  | _        t          j        |j        |j                  | _        t          j        |j	                  | _
        d S Nr   )r$   r%   r   r~   r(   denser/   r0   r1   r2   r3   r9   s     r=   r%   zSplinterSelfOutput.__init__   sf    Yv163EFF
f&8f>STTTz&"<==r>   r   input_tensorrB   c                     |                      |          }|                     |          }|                     ||z             }|S Nr   r3   r/   r:   r   r   s      r=   rM   zSplinterSelfOutput.forward   @    

=11]33}|'CDDr>   rN   rO   rP   r%   r5   r   rM   rU   rV   s   @r=   r   r      i        > > > > >U\  RWR^        r>   r   c                        e Zd Z fdZd Z	 	 	 ddej        deej                 deej                 dee	         d	e
ej                 f
d
Z xZS )SplinterAttentionc                     t                                                       t          |          | _        t	          |          | _        t                      | _        d S r   )r$   r%   rt   r:   r   outputsetpruned_headsr9   s     r=   r%   zSplinterAttention.__init__   sI    )&11	(00EEr>   c                    t          |          dk    rd S t          || j        j        | j        j        | j                  \  }}t          | j        j        |          | j        _        t          | j        j        |          | j        _        t          | j        j	        |          | j        _	        t          | j
        j        |d          | j
        _        | j        j        t          |          z
  | j        _        | j        j        | j        j        z  | j        _        | j                            |          | _        d S )Nr   r   rb   )lenr   r:   rx   r|   r   r   rY   rZ   r[   r   r   r}   union)r:   headsindexs      r=   prune_headszSplinterAttention.prune_heads   s    u::??F7490$)2OQUQb
 
u
 -TY_eDD	*49=%@@	,TY_eDD	.t{/@%QOOO )-	(EE

(R	%"&)"?$)B_"_	 -33E::r>   NFr   r\   r^   r   rB   c                 ~     | j         |f|||d|}|                     |d         |          }|f|dd          z   }|S N)r\   r^   r   r   r   )r:   r   )	r:   r   r\   r^   r   rn   self_outputsattention_outputr   s	            r=   rM   zSplinterAttention.forward   sl     !ty
)/	
 

 
 
  ;;|AFF#%QRR(88r>   r   )rN   rO   rP   r%   r   r5   r   r   rS   r   rT   rM   rU   rV   s   @r=   r   r      s        " " " " "; ; ;* 7;15,1 | !!23 E-.	
 $D> 
u|	       r>   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )SplinterIntermediatec                    t                                                       t          j        |j        |j                  | _        t          |j        t                    rt          |j                 | _        d S |j        | _        d S r   )r$   r%   r   r~   r(   intermediate_sizer   
isinstance
hidden_actstrr
   intermediate_act_fnr9   s     r=   r%   zSplinterIntermediate.__init__   sn    Yv163KLL
f'-- 	9'-f.?'@D$$$'-'8D$$$r>   r   rB   c                 Z    |                      |          }|                     |          }|S r   )r   r   )r:   r   s     r=   rM   zSplinterIntermediate.forward   s,    

=1100??r>   r   rV   s   @r=   r   r      s^        9 9 9 9 9U\ el        r>   r   c                   P     e Zd Z fdZdej        dej        dej        fdZ xZS )SplinterOutputc                    t                                                       t          j        |j        |j                  | _        t          j        |j        |j                  | _        t          j	        |j
                  | _        d S r   )r$   r%   r   r~   r   r(   r   r/   r0   r1   r2   r3   r9   s     r=   r%   zSplinterOutput.__init__  sf    Yv79KLL
f&8f>STTTz&"<==r>   r   r   rB   c                     |                      |          }|                     |          }|                     ||z             }|S r   r   r   s      r=   rM   zSplinterOutput.forward	  r   r>   r   rV   s   @r=   r   r     r   r>   r   c                        e Zd Z fdZ	 	 	 ddej        deej                 deej                 dee         de	ej                 f
d	Z
d
 Z xZS )SplinterLayerc                     t                                                       |j        | _        d| _        t	          |          | _        t          |          | _        t          |          | _	        d S )Nr   )
r$   r%   chunk_size_feed_forwardseq_len_dimr   	attentionr   intermediater   r   r9   s     r=   r%   zSplinterLayer.__init__  s^    '-'E$*622088$V,,r>   NFr   r\   r^   r   rB   c                      | j         |f|||d|}|d         }|dd          }t          | j        | j        | j        |          }	|	f|z   }|S r   )r   r   feed_forward_chunkr   r   )
r:   r   r\   r^   r   rn   self_attention_outputsr   r   layer_outputs
             r=   rM   zSplinterLayer.forward  s     "0"
)/	"
 "

 "
 "
 2!4(,0#T%A4CSUe
 
  /G+r>   c                 \    |                      |          }|                     ||          }|S r   )r   r   )r:   r   intermediate_outputr   s       r=   r   z SplinterLayer.feed_forward_chunk3  s2    "//0@AA{{#68HIIr>   r   )rN   rO   rP   r%   r5   r   r   rS   r   rT   rM   r   rU   rV   s   @r=   r   r     s        - - - - - 7;15,1 | !!23 E-.	
 $D> 
u|	   2      r>   r   c                        e Zd Z fdZe	 	 	 	 	 ddej        deej                 deej                 dee	         d	ee	         d
ee	         de
eej                 ef         fd            Z xZS )SplinterEncoderc                     t                                                       | _        t          j        fdt          j                  D                       | _        d| _        d S )Nc                 .    g | ]}t                    S  )r   ).0ir;   s     r=   
<listcomp>z,SplinterEncoder.__init__.<locals>.<listcomp>>  s!    #c#c#caM&$9$9#c#c#cr>   F)	r$   r%   r;   r   
ModuleListrangenum_hidden_layerslayergradient_checkpointingr9   s    `r=   r%   zSplinterEncoder.__init__;  s`    ]#c#c#c#c5IaCbCb#c#c#cdd
&+###r>   NFTr   r\   r^   r   output_hidden_statesreturn_dictrB   c           	          |rdnd }|rdnd }	t          | j                  D ]<\  }
}|r||fz   }|||
         nd } |d||||d|}|d         }|r|	|d         fz   }	=|r||fz   }t          |||	          S )Nr   )r   r\   r^   r   r   r   last_hidden_stater   
attentions)	enumerater   r   )r:   r   r\   r^   r   r   r   rn   all_hidden_statesall_self_attentionsr   layer_modulelayer_head_masklayer_outputss                 r=   rM   zSplinterEncoder.forwardA  s     #7@BBD$5?bb4(44 	P 	POA|# I$58H$H!.7.CillO(L +-)"3	 
  M *!,M  P&9]1=M<O&O# 	E 1]4D D++*
 
 
 	
r>   )NNFFT)rN   rO   rP   r%   r   r5   r   r   rS   r   r   rT   r   rM   rU   rV   s   @r=   r   r   :  s        , , , , ,  7;15,1/4&*&
 &
|&
 !!23&
 E-.	&

 $D>&
 'tn&
 d^&
 
uU\"O3	4&
 &
 &
 &
 &
 &
 &
 &
r>   r   c                   (    e Zd ZU eed<   dZdZd ZdS )SplinterPreTrainedModelr;   splinterTc                    t          |t          j                  rT|j        j                            d| j        j                   |j         |j        j        	                                 dS dS t          |t          j
                  r_|j        j                            d| j        j                   |j        +|j        j        |j                 	                                 dS dS t          |t          j                  r?|j        j        	                                 |j        j                            d           dS dS )zInitialize the weightsrW   )meanstdNg      ?)r   r   r~   weightdatanormal_r;   initializer_rangebiaszero_r&   r   r/   fill_)r:   rX   s     r=   _init_weightsz%SplinterPreTrainedModel._init_weightsq  s)   fbi(( 	* M&&CT[5R&SSS{& &&((((( '&-- 	*M&&CT[5R&SSS!-"6#56<<>>>>> .--- 	*K""$$$M$$S)))))	* 	*r>   N)rN   rO   rP   r   __annotations__base_model_prefixsupports_gradient_checkpointingr   r   r>   r=   r   r   k  s=         "&*#* * * * *r>   r   c                   P    e Zd ZdZ fdZd Zd Zd Zee		 	 	 	 	 	 	 	 	 dde
ej                 de
ej                 d	e
ej                 d
e
ej                 de
ej                 de
ej                 de
e         de
e         de
e         deeef         fd                        Z xZS )SplinterModela2  
    The model is an encoder (with only self-attention) following the architecture described in [Attention is all you
    need](https://huggingface.co/papers/1706.03762) by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones,
    Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin.
    c                     t                                          |           || _        t          |          | _        t          |          | _        |                                  d S r   )r$   r%   r;   r   rL   r   encoder	post_initr9   s     r=   r%   zSplinterModel.__init__  sX       ,V44&v.. 	r>   c                     | j         j        S r   rL   r*   )r:   s    r=   get_input_embeddingsz"SplinterModel.get_input_embeddings  s    ..r>   c                     || j         _        d S r   r   )r:   r[   s     r=   set_input_embeddingsz"SplinterModel.set_input_embeddings  s    */'''r>   c                     |                                 D ]/\  }}| j        j        |         j                            |           0dS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr   r   r   r   )r:   heads_to_pruner   r   s       r=   _prune_headszSplinterModel._prune_heads  sU    
 +0022 	C 	CLE5Lu%/;;EBBBB	C 	Cr>   Nr?   r\   r@   r   r^   rA   r   r   r   rB   c
                    ||n| j         j        }||n| j         j        }|	|	n| j         j        }	||t	          d          |+|                     ||           |                                }
n.||                                dd         }
nt	          d          |
\  }}||j        n|j        }|t          j	        ||f|          }|!t          j
        |
t          j        |          }|                     ||
          }|                     || j         j                  }|                     ||||          }|                     |||||d	          }|d
         }t#          ||j        |j                  S )a  
        token_type_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        NzDYou cannot specify both input_ids and inputs_embeds at the same timer    z5You have to specify either input_ids or inputs_embeds)rF   rD   )r?   r   r@   rA   T)r\   r^   r   r   r   r   r   )r;   r   r   use_return_dictrz   %warn_if_padding_and_no_attention_maskrG   rF   r5   onesrH   rI   get_extended_attention_maskget_head_maskr   rL   r   r   r   r   )r:   r?   r\   r@   r   r^   rA   r   r   r   rJ   
batch_sizerK   rF   extended_attention_maskembedding_outputencoder_outputssequence_outputs                     r=   rM   zSplinterModel.forward  s   : 2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B] ]%>cddd"66y.QQQ#..**KK&',,..ss3KKTUUU!,
J%.%:!!@T!"Z*j)A6RRRN!"[EJvVVVN 150P0PQ_al0m0m &&y$+2OPP	??%)'	 + 
 
 ,,2/!5 ' 
 
 *!,-)7&1
 
 
 	
r>   )	NNNNNNNNN)rN   rO   rP   rQ   r%   r   r   r   r   r   r   r5   r   r   r   rT   r   rM   rU   rV   s   @r=   r   r     st            / / /0 0 0C C C  -11515/3,004,0/3&*R
 R
EL)R
 !.R
 !.	R

 u|,R
 EL)R
  -R
 $D>R
 'tnR
 d^R
 
uo%	&R
 R
 R
 ^ R
 R
 R
 R
 R
r>   r   c                   D     e Zd Zd fd	Zdej        dej        fdZ xZS )SplinterFullyConnectedLayergeluc                    t                                                       || _        || _        t	          j        | j        | j                  | _        t          |         | _        t	          j	        | j                  | _	        d S r   )
r$   r%   	input_dim
output_dimr   r~   r   r
   act_fnr/   )r:   r  r  r   r<   s       r=   r%   z$SplinterFullyConnectedLayer.__init__  sa    "$Yt~t??
Z(do66r>   inputsrB   c                     |                      |          }|                     |          }|                     |          }|S r   )r   r  r/   )r:   r  r   s      r=   rM   z#SplinterFullyConnectedLayer.forward  s;    

6**M22}55r>   )r  r   rV   s   @r=   r  r    sc        7 7 7 7 7 7el u|        r>   r  c                   (     e Zd ZdZ fdZd Z xZS )QuestionAwareSpanSelectionHeadzf
    Implementation of Question-Aware Span Selection (QASS) head, described in Splinter's paper:

    c                    t                                                       t          |j        |j                  | _        t          |j        |j                  | _        t          |j        |j                  | _        t          |j        |j                  | _        t          j	        |j        |j        d          | _
        t          j	        |j        |j        d          | _        d S )NF)r   )r$   r%   r  r(   query_start_transformquery_end_transformstart_transformend_transformr   r~   start_classifierend_classifierr9   s     r=   r%   z'QuestionAwareSpanSelectionHead.__init__  s    %@ASU[Ug%h%h"#>v?QSYSe#f#f :6;MvOabb89KVM_`` "	&*<f>PW\ ] ] ] i(:F<NUZ[[[r>   c                 h   |                                 \  }}}|                    d                              dd|          }t          j        |d|          }|                     |          }|                     |          }|                     |          }	|                     |          }
| 	                    |          }|	
                    ddd          }	t          j        ||	          }|                     |          }|

                    ddd          }
t          j        ||
          }||fS )Nr    r   )rb   r   r   r`   )rG   	unsqueezerepeatr5   gatherr  r  r  r  r  permutere   r  )r:   r  	positions_rb   r   gathered_repsquery_start_repsquery_end_reps
start_repsend_repsr   start_logits
end_logitss                 r=   rM   z&QuestionAwareSpanSelectionHead.forward  s   KKMM	1c##B''..q!S99V%@@@55mDD11-@@))&11
%%f----.>??''1a00
|M:>>++N;;##Aq!,,\-::
Z''r>   )rN   rO   rP   rQ   r%   rM   rU   rV   s   @r=   r  r    sV         
	\ 	\ 	\ 	\ 	\( ( ( ( ( ( (r>   r  c                   ~    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 deej                 deej                 d	eej                 d
eej                 dee	         dee	         dee	         deej                 de
eef         fd            Z xZS )SplinterForQuestionAnsweringc                     t                                          |           t          |          | _        t	          |          | _        |j        | _        |                                  d S r   r$   r%   r   r   r  splinter_qassquestion_token_idr   r9   s     r=   r%   z%SplinterForQuestionAnswering.__init__3  ]       %f--;FCC!'!9 	r>   Nr?   r\   r@   r   r^   rA   start_positionsend_positionsr   r   r   question_positionsrB   c                    ||n| j         j        }d}||At          j        t          j        || j                                                  d          }n?t          j        |                    d          t          j	        |j
        |j                  }|                    d          }d}|                     |||||||	|
|	  	        }|d         }|                     ||          \  }}|r*|                    d	          |                    d	          }}|N|d	|z
  t          j        |j                  j        z  z   }|d	|z
  t          j        |j                  j        z  z   }d}||t'          |                                          d	k    r|                    d          }t'          |                                          d	k    r|                    d          }|                    d	          }|                    d|           |                    d|           t+          |
          } |||          } |||          }||z   dz  }|s||f|d	d         z   }||f|z   n|S t-          ||||j        |j                  S )a  
        token_type_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        question_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
            The positions of all question tokens. If given, start_logits and end_logits will be of shape `(batch_size,
            num_questions, sequence_length)`. If None, the first question token in each sequence in the batch will be
            the only one for which start_logits and end_logits are calculated and they will be of shape `(batch_size,
            sequence_length)`.
        NFr    r   r   )rE   layoutrF   Tr\   r@   r   r^   rA   r   r   r   r   ignore_indexr`   lossr)  r*  r   r   )r;   r  r5   argmaxeqr0  r{   rH   rG   rI   r6  rF   r  r   r/  squeezefinforE   minr   clamp_r   r   r   r   )r:   r?   r\   r@   r   r^   rA   r2  r3  r   r   r   r4  question_positions_were_none"question_position_for_each_exampler   r
  r)  r*  
total_lossignored_indexloss_fct
start_lossend_lossr   s                            r=   rM   z$SplinterForQuestionAnswering.forward=  s   H &1%<kk$+B]',$%$5:\Xi)?@@EEGGR6 6 622 6;[!&&q))MDXanau6 6 62 "D!M!Mb!Q!Q+/(--))%'/!5#   

 

 "!*#'#5#5oGY#Z#Z j' 	V'3';';A'>'>
@R@RST@U@U*L%'1~+=\M_A`A`Ad*ddL#q>'9U[IY=Z=Z=^&^^J
&=+D?''))**Q.."1"9"9""="==%%''((1,, - 5 5b 9 9(--a00M""1m444  M222']CCCH!,@@Jx
M::H$x/14J 	R"J/'!""+=F/9/EZMF**6Q+%!!/)
 
 
 	
r>   NNNNNNNNNNNN)rN   rO   rP   r%   r   r   r5   r   rR   r   r   rT   r   rM   rU   rV   s   @r=   r,  r,  1  sr             -11515/3,0046:48,0/3&*9=c
 c
EL)c
 !.c
 !.	c

 u|,c
 EL)c
  -c
 "%"23c
   01c
 $D>c
 'tnc
 d^c
 %U%56c
 
u22	3c
 c
 c
 ^c
 c
 c
 c
 c
r>   r,  zB
    Class for outputs of Splinter as a span selection model.
    )custom_introc                       e Zd ZU dZdZeej                 ed<   dZ	eej                 ed<   dZ
eej                 ed<   dZeeej                          ed<   dZeeej                          ed<   dS )SplinterForPreTrainingOutputa  
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when start and end positions are provided):
        Total span extraction loss is the sum of a Cross-Entropy for the start and end positions.
    start_logits (`torch.FloatTensor` of shape `(batch_size, num_questions, sequence_length)`):
        Span-start scores (before SoftMax).
    end_logits (`torch.FloatTensor` of shape `(batch_size, num_questions, sequence_length)`):
        Span-end scores (before SoftMax).
    Nr;  r)  r*  r   r   )rN   rO   rP   rQ   r;  r   r5   rS   r   r)  r*  r   rT   r   r   r>   r=   rL  rL    s           )-D(5$
%,,,04L(5,-444.2J*+2228<M8E%"345<<<59Ju01299999r>   rL  z
    Splinter Model for the recurring span selection task as done during the pretraining. The difference to the QA task
    is that we do not have a question, but multiple question tokens that replace the occurrences of recurring spans
    instead.
    c                       e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 deej                 deej                 d	eej                 d
eej                 dee	         dee	         dee	         deej                 de
eef         fd            Zdej        dej        fdZ xZS )SplinterForPreTrainingc                     t                                          |           t          |          | _        t	          |          | _        |j        | _        |                                  d S r   r.  r9   s     r=   r%   zSplinterForPreTraining.__init__  r1  r>   Nr?   r\   r@   r   r^   rA   r2  r3  r   r   r   r4  rB   c                    ||n| j         j        }|||t          d          ||t          d          ||                     |          }|                     |||||||	|
|	  	        }|d         }|                                \  }}}|                     ||          \  }}|                    d          }|x|                    d                              |||          }|d|z
  t          j
        |j                  j        z  z   }|d|z
  t          j
        |j                  j        z  z   }d}|||                    dt          d|dz
                       |                    dt          d|dz
                       t          | j         j                  } ||                    ||z  |          |                    ||z                      } ||                    ||z  |          |                    ||z                      }||z   dz  }|s||f|dd         z   }||f|z   n|S t%          ||||j        |j        	          S )
a  
        input_ids (`torch.LongTensor` of shape `(batch_size, num_questions, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        token_type_ids (`torch.LongTensor` of shape `batch_size, num_questions, sequence_length`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `batch_size, num_questions, sequence_length`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_questions, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        start_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
            Labels for position (index) of the start of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        end_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
            Labels for position (index) of the end of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        question_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
            The positions of all question tokens. If given, start_logits and end_logits will be of shape `(batch_size,
            num_questions, sequence_length)`. If None, the first question token in each sequence in the batch will be
            the only one for which start_logits and end_logits are calculated and they will be of shape `(batch_size,
            sequence_length)`.
        NzCquestion_positions must be specified in order to calculate the lossz>question_positions must be specified when input_embeds is usedr7  r   r   r8  r`   r:  )r;   r  	TypeError_prepare_question_positionsr   rG   r/  r  r7   r5   r?  rE   r@  rA  maxr   r)   rl   rL  r   r   )r:   r?   r\   r@   r   r^   rA   r2  r3  r   r   r   r4  r   r
  r  sequence_lengthrb   r)  r*  num_questions attention_mask_for_each_questionrD  rF  rG  rH  r   s                              r=   rM   zSplinterForPreTraining.forward  s   n &1%<kk$+B]%/*E-Jcabbb'I,=\]]]'!%!A!A)!L!L--))%'/!5#   

 

 "!*+:+?+?+A+A(
OS#'#5#5oGY#Z#Z j*//22%/=/G/G/J/J/Q/QM?0 0, (1/O+OSXS^_k_qSrSrSv*vvL#q+K'Ku{[e[kOlOlOp&ppJ
&=+D""1c!_q-@&A&ABBB  C?Q+>$?$?@@@ (T[5MNNNH!!!*}"<oNN$$Z-%?@@ J  x
] :OLL"":#=>> H %x/14J 	R"J/'!""+=F/9/EZMF**6Q+%!!/)
 
 
 	
r>   c                 r   t          j        || j        j        k              \  }}t          j        |          }t          j        |                    d          |                                f| j        j        t           j	        |j
                  }t          j        d |D                       }||||f<   |S )Nr   rD   c                 6    g | ]}t          j        |          S r   )r5   r6   )r   ns     r=   r   zFSplinterForPreTraining._prepare_question_positions.<locals>.<listcomp>S  s     AAAa%,q//AAAr>   )r5   wherer;   r0  bincountfullrG   rS  r)   rI   rF   cat)r:   r?   rowsflat_positionsrU  r"  colss          r=   rR  z2SplinterForPreTraining._prepare_question_positionsJ  s    ${98U+UVVnt,,J^^A 1 1 3 34K$*#	
 
 
	 yAA=AAABB .	$*r>   rI  )rN   rO   rP   r%   r   r   r5   r   rR   r   r   rT   rL  rM   rR  rU   rV   s   @r=   rN  rN    s             -11515/3,0046:48,0/3&*9=z
 z
EL)z
 !.z
 !.	z

 u|,z
 EL)z
  -z
 "%"23z
   01z
 $D>z
 'tnz
 d^z
 %U%56z
 
u22	3z
 z
 z
 ^z
xU\ el        r>   rN  )r,  rN  r   r   r   )rW   N)7rQ   dataclassesr   typingr   r   r   r5   r   torch.nnr   activationsr
   modeling_layersr   modeling_outputsr   r   r   modeling_utilsr   r   pytorch_utilsr   r   r   utilsr   r   r   configuration_splinterr   
get_loggerrN   loggerModuler   r   floatrr   rt   r   r   r   r   r   r   r   r   r  r  r,  rL  rN  __all__r   r>   r=   <module>rp     s     ! ! ! ! ! ! , , , , , , , , , ,        % % % % % % ! ! ! ! ! ! 9 9 9 9 9 9 Z Z Z Z Z Z Z Z Z Z F F F F F F F F l l l l l l l l l l         
 3 2 2 2 2 2 
	H	%	%2 2 2 2 2 2 2 2z (,% %I%<% 
% <	%
 U\*% % % %% % % %87 7 7 7 7BI 7 7 7v       * * * * *	 * * *\    29        RY   % % % % %. % % %R.
 .
 .
 .
 .
bi .
 .
 .
b * * * * *o * * *, s
 s
 s
 s
 s
+ s
 s
 s
l    ")   $#( #( #( #( #(RY #( #( #(L o
 o
 o
 o
 o
#: o
 o
 o
d   
: : : : :; : :  :"   S S S S S4 S S Sl  r>   