
     `izF                    f   d Z ddlZddlZddlmZ ddlmZmZ ddlZddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZ ddlmZmZmZmZ ddlmZ ddlmZmZmZ ddlmZmZm Z m!Z! ddl"m#Z# ddl$m%Z%  e j&        e'          Z(dZ)dZ*dZ+dZ,d Z- G d de	j.                  Z/ G d de	j.                  Z0 G d de	j.                  Z1de0iZ2 G d de	j.                  Z3 G d  d!e	j.                  Z4 G d" d#e	j.                  Z5 G d$ d%e          Z6 G d& d'e	j.                  Z7 G d( d)e	j.                  Z8e G d* d+e                      Z9e G d, d-e                      Z:e G d. d/e                      Z;e G d0 d1e                      Z< G d2 d3e	j.                  Z= G d4 d5e	j.                  Z> G d6 d7e	j.                  Z? G d8 d9e	j.                  Z@ G d: d;e	j.                  ZAd<ZBd=ZC G d> d?e          ZD G d@ dAeD          ZE edBeB           G dC dDeD                      ZF edEeB           G dF dGeD                      ZG edHeB           G dI dJeD                      ZH edKeB           G dL dMeD                      ZIdNZJ edOeB           G dP dQeD                      ZKg dRZLdS )SzPyTorch REALM model.    N)	dataclass)OptionalUnion)nn)CrossEntropyLoss   )ACT2FN)Cache)GradientCheckpointingLayer))BaseModelOutputWithPastAndCrossAttentions,BaseModelOutputWithPoolingAndCrossAttentionsMaskedLMOutputModelOutput)PreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)add_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings)deprecate_kwarg   )RealmConfigz(google/realm-cc-news-pretrained-embedderz'google/realm-cc-news-pretrained-encoderz&google/realm-cc-news-pretrained-scorerr   c           	      Z   	 ddl }ddl}ddl}n)# t          $ r t                              d            w xY wt          j                            |          }t          	                    d|            |j
                            |          }g }g }	|D ]j\  }
}t          	                    d|
 d|            |j
                            ||
          }|                    |
           |	                    |           kt          ||	          D ]\  }
}t          | t                     r0d|
vr,t          	                    d|
 d	| j        j         d
           K|
                    d          s|
                    d          rAt          | t(                    r,|
                    dd          }
|
                    dd          }
|
                    d          s|
                    d          r+t          | t,                    r|
                    dd          }
|
                    d          rt          | t                     rdnd}|
                    d| d          }
|
                    d| d          }
|
                    d| d          }
|
                    d| d          }
|
                    d| d          }
|
                    d          rt          | t.                    rdnd}|
                    d| d          }
|
                    d| d           }
|
                    d!| d"          }
|
                    d#| d$          }
|
                    d%| d          }
|
                    d&| d$          }
n`|
                    d'          rKt          | t.                    rdnd}|
                    d(| d           }
|
                    d)| d"          }
|
                    d*          }
t3          d+ |
D                       r2t          	                    dd*                    |
                      ;| }|
D ]}|                    d,|          r|                    d-|          }n|g}|d         d.k    s|d         d/k    rt9          |d0          }n|d         d1k    s|d         d2k    rt9          |d3          }nX	 t9          ||d                   }n@# t:          $ r3 t          	                    dd*                    |
                      Y w xY wt=          |          d4k    rt?          |d5                   }||         }|d6d         d7k    rt9          |d0          }n|d.k    r|                     |          }	 |j!        |j!        k    sJ d8|j!         d9|j!         d:            n/# tD          $ r"}|xj#        |j!        |j!        fz  c_#         d}~ww xY wt          	                    d;|
            tI          j%        |          |_&        | S )<z'Load tf checkpoints in a pytorch model.r   NzLoading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see https://www.tensorflow.org/install/ for installation instructions.z&Converting TensorFlow checkpoint from zLoading TF weight z with shape readerz	Skipping z as it is not z's parameterbertclszbert/zreader/realm/zcls/zreader/cls/zrealm/ zreader/zreader/module/bert/zreader/module/cls/zreader/dense/zqa_outputs/dense_intermediate/zreader/dense_1/zqa_outputs/dense_output/zreader/layer_normalizationzqa_outputs/layer_normalizationzmodule/module/module/z	embedder/z!module/module/module/module/bert/zmodule/module/module/LayerNorm/zcls/LayerNorm/zmodule/module/module/dense/z
cls/dense/z,module/module/module/module/cls/predictions/zcls/predictions/zmodule/module/module/bert/z%module/module/module/cls/predictions/zmodule/module/zmodule/module/LayerNorm/zmodule/module/dense//c              3      K   | ]}|d v V  	dS ))adam_vadam_mAdamWeightDecayOptimizerAdamWeightDecayOptimizer_1global_stepN ).0ns     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/deprecated/realm/modeling_realm.py	<genexpr>z+load_tf_weights_in_realm.<locals>.<genexpr>s   s<       
 
 nn
 
 
 
 
 
    z[A-Za-z]+_\d+z_(\d+)kernelgammaweightoutput_biasbetabias   r   i_embeddingszPointer shape z and array shape z mismatchedzInitialize PyTorch weight )'renumpy
tensorflowImportErrorloggererrorospathabspathinfotrainlist_variablesload_variableappendzip
isinstanceRealmReader	__class____name__
startswithRealmForOpenQAreplaceRealmKnowledgeAugEncoderRealmEmbeddersplitanyjoin	fullmatchgetattrAttributeErrorlenint	transposeshapeAssertionErrorargstorch
from_numpydata)modelconfigtf_checkpoint_pathr5   nptftf_path	init_varsnamesarraysnamerV   arrayreader_prefixembedder_prefixpointerm_namescope_namesnumes                       r*   load_tf_weights_in_realmrn   1   s   
			   Q	
 	
 	
 	 goo011G
KKBBBCCC''00IEF   eBBB5BBCCC&&w55Te5&)) M/ M/ee[)) 	hd.B.BKK^D^^8P^^^___ OOF## 	7tu'='= 	7:eUcCdCd 	7<<99D<<66D OOF## 	3tu'='= 	3:eUmCnCn 	3<<22D ??8$$ 	p",UK"@"@OBBiM<< 5-7O7O7OPPD<< 46L6L6LMMD<<M1a1a1abbD<< 1m3]3]3]^^D<< <>n>n>nooD ??233 	X$.um$D$DUbb+O<< CE_E_E_``D<< AoCeCeCeffD<< =/?]?]?]^^D<< NSbPtPtPtuuD<< <>X>X>XYYD<< GOImImImnnDD__-.. 	X$.um$D$DUbb+O<< :<^<^<^__D<< 6?8V8V8VWWDzz#  
 

 
 
 
 
 	 KK4CHHTNN44555 	' 	'F||,f55 ' hhy&99%h1~))[^w-F-F!'844Q=00KNf4L4L!'622%g{1~>>GG%   KK <CHHTNN < <===H ;1$$+a.))!#,#$$<=((gx00GGxLL''E	=EK///YYYYYY 0///  	 	 	FFw}ek22FF	 	777888'..Ls2    &5S++:T('T(+W
W0W++W0c                        e Zd ZdZ fdZ	 	 	 	 	 ddeej                 deej                 deej                 deej                 d	e	d
ej
        fdZ xZS )RealmEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                    t                                                       t          j        |j        |j        |j                  | _        t          j        |j        |j                  | _	        t          j        |j
        |j                  | _        t          j        |j        |j                  | _        t          j        |j                  | _        t#          |dd          | _        |                     dt)          j        |j                                      d          d           |                     d	t)          j        | j                                        t(          j        
          d           d S )N)padding_idxepsposition_embedding_typeabsoluteposition_ids)r   F)
persistenttoken_type_idsdtype)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutrQ   ru   register_bufferrY   arangeexpandzerosrw   sizelongselfr]   rF   s     r*   r~   zRealmEmbeddings.__init__   sK   !|F,=v?Q_e_rsss#%<0NPVPb#c#c %'\&2H&J\%]%]" f&8f>STTTz&"<=='.v7PR\']']$EL)GHHOOPWXXej 	 	
 	
 	
 	ek$*;*@*@*B*B%*UUUbg 	 	
 	
 	
 	
 	
r,   Nr   	input_idsrz   rw   inputs_embedspast_key_values_lengthreturnc                    ||                                 }n|                                 d d         }|d         }|| j        d d |||z   f         }|mt          | d          r2| j        d d d |f         }|                    |d         |          }	|	}n+t          j        |t
          j        | j        j                  }|| 	                    |          }| 
                    |          }
||
z   }| j        dk    r|                     |          }||z  }|                     |          }|                     |          }|S )Nrx   r   rz   r   r|   devicerv   )r   rw   hasattrrz   r   rY   r   r   r   r   r   ru   r   r   r   )r   r   rz   rw   r   r   input_shape
seq_lengthbuffered_token_type_ids buffered_token_type_ids_expandedr   
embeddingsr   s                r*   forwardzRealmEmbeddings.forward   sm     #..**KK',,..ss3K ^
,QQQ0FVlIl0l-lmL
 !t-.. m*.*=aaa*n*M'3J3Q3QR]^_R`bl3m3m0!A!&[
SWSdSk!l!l!l  00;;M $ : :> J J"%::
':55"&":":<"H"H--J^^J//
\\*--
r,   )NNNNr   )rG   
__module____qualname____doc__r~   r   rY   
LongTensorFloatTensorrT   Tensorr   __classcell__rF   s   @r*   rp   rp      s        QQ
 
 
 
 
* 15593759&'' 'E,-' !!12' u/0	'
   12' !$' 
' ' ' ' ' ' ' 'r,   rp   c                   2    e Zd Zd fd	Zdej        dej        fdZ eddd	          	 	 	 	 	 	 ddej        deej	                 deej	                 deej	                 deej	                 dee
         dee         deej                 fd            Z xZS )RealmSelfAttentionNc                 D   t                                                       |j        |j        z  dk    r0t	          |d          s t          d|j         d|j         d          |j        | _        t          |j        |j        z            | _        | j        | j        z  | _        t          j
        |j        | j                  | _        t          j
        |j        | j                  | _        t          j
        |j        | j                  | _        t          j        |j                  | _        |pt#          |dd          | _        | j        dk    s| j        d	k    r6|j        | _        t          j        d
|j        z  dz
  | j                  | _        |j        | _        d S )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()ru   rv   relative_keyrelative_key_queryr3   r   )r}   r~   r   num_attention_headsr   
ValueErrorrT   attention_head_sizeall_head_sizer   Linearquerykeyvaluer   attention_probs_dropout_probr   rQ   ru   r   r   distance_embedding
is_decoderr   r]   ru   rF   s      r*   r~   zRealmSelfAttention.__init__   s    ::a??PVXhHiHi?8F$6 8 8 48 8 8  
 $*#= #&v'9F<V'V#W#W !58PPYv143EFF
9V/1CDDYv143EFF
z&"EFF'> (
'-zC
 C
$ '>99T=Y]q=q=q+1+ID(&(l1v7U3UXY3Y[_[s&t&tD# +r,   xr   c                     |                                 d d         | j        | j        fz   }|                    |          }|                    dddd          S )Nrx   r   r3   r      )r   r   r   viewpermute)r   r   new_x_shapes      r*   transpose_for_scoresz'RealmSelfAttention.transpose_for_scores   sP    ffhhssmt'?AY&ZZFF;yyAq!$$$r,   past_key_valuepast_key_values4.58new_nameversionFhidden_statesattention_mask	head_maskencoder_hidden_statesencoder_attention_maskoutput_attentionsc                 ^   |                      |          }|d u}	|	r||d         }
|d         }|}n4|	rS|                     |                     |                    }
|                     |                     |                    }|}n||                     |                     |                    }
|                     |                     |                    }t	          j        |d         |
gd          }
t	          j        |d         |gd          }nP|                     |                     |                    }
|                     |                     |                    }|                     |          }|d u}| j        r|
|f}t	          j        ||
                    dd                    }| j	        dk    s| j	        dk    rt|j
        d         |
j
        d         }}|r>t	          j        |dz
  t          j        |j        	                              dd          }n:t	          j        |t          j        |j        	                              dd          }t	          j        |t          j        |j        	                              dd          }||z
  }|                     || j        z   dz
            }|                    |j        
          }| j	        dk    rt	          j        d||          }||z   }n?| j	        dk    r4t	          j        d||          }t	          j        d|
|          }||z   |z   }|t+          j        | j                  z  }|||z   }t0          j                            |d          }|                     |          }|||z  }t	          j        ||          }|                    dddd                                          }|                                d d         | j        fz   }|                    |          }|r||fn|f}| j        r||fz   }|S )Nr   r   r3   dimrx   r   r   r   r{   zbhld,lrd->bhlrzbhrd,lrd->bhlrr   ) r   r   r   r   rY   catr   matmulrU   ru   rV   tensorr   r   r   r   r   r   tor|   einsummathsqrtr   r   
functionalsoftmaxr   r   
contiguousr   r   )r   r   r   r   r   r   r   r   mixed_query_layeris_cross_attention	key_layervalue_layerquery_layer	use_cacheattention_scoresquery_length
key_lengthposition_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_keyattention_probscontext_layernew_context_layer_shapeoutputss                               r*   r   zRealmSelfAttention.forward   sZ    !JJ}55
 3$> 	O/"='*I)!,K3NN 	O11$((;P2Q2QRRI33DJJ?T4U4UVVK3NN(11$((=2I2IJJI33DJJ}4M4MNNK	?1#5y"AqIIII)_Q%7$E1MMMKK11$((=2I2IJJI33DJJ}4M4MNNK//0ABB#4/	? 	7  )+6O !<Y5H5HR5P5PQQ'>99T=Y]q=q=q'2'8';Y_Q=O*L w!&j1nEJWdWk!l!l!l!q!q" " "'l%*UbUi!j!j!j!o!oprtu!v!v"\*EJ}OcdddiijkmoppN%6H#'#:#:8dFb;bef;f#g#g #7#:#:AR#:#S#S +~==+0<8H+Wk+l+l(#36N#N  -1EEE16>NP[]q1r1r./4|<LiYm/n/n,#36T#TWs#s +di8P.Q.QQ%/.@ -//0@b/II ,,77  -	9O_kBB%--aAq99DDFF"/"4"4"6"6ss";t?Q>S"S%**+BCC6G]=/22mM]? 	3 22Gr,   NNNNNNF)rG   r   r   r~   rY   r   r   r   r   r   r
   booltupler   r   r   s   @r*   r   r      sA       , , , , , ,4%el %u| % % % %
 _%0A6RRR 7;15=A>B+/,1c c|c !!23c E-.	c
  ((9:c !)): ;c "%c $D>c 
u|	c c c SRc c c c cr,   r   c                   P     e Zd Z fdZdej        dej        dej        fdZ xZS )RealmSelfOutputc                    t                                                       t          j        |j        |j                  | _        t          j        |j        |j                  | _        t          j        |j	                  | _
        d S Nrs   )r}   r~   r   r   r   denser   r   r   r   r   r   s     r*   r~   zRealmSelfOutput.__init__d  sf    Yv163EFF
f&8f>STTTz&"<==r,   r   input_tensorr   c                     |                      |          }|                     |          }|                     ||z             }|S r   r   r   r   r   r   r   s      r*   r   zRealmSelfOutput.forwardj  @    

=11]33}|'CDDr,   rG   r   r   r~   rY   r   r   r   r   s   @r*   r   r   c  i        > > > > >U\  RWR^        r,   r   eagerc                       e Zd Zd fd	Zd Z eddd          	 	 	 	 	 	 dd	ej        d
eej	                 deej	                 deej	                 deej	                 dee
         dee         deej                 fd            Z xZS )RealmAttentionNc                     t                                                       t          |j                 ||          | _        t          |          | _        t                      | _        d S )Nru   )	r}   r~   REALM_SELF_ATTENTION_CLASSES_attn_implementationr   r   outputsetpruned_headsr   s      r*   r~   zRealmAttention.__init__w  s`    01LM,C
 
 
	 &f--EEr,   c                    t          |          dk    rd S t          || j        j        | j        j        | j                  \  }}t          | j        j        |          | j        _        t          | j        j        |          | j        _        t          | j        j	        |          | j        _	        t          | j
        j        |d          | j
        _        | j        j        t          |          z
  | j        _        | j        j        | j        j        z  | j        _        | j                            |          | _        d S )Nr   r   r   )rS   r   r   r   r   r  r   r   r   r   r
  r   r   union)r   headsindexs      r*   prune_headszRealmAttention.prune_heads  s    u::??F7490$)2OQUQb
 
u
 -TY_eDD	*49=%@@	,TY_eDD	.t{/@%QOOO )-	(EE

(R	%"&)"?$)B_"_	 -33E::r,   r   r   r   r   Fr   r   r   r   r   r   r   c           	          |                      |||||||          }|                     |d         |          }	|	f|dd          z   }
|
S )Nr   r   )r   r
  )r   r   r   r   r   r   r   r   self_outputsattention_outputr   s              r*   r   zRealmAttention.forward  sa     yy!"
 
  ;;|AFF#%QRR(88r,   r   r   )rG   r   r   r~   r  r   rY   r   r   r   r
   r   r   r   r   r   s   @r*   r  r  v  s       " " " " " "; ; ;$ _%0A6RRR 7;15=A>B+/,1 | !!23 E-.	
  ((9: !)): ; "% $D> 
u|	   SR    r,   r  c                   B     e Zd Z fdZdej        dej        fdZ xZS )RealmIntermediatec                    t                                                       t          j        |j        |j                  | _        t          |j        t                    rt          |j                 | _        d S |j        | _        d S r   )r}   r~   r   r   r   intermediate_sizer   rD   
hidden_actstrr	   intermediate_act_fnr   s     r*   r~   zRealmIntermediate.__init__  sn    Yv163KLL
f'-- 	9'-f.?'@D$$$'-'8D$$$r,   r   r   c                 Z    |                      |          }|                     |          }|S r   )r   r  r   r   s     r*   r   zRealmIntermediate.forward  s,    

=1100??r,   r  r   s   @r*   r  r    s^        9 9 9 9 9U\ el        r,   r  c                   P     e Zd Z fdZdej        dej        dej        fdZ xZS )RealmOutputc                    t                                                       t          j        |j        |j                  | _        t          j        |j        |j                  | _        t          j	        |j
                  | _        d S r   )r}   r~   r   r   r  r   r   r   r   r   r   r   r   s     r*   r~   zRealmOutput.__init__  sf    Yv79KLL
f&8f>STTTz&"<==r,   r   r   r   c                     |                      |          }|                     |          }|                     ||z             }|S r   r   r   s      r*   r   zRealmOutput.forward  r   r,   r  r   s   @r*   r  r    r  r,   r  c                       e Zd Z fdZ eddd          	 	 	 	 	 	 ddej        d	eej                 d
eej                 deej                 deej                 dee	         dee
         deej                 fd            Zd Z xZS )
RealmLayerc                    t                                                       |j        | _        d| _        t	          |          | _        |j        | _        |j        | _        | j        r/| j        st          |  d          t	          |d          | _	        t          |          | _        t          |          | _        d S )Nr   z> should be used as a decoder model if cross attention is addedrv   r  )r}   r~   chunk_size_feed_forwardseq_len_dimr  	attentionr   add_cross_attentionr   crossattentionr  intermediater  r
  r   s     r*   r~   zRealmLayer.__init__  s    '-'E$'// +#)#= # 	]? j D!h!h!hiii"0Q["\"\"\D-f55!&))r,   r   r   r   r   NFr   r   r   r   r   r   r   c           	         |
|d d         nd }|                      |||||          }	|	d         }
| j        r|	dd         }|	d         }n
|	dd          }d }| j        rp|nt          | d          st          d|  d          |
|d	d          nd }|                     |
||||||          }|d         }
||dd         z   }|d         }||z   }t          | j        | j        | j        |
          }|f|z   }| j        r||fz   }|S )
Nr3   )r   r   r   r   rx   r)  z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r   )	r'  r   r   r   r)  r   feed_forward_chunkr%  r&  )r   r   r   r   r   r   r   r   self_attn_past_key_valueself_attention_outputsr  r   present_key_valuecross_attn_present_key_valuecross_attn_past_key_valuecross_attention_outputslayer_outputs                    r*   r   zRealmLayer.forward  s    ;J:U?2A2#6#6[_ !%/4 "0 "
 "
 2!4 ? 	1,QrT2G 6r :,QRR0G'+$? 	Q4@4!122  Dd D D D   AP@[(<(<ae%&*&9&9 %&)!' '#  7q9 7" ==G ,C2+F( 14P P0#T%A4CSUe
 
  /G+ ? 	5!2 44Gr,   c                 \    |                      |          }|                     ||          }|S r   )r*  r
  )r   r  intermediate_outputr3  s       r*   r,  zRealmLayer.feed_forward_chunk  s2    "//0@AA{{#68HIIr,   r   )rG   r   r   r~   r   rY   r   r   r   r
   r   r   r   r,  r   r   s   @r*   r#  r#    s       * * * * * _%0A6RRR 7;15=A>B+/,1? ?|? !!23? E-.	?
  ((9:? !)): ;? "%? $D>? 
u|	? ? ? SR?B      r,   r#  c                   *    e Zd Z fdZ	 	 	 	 	 	 	 	 	 ddej        deej                 deej                 deej                 d	eej                 d
ee         dee	         dee	         dee	         dee	         de
eej                 ef         fdZ xZS )RealmEncoderc                     t                                                       | _        t          j        fdt          j                  D                       | _        d| _        d S )Nc                 .    g | ]}t                    S r'   )r#  )r(   _r]   s     r*   
<listcomp>z)RealmEncoder.__init__.<locals>.<listcomp>"  s!    #`#`#`1Jv$6$6#`#`#`r,   F)	r}   r~   r]   r   
ModuleListrangenum_hidden_layerslayergradient_checkpointingr   s    `r*   r~   zRealmEncoder.__init__  s`    ]#`#`#`#`fF^@_@_#`#`#`aa
&+###r,   NFTr   r   r   r   r   r   r   r   output_hidden_statesreturn_dictr   c           
      $   |	rdnd }|rdnd }|r| j         j        rdnd }| j        r%| j        r|rt                              d           d}|rdnd }t          | j                  D ]p\  }}|	r||fz   }|||         nd } |||||||||         nd |          }|d         }|r||d         fz  }|r$||d         fz   }| j         j        r||d         fz   }q|	r||fz   }|
st          d |||||fD                       S t          |||||	          S )
Nr'   zZ`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...Fr   rx   r   r3   c              3      K   | ]}||V  	d S r   r'   )r(   vs     r*   r+   z'RealmEncoder.forward.<locals>.<genexpr>Z  s4       
 
 =  !===
 
r,   )last_hidden_stater   r   
attentionscross_attentions)
r]   r(  r@  trainingr9   warning_once	enumerater?  r   r   )r   r   r   r   r   r   r   r   r   rA  rB  all_hidden_statesall_self_attentionsall_cross_attentionsnext_decoder_cacheilayer_modulelayer_head_masklayer_outputss                      r*   r   zRealmEncoder.forward%  s    #7@BBD$5?bb4%6d4;;Zdrr`d& 	"4= 	" "##p   "	#,6RR$(44 	V 	VOA|# I$58H$H!.7.CillO(L%&&5&A""t! M *!,M ;"}R'8&::"  V&9]1=M<O&O#;2 V+?=QRCSBU+U( 	E 1]4D D 	 
 
 "&%'(
 
 
 
 
 
 9+.+*1
 
 
 	
r,   )	NNNNNNFFT)rG   r   r   r~   rY   r   r   r   r
   r   r   r   r   r   r   r   s   @r*   r7  r7    s6       , , , , , 7;15=A>B+/$(,1/4&*F
 F
|F
 !!23F
 E-.	F

  ((9:F
 !)): ;F
 "%F
 D>F
 $D>F
 'tnF
 d^F
 
uU\"$MM	NF
 F
 F
 F
 F
 F
 F
 F
r,   r7  c                   B     e Zd Z fdZdej        dej        fdZ xZS )RealmPoolerc                     t                                                       t          j        |j        |j                  | _        t          j                    | _        d S r   )r}   r~   r   r   r   r   Tanh
activationr   s     r*   r~   zRealmPooler.__init__o  sC    Yv163EFF
'))r,   r   r   c                 r    |d d df         }|                      |          }|                     |          }|S )Nr   )r   rX  )r   r   first_token_tensorpooled_outputs       r*   r   zRealmPooler.forwardt  s@     +111a40

#56666r,   r  r   s   @r*   rU  rU  n  s^        $ $ $ $ $
U\ el        r,   rU  c                       e Zd ZU dZdZeej                 ed<   dZ	ee
ej                          ed<   dZee
ej                          ed<   dS )RealmEmbedderOutputa*  
    Outputs of [`RealmEmbedder`] models.

    Args:
        projected_score (`torch.FloatTensor` of shape `(batch_size, config.retriever_proj_size)`):

            Projected score.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    Nprojected_scorer   rG  )rG   r   r   r   r^  r   rY   r   __annotations__r   r   rG  r'   r,   r*   r]  r]  }  sp          ( 48OXe/07778<M8E%"345<<<59Ju01299999r,   r]  c                       e Zd ZU dZdZeej                 ed<   dZ	eej                 ed<   dZ
eej                 ed<   dS )RealmScorerOutputa'  
    Outputs of [`RealmScorer`] models.

    Args:
        relevance_score (`torch.FloatTensor` of shape `(batch_size, config.num_candidates)`):
            The relevance score of document candidates (before softmax).
        query_score (`torch.FloatTensor` of shape `(batch_size, config.retriever_proj_size)`):
            Query score derived from the query embedder.
        candidate_score (`torch.FloatTensor` of shape `(batch_size, config.num_candidates, config.retriever_proj_size)`):
            Candidate score derived from the embedder.
    Nrelevance_scorequery_scorecandidate_score)rG   r   r   r   rb  r   rY   r   r_  rc  rd  r'   r,   r*   ra  ra    sf         
 
 48OXe/0777/3K%+,33337OXe/077777r,   ra  c                      e Zd ZU dZdZeej                 ed<   dZ	eej                 ed<   dZ
eej                 ed<   dZeej                 ed<   dZeej                 ed<   dZeej                 ed<   dZeej                 ed	<   dZeej                 ed
<   dZeej                 ed<   dZeeej                          ed<   dZeeej                          ed<   dS )RealmReaderOutputa+	  
    Outputs of [`RealmReader`] models.

    Args:
        loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `start_positions`, `end_positions`, `has_answers` are provided):
            Total loss.
        retriever_loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `start_positions`, `end_positions`, `has_answers` are provided):
            Retriever loss.
        reader_loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `start_positions`, `end_positions`, `has_answers` are provided):
            Reader loss.
        retriever_correct (`torch.BoolTensor` of shape `(config.searcher_beam_size,)`, *optional*):
            Whether or not an evidence block contains answer.
        reader_correct (`torch.BoolTensor` of shape `(config.reader_beam_size, num_candidates)`, *optional*):
            Whether or not a span candidate contains answer.
        block_idx (`torch.LongTensor` of shape `()`):
            The index of the retrieved evidence block in which the predicted answer is most likely.
        candidate (`torch.LongTensor` of shape `()`):
            The index of the retrieved span candidates in which the predicted answer is most likely.
        start_pos (`torch.IntTensor` of shape `()`):
            Predicted answer starting position in *RealmReader*'s inputs.
        end_pos (`torch.IntTensor` of shape `()`):
            Predicted answer ending position in *RealmReader*'s inputs.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    Nlossretriever_lossreader_lossretriever_correctreader_correct	block_idx	candidate	start_posend_posr   rG  )rG   r   r   r   rg  r   rY   r   r_  rh  ri  rj  
BoolTensorrk  rl  r   rm  rn  	IntTensorro  r   r   rG  r'   r,   r*   rf  rf    s0        ! !F )-D(5$
%,,,26NHU./666/3K%+,33348x 0188815NHU-.555,0Ix()000,0Ix()000+/Ix(///)-GXeo&---8<M8E%"345<<<59Ju01299999r,   rf  c                   R    e Zd ZU dZdZee         ed<   dZee	j
                 ed<   dS )RealmForOpenQAOutputz

    Outputs of [`RealmForOpenQA`] models.

    Args:
        reader_output (`dict`):
            Reader output.
        predicted_answer_ids (`torch.LongTensor` of shape `(answer_sequence_length)`):
            Predicted answer ids.
    Nreader_outputpredicted_answer_ids)rG   r   r   r   rt  r   dictr_  ru  rY   r   r'   r,   r*   rs  rs    sK         	 	 %)M8D>(((7;(5#34;;;;;r,   rs  c                   $     e Zd Z fdZd Z xZS )RealmPredictionHeadTransformc                 V   t                                                       t          j        |j        |j                  | _        t          |j        t                    rt          |j                 | _
        n|j        | _
        t          j        |j        |j                  | _        d S r   )r}   r~   r   r   r   r   rD   r  r  r	   transform_act_fnr   r   r   s     r*   r~   z%RealmPredictionHeadTransform.__init__  s    Yv163EFF
f'-- 	6$*6+<$=D!!$*$5D!f&8f>STTTr,   c                     |                      |          }|                     |          }|                     |          }|S r   )r   rz  r   r  s     r*   r   z$RealmPredictionHeadTransform.forward  s=    

=11--m<<}55r,   rG   r   r   r~   r   r   r   s   @r*   rx  rx    sL        U U U U U      r,   rx  c                   *     e Zd Z fdZd Zd Z xZS )RealmLMPredictionHeadc                 >   t                                                       t          |          | _        t	          j        |j        |j        d          | _        t	          j	        t          j        |j                            | _        | j        | j        _        d S )NF)r2   )r}   r~   rx  	transformr   r   r   r   decoder	ParameterrY   r   r2   r   s     r*   r~   zRealmLMPredictionHead.__init__   sz    5f== y!3V5FUSSSLV->!?!?@@	 !Ir,   c                 (    | j         | j        _         d S r   )r2   r  r   s    r*   _tie_weightsz"RealmLMPredictionHead._tie_weights  s     Ir,   c                 Z    |                      |          }|                     |          }|S r   )r  r  r  s     r*   r   zRealmLMPredictionHead.forward  s*    }55]33r,   )rG   r   r   r~   r  r   r   r   s   @r*   r~  r~    sV        & & & & && & &      r,   r~  c                   $     e Zd Z fdZd Z xZS )RealmOnlyMLMHeadc                 p    t                                                       t          |          | _        d S r   )r}   r~   r~  predictionsr   s     r*   r~   zRealmOnlyMLMHead.__init__  s/    088r,   c                 0    |                      |          }|S r   )r  )r   sequence_outputprediction_scoress      r*   r   zRealmOnlyMLMHead.forward  s     ,,_==  r,   r|  r   s   @r*   r  r    sG        9 9 9 9 9! ! ! ! ! ! !r,   r  c                   $     e Zd Z fdZd Z xZS )RealmScorerProjectionc                    t                                                       t          |          | _        t	          j        |j        |j                  | _        t	          j	        |j        |j
                  | _	        d S r   )r}   r~   r~  r  r   r   r   retriever_proj_sizer   r   r   r   s     r*   r~   zRealmScorerProjection.__init__!  sb    088Yv163MNN
f&@fF[\\\r,   c                 Z    |                      |          }|                     |          }|S r   )r   r   r  s     r*   r   zRealmScorerProjection.forward'  s*    

=11}55r,   r|  r   s   @r*   r  r     sL        ] ] ] ] ]      r,   r  c                   $     e Zd Z fdZd Z xZS )RealmReaderProjectionc                 \   t                                                       || _        t          j        |j        |j        dz            | _        t          j        |j        d          | _        t          j	        |j        |j
                  | _        t          j                    | _        d S )Nr3   r   rs   )r}   r~   r]   r   r   r   span_hidden_sizedense_intermediatedense_outputr   reader_layer_norm_epslayer_normalizationReLUrelur   s     r*   r~   zRealmReaderProjection.__init__.  s    "$)F,>@WZ[@["\"\If&=qAA#%<0GVMi#j#j#j GII			r,   c                      fd}t           j        fd}                     |          }|                    dd          \  }} ||          \  }}}	t          j        |d|          }
t          j        |d|          }|
|z   }                     |          }                     |          }                     |                              d          }| ||	|j	                  z  }|||fS )	Nc                 L     j         \  } fdt          fdt          	j        j                  D              \  }}t          j        |d          }t          j        |d          }t          j         d|          }t          j         d|          }||z  }|||fS )aK  
            Generate span candidates.

            Args:
                masks: <bool> [num_retrievals, max_sequence_len]

            Returns:
                starts: <int32> [num_spans] ends: <int32> [num_spans] span_masks: <int32> [num_retrievals, num_spans]
                whether spans locate in evidence block.
            c                     t          j        | z
  dz   j                  }t          j        | dz
  j                  }||fS )Nr   r   )rY   r   r   )widthcurrent_startscurrent_endsmasksmax_sequence_lens      r*   _spans_given_widthzRRealmReaderProjection.forward.<locals>.span_candidates.<locals>._spans_given_widthD  sP    !&.>.F.JSXS_!`!`!`$|EAI7GPUP\]]]%|33r,   c              3   4   K   | ]} |d z             V  dS )r   Nr'   )r(   wr  s     r*   r+   zIRealmReaderProjection.forward.<locals>.span_candidates.<locals>.<genexpr>I  s3       f fq!3!3AE!:!: f f f f f fr,   r   rx   r   r  )rV   rC   r=  r]   max_span_widthrY   r   index_select)
r  r:  startsendsstart_masks	end_masks
span_masksr  r  r   s
   `      @@r*   span_candidatesz6RealmReaderProjection.forward.<locals>.span_candidates7  s     #(+A4 4 4 4 4 4
  f f f fE$+JdDeDe f f fgLFD Yvq))F9T1%%D  ,U&IIIK*5bEEEI$y0J4++r,   c                 f    d|                      |          z
  t          j        |          j        z  S N      ?typerY   finfominmaskr|   s     r*   mask_to_scorez4RealmReaderProjection.forward.<locals>.mask_to_scoreV  s+    $))E***ek%.@.@.DDDr,   r3   rx   r   r   r  r{   )
rY   float32r  chunkr  r  r  r  squeezer|   )r   r   
block_maskr  r  start_projectionend_projectioncandidate_startscandidate_endscandidate_maskcandidate_start_projectionscandidate_end_projectionscandidate_hiddenreader_logitss   `             r*   r   zRealmReaderProjection.forward6  s:   	, 	, 	, 	, 	,> ',m 	E 	E 	E 	E //>>+8+>+>qb+>+I+I(.;J?:;V;V8..&+&89IqXh&i&i&i#$)$6~1Tb$c$c$c!69RR  99%566334DEE))*:;;CCBGG~]=PQQQQ.>>r,   r|  r   s   @r*   r  r  -  sG            7? 7? 7? 7? 7? 7? 7?r,   r  aH  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`RealmConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a5
  
    Args:
        input_ids (`torch.LongTensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`torch.FloatTensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
c                   2    e Zd ZU dZeed<   eZdZd Z	d Z
dS )RealmPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    r]   realmc                    t          |t          j                  rT|j        j                            d| j        j                   |j         |j        j        	                                 dS dS t          |t          j
                  r_|j        j                            d| j        j                   |j        +|j        j        |j                 	                                 dS dS t          |t          j                  r?|j        j        	                                 |j        j                            d           dS dS )zInitialize the weightsg        )meanstdNr  )rD   r   r   r/   r[   normal_r]   initializer_ranger2   zero_r   rr   r   fill_)r   modules     r*   _init_weightsz"RealmPreTrainedModel._init_weights  s)   fbi(( 	* M&&CT[5R&SSS{& &&((((( '&-- 	*M&&CT[5R&SSS!-"6#56<<>>>>> .--- 	*K""$$$M$$S)))))	* 	*r,   c                     g }|D ]f}||                     d           |j        }t          |          dk    r|                    d|d         f          }|                     |           g|S )z.Flatten inputs' shape to (-1, input_shape[-1])Nr3   rx   )rB   rV   rS   r   )r   inputsflattened_inputsr   r   s        r*   _flatten_inputsz$RealmPreTrainedModel._flatten_inputs  s     	0 	0F~ ''----$l{##a''#[["k"o)>??F ''////r,   N)rG   r   r   r   r   r_  rn   load_tf_weightsbase_model_prefixr  r  r'   r,   r*   r  r    sW          
 .O* * *          r,   r  c                   X     e Zd ZdZd	 fd	Zd Zd Zd Z	 	 	 	 	 	 	 	 	 	 	 	 	 d
dZ xZ	S )RealmBertModelz?
    Same as the original BertModel but remove docstrings.
    Tc                     t                                          |           || _        t          |          | _        t          |          | _        |rt          |          nd | _        | 	                                 d S r   )
r}   r~   r]   rp   r   r7  encoderrU  pooler	post_init)r   r]   add_pooling_layerrF   s      r*   r~   zRealmBertModel.__init__  sq       )&11#F++->Hk&)))D 	r,   c                     | j         j        S r   r   r   r  s    r*   get_input_embeddingsz#RealmBertModel.get_input_embeddings  s    ..r,   c                     || j         _        d S r   r  r   r   s     r*   set_input_embeddingsz#RealmBertModel.set_input_embeddings  s    */'''r,   c                     |                                 D ]/\  }}| j        j        |         j                            |           0dS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr  r?  r'  r  )r   heads_to_pruner?  r  s       r*   _prune_headszRealmBertModel._prune_heads  sU    
 +0022 	C 	CLE5Lu%/;;EBBBB	C 	Cr,   Nc                 4   ||n| j         j        }||n| j         j        }||n| j         j        }| j         j        r|
|
n| j         j        }
nd}
||t          d          |+|                     ||           |                                }n.||                                d d         }nt          d          |\  }}||j	        n|j	        }|	|	
                                nd}|t          j        |||z   f|          }|gt          | j        d          r1| j        j        d d d |f         }|                    ||          }|}n!t          j        |t          j        |          }|                     ||          }| j         j        rL|J|                                \  }}}||f}|t          j        ||          }|                     |          }nd }|                     || j         j                  }|                     |||||	          }|                     ||||||	|
|||

  
        }|d         }| j        |                     |          nd }|s||f|dd          z   S t3          |||j        |j        |j        |j                  S )NFzDYou cannot specify both input_ids and inputs_embeds at the same timerx   z5You have to specify either input_ids or inputs_embedsr   r  rz   r   )r   rw   rz   r   r   )	r   r   r   r   r   r   r   rA  rB  r   )rF  pooler_outputr   r   rG  rH  )r]   r   rA  use_return_dictr   r   r   %warn_if_padding_and_no_attention_maskr   r   get_seq_lengthrY   onesr   r   rz   r   r   r   get_extended_attention_maskinvert_attention_maskget_head_maskr>  r  r  r   r   r   rG  rH  )r   r   r   rz   rw   r   r   r   r   r   r   r   rA  rB  r   
batch_sizer   r   r   r   r   extended_attention_maskencoder_batch_sizeencoder_sequence_lengthr:  encoder_hidden_shapeencoder_extended_attention_maskembedding_outputencoder_outputsr  r[  s                                  r*   r   zRealmBertModel.forward  sL     2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B];! 	%.%:		@UIII ]%>cddd"66y.QQQ#..**KK&',,..ss3KKTUUU!,
J%.%:!!@T FUE`!?!?!A!A!Afg!"Z*jCY6Y)ZdjkkkN!t(899 [*./*HKZK*X'3J3Q3QR\^h3i3i0!A!&[
SY!Z!Z!Z 150P0PQ_al0m0m ;! 	3&;&G=R=W=W=Y=Y: 7$68O#P %-).4HQW)X)X)X&.2.H.HI_.`.`++.2+ &&y$+2OPP	??%)'#9 + 
 
 ,,2"7#B+/!5# ' 
 
 *!,8<8OO444UY 	J#]3oabb6III;-'+;)7&1,=
 
 
 	
r,   )TNNNNNNNNNNNNN)
rG   r   r   r   r~   r  r  r  r   r   r   s   @r*   r  r    s              / / /0 0 0C C C "#!l
 l
 l
 l
 l
 l
 l
 l
r,   r  z`The embedder of REALM outputting projected score that will be used to calculate relevance score.c                       e Zd ZdgZ fdZd Zd Z ee	                    d                     e
ee          	 	 	 	 	 	 	 	 	 ddeej                 d	eej                 d
eej                 deej                 deej                 deej                 dee         dee         dee         deeef         fd                        Z xZS )rL   zcls.predictions.decoder.biasc                     t                                          |           t          | j                  | _        t          | j                  | _        |                                  d S r   )r}   r~   r  r]   r  r  r   r  r   s     r*   r~   zRealmEmbedder.__init__k  sS       #DK00
(55r,   c                 $    | j         j        j        S r   r  r   r   r  s    r*   r  z"RealmEmbedder.get_input_embeddingsr      z$44r,   c                 (    || j         j        _        d S r   r  r  s     r*   r  z"RealmEmbedder.set_input_embeddingsu      05
---r,   batch_size, sequence_lengthoutput_typeconfig_classNr   r   rz   rw   r   r   r   rA  rB  r   c
                     |	|	n| j         j        }	|                     |||||||||		  	        }
|
d         }|                     |          }|	s|f|
dd         z   S t	          ||
j        |
j                  S )a  
        Returns:

        Example:

        ```python
        >>> from transformers import AutoTokenizer, RealmEmbedder
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("google/realm-cc-news-pretrained-embedder")
        >>> model = RealmEmbedder.from_pretrained("google/realm-cc-news-pretrained-embedder")

        >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
        >>> outputs = model(**inputs)

        >>> projected_score = outputs.projected_score
        ```
        Nr   rz   rw   r   r   r   rA  rB  r   r3   r   )r^  r   rG  )r]   r  r  r   r]  r   rG  )r   r   r   rz   rw   r   r   r   rA  rB  realm_outputsr  r^  s                r*   r   zRealmEmbedder.forwardx  s    B &1%<kk$+B]

))%'/!5# # 

 

 &a(((=11 	#%ac(:::& /+9(3   r,   )	NNNNNNNNN)rG   r   r   _tied_weights_keysr~   r  r  r   REALM_INPUTS_DOCSTRINGformatr   r]  _CONFIG_FOR_DOCr   rY   r   r   r   r   r   r   r   r   s   @r*   rL   rL   d  s       
 99    5 5 56 6 6 +*+A+H+HIf+g+ghh+>_]]] 156:59371559,0/3&*9 9E,-9 !!239 !!12	9
 u/09 E-.9   129 $D>9 'tn9 d^9 
u))	*9 9 9 ^] ih9 9 9 9 9r,   rL   zoThe scorer of REALM outputting relevance scores representing the score of document candidates (before softmax).c            !           e Zd ZdZd fd	Z ee                    d                     ee	e
          	 	 	 	 	 	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 d	eej                 d
eej                 deej                 deej                 deej                 deej                 deej                 dee         dee         dee         deee	f         fd                        Z xZS )RealmScorerz
    Args:
        query_embedder ([`RealmEmbedder`]):
            Embedder for input sequences. If not specified, it will use the same embedder as candidate sequences.
    Nc                     t                                          |           t          | j                  | _        ||n| j        | _        |                                  d S r   )r}   r~   rL   r]   embedderquery_embedderr  )r   r]   r  rF   s      r*   r~   zRealmScorer.__init__  sW       %dk220>0JnnPTP]r,   r  r  r   r   rz   rw   candidate_input_idscandidate_attention_maskcandidate_token_type_idscandidate_inputs_embedsr   r   r   rA  rB  r   c                    ||n| j         j        }||
t          d          ||t          d          |                     |||||	|
|||	  	        }|                     |||          \  }}}|                     |||||	||||	  	        }|d         }|d         }|                    d| j         j        | j         j                  }t          j
        d||          }|s|||fS t          |||          S )	a
  
        candidate_input_ids (`torch.LongTensor` of shape `(batch_size, num_candidates, sequence_length)`):
            Indices of candidate input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        candidate_attention_mask (`torch.FloatTensor` of shape `(batch_size, num_candidates, sequence_length)`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        candidate_token_type_ids (`torch.LongTensor` of shape `(batch_size, num_candidates, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        candidate_inputs_embeds (`torch.FloatTensor` of shape `(batch_size * num_candidates, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `candidate_input_ids` you can choose to directly pass an embedded
            representation. This is useful if you want more control over how to convert *candidate_input_ids* indices
            into associated vectors than the model's internal embedding lookup matrix.

        Returns:

        Example:

        ```python
        >>> import torch
        >>> from transformers import AutoTokenizer, RealmScorer

        >>> tokenizer = AutoTokenizer.from_pretrained("google/realm-cc-news-pretrained-scorer")
        >>> model = RealmScorer.from_pretrained("google/realm-cc-news-pretrained-scorer", num_candidates=2)

        >>> # batch_size = 2, num_candidates = 2
        >>> input_texts = ["How are you?", "What is the item in the picture?"]
        >>> candidates_texts = [["Hello world!", "Nice to meet you!"], ["A cute cat.", "An adorable dog."]]

        >>> inputs = tokenizer(input_texts, return_tensors="pt")
        >>> candidates_inputs = tokenizer.batch_encode_candidates(candidates_texts, max_length=10, return_tensors="pt")

        >>> outputs = model(
        ...     **inputs,
        ...     candidate_input_ids=candidates_inputs.input_ids,
        ...     candidate_attention_mask=candidates_inputs.attention_mask,
        ...     candidate_token_type_ids=candidates_inputs.token_type_ids,
        ... )
        >>> relevance_score = outputs.relevance_score
        ```Nz5You have to specify either input_ids or input_embeds.zJYou have to specify either candidate_input_ids or candidate_inputs_embeds.r
  r   rx   z
bd,bnd->bn)rb  rc  rd  )r]   r  r   r  r  r  r   num_candidatesr  rY   r   ra  )r   r   r   rz   rw   r  r  r  r  r   r   r   rA  rB  query_outputsflattened_input_idsflattened_attention_maskflattened_token_type_idscandidate_outputsrc  rd  rb  s                         r*   r   zRealmScorer.forward  se   R &1%<kk$+B]!6TUUU&+B+Jijjj++))%'/!5# , 

 

 UYThTh!9;SU
 U
Q	68P !MM33%1/!5# * 

 

 $A&+A.)..r4;3Mt{Onoo,|[/RR 	A"K@@ +Ve
 
 
 	
r,   r   r  )rG   r   r   r   r~   r   r  r  r   ra  r  r   rY   r   r   r   r   r   r   r   r   s   @r*   r  r    s       
       +*+A+H+HIf+g+ghh+<?[[[ 156:5937:>@D?C?C1559,0/3&*z
 z
E,-z
 !!23z
 !!12	z

 u/0z
 &e&67z
 #+5+<"=z
 #+5+;"<z
 "*%*;!<z
 E-.z
   12z
 $D>z
 'tnz
 d^z
 
u''	(z
 z
 z
 \[ ihz
 z
 z
 z
 z
r,   r  zrThe knowledge-augmented encoder of REALM outputting masked language model logits and marginal log-likelihood loss.c                       e Zd ZdgZ fdZd Zd Zd Zd Z e	e
                    d                     eee          	 	 	 	 	 	 	 	 	 	 	 	 dd
eej                 deej                 deej                 deej                 deej                 deej                 deej                 deej                 deej                 dee         dee         dee         deeef         fd                        Z xZS )rK   zcls.predictions.decoderc                     t                                          |           t          | j                  | _        t          | j                  | _        |                                  d S r   )r}   r~   r  r]   r  r  r   r  r   s     r*   r~   z!RealmKnowledgeAugEncoder.__init__Q  sS       #DK00
#DK00r,   c                 $    | j         j        j        S r   r  r  s    r*   r  z-RealmKnowledgeAugEncoder.get_input_embeddingsW  r  r,   c                 (    || j         j        _        d S r   r  r  s     r*   r  z-RealmKnowledgeAugEncoder.set_input_embeddingsZ  r  r,   c                 $    | j         j        j        S r   )r   r  r  r  s    r*   get_output_embeddingsz.RealmKnowledgeAugEncoder.get_output_embeddings]  s    x#++r,   c                 T    || j         j        _        |j        | j         j        _        d S r   )r   r  r  r2   )r   new_embeddingss     r*   set_output_embeddingsz.RealmKnowledgeAugEncoder.set_output_embeddings`  s%    '5$$2$7!!!r,   z+batch_size, num_candidates, sequence_lengthr  Nr   r   rz   rw   r   r   rb  labelsmlm_maskr   rA  rB  r   c                 B   ||n| j         j        }||t          d          |                     |||          \  }}}|                     |||||||
||	  	        }|d         }|                     |          }|}d}|i|                                \  }}|	!t          j        |t          j	                  }	n|	
                    t          j	                  }	t          d          }|                    d| j         j                  }|                    d	| j         j                                      d          } |||                              || j         j        |           }|                    d                              d          }||z   }|                    d	          }t          j        t          j        ||	z            t          j        |	          z             }|s|f|d
d         z   }||f|z   n|S t+          |||j        |j                  S )a  
        relevance_score (`torch.FloatTensor` of shape `(batch_size, num_candidates)`, *optional*):
            Relevance score derived from RealmScorer, must be specified if you want to compute the masked language
            modeling loss.

        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

        mlm_mask (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to avoid calculating joint loss on certain positions. If not specified, the loss will not be masked.
            Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

        Returns:

        Example:

        ```python
        >>> import torch
        >>> from transformers import AutoTokenizer, RealmKnowledgeAugEncoder

        >>> tokenizer = AutoTokenizer.from_pretrained("google/realm-cc-news-pretrained-encoder")
        >>> model = RealmKnowledgeAugEncoder.from_pretrained(
        ...     "google/realm-cc-news-pretrained-encoder", num_candidates=2
        ... )

        >>> # batch_size = 2, num_candidates = 2
        >>> text = [["Hello world!", "Nice to meet you!"], ["The cute cat.", "The adorable dog."]]

        >>> inputs = tokenizer.batch_encode_candidates(text, max_length=10, return_tensors="pt")
        >>> outputs = model(**inputs)
        >>> logits = outputs.logits
        ```NzZYou have to specify `relevance_score` when `labels` is specified in order to compute loss.r
  r   r{   none)	reductionrx   r   r3   r   )rg  logitsr   rG  )r]   r  r   r  r  r   r   rY   	ones_liker  r  r   r   r   tiler  log_softmax	unsqueeze	logsumexpnansumsumr   r   rG  )r   r   r   rz   rw   r   r   rb  r)  r*  r   rA  rB  r  r  r  joint_outputsjoint_outputr  rd  masked_lm_lossr  r   loss_fct
mlm_logitsmlm_targetsmasked_lm_log_probcandidate_log_probjoint_gold_log_probmarginal_gold_log_probsr
  s                                  r*   r   z RealmKnowledgeAugEncoder.forwardd  sj   p &1%<kk$+B]/"9l   UYThTh~~U
 U
Q	68P 

33%'/!5# # 

 

 %Q' HH\22)%+[[]]"J
 ?6GGG#==77 (&999H +//DK4JKKJ ++a)CDDII"MMK"*(:{"C"C"H"HDK6
# # " "1!<!<R!@!@!J!J2!N!N"47I"I&9&C&CA&F&F##l595Lx5W+X+X[`[dem[n[n+noooN 	Z')M!A#,>>F3A3M^%..SYY$'5$/	
 
 
 	
r,   )NNNNNNNNNNNN)rG   r   r   r  r~   r  r  r%  r(  r   r  r  r   r   r  r   rY   r   r   r   r   r   r   r   r   s   @r*   rK   rK   I  s        44    5 5 56 6 6, , ,8 8 8 +*%%&STT  >XXX 156:593715597;-1/3,0/3&*x
 x
E,-x
 !!23x
 !!12	x

 u/0x
 E-.x
   12x
 "%"34x
 )*x
 5+,x
 $D>x
 'tnx
 d^x
 
un$	%x
 x
 x
 YX x
 x
 x
 x
 x
r,   rK   zThe reader of REALM.c            #           e Zd Z fdZ ee                    d                     eee	          	 	 	 	 	 	 	 	 	 	 	 	 	 	 dde
ej                 de
ej                 de
ej                 de
ej                 d	e
ej                 d
e
ej                 de
ej                 de
ej                 de
ej                 de
ej                 de
ej                 de
e         de
e         de
e         deeef         fd                        Z xZS )rE   c                    t                                          |           |j        | _        t          |          | _        t          |          | _        t          |          | _        | 	                                 d S r   )
r}   r~   
num_labelsr  r  r  r   r  
qa_outputsr  r   s     r*   r~   zRealmReader.__init__  sh        +#F++
#F++/77r,   z!reader_beam_size, sequence_lengthr  Nr   r   rz   rw   r   r   rb  r  start_positionsend_positionshas_answersr   rA  rB  r   c                     ||n| j         j        }|t          d          |t          d          |                    d          | j         j        k     rt          d          |                     |||||||||	  	        }|d         }|                     ||d| j         j                           \  }}}t          j	        |d| j         j                 d          }||z  }t          j
        t          j        |d	          j                  }t          j
        t          j        |d	          j                  }t          j        |d|
          }t          j        |d|
          }d}d}d}d}d}|	C|
@|=d }d }|                    d          } |	                    d|           }	|
                    d|           }
|}t          j        |          }! ||||	d| j         j                 |
d| j         j                           }t          j        |          }" |||          } ||                    d          |                    d                    }||!                    t          j                  z  }||"                    t          j                  z  }||z                                   }|s||||f|dd         z   }#|
|||||f|#z   n|#S t)          ||||||||||j        |j                  S )ar  
        relevance_score (`torch.FloatTensor` of shape `(searcher_beam_size,)`, *optional*):
            Relevance score, which must be specified if you want to compute the logits and marginal log loss.
        block_mask (`torch.BoolTensor` of shape `(searcher_beam_size, sequence_length)`, *optional*):
            The mask of the evidence block, which must be specified if you want to compute the logits and marginal log
            loss.
        start_positions (`torch.LongTensor` of shape `(searcher_beam_size,)`, *optional*):
            Labels for position (index) of the start of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        end_positions (`torch.LongTensor` of shape `(searcher_beam_size,)`, *optional*):
            Labels for position (index) of the end of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        has_answers (`torch.BoolTensor` of shape `(searcher_beam_size,)`, *optional*):
            Whether or not the evidence block has answer(s).

        Returns:
        NzCYou have to specify `relevance_score` to calculate logits and loss.zOYou have to specify `block_mask` to separate question block and evidence block.r   zQThe input sequence length must be greater than or equal to config.max_span_width.r
  r   rx   r   r  c                    t          j        t          j        t          j        | d          d          t          j        |d                    }t          j        t          j        t          j        |d          d          t          j        |d                    }t          j        t          j        ||          d          S )zCompute correct span.r   rx   r   )rY   eqr2  rN   logical_and)r  r  gold_starts	gold_endsis_gold_startis_gold_ends         r*   compute_correct_candidatesz7RealmReader.forward.<locals>.compute_correct_candidatesD  s     !&OEO4Da$H$H!LLeo^ikmNnNn! ! $hOEONA$F$FJJEO\egiLjLj 
 y!2=+!N!NPQRRRr,   c                     t           j        fd}t          j        |  ||| j                  z   d          }t          j        | d          }||z
  S )z3Loss based on the negative marginal log-likelihood.c                 f    d|                      |          z
  t          j        |          j        z  S r  r  r  s     r*   r  zERealmReader.forward.<locals>.marginal_log_loss.<locals>.mask_to_scoreT  s+    $))E"2"22ek%6H6H6LLLr,   r{   rx   r   )rY   r  r3  r|   )r.  
is_correctr  log_numeratorlog_denominators        r*   marginal_log_lossz.RealmReader.forward.<locals>.marginal_log_lossQ  so     /4m M M M M !&zY_Ye9f9f9f0fln o o o"'/&b"A"A"A&66r,   )r  r  rK  rL  r3   )rg  rh  ri  rj  rk  rl  rm  rn  ro  r   rG  )r]   r  r   r   r  r  rC  reader_beam_sizerY   r2  argmaxmaxvaluesr  clamprN   r   r  r  r  rf  r   rG  )$r   r   r   rz   rw   r   r   rb  r  rD  rE  rF  r   rA  rB  r   r  r  r  r  retriever_logitspredicted_block_indexpredicted_candidatepredicted_startpredicted_end
total_lossrh  ri  rj  rk  rO  rU  ignored_indexany_retriever_correctany_reader_correctr
  s$                                       r*   r   zRealmReader.forward  s   L &1%<kk$+B]"bcccnoooq!!DK$>>>pqqq**))%'/!5#  

 

 "!* ;?//ZDK,H(HI;
 ;
7' !??1t{?[;[+\^`aa)) %UY}!-L-L-L-S T T#l59]+J+J+J+QRR,-=1L_```*>qH[\\\
 &=+DI`S S S	7 	7 	7 ,0033M-33BFFO)//MBBM +$)I.?$@$@!77!1-+A0L,LM'DK,H(HI	  N "'>!:!:..@QRRN++M,>,>r,B,BNDWDWXZD[D[\\K388GGGN-225=AAAK(;6<<>>J 	+-@/S`adklmlnlndooF ) nk;Ln]`fff !)#/)+)%!!/)
 
 
 	
r,   )NNNNNNNNNNNNNN)rG   r   r   r~   r   r  r  r   rf  r  r   rY   r   r   rp  r   r   r   r   r   r   s   @r*   rE   rE     s            +*+A+H+HIl+m+mnn+<?[[[ 156:593715597;156:4826,0/3&*W
 W
E,-W
 !!23W
 !!12	W

 u/0W
 E-.W
   12W
 "%"34W
 U-.W
 "%"23W
   01W
 e./W
 $D>W
 'tnW
 d^W
  
u''	(!W
 W
 W
 \[ onW
 W
 W
 W
 W
r,   rE   ay  
    Args:
        input_ids (`torch.LongTensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token (should not be used in this model by design).

            [What are token type IDs?](../glossary#token-type-ids)
        answer_ids (`list` of shape `(num_answers, answer_length)`, *optional*):
            Answer ids for computing the marginal log-likelihood loss. Indices should be in `[-1, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-1` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
z?`RealmForOpenQA` for end-to-end open domain question answering.c                   H    e Zd Zd fd	Zed             Zd Z ee	                    d                     e
ee          	 	 	 	 ddeej                 deej                 d	eej                 d
eej                 dee         deeef         fd                        Z xZS )rI   Nc           
         t                                          |           t          |          | _        t	          |          | _        |                     dt          j        d          	                    |j
        |j        ft          j        t          j        d                               || _        |                                  d S )N	block_embr'   cpu)r   r|   r   )r}   r~   rL   r  rE   r   r   rY   r   	new_emptynum_block_recordsr  r  r   	retrieverr  )r   r]   rj  rF   s      r*   r~   zRealmForOpenQA.__init__  s       %f--!&))KOO%%.0JKm|E** &  	
 	
 	
 #r,   c                 @    | j         r| j        j        S | j        j        S r   )rI  r]   searcher_beam_sizerV  r  s    r*   rl  z!RealmForOpenQA.searcher_beam_size  s"    = 	2;11{++r,   c                 D    | j                             |          | _         dS )zSend `self.block_emb` to a specific device.

        Args:
            device (`str` or `torch.device`):
                The device to which `self.block_emb` will be sent.
        N)rf  r   )r   r   s     r*   block_embedding_toz!RealmForOpenQA.block_embedding_to  s     **622r,   z1, sequence_lengthr  r   r   rz   
answer_idsrB  r   c                 T   ||n| j         j        }| |j        d         dk    rt          d          |                     |||d          }|d         }t          j        d| j        |                    | j        j	                            }t          j
        || j        d	          \  }	}
|
                                }
t          j        | j        d|

          }|                     |
                                ||| j         j                  \  }}}}|                    | j        j	                  }|j                            t
          j                                      | j        j	                  }|                                                    |j                            t
          j                             |t          j        |t
          j        | j        j	                  }t          j        |t
          j        | j        j	                  }t          j        |t
          j        | j        j	                  }t          j        d|                                |                    | j        j	                            }|                     |j        d| j         j                 |j        d| j         j                 |j        d| j         j                 |||||d	  	        }|j        |j                 }||j        |j        dz            }|s||fS tA          ||          S )a  
        Returns:

        Example:

        ```python
        >>> import torch
        >>> from transformers import RealmForOpenQA, RealmRetriever, AutoTokenizer

        >>> retriever = RealmRetriever.from_pretrained("google/realm-orqa-nq-openqa")
        >>> tokenizer = AutoTokenizer.from_pretrained("google/realm-orqa-nq-openqa")
        >>> model = RealmForOpenQA.from_pretrained("google/realm-orqa-nq-openqa", retriever=retriever)

        >>> question = "Who is the pioneer in modern computer science?"
        >>> question_ids = tokenizer([question], return_tensors="pt")
        >>> answer_ids = tokenizer(
        ...     ["alan mathison turing"],
        ...     add_special_tokens=False,
        ...     return_token_type_ids=False,
        ...     return_attention_mask=False,
        ... ).input_ids

        >>> reader_output, predicted_answer_ids = model(**question_ids, answer_ids=answer_ids, return_dict=False)
        >>> predicted_answer = tokenizer.decode(predicted_answer_ids)
        >>> loss = reader_output.loss
        ```Nr   r   z'The batch_size of the inputs must be 1.T)r   rz   r   rB  z	BD,QD->QBrx   )kr   r  )
max_lengthr  r   zD,BD->B)	r   r   rz   rb  r  rF  rD  rE  rB  )rt  ru  )!r]   r  rV   r   r  rY   r   rf  r   r   topkrl  r  r  rj  rg  reader_seq_lenr   special_tokens_maskr  r   logical_not_logical_and_rz   r   r   r   rV  r   rl  rn  ro  rs  )r   r   r   rz   ro  rB  question_outputsquestion_projectionbatch_scoresr:  retrieved_block_idsretrieved_block_embrF  rn  ro  concat_inputsr  retrieved_logitsrt  predicted_blockru  s                        r*   r   zRealmForOpenQA.forward  s   J &1%<kk$+B] Y_Q%71%<%<FGGG==~ko ) 
 
 /q1 |KATAWAWX\XfXmAnAnoo!&LD<SY[!\!\!\199;;#0QNabbb :>##%%y*Ic :H :
 :
6Y &(();<<"6;;EJGGJJRVR]RdJee
!!..}/K/P/PQVQ[/\/\]]]",{%*T[M_```KYejI[\\\Il7%*T[EWXXXG !<*22446I6L6LT[M_6`6`
 
 #-a$+2N.NO(7DK<X8XY(7DK<X8XY,!#%! $ 

 

 (1-2IJ.}/FI^abIb/bc 	7 "666#'!5
 
 
 	
r,   r   )NNNN)rG   r   r   r~   propertyrl  rn  r   REALM_FOR_OPEN_QA_DOCSTRINGr  r   rs  r  r   rY   r   r   r   r   r   r   r   r   s   @r*   rI   rI     sL       
       , , X,
3 3 3 +*+F+M+MNb+c+cdd+?o^^^ 7;5915&*a
 a
E,-a
 !!23a
 !!12	a

 U-.a
 d^a
 
u**	+a
 a
 a
 _^ eda
 a
 a
 a
 a
r,   rI   )rL   rI   rK   r  rE   r  rn   )Mr   r   r;   dataclassesr   typingr   r   rY   r   torch.nnr   activationsr	   cache_utilsr
   modeling_layersr   modeling_outputsr   r   r   r   modeling_utilsr   pytorch_utilsr   r   r   utilsr   r   r   r   utils.deprecationr   configuration_realmr   
get_loggerrG   r9   _EMBEDDER_CHECKPOINT_FOR_DOC_ENCODER_CHECKPOINT_FOR_DOC_SCORER_CHECKPOINT_FOR_DOCr  rn   Modulerp   r   r   r  r  r  r  r#  r7  rU  r]  ra  rf  rs  rx  r~  r  r  r  REALM_START_DOCSTRINGr  r  r  rL   r  rK   rE   r  rI   __all__r'   r,   r*   <module>r     sO      				 ! ! ! ! ! ! " " " " " " " "        % % % % % % " " " " " " ! ! ! ! ! ! : : : : : :            / . . . . . m m m m m m m m m m u u u u u u u u u u u u 1 1 1 1 1 1 , , , , , , 
	H	%	%I G E h h hV= = = = =bi = = =@D D D D D D D DN    bi      
1 1 1 1 1RY 1 1 1h    	       ")   T T T T T+ T T TnM
 M
 M
 M
 M
29 M
 M
 M
`    ")    : : : : :+ : : :4 8 8 8 8 8 8 8 8$ .: .: .: .: .: .: .: .:b < < < < <; < < <     29   "    BI   .! ! ! ! !ry ! ! !
 
 
 
 
BI 
 
 
@? @? @? @? @?BI @? @? @?F	 / d%  %  %  %  % ? %  %  % PL
 L
 L
 L
 L
) L
 L
 L
^ f K K K K K( K K	 K\ u L
 L
 L
 L
 L
& L
 L
	 L
^  
R
 R
 R
 R
 R
3 R
 R
 
R
j ,.CDDd
 d
 d
 d
 d
& d
 d
 EDd
N B E D
 D
 D
 D
 D
) D
 D
	 D
N  r,   