
     `i*Y                        d Z ddlmZ ddlmZmZ ddlZddlmZmZ ddl	m
Z
 ddlmZ dd	lmZmZmZ d
dlmZ ddlmZ  ej        e          Ze ed           G d de                                  Ze ed           G d de                                  Ze ed           G d de                                  Ze G d de                      Z G d de          Z G d de          Z G d de          Z G d de          Z G d  d!e          Z  ed"           G d# d$e                      Z! ed%           G d& d'e                      Z" ed(           G d) d*e                       Z#g d+Z$dS ),z5PyTorch DPR model for Open Domain Question Answering.    )	dataclass)OptionalUnionN)Tensornn   )BaseModelOutputWithPooling)PreTrainedModel)ModelOutputauto_docstringlogging   )	BertModel   )	DPRConfigz6
    Class for outputs of [`DPRQuestionEncoder`].
    )custom_introc                       e Zd ZU dZej        ed<   dZee	ej        df                  ed<   dZ
ee	ej        df                  ed<   dS )DPRContextEncoderOutputa  
    pooler_output (`torch.FloatTensor` of shape `(batch_size, embeddings_size)`):
        The DPR encoder outputs the *pooler_output* that corresponds to the context representation. Last layer
        hidden-state of the first token of the sequence (classification token) further processed by a Linear layer.
        This output is to be used to embed contexts for nearest neighbors queries with questions embeddings.
    pooler_outputN.hidden_states
attentions__name__
__module____qualname____doc__torchFloatTensor__annotations__r   r   tupler        x/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/dpr/modeling_dpr.pyr   r   *   q           $$$$=AM8E%"3S"89:AAA:>Ju0#567>>>>>r"   r   c                       e Zd ZU dZej        ed<   dZee	ej        df                  ed<   dZ
ee	ej        df                  ed<   dS )DPRQuestionEncoderOutputa  
    pooler_output (`torch.FloatTensor` of shape `(batch_size, embeddings_size)`):
        The DPR encoder outputs the *pooler_output* that corresponds to the question representation. Last layer
        hidden-state of the first token of the sequence (classification token) further processed by a Linear layer.
        This output is to be used to embed questions for nearest neighbors queries with context embeddings.
    r   N.r   r   r   r!   r"   r#   r&   r&   =   r$   r"   r&   c                       e Zd ZU dZej        ed<   dZeej                 ed<   dZ	eej                 ed<   dZ
eeej        df                  ed<   dZeeej        df                  ed<   dS )	DPRReaderOutputa  
    start_logits (`torch.FloatTensor` of shape `(n_passages, sequence_length)`):
        Logits of the start index of the span for each passage.
    end_logits (`torch.FloatTensor` of shape `(n_passages, sequence_length)`):
        Logits of the end index of the span for each passage.
    relevance_logits (`torch.FloatTensor` of shape `(n_passages, )`):
        Outputs of the QA classifier of the DPRReader that corresponds to the scores of each passage to answer the
        question, compared to all the other passages.
    start_logitsN
end_logitsrelevance_logits.r   r   )r   r   r   r   r   r   r   r*   r   r+   r   r    r   r!   r"   r#   r(   r(   P   s           ####.2J*+22248hu01888=AM8E%"3S"89:AAA:>Ju0#567>>>>>r"   r(   c                       e Zd ZdZd ZdS )DPRPreTrainedModelTc                    t          |t          j                  rT|j        j                            d| j        j                   |j         |j        j        	                                 dS dS t          |t          j
                  r_|j        j                            d| j        j                   |j        +|j        j        |j                 	                                 dS dS t          |t          j                  r?|j        j        	                                 |j        j                            d           dS dS )zInitialize the weightsg        )meanstdNg      ?)
isinstancer   Linearweightdatanormal_configinitializer_rangebiaszero_	Embeddingpadding_idx	LayerNormfill_)selfmodules     r#   _init_weightsz DPRPreTrainedModel._init_weightsl   s)   fbi(( 	* M&&CT[5R&SSS{& &&((((( '&-- 	*M&&CT[5R&SSS!-"6#56<<>>>>> .--- 	*K""$$$M$$S)))))	* 	*r"   N)r   r   r   _supports_sdpar@   r!   r"   r#   r-   r-   h   s(        N* * * * *r"   r-   c                        e Zd ZdZdef fdZ	 	 	 	 	 	 ddedee         dee         d	ee         d
ededede	e
eedf         f         fdZedefd            Z xZS )
DPREncoder
bert_modelr6   c                 p   t                                          |           t          |d          | _        | j        j        j        dk    rt          d          |j        | _        | j        dk    r.t          j	        | j        j        j        |j                  | _
        |                                  d S )NF)add_pooling_layerr   z!Encoder hidden_size can't be zero)super__init__r   rD   r6   hidden_size
ValueErrorprojection_dimr   r2   encode_proj	post_initr>   r6   	__class__s     r#   rH   zDPREncoder.__init__   s       #FeDDD?!-22@AAA$3""!y)?)KVMbccDr"   NF	input_idsattention_masktoken_type_idsinputs_embedsoutput_attentionsoutput_hidden_statesreturn_dictreturn.c           	         |                      |||||||          }|d         }	|	d d dd d f         }
| j        dk    r|                     |
          }
|s|	|
f|dd          z   S t          |	|
|j        |j                  S )NrP   rQ   rR   rS   rT   rU   rV   r   r   )last_hidden_stater   r   r   )rD   rK   rL   r	   r   r   )r>   rP   rQ   rR   rS   rT   rU   rV   outputssequence_outputpooled_outputs              r#   forwardzDPREncoder.forward   s     //))'/!5# " 
 
 "!*'1aaa0"" ,,];;M 	B#]3gabbkAA)-'!/)	
 
 
 	
r"   c                 R    | j         dk    r| j        j        S | j        j        j        S )Nr   )rK   rL   out_featuresrD   r6   rI   )r>   s    r#   embeddings_sizezDPREncoder.embeddings_size   s*    ""#00%11r"   )NNNFFF)r   r   r   base_model_prefixr   rH   r   r   boolr   r	   r    r^   propertyintra   __classcell__rO   s   @r#   rC   rC   }   s       $	y 	 	 	 	 	 	 ,0+/*."'%*!!
 !
!
 !(!
 !(	!

  '!
  !
 #!
 !
 
)5+==	>!
 !
 !
 !
F 2 2 2 2 X2 2 2 2 2r"   rC   c                        e Zd ZdZdef fdZ	 	 	 	 ddededee         d	ed
edede	e
eedf         f         fdZ xZS )DPRSpanPredictorencoderr6   c                 *   t                                          |           t          |          | _        t	          j        | j        j        d          | _        t	          j        | j        j        d          | _        | 	                                 d S )Nr   r   )
rG   rH   rC   rj   r   r2   ra   
qa_outputsqa_classifierrM   rN   s     r#   rH   zDPRSpanPredictor.__init__   sq       !&)))DL$@!DDYt|'CQGGr"   NFrP   rQ   rS   rT   rU   rV   rW   .c                    ||                                 n|                                 d d         \  }}|                     ||||||          }	|	d         }
|                     |
          }|                    dd          \  }}|                    d                                          }|                    d                                          }|                     |
d d dd d f                   }|                    ||          }|                    ||          }|                    |          }|s|||f|	dd          z   S t          ||||	j	        |	j
                  S )Nr   )rQ   rS   rT   rU   rV   r   r   )dim)r)   r*   r+   r   r   )sizerj   rl   splitsqueeze
contiguousrm   viewr(   r   r   )r>   rP   rQ   rS   rT   rU   rV   
n_passagessequence_lengthr[   r\   logitsr)   r*   r+   s                  r#   r^   zDPRSpanPredictor.forward   s    ;D:Oinn&6&6&6UbUgUgUiUijlkljlUm#
O,,)'/!5#  
 
 "!* 11#)<<r<#:#: j#++B//::<<''++6688
--oaaaAAAg.FGG $((_EE__ZAA
+00<< 	N *.>?'!""+MM%!-!/)
 
 
 	
r"   )NFFF)r   r   r   rb   r   rH   r   r   rc   r   r(   r    r^   rf   rg   s   @r#   ri   ri      s        !y       +/"'%*!+
 +
+
 +
  '	+

  +
 #+
 +
 
fck 22	3+
 +
 +
 +
 +
 +
 +
 +
r"   ri   c                   &    e Zd ZU dZeed<   dZdZdS )DPRPretrainedContextEncoder
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    r6   Nctx_encoderr   r   r   r   r   r   load_tf_weightsrb   r!   r"   r#   rz   rz      s5          
 O%r"   rz   c                   &    e Zd ZU dZeed<   dZdZdS )DPRPretrainedQuestionEncoderr{   r6   Nquestion_encoderr}   r!   r"   r#   r   r      s5          
 O*r"   r   c                   &    e Zd ZU dZeed<   dZdZdS )DPRPretrainedReaderr{   r6   Nspan_predictorr}   r!   r"   r#   r   r   	  s5          
 O(r"   r   zf
    The bare DPRContextEncoder transformer outputting pooler outputs as context representations.
    c                        e Zd Zdef fdZe	 	 	 	 	 	 	 ddee         dee         dee         dee         dee         d	ee         d
ee         de	e
eedf         f         fd            Z xZS )DPRContextEncoderr6   c                     t                                          |           || _        t          |          | _        |                                  d S N)rG   rH   r6   rC   r|   rM   rN   s     r#   rH   zDPRContextEncoder.__init__  sH       %f--r"   NrP   rQ   rR   rS   rT   rU   rV   rW   .c           	      ^   ||n| j         j        }||n| j         j        }||n| j         j        }||t	          d          ||                                }n.||                                dd         }nt	          d          ||j        n|j        }	|(|t          j        ||	          n|| j         j	        k    }|!t          j
        |t          j        |	          }|                     |||||||          }
|s
|
dd         S t          |
j        |
j        |
j        	          S )
aS  
        input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
            Indices of input sequence tokens in the vocabulary. To match pretraining, DPR input sequence should be
            formatted with [CLS] and [SEP] tokens as follows:

            (a) For sequence pairs (for a pair title+text for example):

            ```
            tokens:         [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
            token_type_ids:   0   0  0    0    0     0       0   0   1  1  1  1   1   1
            ```

            (b) For single sequences (for a question for example):

            ```
            tokens:         [CLS] the dog is hairy . [SEP]
            token_type_ids:   0   0   0   0  0     0   0
            ```

            DPR is a model with absolute position embeddings so it's usually advised to pad the inputs on the right
            rather than the left.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)

        Examples:

        ```python
        >>> from transformers import DPRContextEncoder, DPRContextEncoderTokenizer

        >>> tokenizer = DPRContextEncoderTokenizer.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
        >>> model = DPRContextEncoder.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
        >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors="pt")["input_ids"]
        >>> embeddings = model(input_ids).pooler_output
        ```NDYou cannot specify both input_ids and inputs_embeds at the same timero   5You have to specify either input_ids or inputs_embedsdevicedtyper   rY   r   r   r   r   )r6   rT   rU   use_return_dictrJ   rq   r   r   onespad_token_idzeroslongr|   r   r   r   r   r>   rP   rQ   rR   rS   rT   rU   rV   input_shaper   r[   s              r#   r^   zDPRContextEncoder.forward&  s   b 2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B] ]%>cddd"#..**KK&',,..ss3KKTUUU%.%:!!@T! $ 
;v66664;#;; 
 !"[EJvVVVN""))'/!5# # 
 
  	122;&!/w?Tahas
 
 
 	
r"   NNNNNNN)r   r   r   r   rH   r   r   r   rc   r   r   r    r^   rf   rg   s   @r#   r   r     s       y        '++/+/*.,0/3&*X
 X
F#X
 !(X
 !(	X

  'X
 $D>X
 'tnX
 d^X
 
&fck(::	;X
 X
 X
 ^X
 X
 X
 X
 X
r"   r   zh
    The bare DPRQuestionEncoder transformer outputting pooler outputs as question representations.
    c                        e Zd Zdef fdZe	 	 	 	 	 	 	 ddee         dee         dee         dee         dee         d	ee         d
ee         de	e
eedf         f         fd            Z xZS )DPRQuestionEncoderr6   c                     t                                          |           || _        t          |          | _        |                                  d S r   )rG   rH   r6   rC   r   rM   rN   s     r#   rH   zDPRQuestionEncoder.__init__  sH        *6 2 2r"   NrP   rQ   rR   rS   rT   rU   rV   rW   .c           	         ||n| j         j        }||n| j         j        }||n| j         j        }||t	          d          |+|                     ||           |                                }n.||                                dd         }nt	          d          ||j        n|j        }	|(|t          j	        ||	          n|| j         j
        k    }|!t          j        |t          j        |	          }|                     |||||||          }
|s
|
dd         S t          |
j        |
j        |
j        	          S )
aj  
        input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
            Indices of input sequence tokens in the vocabulary. To match pretraining, DPR input sequence should be
            formatted with [CLS] and [SEP] tokens as follows:

            (a) For sequence pairs (for a pair title+text for example):

            ```
            tokens:         [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
            token_type_ids:   0   0  0    0    0     0       0   0   1  1  1  1   1   1
            ```

            (b) For single sequences (for a question for example):

            ```
            tokens:         [CLS] the dog is hairy . [SEP]
            token_type_ids:   0   0   0   0  0     0   0
            ```

            DPR is a model with absolute position embeddings so it's usually advised to pad the inputs on the right
            rather than the left.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)

        Examples:

        ```python
        >>> from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizer

        >>> tokenizer = DPRQuestionEncoderTokenizer.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
        >>> model = DPRQuestionEncoder.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
        >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors="pt")["input_ids"]
        >>> embeddings = model(input_ids).pooler_output
        ```
        Nr   ro   r   r   r   rY   r   r   )r6   rT   rU   r   rJ   %warn_if_padding_and_no_attention_maskrq   r   r   r   r   r   r   r   r&   r   r   r   r   s              r#   r^   zDPRQuestionEncoder.forward  s   b 2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B] ]%>cddd"66y.QQQ#..**KK&',,..ss3KKTUUU%.%:!!@T! $ 
;v66664;#;; 
 !"[EJvVVVN''))'/!5# ( 
 
  	122;'!/w?Tahas
 
 
 	
r"   r   )r   r   r   r   rH   r   r   r   rc   r   r&   r    r^   rf   rg   s   @r#   r   r     s       y        '++/+/*.,0/3&*Y
 Y
F#Y
 !(Y
 !(	Y

  'Y
 $D>Y
 'tnY
 d^Y
 
'vs{);;	<Y
 Y
 Y
 ^Y
 Y
 Y
 Y
 Y
r"   r   zE
    The bare DPRReader transformer outputting span predictions.
    c                        e Zd Zdef fdZe	 	 	 	 	 	 ddee         dee         dee         dee         dee         d	ee         d
e	e
eedf         f         fd            Z xZS )	DPRReaderr6   c                     t                                          |           || _        t          |          | _        |                                  d S r   )rG   rH   r6   ri   r   rM   rN   s     r#   rH   zDPRReader.__init__  sH       .v66r"   NrP   rQ   rS   rT   rU   rV   rW   .c                    ||n| j         j        }||n| j         j        }||n| j         j        }||t	          d          |+|                     ||           |                                }n.||                                dd         }nt	          d          ||j        n|j        }|t          j	        ||          }| 
                    ||||||          S )a  
        input_ids (`tuple[torch.LongTensor]` of shapes `(n_passages, sequence_length)`):
            Indices of input sequence tokens in the vocabulary. It has to be a sequence triplet with 1) the question
            and 2) the passages titles and 3) the passages texts To match pretraining, DPR `input_ids` sequence should
            be formatted with [CLS] and [SEP] with the format:

            `[CLS] <question token ids> [SEP] <titles ids> [SEP] <texts ids>`

            DPR is a model with absolute position embeddings so it's usually advised to pad the inputs on the right
            rather than the left.

            Indices can be obtained using [`DPRReaderTokenizer`]. See this class documentation for more details.

            [What are input IDs?](../glossary#input-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(n_passages, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.

        Examples:

        ```python
        >>> from transformers import DPRReader, DPRReaderTokenizer

        >>> tokenizer = DPRReaderTokenizer.from_pretrained("facebook/dpr-reader-single-nq-base")
        >>> model = DPRReader.from_pretrained("facebook/dpr-reader-single-nq-base")
        >>> encoded_inputs = tokenizer(
        ...     questions=["What is love ?"],
        ...     titles=["Haddaway"],
        ...     texts=["'What Is Love' is a song recorded by the artist Haddaway"],
        ...     return_tensors="pt",
        ... )
        >>> outputs = model(**encoded_inputs)
        >>> start_logits = outputs.start_logits
        >>> end_logits = outputs.end_logits
        >>> relevance_logits = outputs.relevance_logits
        ```
        Nr   ro   r   r   )rS   rT   rU   rV   )r6   rT   rU   r   rJ   r   rq   r   r   r   r   )	r>   rP   rQ   rS   rT   rU   rV   r   r   s	            r#   r^   zDPRReader.forward  s!   ` 2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B] ]%>cddd"66y.QQQ#..**KK&',,..ss3KKTUUU%.%:!!@T!"ZFCCCN""'/!5# # 
 
 	
r"   )NNNNNN)r   r   r   r   rH   r   r   r   rc   r   r(   r    r^   rf   rg   s   @r#   r   r     s        y        '++/*.,0/3&*K
 K
F#K
 !(K
  '	K

 $D>K
 'tnK
 d^K
 
fck 22	3K
 K
 K
 ^K
 K
 K
 K
 K
r"   r   )r   rz   r-   r   r   r   r   )%r   dataclassesr   typingr   r   r   r   r   modeling_outputsr	   modeling_utilsr
   utilsr   r   r   bert.modeling_bertr   configuration_dprr   
get_loggerr   loggerr   r&   r(   r-   rC   ri   rz   r   r   r   r   r   __all__r!   r"   r#   <module>r      s"   < ; ! ! ! ! ! ! " " " " " " " "          : : : : : : - - - - - -         
 + * * * * * ( ( ( ( ( ( 
	H	%	%   

? 
? 
? 
? 
?k 
? 
?  
?   

? 
? 
? 
? 
?{ 
? 
?  
?   
? ? ? ? ?k ? ?  ?$ * * * * * * * *(52 52 52 52 52# 52 52 52p6
 6
 6
 6
 6
) 6
 6
 6
|& & & & &"4 & & &+ + + + +#5 + + +) ) ) ) ), ) ) )    
a
 a
 a
 a
 a
3 a
 a
 
a
H   
b
 b
 b
 b
 b
5 b
 b
 
b
J   
T
 T
 T
 T
 T
# T
 T
 
T
n  r"   