
     `i                     T   d Z ddlZddlmZmZ ddlZddlmZ ddlmZm	Z	m
Z
 ddlmZmZ ddlmZmZmZ dd	lmZ dd
lmZ ddlmZmZmZmZmZmZmZmZ ddlm Z  ddl!m"Z"m#Z#m$Z$ ddl%m&Z&m'Z' ddl(m)Z) ddl*m+Z+  e'j,        e-          Z.dZ/ G d dej0                  Z1 G d dej0                  Z2 G d dej0                  Z3de2iZ4 G d dej0                  Z5 G d dej0                  Z6 G d dej0                  Z7 G d  d!e          Z8 G d" d#ej0                  Z9 G d$ d%ej0                  Z:e& G d& d'e                       Z;e& G d( d)e;                      Z< e&d*+           G d, d-e;e                      Z=e& G d. d/e;                      Z> G d0 d1ej0                  Z? e&d2+           G d3 d4e;                      Z@e& G d5 d6e;                      ZAe& G d7 d8e;                      ZB G d9 d:ej0                  ZCe& G d; d<e;                      ZDd?d=ZEg d>ZFdS )@zPyTorch Data2VecText model.    N)OptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FNgelu)CacheDynamicCacheEncoderDecoderCache)GenerationMixin)GradientCheckpointingLayer))BaseModelOutputWithPastAndCrossAttentions,BaseModelOutputWithPoolingAndCrossAttentions!CausalLMOutputWithCrossAttentionsMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging)deprecate_kwarg   )Data2VecTextConfig   c                   2     e Zd ZdZ fdZ	 ddZd Z xZS )Data2VecTextForTextEmbeddingszV
    Same as BertEmbeddings with a tiny tweak for positional embeddings indexing.
    c                 l   t                                                       t          j        |j        |j        |j                  | _        t          j        |j        |j                  | _	        t          j        |j
        |j                  | _        t          j        |j        |j                  | _        t          j        |j                  | _        t#          |dd          | _        |                     dt)          j        |j                                      d          d           |                     d	t)          j        | j                                        t(          j        
          d           |j        | _        t          j        |j        |j        | j                  | _	        d S )N)padding_idxepsposition_embedding_typeabsoluteposition_ids)r    F)
persistenttoken_type_idsdtype)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutgetattrr)   register_buffertorcharangeexpandzerosr+   sizelongr&   selfconfig	__class__s     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/data2vec/modeling_data2vec_text.pyr2   z&Data2VecTextForTextEmbeddings.__init__:   s}   !|F,=v?Q_e_rsss#%<0NPVPb#c#c %'\&2H&J\%]%]" f&8f>STTTz&"<=='.v7PR\']']$EL)GHHOOPWXXej 	 	
 	
 	
 	ek$*;*@*@*B*B%*UUUbg 	 	
 	
 	

 ".#%<*F,>DL\$
 $
 $
       Nr   c                    |.|t          || j        |          }n|                     |          }||                                }n|                                d d         }|d         }|mt	          | d          r2| j        d d d |f         }|                    |d         |          }	|	}n+t          j        |t          j	        | j
        j                  }||                     |          }|                     |          }
||
z   }| j        dk    r|                     |          }||z  }|                     |          }|                     |          }|S )Nr,   r    r.   r   r0   devicer*   )"create_position_ids_from_input_idsr&   &create_position_ids_from_inputs_embedsrG   hasattrr.   rE   rC   rF   rH   r+   rQ   r7   r;   r)   r9   r<   r@   )rJ   	input_idsr.   r+   inputs_embedspast_key_values_lengthinput_shape
seq_lengthbuffered_token_type_ids buffered_token_type_ids_expandedr;   
embeddingsr9   s                rM   forwardz%Data2VecTextForTextEmbeddings.forwardS   s{    $A)TM]_uvv#JJ=YY #..**KK',,..ss3K ^

 !t-.. m*.*=aaa*n*M'3J3Q3QR]^_R`bl3m3m0!A!&[
SWSdSk!l!l!l  00;;M $ : :> J J"%::
':55"&":":<"H"H--J^^J//
\\*--
rN   c                    |                                 dd         }|d         }t          j        | j        dz   || j        z   dz   t          j        |j                  }|                    d                              |          S )z
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: torch.Tensor

        Returns: torch.Tensor
        Nr,   r    rP   r   )rG   rC   rD   r&   rH   rQ   	unsqueezerE   )rJ   rV   rX   sequence_lengthr+   s        rM   rS   zDData2VecTextForTextEmbeddings.create_position_ids_from_inputs_embeds{   s     $((**3B3/%a.|q /D4D"Dq"HPUPZcpcw
 
 
 %%a((//<<<rN   )NNNNr   )__name__
__module____qualname____doc__r2   r]   rS   __classcell__rL   s   @rM   r$   r$   4   sm         

 
 
 
 
4 rs& & & &P= = = = = = =rN   r$   c                       e Zd Zd fd	Z eddd          	 	 	 	 	 	 ddej        d	eej                 d
eej                 deej                 dee	         dee
         deej                 deej                 fd            Z xZS )Data2VecTextSelfAttentionNc                 R   t                                                       |j        |j        z  dk    r0t	          |d          s t          d|j         d|j         d          |j        | _        t          |j        |j        z            | _        | j        | j        z  | _        t          j
        |j        | j                  | _        t          j
        |j        | j                  | _        t          j
        |j        | j                  | _        t          j        |j                  | _        |pt#          |dd          | _        | j        dk    s| j        d	k    r6|j        | _        t          j        d
|j        z  dz
  | j                  | _        |j        | _        || _        d S )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()r)   r*   relative_keyrelative_key_queryr"   r    )r1   r2   r5   num_attention_headsrT   
ValueErrorintattention_head_sizeall_head_sizer   Linearquerykeyvaluer>   attention_probs_dropout_probr@   rA   r)   r8   r3   distance_embedding
is_decoder	layer_idxrJ   rK   r)   rz   rL   s       rM   r2   z"Data2VecTextSelfAttention.__init__   s    ::a??PVXhHiHi?8F$6 8 8 48 8 8  
 $*#= #&v'9F<V'V#W#W !58PPYv143EFF
9V/1CDDYv143EFF
z&"EFF'> (
'-zC
 C
$ '>99T=Y]q=q=q+1+ID(&(l1v7U3UXY3Y[_[s&t&tD# +"rN   past_key_valuepast_key_values4.58new_nameversionFhidden_statesattention_mask	head_maskencoder_hidden_statesoutput_attentionscache_positionreturnc                    |j         \  }}	}
|                     |          }|                    |d| j        | j                                      dd          }d}|d u}|Ht          |t                    r1|j        	                    | j
                  }|r|j        }n
|j        }n|}|r|n|}|r3|1|r/|j        | j
                 j        }|j        | j
                 j        }n|                     |          }|                    |d| j        | j                                      dd          }|                     |          }|                    |d| j        | j                                      dd          }|N|s|nd }|                    ||| j
        d|i          \  }}|r$t          |t                    rd|j        | j
        <   t'          j        ||                    dd                    }| j        dk    s| j        d	k    rt|j         d         |j         d         }}|>t'          j        |dz
  t&          j        |j        
                              dd          }n:t'          j        |t&          j        |j        
                              dd          }t'          j        |t&          j        |j        
                              dd          }||z
  }|                     || j        z   dz
            }|                    |j                  }| j        dk    rt'          j        d||          }||z   }n?| j        d	k    r4t'          j        d||          }t'          j        d||          }||z   |z   }|t?          j         | j                  z  }|||z   }tB          j"        #                    |d          }| $                    |          }|||z  }t'          j        ||          }|%                    dddd          &                                }|'                                d d         | j(        fz   }|                    |          }||fS )Nr,   r    r"   Fr   Trl   rm   rP   r/   zbhld,lrd->bhlrzbhrd,lrd->bhlrdimr   r	   ))shapert   viewrn   rq   	transpose
isinstancer   
is_updatedgetrz   cross_attention_cacheself_attention_cachelayerskeysvaluesru   rv   updaterC   matmulr)   tensorrH   rQ   rD   rx   r8   tor0   einsummathsqrtr   
functionalsoftmaxr@   permute
contiguousrG   rr   )rJ   r   r   r   r   r}   r   r   
batch_sizerY   _query_layerr   is_cross_attentioncurr_past_key_valuecurrent_states	key_layervalue_layerattention_scoresquery_length
key_lengthposition_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_keyattention_probscontext_layernew_context_layer_shapes                                  rM   r]   z!Data2VecTextSelfAttention.forward   s    %2$7!
Jjj//!&&z2t7OQUQijjttq
 
 
2$>&/+>?? 6,7;;DNKK
% O*9*O''*9*N''&5#2DW..- 	F/"=*"=+24>BGI-4T^DKKK00I!z2t7OQUQijjtt1 I **^44K%**B 8$:R i1oo  *7I!St)<)C)C{DN=M~<^* *&	; & F*_FY*Z*Z FAEO.t~> !<Y5H5HR5P5PQQ'>99T=Y]q=q=q'2'8';Y_Q=O*L*!&j1nEJWdWk!l!l!l!q!q" " "'l%*UbUi!j!j!j!o!oprtu!v!v"\*EJ}OcdddiijkmoppN%6H#'#:#:8dFb;bef;f#g#g #7#:#:AR#:#S#S +~==+0<8H+Wk+l+l(#36N#N  -1EEE16>NP[]q1r1r./4|<LiYm/n/n,#36T#TWs#s +di8P.Q.QQ%/.@ -//0@b/II ,,77  -	9O_kBB%--aAq99DDFF"/"4"4"6"6ss";t?Q>S"S%**+BCCo--rN   NNNNNNFN)ra   rb   rc   r2   r   rC   Tensorr   FloatTensorr   booltupler]   re   rf   s   @rM   rh   rh      s       # # # # # #6 _%0A6RRR 7;15=A+/,115e. e.|e. !!23e. E-.	e.
  ((9:e. "%e. $D>e. !.e. 
u|	e. e. e. SRe. e. e. e. e.rN   rh   c                   P     e Zd Z fdZdej        dej        dej        fdZ xZS )Data2VecTextSelfOutputc                    t                                                       t          j        |j        |j                  | _        t          j        |j        |j                  | _        t          j        |j	                  | _
        d S Nr'   )r1   r2   r   rs   r5   denser<   r=   r>   r?   r@   rI   s     rM   r2   zData2VecTextSelfOutput.__init__  sf    Yv163EFF
f&8f>STTTz&"<==rN   r   input_tensorr   c                     |                      |          }|                     |          }|                     ||z             }|S Nr   r@   r<   rJ   r   r   s      rM   r]   zData2VecTextSelfOutput.forward  @    

=11]33}|'CDDrN   ra   rb   rc   r2   rC   r   r]   re   rf   s   @rM   r   r     i        > > > > >U\  RWR^        rN   r   eagerc                       e Zd Zd fd	Zd Z eddd          	 	 	 	 	 	 dd	ej        d
eej	                 deej	                 deej	                 dee
         dee         deej                 deej                 fd            Z xZS )Data2VecTextAttentionNc                     t                                                       t          |j                 |||          | _        t          |          | _        t                      | _        d S )Nr)   rz   )	r1   r2   $DATA2VEC_TEXT_SELF_ATTENTION_CLASSES_attn_implementationrJ   r   outputsetpruned_headsr{   s       rM   r2   zData2VecTextAttention.__init__)  sc    89TU$;
 
 
	
 -V44EErN   c                    t          |          dk    rd S t          || j        j        | j        j        | j                  \  }}t          | j        j        |          | j        _        t          | j        j        |          | j        _        t          | j        j	        |          | j        _	        t          | j
        j        |d          | j
        _        | j        j        t          |          z
  | j        _        | j        j        | j        j        z  | j        _        | j                            |          | _        d S )Nr   r    r   )lenr   rJ   rn   rq   r   r   rt   ru   rv   r   r   rr   union)rJ   headsindexs      rM   prune_headsz!Data2VecTextAttention.prune_heads3  s    u::??F7490$)2OQUQb
 
u
 -TY_eDD	*49=%@@	,TY_eDD	.t{/@%QOOO )-	(EE

(R	%"&)"?$)B_"_	 -33E::rN   r|   r}   r~   r   Fr   r   r   r   r   r   r   c           	          |                      |||||||          }|                     |d         |          }	|	f|dd          z   }
|
S )Nr   r   r   r}   r   r   r   r    )rJ   r   )rJ   r   r   r   r   r}   r   r   self_outputsattention_outputoutputss              rM   r]   zData2VecTextAttention.forwardE  sf     yy)"7+/) ! 
 
  ;;|AFF#%QRR(88rN   r   r   )ra   rb   rc   r2   r   r   rC   r   r   r   r   r   r   r]   re   rf   s   @rM   r   r   (  s       " " " " " "; ; ;$ _%0A6RRR 7;15=A+/,115 | !!23 E-.	
  ((9: "% $D> !. 
u|	   SR    rN   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )Data2VecTextIntermediatec                    t                                                       t          j        |j        |j                  | _        t          |j        t                    rt          |j                 | _        d S |j        | _        d S r   )r1   r2   r   rs   r5   intermediate_sizer   r   
hidden_actstrr
   intermediate_act_fnrI   s     rM   r2   z!Data2VecTextIntermediate.__init__`  sn    Yv163KLL
f'-- 	9'-f.?'@D$$$'-'8D$$$rN   r   r   c                 Z    |                      |          }|                     |          }|S r   )r   r   )rJ   r   s     rM   r]   z Data2VecTextIntermediate.forwardh  s,    

=1100??rN   r   rf   s   @rM   r   r   _  s^        9 9 9 9 9U\ el        rN   r   c                   P     e Zd Z fdZdej        dej        dej        fdZ xZS )Data2VecTextOutputc                    t                                                       t          j        |j        |j                  | _        t          j        |j        |j                  | _        t          j	        |j
                  | _        d S r   )r1   r2   r   rs   r   r5   r   r<   r=   r>   r?   r@   rI   s     rM   r2   zData2VecTextOutput.__init__p  sf    Yv79KLL
f&8f>STTTz&"<==rN   r   r   r   c                     |                      |          }|                     |          }|                     ||z             }|S r   r   r   s      rM   r]   zData2VecTextOutput.forwardv  r   rN   r   rf   s   @rM   r   r   o  r   rN   r   c                   0    e Zd Zd fd	Z eddd          	 	 	 	 	 	 	 ddej        d	eej                 d
eej                 deej                 deej                 dee	         dee
         deej                 deej                 fd            Zd Z xZS )Data2VecTextLayerNc                    t                                                       |j        | _        d| _        t	          ||          | _        |j        | _        |j        | _        | j        r0| j        st          |  d          t	          |d|          | _	        t          |          | _        t          |          | _        d S )Nr    rz   z> should be used as a decoder model if cross attention is addedr*   r   )r1   r2   chunk_size_feed_forwardseq_len_dimr   	attentionry   add_cross_attentionro   crossattentionr   intermediater   r   rJ   rK   rz   rL   s      rM   r2   zData2VecTextLayer.__init__  s    '-'E$.vKKK +#)#= # 	? j D!h!h!hiii"7
i# # #D 5V<<(00rN   r|   r}   r~   r   Fr   r   r   r   encoder_attention_maskr   r   r   c	           	      h   |                      ||||||          }	|	d         }
|	dd          }| j        rV|Tt          | d          st          d|  d          |                     |
||||||          }|d         }
||dd          z   }t          | j        | j        | j        |
          }|f|z   }|S )N)r   r   r   r}   r   r   r    r   z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r   )	r   ry   rT   ro   r   r   feed_forward_chunkr   r   )rJ   r   r   r   r   r   r}   r   r   self_attention_outputsr   r   cross_attention_outputslayer_outputs                 rM   r]   zData2VecTextLayer.forward  s!    "&)/+) "0 "
 "
 2!4(,? 	<4@4!122  Dd D D D  
 '+&9&9 5#&; /"3- ': ' '#  7q9 7 ;;G0#T%A4CSUe
 
  /G+rN   c                 \    |                      |          }|                     ||          }|S r   )r   r   )rJ   r   intermediate_outputr   s       rM   r   z$Data2VecTextLayer.feed_forward_chunk  s2    "//0@AA{{#68HIIrN   r   )NNNNNFN)ra   rb   rc   r2   r   rC   r   r   r   r   r   r   r]   r   re   rf   s   @rM   r   r   ~  s.       1 1 1 1 1 1  _%0A6RRR 7;15=A>B+/,115. .|. !!23. E-.	.
  ((9:. !)): ;. "%. $D>. !.. 
u|	. . . SR.`      rN   r   c                   H    e Zd Zd fd	Z	 	 	 	 	 	 	 	 	 	 ddej        deej                 deej                 deej                 d	eej                 d
ee         dee	         dee	         dee	         dee	         deej                 de
eej                 ef         fdZ xZS )Data2VecTextEncoderNc                     t                                                       | _        t          j        fdt          j                  D                       | _        d| _        d S )Nc                 2    g | ]}t          |           S )r   )r   ).0irK   s     rM   
<listcomp>z0Data2VecTextEncoder.__init__.<locals>.<listcomp>  s'    #t#t#tq$5f$J$J$J#t#t#trN   F)	r1   r2   rK   r   
ModuleListrangenum_hidden_layerslayergradient_checkpointingr   s    ` rM   r2   zData2VecTextEncoder.__init__  sb    ]#t#t#t#tTYZ`ZrTsTs#t#t#tuu
&+###rN   FTr   r   r   r   r   r}   	use_cacher   output_hidden_statesreturn_dictr   r   c                    |	rdnd }|rdnd }|r| j         j        rdnd }| j        r%| j        r|rt                              d           d}|rD| j         j        r8|6t          t          | j                   t          | j                             }|rO| j         j        rCt          |t                    r.t                              d           t          j        |          }t          | j                  D ]Z\  }}|	r||fz   }|||         nd } |||||||||          }|d         }|r$||d         fz   }| j         j        r||d	         fz   }[|	r||fz   }|
st          d
 |||||fD                       S t          |||||          S )N zZ`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...F)rK   zPassing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.58.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.)r   r}   r   r   r   r    r"   c              3      K   | ]}||V  	d S r   r  )r  vs     rM   	<genexpr>z.Data2VecTextEncoder.forward.<locals>.<genexpr>  s4       
 
 =  !===
 
rN   )last_hidden_stater}   r   
attentionscross_attentions)rK   r   r
  trainingloggerwarning_oncery   r   r   r   r   from_legacy_cache	enumerater	  r   )rJ   r   r   r   r   r   r}   r  r   r  r  r   all_hidden_statesall_self_attentionsall_cross_attentionsr  layer_modulelayer_head_masklayer_outputss                      rM   r]   zData2VecTextEncoder.forward  sc    #7@BBD$5?bb4%6d4;;Zdrr`d& 	"4= 	" "##p   "	 	v/ 	vO4K1,dk2R2R2RT`hlhsTtTtTtuuO 	U/ 	UJPU4V4V 	U\  
 2COTTO(44 	V 	VOA|# I$58H$H!.7.CillO(L%'= /"3-	 	 	M *!,M  V&9]1=M<O&O#;2 V+?=QRCSBU+U( 	E 1]4D D 	 
 
 "#%'(
 
 
 
 
 
 9+++*1
 
 
 	
rN   r   )
NNNNNNFFTN)ra   rb   rc   r2   rC   r   r   r   r   r   r   r   r   r]   re   rf   s   @rM   r   r     sP       , , , , , , 7;15=A>B+/$(,1/4&*15P
 P
|P
 !!23P
 E-.	P

  ((9:P
 !)): ;P
 "%P
 D>P
 $D>P
 'tnP
 d^P
 !.P
 
uU\"$MM	NP
 P
 P
 P
 P
 P
 P
 P
rN   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )Data2VecTextPoolerc                     t                                                       t          j        |j        |j                  | _        t          j                    | _        d S r   )r1   r2   r   rs   r5   r   Tanh
activationrI   s     rM   r2   zData2VecTextPooler.__init__#  sC    Yv163EFF
'))rN   r   r   c                 r    |d d df         }|                      |          }|                     |          }|S Nr   )r   r%  )rJ   r   first_token_tensorpooled_outputs       rM   r]   zData2VecTextPooler.forward(  s@     +111a40

#56666rN   r   rf   s   @rM   r"  r"  "  s^        $ $ $ $ $
U\ el        rN   r"  c                   0    e Zd ZU eed<   dZdZddgZd ZdS )Data2VecTextPreTrainedModelrK   data2vec_textTr$   r   c                    t          |t          j                  rT|j        j                            d| j        j                   |j         |j        j        	                                 dS dS t          |t          j
                  r_|j        j                            d| j        j                   |j        +|j        j        |j                 	                                 dS dS t          |t          j                  rmt          |d          r%|j        |j        j        	                                 t          |d          r*|j        %|j        j                            d           dS dS dS dS )zInitialize the weightsg        )meanstdNbiasweightg      ?)r   r   rs   r1  datanormal_rK   initializer_ranger0  zero_r3   r&   r<   rT   fill_)rJ   modules     rM   _init_weightsz)Data2VecTextPreTrainedModel._init_weights8  sk   fbi(( 	. M&&CT[5R&SSS{& &&((((( '&-- 	.M&&CT[5R&SSS!-"6#56<<>>>>> .--- 	.vv&& )6;+B &&(((vx(( .V]-F"((-----		. 	.. .-F-FrN   N)	ra   rb   rc   r!   __annotations__base_model_prefixsupports_gradient_checkpointing_no_split_modulesr8  r  rN   rM   r+  r+  1  sI         '&*#8:MN. . . . .rN   r+  c            "           e Zd ZdZd fd	Zd Zd Zd Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 dde	e
j                 d	e	e
j                 d
e	e
j                 de	e
j                 de	e
j                 de	e
j                 de	e
j                 de	e
j                 de	e         de	e         de	e         de	e         de	e         de	e
j                 deee
j                 ef         fd            Z xZS )Data2VecTextModela2  

    The model can behave as an encoder (with only self-attention) as well as a decoder, in which case a layer of
    cross-attention is added between the self-attention layers, following the architecture described in *Attention is
    all you need*_ by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz
    Kaiser and Illia Polosukhin.

    To behave as an decoder the model needs to be initialized with the `is_decoder` argument of the configuration set
    to `True`. To be used in a Seq2Seq model, the model needs to initialized with both `is_decoder` argument and
    `add_cross_attention` set to `True`; an `encoder_hidden_states` is then expected as an input to the forward pass.

    .. _*Attention is all you need*: https://huggingface.co/papers/1706.03762

    Tc                     t                                          |           || _        t          |          | _        t          |          | _        |rt          |          nd| _        | 	                                 dS )zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)
r1   r2   rK   r$   r\   r   encoderr"  pooler	post_init)rJ   rK   add_pooling_layerrL   s      rM   r2   zData2VecTextModel.__init__\  st    
 	   7??*6224EO(0004 	rN   c                     | j         j        S r   r\   r7   rJ   s    rM   get_input_embeddingsz&Data2VecTextModel.get_input_embeddingsl  s    ..rN   c                     || j         _        d S r   rE  )rJ   rv   s     rM   set_input_embeddingsz&Data2VecTextModel.set_input_embeddingso  s    */'''rN   c                     |                                 D ]/\  }}| j        j        |         j                            |           0dS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr@  r	  r   r   )rJ   heads_to_pruner	  r   s       rM   _prune_headszData2VecTextModel._prune_headsr  sU    
 +0022 	C 	CLE5Lu%/;;EBBBB	C 	CrN   NrU   r   r.   r+   r   rV   r   r   r}   r  r   r  r  r   r   c                    ||n| j         j        }||n| j         j        }||n| j         j        }| j         j        r|
|
n| j         j        }
nd}
||t          d          |+|                     ||           |                                }n.||                                d d         }nt          d          |\  }}||j	        n|j	        }d}|	Bt          |	t                    s|	d         d         j        d         n|	                                }|t          j        |||z   f|          }|gt!          | j        d          r1| j        j        d d d |f         }|                    ||          }|}n!t          j        |t          j        |	          }|                     ||          }| j         j        rL|J|                                \  }}}||f}|t          j        ||          }|                     |          }nd }|                     || j         j                  }|                     |||||
          }|                     ||||||	|
||||          }|d         }| j        |                     |          nd }|s||f|dd          z   S t9          |||j        |j        |j        |j                   S )NFzDYou cannot specify both input_ids and inputs_embeds at the same timer,   z5You have to specify either input_ids or inputs_embedsr   r   )rQ   r.   rP   )rU   r+   r.   rV   rW   )
r   r   r   r   r}   r  r   r  r  r   r    )r  pooler_outputr}   r   r  r  )!rK   r   r  use_return_dictry   r  ro   %warn_if_padding_and_no_attention_maskrG   rQ   r   r   r   get_seq_lengthrC   onesrT   r\   r.   rE   rF   rH   get_extended_attention_maskinvert_attention_maskget_head_maskr  r@  rA  r   r}   r   r  r  ) rJ   rU   r   r.   r+   r   rV   r   r   r}   r  r   r  r  r   rX   r   rY   rQ   rW   rZ   r[   extended_attention_maskencoder_batch_sizeencoder_sequence_lengthr   encoder_hidden_shapeencoder_extended_attention_maskembedding_outputencoder_outputssequence_outputr)  s                                    rM   r]   zData2VecTextModel.forwardz  s{   $ 2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B];! 	%.%:		@UIII ]%>cddd"66y.QQQ#..**KK&',,..ss3KKTUUU!,
J%.%:!!@T!"& "/5996"1%+B//$3355 # !"Z*jCY6Y)ZdjkkkN!t(899 [*./*HKZK*X'3J3Q3QR\^h3i3i0!A!&[
SY!Z!Z!Z 150P0PQ_al0m0m ;! 	3&;&G=R=W=W=Y=Y: 7$68O#P %-).4HQW)X)X)X&.2.H.HI_.`.`++.2+ &&y$+2OPP	??%)'#9 + 
 
 ,,2"7#B+/!5#) ' 
 
 *!,8<8OO444UY 	J#]3oabb6III;-'+;)7&1,=
 
 
 	
rN   )T)NNNNNNNNNNNNNN)ra   rb   rc   rd   r2   rG  rI  rM  r   r   rC   r   r   r   r   r   r   r]   re   rf   s   @rM   r>  r>  K  s              / / /0 0 0C C C  -11515/3,0048<9=+/$(,0/3&*15s
 s
EL)s
 !.s
 !.	s

 u|,s
 EL)s
  -s
  (5s
 !) 6s
 "%s
 D>s
 $D>s
 'tns
 d^s
 !.s
  
uU\"$PP	Q!s
 s
 s
 ^s
 s
 s
 s
 s
rN   r>  zX
    Data2VecText Model with a `language modeling` head on top for CLM fine-tuning.
    )custom_introc            $           e Zd ZddgZ fdZd Zd Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddee	j
                 dee	j                 d	ee	j
                 d
ee	j
                 dee	j                 dee	j                 dee	j                 dee	j                 dee	j
                 dee         dee         dee         dee         dee         dee	j                 deeef         f d            Z xZS )Data2VecTextForCausalLMlm_head.decoder.weightlm_head.decoder.biasc                    t                                          |           |j        st                              d           t          |d          | _        t          |          | _        | 	                                 d S )NzTIf you want to use `Data2VecTextLMHeadModel` as a standalone, add `is_decoder=True.`FrC  
r1   r2   ry   r  warningr>  r,  Data2VecTextLMHeadlm_headrB  rI   s     rM   r2   z Data2VecTextForCausalLM.__init__  sv         	sNNqrrr.vOOO)&11 	rN   c                     | j         j        S r   ri  decoderrF  s    rM   get_output_embeddingsz-Data2VecTextForCausalLM.get_output_embeddings      |##rN   c                     || j         _        d S r   rk  rJ   new_embeddingss     rM   set_output_embeddingsz-Data2VecTextForCausalLM.set_output_embeddings      -rN   NrU   r   r.   r+   r   rV   r   r   labelsr}   r  r   r  r  r   r   c                 n   ||n| j         j        }|	d}|                     |||||||||
|||||          }|d         }|                     |          }d}|	 | j        ||	fd| j         j        i|}|s|f|dd         z   }||f|z   n|S t          |||j        |j        |j	        |j
                  S )aA  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the left-to-right language modeling loss (next word prediction). Indices should be in
            `[-100, 0, ..., config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are
            ignored (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

        Example:

        ```python
        >>> from transformers import AutoTokenizer, Data2VecTextForCausalLM, Data2VecTextConfig
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("facebook/data2vec-text-base")
        >>> config = Data2VecTextConfig.from_pretrained("facebook/data2vec-text-base")
        >>> config.is_decoder = True
        >>> model = Data2VecTextForCausalLM.from_pretrained("facebook/data2vec-text-base", config=config)

        >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
        >>> outputs = model(**inputs)

        >>> prediction_logits = outputs.logits
        ```NF)r   r.   r+   r   rV   r   r   r}   r  r   r  r  r   r   r4   r"   )losslogitsr}   r   r  r  )rK   rP  r,  ri  loss_functionr4   r   r}   r   r  r  )rJ   rU   r   r.   r+   r   rV   r   r   rt  r}   r  r   r  r  r   kwargsr   r^  prediction_scoreslm_lossr   s                         rM   r]   zData2VecTextForCausalLM.forward  s3   T &1%<kk$+B]I$$))%'"7#9+/!5#) % 
 
" "!* LL99(d(!   ;1 	 G  	L')GABBK7F,3,?WJ''VK0$#3!/)$5
 
 
 	
rN   )NNNNNNNNNNNNNNN)ra   rb   rc   _tied_weights_keysr2   rm  rr  r   r   rC   
LongTensorr   r   r   r   r   r   r   r]   re   rf   s   @rM   ra  ra    s        34JK
 
 
 
 
$ $ $. . .  156:59371559=A>B-1+/$(,0/3&*15!U
 U
E,-U
 !!23U
 !!12	U

 u/0U
 E-.U
   12U
  ((9:U
 !)): ;U
 )*U
 "%U
 D>U
 $D>U
 'tnU
 d^U
  !.!U
$ 
u77	8%U
 U
 U
 ^U
 U
 U
 U
 U
rN   ra  c                       e Zd ZddgZ fdZd Zd Ze	 	 	 	 	 	 	 	 	 	 	 	 ddee	j
                 dee	j                 d	ee	j
                 d
ee	j
                 dee	j                 dee	j                 dee	j                 dee	j                 dee	j
                 dee         dee         dee         deeef         fd            Z xZS )Data2VecTextForMaskedLMrb  rc  c                    t                                          |           |j        rt                              d           t          |d          | _        t          |          | _        | 	                                 d S )NzsIf you want to use `Data2VecTextForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention.Fre  rf  rI   s     rM   r2   z Data2VecTextForMaskedLM.__init__h  s        	NN1  
 /vOOO)&11 	rN   c                     | j         j        S r   rk  rF  s    rM   rm  z-Data2VecTextForMaskedLM.get_output_embeddingsw  rn  rN   c                     || j         _        d S r   rk  rp  s     rM   rr  z-Data2VecTextForMaskedLM.set_output_embeddingsz  rs  rN   NrU   r   r.   r+   r   rV   r   r   rt  r   r  r  r   c                    ||n| j         j        }|                     |||||||||
||          }|d         }|                     |          }d}|	et	                      }|	                    |j                  }	 ||                    d| j         j                  |	                    d                    }|s|f|dd         z   }||f|z   n|S t          |||j
        |j                  S )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        N)
r   r.   r+   r   rV   r   r   r   r  r  r   r,   r"   rv  rw  r   r  )rK   rP  r,  ri  r   r   rQ   r   r4   r   r   r  )rJ   rU   r   r.   r+   r   rV   r   r   rt  r   r  r  r   r^  rz  masked_lm_lossloss_fctr   s                      rM   r]   zData2VecTextForMaskedLM.forward}  s1   , &1%<kk$+B]$$))%'"7#9/!5# % 
 
 "!* LL99'))HYY0788F%X&7&<&<RAW&X&XZ`ZeZefhZiZijjN 	Z')GABBK7F3A3M^%..SYY$!/)	
 
 
 	
rN   )NNNNNNNNNNNN)ra   rb   rc   r|  r2   rm  rr  r   r   rC   r}  r   r   r   r   r   r]   re   rf   s   @rM   r  r  d  s       24JK    $ $ $. . .  156:59371559=A>B-1,0/3&*7
 7
E,-7
 !!237
 !!12	7

 u/07
 E-.7
   127
  ((9:7
 !)): ;7
 )*7
 $D>7
 'tn7
 d^7
 
un$	%7
 7
 7
 ^7
 7
 7
 7
 7
rN   r  c                   .     e Zd ZdZ fdZd Zd Z xZS )rh  z/Data2VecText Head for masked language modeling.c                    t                                                       t          j        |j        |j                  | _        t          j        |j        |j                  | _        t          j        |j        |j	                  | _
        t          j        t          j        |j	                            | _        | j        | j
        _        d S r   )r1   r2   r   rs   r5   r   r<   r=   
layer_normr4   rl  	ParameterrC   rF   r0  rI   s     rM   r2   zData2VecTextLMHead.__init__  s    Yv163EFF
,v'9v?TUUUy!3V5FGGLV->!?!?@@	 IrN   c                     |                      |          }t          |          }|                     |          }|                     |          }|S r   )r   r   r  rl  rJ   featuresry  xs       rM   r]   zData2VecTextLMHead.forward  sE    JJx  GGOOA LLOOrN   c                     | j         j        j        j        dk    r| j        | j         _        d S | j         j        | _        d S )Nmeta)rl  r0  rQ   typerF  s    rM   _tie_weightszData2VecTextLMHead._tie_weights  s<     <#(F22 $	DL)DIIIrN   )ra   rb   rc   rd   r2   r]   r  re   rf   s   @rM   rh  rh    s\        99& & & & &  * * * * * * *rN   rh  z
    Data2VecText Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    c                   F    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 deej                 deej                 d	eej                 d
ee	         dee	         dee	         de
eef         fd            Z xZS )%Data2VecTextForSequenceClassificationc                     t                                          |           |j        | _        || _        t	          |d          | _        t          |          | _        |                                  d S NFre  )	r1   r2   
num_labelsrK   r>  r,  Data2VecTextClassificationHead
classifierrB  rI   s     rM   r2   z.Data2VecTextForSequenceClassification.__init__  sh        +.vOOO8@@ 	rN   NrU   r   r.   r+   r   rV   rt  r   r  r  r   c                    |
|
n| j         j        }
|                     ||||||||	|
	  	        }|d         }|                     |          }d}|t|                    |j                  }| j         j        f| j        dk    rd| j         _        nN| j        dk    r7|j        t          j
        k    s|j        t          j        k    rd| j         _        nd| j         _        | j         j        dk    rWt                      }| j        dk    r1 ||                                |                                          }n |||          }n| j         j        dk    rGt                      } ||                    d| j                  |                    d                    }n*| j         j        dk    rt!                      } |||          }|
s|f|d	d         z   }||f|z   n|S t#          |||j        |j        
          S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r.   r+   r   rV   r   r  r  r   r    
regressionsingle_label_classificationmulti_label_classificationr,   r"   r  )rK   rP  r,  r  r   rQ   problem_typer  r0   rC   rH   rp   r   squeezer   r   r   r   r   r  rJ   rU   r   r.   r+   r   rV   rt  r   r  r  r   r^  rw  rv  r  r   s                    rM   r]   z-Data2VecTextForSequenceClassification.forward  s    ( &1%<kk$+B]$$))%'/!5# % 

 

 "!*11YYv}--F{'/?a''/;DK,,_q((flej.H.HFL\a\eLeLe/LDK,,/KDK,{'<77"99?a''#8FNN$4$4fnn6F6FGGDD#8FF33DD)-JJJ+--xB @ @&++b//RR)-III,..x// 	FY,F)-)9TGf$$vE'!/)	
 
 
 	
rN   
NNNNNNNNNN)ra   rb   rc   r2   r   r   rC   r}  r   r   r   r   r   r]   re   rf   s   @rM   r  r    sL       	 	 	 	 	  156:59371559-1,0/3&*E
 E
E,-E
 !!23E
 !!12	E

 u/0E
 E-.E
   12E
 )*E
 $D>E
 'tnE
 d^E
 
u..	/E
 E
 E
 ^E
 E
 E
 E
 E
rN   r  c                   F    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 deej                 deej                 d	eej                 d
ee	         dee	         dee	         de
eef         fd            Z xZS )Data2VecTextForMultipleChoicec                    t                                          |           t          |          | _        t	          j        |j                  | _        t	          j        |j	        d          | _
        |                                  d S )Nr    )r1   r2   r>  r,  r   r>   r?   r@   rs   r5   r  rB  rI   s     rM   r2   z&Data2VecTextForMultipleChoice.__init__5  sm       .v66z&"<==)F$6:: 	rN   NrU   r.   r   rt  r+   r   rV   r   r  r  r   c                    |
|
n| j         j        }
||j        d         n|j        d         }|)|                    d|                    d                    nd}|)|                    d|                    d                    nd}|)|                    d|                    d                    nd}|)|                    d|                    d                    nd}|=|                    d|                    d          |                    d                    nd}|                     ||||||||	|
	  	        }|d         }|                     |          }|                     |          }|                    d|          }d}|4t                      }|	                    |j
                  } |||          }|
s|f|dd         z   }||f|z   n|S t          |||j        |j                  S )a[  
        input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        token_type_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.
        Nr    r,   r   )r+   r.   r   r   rV   r   r  r  r"   r  )rK   rP  r   r   rG   r,  r@   r  r   r   rQ   r   r   r  )rJ   rU   r.   r   rt  r+   r   rV   r   r  r  num_choicesflat_input_idsflat_position_idsflat_token_type_idsflat_attention_maskflat_inputs_embedsr   r)  rw  reshaped_logitsrv  r  r   s                           rM   r]   z%Data2VecTextForMultipleChoice.forward?  sD   X &1%<kk$+B],5,Aioa((}GZ[\G]CLCXINN2,>,>???^bLXLdL--b,2C2CB2G2GHHHjnR`Rln11"n6I6I"6M6MNNNrvR`Rln11"n6I6I"6M6MNNNrv ( r=#5#5b#9#9=;M;Mb;Q;QRRR 	 $$*..,/!5# % 

 

  
]33// ++b+66'))HYY566F8OV44D 	F%''!""+5F)-)9TGf$$vE("!/)	
 
 
 	
rN   r  )ra   rb   rc   r2   r   r   rC   r}  r   r   r   r   r   r]   re   rf   s   @rM   r  r  3  sL             15596:-1371559,0/3&*Y
 Y
E,-Y
 !!12Y
 !!23	Y

 )*Y
 u/0Y
 E-.Y
   12Y
 $D>Y
 'tnY
 d^Y
 
u//	0Y
 Y
 Y
 ^Y
 Y
 Y
 Y
 Y
rN   r  c                   F    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 deej                 deej                 d	eej                 d
ee	         dee	         dee	         de
eef         fd            Z xZS )"Data2VecTextForTokenClassificationc                 Z   t                                          |           |j        | _        t          |d          | _        |j        |j        n|j        }t          j        |          | _	        t          j
        |j        |j                  | _        |                                  d S r  )r1   r2   r  r>  r,  classifier_dropoutr?   r   r>   r@   rs   r5   r  rB  rJ   rK   r  rL   s      rM   r2   z+Data2VecTextForTokenClassification.__init__  s        +.vOOO)/)B)NF%%TZTn 	 z"455)F$68IJJ 	rN   NrU   r   r.   r+   r   rV   rt  r   r  r  r   c                    |
|
n| j         j        }
|                     ||||||||	|
	  	        }|d         }|                     |          }|                     |          }d}|`t                      }|                    |j                  } ||                    d| j	                  |                    d                    }|
s|f|dd         z   }||f|z   n|S t          |||j        |j                  S )z
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        Nr  r   r,   r"   r  )rK   rP  r,  r@   r  r   r   rQ   r   r  r   r   r  r  s                    rM   r]   z*Data2VecTextForTokenClassification.forward  s*   $ &1%<kk$+B]$$))%'/!5# % 

 

 "!*,,7711'))HYYv}--F8FKKDO<<fkk"ooNND 	FY,F)-)9TGf$$vE$!/)	
 
 
 	
rN   r  )ra   rb   rc   r2   r   r   rC   r}  r   r   r   r   r   r]   re   rf   s   @rM   r  r    s8             156:59371559-1,0/3&*4
 4
E,-4
 !!234
 !!12	4

 u/04
 E-.4
   124
 )*4
 $D>4
 'tn4
 d^4
 
u++	,4
 4
 4
 ^4
 4
 4
 4
 4
rN   r  c                   (     e Zd ZdZ fdZd Z xZS )r  z-Head for sentence-level classification tasks.c                 4   t                                                       t          j        |j        |j                  | _        |j        |j        n|j        }t          j        |          | _	        t          j        |j        |j
                  | _        d S r   )r1   r2   r   rs   r5   r   r  r?   r>   r@   r  out_projr  s      rM   r2   z'Data2VecTextClassificationHead.__init__  s    Yv163EFF
)/)B)NF%%TZTn 	 z"455	&"4f6GHHrN   c                     |d d dd d f         }|                      |          }|                     |          }t          j        |          }|                      |          }|                     |          }|S r'  )r@   r   rC   tanhr  r  s       rM   r]   z&Data2VecTextClassificationHead.forward  sj    QQQ111WLLOOJJqMMJqMMLLOOMM!rN   )ra   rb   rc   rd   r2   r]   re   rf   s   @rM   r  r    sR        77I I I I I      rN   r  c                   b    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 deej                 deej                 d	eej                 d
eej                 dee	         dee	         dee	         de
eef         fd            Z xZS ) Data2VecTextForQuestionAnsweringc                     t                                          |           |j        | _        t          |d          | _        t          j        |j        |j                  | _        | 	                                 d S r  )
r1   r2   r  r>  r,  r   rs   r5   
qa_outputsrB  rI   s     rM   r2   z)Data2VecTextForQuestionAnswering.__init__  sk        +.vOOO)F$68IJJ 	rN   NrU   r   r.   r+   r   rV   start_positionsend_positionsr   r  r  r   c                    ||n| j         j        }|                     |||||||	|
|	  	        }|d         }|                     |          }|                    dd          \  }}|                    d                                          }|                    d                                          }d }||t          |                                          dk    r|                    d          }t          |                                          dk    r|                    d          }|                    d          }|	                    d|          }|	                    d|          }t          |          } |||          } |||          }||z   dz  }|s||f|dd          z   }||f|z   n|S t          ||||j        |j                  S )	Nr  r   r    r,   r   )ignore_indexr"   )rv  start_logits
end_logitsr   r  )rK   rP  r,  r  splitr  r   r   rG   clampr   r   r   r  )rJ   rU   r   r.   r+   r   rV   r  r  r   r  r  r   r^  rw  r  r  
total_lossignored_indexr  
start_lossend_lossr   s                          rM   r]   z(Data2VecTextForQuestionAnswering.forward  s    &1%<kk$+B]$$))%'/!5# % 

 

 "!*11#)<<r<#:#: j#++B//::<<''++6688

&=+D?''))**Q.."1"9"9""="==%%''((1,, - 5 5b 9 9(--a00M-33A}EEO)//=AAM']CCCH!,@@Jx
M::H$x/14J 	R"J/'!""+=F/9/EZMF**6Q+%!!/)
 
 
 	
rN   )NNNNNNNNNNN)ra   rb   rc   r2   r   r   rC   r}  r   r   r   r   r   r]   re   rf   s   @rM   r  r    sM             156:593715596:48,0/3&*>
 >
E,->
 !!23>
 !!12	>

 u/0>
 E-.>
   12>
 "%"23>
   01>
 $D>>
 'tn>
 d^>
 
u22	3>
 >
 >
 ^>
 >
 >
 >
 >
rN   r  c                     |                      |                                          }t          j        |d                              |          |z   |z  }|                                |z   S )a  
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`.

    Args:
        x: torch.Tensor x:

    Returns: torch.Tensor
    r    r   )nerp   rC   cumsumtype_asrH   )rU   r&   rW   maskincremental_indicess        rM   rR   rR   I  sg     <<$$((**D <!444<<TBBE[[_cc##%%33rN   )ra  r  r  r  r  r  r>  r+  )r   )Grd   r   typingr   r   rC   r   torch.nnr   r   r   activationsr
   r   cache_utilsr   r   r   
generationr   modeling_layersr   modeling_outputsr   r   r   r   r   r   r   r   modeling_utilsr   pytorch_utilsr   r   r   utilsr   r   utils.deprecationr   configuration_data2vec_textr!   
get_loggerra   r  _HIDDEN_STATES_START_POSITIONModuler$   rh   r   r   r   r   r   r   r   r"  r+  r>  ra  r  rh  r  r  r  r  r  rR   __all__r  rN   rM   <module>r     s   " !  " " " " " " " "        A A A A A A A A A A ' ' ' ' ' ' ' ' C C C C C C C C C C ) ) ) ) ) ) 9 9 9 9 9 9	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 . - - - - - l l l l l l l l l l , , , , , , , , 0 0 0 0 0 0 ; ; ; ; ; ; 
	H	%	% !" V= V= V= V= V=BI V= V= V=tB. B. B. B. B.	 B. B. B.L    RY    &( $3 3 3 3 3BI 3 3 3n    ry           E E E E E2 E E ERW
 W
 W
 W
 W
") W
 W
 W
v        . . . . ./ . . .2 b
 b
 b
 b
 b
3 b
 b
 b
J   
k
 k
 k
 k
 k
9? k
 k
 
k
\ P
 P
 P
 P
 P
9 P
 P
 P
h* * * * * * * *>   R
 R
 R
 R
 R
,G R
 R
 R
j e
 e
 e
 e
 e
$? e
 e
 e
P D
 D
 D
 D
 D
)D D
 D
 D
P    RY   , J
 J
 J
 J
 J
'B J
 J
 J
Z4 4 4 4 	 	 	rN   