
     `iy                      d Z ddlmZ ddlZddlZddlZddlm	Z	m
Z
mZ ddlmZmZmZmZmZ ddlmZmZmZmZmZmZmZmZmZ ddlmZmZ dd	lm Z  d
dl!m"Z"  e j#        e$          Z%dZ&dZ'd Z(d Z)d Z*d Z+ G d dej,        j-                  Z. G d dej,        j-                  Z/ G d dej,        j-                  Z0 G d dej,        j-                  Z1 G d dej,        j-                  Z2 G d dej,        j-                  Z3 G d dej,        j-                  Z4 G d  d!ej,        j-                  Z5 G d" d#ej,        j-                  Z6 G d$ d%ej,        j-                  Z7 G d& d'ej,        j-                  Z8 G d( d)e          Z9d*Z:d+Z; e
d,e:           G d- d.ej,        j-                              Z< e
d,e:           G d/ d0e9                      Z= e
d1e:           G d2 d3e9e                      Z> G d4 d5ej,        j-                  Z? e
d6e:           G d7 d8e9e                      Z@ e
d9e:           G d: d;e9e                      ZA G d< d=ej,        j-                  ZBd@d>ZCg d?ZDdS )AzPyTorch ESM model.    )annotationsN   )add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forward)+TFBaseModelOutputWithPastAndCrossAttentions.TFBaseModelOutputWithPoolingAndCrossAttentionsTFMaskedLMOutputTFSequenceClassifierOutputTFTokenClassifierOutput)	TFMaskedLanguageModelingLossTFModelInputTypeTFPreTrainedModelTFSequenceClassificationLossTFTokenClassificationLossget_initializerkeras
shape_listunpack_inputs)check_embeddings_within_boundsstable_softmax)logging   )	EsmConfigzfacebook/esm2_t6_8M_UR50Dr   c                h    t          j        | dd          \  }}t          j        | |fd          S )N   axis)tfsplitconcat)xx1x2s      {/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/esm/modeling_tf_esm.pyrotate_halfr'   6   s7    Xa$$$FB9rc2YR((((    c                    |d d d d d t          j        |           d         d d f         }|d d d d d t          j        |           d         d d f         }| |z  t          |           |z  z   S )N)r    shaper'   )r#   cossins      r&   apply_rotary_pos_embr.   ;   s~    
aaa%bhqkk"o%qqq(
)C
aaa%bhqkk"o%qqq(
)CGA,--r(   c                F    | t           j                            |           z   S )zJMake layer symmetric in final two dimensions, used for contact prediction.)r    linalgmatrix_transpose)r#   s    r&   
symmetrizer2   B   s    ry))!,,,,r(   c                    t          j        | dd          }t          j        | dd          }t          j        | dd          }||z  }||z  }| |z
  }|S )z=Perform average product correct, used for contact prediction.r   T)keepdimsr*   )r   r*   )r    
reduce_sum)r#   a1a2a12avg
normalizeds         r&   average_product_correctr;   G   se    	q"t	,	,	,B	q"t	,	,	,B
-8d
3
3
3C
r'C
)CSJr(   c                  @     e Zd ZdZdd fdZ fdZddZddZ xZS )TFRotaryEmbeddingz
    Rotary position embeddings based on those in
    [RoFormer](https://huggingface.co/docs/transformers/model_doc/roformer). Query and keys are transformed by rotation
    matrices which depend on their relative positions.
    Ndimintc                Z    t                                          |           || _        d S )Nname)super__init__r>   )selfr>   rB   	__class__s      r&   rD   zTFRotaryEmbedding.__init__Z   s+    d### r(   c           
     b   t                                          |           |                     d| j        dz  ft          j        t          d          d          | _        | j                            ddt	          j	        d| j        dt          j                  | j        z  z  z             d S )	Ninv_freqr         ?F)r+   dtypeinitializer	trainablei'  r   )startlimitdeltarJ   )
rC   build
add_weightr>   r    float32r   rH   assignrange)rE   input_shaperF   s     r&   rP   zTFRotaryEmbedding.buildd   s    k"""tx1}.bjo^aNbNbns ( 
 
 	5RXATXQbjYYY\`\ddef	
 	
 	
 	
 	
r(   r   c                H   t          j        |          |         }t          j        || j        j                  }t          j        d|| j                  }t          j        ||fd          d d d d d d f         }t          j        |          t          j        |          fS )NrJ   z
i, j -> ijr   r   )	r    r+   rT   rH   rJ   einsumr"   r,   r-   )rE   r#   seq_dimensionseq_lentfreqsembs          r&   _compute_cos_sinz"TFRotaryEmbedding._compute_cos_sinm   s    (1++m,HWDM$7888	,4=99iR000tQQQ1ABvc{{BF3KK''r(   q	tf.Tensorkreturntuple[tf.Tensor, tf.Tensor]c                z    |                      |d          \  }}t          |||          t          |||          fS )Nr*   )rY   )r^   r.   )rE   r_   ra   cos_embsin_embs        r&   callzTFRotaryEmbedding.callv   sI    00"0EE !GW55 GW55
 	
r(   N)r>   r?   )r   )r_   r`   ra   r`   rb   rc   )	__name__
__module____qualname____doc__rD   rP   r^   rg   __classcell__rF   s   @r&   r=   r=   S   s               
 
 
 
 
( ( ( (
 
 
 
 
 
 
 
r(   r=   c                  :     e Zd ZdZ	 	 	 dd fdZdd	Zd
 Z xZS )TFEsmContactPredictionHeadzWPerforms symmetrization, apc, and computes a logistic regression on the output featuresTr   Nin_featuresr?   eos_idxc                    t                                          |           || _        || _        t          j                            d|dd          | _        d S )NrA   r   sigmoid
regression)use_bias
activationrB   )rC   rD   rr   rq   r   layersDenseru   )rE   rq   biasrr   rB   rF   s        r&   rD   z#TFEsmContactPredictionHead.__init__   sT     	d###&,,,Q)Zf,ggr(   c                    | j         rd S d| _         t          | dd           Zt          j        | j        j                  5  | j                            d | j        f           d d d            d S # 1 swxY w Y   d S d S )NTru   )builtgetattrr    
name_scoperu   rB   rP   rq   rE   rU   s     r&   rP   z TFEsmContactPredictionHead.build   s    : 	F
4t,,8t344 @ @%%tT-=&>???@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ 98s    "A//A36A3c                   t          j        || j        k    |j                  }t          j        |d          t          j        |d          z  }||d d d d d d d d f         z  }|dd dd df         }|ddd dd f         }t          |          \  }}}}}t          j        ||||z  ||f          }t          t          |                    }t          j	        |d          }t          j
        |                     |          d          S )Nr   r   .r   )r   r   r   r   permr   )r    castrr   rJ   expand_dimsr   reshaper;   r2   	transposesqueezeru   )	rE   tokens
attentionseos_mask
batch_sizerx   headsseqlen_s	            r&   rg   zTFEsmContactPredictionHead.call   s   76T\1:3CDD>(A..!1L1LL(111dD!!!QQQ+>"??
SbS#2#.
QRR,
/9*/E/E,
FE61Z
Z%QW,XYY
 -Z
-C-CDD
\*<@@@
z$//*55q999r(   )Tr   N)rq   r?   rr   r?   rh   )ri   rj   rk   rl   rD   rP   rg   rm   rn   s   @r&   rp   rp      s        aa
 
h 
h 
h 
h 
h 
h 
h@ @ @ @: : : : : : :r(   rp   c                  <     e Zd ZdZd fd	Z	 d	dZd ZddZ xZS )
TFEsmEmbeddingszV
    Same as BertEmbeddings with a tiny tweak for positional embeddings indexing.
    Nc                   t                                          |           t          j                            |j        |j        t          |j                  d          | _	        t          j                            |j
        |j        t          |j                  d          | _        |j        r,t          j                            |j        d          | _        nd | _        t!          |dd          | _        t%          j        |j
                  d d d f         | _        |j        | _        |j        | _        |j        | _        || _        d S )	NrA   word_embeddings)embeddings_initializerrB   position_embeddings
layer_normepsilonrB   position_embedding_typeabsolute)rC   rD   r   rx   	Embedding
vocab_sizehidden_sizer   initializer_ranger   max_position_embeddingsr   emb_layer_norm_beforeLayerNormalizationlayer_norm_epsr   r}   r   r    rT   position_idspad_token_idpadding_idxtoken_dropoutmask_token_idconfigrE   r   rB   rF   s      r&   rD   zTFEsmEmbeddings.__init__   s4   d###$|55#263K#L#L"	  6  
  
 $)<#9#9*#263K#L#L&	 $: $
 $
  ' 	##l==fF[bn=ooDOO"DO (/v7PR\']']$HV%CDDT111WM!.#1#1r(   r   c                (   |.|t          || j        |          }n|                     |          }|/t          || j        j                   |                     |          }|}| j        rt          j	        || j
        k    d d d d d f         d|          }d}t          j        t          j        |d          t          j                  }|| j
        k    }	t          j                            |	t          j        d          |z  }
|d|z
  z  d|
z
  d d d d f         z  }| j        dk    r|                     |          }||z  }| j        |                     |          }|0|t          j        t          j        |d          |j                  z  }|S )Ng        gQ?r   r   )rJ   r   r   r   )"create_position_ids_from_input_idsr   &create_position_ids_from_inputs_embedsr   r   r   r   r   r    wherer   r   r5   rR   mathcount_nonzeror   r   r   r   rJ   )rE   	input_idsattention_maskr   inputs_embedspast_key_values_length
embeddingsmask_ratio_trainsrc_lengthsmasked_tokensmask_ratio_observedr   s               r&   rg   zTFEsmEmbeddings.call   s    $A)TM]_uvv#JJ=YY *9dk6LMMM 00;;M #
  	h90B#BAAAqqq$J"OQTV`aaJ)'"-R"H"H"H"*UUK%);;M"$'"7"7RZ^`"7"a"ado"o#q+;';<DW@WYZYZYZ\`bfYf?ggJ':55"&":":<"H"H--J?&44J%#bgbn^R.P.PR\Rb&c&ccJ r(   c                    t          |          dd         }|d         }t          j        | j        dz   || j        z   dz   t          j                  }t          j        t          j        |d          |          S )z
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: tf.Tensor

        Returns: tf.Tensor
        Nr   r   )rM   rN   rJ   r   )r   r    rT   r   int64broadcast_tor   )rE   r   rU   sequence_lengthr   s        r&   r   z6TFEsmEmbeddings.create_position_ids_from_inputs_embeds   sz     !//4%a.x"Q&o@P.PST.T\^\d
 
 
 r~lA>>LLLr(   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           `t          j        | j        j                  5  | j                            d d | j	        j
        g           d d d            d S # 1 swxY w Y   d S d S )NTr   r   r   )r|   r}   r    r~   r   rB   rP   r   r   r   r   r   s     r&   rP   zTFEsmEmbeddings.build  s   : 	F
4*D11=t3899 1 1$**40001 1 1 1 1 1 1 1 1 1 1 1 1 1 14.55At7<== 5 5(..t4445 5 5 5 5 5 5 5 5 5 5 5 5 5 54t,,8t344 M M%%tT4;3J&KLLLM M M M M M M M M M M M M M M M M M 986    A''A+.A+!CCC(D77D;>D;rh   )NNNNr   )	ri   rj   rk   rl   rD   rg   r   rP   rm   rn   s   @r&   r   r      s              > rs+ + + +ZM M M"M M M M M M M Mr(   r   c                  H     e Zd Zd fd	ZddZ	 	 	 	 	 	 	 dddZddZ xZS )TFEsmSelfAttentionNc                   t                                          |           |j        |j        z  dk    r0t	          |d          s t          d|j         d|j         d          |j        | _        t          |j        |j        z            | _        | j        | j        z  | _        t          j
                            | j        t          |j                  d          | _        t          j
                            | j        t          |j                  d	          | _        t          j
                            | j        t          |j                  d
          | _        t          j
                            |j                  | _        |pt)          |dd          | _        d | _        | j        dk    s| j        dk    rV|j        | _        t          j
                            d|j        z  dz
  | j        t          |j                            | _        n&| j        dk    rt5          | j        d          | _        |j        | _        || _        d S )NrA   r   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()querykernel_initializerrB   keyvaluer   r   relative_keyrelative_key_queryr   r   )r   rotaryrotary_embeddings)r>   rB   )rC   rD   r   num_attention_headshasattr
ValueErrorr?   attention_head_sizeall_head_sizer   rx   ry   r   r   r   r   r   Dropoutattention_probs_dropout_probdropoutr}   r   r   r   r   distance_embeddingr=   
is_decoderr   )rE   r   r   rB   rF   s       r&   rD   zTFEsmSelfAttention.__init__  sO   d### ::a??PVXhHiHi?8F$6 8 8 48 8 8  
 $*#= #&v'9F<V'V#W#W !58PP\''?6C[3\3\cj ( 
 

 <%%?6C[3\3\ch & 
 
 \''?6C[3\3\cj ( 
 

 |++F,OPP'> (
'-zC
 C
$ "&'>99T=Y]q=q=q+1+ID(&+l&<&<F22Q6('6v7O'P'P '= ' 'D##
 )X55%64;SZm%n%n%nD" +r(   r#   r`   rb   c                    t          |          d d         | j        | j        gz   }t          j        ||          }t          j        |d          S )Nr   r   r   r   r   r   )r   r   r   r    r   r   )rE   r#   new_x_shapes      r&   transpose_for_scoresz'TFEsmSelfAttention.transpose_for_scores@  sL     mmCRC(D,DdF^+__Jq+&&|AL1111r(   Fhidden_statesr   tf.Tensor | None	head_maskencoder_hidden_statesencoder_attention_maskpast_key_valuetuple[tuple[tf.Tensor]] | Noneoutput_attentionsbool | Nonetrainingbooltuple[tf.Tensor]c	                F   |                      |          }	|d u}
|
r||d         }|d         }|}n4|
rS|                     |                     |                    }|                     |                     |                    }|}n||                     |                     |                    }|                     |                     |                    }t	          j        |d         |gd          }t	          j        |d         |gd          }nP|                     |                     |                    }|                     |                     |                    }|                     |	          }|| j        dz  z  }| j        r||f}| j        dk    r| 	                    ||          \  }}t	          j
        ||d          }| j        d	k    s| j        d
k    r t          |          d         }t	          j        t	          j        |t          j                  d          }t	          j        t	          j        |t          j                  d          }||z
  }|                     || j        z   dz
            }t	          j        ||j                  }| j        d	k    rt	          j        d||          }||z   }n?| j        d
k    r4t	          j        d||          }t	          j        d||          }||z   |z   }|||z   }t)          |d          }|                     ||          }|||z  }||z  }t	          j        |d          }t          |          d d         | j        gz   }t	          j        ||          }|r||fn|f}| j        r||fz   }|S )Nr   r   r   r   g      r   Ttranspose_br   r   rW   r   zbhld,lrd->bhlrzbhrd,lrd->bhlrr   r   r   r*   )r   r   r   r   r    r"   r   r   r   r   matmulr   r   rT   r   r   r   r   rJ   rX   r   r   r   r   r   )rE   r   r   r   r   r   r   r   r   mixed_query_layeris_cross_attention	key_layervalue_layerquery_layerattention_scores
seq_lengthposition_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_keyattention_probscontext_layernew_context_layer_shapeoutputss                              r&   rg   zTFEsmSelfAttention.callE  s    !JJ}55
 3$> 	O."<&q)I(+K3NN 	O11$((;P2Q2QRRI33DJJ?T4U4UVVK3NN'11$((=2I2IJJI33DJJ}4M4MNNK	>!#4i"@qIIII)^A%6$D1MMMKK11$((=2I2IJJI33DJJ}4M4MNNK//0ABB "D$<d$BB? 	6 (5N'833%)%;%;K%S%S"K 9[)NNN'>99T=Y]q=q=q#M2215J^BHZrx,P,P,PRTUUN^BHZrx,P,P,PRSTTN%6H#'#:#:8dFb;bef;f#g#g #%7+?AR#S#S +~==+-95E{Th+i+i(#36N#N  -1EEE13;K[Zn1o1o./1y9I9Vj/k/k,#36T#TWs#s %/.@ ))9CCC ,,,JJ  -	9O'+5]FFF",]";";CRC"@DDVCW"W
=2IJJ6G]=/22mM]? 	2 11Gr(   c                   | j         rd S d| _         t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j	        j                  5  | j	                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j
        j                  5  | j
                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr   r   r   r   )r|   r}   r    r~   r   rB   rP   r   r   r   r   r   r   s     r&   rP   zTFEsmSelfAttention.build  s   : 	F
4$''3tz// H H
  $dk.E!FGGGH H H H H H H H H H H H H H H4%%1tx}-- F FdDK,CDEEEF F F F F F F F F F F F F F F4$''3tz// H H
  $dk.E!FGGGH H H H H H H H H H H H H H H4,d33?t5:;; 3 3&,,T2223 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 @?sH    (A44A8;A8.(C""C&)C&(EEE
F22F69F6NN)r#   r`   rb   r`   NNNNNFF)r   r`   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rb   r   rh   )ri   rj   rk   rD   r   rg   rP   rm   rn   s   @r&   r   r     s        & & & & & &P2 2 2 2 ,0&*26379=).e e e e eN3 3 3 3 3 3 3 3r(   r   c                  0     e Zd Zd fd	ZddZddZ xZS )TFEsmSelfOutputNc                (   t                                          |           t          j                            |j        t          |j                  d          | _        t          j        	                    |j
                  | _        || _        d S NrA   denser   rC   rD   r   rx   ry   r   r   r   r  r   hidden_dropout_probr   r   r   s      r&   rD   zTFEsmSelfOutput.__init__  y    d###\''?6C[3\3\cj ( 
 

 |++F,FGGr(   Fc                h    |                      |          }|                     ||          }||z  }|S Nr   r  r   rE   r   input_tensorr   s       r&   rg   zTFEsmSelfOutput.call  8    

=11]XFF%r(   c                   | j         rd S d| _         t          | dd           `t          j        | j        j                  5  | j                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S NTr  	r|   r}   r    r~   r  rB   rP   r   r   r   s     r&   rP   zTFEsmSelfOutput.build      : 	F
4$''3tz// H H
  $dk.E!FGGGH H H H H H H H H H H H H H H H H H 43    (A55A9<A9rh   Fri   rj   rk   rD   rg   rP   rm   rn   s   @r&   r  r    sm                H H H H H H H Hr(   r  c                  D     e Zd Zd fd	Zd Z	 	 	 	 	 	 	 ddZddZ xZS )	TFEsmAttentionNc                .   t                                          |           t          |d          | _        t	          |d          | _        t                      | _        t          j	        
                    |j        d          | _        || _        d S )NrA   rE   output	LayerNormr   )rC   rD   r   rE   r  output_layersetpruned_headsr   rx   r   r   r  r   r   s      r&   rD   zTFEsmAttention.__init__  s~    d###&vF;;;	+FBBBEE88AV]h8iir(   c                    t           rh   NotImplementedError)rE   r   s     r&   prune_headszTFEsmAttention.prune_heads      !!r(   Fc	           
         |                      |          }	|                     |	|||||||          }
|                     |
d         |          }|f|
dd          z   }|S )Nr   r   )r  rE   r  )rE   r   r   r   r   r   r   r   r   hidden_states_lnself_outputsattention_outputr   s                r&   rg   zTFEsmAttention.call  sx      >>-88yy!"	
 	
  ,,\!_mLL#%QRR(88r(   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           `t          j        | j        j                  5  | j                            d d | j	        j
        g           d d d            d S # 1 swxY w Y   d S d S )NTrE   r  r  )r|   r}   r    r~   rE   rB   rP   r  r  r   r   r   s     r&   rP   zTFEsmAttention.build  s   : 	F
4&&2ty~.. & &	%%%& & & & & & & & & & & & & & &4..:t0566 . .!''---. . . . . . . . . . . . . . .4d++7t~233 L L$$dD$+2I%JKKKL L L L L L L L L L L L L L L L L L 87r   rh   r   )ri   rj   rk   rD   r   rg   rP   rm   rn   s   @r&   r  r    s             " " " "#   4L L L L L L L Lr(   r  c                  0     e Zd Zd
 fdZddZdd	Z xZS )TFEsmIntermediater   r   c                     t                      j        di | t          j                            |j        t          |j                  d          | _        || _	        d S )Nr  )unitsr   rB    )
rC   rD   r   rx   ry   intermediate_sizer   r   r  r   rE   r   kwargsrF   s      r&   rD   zTFEsmIntermediate.__init__  sb    ""6"""\''*.v/GHH ( 
 


 r(   r   r`   rb   c                p    |                      |          }t          j                            |          }|S )Ninputs)r  r    nngelu)rE   r   s     r&   rg   zTFEsmIntermediate.call  s.    

-
88

=11r(   Nc                   | j         rd S d| _         t          | dd           `t          j        | j        j                  5  | j                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S r  r  r   s     r&   rP   zTFEsmIntermediate.build  r  r  r   r   r   r`   rb   r`   rh   r  rn   s   @r&   r(  r(    sm                
H H H H H H H Hr(   r(  c                  0     e Zd Zd fd	ZddZddZ xZS )TFEsmOutputNc                (   t                                          |           t          j                            |j        t          |j                  d          | _        t          j        	                    |j
                  | _        || _        d S r  r  r   s      r&   rD   zTFEsmOutput.__init__%  r  r(   Fc                h    |                      |          }|                     ||          }||z  }|S r	  r
  r  s       r&   rg   zTFEsmOutput.call-  r  r(   c                   | j         rd S d| _         t          | dd           `t          j        | j        j                  5  | j                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S r  )	r|   r}   r    r~   r  rB   rP   r   r,  r   s     r&   rP   zTFEsmOutput.build3  s    : 	F
4$''3tz// N N
  $dk.K!LMMMN N N N N N N N N N N N N N N N N N 43r  rh   r  r  rn   s   @r&   r8  r8  $  sm                N N N N N N N Nr(   r8  c                  >     e Zd Zd fd	Z	 	 	 	 	 	 	 ddZddZ xZS )
TFEsmLayerNc                   t                                          |           |j        | _        d| _        t	          |d          | _        |j        | _        |j        | _        | j        r-| j        st          |  d          t	          |          | _	        t          |d          | _        t          |d          | _        t          j                            |j        d          | _        || _        d S )	NrA   r   	attentionz> should be used as a decoder model if cross attention is addedintermediater  r  r   )rC   rD   chunk_size_feed_forwardseq_len_dimr  r?  r   add_cross_attentionRuntimeErrorcrossattentionr(  r@  r8  r  r   rx   r   r   r  r   r   s      r&   rD   zTFEsmLayer.__init__=  s    d###'-'E$'[AAA +#)#= # 	9? l"d#j#j#jkkk"0"8"8D-f>JJJ'X>>>88AV]h8iir(   Fc	           
     R   |
|d d         nd }	|                      |||||	|          }
|
d         }| j        r|
dd         }|
d         }n
|
dd          }d }| j        rr|pt          | d          st          d|  d          |
|d	d          nd }|                     ||||||||
          }|d         }||dd         z   }|d         }||z   }|                     |          }|                     |          }|                     |||          }|f|z   }| j        r||fz   }|S )Nr   )r   r   r   r   r   r   rE  z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r*   r   r   )r   r  r   )r?  r   r   AttributeErrorrE  r  r@  r  )rE   r   r   r   r   r   r   r   r   self_attn_past_key_valueself_attention_outputsr%  r   present_key_valuecross_attn_present_key_valuecross_attn_past_key_valuecross_attention_outputslayernorm_outputintermediate_outputlayer_outputs                       r&   rg   zTFEsmLayer.callM  s    :H9S>"1"#5#5Y] !%/3 "0 "
 "
 2!4 ? 	1,QrT2G 6r :,QRR0G'+$? 	Q4@4!122 $`d ` ` `   @N?Yrss(;(;_c%&*&9&9 %&)!! ': 	' 	'#  7q9 7" ==G ,C2+F( 14P P>>*:;;"//>N/OO((-<LW_ ) 
 
  /G+ ? 	5!2 44Gr(   c                N   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           `t          j        | j	        j                  5  | j	                            d d | j
        j        g           d d d            d S # 1 swxY w Y   d S d S )NTr?  r@  r  r  )r|   r}   r    r~   r?  rB   rP   r@  r  r  r   r   r   s     r&   rP   zTFEsmLayer.build  s   : 	F
4d++7t~233 + +$$T***+ + + + + + + + + + + + + + +4..:t0566 . .!''---. . . . . . . . . . . . . . .4..:t0566 . .!''---. . . . . . . . . . . . . . .4d++7t~233 L L$$dD$+2I%JKKKL L L L L L L L L L L L L L L L L L 87sH    A''A+.A+!CCCD))D-0D-#(FFFrh   r   r  rn   s   @r&   r=  r=  <  s             & "#D D D DLL L L L L L L Lr(   r=  c                  D     e Zd Zd fd	Z	 	 	 	 	 	 	 	 	 	 ddZddZ xZS )	TFEsmEncoderNc                    t                                          |           | _        fdt          j                  D             | _        t          j                            j	        d          | _
        d S )NrA   c                8    g | ]}t          d |           S )zlayer_._rA   )r=  ).0ir   s     r&   
<listcomp>z)TFEsmEncoder.__init__.<locals>.<listcomp>  s,    ggg!jnnn===gggr(   emb_layer_norm_afterr   )rC   rD   r   rT   num_hidden_layerslayerr   rx   r   r   rZ  r   s    ` r&   rD   zTFEsmEncoder.__init__  s{    d###gggguVMeGfGfggg
$)L$C$C)0F %D %
 %
!!!r(   FTc                
   |	rdnd }|rdnd }|r| j         j        rdnd }|rdnd }t          | j                  D ]s\  }}|	r||fz   }|||         nd }|||         nd } |||||||||          }|d         }|r||d         fz  }|r$||d         fz   }| j         j        r||d         fz   }t| j        r|                     |          }|	r||fz   }|
st          d |||||fD                       S t          |||||          S )Nr+  r   r   r   r   c              3     K   | ]}||V  	d S rh   r+  )rW  vs     r&   	<genexpr>z$TFEsmEncoder.call.<locals>.<genexpr>  s4       
 
 =  !===
 
r(   )last_hidden_statepast_key_valuesr   r   cross_attentions)r   rC  	enumerater\  rZ  tupler   )rE   r   r   r   r   r   rb  	use_cacher   output_hidden_statesreturn_dictr   all_hidden_statesall_self_attentionsall_cross_attentionsnext_decoder_cacherX  layer_modulelayer_head_maskr   layer_outputss                        r&   rg   zTFEsmEncoder.call  s    #7@BBD$5?bb4%6d4;;Zdrr`d#,6RR$(44 	V 	VOA|# I$58H$H!.7.CillO3B3N_Q//TXN(L%&!	 	M *!,M ;"}R'8&::"  V&9]1=M<O&O#;2 V+?=QRCSBU+U($ 	E 55mDDM 	E 1]4D D 	 
 
 "&%'(
 
 
 
 
 
 ;+.+*1
 
 
 	
r(   c                   | j         rd S d| _         t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           P| j	        D ]J}t          j        |j                  5  |                    d            d d d            n# 1 swxY w Y   Id S d S )NTrZ  r\  )
r|   r}   r    r~   rZ  rB   rP   r   r   r\  )rE   rU   r\  s      r&   rP   zTFEsmEncoder.build  sy   : 	F
4/66Bt8=>> W W)//tT[=T0UVVVW W W W W W W W W W W W W W W4$''3 & &]5:.. & &KK%%%& & & & & & & & & & & & & & & 43& &s$    (A44A8;A82CC	C	rh   )
NNNNNNFFTFr  rn   s   @r&   rT  rT    s        
 
 
 
 
 
 "#"E
 E
 E
 E
N
& 
& 
& 
& 
& 
& 
& 
&r(   rT  c                  0     e Zd Zd
 fdZddZdd	Z xZS )TFEsmPoolerr   r   c                     t                      j        di | t          j                            |j        t          |j                  dd          | _        || _	        d S )Ntanhr  )r*  r   rw   rB   r+  )
rC   rD   r   rx   ry   r   r   r   r  r   r-  s      r&   rD   zTFEsmPooler.__init__  se    ""6"""\''$.v/GHH	 ( 
 

 r(   r   r`   rb   c                J    |d d df         }|                      |          }|S )Nr   r0  )r  )rE   r   first_token_tensorpooled_outputs       r&   rg   zTFEsmPooler.call  s1     +111a40

*<
==r(   Nc                   | j         rd S d| _         t          | dd           `t          j        | j        j                  5  | j                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S r  r  r   s     r&   rP   zTFEsmPooler.build  r  r  r5  r6  rh   r  rn   s   @r&   rr  rr    sm        	 	 	 	 	 	   H H H H H H H Hr(   rr  c                      e Zd ZdZeZdZdS )TFEsmPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    esmN)ri   rj   rk   rl   r   config_classbase_model_prefixr+  r(   r&   rz  rz     s'         
 Lr(   rz  a2  

    This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a Keras [Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it as a
    regular Keras model and refer to the TF/Keras documentation for all matters related to general usage and behavior.

    Parameters:
        config ([`EsmConfig`]): Model configuration class with all the parameters of the
            model. Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~TFPreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        input_ids (`tf.Tensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`tf.Tensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        position_ids (`tf.Tensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`tf.Tensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
z]The bare ESM Model transformer outputting raw hidden-states without any specific head on top.c                  p     e Zd ZdZdgZd" fd	Zd#dZd Zd$d
Zd Z		 	 	 	 	 	 	 	 	 	 	 	 	 d%d&d Z
d! Z xZS )'TFEsmMainLayera  

    The model can behave as an encoder (with only self-attention) as well as a decoder, in which case a layer of
    cross-attention is added between the self-attention layers, following the architecture described in [Attention is
    all you need](https://huggingface.co/papers/1706.03762) by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit,
    Llion Jones, Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin.

    To behave as an decoder the model needs to be initialized with the `is_decoder` argument of the configuration set
    to `True`. To be used in a Seq2Seq model, the model needs to initialized with both `is_decoder` argument and
    `add_cross_attention` set to `True`; an `encoder_hidden_states` is then expected as an input to the forward pass.
    r   TNc                L    t                      j        d	d|i| || _        |j        | _        t	          |d          | _        t          |d          | _        |rt          |d          nd | _	        t          | j        j        | j        j        z  dd          | _        d S )
NrB   r   rA   encoderpoolerTcontact_head)rq   rz   rB   r+  )rC   rD   r   r   r   r   rT  r  rr  r  rp   r[  r   r  )rE   r   add_pooling_layerrB   r.  rF   s        r&   rD   zTFEsmMainLayer.__init__v  s    --d-f--- +)&|DDD#F;;;<MWk&x8888SW658WW^biw
 
 
r(   c                4   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           St          j        | j	        j                  5  | j	                            d            d d d            d S # 1 swxY w Y   d S d S )NTr   r  r  r  )
r|   r}   r    r~   r   rB   rP   r  r  r  r   s     r&   rP   zTFEsmMainLayer.build  sy   : 	F
4t,,8t344 , ,%%d+++, , , , , , , , , , , , , , ,4D))5t|011 ) )""4((() ) ) ) ) ) ) ) ) ) ) ) ) ) )44((4t{/00 ( (!!$'''( ( ( ( ( ( ( ( ( ( ( ( ( ( (4..:t0566 . .!''---. . . . . . . . . . . . . . . . . . ;:sH    A''A+.A+!CCCD))D-0D-#FFFc                    | j         j        S rh   )r   r   rE   s    r&   get_input_embeddingsz#TFEsmMainLayer.get_input_embeddings  s    ..r(   r   tf.Variablec                f    || j         j        _        t          |          d         | j         _        d S )Nr   )r   r   weightr   r   )rE   r   s     r&   set_input_embeddingsz#TFEsmMainLayer.set_input_embeddings  s,    16'.%/%6%6q%9"""r(   c                    t           rh   r  )rE   heads_to_prunes     r&   _prune_headszTFEsmMainLayer._prune_heads  r!  r(   Fr   TFModelInputType | Noner   np.ndarray | tf.Tensor | Noner   r   r   r   rb  +tuple[tuple[np.ndarray | tf.Tensor]] | Nonerf  r   r   rg  rh  r   r   rb   ATFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor]c                   | j         j        sd}	||t          d          |t          |          }n)|t          |          d d         }nt          d          |\  }}| d}d gt	          | j        j                  z  }n!t          |d         d                   d         }|t          j        |||z   fd          }| 	                    ||||||	          }t          |          }||z   }| j        rt          j
        |          }t          j        t          j        |d d d d f         ||df          |d d d d f                   }t          j        ||j        
          }||d d d d d f         z  }t          |          }t          j        ||d         d|d         |d         f          }|d         |d d d d | d d d f         }n%t          j        ||d         dd|d         f          }t          j        ||j        
          }t          j        d|j        
          }t          j        d|j        
          }t          j        t          j        ||          |          }| j        rp|nt          j        ||j        
          }t	          t          |                    }|dk    r|d d d d d d d f         }|dk    r|d d d d d d f         }d|z
  dz  }nd }|t&          d g| j         j        z  }|                     |||||||	|
|||          }|d         }| j        |                     |          nd }|s||f|dd          z   S t-          |||j        |j        |j        |j                  S )NFzDYou cannot specify both input_ids and inputs_embeds at the same timer   z5You have to specify either input_ids or inputs_embedsr   r*   r   )dimsr   )r   r   r   r   r   r   rW   r   rI   g     r   )r   r   r   r   r   rb  rf  r   rg  rh  r   rG  )ra  pooler_outputrb  r   r   rc  )r   r   r   r   lenr  r\  r    fillr   rT   
less_equaltiler   rJ   r   constantmultiplysubtractr  r[  r  r	   rb  r   r   rc  )rE   r   r   r   r   r   r   r   rb  rf  r   rg  rh  r   rU   r   r   r   embedding_outputattention_mask_shapemask_seq_lengthseq_idscausal_maskextended_attention_maskone_cstten_thousand_cstnum_dims_encoder_attention_maskencoder_extended_attention_maskencoder_outputssequence_outputrw  s                                  r&   rg   zTFEsmMainLayer.call  s}     {% 	I ]%>cddd"$Y//KK&$]33CRC8KKTUUU!,
J"%&"#fs4<+='>'>>OO%/0B10E%F%Fr%J"!W:zDZ7Z*[cdeeeN??)%'#9 + 
 
  *.99$'==
 ? 	h//G-dAAA._a0PQQaaa& K '+^5IJJJK&1N111dAAA:4N&N##-.E#F#F &(j'*>q*A1FZ[\F]_stu_v)w' '# q!-*A!!!QQQVWVWVWBW*X'&(j!5a!8!Q@TUV@W X' '# #%'*AIYI_"`"`"`+c)9)?@@@;x7G7MNNN"$+bk'CZ.[.[]m"n"n ? 	35A &(W-CKbKh%i%i%i".1*=S2T2T.U.U+.!332HDRSRSRSUVUVUV2W/.!332HDRVXYXYXYIY2Z/ 035T/TX`.`++.2+  %%!>>I,,*2"7#B+/!5# ' 
 
 *!,FJkF]/BBBcg 	$  #$ $
 >-'+;)7&1,=
 
 
 	
r(   c                     | ||dd          j         }t          j        |d          }t          j        ||j                  }||d d d d d f         z  }||d d d d d d d f         z  }|                     ||          S )NT)r   rh  r   r   r   )r   r    stackr   rJ   r  )rE   r   r   attnss       r&   predict_contactszTFEsmMainLayer.predict_contacts8  s    VN`deeepQ'''
 ==4t 3444qqq$ 677  ///r(   )TNrh   )r   r  NNNNNNNNNNNNF)r   r  r   r  r   r  r   r  r   r  r   r  r   r  rb  r  rf  r   r   r   rg  r   rh  r   r   r   rb   r  )ri   rj   rk   rl   _keys_to_ignore_on_load_missingrD   rP   r  r  r  rg   r  rm   rn   s   @r&   r  r  c  s        

 
 (7&7#
 
 
 
 
 
. . . ."/ / /: : : :" " "
 .28<6:377;?C@DGK!%)-,0#'W
 W
 W
 W
 W
r
0 
0 
0 
0 
0 
0 
0r(   r  c                       e Zd Zdd  fdZe ee                    d                     ee	e
e          	 	 	 	 	 	 	 	 	 	 	 	 	 d!d"d                                    Zd Zd#dZ xZS )$
TFEsmModelTr   r   c                p     t                      j        |g|R i | t          ||d          | _        d S )Nr{  r  rB   )rC   rD   r  r{  )rE   r   r  r1  r.  rF   s        r&   rD   zTFEsmModel.__init__J  sF    3&333F333!&<MTYZZZr(   batch_size, sequence_length
checkpointoutput_typer|  NFr   r  r   r  r   r   r   r   r   rb  r  rf  r   r   rg  rh  r   rb   r  c                J    |                      |||||||||	|
|||          }|S )a  
        encoder_hidden_states  (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
            Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
            the model is configured as a decoder.
        encoder_attention_mask (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in
            the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

        past_key_values (`tuple[tuple[tf.Tensor]]` of length `config.n_layers`)
            contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding.
            If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
            don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
            `decoder_input_ids` of shape `(batch_size, sequence_length)`.
        use_cache (`bool`, *optional*, defaults to `True`):
            If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
            `past_key_values`). Set to `False` during training, `True` during generation
        )r   r   r   r   r   r   r   rb  rf  r   rg  rh  r   )r{  )rE   r   r   r   r   r   r   r   rb  rf  r   rg  rh  r   r   s                  r&   rg   zTFEsmModel.callO  sK    V (()%'"7#9+/!5#  
 
 r(   c                8    | j                             ||          S rh   r{  r  rE   r   r   s      r&   r  zTFEsmModel.predict_contacts      x((@@@r(   c                    | j         rd S d| _         t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr{  )r|   r}   r    r~   r{  rB   rP   r   s     r&   rP   zTFEsmModel.build  s    : 	F
4%%1tx}-- % %t$$$% % % % % % % % % % % % % % % % % % 21s    A((A,/A,)Tr5  r  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  rb  r  rf  r   r   r   rg  r   rh  r   r   r   rb   r  rh   )ri   rj   rk   rD   r   r   ESM_INPUTS_DOCSTRINGformatr   _CHECKPOINT_FOR_DOCr	   _CONFIG_FOR_DOCrg   r  rP   rm   rn   s   @r&   r  r  E  s       
[ [ [ [ [ [ [
 **+?+F+FGd+e+eff&B$   .28<6:377;?C@DGK!%)-,0#' %3 3 3 3  gf ]3jA A A% % % % % % % %r(   r  z1ESM Model with a `language modeling` head on top.c                       e Zd ZdgZdgZ fdZd Zd Zd Ze	 e
e                    d                     eeeed	          	 	 	 	 	 	 	 	 	 	 	 	 d d!d                                    Zd Zd"dZ xZS )#TFEsmForMaskedLMr   r  c                H   t                                          |           |j        rt                              d           t          |dd          | _        t          |d          | _        |j	        rt          j        t          j                            |                                 ddd                    5  | j        j        j                            d	           d d d            n# 1 swxY w Y   | j        j        j        j        d
         | j        _        d S d S )NzjIf you want to use `EsmForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention.Fr{  r  lm_headrA   r   r   r   r   )rC   rD   r   loggerwarningr  r{  TFEsmLMHeadr  tie_word_embeddingsr    r~   ospathjoin_name_scoper   r   rP   weightsdecoderrE   r   rF   s     r&   rD   zTFEsmForMaskedLM.__init__  sX       	NN1  
 "&ENNN"6	:::% 	Rrw||D,<,<,>,>|Ufgghh H H#399,GGGH H H H H H H H H H H H H H H#'8#6#F#Nq#QDL   		R 	Rs   >%C//C36C3c                    | j         j        S rh   r  r  r  s    r&   get_output_embeddingsz&TFEsmForMaskedLM.get_output_embeddings  s    |##r(   c                    || j         _        d S rh   r  )rE   new_embeddingss     r&   set_output_embeddingsz&TFEsmForMaskedLM.set_output_embeddings  s    -r(   c                    | j         S rh   )r  r  s    r&   get_lm_headzTFEsmForMaskedLM.get_lm_head  s
    |r(   r  z<mask>)r  r  r|  maskNFr   r  r   r  r   r   r   r   labelsr   r   rg  rh  r   r   rb   #TFMaskedLMOutput | tuple[tf.Tensor]c                @   ||n| j         j        }|                     ||||||||	|
||          }|d         }|                     |          }d}||                     ||          }|s|f|dd         z   }||f|z   n|S t          |||j        |j                  S )a!  
        labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        kwargs (`dict[str, any]`, *optional*, defaults to `{}`):
            Used to hide legacy arguments that have been deprecated.
        N)
r   r   r   r   r   r   r   rg  rh  r   r   )r  logitsr   lossr  r   r   )r   use_return_dictr{  r  hf_compute_lossr
   r   r   )rE   r   r   r   r   r   r   r   r  r   rg  rh  r   r   r  prediction_scoresmasked_lm_lossr  s                     r&   rg   zTFEsmForMaskedLM.call  s    > &1%<kk$+B](()%'"7#9/!5#  
 
 "!* LL99!11HY1ZZN 	Z')GABBK7F3A3M^%..SYY$!/)	
 
 
 	
r(   c                8    | j                             ||          S rh   r  r  s      r&   r  z!TFEsmForMaskedLM.predict_contacts  r  r(   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTr{  r  )r|   r}   r    r~   r{  rB   rP   r  r   s     r&   rP   zTFEsmForMaskedLM.build  sP   : 	F
4%%1tx}-- % %t$$$% % % % % % % % % % % % % % %4D))5t|011 ) )""4((() ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) 65$    A''A+.A+!C		CC)NNNNNNNNNNNF)r   r  r   r  r   r  r   r  r   r  r   r  r   r  r  r  r   r   rg  r   rh  r   r   r   rb   r  rh   )ri   rj   rk   r  "_keys_to_ignore_on_load_unexpectedrD   r  r  r  r   r   r  r  r   r  r
   r  rg   r  rP   rm   rn   s   @r&   r  r    s?       '6&7#*3&R R R R R"$ $ $. . .   **+?+F+FGd+e+eff&$$	   .28<6:377;?C@D04)-,0#'6
 6
 6
 6
  gf ]6
pA A A	) 	) 	) 	) 	) 	) 	) 	)r(   r  c                  8     e Zd ZdZd fd	ZddZd Zd Z xZS )r  z&ESM Head for masked language modeling.Nc                   t                                          |           t          j                            |j        t          |j                  d          | _        t          j        	                    |j
        d          | _        |j        rd | _        n?t          j                            |j        t          |j                  dd          | _        || _        d S )	NrA   r  r   r   r   r  F)r   rB   rv   )rC   rD   r   rx   ry   r   r   r   r  r   r   r   r  r  r   r   r   s      r&   rD   zTFEsmLMHead.__init__  s    d###\''?6C[3\3\cj ( 
 

  ,99&BW^j9kk% 	DLL <--!#263K#L#L	 .  DL r(   c                .   | j         rd S d| _         |                     d| j        j        fdd          | _        t          | dd           ]t          j        | j        j	                  5  | j        
                    d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j        j	                  5  | j        
                    d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           l| j        j        sbt          j        | j        j	                  5  | j        
                    d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S d S )NTrz   zeros)r+   rK   rL   r  r   r  )r|   rQ   r   r   rz   r}   r    r~   r  rB   rP   r   r   r  r  r   s     r&   rP   zTFEsmLMHead.build  su    : 	F
OOF4;3I2KY`lpOqq	4$''3tz// H H
  $dk.E!FGGGH H H H H H H H H H H H H H H4t,,8t344 M M%%tT4;3J&KLLLM M M M M M M M M M M M M M M4D))5dk>]5t|011 J J""D$0G#HIIIJ J J J J J J J J J J J J J J J J J 6555s6   )(BB!$B!(DDD(FF
F
c                    d| j         iS )Nrz   )rz   r  s    r&   get_biaszTFEsmLMHead.get_bias+  s    	""r(   c                4   |                      |          }t          j                            |          }|                     |          }| j        j        r%t          j        || j        d          | j	        z   }n|                     |          | j	        z   }|S )NTr   )
r  r    r2  r3  r   r   r  r   r  rz   )rE   featuresr#   s      r&   rg   zTFEsmLMHead.call.  s    JJx  EJJqMMOOA ;* 	,	!T\t<<<tyHAAQ$)+Ar(   rh   )	ri   rj   rk   rl   rD   rP   r  rg   rm   rn   s   @r&   r  r    sy        00     $J J J J"# # #
 
 
 
 
 
 
r(   r  z
    ESM Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    c                       e Zd ZdgZ fdZe ee                    d                     e	e
ee          	 	 	 	 	 	 	 	 	 	 ddd                                    ZddZ xZS )TFEsmForSequenceClassificationr   c                    t                                          |           |j        | _        || _        t	          |dd          | _        t          |d          | _        d S NFr{  r  
classifierrA   )rC   rD   
num_labelsr   r  r{  TFEsmClassificationHeadr  r  s     r&   rD   z'TFEsmForSequenceClassification.__init__E  sZ        +!&ENNN1&|LLLr(   r  r  NFr   r  r   r  r   r   r  r   r   rg  rh  r   r   rb   -TFSequenceClassifierOutput | tuple[tf.Tensor]c                :   |	|	n| j         j        }	|                     ||||||||	|
	  	        }|d         }|                     |          }|dn|                     ||          }|	s|f|dd         z   }||f|z   n|S t          |||j        |j                  S )a  
        labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r   r   r   r   rg  rh  r   r   r   r  )r   r  r{  r  r  r   r   r   rE   r   r   r   r   r   r  r   rg  rh  r   r   r  r  r  r  s                   r&   rg   z#TFEsmForSequenceClassification.callM  s    4 &1%<kk$+B](()%'/!5#  

 

 "!*11~tt4+?+?+O+O 	FY,F)-)9TGf$$vE)!/)	
 
 
 	
r(   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           St          j        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S NTr{  r  )r|   r}   r    r~   r{  rB   rP   r  r   s     r&   rP   z$TFEsmForSequenceClassification.build  sP   : 	F
4%%1tx}-- % %t$$$% % % % % % % % % % % % % % %4t,,8t344 , ,%%d+++, , , , , , , , , , , , , , , , , , 98r  
NNNNNNNNNF)r   r  r   r  r   r  r   r  r   r  r  r  r   r   rg  r   rh  r   r   r   rb   r  rh   )ri   rj   rk   r  rD   r   r   r  r  r   r  r   r  rg   rP   rm   rn   s   @r&   r  r  ;  s         (7&7#M M M M M **+?+F+FGd+e+eff&.$   .28<6:377;04)-,0#'.
 .
 .
 .
  gf ].
`	, 	, 	, 	, 	, 	, 	, 	,r(   r  z
    ESM Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
    Named-Entity-Recognition (NER) tasks.
    c                       e Zd ZdgZdgZ fdZe ee	                    d                     e
eee          	 	 	 	 	 	 	 	 	 	 ddd                                    ZddZ xZS )TFEsmForTokenClassificationr  r   c                F   t                                          |           |j        | _        t          |dd          | _        t
          j                            |j                  | _	        t
          j        
                    |j        d          | _        || _        d S r  )rC   rD   r  r  r{  r   rx   r   r  r   ry   r  r   r  s     r&   rD   z$TFEsmForTokenClassification.__init__  s        +!&ENNN|++F,FGG,,,V->\,RRr(   r  r  NFr   r  r   r  r   r   r  r   r   rg  rh  r   r   rb   *TFTokenClassifierOutput | tuple[tf.Tensor]c                h   |	|	n| j         j        }	|                     ||||||||	|
	  	        }|d         }|                     ||
          }|                     |          }|dn|                     ||          }|	s|f|dd         z   }||f|z   n|S t          |||j        |j                  S )z
        labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        Nr  r   r   r   r  )	r   r  r{  r   r  r  r   r   r   r  s                   r&   rg   z TFEsmForTokenClassification.call  s    0 &1%<kk$+B](()%'/!5#  

 

 "!*,,,JJ11~tt4+?+?+O+O 	FY,F)-)9TGf$$vE&!/)	
 
 
 	
r(   c                   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           `t          j        | j        j                  5  | j                            d d | j        j	        g           d d d            d S # 1 swxY w Y   d S d S r  )
r|   r}   r    r~   r{  rB   rP   r  r   r   r   s     r&   rP   z!TFEsmForTokenClassification.build  sp   : 	F
4%%1tx}-- % %t$$$% % % % % % % % % % % % % % %4t,,8t344 M M%%tT4;3J&KLLLM M M M M M M M M M M M M M M M M M 98s$    A''A+.A+!(CCCr  )r   r  r   r  r   r  r   r  r   r  r  r  r   r   rg  r   rh  r   r   r   rb   r  rh   )ri   rj   rk   r  r  rD   r   r   r  r  r   r  r   r  rg   rP   rm   rn   s   @r&   r  r    s         +4&'6&7#     **+?+F+FGd+e+eff&+$   .28<6:377;04)-,0#'/
 /
 /
 /
  gf ]/
b	M 	M 	M 	M 	M 	M 	M 	Mr(   r  c                  4     e Zd ZdZd fd	ZddZddZ xZS )	r  z-Head for sentence-level classification tasks.Nc                   t                                          |           t          j                            |j        t          |j                  dd          | _        t          j        	                    |j
                  | _        t          j                            |j        t          |j                  dd          | _        || _        d S )NrA   rt  r  )r   rw   rB   linearout_proj)rC   rD   r   rx   ry   r   r   r   r  r   r  r   r  r  r   r   s      r&   rD   z TFEsmClassificationHead.__init__  s    d###\''.v/GHH	 ( 
 

 |++F,FGG**.v/GHH	 + 
 
 r(   Fc                    |d d dd d f         }|                      ||          }|                     |          }|                      ||          }|                     |          }|S )Nr   r   )r   r  r  )rE   r  r   r#   s       r&   rg   zTFEsmClassificationHead.call  si    QQQ111WLLXL..JJqMMLLXL..MM!r(   c                   | j         rd S d| _         t          | dd           ]t          j        | j        j                  5  | j                            d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           `t          j        | j	        j                  5  | j	                            d d | j        j        g           d d d            d S # 1 swxY w Y   d S d S )NTr  r  )
r|   r}   r    r~   r  rB   rP   r   r   r  r   s     r&   rP   zTFEsmClassificationHead.build  s   : 	F
4$''3tz// H H
  $dk.E!FGGGH H H H H H H H H H H H H H H4T**6t}122 K K##T41H$IJJJK K K K K K K K K K K K K K K K K K 76s$    (A44A8;A8.(C##C'*C'rh   r  )ri   rj   rk   rl   rD   rg   rP   rm   rn   s   @r&   r  r    ss        77     "   	K 	K 	K 	K 	K 	K 	K 	Kr(   r  c                    t          j        | |k    t           j                  }t          j        |d          |z   |z  }||z   S )z
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`.

    Args:
        x: tf.Tensor x:

    Returns: tf.Tensor
    r   r   )r    r   r   cumsum)r   r   r   r  incremental_indicess        r&   r   r     sG     79+RX66D9T2225KKtS,,r(   )r  r  r  r  rz  )r   )Erl   
__future__r   r  numpynp
tensorflowr    
file_utilsr   r   r   modeling_tf_outputsr   r	   r
   r   r   modeling_tf_utilsr   r   r   r   r   r   r   r   r   tf_utilsr   r   utilsr   configuration_esmr   
get_loggerri   r  r  r  r'   r.   r2   r;   rx   Layerr=   rp   r   r   r  r  r(  r8  r=  rT  rr  rz  ESM_START_DOCSTRINGr  r  r  r  r  r  r  r  r   __all__r+  r(   r&   <module>r     s     " " " " " " 				         q q q q q q q q q q             
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 G F F F F F F F       ( ( ( ( ( ( 
	H	%	%1 ) ) )
. . .- - -
	 	 	)
 )
 )
 )
 )
* )
 )
 )
X%: %: %: %: %:!3 %: %: %:PmM mM mM mM mMel( mM mM mM`d3 d3 d3 d3 d3+ d3 d3 d3NH H H H Hel( H H H02L 2L 2L 2L 2LU\' 2L 2L 2LjH H H H H* H H H2N N N N N%,$ N N N0fL fL fL fL fL# fL fL fLRZ& Z& Z& Z& Z&5<% Z& Z& Z&|H H H H H%,$ H H H:    ,    ' T c [0 [0 [0 [0 [0U\' [0 [0	 [0| c K% K% K% K% K%% K% K%	 K%\ MObccj) j) j) j) j)+-I j) j) dcj)Z3 3 3 3 3%,$ 3 3 3l   K, K, K, K, K,%9;W K, K, K,\   NM NM NM NM NM"68Q NM NM NMb%K %K %K %K %Kel0 %K %K %KP- - - -   r(   