
     `i                       d dl Z d dlZd dlmZ d dlmZmZmZ d dlZd dlm	Z	m
Z
 ddlmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZmZmZ ddlmZm Z  ddl!m"Z"m#Z#m$Z$m%Z% ddl&m'Z' ddl(m)Z)m*Z* ddl+m,Z,m-Z-m.Z. ddl/m0Z0 ddl1m2Z2m3Z3 ddl4m5Z5m6Z6 d Z7 G d de
j8                  Z9d Z:d Z; G d de
j8                  Z<	 	 dade
j8        dej	        d ej	        d!ej	        d"eej	                 d#e=d$e=d%eej	                 d&e'e,         fd'Z> G d( d)e
j8                  Z? G d* d+e
j8                  Z@ G d, d-e
j8                  ZAd. ZB G d/ d0e
j8                  ZC G d1 d2e
j8                  ZD G d3 d4e          ZE G d5 d6e
j8                  ZF G d7 d8e
j8                  ZGe- G d9 d:e$                      ZH G d; d<eH          ZI G d= d>e
j8                  ZJ G d? d@e
j8                  ZK G dA dBe
j8                  ZLee- G dC dDe                                  ZM G dE dFe
j8                  ZN G dG dHe
j8                  ZO edI           G dJ dKe
j8                              ZP G dL dMe
j8                  ZQ G dN dOe
j8                  ZRdP ZSdbdQZTdRej	        dSeUdTej	        fdUZV G dV dWe
j8                  ZW G dX dYe          ZXe- G dZ d[e$                      ZY G d\ d]eY          ZZ G d^ d_eYe          Z[g d`Z\dS )c    N)	dataclass)CallableOptionalUnion)Tensornn   )ACT2FN)CacheDynamicCache)GenerationMixin)use_kernel_forward_from_hub)create_causal_mask)GradientCheckpointingLayer)"BaseModelOutputWithCrossAttentionsBaseModelOutputWithPast,BaseModelOutputWithPoolingAndCrossAttentionsCausalLMOutputWithPastModelOutput)ROPE_INIT_FUNCTIONSdynamic_rope_update)ALL_ATTENTION_FUNCTIONSModuleUtilsMixinPreTrainedModelget_parameter_dtype)Unpack) find_pruneable_heads_and_indicesprune_linear_layer)TransformersKwargsauto_docstringcan_return_tuple)deprecate_kwarg)OutputRecordercheck_model_inputs   )EvollaConfigSaProtConfigc                     |                      |                                          }t          j        |d                              |          |z  }|                                |z   S )a  
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`.

    Args:
        x: torch.Tensor x:

    Returns: torch.Tensor
    r%   dim)neinttorchcumsumtype_aslong)	input_idspadding_idxmaskincremental_indicess       ~/home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/evolla/modeling_evolla.py"create_position_ids_from_input_idsr6   5   s`     <<$$((**D,t333;;DAADH##%%33    c                   8     e Zd ZdZ fdZ	 	 	 	 ddZd Z xZS )EvollaSaProtEmbeddingszV
    Same as BertEmbeddings with a tiny tweak for positional embeddings indexing.
    c                    t                                                       t          j        |j        |j        |j                  | _        |j        r&t          j	        |j        |j
                  | _        nd | _        t          j        |j                  | _        t          |dd          | _        |                     dt%          j        |j                                      d          d           |j        | _        | j        dk    r+t          j        |j        |j        | j                  | _        |j        | _        |j        | _        d | _        d S )	N)r2   epsposition_embedding_typeabsoluteposition_ids)r%   F
persistent)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsemb_layer_norm_before	LayerNormlayer_norm_eps
layer_normDropouthidden_dropout_probdropoutgetattrr=   register_bufferr-   arangemax_position_embeddingsexpandr2   position_embeddingstoken_dropoutmask_token_idr?   selfconfig	__class__s     r5   rD   zEvollaSaProtEmbeddings.__init__J   sB   !|F,=v?Q_e_rsss' 	# l6+=6CXYYYDOO"DOz&"<=='.v7PR\']']$EL)GHHOOPWXXej 	 	
 	
 	
 ".':55')|.0BPTP`( ( (D$ $1#1 r7   Nc                    |-|t          || j                  }n|                     |          }||                     |          }|}| j        r||                    || j        k                        d          d          }d}||                    d          n|j	        d         }|| j        k                        d          
                                |z  }|d|z
  z  d|z
  d d d d f         z                      |j                  }| j        dk    r|                     |          }	||	z   }| j        |                     |          }|0||                    d          z                      |j                  }|S )Nr@           gQ?r%   r>   )r6   r2   &create_position_ids_from_inputs_embedsrI   rW   masked_fillrX   	unsqueezesumshapefloattodtyper=   rV   rM   )
rZ   r1   attention_maskr?   inputs_embeds
embeddingsmask_ratio_trainsrc_lengthsmask_ratio_observedrV   s
             r5   forwardzEvollaSaProtEmbeddings.forwardc   s    $A)TM]^^#JJ=YY  00;;M #
  	)"7#//d>P1P0[0[\^0_0_adeeJ)4B4N.,,R000T]TcdeTfK#,0B#B"G"G"K"K"Q"Q"S"SVa"a$,<(<=EXAXZ[Z[Z[]acgZg@hhll  J ':55"&":":<"H"H#&99J?&44J%$~'?'?'C'CCGG
HXYYJ r7   c                    |                                 dd         }|d         }t          j        | j        dz   || j        z   dz   t          j        |j                  }|                    d                              |          S )z
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: torch.Tensor

        Returns: torch.Tensor
        Nr@   r%   rf   devicer   )sizer-   rS   r2   r0   rp   ra   rU   )rZ   rh   input_shapesequence_lengthr?   s        r5   r_   z=EvollaSaProtEmbeddings.create_position_ids_from_inputs_embeds   s     $((**3B3/%a.|q /D4D"Dq"HPUPZcpcw
 
 
 %%a((//<<<r7   NNNN)__name__
__module____qualname____doc__rD   rm   r_   __classcell__r\   s   @r5   r9   r9   E   st         ! ! ! ! !6 / / / /b= = = = = = =r7   r9   c                 h    |                      dd          \  }}t          j        | |fd          S )N   r@   r)   )chunkr-   catxx1x2s      r5   rotate_half_esmr      s6    WWQBWFB9rc2YB''''r7   c                     |d d d d d | j         d         d d f         }|d d d d d | j         d         d d f         }| |z  t          |           |z  z   S )N)rc   r   )r   cossins      r5   apply_rotary_pos_emb_esmr      sp    
aaaMagbkM111$
%C
aaaMagbkM111$
%CG**S011r7   c                        e Zd ZU dZej        ed<   def fdZddZ	dej        dej        d	e
ej        ej        f         fd
Z xZS )EvollaSaProtRotaryEmbeddingz
    Rotary position embeddings based on those in
    [RoFormer](https://huggingface.co/docs/transformers/model_doc/roformer). Query and keys are transformed by rotation
    matrices which depend on their relative positions.
    inv_freqr*   c                    t                                                       ddt          j        d|dt          j                                                  |z  z  z  }|                     d|           d | _        d | _        d | _	        d S )N      ?i'  r   r|   rf   r   )
rC   rD   r-   rS   int64rd   rR   _seq_len_cached_cos_cached_sin_cached)rZ   r*   r   r\   s      r5   rD   z$EvollaSaProtRotaryEmbedding.__init__   s    %ELC%+$N$N$N$T$T$V$VY\$\]^Z222#r7   r|   c                 2   |j         |         }|| j        k    s| j        j        |j        k    r|| _        t	          j        |j         |         |j                                      | j                  }t	          j        || j                  }t	          j	        ||fd          
                    |j                  }|                                d d d d d d f         | _        |                                d d d d d d f         | _        | j        | j        fS )Nrp   r@   r)   )rc   r   r   rp   r-   rS   r/   r   outerr~   re   r   r   r   )rZ   r   seq_dimensionseq_lentfreqsembs          r5   _update_cos_sin_tablesz2EvollaSaProtRotaryEmbedding._update_cos_sin_tables   s    '-( d***d.>.E.Q.Q#*D QW]3AHEEEMMdm\\AK4=11E)UEN33366qx@@C"wwyytQQQ)9:D"wwyytQQQ)9:D!111r7   qkreturnc                    |                      |d          \  | _        | _        t          || j        | j                                      |j                  t          || j        | j                                      |j                  fS )Nr   )r   r   )r   r   r   r   re   rf   )rZ   r   r   s      r5   rm   z#EvollaSaProtRotaryEmbedding.forward   s    -1-H-HZ\-H-]-]*$* %Q(8$:JKKNNUVU\N]]$Q(8$:JKKNNUVU\N]]
 	
r7   )r|   )ru   rv   rw   rx   r-   r   __annotations__r,   rD   r   tuplerm   ry   rz   s   @r5   r   r      s           l C            2 2 2 2 
 
%, 
5u|A[;\ 
 
 
 
 
 
 
 
r7   r   r^   modulequerykeyvaluerg   scalingrP   	head_maskkwargsc                    t          j        ||                    dd                    |z  }	t          | d          r.| j        dv r$|j        d         }
t          j        |
t           j        |	j                  	                    dd          }t          j        |
t           j        |	j                  	                    dd          }||z
  }| 
                    || j        z   dz
            }|                    |j                  }| j        d	k    rt          j        d
||          }n<| j        dk    r1t          j        d
||          }t          j        d||          }||z   }|	|z   }	|$|d d d d d d d |j        d         f         }|	|z   }	t          j                            |	dt           j                                      |j                  }	t          j                            |	|| j                  }	||	|z  }	t          j        |	|          }|                    dd                                          }||	fS )Nr|   r	   r=   relative_keyrelative_key_queryro   r@   r%   r   r   zbhld,lrd->bhlrr   zbhrd,lrd->bhlrr   )r*   rf   )ptraining)r-   matmul	transposehasattrr=   rc   rS   r0   rp   viewdistance_embeddingrT   re   rf   einsumr   
functionalsoftmaxfloat32rP   r   
contiguous)r   r   r   r   rg   r   rP   r   r   attn_weights
seq_lengthposition_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_keycausal_maskattn_outputs                       r5   eager_attention_forwardr      sI    <s}}Q':':;;gELv011 ?f6T Y 7 7 [^
j
<K^___ddegijkkj
<K^___ddefhjkk!N2%88FDb9bef9fgg366U[6II)^;;',|4DeMa'b'b$$+/CCC-2\:JESg-h-h*+0<8H#Oc+d+d('EHd'd$#&>>!$QQQ111o	"o%=>#k1=((2U](SSVVW\WbccL=((6?([[L#i/,|U33K''1--88::K$$r7   c                        e Zd Zd fd	Z	 	 	 	 ddej        deej                 deej                 deej                 deej                 d	ee	         d
e
ej                 fdZ xZS )EvollaSaProtSelfAttentionNFc                    t                                                       || _        |j        |j        z  dk    r0t          |d          s t          d|j         d|j         d          |j        | _        t          |j        |j        z            | _        | j        | j        z  | _	        t          j        |j        | j	                  | _        t          j        |j        | j	                  | _        t          j        |j        | j	                  | _        |j        | _        |pt#          |dd          | _        d | _        | j        dk    s| j        d	k    r7|j        | _        t          j        d
|j        z  dz
  | j                  | _        n%| j        dk    rt/          | j                  | _        |j        | _        || _        d| _        | j        o| | _        d S )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()r=   r>   r   r   r|   r%   rotaryr)   r   )rC   rD   r[   rG   num_attention_headsr   
ValueErrorr,   attention_head_sizeall_head_sizer   Linearr   r   r   attention_probs_dropout_probrP   rQ   r=   rotary_embeddingsrT   rE   r   r   
is_decoder	layer_idxr   	is_causal)rZ   r[   r=   r   is_cross_attentionr\   s        r5   rD   z"EvollaSaProtSelfAttention.__init__  s    ::a??PVXhHiHi?8F$6 8 8 48 8 8  
 $*#= #&v'9F<V'V#W#W !58PPYv143EFF
9V/1CDDYv143EFF
:'> (
'-zC
 C
$ "&'>99T=Y]q=q=q+1+ID(&(l1v7U3UXY3Y[_[s&t&tD##)X55%@TE]%^%^%^D" +"C1C-Cr7   hidden_statesrg   r   encoder_hidden_statesencoder_attention_maskr   r   c                    |j         d d         \  }}||d| j        f}	|                     |                              |	                              dd          }
|d u}|r|n|}|r|n|}|                     |                              |	                              dd          }|                     |                              |	                              dd          }|
| j        dz  z  }
| j        dk    r|                     |
|          \  }
}t          }| j
        j        dk    rE| j        dv r%t          d| j
        j         d	| j         d
          t          | j
        j                 } || |
|||f| j        sdn| j        | j        |d|\  }}|                    ||d                                          }||fS )Nr@   r%   r|         r   eagerr   zESM z attention does not support z^ embeddings. Set attention explicitly to 'eager' with `model.set_attn_implementation('eager')`r^   )rP   r   r   )rc   r   r   r   r   r   r   r=   r   r   r[   _attn_implementationr   r   r   rP   r   reshaper   )rZ   r   rg   r   r   r   r   
batch_sizer   hidden_shapequery_layerr   current_states	key_layervalue_layerattention_interfacer   r   s                     r5   rm   z!EvollaSaProtSelfAttention.forward3  s    "/!4SbS!9
J"JD4LMjj//44\BBLLQPQRR2$>2DW..-3EY//>HH^,,11,??II!QOO	jj0055lCCMMaQRSS "D$<d$BB'833%)%;%;K%S%S"K(?;+w66+/UUU h4;; h hY]Yu h h h   #:$+:Z"[$7$7
%
  $}>CC$,L
%
 
%
 
%
 
%
!\ "))*j"EEPPRRL((r7   )NNFrt   )ru   rv   rw   rD   r-   r   r   FloatTensorr   r   r   rm   ry   rz   s   @r5   r   r     s         D  D  D  D  D  DJ 7;15=A>B3) 3)|3) !!233) E-.	3)
  ((9:3) !)): ;3) +,3) 
u|	3) 3) 3) 3) 3) 3) 3) 3)r7   r   c                   $     e Zd Z fdZd Z xZS )EvollaSaProtSelfOutputc                     t                                                       t          j        |j        |j                  | _        t          j        |j                  | _        d S N)	rC   rD   r   r   rG   denserN   rO   rP   rY   s     r5   rD   zEvollaSaProtSelfOutput.__init__j  sJ    Yv163EFF
z&"<==r7   c                 d    |                      |          }|                     |          }||z   }|S r   r   rP   rZ   r   input_tensors      r5   rm   zEvollaSaProtSelfOutput.forwardo  4    

=11]33%4r7   ru   rv   rw   rD   rm   ry   rz   s   @r5   r   r   i  G        > > > > >
      r7   r   c                   H     e Zd Zd fd	Zd Z	 	 	 	 ddee         fdZ xZS )	EvollaSaProtAttentionNFc                    t                                                       t          |||          | _        t	          |          | _        t                      | _        t          j	        |j
        |j                  | _	        d S )N)r   r   r;   )rC   rD   r   rZ   r   outputsetpruned_headsr   rK   rG   rL   )rZ   r[   r   r   r\   s       r5   rD   zEvollaSaProtAttention.__init__w  sk    -f	^pqqq	,V44EEf&8f>STTTr7   c                    t          |          dk    rd S t          || j        j        | j        j        | j                  \  }}t          | j        j        |          | j        _        t          | j        j        |          | j        _        t          | j        j	        |          | j        _	        t          | j
        j        |d          | j
        _        | j        j        t          |          z
  | j        _        | j        j        | j        j        z  | j        _        | j                            |          | _        d S )Nr   r%   r)   )lenr   rZ   r   r   r   r   r   r   r   r   r   r   union)rZ   headsindexs      r5   prune_headsz!EvollaSaProtAttention.prune_heads~  s    u::??F7490$)2OQUQb
 
u
 -TY_eDD	*49=%@@	,TY_eDD	.t{/@%QOOO )-	(EE

(R	%"&)"?$)B_"_	 -33E::r7   r   c                     |                      |          } | j        |f||||d|\  }}	|                     ||          }|S )Nrg   r   r   r   )rK   rZ   r   )
rZ   r   rg   r   r   r   r   hidden_states_lnr   _s
             r5   rm   zEvollaSaProtAttention.forward  sk      >>-88"
)"7#9
 
 
 
Q kk+}==r7   )NFrt   )	ru   rv   rw   rD   r   r   r   rm   ry   rz   s   @r5   r   r   v  s        U U U U U U; ; ;* "#  +,       r7   r   c                 f    | dz  dt          j        | t          j        d          z            z   z  S )zz
    This is the gelu implementation from the original EVOLLA_SA_PROT repo. Using F.gelu yields subtly wrong results.
    g      ?r   g       @)r-   erfmathsqrt)r   s    r5   gelur    s/     s7cEIa$)C..&8999::r7   c                   B     e Zd Z fdZdej        dej        fdZ xZS )EvollaSaProtIntermediatec                     t                                                       t          j        |j        |j                  | _        d S r   )rC   rD   r   r   rG   intermediate_sizer   rY   s     r5   rD   z!EvollaSaProtIntermediate.__init__  s6    Yv163KLL


r7   r   r   c                 N    |                      |          }t          |          }|S r   )r   r  )rZ   r   s     r5   rm   z EvollaSaProtIntermediate.forward  s&    

=11]++r7   ru   rv   rw   rD   r-   r   rm   ry   rz   s   @r5   r  r    sc        M M M M MU\ el        r7   r  c                   $     e Zd Z fdZd Z xZS )EvollaSaProtOutputc                     t                                                       t          j        |j        |j                  | _        t          j        |j                  | _	        d S r   )
rC   rD   r   r   r
  rG   r   rN   rO   rP   rY   s     r5   rD   zEvollaSaProtOutput.__init__  sJ    Yv79KLL
z&"<==r7   c                 d    |                      |          }|                     |          }||z   }|S r   r   r   s      r5   rm   zEvollaSaProtOutput.forward  r   r7   r   rz   s   @r5   r  r    r   r7   r  c                   F     e Zd Z fdZ	 	 	 	 ddee         fdZd Z xZS )EvollaSaProtLayerc                    t                                                       |j        | _        d| _        t	          |          | _        |j        | _        |j        | _        | j        r/| j        st          |  d          t	          |d          | _	        t          |          | _        t          |          | _        t          j        |j        |j                  | _        d S )Nr%   z> should be used as a decoder model if cross attention is addedT)r   r;   )rC   rD   chunk_size_feed_forwardseq_len_dimr   	attentionr   add_cross_attentionRuntimeErrorcrossattentionr  intermediater  r   r   rK   rG   rL   rY   s     r5   rD   zEvollaSaProtLayer.__init__  s    '-'E$.v66 +#)#= # 	Y? l"d#j#j#jkkk"7SW"X"X"XD4V<<(00f&8f>STTTr7   Nr   c                      | j         |f||d|}| j        r8|6t          | d          st          d|  d           | j        |f||||d|}|                     |          }|S )N)rg   r   r  z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r   )r  r   r   AttributeErrorr  feed_forward_chunk)	rZ   r   rg   r   r   r   r   attention_outputlayer_outputs	            r5   rm   zEvollaSaProtLayer.forward  s     *4>
)
 
 	
 
 ? 	4@4!122 $`d ` ` `  
  3t2  -#&;'=        ../?@@r7   c                     |                      |          }|                     |          }|                     ||          }|S r   )rK   r  r   )rZ   r  attention_output_lnintermediate_outputr  s        r5   r  z$EvollaSaProtLayer.feed_forward_chunk  sE    "nn-=>>"//0CDD{{#68HIIr7   rt   )	ru   rv   rw   rD   r   r   rm   r  ry   rz   s   @r5   r  r    s        U U U U U$ "#! ! +,! ! ! !F      r7   r  c                   P     e Zd Z fdZe	 	 	 	 ddee         fd            Z xZS )EvollaSaProtEncoderc                    t                                                       | _        t          j        fdt          j                  D                       | _        t          j        j	        j
                  | _        d| _        d S )Nc                 .    g | ]}t                    S  )r  ).0r  r[   s     r5   
<listcomp>z0EvollaSaProtEncoder.__init__.<locals>.<listcomp>  s"    #g#g#g!$5f$=$=#g#g#gr7   r;   F)rC   rD   r[   r   
ModuleListrangenum_hidden_layerslayerrK   rG   rL   emb_layer_norm_aftergradient_checkpointingrY   s    `r5   rD   zEvollaSaProtEncoder.__init__   s}    ]#g#g#g#guVMeGfGf#g#g#ghh
$&L1CI^$_$_$_!&+###r7   Nr   c           	          t          | j                  D ]\  }}|||         nd }	 ||f||	||d|} | j        r|                     |          }t          |          S )Nr   )last_hidden_state)	enumerater-  r.  r   )
rZ   r   rg   r   r   r   r   ilayer_modulelayer_head_masks
             r5   rm   zEvollaSaProtEncoder.forward  s      )44 		 		OA|.7.CillO(L-)&;'=   MM $ 	E 55mDDM1MRRRRr7   rt   )	ru   rv   rw   rD   r!   r   r   rm   ry   rz   s   @r5   r$  r$    s        , , , , ,  "#S S +,S S S S S S S Sr7   r$  c                   B     e Zd Z fdZdej        dej        fdZ xZS )EvollaSaProtPoolerc                     t                                                       t          j        |j        |j                  | _        t          j                    | _        d S r   )rC   rD   r   r   rG   r   Tanh
activationrY   s     r5   rD   zEvollaSaProtPooler.__init__#  sC    Yv163EFF
'))r7   r   r   c                 r    |d d df         }|                      |          }|                     |          }|S )Nr   )r   r:  )rZ   r   first_token_tensorpooled_outputs       r5   rm   zEvollaSaProtPooler.forward(  s@     +111a40

#56666r7   r  rz   s   @r5   r7  r7  "  s^        $ $ $ $ $
U\ el        r7   r7  c                   r    e Zd ZU eed<   dgZdZdZdZe	 e
edd          g e
edd          gdZd	 Zd
S )EvollaSaProtPreTrainedModelr[   r  Tr%   r  )r   
layer_namer  )r   
attentionscross_attentionsc                 v   | j         j        }t          |t          j                  rJ|j        j                            d|           |j         |j        j        	                                 dS dS t          |t          j
                  rU|j        j                            d|           |j        +|j        j        |j                 	                                 dS dS t          |t          j                  r?|j        j        	                                 |j        j                            d           dS dS )zInitialize the weightsr^   meanstdNr   )r[   initializer_range
isinstancer   r   weightdatanormal_biaszero_rE   r2   rK   fill_)rZ   r   rF  s      r5   _init_weightsz)EvollaSaProtPreTrainedModel._init_weightsA  s'   k+fbi(( 
	*M&&CS&999{& &&((((( '&-- 	*M&&CS&999!-"6#56<<>>>>> .--- 	*K""$$$M$$S)))))	* 	*r7   N)ru   rv   rw   r'   r   _no_split_modules_supports_flash_attn_supports_sdpa_supports_attention_backendr  r#   r   _can_record_outputsrO  r'  r7   r5   r?  r?  1  s         ,-N"& +%~&?qU`aaabN4AJZ[[[
 * * * * *r7   r?  c                   
    e Zd Zdef fdZd Zd Zd Ze	 dde	e
j                 de	e
j                 d	eee
j                 ef         fd
            Z	 	 ddedee         de	e
j                 de	e
j                 d	ef
dZ xZS )EvollaSaProtProteinEncoderr[   c                     t                                          |           t          |          | _        t	          |          | _        d S r   )rC   rD   r9   ri   r$  encoderrY   s     r5   rD   z#EvollaSaProtProteinEncoder.__init__R  s=       088*622r7   c                     | j         j        S r   ri   rI   rZ   s    r5   get_input_embeddingsz/EvollaSaProtProteinEncoder.get_input_embeddingsW  s    ..r7   c                     || j         _        d S r   rZ  rZ   r   s     r5   set_input_embeddingsz/EvollaSaProtProteinEncoder.set_input_embeddingsZ  s    */'''r7   c                     |                                 D ]/\  }}| j        j        |         j                            |           0dS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsrX  r-  r  r   )rZ   heads_to_pruner-  r   s       r5   _prune_headsz'EvollaSaProtProteinEncoder._prune_heads]  sU    
 +0022 	C 	CLE5Lu%/;;EBBBB	C 	Cr7   Nr1   rg   r   c                 R   |                                 }|\  }}|j        }|t          j        ||f|          }|                     ||          }|                     ||          }|                     ||          }	|	d         }
t          |
|	j        |	j	        |	j
                  S )Nr   r1   rg   )rg   r   )r1  r   rA  rB  )rq   rp   r-   onesri   get_extended_attention_maskrX  r   r   rA  rB  )rZ   r1   rg   rr   r   r   rp   rh   extended_attention_maskencoder_outputssequence_outputs              r5   rm   z"EvollaSaProtProteinEncoder.forwarde  s      nn&&!,
J!!"Z*j)A6RRRN)N[["&"B"B>S^"_"_,,}E\,]])!,;-)7&1,=	
 
 
 	
r7   rr   rp   rf   c                 6   |t          |           }|                                dk    r| j        j        s|t	          j        dt                     |                                dk    r|dddddddf         }ng|                                dk    r4| j        j        rt          j        |||          }n,|ddddddf         }nt          d| d|j
         d          |                    |          }d	|z
  t          j        |          j        z  }|S )
a  
        Makes broadcastable attention and causal masks so that future and masked tokens are ignored.

        Arguments:
            attention_mask (`torch.Tensor`):
                Mask with ones indicating tokens to attend to, zeros for tokens to ignore.
            input_shape (`Tuple[int]`):
                The shape of the input to the model.

        Returns:
            `torch.Tensor` The extended attention mask, with a the same dtype as `attention_mask.dtype`.
        Nr|   zNThe `device` argument is deprecated and will be removed in v5 of Transformers.r	   z!Wrong shape for input_ids (shape z) or attention_mask (shape r   r   r   )r   r*   r[   r   warningswarnFutureWarningr   *create_extended_attention_mask_for_decoderr   rc   re   r-   finfomin)rZ   rg   rr   rp   rf   rh  s         r5   rg  z6EvollaSaProtProteinEncoder.get_extended_attention_mask~  sa   & ='--E""$$))dk.D)!dfs  
 1$$&4QQQaaa]&C##!!Q&& {% K*:*e+ +'' +9D$9I*J''sKss\j\psss   #:"<"<5"<"I"I#&)@#@EKPUDVDVDZ"Z&&r7   r   NN)ru   rv   rw   r'   rD   r\  r_  rc  r$   r   r-   r   r   r   r   rm   r,   rp   rf   rg  ry   rz   s   @r5   rV  rV  Q  s>       3| 3 3 3 3 3 3
/ / /0 0 0C C C  26
 
EL)
 !.
 
uU\"$PP	Q	
 
 
 
8 *.'+6' 6'6' 3Z6' &	6'
 $6' 
6' 6' 6' 6' 6' 6' 6' 6'r7   rV  c                   &     e Zd Zd fd	Zd Z xZS )!EvollaSequenceCompressorAttention@      c                    t                                                       |dz  | _        || _        ||z  }t	          j        |          | _        t	          j        |          | _        t	          j        ||d          | _	        t	          j        ||dz  d          | _
        t	          j        ||d          | _        d S )Nr   FrL  r|   )rC   rD   scaler   r   rK   
norm_medianorm_latentsr   to_qto_kvto_out)rZ   r*   dim_headr   	inner_dimr\   s        r5   rD   z*EvollaSequenceCompressorAttention.__init__  s    t^

u$	,s++L--Ic95999	YsIM>>>
i	3U;;;r7   c                 
   |                      |          }|                     |          }| j        }|                     |          }t	          j        ||fd          }|                     |                              dd          \  }}|                    |	                    d          |	                    d          |d          
                    dddd          }|                    |	                    d          |	                    d          |d          
                    dddd          }|                    |	                    d          |	                    d          |d          
                    dddd          }|| j        z  }t	          j        ||                    dd                    }	|	|	                    dd	                                          z
  }	|	j        \  }
}}}t	          j        ||                              |j                  }|d
d
d
d
d
d
f         }|d
d
d
d
d
d
f         }||z  }|	                    d|z
                                  d          }	|	                    d          }t	          j        ||          }|
                    dddd          }|                    |	                    d          |	                    d          d          }|                     |          S )z
        Args:
            x (torch.Tensor): image features
                shape (b, n1, D)
            latent (torch.Tensor): latent features
                shape (b, n2, D);  n2: num of latent tokens
        r   r)   r|   r@   r   r%   r	   Tr*   keepdimNg     )rz  r{  r   r|  r-   r~   r}  r}   r   rq   permutery  r   r   amaxdetachrc   rf  re   rp   r`   boolr   r   r~  )rZ   r   latentsr3   hr   kv_inputr   vsimbsnhskdokdrf  mask_expones_expattnouts                      r5   rm   z)EvollaSequenceCompressorAttention.forward  s    OOA##G,,JIIg9a\r222zz(##))2 * 
 
1 FF166!99affQiiB//771aCCFF166!99affQiiB//771aCCFF166!99affQiiB//771aCC
N l1akk"b1122CHHTH2299;;;9BSz"c""%%dk224qqq()aaaD()("ooq4xoo//66{{r{""l4##kk!Q1%% kk#((1++sxx{{B77{{3r7   )ru  rv  r   rz   s   @r5   rt  rt    sL        < < < < < <)  )  )  )  )  )  ) r7   rt  c                   &     e Zd Zd fd	Zd Z xZS )EvollaFeedForward   c                 >   t                                                       t          ||z            }t          j        |          | _        t          j        ||d          | _        t          j                    | _	        t          j        ||d          | _
        d S NFrx  )rC   rD   r,   r   rK   normr   fc1GELUr:  fc2)rZ   r*   multr  r\   s       r5   rD   zEvollaFeedForward.__init__  sz    d
OO	L%%	9S)%888'))9Y%888r7   c           	          |                      |                     |                     |                     |                                        S r   )r  r:  r  r  )rZ   r   s     r5   rm   zEvollaFeedForward.forward  s6    xx1(>(>??@@@r7   )r  r   rz   s   @r5   r  r    sS        9 9 9 9 9 9A A A A A A Ar7   r  c                   *     e Zd Zdef fdZd Z xZS )!EvollaSequenceCompressorResamplerr[   c           
      p   t                                                       |j        j        }|j        | _        t          j        t          j	        | j        |          d          | _
        t          j        g           | _        t          |j                  D ]^}| j                            t          j        t!          ||j        |j                  t'          ||j                  g                     _t          j        |j                  | _        t          j        ||j                  | _        d S )NT)requires_grad)r*   r  r   )r*   r  )rC   rD   protein_encoder_configrG   resampler_num_latentsnum_latentsr   	Parameterr-   randnr  r*  layersr+  resampler_depthappendrt  resampler_dim_headresampler_headsr  resampler_ff_multrK   r  r   protein_projector)rZ   r[   protein_repr_dimr  r\   s       r5   rD   z*EvollaSequenceCompressorResampler.__init__   s   !8D!7|EK0@BR$S$ScghhhmB''v-.. 
	 
	AK9 06;T\b\r   *.>VE]^^^	 	 	 	 	 L!344	!#+;V=O!P!Pr7   c                 N   |j         d         }|j         \  }}t          j        || j                                      |j                  }t          j        ||fd          }t          j        |                              | j        j                  }| j        d          |                    ddd          z  }|                    |j	                  }| j
        D ]#\  }	}
 |	|||          |z   } |
|          |z   }$|                     |          }|                     |          S )Nr   r%   r)   r@   )rc   r-   rf  r  re   rp   r~   r  r   rf   r  r  r  )rZ   embedsr3   br  r  latent_maskrf  r  r  fftransformed_features               r5   rm   z)EvollaSequenceCompressorResampler.forward  s   LO
AjT%56699$+FFy$,!444 z!}} 344,t$tyyQ':'::**V\** 	, 	,HD"d67D11G;GbkkG+GG"44W==yy,---r7   )ru   rv   rw   r&   rD   rm   ry   rz   s   @r5   r  r    sZ        Q| Q Q Q Q Q Q*. . . . . . .r7   r  c                       e Zd ZU dZeej                 ed<   dZeej                 ed<   dZ	ee
ej        df                  ed<   dZee
ej        df                  ed<   dS )EvollaProteinEncoderModelOutputNsequence_compressor_outputr1  .r   rA  )ru   rv   rw   r  r   r-   r   r   r1  r   r   rA  r'  r7   r5   r  r  )  s          ?C): ;BBB59x 12999=AM8E%"3S"89:AAA:>Ju0#567>>>>>r7   r  c                   X     e Zd Zdef fdZedej        dej        fd            Z	 xZ
S )EvollaProteinEncoderr[   c                     t                                                       t          |j                  | _        t          |          | _        d S )Nr[   )rC   rD   rV  r  modelr  sequence_compressor_resamplerrY   s     r5   rD   zEvollaProteinEncoder.__init__3  sH    /v7TUUU
-NV\-]-]-]***r7   r1   rg   c                     |                      ||          }|j        }|                     ||          }t          ||j                  S )Nre  )r  r1  )r  r1  r  r  )rZ   r1   rg   r   protein_outputprotein_embedssequence_reprs          r5   rm   zEvollaProteinEncoder.forward8  sT    iWW'9::>>ZZ.'4,>
 
 
 	
r7   )ru   rv   rw   r&   rD   r!   r-   
LongTensorr   rm   ry   rz   s   @r5   r  r  2  s        ^| ^ ^ ^ ^ ^ ^
 
!1 
5CT 
 
 
 
 
 
 
 
r7   r  c                        e Zd Z	 	 	 ddee         dee         dee         f fdZd Z eddd	
          	 	 	 	 	 	 	 dd            Z xZ	S )#EvollaSequenceAlignerCrossAttentionNprotein_encoder_dimstructure_encoder_dimmsa_encoder_dimc                    t                                                       |j        | _        |j        | _        | j        dz  | _        t          | j        | j        z            | _        | j        | j        z  | _        |j        }|j	        }|j
        }t          j        | j        | j                  | _        |?t          j        || j                  | _        t          j        || j                  | _        nd | _        d | _        |?t          j        || j                  | _        t          j        || j                  | _        nd | _        d | _        |?t          j        || j                  | _        t          j        || j                  | _        nd | _        d | _        t)          | j                  | _        t          j        |          | _        t          j        | j        | j        |          | _        t3          | j        |          | _        t          j        t9          j        dg                    | _        t          j        t9          j        dg                    | _        d S )Nr   rx  r^   ) rC   rD   rG   r   ry  r,   r   r   $aligner_attention_probs_dropout_probaligner_enable_biasaligner_ffn_multr   r   r   key_proteinvalue_proteinkey_structurevalue_structurekey_msa	value_msaEvollaRMSNormattention_normrN   rP   out_projr  r  r  r-   tensorgate_attentiongate_ffw)	rZ   r[   r  r  r  r   enable_biasffn_multr\   s	           r5   rD   z,EvollaSequenceAlignerCrossAttention.__init__E  s
    	!-#)#= -t3
#&t'7$:R'R#S#S !58PP'-'R$0*Yt/1CDD
*!y)<d>PQQD!#+>@R!S!SD#D!%D ,!#+@$BT!U!UD#%9-BDDV#W#WD  !%D#'D &9_d6HIIDLY8JKKDNNDL!DN+D,<==z">??	$"2D4D;WWW#D$4h?? l5<+>+>??U\3%%8%899r7   c	                    |||g}	d |	D             }	|	st          d          t          j        |	d          }	|                     |          }
|                     |
          }
| j        G| j        @|                    |          }|                     |          }|                     |          }nd}d}| j        G| j	        @|                    |          }|                     |          }| 	                    |          }nd}d}| j
        G| j        @|                    |          }| 
                    |          }|                     |          }nd}d}|||g}d |D             }t          j        |d          }|||g}d |D             }t          j        |d          }|
                                dd         | j        | j        fz   } |
j        |                     d	d
dd          }
|                                dd         | j        | j        fz   } |j        |                     d	d
dd          }|                                dd         | j        | j        fz   } |j        |                     d	d
dd          }|
| j        z  }
|St          j        |                    d	          |                    d                                        |j                  }|ddddddf         |	ddddddf         z  }t          j        |
|                    dd                    }||                    dd                                          z
  }|                    d|z
                                  t          j        |j                  j                  } t;          j        d          |          }t          j        ||          }|                    d	d
dd                                          }|                                dd         | j         fz   } |j        | }| !                    |          }|S )z
        query_states: text
        key_value_states: protein
        query_states: [bs, query_seq_len, dim]
        key_value_states: [bs, kv_seq_len, dim]
        query_attn_mask: [bs, query_seq_len]
        kv_attn_mask: [bs, kv_seq_len]
        c                     g | ]}||S r   r'  r(  r  s     r5   r)  zGEvollaSequenceAlignerCrossAttention.cross_attention.<locals>.<listcomp>  s    AAAa1====r7   z=At least one modality should be provided for cross attention.r%   r)   Nc                     g | ]}||S r   r'  r  s     r5   r)  zGEvollaSequenceAlignerCrossAttention.cross_attention.<locals>.<listcomp>  s    ;;;1Q]Q]]]r7   c                     g | ]}||S r   r'  r  s     r5   r)  zGEvollaSequenceAlignerCrossAttention.cross_attention.<locals>.<listcomp>  s    ???Qqr7   r@   r   r|   r	   r   Tr  )"r   r-   r~   r  r   r  r  re   r  r  r  r  rq   r   r   r   r  ry  rf  rp   r   r   r  r  r`   r  rp  rf   rq  r   Softmaxr   r   r  )rZ   query_statesprotein_key_value_statesstructure_key_value_statesmsa_key_value_statesquery_attn_maskprotein_kv_attn_maskstructure_kv_attn_maskmsa_kv_attn_maskkv_attn_maskr   key_layer_proteinvalue_layer_proteinkey_layer_structurevalue_layer_structurekey_layer_msavalue_layer_msar   r   new_query_layer_shapenew_key_layer_shapenew_value_layer_shaperg   r   attention_scoresattention_probscontext_layernew_context_layer_shapes                               r5   cross_attentionz3EvollaSequenceAlignerCrossAttention.cross_attentionx  s   * -.DFVWAA<AAA 	^\]]]y1555)),77 jj--'D,>,J'?'B'B<'P'P$ $ 0 01I J J"&"4"45M"N"N $"&)d.B.N)C)F)F|)T)T&"&"4"45O"P"P$($8$89S$T$T!!"&$(!<#(B#7#:#:<#H#H  LL)=>>M"nn-ABBOO M"O&(;]K	;;	;;;	IiQ///	*,A?S??+???i333 + 0 0 2 23B3 7$$;
 !
 'k&(=>FFq!QPQRR'nn..ss3$$7
 
 #IN$78@@Aq!LL	 + 0 0 2 23B3 7$$;
 !
 'k&(=>FFq!QPQRR!DJ. "#j):):1)=)=|?P?PQR?S?STTWWXdXkllO(D!!!T)9:\!!!TSWYZYZYZJZ=[[|K1D1DR1L1LMM#l&7&7B&7&M&M&T&T&V&VV'33%%''\5G)H)H)L
 
 -"*,,,-=>> _kBB%--aAq99DDFF"/"4"4"6"6ss";t?Q>S"S**,CDm44r7   past_key_valuepast_key_values4.58new_nameversionc           
      8   |q|j         \  }}}|ct          j        ||                              |	j                  |	                    ||f          j        z                      |j                  }nd }|q|j         \  }}}|ct          j        ||                              |	j                  |
                    ||f          j        z                      |j                  }nd }|q|j         \  }}}|ct          j        ||                              |	j                  |                    ||f          j        z                      |j                  }nd }|}||                                s,||                                s||                                rv|}|                     ||||||||          }t          j	        | j
                  |z  }||z   }|}|                     |          t          j	        | j                  z  }||z   }|S )N)rq   )r  r  r  r  r  r  r  r  )rc   r-   rf  re   rp   rU   Tanyr  tanhr  r  r  )rZ   r  protein_kv_statesstructure_kv_statesmsa_kv_statesr  r  r  r  protein_batch_maskstructure_batch_maskmsa_batch_maskr  r  protein_kv_seq_lenr*   structure_kv_seq_lenmsa_kv_seq_lenr   residuals                       r5   rm   z+EvollaSequenceAlignerCrossAttention.forward  s`     (*;*A'B"C#+Jr#56699:L:STT(//6H"5M/NNPQ"&-.. %
 $( *,?,E)B$c%-Jr#788;;<N<UVV*118Lb7Q1RRTU"(/00 '
 &*"$&3&9#B'Jr>22556H6OPP$++."1E+FFHI"])** !
  $$ */C/G/G/I/I*#/4J4N4N4P4P/).>.B.B.D.D)$H 00*):+>%2 /%9'=!1 1 	 	M "Jt':;;mKM$}4M$H GGM22UZ5N5NNM$}4Mr7   )NNNNNNNNNN)
ru   rv   rw   r   r,   rD   r  r"   rm   ry   rz   s   @r5   r  r  D  s         .2/3)-1: 1: &c]1:  (}	1:
 "#1: 1: 1: 1: 1: 1:fn n n` _%0A6RRR "#!G G G SRG G G G Gr7   r  RMSNormc                   ,     e Zd Zd fd	Zd Zd Z xZS )r  ư>c                     t                                                       t          j        t	          j        |                    | _        || _        dS )z<
        EvollaRMSNorm is equivalent to T5LayerNorm
        N)rC   rD   r   r  r-   rf  rI  variance_epsilon)rZ   rG   r<   r\   s      r5   rD   zEvollaRMSNorm.__init__5  sD     	l5:k#:#:;; #r7   c                    |j         }|                    t          j                  }|                    d                              dd          }|t          j        || j        z             z  }| j        |                    |          z  S )Nr|   r@   T)r  )	rf   re   r-   r   powrE  rsqrtr  rI  )rZ   r   input_dtypevariances       r5   rm   zEvollaRMSNorm.forward=  s|    #)%((77 $$Q'',,R,>>%Ht?T4T(U(UU{]--k::::r7   c                 H    t          | j        j                   d| j         S )Nz, eps=)r   rI  rc   r  r[  s    r5   
extra_reprzEvollaRMSNorm.extra_reprD  s&    )**II$2GIIIr7   )r  )ru   rv   rw   rD   rm   r  ry   rz   s   @r5   r  r  3  sb        $ $ $ $ $ $; ; ;J J J J J J Jr7   r  c                   |     e Zd ZU ej        ed<   ddef fdZ ej                    e	d                         Z
 xZS )EvollaRotaryEmbeddingr   Nr[   c                    t                                                       t          |d          rSt          |j        t
                    r9|j                            d|j                            d                    | _        nd| _        |j        | _	        |j        | _
        || _        t          | j                 | _        |                     | j        |          \  }| _        |                     d|d           | j        | _        d S )Nrope_scaling	rope_typetypedefaultr   FrA   )rC   rD   r   rH  r  dictgetr  rT   max_seq_len_cachedoriginal_max_seq_lenr[   r   rope_init_fnattention_scalingrR   r   original_inv_freq)rZ   r[   rp   r   r\   s       r5   rD   zEvollaRotaryEmbedding.__init__K  s    6>** 	'z&:Mt/T/T 	'#044[&BUBYBYZ`BaBabbDNN&DN"("@$*$B!/?+/+<+<T[&+Q+Q($(ZeDDD!%r7   c                 X   | j         d d d d f                                                             |j        d         dd                              |j                  }|d d d d d f                                         }t          |j        j        t                    r|j        j        dk    r|j        j        nd}t          j
        |d          5  |                                |                                z                      dd          }t          j        ||fd	          }|                                | j        z  }|                                | j        z  }	d d d            n# 1 swxY w Y   |                    |j        
          |	                    |j        
          fS )Nr   r@   r%   mpscpuF)device_typeenabledr|   r)   r   )r   rd   rU   rc   re   rp   rH  r  strr-   autocastr   r~   r   r&  r   rf   )
rZ   r   r?   inv_freq_expandedposition_ids_expandedr+  r   r   r   r   s
             r5   rm   zEvollaRotaryEmbedding.forward\  s    !M$4-8>>@@GGHZ[\H]_acdeehhijiqrr ,QQQaaaZ 8 > > @ @'1!(-'E'Ek!(-[`J`J`ahmmfk^UCCC 	5 	5&,,..1F1L1L1N1NNYYZ[]^__E)UEN333C''))d44C''))d44C		5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 vvAGv$$cff17f&;&;;;s   BE++E/2E/r   )ru   rv   rw   r-   r   r   r&   rD   no_gradr   rm   ry   rz   s   @r5   r  r  H  s         l/ /| / / / / / /" U]__< <  _< < < < <r7   r  c                   $     e Zd Z fdZd Z xZS )	EvollaMLPc                    t                                                       || _        |j        | _        |j        | _        t          j        | j        | j        |j                  | _        t          j        | j        | j        |j                  | _	        t          j        | j        | j        |j                  | _
        t          |j                 | _        d S )Nrx  )rC   rD   r[   rG   r
  r   r   mlp_bias	gate_projup_proj	down_projr
   
hidden_actact_fnrY   s     r5   rD   zEvollaMLP.__init__m  s    !-!'!94#3T5KRXRabbby!143IPVP_```4#94;KRXRabbbV./r7   c                     |                      |                     |                     |                    |                     |          z            }|S r   )r8  r:  r6  r7  )rZ   r   r8  s      r5   rm   zEvollaMLP.forwardw  sA    NN4;;t~~a/@/@#A#ADLLQROO#STT	r7   r   rz   s   @r5   r3  r3  l  sG        0 0 0 0 0      r7   r3  c                     | dd| j         d         dz  f         }| d| j         d         dz  df         }t          j        | |fd          S )z*Rotates half the hidden dims of the input..Nr@   r|   r)   )rc   r-   r~   r   s      r5   rotate_halfr=  |  s]    	
3"!'"+"""	#B	
3q """	#B9rc2YB''''r7   c                     |                     |          }|                     |          }| |z  t          |           |z  z   }||z  t          |          |z  z   }||fS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        position_ids (`torch.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    )ra   r=  )r   r   r   r   r?   unsqueeze_dimq_embedk_embeds           r5   apply_rotary_pos_embrB    sc    ( --
&
&C
--
&
&C3w;q>>C/0G3w;q>>C/0GGr7   r   n_repr   c                     | j         \  }}}}|dk    r| S | dddddddddf                             |||||          } |                     |||z  ||          S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    r%   N)rc   rU   r   )r   rC  batchnum_key_value_headsslenhead_dims         r5   	repeat_kvrI    s    
 2?1D.Ehzz!!!!QQQaaa"23::5BUW\^bdlmmM  (;e(CT8TTTr7   c                       e Zd ZdZdedef fdZ eddd          	 	 dd
ej	        de
ej	        ej	        f         deej	                 dee         deej                 dee         de
ej	        ej	        f         fd            Z xZS )EvollaAttentionz=Multi-headed attention from 'Attention Is All You Need' paperr[   r   c                    t                                                       || _        || _        t	          |d|j        |j        z            | _        |j        |j        z  | _	        | j        dz  | _
        |j        | _        d| _        t          j        |j        |j        | j        z  |j                  | _        t          j        |j        |j        | j        z  |j                  | _        t          j        |j        |j        | j        z  |j                  | _        t          j        |j        | j        z  |j        |j                  | _        d S )NrH  r   Trx  )rC   rD   r[   r   rQ   rG   r   rH  rF  num_key_value_groupsr   attention_dropoutr   r   r   attention_biasq_projk_projv_projo_projrZ   r[   r   r\   s      r5   rD   zEvollaAttention.__init__  sB   "
F4F&Jd4dee$*$>&B\$\!}d*!'!9i :T] JQWQf
 
 
 i :T] JQWQf
 
 
 i :T] JQWQf
 
 
 i&68JQWQf
 
 
r7   r  r  r  r  Nr   rV   rg   cache_positionr   r   c                 D   |j         d d         }g |d| j        R }|                     |                              |                              dd          }	|                     |                              |                              dd          }
|                     |                              |                              dd          }|\  }}t          |	|
||          \  }	}
|&|||d}|                    |
|| j	        |          \  }
}t          }| j        j        dk    rt          | j        j                 } || |	|
||f| j        sdn| j        | j        d|\  }} |j        g |dR                                  }|                     |          }||fS )Nr@   r%   r|   )r   r   rU  r   r^   )rP   r   )rc   rH  rP  r   r   rQ  rR  rB  updater   r   r[   r   r   r   rN  r   r   r   rS  )rZ   r   rV   rg   r  rU  r   rr   r   r  
key_statesvalue_statesr   r   cache_kwargsr   r   r   s                     r5   rm   zEvollaAttention.forward  s    $)#2#.88b8$-88{{=1166|DDNNqRSTT[[//44\BBLLQPQRR
{{=1166|DDNNqRSTT&S#7jRUWZ#[#[ j&#&snUUL'6'='=j,X\Xfht'u'u$J(?;+w66"9$+:Z"[$7$7	%
  $}HCC$2HL	%
 	%
 	%
 	%
!\ *k);;;;;;FFHHkk+..L((r7   rr  )ru   rv   rw   rx   r&   r,   rD   r"   r-   r   r   r   r   r  r   r   rm   ry   rz   s   @r5   rK  rK    s       GG
| 
 
 
 
 
 
 
. _%0A6RRR ,059)) ))|)) #5<#=>)) !.	))
 "%)) !!12)) +,)) 
u|U\)	*)) )) )) SR)) )) )) )) ))r7   rK  c                        e Zd Zdedef fdZ eddd          	 	 	 	 	 	 	 	 	 	 	 	 dd
ej        de	ej        ej        f         de
ej                 de
ej                 de
e         de
e         de
ej                 de
ej                 de
ej                 de
ej                 de
ej                 de
ej                 de
ej                 de
ej                 dej        fd            Z xZS )EvollaDecoderLayerr[   r   c                    t                                                       |j        | _        t          ||          | _        t          |          | _        t          |j        |j                  | _	        t          |j        |j                  | _
        |dz   t          |j        |j        z  d          z  dk    rt          ||j                  | _        d S d S )Nr[   r   r;   r%   r   )r  )rC   rD   rG   rK  	self_attnr3  mlpr  rms_norm_epsinput_layernormpost_attention_layernormmaxr,  aligner_num_add_layersr  adapterrT  s      r5   rD   zEvollaDecoderLayer.__init__  s    !-()LLLV$$,V-?VEXYYY(5f6HfNa(b(b(b%MS!9V=Z!Z\]^^^bccc>$*$6  DLLL dcr7   r  r  r  r  NFr   rV   rg   r?   	use_cacherU  r  r  r  r  r  r  r  r   c                 *   |}|                      |          } | j        d|||||||d|\  }}||z   }|}|                     |          }|                     |          }||z   }t	          | d          r|                     |||	|
||||          }|S )N)r   rg   r?   r  rg  rU  rV   rf  )r  r  r  r  r  r  r  r  r'  )rb  r_  rc  r`  r   rf  )rZ   r   rV   rg   r?   r  rg  rU  r  r  r  r  r  r  r  r   r  r  s                     r5   rm   zEvollaDecoderLayer.forward  s    & !,,];; *4> 	
')%+) 3	
 	
 	
 	
q !=0 !55mDD// =04## 
	 LL*"3$7+ /#5%9- ) 	 	M r7   )NNNFNNNNNNNN)ru   rv   rw   r&   r,   rD   r"   r-   r   r   r   r  r   r  rm   ry   rz   s   @r5   r\  r\    s       |        _%0A6RRR
 2637+/$)59486:04597;15265 5|5 #5<#=>5 !.	5
 u/05 "%5 D>5 !!125 $EL15 &el35  -5 %U\25 'u|45 !.5 "%,/5" 
#5 5 5 SR5 5 5 5 5r7   r\  c                   ^     e Zd ZU eed<   dZdZg dZdgZdZ	dZ
dZdZdZeedZ fdZ xZS )	EvollaPreTrainedModelr[   r  T)r\  r  r  r  F)r   rA  c                    | j         j        }t                                          |           t	          |t
                    rX|j                                         |j                                         |j	        j
        j                            d           d S t	          |t                    r#|j        j                            d|           d S d S )Nr   r^   rD  )r[   rG  rC   rO  rH  r  r  rM  r  r  rI  rJ  rN  r  r  rK  )rZ   r   rF  r\   s      r5   rO  z#EvollaPreTrainedModel._init_weightsP  s    k+f%%%fABB 	;!'')))O!!###!(-33C88888 ABB 	;N''Sc':::::	; 	;r7   )ru   rv   rw   r&   r   base_model_prefixsupports_gradient_checkpointingrP  _skip_keys_device_placementrQ  rR  _supports_flex_attn_can_compile_fullgraphrS  r\  rK  rT  rO  ry   rz   s   @r5   rj  rj  :  s         &*#  
 $5"5 N!"'+% 
; ; ; ; ; ; ; ; ;r7   rj  c            !           e Zd Zdef fdZd Zd Zee	 	 	 	 	 	 	 	 	 	 	 	 	 dde	e
j                 de	e
j                 de	e
j                 d	e	e         d
e	e
j                 de	e         de	e
j                 de	e
j                 de	e
j                 de	e
j                 de	e
j                 de	e
j                 de	e
j                 deeef         fd                        Z xZS )EvollaModelr[   c                 &   t                                                     j        | _        j        | _        t          j        | j        j        | j                  | _        t                    | _
        t          j        fdt          j                  D                       | _        t          j        j                  | _        t%                    | _        t)          dd          | _        |                                  d S )Nr  c                 2    g | ]}t          |           S )r^  )r\  )r(  r   r[   s     r5   r)  z(EvollaModel.__init__.<locals>.<listcomp>c  s@       
 	 #!'    r7   r;   r/  F)rC   rD   rH   r2   rF   r   rE   rG   embed_tokensr  protein_encoderr*  r+  r,  r  r  ra  r  r  
rotary_embrQ   r/  	post_initrY   s    `r5   rD   zEvollaModel.__init__\  s      !. +L&:LdN^__36BBBm   
 "'v'?!@!@  
 
 "&"4&:MNNN	/v>>>&-f6NPU&V&V#r7   c                     | j         S r   ru  r[  s    r5   r\  z EvollaModel.get_input_embeddingsq  s      r7   c                     || _         d S r   rz  r^  s     r5   r_  z EvollaModel.set_input_embeddingst  s    !r7   Nr1   rg   r?   r  rh   rg  rU  protein_input_idsprotein_attention_maskstructure_feats	msa_featsr  r  r   c                    |du |duz  rt          d          ||                     |          }|r|t          | j                  }|B||                                nd}t          j        |||j        d         z   |j                  }||	                    d          }d}d}|J|	H| 
                    ||	          }|j        }t          j        dg|j        d         z  |j                  }t          | j        ||||	          }|}|                     ||          }| j        D ]} ||f||||||||
|||||d
|}|                     |          }t#          ||          }|S )a;  
        protein_input_ids (torch.LongTensor):
            The input IDs for the protein sequence in structure-aware tokens. Should be of shape `(batch_size, protein_seq_length)` and type `torch.LongTensor`.
        protein_attention_mask (torch.Tensor):
            The attention mask for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.Tensor`.
        structure_feats (torch.FloatTensor):
            The input IDs for purely structure-based features. Should be of shape `(batch_size, structure_seq_length, structure_feat_dim)` and type `torch.FloatTensor`. Dummy input for now.
        msa_feats (torch.FloatTensor):
            The input IDs for purely MSA-based features. Should be of shape `(batch_size, msa_seq_length, msa_feat_dim)` and type `torch.FloatTensor`. Dummy input for now.
        structure_batch_mask (torch.Tensor):
            The batch mask to decide which protein sequences are purely structure-based. Should be of shape `(batch_size)` and type `torch.Tensor`. Should be paired with `structure_feats`. Dummpy input for now.
        msa_batch_mask (torch.Tensor):
            The batch mask to decide which protein sequences are purely MSA-based. Should be of shape `(batch_size)` and type `torch.Tensor`. Should be paired with `msa_feats`. Dummpy input for now.
        Nz:You must specify exactly one of input_ids or inputs_embedsr  r   r%   r   re  T)r[   input_embedsrg   rU  r  )rg   r?   r  rg  rU  rV   r  r  r  r  r  r  r  )r1  r  )r   ru  r   r[   get_seq_lengthr-   rS   rc   rp   ra   rv  r  r  r   rw  r  r  r   )rZ   r1   rg   r?   r  rh   rg  rU  r|  r}  r~  r  r  r  r   past_seen_tokensprotein_featsr  protein_outputsr   r   rV   decoder_layerr   s                           r5   rm   zEvollaModel.forwardw  s   B -t";< 	[YZZZ  --i88M 	?0*$+>>>O!CRC^==???de"\ "2]5H5K"KTaTh  N )33A66L!(-C-O"22+5 3  O ,FM!&tf7H7Nq7Q.QZkZr!s!s!s(;&))+
 
 
 & #oom\JJ![ 	 	M)M*) /#-$7"/$3'#5%9- .   MM$ 		-00(++
 
 
 r7   )NNNNNNNNNNNNN)ru   rv   rw   r&   rD   r\  r_  r    r$   r   r-   r  r   r   r   r  r   r   r   rm   ry   rz   s   @r5   rr  rr  [  s       |      *! ! !" " "  151537+/59$(598<9=7;157;15b bE,-b !.b u/0	b
 "%b   12b D>b !!12b $E$45b !) 6b "%"34b E-.b 'u|4b !.b  
u--	.!b b b  ^b b b b br7   rr  c                       e Zd Z fdZd Zd Zee	 	 	 	 	 	 	 ddee	j
                 dee	j                 dee	j                 dee	j
                 d	ee	j
                 d
ee	j                 dee         fd                        Z xZS )EvollaForProteinText2Textc                     t                                          |           t          |          | _        |j        | _        t          j        |j        | j        d          | _        | 	                                 d S r  )
rC   rD   rr  r  rF   r   r   rG   lm_headrx  rY   s     r5   rD   z"EvollaForProteinText2Text.__init__  sg        ((
 +y!3T_5QQQr7   c                 4    | j                                         S r   )r  r\  r[  s    r5   r\  z.EvollaForProteinText2Text.get_input_embeddings  s    z..000r7   c                 6    | j                             |          S r   )r  r_  r^  s     r5   r_  z.EvollaForProteinText2Text.set_input_embeddings  s    z..u555r7   Nr1   rg   rh   labelsr|  r}  rg  c           
           | j         d||||||d|}	|	d         }
|                     |
          }d}| | j        d||| j        d|}t	          |||	j        |	j        |	j                  }|S )a,  
        protein_input_ids (torch.LongTensor):
            The input IDs for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.LongTensor`.
        protein_attention_mask (torch.Tensor):
            The attention mask for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.Tensor`.

        Example:

        ```python
        >>> from transformers import EvollaProcessor, EvollaForProteinText2Text
        >>> model = EvollaForProteinText2Text.from_pretrained("westlake/Evolla-10B-hf")
        >>> processor = EvollaProcessor.from_pretrained("westlake/Evolla-10B-hf")

        >>> protein_information = {
            "aa_seq": "your amino acid sequence",
            "foldseek": "your foldseek sequence",
        }
        >>> question = "What is the function of this protein?"
        >>> message = [
            {"role": "system", "content": "You are an AI expert that can answer any questions about protein."},
            {"role": "user", "content": question},
        ]

        >>> inputs = processor(proteins=[protein_information], messages_list=[message], return_tensors="pt", padding="longest")
        >>> outputs = model.generate(**inputs)

        >>> print(processor.batch_decode(outputs, skip_special_tokens=True))
        ```)r1   rg   rh   r|  r}  rg  r   N)logitsr  rF   )lossr  r  r   rA  r'  )r  r  loss_functionrF   r   r  r   rA  )rZ   r1   rg   rh   r  r|  r}  rg  r   outputsr   r  r  
lm_outputss                 r5   rm   z!EvollaForProteinText2Text.forward  s    T $* 
)'/#9
 
 
 
  
m,,%4%iVFtiibhiiD+#3!/)
 
 

 r7   r  )ru   rv   rw   rD   r\  r_  r!   r    r   r-   r  r   r   r  rm   ry   rz   s   @r5   r  r    s           1 1 16 6 6  151559-18<9=$(? ?E,-? !.?   12	?
 )*? $E$45? !) 6? D>? ? ? ^ ? ? ? ? ?r7   r  )r  rr  rj  )r^   N)Nr%   )]r  rl  dataclassesr   typingr   r   r   r-   r   r   activationsr
   cache_utilsr   r   
generationr   integrationsr   masking_utilsr   modeling_layersr   modeling_outputsr   r   r   r   r   modeling_rope_utilsr   r   modeling_utilsr   r   r   r   processing_utilsr   pytorch_utilsr   r   utilsr   r    r!   utils.deprecationr"   utils.genericr#   r$   configuration_evollar&   r'   r6   Moduler9   r   r   r   rd   r   r   r   r   r  r  r  r  r$  r7  r?  rV  rt  r  r  r  r  r  r  r  r3  r=  rB  r,   rI  rK  r\  rj  rr  r  __all__r'  r7   r5   <module>r     sA  ,   ! ! ! ! ! ! , , , , , , , , , ,          ! ! ! ! ! ! . . . . . . . . ) ) ) ) ) ) 7 7 7 7 7 7 / / / / / / 9 9 9 9 9 9              L K K K K K K K m m m m m m m m m m m m & & & & & & Q Q Q Q Q Q Q Q I I I I I I I I I I 0 0 0 0 0 0 ? ? ? ? ? ? ? ? < < < < < < < <4 4 4 ^= ^= ^= ^= ^=RY ^= ^= ^=B( ( (
2 2 2)
 )
 )
 )
 )
") )
 )
 )
f (,/% /%I/%</% 
/% <	/%
 U\*/% /% /% %/% '(/% /% /% /%dV) V) V) V) V)	 V) V) V)r
 
 
 
 
RY 
 
 
- - - - -BI - - -`; ; ;    ry   
 
 
 
 
 
 
 
7 7 7 7 72 7 7 7t S  S  S  S  S")  S  S  SF        * * * * */ * * *>c' c' c' c' c'!< c' c' c'L7  7  7  7  7 	 7  7  7 tA A A A A	 A A A'. '. '. '. '.	 '. '. '.T ? ? ? ? ?k ? ?  ?
 
 
 
 
29 
 
 
$l l l l l") l l l^ Y''J J J J JBI J J ('J(!< !< !< !< !<BI !< !< !<H    	    ( ( (   6	UU\ 	U# 	U%, 	U 	U 	U 	UD) D) D) D) D)bi D) D) D)NF F F F F3 F F FR ; ; ; ; ;O ; ; ;@@ @ @ @ @' @ @ @FP P P P P 5 P P Pf P
O
Or7   