
     `i,                        d Z ddlZddlZddlZddlmZmZmZ ddlm	Z	m
Z
 ddlZddlmZ eeej        f         Zeee	f         ZdZ ej        d	           G d
 d                      ZdedefdZddededee         fdZdededefdZdedefdZdedej        fdZ	 	 	 	 	 ddedede
ej                 de
ej                 de
e         de
ee                  de
ee                  defdZdS ) zProtein data type.    N)IteratorMappingSequence)AnyOptional   )residue_constantsg{Gz?T)frozenc                      e Zd ZU dZej        ed<   ej        ed<   ej        ed<   ej        ed<   ej        ed<   dZeej                 ed<   dZ	ee
         ed	<   dZeee
                  ed
<   dZeee                  ed<   dS )Proteinz!Protein structure representation.atom_positionsaatype	atom_maskresidue_index	b_factorsNchain_indexremarkparentsparents_chain_index)__name__
__module____qualname____doc__npndarray__annotations__r   r   r   strr   r   r   int     /home/jaya/work/projects/VOICE-AGENT/VIET/agent-env/lib/python3.11/site-packages/transformers/models/esm/openfold_utils/protein.pyr   r   "   s         ++ J J z :
 z )-K"*%,,, !FHSM    (,GXhsm$+++ 48(3-077777r    r   proteinnet_strreturnc                 j   d}d t          j        ||           D             }t          |dd d         d |dd d         D                       }g d}d }d }d }|D ]}d|d         k    rw|d         d                                         }	t	          t          |	                    D ]}
|	|
         t          j        vrd	|	|
<   t          j	        d
 |	D                       }d|d         k    r+g }t	          d          D ]U}|
                    t          t          t          |d         |                                                                        Vt          j	        |          }t          j        t          |d                   dz  t          j        df                              t          j                  }t%          |          D ]@\  }
}t          j        |d d |
d df                   |d d t          j        |         d d f<   A|t*          z  }d|d         k    rt          j	        t          t          dddj        |d         d                                                                       }t          j        t          |          t          j        f                              t          j                  }t%          |          D ]\  }
}d|d d t          j        |         f<   ||d         z  }|J t/          |||t          j        t          |                    d           S )Nz(\[[A-Z]+\]\n)c                 \    g | ])}t          |          d k    |                                *S r   )lenstrip).0tags     r!   
<listcomp>z*from_proteinnet_string.<locals>.<listcomp>J   s5    ___sRUVYRZRZ]^R^R^syy{{R^R^R^r    r      c                 8    g | ]}|                     d           S )
)split)r)   ls     r!   r+   z*from_proteinnet_string.<locals>.<listcomp>K   s#    >a>a>aQRqwwt}}>a>a>ar    r   )NCACz	[PRIMARY]Xc                 b    g | ],}t           j                            |t           j                  -S r   )r	   restype_ordergetrestype_num)r)   
res_symbols     r!   r+   z*from_proteinnet_string.<locals>.<listcomp>X   s1    vvvdn"044ZARA^__vvvr    z
[TERTIARY]   z[MASK])-+).N)r   r   r   r   r   )rer/   zipr(   ranger'   r	   restypesr   arrayappendlistmapfloatzerosatom_type_numastypefloat32	enumerate	transpose
atom_orderPICO_TO_ANGSTROMr7   r   arange)r"   tag_retagsgroupsatomsr   r   r   gseqitertiaryaxistertiary_npatommasks                   r!   from_proteinnet_stringr[   H   sE   F__bhv~.N.N___D.1$qt!t*>a>aVZ[\[_^_[_V`>a>a>a.b.bF'''EFNI ) )!A$A$q'--//C3s88__ ! !q6!2!;;; CFXvvruvvv FF QqT!!*,Ha F FS!T
0@0@0B0B%C%C D DEEEE(8,,KXs8A;'7'71'<>O>]_`&abbiijljtuuN$U++ n n4KM<XcdededeghgkjkgkdkXlKmKmqqq"3">t"DaaaGHH..NN18D11%5%5%91Q47==??!K!KLLMMDII%3 
 fRZ    %U++ E E4CD	!!!.9$??@@i(I%iF,,   r    protchain_idc                 0   g }| j         }||                    d|            | j        }| j        }||fdt	          ||          D             }|t          |          dk    rdg}|                    dd                    |                      |S )NREMARK c                 &    g | ]\  }}|k    |S r   r   )r)   rU   pr]   s      r!   r+   z#get_pdb_headers.<locals>.<listcomp>   s"    TTTAa8mm1mmmr    r   N/APARENT  )r   rB   r   r   r>   r'   join)r\   r]   pdb_headersr   r   r   s    `    r!   get_pdb_headersrg   z   s    K[F-V--...lG22>TTTT%8'!B!BTTT#g,,!++'4'!2!244555r    pdb_strc                    g }|                     d          }| j        }||                    d|            | j        t	          | j                  dk    rg }| j        i }t          | j        | j                  D ]P\  }}|                    t          |          g            |t          |                                       |           Qt          d |D                       }	t          |	dz             D ];}|                    t          |          dg          }
|                    |
           <n,|                    t          | j                             ndgg}dt          t                   d	t          fd
}|                     ||d                              d}t          |          D ]u\  }}d|vrd|vr|                    |           d|v rOd||dz            vrB|dz  }|t	          |          k    s	||         }
ndg}
|                     ||
                     vd                    |          S )zWAdd pdb headers to an existing PDB string. Useful during multi-chain
    recycling
    r.   Nr_   r   c              3   4   K   | ]}t          |          V  d S )N)r   )r)   	chain_idxs     r!   	<genexpr>z"add_pdb_headers.<locals>.<genexpr>   s(      FFY#i..FFFFFFr    r   rb   ra   r#   c                 2    dd                     |            S )Nrc   rd   )re   )ra   s    r!   make_parent_linez)add_pdb_headers.<locals>.make_parent_line   s    &!&&&r    PARENTREMARKTEREND)r/   r   rB   r   r'   r   r>   
setdefaultr   maxr?   r7   rC   r   rJ   re   )r\   rh   out_pdb_lineslinesr   parents_per_chainparent_dictra   rU   max_idxchain_parentsrn   chain_counterr0   s                 r!   add_pdb_headersr|      s     "MMM$E[F/v//000 |C$5$5$9$9#/02KDL$*BCC . .1&&s1vvr222CFF#**1----FF+FFFFFG7Q;'' 8 8 +A @ @!((77778 $$T$,%7%78888#WI'HSM 'c ' ' ' ' ))*;A*>??@@@M%   
B 
B11!2!2  ###A::%uQU|33QM C(9$:$::: 1- @!&  !1!1-!@!@AAA99]###r    c                   ! t           j        dgz   !dt          dt          f!fd}t           j        }g }| j        }| j        }| j        }| j        	                    t          j                  }| j        }| j        }	t          j        |t           j        k              rt!          d          t#          |           }
t%          |
          dk    r|                    |
           |j        d         }d}d}t*          j        }d}t/          |          D ]} |||                   }t1          |||         ||         ||                   D ]\  }}}}|d	k     rd
}t%          |          dk    r|nd| }d}d}d}|d         }d}d}|	||	|                  }|d|dd|d|d|dd|d||         d|dd|d         d|d         d|d         d|d|dd|d|d}|                    |           |dz  }||dz
  k    }|	%||dz
  k    r|	|dz            |k    rd}|	|dz            }|rpd}|d|dd |||                   dd|d||         d} |                    |            |dz  }||dz
  k    r#|                    t#          | |                     |                    d           |                    d           d                     |          S )!zConverts a `Protein` instance to a PDB string.

    Args:
      prot: The protein to convert to PDB.

    Returns:
      PDB string.
    r4   rr#   c                 P    t           j                            |          d          S )NUNK)r	   restype_1to3r7   )r~   r@   s    r!   res_1to3zto_pdb.<locals>.res_1to3   s      -11(1+uEEEr    zInvalid aatypes.r   r   Ng      ?ATOM   rd    g      ?Az<6z>5z<4z>1z>3z>4z   z>8.3fr,   z>6.2fz
          z>2Trq   z      rr   r.   )r	   r@   r   r   
atom_typesr   r   r   r   rH   r   int32r   r   anyr8   
ValueErrorrg   r'   extendshapestringascii_uppercaser?   r>   rB   re   )"r\   r   r   	pdb_linesr   r   r   r   r   r   headersn
atom_indexprev_chain_index
chain_tags	chain_tagrU   
res_name_3	atom_nameposrZ   b_factorrecord_typenamealt_locinsertion_code	occupancyelementcharge	atom_lineshould_terminate	chain_endchain_termination_liner@   s"                                    @r!   to_pdbr      s    !)SE1HFC FC F F F F F F #-JII[F(N&--bh77MI"K	vf(4455 -+,,,d##G
7||a!!!QAJ'JI1XX 0J 0JXfQi((
.1*nQ>OQZ[\Q]_hij_k.l.l 	 	*IsD(czz K #I! 3 399YDGNIlGFI&&{1~6	 H +:H + +4H +GH +1+ +#,1+ #>+(6>+ + q6=+ "%Q=+ 031v=+ >	+ %->	+ +
 *+
  &*+ +  Y'''!OJJA:"AEzzk!a%04DDD#' #.q1u#5  	JIrrrrrhhvay6I6Irrryrr]jkl]mrrr # 3444!OJAEzz   7G!H!HIIIUR99Yr    c                 0    t           j        | j                 S )ao  Computes an ideal atom mask.

    `Protein.atom_mask` typically is defined according to the atoms that are reported in the PDB. This function
    computes a mask according to heavy atoms that should be present in the given sequence of amino acids.

    Args:
      prot: `Protein` whose fields are `numpy.ndarray` objects.

    Returns:
      An ideal atom mask.
    )r	   STANDARD_ATOM_MASKr   )r\   s    r!   ideal_atom_maskr     s     /<<r    featuresresultr   r   r   r   r   c                     t          | d         |d         |d         | d         dz   ||nt          j        |d                   ||||	  	        S )a  Assembles a protein from a prediction.

    Args:
      features: Dictionary holding model inputs.
      result: Dictionary holding model outputs.
      b_factors: (Optional) B-factors to use for the protein.
      chain_index: (Optional) Chain indices for multi-chain predictions
      remark: (Optional) Remark about the prediction
      parents: (Optional) List of template names
    Returns:
      A protein instance.
    r   final_atom_positionsfinal_atom_maskr   r   N)	r   r   r   r   r   r   r   r   r   )r   r   
zeros_like)r   r   r   r   r   r   r   s          r!   from_predictionr   ,  sj    * !45*+/!3(4))"-O`Ha:b:b/
 
 
 
r    r&   )NNNNN)r   dataclassesr=   r   collections.abcr   r   r   typingr   r   numpyr   r   r	   r   r   FeatureDictModelOutputrM   	dataclassr   r[   r   rC   rg   r|   r   r   r   r   r    r!   <module>r      s[          				  7 7 7 7 7 7 7 7 7 7                           c2:o&c3h  d###"8 "8 "8 "8 "8 "8 "8 $#"8J/3 /7 / / / /d ' S c    (/$' /$C /$C /$ /$ /$ /$dZ  Z S Z  Z  Z  Z z=' =bj = = = =$ '+(, '+37  
# "*%	
 SM hsm$ "(3-0      r    