o
    Zh,                     @   sT  d Z ddlZddlZddlZddlmZmZmZmZm	Z	m
Z
mZmZ ddlZddlmZ e	eejf Ze	eef ZdZejddG d	d
 d
ZdedefddZd#dededee fddZdededefddZdedefddZdedejfddZ					d$dedede
ej de
ej de
e de
ee  d e
ee  defd!d"ZdS )%zProtein data type.    N)AnyDictIteratorListMappingOptionalSequenceTuple   )residue_constantsg{Gz?T)frozenc                   @   s   e Zd ZU dZejed< ejed< ejed< ejed< ejed< dZeej ed< dZ	ee
 ed	< dZeee
  ed
< dZeee  ed< dS )Proteinz!Protein structure representation.atom_positionsaatype	atom_maskresidue_index	b_factorsNchain_indexremarkparentsparents_chain_index)__name__
__module____qualname____doc__npndarray__annotations__r   r   r   strr   r   r   int r    r    ]/var/www/auris/lib/python3.10/site-packages/transformers/models/esm/openfold_utils/protein.pyr   !   s   
 




r   proteinnet_strreturnc              	   C   s&  d}dd t || D }t|dd d dd |dd d D }g d}d }d }d }|D ]}d	|d kr\|d d  }	tt|	D ]}
|	|
 tjvrPd
|	|
< qCt	dd |	D }q-d|d krg }tdD ]}|
ttt|d |   qht	|}tt|d d tjdftj}t|D ]\}
}t|d d |
d df |d d tj| d d f< q|t9 }q-d|d krt	ttdddj|d d  }tt|tjftj}t|D ]\}
}d|d d tj| f< q||d 9 }q-|d usJ t|||tt|d dS )Nz(\[[A-Z]+\]\n)c                 S   s    g | ]}t |d kr| qS r   )lenstrip).0tagr    r    r!   
<listcomp>I   s     z*from_proteinnet_string.<locals>.<listcomp>r      c                 S   s   g | ]}| d qS )
)split)r'   lr    r    r!   r)   J   s    r
   )NCACz	[PRIMARY]Xc                 S   s   g | ]
}t j|t jqS r    )r   Zrestype_ordergetrestype_num)r'   Z
res_symbolr    r    r!   r)   W       z
[TERTIARY]   z[MASK])-+).N)r   r   r   r   r   )rer,   zipr&   ranger%   r   restypesr   arrayappendlistmapfloatZzerosZatom_type_numastypeZfloat32	enumerateZ	transposeZ
atom_orderPICO_TO_ANGSTROMr2   r   Zarange)r"   tag_retagsgroupsZatomsr   r   r   gseqiZtertiaryZaxisZtertiary_npZatommaskr    r    r!   from_proteinnet_stringG   s^   ("
&6
(rK   protchain_idc                    s   g }| j }|d ur|d|  | j}| j}|d ur+|d ur+ fddt||D }|d u s5t|dkr8dg}|dd|  |S )NREMARK c                    s   g | ]
\}}| kr|qS r    r    )r'   rI   prM   r    r!   r)      r4   z#get_pdb_headers.<locals>.<listcomp>r   N/APARENT  )r   r=   r   r   r9   r%   join)rL   rM   Zpdb_headersr   r   r   r    rP   r!   get_pdb_headersy   s   rU   pdb_strc                 C   s  g }| d}| j}|dur|d|  | jdurrt| jdkrrg }| jdurii }t| j| jD ]\}}|t|g  |t| | q2t	dd |D }	t
|	d D ]}|t|dg}
||
 qWn|t| j ndgg}d	tt d
tfdd}|||d  d}t|D ]6\}}d|vrd|vr|| d|v rd||d  vr|d7 }|t|ks|| }
ndg}
|||
 qd|S )zWAdd pdb headers to an existing PDB string. Useful during multi-chain
    recycling
    r+   NrN   r   c                 S   s   g | ]}t |qS r    )r   )r'   Z	chain_idxr    r    r!   r)      s    z#add_pdb_headers.<locals>.<listcomp>r
   rQ   rO   r#   c                 S   s   dd |  S )NrR   rS   )rT   )rO   r    r    r!   make_parent_line   s   z)add_pdb_headers.<locals>.make_parent_lineZPARENTZREMARKTEREND)r,   r   r=   r   r%   r   r9   
setdefaultr   maxr:   r2   r>   r   rB   rT   )rL   rV   Zout_pdb_lineslinesr   Zparents_per_chainZparent_dictrO   rI   Zmax_idxZchain_parentsrW   Zchain_counterr-   r    r    r!   add_pdb_headers   sB   




r]   c           !         s  t jdg  dtdtf fdd}t j}g }| j}| j}| j}| j	t
j}| j}| j}	t
|t jkr8tdt| }
t|
dkrG||
 |jd }d}d}tj}d	}t|D ]}||| }t||| || || D ]v\}}}}|d
k ryqnd}t|dkr|nd| }d}d}d}|d }d}d}|	d	ur||	|  }|d|dd|d|d|dd|d|| d|dd|d d|d d|d d|d|dd|d|d}|| |d7 }qn||d k}|	d	ur||d kr|	|d  |krd}|	|d  }|r>d}|d|dd||| dd|d|| d} ||  |d7 }||d kr>|t| | qY|d  |d d!|S )"zConverts a `Protein` instance to a PDB string.

    Args:
      prot: The protein to convert to PDB.

    Returns:
      PDB string.
    r1   rr#   c                    s   t j |  dS )NZUNK)r   Zrestype_1to3r2   )r^   r;   r    r!   res_1to3   s   zto_pdb.<locals>.res_1to3zInvalid aatypes.r   r
   Ng      ?ATOM   rS    g      ?Az<6z>5z<4z>1z>3z>4z   z>8.3fr*   z>6.2fz
          z>2TrX   z      rY   r+   )r   r;   r   r   
atom_typesr   r   r   r   rA   r   Zint32r   r   anyr3   
ValueErrorrU   r%   extendshapestringascii_uppercaser:   r9   r=   rT   )!rL   r`   re   Z	pdb_linesr   r   r   r   r   r   headersnZ
atom_indexZprev_chain_indexZ
chain_tagsZ	chain_tagrI   Z
res_name_3Z	atom_nameposrJ   Zb_factorZrecord_typenameZalt_locZinsertion_codeZ	occupancyelementZchargeZ	atom_lineZshould_terminateZ	chain_endZchain_termination_liner    r_   r!   to_pdb   s   	

&


 0



rq   c                 C   s   t j| j S )ao  Computes an ideal atom mask.

    `Protein.atom_mask` typically is defined according to the atoms that are reported in the PDB. This function
    computes a mask according to heavy atoms that should be present in the given sequence of amino acids.

    Args:
      prot: `Protein` whose fields are `numpy.ndarray` objects.

    Returns:
      An ideal atom mask.
    )r   ZSTANDARD_ATOM_MASKr   )rL   r    r    r!   ideal_atom_mask  s   rr   featuresresultr   r   r   r   r   c                 C   sD   t | d |d |d | d d |dur|nt|d ||||d	S )a  Assembles a protein from a prediction.

    Args:
      features: Dictionary holding model inputs.
      result: Dictionary holding model outputs.
      b_factors: (Optional) B-factors to use for the protein.
      chain_index: (Optional) Chain indices for multi-chain predictions
      remark: (Optional) Remark about the prediction
      parents: (Optional) List of template names
    Returns:
      A protein instance.
    r   Zfinal_atom_positionsZfinal_atom_maskr   r
   N)	r   r   r   r   r   r   r   r   r   )r   r   Z
zeros_like)rs   rt   r   r   r   r   r   r    r    r!   from_prediction+  s   
ru   r$   )NNNNN)r   dataclassesr8   rj   typingr   r   r   r   r   r   r   r	   numpyr   rc   r   r   r   ZFeatureDictZModelOutputrC   	dataclassr   rK   r   rU   r]   rq   rr   ru   r    r    r    r!   <module>   sN   (
%22]

