"""
Import of wwPDB xml validation file
Adapted from Eliza's code
"""
#=========================================================================================
# Licence, Reference and Credits
#=========================================================================================
__copyright__ = "Copyright (C) CCPN project (https://www.ccpn.ac.uk) 2014 - 2022"
__credits__ = ("Ed Brooksbank, Joanna Fox, Victoria A Higman, Luca Mureddu, Eliza Płoskoń",
"Timothy J Ragan, Brian O Smith, Gary S Thompson & Geerten W Vuister")
__licence__ = ("CCPN licence. See https://ccpn.ac.uk/software/licensing/")
__reference__ = ("Skinner, S.P., Fogh, R.H., Boucher, W., Ragan, T.J., Mureddu, L.G., & Vuister, G.W.",
"CcpNmr AnalysisAssign: a flexible platform for integrated NMR analysis",
"J.Biomol.Nmr (2016), 66, 111-124, http://doi.org/10.1007/s10858-016-0060-y")
#=========================================================================================
# Last code modification
#=========================================================================================
__modifiedBy__ = "$modifiedBy: Geerten Vuister $"
__dateModified__ = "$dateModified: 2022-03-10 18:29:43 +0000 (Thu, March 10, 2022) $"
__version__ = "$Revision: 3.1.0 $"
#=========================================================================================
# Created
#=========================================================================================
__author__ = "$Author: Geerten Vuister $"
__date__ = "$Date: 2022-03-09 16:04:57 +0000 (Thu, March 9, 2022) $"
#=========================================================================================
# Start of code
#=========================================================================================
import pandas as pd
import xml.etree.ElementTree as et
from ccpn.core.lib.ContextManagers import undoBlockWithoutSideBar, notificationEchoBlocking
from ccpn.core.DataTable import TableFrame
from ccpn.framework.Application import getApplication
from ccpn.framework.Version import applicationVersion
from ccpn.util.Path import aPath
# def removeSpaces(txt):
# return ','.join(txt.split())
def getViolationTable(xroot):
    """Build a table of the violated distance restraints found in a wwPDB validation xml tree.

    Every 'violated_distance_restraint' element below xroot contributes one row; the
    element attributes become the columns.

    :param xroot: root element of the parsed validation xml (anything providing
                  an ElementTree-style iter(tag) method)
    :return: a pandas DataFrame, without the alternate-code/chain/entity/said/icode
             columns, and with 'violation' converted to numbers (values that cannot
             be parsed become NaN). Empty when the xml contains no violated restraints.
    """
    rows = [element.attrib for element in xroot.iter('violated_distance_restraint')]
    violationTable = pd.DataFrame(rows)

    # These columns are not needed at the moment
    columnsToDropOff = ['altcode_1', 'altcode_2', 'chain_1', 'chain_2', 'ent_1', 'ent_2', 'said_1', 'said_2',
                        'icode_1', 'icode_2', ]
    # errors='ignore': a file without violations (or without one of these attributes)
    # yields a table lacking some of the columns; that must not raise a KeyError
    violationTable = violationTable.drop(columns=columnsToDropOff, errors='ignore')

    if 'violation' in violationTable.columns:
        violationTable['violation'] = pd.to_numeric(violationTable['violation'], errors='coerce')

    return violationTable
def getSimpleViolationTable(xroot):
    """Build a reduced table (restraintList, restraint_id, model, violation) of the
    violated distance restraints found in a wwPDB validation xml tree.

    For every restraint list ('rlist_id'), every row of that list is visited; for each
    visited row, one output row is added per distinct model of that restraint
    ('rest_id'), holding the first violation value listed for that
    (restraint list, restraint, model) combination.

    :param xroot: root element of the parsed validation xml (anything providing
                  an ElementTree-style iter(tag) method)
    :return: a pandas DataFrame with columns 'restraintList', 'restraint_id', 'model'
             and 'violation' (numeric; unparsable values become NaN). Empty, but with
             these columns, when the xml contains no violated restraints.
    """
    rows = [element.attrib for element in xroot.iter('violated_distance_restraint')]
    violationTable = pd.DataFrame(rows)

    simplified = {'restraintList': [], 'restraint_id': [], 'model': [], 'violation': []}

    # Cap on the number of visited rows.
    # NOTE(review): the nesting level of the 'count > 100' test was not recoverable from
    # the source; it is taken to belong to the per-restraint loop - confirm.
    maxCount = 100
    count = 0

    # No violated restraints -> no 'rlist_id' column; skip the loops instead of raising
    restraintLists = [] if violationTable.empty else violationTable['rlist_id'].unique()

    for restraintList in restraintLists:
        # rows of this restraint list; selected once rather than for every restraint
        inList = violationTable.loc[violationTable['rlist_id'] == restraintList]

        # NOTE(review): 'rest_id' is deliberately not made unique here (as in the original),
        # so a restraint occurring on several rows is emitted once per row - confirm intent.
        for restraint in inList['rest_id']:
            count += 1
            ofRestraint = inList.loc[inList['rest_id'] == restraint]

            for model in ofRestraint['model'].unique():
                simplified['restraintList'].append(restraintList)
                simplified['restraint_id'].append(restraint)
                simplified['model'].append(model)
                simplified['violation'].append(
                        ofRestraint.loc[ofRestraint['model'] == model, 'violation'].iloc[0])

            if count > maxCount:
                break

    simplifiedTable = pd.DataFrame.from_dict(simplified)
    simplifiedTable['violation'] = pd.to_numeric(simplifiedTable['violation'], errors='coerce')
    return simplifiedTable
def getRamachandranTable(xroot):
    """Build a table of all 'ModelledSubgroup' elements found in a wwPDB validation xml tree.

    Every 'ModelledSubgroup' element below xroot contributes one row; the element
    attributes become the columns.

    :param xroot: root element of the parsed validation xml (anything providing
                  an ElementTree-style iter(tag) method)
    :return: a pandas DataFrame in which 'resnum' (downcast to the smallest integer
             type where possible), 'phi' and 'psi' are converted to numbers; values
             that cannot be parsed become NaN. Empty when the xml contains no
             ModelledSubgroup elements.
    """
    rows = [element.attrib for element in xroot.iter('ModelledSubgroup')]
    ramachandranTable = pd.DataFrame(rows)

    # Convert only the columns that are present: an xml without ModelledSubgroup elements,
    # or without any phi/psi attribute, must not raise a KeyError
    if 'resnum' in ramachandranTable.columns:
        ramachandranTable['resnum'] = pd.to_numeric(ramachandranTable['resnum'], downcast="integer",
                                                    errors='coerce')
    for angle in ('phi', 'psi'):
        if angle in ramachandranTable.columns:
            ramachandranTable[angle] = pd.to_numeric(ramachandranTable[angle], errors='coerce')

    return ramachandranTable
def getSimpleRamachandranTable(xroot):
    """Summarise, per residue number, the Ramachandran classification found in a wwPDB
    validation xml tree.

    For every distinct 'resnum' of the 'ModelledSubgroup' elements, the rows whose
    'rama' attribute equals 'Favored', 'Allowed' or 'OUTLIER' are counted (rows
    without a 'model' value are not counted).

    NOTE(review): rows are grouped on 'resnum' only, so identical residue numbers of
    different chains end up in the same row - confirm this is intended.

    :param xroot: root element of the parsed validation xml (anything providing
                  an ElementTree-style iter(tag) method)
    :return: a pandas DataFrame with columns 'residue', 'favored', 'allowed' and
             'outlier'. Empty, but with these columns, when the xml contains no
             ModelledSubgroup elements with a 'resnum'.
    """
    rows = [element.attrib for element in xroot.iter('ModelledSubgroup')]
    ramachandranTable = pd.DataFrame(rows)

    summary = {'residue': [], 'favored': [], 'allowed': [], 'outlier': []}

    # Nothing to summarise; return the empty table rather than raising a KeyError
    if 'resnum' not in ramachandranTable.columns:
        return pd.DataFrame.from_dict(summary)

    ramachandranTable['resnum'] = pd.to_numeric(ramachandranTable['resnum'], downcast="integer",
                                                errors='coerce')

    # (summary column, value of the 'rama' attribute)
    categories = (('favored', 'Favored'), ('allowed', 'Allowed'), ('outlier', 'OUTLIER'))

    for residue in ramachandranTable['resnum'].unique():
        # rows of this residue; selected once and re-used for the three categories
        ofResidue = ramachandranTable.loc[ramachandranTable['resnum'] == residue]
        summary['residue'].append(residue)
        for column, label in categories:
            summary[column].append(ofResidue.loc[ofResidue['rama'] == label, 'model'].count())

    return pd.DataFrame.from_dict(summary)
def importWwPdbFile(path, project,
                    includeViolations=True, violationsTableName='wwPDBviolations',
                    includeRamachandran=True, ramachandranTableName='wwPDBramachandran'
                    ) -> list:
    """Import wwPDB validation results from path into project.

    :param path: path of the wwPDB validation xml file
    :param project: the project in which the DataTables are created (via project.newDataTable)
    :param includeViolations: if True, create a DataTable with the violated distance restraints
    :param violationsTableName: name of the violations DataTable
    :param includeRamachandran: if True, create two DataTables: the full Ramachandran data and
                                a per-residue summary (named ramachandranTableName + '_short')
    :param ramachandranTableName: name of the full Ramachandran DataTable
    :return: a list with the V3 objects created in the project, in the order
             violations, Ramachandran, Ramachandran summary
    :raises RuntimeError: if path does not exist
    """
    if not aPath(path).exists():
        raise RuntimeError(f'importWwPdbFile: Invalid path {path}')

    xroot = et.parse(path).getroot()

    result = []

    if includeViolations:
        violationsData = TableFrame(getViolationTable(xroot))
        violationsTable = project.newDataTable(name=violationsTableName, data=violationsData,
                                               comment='violated restraints from wwPDB')
        result.append(violationsTable)

    if includeRamachandran:
        ramachandranData = TableFrame(getRamachandranTable(xroot))
        ramachandranShortData = TableFrame(getSimpleRamachandranTable(xroot))
        ramachandranTable = project.newDataTable(name=ramachandranTableName, data=ramachandranData,
                                                 comment='ramachandran data from wwPDB')
        ramachandranShortTable = project.newDataTable(name=ramachandranTableName + '_short',
                                                      data=ramachandranShortData,
                                                      comment='Simplified Ramachandran Data')
        result.extend((ramachandranTable, ramachandranShortTable))

    return result