# Source code for ccpn.AnalysisStructure.lib.importWwPdbFile

"""
Import of wwPDB xml validation file
Adapted from Eliza's code
"""
#=========================================================================================
# Licence, Reference and Credits
#=========================================================================================
__copyright__ = "Copyright (C) CCPN project (https://www.ccpn.ac.uk) 2014 - 2022"
__credits__ = ("Ed Brooksbank, Joanna Fox, Victoria A Higman, Luca Mureddu, Eliza Płoskoń",
               "Timothy J Ragan, Brian O Smith, Gary S Thompson & Geerten W Vuister")
__licence__ = ("CCPN licence. See https://ccpn.ac.uk/software/licensing/")
__reference__ = ("Skinner, S.P., Fogh, R.H., Boucher, W., Ragan, T.J., Mureddu, L.G., & Vuister, G.W.",
                 "CcpNmr AnalysisAssign: a flexible platform for integrated NMR analysis",
                 "J.Biomol.Nmr (2016), 66, 111-124, http://doi.org/10.1007/s10858-016-0060-y")
#=========================================================================================
# Last code modification
#=========================================================================================
__modifiedBy__ = "$modifiedBy: Geerten Vuister $"
__dateModified__ = "$dateModified: 2022-03-10 18:29:43 +0000 (Thu, March 10, 2022) $"
__version__ = "$Revision: 3.1.0 $"
#=========================================================================================
# Created
#=========================================================================================
__author__ = "$Author: Geerten Vuister $"
__date__ = "$Date: 2022-03-09 16:04:57 +0000 (Thu, March 9, 2022) $"
#=========================================================================================
# Start of code
#=========================================================================================

import pandas as pd
import xml.etree.ElementTree as et

from ccpn.core.lib.ContextManagers import undoBlockWithoutSideBar, notificationEchoBlocking
from ccpn.core.DataTable import TableFrame

from ccpn.framework.Application import getApplication
from ccpn.framework.Version import applicationVersion

from ccpn.util.Path import aPath

# def removeSpaces(txt):
#     return ','.join(txt.split())


def getViolationTable(xroot):
    """Collect the violated distance restraints below xroot into a DataFrame.

    Every 'violated_distance_restraint' element contributes one row; its xml
    attributes become the columns. The alternate-code, chain, entity, said and
    insertion-code columns of both atoms are removed, and the 'violation'
    column is converted to numbers (unparsable values become NaN).

    :param xroot: root element of the parsed xml tree; only its
                  iter(tag) method is used.
    :return: a pandas DataFrame with one row per violated restraint element.
             The frame has zero rows, but still a 'violation' column, when no
             such element is present.
    """
    rows = [element.attrib for element in xroot.iter('violated_distance_restraint')]
    violations = pd.DataFrame(rows)

    # Columns that are not needed at the moment.
    unusedColumns = ['altcode_1', 'altcode_2',
                     'chain_1', 'chain_2',
                     'ent_1', 'ent_2',
                     'said_1', 'said_2',
                     'icode_1', 'icode_2',
                     ]
    # errors='ignore': a report does not have to carry every one of these
    # attributes (and carries none of them when there are no violations).
    violations = violations.drop(columns=unusedColumns, errors='ignore')

    # Guarantee the column exists so the conversion below cannot raise KeyError.
    if 'violation' not in violations.columns:
        violations = violations.reindex(columns=[*violations.columns, 'violation'])
    violations['violation'] = pd.to_numeric(violations['violation'], errors='coerce')

    return violations
def getSimpleViolationTable(xroot):
    """Build a condensed table of the violated distance restraints below xroot.

    The result has one row for every distinct combination of restraint list
    ('rlist_id'), restraint ('rest_id') and model ('model') found on the
    'violated_distance_restraint' elements. The 'violation' of a row is the
    value of the first element with that combination, converted to a number
    (unparsable or absent values become NaN).

    Rows are ordered by first appearance of the restraint list, then of the
    restraint within that list, then of the model within that restraint.
    Elements lacking any of the three identifying attributes are skipped.

    :param xroot: root element of the parsed xml tree; only its
                  iter(tag) method is used.
    :return: a pandas DataFrame with the columns 'restraintList',
             'restraint_id', 'model' and 'violation'; it has zero rows when no
             violated restraint is present.
    """
    # listId -> restraintId -> model -> first violation value.
    # Dicts keep insertion order, which yields the first-appearance ordering.
    firstViolations = {}
    for element in xroot.iter('violated_distance_restraint'):
        attributes = element.attrib
        listId = attributes.get('rlist_id')
        restraintId = attributes.get('rest_id')
        model = attributes.get('model')
        if listId is None or restraintId is None or model is None:
            continue
        perModel = firstViolations.setdefault(listId, {}).setdefault(restraintId, {})
        # setdefault keeps the value of the first element of each combination.
        perModel.setdefault(model, attributes.get('violation'))

    simplified = {'restraintList': [], 'restraint_id': [], 'model': [], 'violation': []}
    for listId, restraints in firstViolations.items():
        for restraintId, perModel in restraints.items():
            for model, violation in perModel.items():
                simplified['restraintList'].append(listId)
                simplified['restraint_id'].append(restraintId)
                simplified['model'].append(model)
                simplified['violation'].append(violation)

    simplifiedTable = pd.DataFrame.from_dict(simplified)
    simplifiedTable['violation'] = pd.to_numeric(simplifiedTable['violation'], errors='coerce')
    return simplifiedTable
def getRamachandranTable(xroot):
    """Collect the 'ModelledSubgroup' elements below xroot into a DataFrame.

    Every element contributes one row; its xml attributes become the columns.
    'resnum' is converted to numbers (downcast to the smallest integer type
    where possible), 'phi' and 'psi' are converted to numbers; values that
    cannot be parsed, or that are absent, become NaN.

    :param xroot: root element of the parsed xml tree; only its
                  iter(tag) method is used.
    :return: a pandas DataFrame with one row per 'ModelledSubgroup' element.
             The 'resnum', 'phi' and 'psi' columns are always present, also
             when no element carries them or when there are no elements.
    """
    rows = [element.attrib for element in xroot.iter('ModelledSubgroup')]
    ramachandranTable = pd.DataFrame(rows)

    # The numeric columns may be absent altogether (e.g. no element carries
    # phi/psi); add them NaN-filled so the conversions cannot raise KeyError.
    numericColumns = ('resnum', 'phi', 'psi')
    missing = [column for column in numericColumns if column not in ramachandranTable.columns]
    if missing:
        ramachandranTable = ramachandranTable.reindex(columns=[*ramachandranTable.columns, *missing])

    ramachandranTable['resnum'] = pd.to_numeric(ramachandranTable['resnum'],
                                                downcast="integer", errors='coerce')
    ramachandranTable['phi'] = pd.to_numeric(ramachandranTable['phi'], errors='coerce')
    ramachandranTable['psi'] = pd.to_numeric(ramachandranTable['psi'], errors='coerce')

    return ramachandranTable
def getSimpleRamachandranTable(xroot):
    """Count the Ramachandran classifications per residue number below xroot.

    For every distinct 'resnum' value found on the 'ModelledSubgroup'
    elements, count the elements whose 'rama' attribute equals 'Favored',
    'Allowed' or 'OUTLIER'. Only elements that carry a 'model' attribute are
    counted. Residues are identified by 'resnum' alone, so equal residue
    numbers on different elements (whatever their other attributes) are
    counted together.

    :param xroot: root element of the parsed xml tree; only its
                  iter(tag) method is used.
    :return: a pandas DataFrame with the columns 'residue', 'favored',
             'allowed' and 'outlier', one row per distinct residue number in
             order of first appearance; zero rows when there are no elements.
    """
    rows = [element.attrib for element in xroot.iter('ModelledSubgroup')]

    # reindex keeps just the three columns used here and adds any that are
    # absent as NaN-filled columns, so none of the lookups can raise KeyError.
    ramachandranTable = pd.DataFrame(rows).reindex(columns=['resnum', 'rama', 'model'])
    ramachandranTable['resnum'] = pd.to_numeric(ramachandranTable['resnum'],
                                                downcast="integer", errors='coerce')

    residues = list(ramachandranTable['resnum'].unique())
    # Elements without a 'model' attribute do not contribute to the counts.
    counted = ramachandranTable[ramachandranTable['model'].notna()]

    simplified = {'residue': residues}
    for column, classification in (('favored', 'Favored'),
                                   ('allowed', 'Allowed'),
                                   ('outlier', 'OUTLIER')):
        # One pass per classification instead of one table scan per residue.
        perResidue = counted.loc[counted['rama'] == classification, 'resnum'].value_counts().to_dict()
        simplified[column] = [int(perResidue.get(residue, 0)) for residue in residues]

    return pd.DataFrame.from_dict(simplified)
def importWwPdbFile(path, project,
                    includeViolations=True, violationsTableName='wwPDBviolations',
                    includeRamachandran=True, ramachandranTableName='wwPDBramachandran'
                    ) -> list:
    """Read a wwPDB xml validation file and store its contents as data tables in project.

    :param path: location of the wwPDB xml validation file.
    :param project: object on which newDataTable(name=, data=, comment=) is
                    called for every table created.
    :param includeViolations: if True, create one table from getViolationTable.
    :param violationsTableName: name passed for the violations table.
    :param includeRamachandran: if True, create two tables: one from
                                getRamachandranTable and one from
                                getSimpleRamachandranTable.
    :param ramachandranTableName: name passed for the full Ramachandran table;
                                  the simplified table gets this name with
                                  '_short' appended.
    :return: list of the objects returned by project.newDataTable, in order of
             creation (violations first, then full and simplified Ramachandran).
    :raises RuntimeError: if path does not exist.
    """
    if not aPath(path).exists():
        raise RuntimeError(f'importWwPdbFile: Invalid path {path}')

    xroot = et.parse(path).getroot()
    created = []

    if includeViolations:
        # NOTE: only the full violation table is imported;
        # getSimpleViolationTable is not called here.
        violationData = TableFrame(getViolationTable(xroot))
        violationTable = project.newDataTable(name=violationsTableName,
                                              data=violationData,
                                              comment='violated restraints from wwPDB')
        created.append(violationTable)

    if includeRamachandran:
        fullRama = getRamachandranTable(xroot)
        shortRama = getSimpleRamachandranTable(xroot)
        fullData = TableFrame(fullRama)
        shortData = TableFrame(shortRama)

        fullTable = project.newDataTable(name=ramachandranTableName,
                                         data=fullData,
                                         comment='ramachandran data from wwPDB')
        shortTable = project.newDataTable(name=ramachandranTableName + '_short',
                                          data=shortData,
                                          comment='Simplified Ramachandran Data')
        created.extend((fullTable, shortTable))

    return created