# Source code for ccpnmodel.ccpncore.lib.molecule.MoleculeQuery

"""API (data storage) level functionality for querying molecules

"""
#=========================================================================================
# Licence, Reference and Credits
#=========================================================================================
__copyright__ = "Copyright (C) CCPN project (http://www.ccpn.ac.uk) 2014 - 2021"
__credits__ = ("Ed Brooksbank, Luca Mureddu, Timothy J Ragan & Geerten W Vuister")
__licence__ = ("CCPN licence. See http://www.ccpn.ac.uk/v3-software/downloads/license")
__reference__ = ("Skinner, S.P., Fogh, R.H., Boucher, W., Ragan, T.J., Mureddu, L.G., & Vuister, G.W.",
                 "CcpNmr AnalysisAssign: a flexible platform for integrated NMR analysis",
                 "J.Biomol.Nmr (2016), 66, 111-124, http://doi.org/10.1007/s10858-016-0060-y")

#=========================================================================================
# Last code modification
#=========================================================================================
__modifiedBy__ = "$modifiedBy: Luca Mureddu $"
__dateModified__ = "$dateModified: 2021-03-08 12:39:50 +0000 (Mon, March 08, 2021) $"
__version__ = "$Revision: 3.0.3 $"
#=========================================================================================
# Created
#=========================================================================================

__author__ = "$Author: CCPN $"
__date__ = "$Date: 2017-04-07 10:28:48 +0000 (Fri, April 07, 2017) $"
#=========================================================================================
# Start of code
#=========================================================================================
"""Code for querying Molecules and MolSystems"""

from ccpnmodel.ccpncore.lib.chemComp import ChemCompOverview
from ccpnmodel.ccpncore.lib.chemComp import ObsoleteChemComps
from ccpnmodel.ccpncore.lib.chemComp import Io as chemCompIo
import urllib

# Molecule types in the order fetchStdResNameMap processes them; a cifCode
# already claimed by an earlier type is not taken over by a later one.
molTypeOrder = ('protein', 'DNA', 'RNA', 'carbohydrate', 'other')

LINEAR_POLYMER_TYPES = ('protein', 'DNA', 'RNA', 'other')


# CifCode map - preset to ensure it is not overridden by obsolete ChemComps in list
# Every value is an immutable (molType, ccpCode) tuple, registered under both the
# UPPER-CASE and the Mixed-case spelling of the code.
cifCodeRemap = {}
# Std DNA - keep one-letter ccpCode
cifCodeRemap['DA'] = cifCodeRemap['Da'] = ('DNA', 'Da')
cifCodeRemap['DC'] = cifCodeRemap['Dc'] = ('DNA', 'Dc')
cifCodeRemap['DG'] = cifCodeRemap['Dg'] = ('DNA', 'Dg')
cifCodeRemap['DI'] = cifCodeRemap['Di'] = ('DNA', 'Di')
cifCodeRemap['DU'] = cifCodeRemap['Du'] = ('DNA', 'Du')
cifCodeRemap['DT'] = cifCodeRemap['Dt'] = ('DNA', 'Dt')

# Std RNA - keep one-letter ccpCode
cifCodeRemap['5MU'] = cifCodeRemap['5mu'] = cifCodeRemap['RT'] = cifCodeRemap['Rt'] = ('RNA', '5mu')

# Set as RNA or other, to override DNA with similar name
for tag in ('2at', '2bt', '2gt', '2nt', '2ot', '3me', 'Ap7', 'Atl', 'Boe', 'Car',
            'Eit', 'Fnu', 'Gmu', 'Lcc', 'Lcg', 'P2t', 'S2m', 'T2t', 'Tfe', 'Tln'):
  # Must be a tuple like every other entry: a list would never compare equal to
  # a (molType, ccpCode) tuple and would be shared, mutable, between both keys.
  cifCodeRemap[tag] = cifCodeRemap[tag.upper()] = ('RNA', tag)

cifCodeRemap['Hob'] = cifCodeRemap['HOB'] = ('other', 'Hob')
cifCodeRemap['Xxx'] = cifCodeRemap['XXX'] = ('other', 'Xxx')

# ccpCode remaps - for upgrade conversion of 'inventive' ChemComps
# Lookup table {molType: {oldCcpCode: (newMolType, newCcpCode)}}.
# The nucleotide entries follow a regular pattern, so they are generated:
#   <base>00 / <base>11    -> standard residue of the same molType
#   <base>01_dna / _rna    -> standard DNA / RNA residue, filed under 'other'
# Standard DNA ccpCodes are 'D' + lower-case base; standard RNA ccpCodes are the
# bare upper-case base letter.
ccpCodeRemap = {
  'DNA': {
    'Xxx': ('DNA', 'Dn'),
    **{base + suffix: ('DNA', 'D' + base.lower())
       for base in 'ACGIU' for suffix in ('00', '11')},
  },
  'protein': {
    'Xxx': ('protein', 'Unk'),
  },
  'RNA': {
    'Xxx': ('RNA', 'N'),
    **{base + suffix: ('RNA', base)
       for base in 'ACGIU' for suffix in ('00', '11')},
  },
  'other': {
    'Acy': ('other', 'Ace'),
    'Nh3': ('other', 'Nh2'),
    **{base + '01_dna': ('DNA', 'D' + base.lower()) for base in 'ACGITU'},
    **{base + '01_rna': ('RNA', base) for base in 'ACGITU'},
  },
}


def fetchStdResNameMap(project:'MemopsRoot', reset:bool=False, debug:bool=False) -> dict:
  """Fetch dict of {residueName:(molType,ccpCode)}, using cached value if present and not reset.

  The map is cached on the project as ``project._residueName2chemCompId``.

  :param project: object the cache attribute is stored on; with ``debug`` it must
      also provide ``sortedChemComps()``.
  :param reset: if True, ignore any cached map and rebuild it.
  :param debug: if True (and the map is being rebuilt), additionally walk the
      project's ChemComps and register their naming-system names as synonyms.
      NB this adds entries to the returned map; it has no effect on a cache hit.
  :return: dict mapping residue name (cifCode in UPPER and Mixed case, plus any
      synonyms) to a (molType, ccpCode) tuple.

  NBNB TBD Add naming variants from ChemComp naming systems
  """

  # Cache check comes first so that a cache hit depends on nothing but the
  # cache attribute itself.
  if not reset and hasattr(project, '_residueName2chemCompId'):
    return project._residueName2chemCompId

  chemCompOverview = ChemCompOverview.chemCompOverview
  obsoleteChemComps = ObsoleteChemComps.obsoleteChemCompData

  # Start from the presets, so they are not overridden by overview entries.
  # Values are normalised to tuples: results never alias module-level data and
  # always compare equal to (molType, ccpCode) tuples.
  result = {tag: tuple(ccId) for tag, ccId in cifCodeRemap.items()}

  # {(molType, ccpCode): (obsoleteCifCode, replacementCifCode)}, resolved below
  remapped = {}

  for molType in molTypeOrder:

    # Add data for all chemComps from overview
    # NB done in reversed order to ensure Xyz takes precedence over XYZ
    for ccpCode, tt in sorted(chemCompOverview[molType].items(), reverse=True):
      cifCode = tt[1]

      obsoleteData = obsoleteChemComps.get(cifCode)
      if obsoleteData:
        altCode = obsoleteData['cifCode']
        if altCode is not None:
          # Remaps are handled in another loop; obsolete codes with no
          # replacement are simply dropped
          remapped[(molType, ccpCode)] = (cifCode, altCode)
        continue

      if not cifCode or cifCode != cifCode.upper():
        # no cifCode, or cifCode is not upperCase - skip
        continue

      locif = cifCode[0] + cifCode[1:].lower()
      val = result.get(cifCode) or result.get(locif)
      if val is None:
        # New value. Set the map
        result[cifCode] = result[locif] = (molType, ccpCode)

      elif (val[0] == molType and val[1] != ccpCode
            and val[1] == cifCode and cifCode != locif):
        # Existing entry of the same molType has an UPPER-CASE ccpCode:
        # replace it with the mixed-case one
        result[cifCode] = result[locif] = (molType, ccpCode)

      # Anything else is either the preset itself or a cifCode clash;
      # in both cases the existing entry is kept.

  # Point obsolete cifCodes at whatever their replacement code maps to.
  # Replacements that are not in the map are ignored.
  for ccId in sorted(remapped):
    cifCode, altCode = remapped[ccId]
    val = result.get(altCode)
    if val is not None:
      locif = cifCode[0] + cifCode[1:].lower()
      result[cifCode] = result[locif] = val

  if debug:
    # Register naming-system names of the project's ChemComps as synonyms
    for chemComp in project.sortedChemComps():
      cifCode = chemComp.code3Letter
      ccId = (chemComp.molType, chemComp.ccpCode)
      val = result.get(cifCode) or (chemComp.code1Letter, cifCode)
      if ccId != val:
        # The cifCode does not map to this very ChemComp - add no synonyms
        continue

      # get sysNames
      tags = set()
      for namingSystem in chemComp.namingSystems:
        for sysName in namingSystem.chemCompSysNames:
          tags.add(sysName.sysName)

      # set additional synonyms - never override an existing entry,
      # and reject one-letter synonyms
      for tag in tags:
        if result.get(tag) is None and len(tag) != 1:
          result[tag] = val

  # Cache only the completed map, so a failure part-way through cannot leave a
  # partial map behind for later calls to pick up.
  project._residueName2chemCompId = result
  return result
if __name__ == '__main__':
  # Manual check: rebuild the residue-name map (including the debug pass) for a
  # fresh project and print it as ready-to-paste dictionary entries.
  from ccpnmodel.ccpncore.lib.Io import Api as apiIo

  testProject = apiIo.newProject('ChemCompNameTest')
  resNameMap = fetchStdResNameMap(testProject, reset=True, debug=True)
  for resName in sorted(resNameMap):
    ccId = resNameMap[resName]
    print(" '%s':('%s','%s')," % (resName, ccId[0], ccId[1]))


#from NEF molecule creation