Source code for ccpnmodel.ccpncore.lib.molecule.MoleculeModify

"""API (data storage) level functionality for creating and modifying molecules

"""
#=========================================================================================
# Licence, Reference and Credits
#=========================================================================================

__copyright__ = "Copyright (C) CCPN project (http://www.ccpn.ac.uk) 2014 - 2021"
__credits__ = ("Ed Brooksbank, Luca Mureddu, Timothy J Ragan & Geerten W Vuister")
__licence__ = ("CCPN licence. See http://www.ccpn.ac.uk/v3-software/downloads/license")
__reference__ = ("Skinner, S.P., Fogh, R.H., Boucher, W., Ragan, T.J., Mureddu, L.G., & Vuister, G.W.",
                 "CcpNmr AnalysisAssign: a flexible platform for integrated NMR analysis",
                 "J.Biomol.Nmr (2016), 66, 111-124, http://doi.org/10.1007/s10858-016-0060-y")

#=========================================================================================
# Last code modification
#=========================================================================================
__modifiedBy__ = "$modifiedBy: Luca Mureddu $"
__dateModified__ = "$dateModified: 2021-03-08 12:39:50 +0000 (Mon, March 08, 2021) $"
__version__ = "$Revision: 3.0.3 $"
#=========================================================================================
# Created
#=========================================================================================

__author__ = "$Author: CCPN $"
__date__ = "$Date: 2017-04-07 10:28:48 +0000 (Fri, April 07, 2017) $"
#=========================================================================================
# Start of code
#=========================================================================================
"""Code for generating and modifying Molecules and MolSystems"""

import typing
from ccpn.util.nef import StarIo
from ccpn.util import Common as commonUtil
from ccpnmodel.ccpncore.memops.ApiError import ApiError
from ccpnmodel.ccpncore.lib.chemComp import Io as chemCompIo
from ccpnmodel.ccpncore.lib.molecule import MoleculeQuery


###from ccp.general.Io import getChemComp

#################################################################
#
# Molecule creation
#
##################################################################

[docs]def createMolecule(project, sequence:(list,str), molType:str='protein', name:str="Molecule", startNumber:int=1, isCyclic=False): """Descrn: Makes Molecule for a given sequence Inputs: Project,List of Words (ChemComp.CcpCode), Word (ChemComp.molType), String ( Ccp.Molecule.Molecule.name) Int (first MolResidue.seqCode) Output: Molecule """ # ensure name is unique i = 0 ss = name while project.findFirstMolecule(name=ss): i += 1 ss = '%s%d' % (name,i) if ss != name: project._logger.warning( "CCPN molecule named %s already exists. New molecule has been named %s" % (name,ss)) name = ss molecule = project.newMolecule(name=name) try: if len(sequence) == 3 and molType == 'other': # It's not a sequence! It's a three letter ccp-code for identifing a chemComp small-molecule. We need a better way! molecule.extendMolResidues([sequence,], 1, isCyclic) return molecule if isinstance(sequence, str): molecule.extendOneLetterMolResidues(sequence, molType, startNumber, isCyclic) else: molecule.extendMolResidues(sequence, startNumber, isCyclic) except Exception as e: try: molecule.delete() except: pass raise e return molecule
def _getLinearChemCompData(project, molType, ccpCode, linking): """Descrn: Implementation function, specific for addLinearSequence() Inputs: Project object, and desired molType, ccpCode, linking (all strings) Output: (dd,ll) tuple where dd is a dictionary for passing to the MolResidue creation (as **dd), and ll is a list of the linkCodes that are different from 'next' and 'prev' """ seqLinks = [] otherLinkCodes = [] chemComp = project.findFirstChemComp(molType=molType, ccpCode=ccpCode) isOther = False if chemComp is None: isOther = True chemComp = project.findFirstChemComp(molType='other', ccpCode=ccpCode) if chemComp is None: chemComp = chemCompIo.fetchChemComp(project, molType, ccpCode) if chemComp is None: raise ApiError("No chemComp for %s residue %s" % (molType, ccpCode)) chemCompVar = chemComp.findFirstChemCompVar(linking=linking, isDefaultVar=True) or \ chemComp.findFirstChemCompVar(linking=linking) # Note requiring a default var is too strict - not always set for # imports from mol2/PDB etc if isOther and (chemCompVar is None): if linking == 'start': linkEnd = chemComp.findFirstLinkEnd(linkCode='next') elif linking == 'end': linkEnd = chemComp.findFirstLinkEnd(linkCode='prev') else: linkEnd = None if linkEnd: otherLinkCodes.append(linkEnd.linkCode) chemCompVar = chemComp.findFirstChemCompVar(isDefaultVar=True) or \ chemComp.findFirstChemCompVar() if chemCompVar is None: raise ApiError("No ChemCompVar found for %s:%s linking %s" % (molType, ccpCode, linking)) molResData = {'chemComp':chemComp, 'linking':chemCompVar.linking, 'descriptor':chemCompVar.descriptor} for linkEnd in chemCompVar.linkEnds: code = linkEnd.linkCode if code in ('next','prev'): seqLinks.append(code) else: otherLinkCodes.append(code) if linking == 'start': if seqLinks and seqLinks != ['next']: raise ApiError("Linking 'start' must have just 'next' linkEnd") elif linking == 'end': if seqLinks and seqLinks != ['prev']: raise ApiError("Linking 'end' must have just 'prev' linkEnd ") elif linking != 'middle' or seqLinks not in (['next','prev'],['prev','next']): raise ApiError("Illegal linking %s with seqLinks %s" % (linking,seqLinks)) return molResData, otherLinkCodes
[docs]def createMoleculeFromNef(project, name:str, sequence:typing.Sequence[dict], defaultType:str='UNK') -> 'Molecule': """Create a Molecule from a sequence of NEF row dictionaries (or equivalent)""" residueName2chemCompId = MoleculeQuery.fetchStdResNameMap(project) stretches = StarIo.splitNefSequence(sequence) molecule = project.newMolecule(name=name) for stretch in stretches: # Try setting start number sequenceCode = stretch[0]['sequence_code'] seqCode, seqInsertCode,offset = commonUtil.parseSequenceCode(sequenceCode) if seqCode is None: startNumber = 1 else: startNumber = seqCode # Create new MolResidues residueTypes = [row.get('residue_name', defaultType) for row in stretch] firstLinking = stretch[0].get('linking') if len(residueTypes) > 1: lastLinking = stretch[-1].get('linking') if (firstLinking in ('start', 'single', 'nonlinear', 'dummy') or lastLinking == 'end'): isCyclic = False else: # We use isCyclic to set the ends to 'middle'. It gets sorted out below isCyclic = True molResidues = molecule.extendMolResidues(sequence=residueTypes, startNumber=startNumber, isCyclic=isCyclic) # Adjust linking and descriptor if isCyclic: if firstLinking != 'cyclic' or lastLinking != 'cyclic': # not cyclic after all - remove cyclising link cyclicLink = molResidues[-1].findFirstMolResLinkEnd(linkCode='next').molResLink cyclicLink.delete() else: if firstLinking != 'start': ff = molResidues[0].chemComp.findFirstChemCompVar chemCompVar = (ff(linking='middle', isDefaultVar=True) or ff(linking='middle')) molResidues[0].__dict__['linking'] = 'middle' molResidues[0].__dict__['descriptor'] = chemCompVar.descriptor if lastLinking != 'end': ff = molResidues[-1].chemComp.findFirstChemCompVar chemCompVar = (ff(linking='middle', isDefaultVar=True) or ff(linking='middle')) molResidues[-1].__dict__['linking'] = 'middle' molResidues[-1].__dict__['descriptor'] = chemCompVar.descriptor else: # Only one residue residueType = residueTypes[0] if residueType.startswith('dummy.'): tt = ('dummy',residueType[6:]) else: tt = residueName2chemCompId.get(residueTypes[0]) if not tt: project._logger.warning("""Could not access ChemComp for %s - replacing with %s NB - could be a failure in fetching remote information. Are you off line?""" % (residueTypes[0], defaultType)) tt = residueName2chemCompId.get(defaultType) if tt: chemComp = chemCompIo.fetchChemComp(project, tt[0], tt[1]) if chemComp: chemCompVar = (chemComp.findFirstChemCompVar(linking='none') or chemComp.findFirstChemCompVar()) # just a default molResidues = [molecule.newMolResidue(seqCode=startNumber, chemCompVar=chemCompVar)] else: raise ValueError("Residue type %s %s: Error in getting template information" % (residueTypes[0], tt)) else: raise ValueError("Residue type %s not recognised" % residueTypes[0]) startNumber += len(residueTypes) # return molecule