Source code for ccpnmodel.ccpncore.lib.molecule.Labeling

"""Library functions for molecule labeling

"""
#=========================================================================================
# Licence, Reference and Credits
#=========================================================================================
__copyright__ = "Copyright (C) CCPN project (http://www.ccpn.ac.uk) 2014 - 2021"
__credits__ = ("Ed Brooksbank, Joanna Fox, Victoria A Higman, Luca Mureddu, Eliza Płoskoń",
               "Timothy J Ragan, Brian O Smith, Gary S Thompson & Geerten W Vuister")
__licence__ = ("CCPN licence. See http://www.ccpn.ac.uk/v3-software/downloads/license")
__reference__ = ("Skinner, S.P., Fogh, R.H., Boucher, W., Ragan, T.J., Mureddu, L.G., & Vuister, G.W.",
                 "CcpNmr AnalysisAssign: a flexible platform for integrated NMR analysis",
                 "J.Biomol.Nmr (2016), 66, 111-124, http://doi.org/10.1007/s10858-016-0060-y")
#=========================================================================================
# Last code modification
#=========================================================================================
__modifiedBy__ = "$modifiedBy: Ed Brooksbank $"
__dateModified__ = "$dateModified: 2021-07-29 19:59:01 +0100 (Thu, July 29, 2021) $"
__version__ = "$Revision: 3.0.4 $"
#=========================================================================================
# Created
#=========================================================================================
__author__ = "$Author: CCPN $"
__date__ = "$Date: 2017-04-07 10:28:48 +0000 (Fri, April 07, 2017) $"
#=========================================================================================
# Start of code
#=========================================================================================

from ccpn.util import Common as commonUtil
from typing import Sequence


def _getIsotopomerSingleAtomFractions(isotopomers, atomName, subType=1):
  """Descrn: Get the isotope proportions for a names atom in over a set
             of isotopomers. Fractions normalised to 1.0
     Inputs: List of ChemCompLabel.Isotopomers, Word (ChemAtom.name), Word (ChemAtom.subType)
     Output: Dict of Word:Float - IsotopeCode:fraction
  """

  fractionDict = {}
  isoWeightSum =  sum([x.weight for x in isotopomers])

  for isotopomer in isotopomers:
    atomLabels  = isotopomer.findAllAtomLabels(name=atomName,subType=subType)
    atWeightSum = sum([x.weight for x in atomLabels])
    atFactor    = isotopomer.weight  / isoWeightSum

    for atomLabel in atomLabels:
      isotopeCode = atomLabel.isotopeCode
      contrib     = atFactor * atomLabel.weight / atWeightSum
      fractionDict[isotopeCode] = fractionDict.get(isotopeCode,0.0) + contrib
  #
  return fractionDict


def _getIsotopomerAtomPairFractions(isotopomers, atomNames, subTypes=(1,1)):
  """Descrn: Get the combined isotope proportions for a given pair of named
             atoms within a set of isotopomers. Fractions normalised to 1.0
     Inputs: List of ChemCompLabel.Isotopomers, 2-Tuple of Words (ChemAtom.name), 2-Tuple of Words (ChemAtom.subType)
     Output: Dict of Tuple:Float - (IsotopeCode,IsotopeCode):fraction
  """

  fractionDict = {}

  isoWeightSum = sum([x.weight for x in isotopomers])

  atLabels   = [None, None]
  sumWeights = [None, None]

  for isotopomer in isotopomers:
    for i in (0,1):
      atLabels[i] = isotopomer.findAllAtomLabels(name=atomNames[i],
                                                 subType=subTypes[i])
      sumWeights[i] = sum([x.weight for x in atLabels[i]])

    # Done this way to guard against the divisor becoming zero
    factor  = isotopomer.weight / isoWeightSum
    divisor = sumWeights[0] * sumWeights[1]

    for atl0 in atLabels[0]:
      for atl1 in atLabels[1]:

        if atl0 is atl1:
          contrib = atl0.weight * factor / sumWeights[0]
        else:
          contrib = atl0.weight * atl1.weight * factor / divisor

        key = (atl0.isotopeCode, atl1.isotopeCode)
        fractionDict[key] = fractionDict.get(key, 0.0) + contrib
  #
  return fractionDict


def _singleAtomFractions(labeledMixture, resId, atName):
  """get the isotopeCode:fraction dictionary for a labeledMixture

  labeledMixture:  LabeledMixture object
  resid, atName, residue serial and atom name for atom
  Returns isotopeCode:fraction dictionary with fractions normalised to 1.0
  """

  # set up
  molResidue = labeledMixture.labeledMolecule.molecule.findFirstMolResidue(
               serial=resId)
  if molResidue is None:
    return None

  chemAtom = molResidue.chemCompVar.findFirstChemAtom(name=atName)
  if chemAtom is None:
    return None
  elementName = chemAtom.elementSymbol
  subType = chemAtom.subType

  # get MolLabelFractions
  useLabel = labeledMixture.averageComposition
  if not useLabel:
    molLabelFractions = list(labeledMixture.molLabelFractions)
    if len(molLabelFractions) == 1:
      useLabel = molLabelFractions[0]

  if useLabel:
    # average composition
    resLabel = useLabel.molLabel.findFirstResLabel(resId=resId)
    result = _molLabelFractionsDict(resLabel, atName, subType, elementName)

  else:
    # average over multiple molLabels
    molWeightSum = sum([x.weight for x in molLabelFractions])
    result = {}

    for mlf in molLabelFractions:
      resLabel = mlf.molLabel.findFirstResLabel(resId=resId)
      partResult = _molLabelFractionsDict(resLabel, atName, subType, elementName)
      for key, val in partResult.items():
        result[key] = result.get(key, 0.0) + val * mlf.weight / molWeightSum
  #
  return result


def _atomPairFractions(labeledMixture, resIds, atNames,):
  """get the isotope pair : fraction dictionary for a labeledMixture

  labeledMixture:  LabeledMixture object
  resIds: length-two tuple of residue serials
  atNames: length-two tuple of atom names

  Returns (isotopeCode1, isotopeCode2):fraction dictionary
  with fractions normalised to 1.0
  """

  result = {}

  if len(resIds) != 2:
    raise("Error: length of resIds %s must be 2" % resIds)
  if len(atNames) != 2:
    raise("Error: length of atNames %s must be 2"
          % atNames)

  # calculate starting parameters
  elementNames = []
  subTypes = []
  for ii in (0,1):
    molResidue = labeledMixture.labeledMolecule.molecule.findFirstMolResidue(
                 serial=resIds[ii])
    if molResidue is None:
      return None

    chemAtom = molResidue.chemCompVar.findFirstChemAtom(name=atNames[ii])
    if chemAtom is None:
      return None
    elementName = chemAtom.elementSymbol
    elementNames.append(elementName)
    subTypes.append(chemAtom.subType)

  # get MolLabelFractions
  avLabel = labeledMixture.averageComposition
  if avLabel:
    molLabelFractions = [avLabel]
  else:
    molLabelFractions = labeledMixture.molLabelFractions
  molWeightSum = sum([x.weight for x in molLabelFractions])

  # calculate result
  for mlf in molLabelFractions:
    molFactor = mlf.weight / molWeightSum
    molLabel = mlf.molLabel

    uncorrelatedAtoms = True
    if resIds[0] == resIds[1]:
      oneResLabel = molLabel.findFirstResLabel(resId=resIds[0])
      for ii in (0,1):
        if (oneResLabel.findAllAtomLabels(atomName=atNames[ii]) or
            oneResLabel.findAllAtomLabels(elementName=elementNames[ii])):
          uncorrelatedAtoms = False

    if uncorrelatedAtoms:
      # isotope frequencies are uncorrelated at the residue level
      dds = []
      for ii in (0,1):
        resLabel = molLabel.findFirstResLabel(resId=resIds[ii])
        dds.append(_molLabelFractionsDict(resLabel, atNames[ii],
                                     subTypes[ii], elementNames[ii]))

      for iso0 in dds[0]:
        for iso1 in dds[1]:
          key = (iso0, iso1)
          contrib = dds[0][iso0] * dds[1][iso1] * molFactor
          result[key] = result.get(key, 0.0) + contrib

    else:
      # isotope frequencies are correlated at the residue level
      # Only happens if both are from the same residue and neither has
      # any AtomLabels. Loop over ResLabelFractions only
      resLabelFractions = oneResLabel.resLabelFractions
      rlfWeightSum = sum([x.weight for x in resLabelFractions])
      for rlf in resLabelFractions:
        partResult = _getIsotopomerAtomPairFractions(rlf.iotopomers, atNames, subTypes)
        for key, val in partResult.items():
          contrib = val * rlf.weight * molFactor / rlfWeightSum
          result[key] = result.get(key, 0.0) + contrib
  #
  return result


def _molLabelFractionsDict(resLabel, atName, subType, elementName):
  """get the isotopeCode:fraction dictionary for a single resLabel

  resLabel: resLabel object
  resid residue serial
  atName, subType, elementName: atom name subType and element name for atom

  Returns isotopeCode:fraction dictionary with fractions normalised to 1.0
  """
  # set up
  result = {}

  # get atomLabels, if any
  atomLabels = resLabel.findAllAtomLabels(atomName=atName)
  if not atomLabels:
    atomLabels = resLabel.findAllAtomLabels(elementName=elementName)

  # calculate fractions for AtomLabels
  if atomLabels:
    atWeightSum = sum([x.weight for x in atomLabels])
    for atomLabel in atomLabels:
      isotopeCode = '%s%s' % (atomLabel.massNumber, elementName)
      result[isotopeCode] = (result.get(isotopeCode, 0.0) +
                             atomLabel.weight / atWeightSum)

  else:
    # calculate fractions for ResLabelFractions
    resLabelFractions = resLabel.resLabelFractions
    rlfWeightSum = sum([x.weight for x in resLabelFractions])

    for rlf in resLabelFractions:
      isotopomers = rlf.isotopomers
      isoFactor = rlf.weight  / rlfWeightSum

      fractionDict = _getIsotopomerSingleAtomFractions(isotopomers, atName, subType)

      for isotopeCode in fractionDict.keys():
        contrib = fractionDict[isotopeCode]
        result[isotopeCode] = result.get(isotopeCode,0.0) + (isoFactor * contrib)
  #
  return result

[docs]def molAtomLabelFractions(labeling:str, molResidue, atomName:str) -> dict: """get isotopeCode:percentage mapping for atom in molResidue, with given labeling Will use molecule specific labeling if one exists, otherwise general labeling scheme.""" result = {} labeledMolecule = molResidue.root.findFirstLabeledMolecule(name=molResidue.molecule.name) if labeledMolecule: labeledMixture = labeledMolecule.findFirstLabeledMixture(name=labeling) if labeledMixture: result = _singleAtomFractions(labeledMixture, molResidue.serial, atomName) if not result: result = chemAtomLabelFractions(molResidue.root, labeling, molResidue.ccpCode, atomName) # return result
[docs]def molAtomLabelPairFractions(labeling:str, molResiduePair:Sequence, atomNamePair:Sequence) -> dict: """get isotopeCode:percentage mapping for atom (molResidue, atomName) pair with given labeling Will use molecule specific labeling if one exists, otherwise general labeling scheme.""" assert len(molResiduePair) == 2, "molAtomLabelPairFractions: length of molResidues must be 2" assert len(atomNamePair) == 2, "molAtomLabelPairFractions: length of atomNames must be 2" if molResiduePair[0].molecule is not molResiduePair[1].molecule: raise ValueError("molResidues must belong to the same molecule") result = {} molResidue0 = molResiduePair[0] labeledMolecule = molResidue0.root.findFirstLabeledMolecule(name=molResidue0.molecule.name) if labeledMolecule: # There is a specifically labeled molecule - use corresponding function labeledMixture = labeledMolecule.findFirstLabeledMixture(name=labeling) if labeledMixture: result = _atomPairFractions(labeledMixture, [x.serial for x in molResiduePair], atomNamePair) if not result: # No specific labeled molecule if molResidue0 is molResiduePair[1]: # intraresidue = use labeling scheme, if any result = chemAtomPairLabelFractions(molResiduePair[0].root, labeling, molResidue0.ccpCode, atomNamePair) else: # Uncorrelated atoms - use product of fractions for each atom result = commonUtil.dictionaryProduct( chemAtomLabelFractions(molResidue0.root, labeling, molResidue0.ccpCode, atomNamePair[0]), chemAtomLabelFractions(molResidue0.root, labeling, molResiduePair[1].ccpCode, atomNamePair[1]) ) # return result
[docs]def chemAtomLabelFractions(project, labeling:str, ccpCode:str, atomName:str) -> dict: """get isotopeCode:percentage mapping for atom in ChemComp with given labeling""" result = {} labelingScheme = project.findFirstLabelingScheme(name=labeling) if labelingScheme: chemCompLabel = labelingScheme.findFirstChemCompLabel(ccpCode=ccpCode) if chemCompLabel: result = _getIsotopomerSingleAtomFractions(chemCompLabel.isotopomers, atomName) # return result
[docs]def chemAtomPairLabelFractions(project, labeling:str, ccpCode:str, atomNamePair:Sequence) -> dict: """get isotopeCode:percentage mapping for atom pair in ChemComp with given labeling Assumes that atoms are in the same residue""" result = {} labelingScheme = project.findFirstLabelingScheme(name=labeling) if labelingScheme: chemCompLabel = labelingScheme.findFirstChemCompLabel(ccpCode=ccpCode) if chemCompLabel: result = _getIsotopomerAtomPairFractions(chemCompLabel.isotopomers, atomNamePair) # return result
[docs]def sampleChainLabeling(sample, chainCode:str) -> str: """Get labeling string for chain chainCode in sample If chainCode does not match a SampleComponent, look for unambiguous global labeling: Heuristics: If there is only one SampleComponent, use that labeling If all SampleComponents with explicit chainCodes have the same labeling, use that labeling""" from ccpn.util.Constants import DEFAULT_LABELLING labeling = DEFAULT_LABELLING sampleComponents = sample.sortedSampleComponents() if len(sampleComponents) == 1: labeling = sampleComponents[0].labeling else: for sampleComponent in sampleComponents: if chainCode in sampleComponent.chainCodes: labeling = sampleComponent.labeling break else: labelings = [x.labeling for x in sample.sampleComponents if x.chainCodes] if len(labelings) == 1: # Only one labeling in use in sample - use it labeling = labelings.pop() # return labeling