"""Labelling scheme handling. Functions to get labelling fraction
"""
#=========================================================================================
# Licence, Reference and Credits
#=========================================================================================
__copyright__ = "Copyright (C) CCPN project (http://www.ccpn.ac.uk) 2014 - 2019"
__credits__ = ("Ed Brooksbank, Luca Mureddu, Timothy J Ragan & Geerten W Vuister")
__licence__ = ("CCPN licence. See http://www.ccpn.ac.uk/v3-software/downloads/license")
__reference__ = ("Skinner, S.P., Fogh, R.H., Boucher, W., Ragan, T.J., Mureddu, L.G., & Vuister, G.W.",
"CcpNmr AnalysisAssign: a flexible platform for integrated NMR analysis",
"J.Biomol.Nmr (2016), 66, 111-124, http://doi.org/10.1007/s10858-016-0060-y")
#=========================================================================================
# Last code modification
#=========================================================================================
__modifiedBy__ = "$modifiedBy: CCPN $"
__dateModified__ = "$dateModified: 2017-07-07 16:32:32 +0100 (Fri, July 07, 2017) $"
__version__ = "$Revision: 3.0.0 $"
#=========================================================================================
# Created
#=========================================================================================
__author__ = "$Author: CCPN $"
__date__ = "$Date: 2017-04-07 10:28:41 +0000 (Fri, April 07, 2017) $"
#=========================================================================================
# Start of code
#=========================================================================================
import typing
from ccpn.core.Atom import Atom
from ccpn.core.Sample import Sample
from ccpn.core.SampleComponent import SampleComponent
[docs]def getLabellingScheme(schemeName: str) -> typing.Optional[typing.Dict]:
"""Get labelling scheme dictionary from scheme name (or None if none is found
TODO NBNB probably needs more parameters and a different location"""
raise NotImplementedError("getLabellingScheme not yet implemented")
[docs]def sampleComponentLabelledFraction(sampleComponent: SampleComponent,
atom2IsotopeCode: typing.Dict[Atom, str]
) -> float:
"""Get labelled fraction from SampleComponent.
All atoms must be from a chain compatible with SampleComponent.
NB for x/y and % wildcard atoms, labelling is averaged over the
component atoms. If only some of these are specifically labelled or
in a labelling scheme, the rest will be assumed to have the
default labelling of the sampleComponent. This may sometimes give
misleading results, but it is the best available default behaviour"""
result = 1.0
isotopeCode2Fraction = sampleComponent.isotopeCode2Fraction
processData = {}
substance = sampleComponent.substance
chains = sampleComponent.chains
for atom, isotopeCode in atom2IsotopeCode.items():
residue = atom.residue
chain = residue.chain
if chain not in chains:
raise ValueError("%s does not match any of the chains for %s" % (atom, sampleComponent))
# fraction serves as a sentinel, to check if there was specific labelling
fraction = None
if substance:
# Go from Atom to a list of component atoms, to catch wildcard atoms
atoms = [atom]
ll = []
for aa in atoms:
ll2 = aa.componentAtoms
if ll2:
atoms.extend(ll2)
else:
ll.append(aa)
atoms = ll
# Sum specific labelling over wildcard component atoms
summa = 0.0
count = 0
for aa in atoms:
dd = substance.getSpecificAtomLabelling(aa)
if dd:
# There is a specific labelling for this atom
fract = dd.get(isotopeCode)
if fract is None:
raise ValueError("isotopeCode %s not recognised in specificAtomLabelling for %s"
% (isotopeCode, atom))
else:
count += 1
summa += fract
if count:
# At least some atoms were specifically labelled. Use them
fraction = summa / len(atoms)
if count < len(atoms):
# Not all atoms were specifically labelled,
# Could happen e.g. when checking HG% for a Valine with only one methyl group
# specifically labelled, or for unsuitable labelling patterns
# Assume default labeling for the rest.
# This may give misleading results, but it is the best we can reasonably do.
fract = isotopeCode2Fraction.get(isotopeCode)
if fract is None:
raise ValueError("isotopeCode %s not recognised for %s"
% (isotopeCode, atom))
else:
fraction += fract * (1 - count / len(atoms))
if fraction is None:
# Most common case - put in dta structure to process below
dd2 = processData.get(residue, {})
dd2[atom.name] = isotopeCode
processData[residue] = dd2
else:
# include fraction in result
result *= fraction
# Process atoms that were not specifically labelled
labelling = sampleComponent.labelling
if labelling:
labellingScheme = getLabellingScheme(labelling)
else:
labellingScheme = None
for residue, atomName2IsotopeCode in processData.items():
fraction = residueLabelledFraction(atomName2IsotopeCode=atomName2IsotopeCode,
residueType=residue.residueType,
labellingScheme=labellingScheme,
isotopeCode2Fraction=isotopeCode2Fraction)
# include fraction in result
result *= fraction
#
return result
[docs]def residueLabelledFraction(atomName2IsotopeCode: typing.Dict[str, str], residueType: str = None,
labellingScheme: typing.Dict = None,
isotopeCode2Fraction: typing.Dict[str, float] = None,
) -> float:
"""Get fraction matching isotopeCodes for atoms in a single residue from labeling scheme and/or
uniform labeling. Position-specific labeling is NOT considered. If residueType or labellingScheme
is absent, or if an atom name is not found in the labelling scheme, the isotopeCode2Fraction
dictionary is used for that atom(s). If any atom has no labelling information anywhere, the
function returns None.
:params atomName2IsotopeCode {atomName:isotopeCode} dictionary defining the labelling pattern
to evaluate, e.g. {'CA':'13C', 'CB':'12C', 'CG':'13C'}.
The atom name may may end in '%', referring to equivalent atoms e.g. Leu HB%, CD%, HD1% or HD%,
in which case the labelling is averaged over the components.
Non-stereo assigned atoms, e.g. Leu HBx or HDy% will also be averaged, treating
HBx as HB% and HDy% as HD%. This will be misleading if the components have very different
labelling levels, but it is the lesser evil.
:params residueType Type of residue that atoms belong to.
If absent only uniform labelling is considered.
:params labellingScheme Labelling scheme dictionary (see example below).
If absent only uniform labelling is considered.
:params isotopeCode2Fraction gives default uniform isotope
percentages, as for SampleComponent.uniformIsotopeFractions (q.v.).
Example value: {'12C':0.289, '13C':0.711, '1H':0.99985, '2H':0.00015}
| *Example of labeling scheme dictionary*
|
| 'MyScheme': {
| '_name':'MyScheme',
| 'GLY':[
| { '_weight':0.2,
| 'CA':{'13C':0.5, '12C':0.5}
| 'C':{'13C':0.99,'12C':0.01}
| },
| {
| '_weight':0.5,
| 'CA':{'13C':0.0, '12C':1.0}
| 'C':{'13C':0.0,'12C':1.0}
| },
| {
| '_weight':0.3,
| 'C':{'13C':0.95, '12C':0.05},
| 'HA2':{'1H':0.7, '2H':0.3},
| 'HA3':{'1H':0.7, '2H':0.3} }
| ],
| 'ALA': [
| ...
| ],
| ...
| },
|
| The first isotopomer is 50% 13C CA and 99% 13C carbonyl, and defaults to the uniform labelling
| fraction for 1H and 15N.
|
| The second isotopomer is 100% C12 for both CA and carbonyl, and defaults to the uniform
| labelling fraction for 1H and 15N
|
| The third isotopomer is 95% 13C for carbonyl, 30% deuterium labelled (i.e. 70% 1H) for the
| aliphatic protons, and defaults to the uniform labelling fraction for nitrogen, CA, and NH
| protons.
"""
# NB This does NOT support wildcards ending in '*' (used e.g. for DNA "H2'*")
# But the fragility of adding '*' is not worth the limited usefulness
wildCardEndings = ('x%', 'y%', '%', 'x', 'y',)
if residueType and labellingScheme:
isotopomers = labellingScheme.get(residueType)
else:
isotopomers = None
if isotopomers:
result = 0.0
weights = 0.0
for isotopomer in isotopomers:
contribution = 1.0
for name, isotopeCode in atomName2IsotopeCode.items():
dd = isotopomer.get(name)
if dd:
# Isotopomer has data for atom `name`
fraction = dd.get(isotopeCode)
else:
# No data for atom `name`.
fraction = None
for ending in wildCardEndings:
# Check if it is a wildcard atom
if name.endswith(ending):
# Wildcard. average over contained atoms
length = len(ending)
prefix = name[:-length]
summa = 0.0
count = 0
for ss, dd in isotopomer.items():
if ss.startswith(prefix) and ss[-length:].isdigit():
# Atom matches wildcard expression
fract = dd.get(isotopeCode)
if fract is None:
raise ValueError(
"Residue %s, atom %s (from %s) recognised, but no data for isotope %s"
% (residueType, ss, name, isotopeCode)
)
else:
count += 1
summa += fract
if count:
fraction = summa / count
break
if fraction is None:
# Not a wildcard or no matches found - get fraction from isotope
fraction = isotopeCode2Fraction.get(isotopeCode)
if fraction is None:
raise ValueError("Residue %s, atom %s recognised, but no data for isotope %s"
% (residueType, name, isotopeCode))
else:
contribution *= fraction
weight = isotopomer['_weight']
weights += weight
result += contribution * weight
#
result /= weights
else:
# No isotopomers - get labelling from uniform fractions only
result = 1.0
for isotopeCode in atomName2IsotopeCode.values():
fraction = isotopeCode2Fraction.get(isotopeCode)
if fraction is None:
raise ValueError("No data for isotope %s" % isotopeCode)
else:
result *= fraction
#
return result
[docs]def atomLabelledFraction(sample: Sample, atom2IsotopeCode: typing.Dict[Atom, str]) -> float:
"""
Get fraction of cases in sample where the atoms have the
indicated isotopeCodes.
Equivalent groups of atoms e.g. Leu HB%, CD%, HD1% or HD%, will be averaged over their constituent
atoms. Non-stereo assigned atoms, e.g. Leu HBx or HDy% will also be averaged, treating
HBx as HB% and HDy% as HD%. This will be misleading if the components have very different
labelling levels, but it remains the lesser evil.
:params sample Sample to use for determining labeling
:params atom2IsotopeCode {atom:isotopeCode} dictionary for atoms to look at.
"""
# Reorganise input for processing
processData = {}
for atom, isotopeCode in atom2IsotopeCode:
chain = atom.residue.chain
dd = processData.get(chain, {})
dd[atom] = isotopeCode
processData[chain] = dd
result = 1.0
# Get labelledFractions, one chain at a time
sampleComponents = sample.sampleComponents
for chain, dd in processData.items():
chainComponents = [x for x in sampleComponents if chain in x.chains]
if chainComponents:
sampleComponent0 = chainComponents[0]
fraction = sampleComponentLabelledFraction(sampleComponent0, dd)
if len(chainComponents) > 1:
# Multiple components - we need to weigh them by concentration and redetermine fraction
concentration0 = sampleComponent0.concentration
unit0 = sampleComponent0.concentrationUnit
if not concentration0:
raise ValueError("Concentration not set for %s - cannot average over sampleComponents (1)"
% sampleComponent)
weight = concentration0
summa = fraction * concentration0
for sampleComponent in chainComponents[1:]:
concentration = sampleComponent.concentration
if not concentration:
raise ValueError(
"Concentration not set for %s - cannot average over sampleComponents (2)"
% sampleComponent
)
unit = sampleComponent.concentrationUnit
if unit != unit0:
raise ValueError("%s unit %s does not match previous component %s - %s"
% (sampleComponent, unit, sampleComponent0, unit0))
fract = sampleComponentLabelledFraction(sampleComponent, dd)
weight += concentration
summa += fract * concentration
#
fraction = summa / weight
#
result *= fraction
else:
raise ValueError("Chain %s matches no SampleComponent in sample %s" % (chain, sample))
#
return result