Source code for ccpn.core.lib.AxisCodeLib

"""
Module Documentation here
"""
#=========================================================================================
# Licence, Reference and Credits
#=========================================================================================
__copyright__ = "Copyright (C) CCPN project (http://www.ccpn.ac.uk) 2014 - 2021"
__credits__ = ("Ed Brooksbank, Joanna Fox, Victoria A Higman, Luca Mureddu, Eliza Płoskoń",
               "Timothy J Ragan, Brian O Smith, Gary S Thompson & Geerten W Vuister")
__licence__ = ("CCPN licence. See http://www.ccpn.ac.uk/v3-software/downloads/license")
__reference__ = ("Skinner, S.P., Fogh, R.H., Boucher, W., Ragan, T.J., Mureddu, L.G., & Vuister, G.W.",
                 "CcpNmr AnalysisAssign: a flexible platform for integrated NMR analysis",
                 "J.Biomol.Nmr (2016), 66, 111-124, http://doi.org/10.1007/s10858-016-0060-y")
#=========================================================================================
# Last code modification
#=========================================================================================
__modifiedBy__ = "$modifiedBy: Ed Brooksbank $"
__dateModified__ = "$dateModified: 2021-09-24 17:14:14 +0100 (Fri, September 24, 2021) $"
__version__ = "$Revision: 3.0.4 $"
#=========================================================================================
# Created
#=========================================================================================
__author__ = "$Author: Ed Brooksbank $"
__date__ = "$Date: 2021-06-28 18:11:16 +0100 (Mon, June 28, 2021) $"
#=========================================================================================
# Start of code
#=========================================================================================
import itertools
from collections import OrderedDict


def _matchSingleAxisCodeLength(code1, code2):
    """return a score based on the mismatch in length
    """
    lenDiff = abs(len(code1) - len(code2))

    return (100 + 800 // (lenDiff + 1))


def _matchSingleAxisCode(code1: str = None, code2: str = None, exactMatch: bool = False, allowLowercase=True, mismatch=0) -> int:
    """number of matching characters
    code1, code2 = strings
    e.g. 'Hn1', 'H1'

    Compare single axis codes

    Must always be upper case first letter

    more matching letters = higher code
    difference in length reduces match

    'Jab' matches 'J' or 'Jab...', but NOT 'Ja...'      ie, 1 or 3 or more letter match

    MQ gets no match - only with other MQ axis codes

    Hn* always matches Hcn*

    :param code1: first axis code to compare
    :param code2: second axis code to compare
    :param exactMatch: only allow exact matches, True/False
    :return: score based on the match
    """
    if mismatch > 0:
        raise ValueError('mismatch cannot be greater than 0')

    # undefined codes
    if not code1 or not code2 or (code1[0].islower() and code2[0].islower() and not allowLowercase):
        return mismatch

    # if exactMatch is True then only test for exact match
    if exactMatch:
        return 1100 if (code1 == code2) else 0

    ms = [a for a in zip(code1, code2)]  # zips to the shortest string
    ss = 0

    # add extra tests from v2.4
    if (code1.startswith('MQ') and not code2.startswith('MQ')) or (code2.startswith('MQ') and not code1.startswith('MQ')):
        return mismatch
    # char followed by digit already accounted for

    # get count of matching characters - more characters -> higher score
    for a, b in ms:
        if a != b:
            break
        ss += 1

    # another v2.4 test
    if ss:
        if ((code1.startswith('Hn') and code2.startswith('Hcn')) or
                (code1.startswith('Hcn') and code2.startswith('Hn'))):
            # Hn must always match Hcn, give it a high score
            ss += 500

        if code1.startswith('J'):
            if ss == 2:  # must be a 1, or (3 or more) letter match
                return mismatch

        ss += _matchSingleAxisCodeLength(code1, code2)
    return (1000 + ss) if ss else mismatch


def _boundAxisCodePairs(codes):
    """Return {index: partnerIndex} for axis codes that look like bound-atom pairs.

    Two codes are paired purely on their characters: each must start with an
    upper-case letter whose lower-case form is the second character of the
    other, and everything from the third character on must be identical,
    e.g. 'Hc' - 'Ch', or 'Hc1' - 'Ch1'.

    Undefined (None/empty) and single-character codes are skipped.
    Indices are looked up with ``codes.index``, so for duplicated codes the
    first occurrence is the one recorded.
    """
    pairs = {}
    for ii, code in enumerate(codes):
        if not code or len(code) < 2:
            continue
        for jj in range(ii + 1, len(codes)):
            code2 = codes[jj]
            if not code2 or len(code2) < 2:
                continue
            if (code[0].isupper() and code[0].lower() == code2[1] and
                    code2[0].isupper() and code2[0].lower() == code[1] and
                    code[2:] == code2[2:]):
                first, second = codes.index(code), codes.index(code2)
                pairs[first] = second
                pairs[second] = first
    return pairs


def _axisCodeMapIndices(axisCodes, refAxisCodes, checkBoundAtoms=True, allMatches=False, exactMatch=False):
    """get mapping tuple so that axisCodes[result[ii]] matches refAxisCodes[ii]
    all axisCodes must match, but result can contain None if refAxisCodes is longer
    if axisCodes contain duplicates, you will get one of possible matches

    Every permutation of the axisCodes indices (padded with None up to the
    number of refAxisCodes) is scored by summing the single-code match scores;
    only permutations with a total score above zero are kept.

    If axisCodes is longer than refAxisCodes the mapping is longer than
    refAxisCodes; entries at positions >= len(refAxisCodes) are not scored and
    should be ignored by the caller.

    :param axisCodes: sequence of axis codes to map
    :param refAxisCodes: sequence of reference axis codes to map onto
    :param checkBoundAtoms: if True, double the score of a mapping that sends a
                            bound pair in refAxisCodes (e.g. 'Hc'/'Ch') onto a
                            bound pair in axisCodes
    :param allMatches: if True return every accepted mapping, best first;
                       otherwise return only the best mapping
    :param exactMatch: passed on to the single-code comparison
    :return: None if no mapping scores above zero; otherwise a tuple of
             mapping tuples (allMatches=True) or a single mapping tuple
    """
    # CCPNINTERNAL - used in multiple places to map display order and spectrum order

    mismatch = -1000
    lenDifference = len(refAxisCodes) - len(axisCodes)

    # matches[axisIndex][refIndex] is the individual score for that pair of codes
    matches = [[_matchSingleAxisCode(code, ref, exactMatch=exactMatch, mismatch=mismatch) for ref in refAxisCodes]
               for code in axisCodes]

    # a negative lenDifference adds no padding
    values = list(range(len(axisCodes))) + [None] * lenDifference
    _results = []
    for perm in itertools.permutations(values):
        perm = list(perm)
        score = 0
        for ii, jj in enumerate(perm):
            if jj is not None and ii < len(refAxisCodes):
                _score = matches[jj][ii]
                if _score <= 0:
                    # not a match for this position, but the penalty still counts
                    perm[ii] = None
                score += _score
        if score > 0:
            _results.append((score, tuple(perm)))

    if not _results:
        return None

    if checkBoundAtoms:
        # bound atoms matching - find the bound pairs in axisCodes and refAxisCodes
        axisBound = _boundAxisCodePairs(axisCodes)
        refBound = _boundAxisCodePairs(refAxisCodes)

        if axisBound and refBound:
            # bound pairs on both sides - check for matching pairs
            _bounds = []
            for score, perm in _results:
                for idx1, idx2 in refBound.items():
                    target = perm[idx1]
                    if target is not None and target == axisBound.get(perm[idx2]):
                        # if there is a match then increase the score (once per mapping)
                        score *= 2
                        break
                _bounds.append((score, perm))
            _results = _bounds

    # stable sort: mappings with equal scores keep their permutation order
    _results = sorted(_results, reverse=True, key=lambda val: val[0])
    if allMatches:
        return tuple(perm for _, perm in _results)
    return _results[0][1]


def getAxisCodeMatch(axisCodes, refAxisCodes, exactMatch=False, allMatches=False, checkBoundAtoms=False) -> OrderedDict:
    """Return an OrderedDict containing the mapping from the refAxisCodes to axisCodes

    There may be multiple matches, or None for each axis code.

    Set allMatches to True to return all, or False for only the best match in each case

    e.g. for unique axis codes:

        getAxisCodeMatch(('Hn', 'Nh', 'C'), ('Nh', 'Hn'), allMatches=False)

        ->  { 'Hn': 'Hn'
              'Nh': 'Nh'
              'C' : None
            }

        getAxisCodeMatch(('Hn', 'Nh', 'C'), ('Nh', 'Hn'), allMatches=True)

        ->  { 'Hn': ('Hn',)
              'Nh': ('Nh',)
              'C' : ()
            }

    for similar repeated axis codes, possibly from matching isotopeCodes:

        getAxisCodeMatch(('Nh', 'H'), ('H', 'H1', 'N'), allMatches=False)

        ->  { 'Nh': 'N'
              'H' : 'H'
            }

        getAxisCodeMatch(('Nh', 'H'), ('H', 'H1', 'N'), allMatches=True)

        ->  { 'Nh': ('N',)
              'H' : ('H', 'H1')       <- in this case the first match is always the highest
            }

    With allMatches=False an empty OrderedDict is returned if no mapping is found at all.
    """
    mappings = _axisCodeMapIndices(axisCodes, refAxisCodes, checkBoundAtoms=checkBoundAtoms,
                                   allMatches=allMatches, exactMatch=exactMatch)
    refCount = len(refAxisCodes)
    matched = OrderedDict()

    if allMatches:
        # mappings is a sequence of mapping tuples (or None) - collect every distinct
        # reference code per axis code, in the order the mappings are supplied
        for mapping in mappings or ():
            for refInd, axisInd in enumerate(mapping):
                if axisInd is None or refInd >= refCount:
                    continue
                axis, ref = axisCodes[axisInd], refAxisCodes[refInd]
                if axis not in matched:
                    matched[axis] = (ref,)
                elif ref not in matched[axis]:
                    matched[axis] += (ref,)

        return OrderedDict((axis, matched.get(axis) or ()) for axis in axisCodes)

    if not mappings:
        return OrderedDict()

    # mappings is a single mapping tuple
    for refInd, axisInd in enumerate(mappings):
        if axisInd is not None and refInd < refCount:
            matched[axisCodes[axisInd]] = refAxisCodes[refInd]

    return OrderedDict((axis, matched.get(axis)) for axis in axisCodes)
def getAxisCodeMatchIndices(axisCodes, refAxisCodes, exactMatch=False, allMatches=False, checkBoundAtoms=False):
    """Return a tuple containing the indices for each axis code in axisCodes in refAxisCodes

    Only the best match is returned for each code, elements not found in refAxisCodes will be marked as 'None'

    e.g. for unique axis codes:

        indices = getAxisCodeMatchIndices(('Hn', 'Nh', 'C'), ('Nh', 'Hn'))

        ->  (1, 0, None)

        i.e axisCodes[0] = 'Hn' which maps to refAxisCodes[indices[0]] = 'Hn'

    for similar repeated axis codes, possibly from matching isotopeCodes:

        getAxisCodeMatchIndices(('Nh', 'H'), ('H', 'H1', 'N'))

        ->  (2, 0)

    With allMatches=True each element is instead a tuple of all candidate
    indices for that axis code (empty if there are none).
    With allMatches=False an empty tuple is returned if no mapping is found at all.
    """
    _matches = _axisCodeMapIndices(axisCodes, refAxisCodes, checkBoundAtoms=checkBoundAtoms,
                                   allMatches=allMatches, exactMatch=exactMatch)
    refCount = len(refAxisCodes)

    if allMatches:
        # _matches is a sequence of mapping tuples (or None) - collect every distinct
        # reference index per axis code, in the order the mappings are supplied
        _found = OrderedDict()
        for _match in _matches or ():
            for ii, ind in enumerate(_match):
                if ind is None or ii >= refCount:
                    continue
                axis = axisCodes[ind]
                if axis not in _found:
                    _found[axis] = (ii,)
                elif ii not in _found[axis]:
                    _found[axis] += (ii,)

        return tuple(_found.get(axis) or () for axis in axisCodes)

    if not _matches:
        return ()

    # _matches is a single mapping tuple.
    # Fill by position in axisCodes rather than by axis-code string, so that
    # duplicated axis codes each keep their own reference index instead of all
    # sharing the last one assigned.
    indices = [None] * len(axisCodes)
    for ii, ind in enumerate(_matches):
        if ind is not None and ii < refCount:
            indices[ind] = ii

    return tuple(indices)
def axisCodeMatch(axisCode, refAxisCodes):
    """Get refAxisCode that best matches axisCode

    :param axisCode: single axis code to look up
    :param refAxisCodes: sequence of reference axis codes to search
    :return: the best-matching element of refAxisCodes, or None if nothing matches
    """
    mapping = _axisCodeMapIndices([axisCode], refAxisCodes)
    if not mapping:
        # no mapping was found (the mapper returns None) - nothing to iterate
        return None

    for ii, indx in enumerate(mapping):
        if indx == 0:
            # We have a match - index 0 is the only axis code that was passed in
            return refAxisCodes[ii]

    return None
def doAxisCodesMatch(axisCodes, refAxisCodes):
    """Return True if axisCodes match refAxisCodes else False

    The two sequences must have the same length, and each axis code must
    match the reference axis code at the same position.
    """
    if len(axisCodes) != len(refAxisCodes):
        return False

    # a single-code score of 0 (the default mismatch value) means no match
    return all(_matchSingleAxisCode(code, refCode) for code, refCode in zip(axisCodes, refAxisCodes))