Source code for ccpnmodel.ccpncore.lib.Io.Fasta
"""Code for reading Fasta format files
"""
#=========================================================================================
# Licence, Reference and Credits
#=========================================================================================
__copyright__ = "Copyright (C) CCPN project (http://www.ccpn.ac.uk) 2014 - 2017"
__credits__ = ("Wayne Boucher, Ed Brooksbank, Rasmus H Fogh, Luca Mureddu, Timothy J Ragan & Geerten W Vuister")
__licence__ = ("CCPN licence. See http://www.ccpn.ac.uk/v3-software/downloads/license",
"or ccpnmodel.ccpncore.memops.Credits.CcpnLicense for licence text")
__reference__ = ("For publications, please use reference from http://www.ccpn.ac.uk/v3-software/downloads/license",
"or ccpnmodel.ccpncore.memops.Credits.CcpNmrReference")
#=========================================================================================
# Last code modification
#=========================================================================================
__modifiedBy__ = "$modifiedBy: CCPN $"
__dateModified__ = "$dateModified: 2017-07-07 16:33:13 +0100 (Fri, July 07, 2017) $"
__version__ = "$Revision: 3.0.0 $"
#=========================================================================================
# Created
#=========================================================================================
__author__ = "$Author: CCPN $"
__date__ = "$Date: 2017-04-07 10:28:48 +0000 (Fri, April 07, 2017) $"
#=========================================================================================
# Start of code
#=========================================================================================
def parseFastaFile(inputFile):
    """Parse a Fasta file and return its sequences.

    Each record starts with a header line whose first character is '>'.
    The record name is the first whitespace-delimited word following the
    '>' (an empty string if the header carries no text); the rest of the
    header line is discarded. All non-blank lines up to the next header
    (or end of file) are stripped and concatenated to form the sequence.

    Blank lines are skipped, and any text preceding the first header is
    ignored.

    :param inputFile: path of the Fasta file to read.
    :return: list of [name, sequence] two-element lists, in file order.
             Records with identical headers are all kept as separate entries.
    """
    sequences = []
    name = None       # name of the record being accumulated; None before first header
    fragments = []    # sequence lines collected for the current record

    with open(inputFile, 'r') as f:
        for rawLine in f:
            line = rawLine.strip()
            if not line:
                continue

            if line[0] == '>':
                # A new header closes the previous record, if there was one.
                if name is not None:
                    sequences.append([name, ''.join(fragments)])
                # line[1:] drops the '>'; split() also copes with leading spaces.
                words = line[1:].split()
                name = words[0] if words else ''
                fragments = []
            elif name is not None:
                # Sequence data is only meaningful once a header has been seen.
                fragments.append(line)

    # Flush the final record, which has no following header to close it.
    if name is not None:
        sequences.append([name, ''.join(fragments)])

    return sequences