# Copyright 2018 by Ariel Aptekmann.
# All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Module for the support of MEME minimal motif format."""

from __future__ import print_function

from Bio import Seq
from Bio import motifs
import math


def read(handle):
    """Parse a file in MEME minimal motif format into a Record object.

    The handle is consumed sequentially: first the version, alphabet and
    background sections, then every ``MOTIF`` entry up to the end of input.

    Examples
    --------
    >>> from Bio.motifs import minimal
    >>> with open("motifs/meme.out") as f:
    ...     record = minimal.read(f)
    ...
    >>> for motif in record:
    ...     print(motif.name, motif.evalue)
    ...
    1 1.1e-22

    You can access individual motifs in the record by their index or find a
    motif by its name:

    >>> from Bio import motifs
    >>> with open("motifs/minimal_test.meme") as f:
    ...     record = motifs.parse(f, 'minimal')
    ...
    >>> motif = record[0]
    >>> print(motif.name)
    KRP
    >>> motif = record['IFXA']
    >>> print(motif.name)
    IFXA

    This function won't retrieve instances, as there are none in minimal
    MEME format.

    Raises ValueError if a required section or field is missing.

    """
    record = Record()
    _read_version(record, handle)
    _read_alphabet(record, handle)
    _read_background(record, handle)

    # Line that terminated the previous probability matrix.  It has already
    # been taken from the handle, so it must be re-examined here: if it is a
    # MOTIF header (no blank line between motifs) it would otherwise be lost.
    pending = None
    while True:
        line = _next_motif_line(handle, pending)
        if line is None:
            return record
        fields = line.split()
        if len(fields) < 2:
            raise ValueError(
                "Improper input file. MOTIF line does not contain a motif name:\n%s"
                % line
            )
        name = fields[1]
        length, num_occurrences, evalue = _read_motif_statistics(line, handle)
        counts, pending = _read_lpm_rows(handle)
        if length is None:
            # No 'w=' field on the matrix header: use the number of rows read.
            length = len(counts["A"])
        # counts looks like {'A': [...], 'C': [...], 'G': [...], 'T': [...]}
        motif = motifs.Motif(alphabet=record.alphabet, counts=counts)
        motif.background = record.background
        motif.length = length
        motif.num_occurrences = num_occurrences
        motif.evalue = evalue
        motif.name = name
        record.append(motif)


class Record(list):
    """Class for holding the results of a minimal MEME run.

    A Record is a list of motifs that additionally stores the file-level
    information (version, alphabet, background frequencies).
    """

    def __init__(self):
        """Initialize record class values."""
        list.__init__(self)
        self.version = ""
        self.datafile = ""
        self.command = ""
        self.alphabet = None
        self.background = {}
        self.sequences = []

    def __getitem__(self, key):
        """Return the motif of index key.

        If key is a string, the first motif whose ``name`` equals key is
        returned, or None when no motif has that name.  Any other key is
        handled by normal list indexing.
        """
        if isinstance(key, str):
            for motif in self:
                if motif.name == key:
                    return motif
            # Unknown names yield None rather than raising.
            return None
        return list.__getitem__(self, key)


# Everything below is private


def _next_motif_line(handle, pending=None):
    """Return the next line starting with 'MOTIF', or None at end (PRIVATE).

    ``pending`` is a line that was already taken from the handle; it is
    checked before any further line is read.
    """
    if pending is not None and pending.startswith("MOTIF"):
        return pending
    for line in handle:
        if line.startswith("MOTIF"):
            return line
    return None


def _read_background(record, handle):
    """Read background letter frequencies (PRIVATE).

    Skips to the 'Background letter frequencies' header, then reads the
    following line as alternating letter/value tokens and stores the values
    found at the 1st to 4th value positions in ``record.background`` under
    the keys A, C, G and T.

    Raises ValueError if the header or the frequency line is missing or the
    frequency line has fewer than four letter/value pairs.
    """
    for line in handle:
        if line.startswith("Background letter frequencies"):
            break
    else:
        raise ValueError(
            "Improper input file. "
            "File should contain a line starting background frequencies."
        )
    try:
        line = next(handle)
    except StopIteration:
        raise ValueError(
            "Unexpected end of stream: "
            "Expected to find line starting background frequencies."
        )
    ls = line.split()
    if len(ls) < 8:
        raise ValueError(
            "Improper input file. Expected four background frequencies in line:\n%s"
            % line
        )
    # Tokens alternate letter, value; only the values are used.
    A, C, G, T = float(ls[1]), float(ls[3]), float(ls[5]), float(ls[7])
    record.background = {"A": A, "C": C, "G": G, "T": T}


def _read_version(record, handle):
    """Read MEME version (PRIVATE).

    Skips to the line starting with 'MEME version' and stores its third
    whitespace-separated token in ``record.version``.

    Raises ValueError if no such line exists or it carries no version.
    """
    for line in handle:
        if line.startswith("MEME version"):
            break
    else:
        raise ValueError(
            "Improper input file. "
            "File should contain a line starting MEME version."
        )
    ls = line.split()
    if len(ls) < 3:
        raise ValueError(
            "Improper input file. No version found in line:\n%s" % line
        )
    record.version = ls[2]


def _read_alphabet(record, handle):
    """Read alphabet (PRIVATE).

    Skips to the line starting with 'ALPHABET' and sets ``record.alphabet``
    to "ACGT" when that is the declared alphabet, otherwise to the 20-letter
    string "ACDEFGHIKLMNPQRSTVWY".

    Raises ValueError if the line is missing or lacks the 'ALPHABET= ' prefix.
    """
    for line in handle:
        if line.startswith("ALPHABET"):
            break
    else:
        raise ValueError(
            "Unexpected end of stream: "
            "Expected to find line starting with 'ALPHABET'"
        )
    if not line.startswith("ALPHABET= "):
        raise ValueError("Line does not start with 'ALPHABET= ':\n%s" % line)
    line = line.strip().replace("ALPHABET= ", "")
    if line == "ACGT":
        al = "ACGT"
    else:
        al = "ACDEFGHIKLMNPQRSTVWY"
    record.alphabet = al


def _read_lpm_rows(handle):
    """Read matrix rows; return (counts, terminating line) (PRIVATE).

    Rows are read while they have exactly four whitespace-separated fields.
    Each probability is multiplied by 1000000 and truncated to an int.  The
    first line that is not a four-field row ends the matrix and is returned
    as the second item (None if the input ended instead), so the caller can
    still inspect it.
    """
    columns = [[], [], [], []]
    trailing = None
    for line in handle:
        freqs = line.split()
        if len(freqs) != 4:
            trailing = line
            break
        for column, freq in zip(columns, freqs):
            column.append(int(float(freq) * 1000000))
    counts = {
        "A": columns[0],
        "C": columns[1],
        "G": columns[2],
        "T": columns[3],
    }
    return counts, trailing


def _read_lpm(line, handle):
    """Read letter probability matrix (PRIVATE).

    Returns a dict mapping 'A', 'C', 'G' and 'T' to lists of integer counts,
    one entry per matrix row.  The ``line`` argument is not used; it is kept
    so the signature stays unchanged.
    """
    counts, _ = _read_lpm_rows(handle)
    return counts


def _get_statistic(line, key):
    """Return the token following key in line, or None if absent (PRIVATE).

    Raises ValueError if key is present but nothing follows it.
    """
    _, found, tail = line.partition(key)
    if not found:
        return None
    tokens = tail.split()
    if not tokens:
        raise ValueError("Improper input file. No value for '%s' in line:\n%s"
                         % (key, line))
    return tokens[0]


def _read_motif_statistics(line, handle):
    """Read motif statistics (PRIVATE).

    Skips to the matrix header line, which looks like::

        letter-probability matrix: alength= 4 w= 19 nsites= 17 E= 4.1e-009

    and returns the tuple (length, num_occurrences, evalue).  The MEME
    minimal format documentation describes the key/value pairs as optional,
    with nsites defaulting to 20 and E defaulting to zero; those defaults are
    applied here.  When 'w=' is absent, length is returned as None and the
    caller is expected to derive it from the matrix.

    The ``line`` argument is not used; it is kept so the signature stays
    unchanged.

    Raises ValueError if the header line is not found.
    """
    for line in handle:
        if line.startswith("letter-probability matrix:"):
            break
    else:
        raise ValueError(
            "Unexpected end of stream: "
            "Expected to find line starting with 'letter-probability matrix:'"
        )
    sites_text = _get_statistic(line, "nsites=")
    width_text = _get_statistic(line, "w=")
    evalue_text = _get_statistic(line, "E=")
    num_occurrences = 20 if sites_text is None else int(sites_text)
    length = None if width_text is None else int(width_text)
    evalue = 0.0 if evalue_text is None else float(evalue_text)
    return length, num_occurrences, evalue


def _read_motif_name(handle):
    """Read motif name (PRIVATE).

    Skips to the first line containing 'sorted by position p-value' and
    returns its first two words joined by a single space.

    Raises ValueError if no such line is found.
    """
    for line in handle:
        if "sorted by position p-value" in line:
            break
    else:
        raise ValueError("Unexpected end of stream: Failed to find motif name")
    words = line.split()
    name = " ".join(words[0:2])
    return name