# Copyright 2009 by David Winter. All rights reserved. # This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. import os import sys import unittest from Bio import MissingExternalDependencyError from Bio import AlignIO from Bio.Nexus import Trees # One day we should use planned TreeIO module from Bio.Emboss.Applications import FDNADistCommandline, FNeighborCommandline from Bio.Emboss.Applications import FSeqBootCommandline, FProtDistCommandline from Bio.Emboss.Applications import FProtParsCommandline, FConsenseCommandline from Bio.Emboss.Applications import FTreeDistCommandline, FDNAParsCommandline #Try to avoid problems when the OS is in another language os.environ['LANG'] = 'C' exes_wanted = ['fdnadist', 'fneighbor', 'fprotdist', 'fprotpars', 'fconsense', 'fseqboot', 'ftreedist', 'fdnapars'] exes = dict() # Dictionary mapping from names to exe locations if "EMBOSS_ROOT" in os.environ: #Windows default installation path is C:\mEMBOSS which contains the exes. #EMBOSS also sets an environment variable which we will check for. path = os.environ["EMBOSS_ROOT"] if os.path.isdir(path): for name in exes_wanted: if os.path.isfile(os.path.join(path, name+".exe")): exes[name] = os.path.join(path, name+".exe") del path, name if sys.platform!="win32": from Bio._py3k import getoutput for name in exes_wanted: #This will "just work" if installed on the path as normal on Unix output = getoutput("%s -help" % name) if "not found" not in output and "not recognized" not in output: exes[name] = name del output del name if len(exes) < len(exes_wanted): raise MissingExternalDependencyError( "Install the Emboss package 'PhylipNew' if you want to use the "+ "Bio.Emboss.Applications wrappers for phylogenetic tools.") ########################################################################### # A few top level functions that are called repeatedly in the test cases def write_AlignIO_dna(): """Convert opuntia.aln to a phylip file""" assert 1 == AlignIO.convert("Clustalw/opuntia.aln", "clustal", "Phylip/opuntia.phy", "phylip") def write_AlignIO_protein(): """Convert hedgehog.aln to a phylip file""" assert 1 == AlignIO.convert("Clustalw/hedgehog.aln", "clustal", "Phylip/hedgehog.phy", "phylip") def clean_up(): """Delete tests files (to be used as tearDown() function in test fixtures)""" for filename in ["test_file", "Phylip/opuntia.phy", "Phylip/hedgehog.phy"]: if os.path.isfile(filename): os.remove(filename) def parse_trees(filename): """Helper function until we have Bio.Phylo on trunk.""" data = open("test_file", "r").read() for tree_str in data.split(";\n"): if tree_str: yield Trees.Tree(tree_str+";") class DistanceTests(unittest.TestCase): """Tests for calculating distance based phylogenetic trees with phylip""" def tearDown(self): clean_up() test_taxa = ['Archaeohip', 'Calippus', 'Hypohippus', 'M._secundu', 'Merychippu', 'Mesohippus', 'Nannipus', 'Neohippari', 'Parahippus', 'Pliohippus'] def distances_from_alignment(self, filename, DNA = True): """check we can make distance matrix from a given alignment""" self.assertTrue(os.path.isfile(filename), "Missing %s" % filename) if DNA: cline = FDNADistCommandline(exes["fdnadist"], method = 'j', sequence= filename, outfile = "test_file", auto = True) else: cline = FProtDistCommandline(exes["fprotdist"], method = 'j', sequence= filename, outfile = "test_file", auto = True) stdout, strerr = cline() #biopython can't grok distance matrices, so we'll just check it exists self.assertTrue(os.path.isfile("test_file")) def tree_from_distances(self, filename): """Check we can estimate a tree from a distance matrix""" self.assertTrue(os.path.isfile(filename), "Missing %s" % filename) cline = FNeighborCommandline(exes["fneighbor"], datafile = filename, outtreefile = "test_file", auto= True, filter = True) stdout, stderr = cline() for tree in parse_trees("test_file"): tree_taxa = [t.replace(" ", "_") for t in tree.get_taxa()] self.assertEqual(self.test_taxa, sorted(tree_taxa)) def test_distances_from_phylip_DNA(self): """Calculate a distance matrix from an phylip alignment""" self.distances_from_alignment("Phylip/horses.phy") def test_distances_from_AlignIO_DNA(self): """Calculate a distance matrix from an alignment written by AlignIO""" write_AlignIO_dna() self.distances_from_alignment("Phylip/opuntia.phy") #def test_distances_from_bootstrapped_phylip_DNA(self): # """Calculate a set of distance matrices from phylip alignments""" # self.distances_from_alignment("Phylip/bs_horses.phy") # fprotdist tests def test_distances_from_protein_phylip(self): """Calculate a distance matrix from phylip protein alignment""" self.distances_from_alignment("Phylip/interlaced.phy", DNA=False) def test_distances_from_protein_AlignIO(self): """Calculate distance matrix from an AlignIO written protein alignment""" write_AlignIO_protein() self.distances_from_alignment("Phylip/hedgehog.phy", DNA=False) #def test_distances_from_bootstrapped_phylip_protein(self): # """Calculate distance matrices from a bootstrapped protein alignment""" # self.distances_from_alignment("Clustalw/bs_interlaced.phy", DNA=False) # fneighbor tests #def test_tree_from_distances(self): # """Estimate tree from distance matrix and parse it.""" # self.tree_from_distances("Phylip/horses.fdnadist") # This one won't work because of a bug in EMBOSS 6.0.1 #def test_tree_from_bootstrapped_distances(self): # """Estimate tree from bootstrapped distance matrix and parse it""" # self.tree_from_distances("Phylip/bs_horses.fdnadist") class ParsimonyTests(unittest.TestCase): """Tests for estimating parsimony based phylogenetic trees with phylip""" def tearDown(self): clean_up() def parsimony_tree(self, filename, format, DNA=True): """Estimate a parsimony tree from an alignment""" self.assertTrue(os.path.isfile(filename), "Missing %s" % filename) if DNA: cline = FDNAParsCommandline(exes["fdnapars"], sequence = filename, outtreefile = "test_file", auto= True, stdout=True) else: cline = FProtParsCommandline(exes["fprotpars"], sequence = filename, outtreefile = "test_file", auto= True, stdout=True) stdout, stderr = cline() a_taxa = [s.name.replace(" ", "_") for s in next(AlignIO.parse(open(filename, "r"), format))] for tree in parse_trees("test_file"): t_taxa = [t.replace(" ", "_") for t in tree.get_taxa()] self.assertEqual(sorted(a_taxa), sorted(t_taxa)) # fdnapars tests #def test_parsimony_tree_from_phylip_DNA(self): # """Make a parsimony tree from a phylip DNA alignment""" # self.parsimony_tree("Phylip/horses.phy", "phylip") def test_parsimony_tree_from_AlignIO_DNA(self): """Make a parsimony tree from an alignment written with AlignIO""" write_AlignIO_dna() self.parsimony_tree("Phylip/opuntia.phy", "phylip") #def test_parsimony_bootstrapped_phylip_DNA(self): # """Make a parsimony tree from a bootstrapped phylip DNA alignment""" # self.parsimony_tree("Phylip/bs_horses.phy", "phylip") # fprotpars tests #def test_parsimony_tree_from_phylip_protein(self): # """Make a parsimony tree from a phylip DNA alignment""" # self.parsimony_tree("Phylip/interlaced.phy", "phylip", DNA=False) def test_parsimony_from_AlignIO_protein(self): """Make a parsimony tree from protein alignment written with AlignIO""" write_AlignIO_protein() self.parsimony_tree("Phylip/interlaced.phy", "phylip", DNA=False) #def test_parsimony_tree_bootstrapped_phylip_protein(self): # """Make a parsimony tree from a phylip DNA alignment""" # self.parsimony_tree("Phylip/bs_interlaced.phy", "phylip", DNA=False) class BootstrapTests(unittest.TestCase): """Tests for pseudosampling alignments with fseqboot""" def tearDown(self): clean_up() def check_bootstrap(self, filename, format, align_type="d"): """Check we can use fseqboot to pseudosample an alignment The align_type type argument is passed to the commandline object to set the output format to use (from [D]na,[p]rotein and [r]na ) """ self.assertTrue(os.path.isfile(filename), "Missing %s" % filename) cline = FSeqBootCommandline(exes["fseqboot"], sequence = filename, outfile = "test_file", seqtype = align_type, reps = 2, auto = True, filter = True) stdout, stderr = cline() # the resultant file should have 2 alignments... bs = list(AlignIO.parse(open("test_file", "r" ), format)) self.assertEqual(len(bs), 2) # ..and each name in the original alignment... a_names = [s.name.replace(" ", "_") for s in AlignIO.read(open(filename, "r"), format)] # ...should be in each alignment in the bootstrapped file for a in bs: self.assertEqual(a_names, [s.name.replace(" ", "_") for s in a]) def test_bootstrap_phylip_DNA(self): """Pseudosample a phylip DNA alignment""" self.check_bootstrap("Phylip/horses.phy", "phylip") def test_bootstrap_AlignIO_DNA(self): """Pseudosample a phylip DNA alignment written with AlignIO""" write_AlignIO_dna() self.check_bootstrap("Phylip/opuntia.phy", "phylip") def test_bootstrap_phylip_protein(self): """Pseudosample a phylip protein alignment""" self.check_bootstrap("Phylip/interlaced.phy", "phylip", "p") def test_bootstrap_AlignIO_protein(self): """Pseudosample a phylip protein alignment written with AlignIO""" write_AlignIO_protein() self.check_bootstrap("Phylip/hedgehog.phy", "phylip", "p") class TreeComparisonTests(unittest.TestCase): """Tests for comparing phylogenetic trees with phylip tools""" def tearDown(self): clean_up() def test_fconsense(self): """Calculate a consensus tree with fconsense""" cline = FConsenseCommandline(exes["fconsense"], intreefile = "Phylip/horses.tree", outtreefile = "test_file", auto = True, filter = True) stdout, stderr = cline() #Split the next and get_taxa into two steps to help 2to3 work tree1 = next(parse_trees("test_file")) taxa1 = tree1.get_taxa() for tree in parse_trees("Phylip/horses.tree"): taxa2 = tree.get_taxa() self.assertEqual(sorted(taxa1), sorted(taxa2)) def test_ftreedist(self): """Calculate the distance between trees with ftreedist""" cline = FTreeDistCommandline(exes["ftreedist"], intreefile = "Phylip/horses.tree", outfile = "test_file", auto = True, filter = True) stdout, stderr = cline() self.assertTrue(os.path.isfile("test_file")) if __name__ == "__main__": runner = unittest.TextTestRunner(verbosity = 2) unittest.main(testRunner=runner) clean_up()