# Copyright (C) 2011 by Brandon Invergo (b.invergo@gmail.com)
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.

import unittest
import os
import os.path
from Bio.Phylo.PAML import codeml
from Bio.Phylo.PAML._paml import PamlError

# Some constants to assist with testing:
# This is the number of parameters that should be parsed for each
# NSsites site class model
SITECLASS_PARAMS = {0: 6, 1: 4, 2: 4, 3: 4, 7: 5, 8: 8}
# This is the default number of site classes per NSsites site
# class model
SITECLASSES = {0: None, 1: 2, 2: 3, 3: 3, 7: 10, 8: 11}


class ModTest(unittest.TestCase):
    """Tests for the codeml.Codeml wrapper and the codeml.read parser."""

    align_dir = os.path.join("PAML", "Alignments")
    tree_dir = os.path.join("PAML", "Trees")
    ctl_dir = os.path.join("PAML", "Control_files")
    results_dir = os.path.join("PAML", "Results")
    working_dir = os.path.join("PAML", "codeml_test")

    align_file = os.path.join(align_dir, "alignment.phylip")
    tree_file = os.path.join(tree_dir, "species.tree")
    bad_tree_file = os.path.join(tree_dir, "bad.tree")
    out_file = os.path.join(results_dir, "test.out")
    results_file = os.path.join(results_dir, "bad_results.out")
    bad_ctl_file1 = os.path.join(ctl_dir, "bad1.ctl")
    bad_ctl_file2 = os.path.join(ctl_dir, "bad2.ctl")
    bad_ctl_file3 = os.path.join(ctl_dir, "bad3.ctl")
    ctl_file = os.path.join(ctl_dir, "codeml", "codeml.ctl")

    def tearDown(self):
        """Just in case CODEML creates some junk files, do a clean-up."""
        del_files = [self.out_file, "2NG.dN", "2NG.dS", "2NG.t",
                     "codeml.ctl", "lnf", "rst", "rst1", "rub"]
        for filename in del_files:
            if os.path.exists(filename):
                os.remove(filename)
        # The working directory has to be emptied before os.rmdir will
        # accept it.
        if os.path.exists(self.working_dir):
            for filename in os.listdir(self.working_dir):
                filepath = os.path.join(self.working_dir, filename)
                os.remove(filepath)
            os.rmdir(self.working_dir)

    def setUp(self):
        """Give every test a fresh, unconfigured Codeml instance."""
        self.cml = codeml.Codeml()

    def _read_results(self, subdir):
        """Parse every results file in Results/codeml/<subdir>.

        Yield one (version, version_msg, results) tuple per file:

        - version is the part of the file name between the first "-" and
          the following "." (for example "4_1");
        - version_msg is the assertion failure message naming that
          version, with underscores shown as dots;
        - results is the value returned by codeml.read for the file.
        """
        res_dir = os.path.join(self.results_dir, "codeml", subdir)
        for results_file in os.listdir(res_dir):
            version = results_file.split('-')[1].split('.')[0]
            version_msg = "Improper parsing for version %s" \
                % version.replace('_', '.')
            results_path = os.path.join(res_dir, results_file)
            yield version, version_msg, codeml.read(results_path)

    def testAlignmentFileIsValid(self):
        """A list given as the alignment is rejected on init and on run."""
        self.assertRaises((AttributeError, TypeError, OSError),
                          codeml.Codeml, alignment=list())
        self.cml.alignment = list()
        self.cml.tree = self.tree_file
        self.cml.out_file = self.out_file
        self.assertRaises((AttributeError, TypeError, OSError),
                          self.cml.run)

    def testAlignmentExists(self):
        """A nonexistent alignment path is rejected on init and on run."""
        self.assertRaises((EnvironmentError, IOError),
                          codeml.Codeml, alignment="nonexistent")
        self.cml.alignment = "nonexistent"
        self.cml.tree = self.tree_file
        self.cml.out_file = self.out_file
        self.assertRaises(IOError, self.cml.run)

    def testTreeFileValid(self):
        """A list given as the tree is rejected on init and on run."""
        self.assertRaises((AttributeError, TypeError, OSError),
                          codeml.Codeml, tree=list())
        self.cml.alignment = self.align_file
        self.cml.tree = list()
        self.cml.out_file = self.out_file
        self.assertRaises((AttributeError, TypeError, OSError),
                          self.cml.run)

    def testTreeExists(self):
        """A nonexistent tree path is rejected on init and on run."""
        self.assertRaises((EnvironmentError, IOError),
                          codeml.Codeml, tree="nonexistent")
        self.cml.alignment = self.align_file
        self.cml.tree = "nonexistent"
        self.cml.out_file = self.out_file
        self.assertRaises(IOError, self.cml.run)

    def testWorkingDirValid(self):
        """A list given as the working directory makes run() raise."""
        self.cml.tree = self.tree_file
        self.cml.alignment = self.align_file
        self.cml.out_file = self.out_file
        self.cml.working_dir = list()
        self.assertRaises((AttributeError, TypeError, OSError),
                          self.cml.run)

    def testOutputFileValid(self):
        """A list given as the output file makes run() raise."""
        self.cml.tree = self.tree_file
        self.cml.alignment = self.align_file
        self.cml.out_file = list()
        self.assertRaises((AttributeError, ValueError, OSError),
                          self.cml.run)

    def testOptionExists(self):
        """Setting or getting an unknown option name raises."""
        self.assertRaises((AttributeError, KeyError),
                          self.cml.set_options, xxxx=1)
        self.assertRaises((AttributeError, KeyError),
                          self.cml.get_option, "xxxx")

    def testAlignmentSpecified(self):
        """run() raises when no alignment has been set."""
        self.cml.tree = self.tree_file
        self.cml.out_file = self.out_file
        self.assertRaises((AttributeError, ValueError),
                          self.cml.run)

    def testTreeSpecified(self):
        """run() raises when no tree has been set."""
        self.cml.alignment = self.align_file
        self.cml.out_file = self.out_file
        self.assertRaises((AttributeError, ValueError),
                          self.cml.run)

    def testOutputFileSpecified(self):
        """run() raises when no output file has been set."""
        self.cml.alignment = self.align_file
        self.cml.tree = self.tree_file
        self.assertRaises((AttributeError, ValueError),
                          self.cml.run)

    def testPamlErrorsCaught(self):
        """run() with bad.tree raises PamlError or EnvironmentError."""
        self.cml.alignment = self.align_file
        self.cml.tree = self.bad_tree_file
        self.cml.out_file = self.out_file
        self.assertRaises((EnvironmentError, PamlError),
                          self.cml.run)

    def testCtlFileValidOnRun(self):
        """A list given as ctl_file makes run() raise."""
        self.cml.alignment = self.align_file
        self.cml.tree = self.tree_file
        self.cml.out_file = self.out_file
        self.assertRaises((AttributeError, TypeError, OSError),
                          self.cml.run, ctl_file=list())

    def testCtlFileExistsOnRun(self):
        """A nonexistent ctl_file makes run() raise."""
        self.cml.alignment = self.align_file
        self.cml.tree = self.tree_file
        self.cml.out_file = self.out_file
        self.assertRaises((EnvironmentError, IOError),
                          self.cml.run, ctl_file="nonexistent")

    def testCtlFileValidOnRead(self):
        """Bad control files raise; the good one yields known options."""
        self.assertRaises((AttributeError, TypeError, OSError),
                          self.cml.read_ctl_file, list())
        self.assertRaises((AttributeError, KeyError),
                          self.cml.read_ctl_file, self.bad_ctl_file1)
        self.assertRaises(AttributeError,
                          self.cml.read_ctl_file, self.bad_ctl_file2)
        self.assertRaises(TypeError,
                          self.cml.read_ctl_file, self.bad_ctl_file3)
        target_options = {"noisy": 0,
                          "verbose": 0,
                          "runmode": 0,
                          "seqtype": 1,
                          "CodonFreq": 2,
                          "ndata": None,
                          "clock": 0,
                          "aaDist": None,
                          "aaRatefile": None,
                          "model": 0,
                          "NSsites": [0],
                          "icode": 0,
                          "Mgene": 0,
                          "fix_kappa": 0,
                          "kappa": 4.54006,
                          "fix_omega": 0,
                          "omega": 1,
                          "fix_alpha": 1,
                          "alpha": 0,
                          "Malpha": 0,
                          "ncatG": None,
                          "getSE": 0,
                          "RateAncestor": 0,
                          "Small_Diff": None,
                          "cleandata": 1,
                          "fix_blength": 1,
                          "method": 0,
                          "rho": None,
                          "fix_rho": None}
        self.cml.read_ctl_file(self.ctl_file)
        # Compare the dictionary keys:
        self.assertEqual(sorted(self.cml._options), sorted(target_options))
        for key in target_options:
            self.assertEqual(self.cml._options[key], target_options[key],
                             "%s: %r vs %r"
                             % (key, self.cml._options[key],
                                target_options[key]))

    def testCtlFileExistsOnRead(self):
        """read_ctl_file raises for a nonexistent control file."""
        self.assertRaises((EnvironmentError, IOError),
                          self.cml.read_ctl_file, ctl_file="nonexistent")

    def testResultsValid(self):
        """codeml.read raises when given a list instead of a path."""
        self.assertRaises((AttributeError, TypeError, OSError),
                          codeml.read, list())

    def testResultsExist(self):
        """codeml.read raises for a nonexistent results file."""
        self.assertRaises((EnvironmentError, IOError),
                          codeml.read, "nonexistent")

    def testResultsParsable(self):
        """codeml.read raises ValueError for bad_results.out."""
        self.assertRaises(ValueError, codeml.read, self.results_file)

    def testParseSEs(self):
        """Results in the SE directory include an "SEs" parameter."""
        for _version, version_msg, results in self._read_results("SE"):
            self.assertEqual(len(results), 4, version_msg)
            self.assertTrue("NSsites" in results, version_msg)
            models = results["NSsites"]
            # Only site class model 0 was simulated
            self.assertEqual(len(models), 1, version_msg)
            self.assertTrue(0 in models, version_msg)
            model = models[0]
            self.assertEqual(len(model), 5, version_msg)
            self.assertTrue("parameters" in model, version_msg)
            params = model["parameters"]
            # There should be one new item in the parameters, "SEs"
            self.assertEqual(len(params), SITECLASS_PARAMS[0] + 1,
                             version_msg)
            self.assertTrue("SEs" in params, version_msg)

    def testParseAllNSsites(self):
        """Results in all_NSsites contain models 0, 1, 2, 3, 7 and 8."""
        for _version, version_msg, results in \
                self._read_results("all_NSsites"):
            # There should be 4 top-level items: 'codon model', 'model',
            # 'version', & 'NSsites'
            self.assertEqual(len(results), 4, version_msg)
            self.assertTrue("NSsites" in results, version_msg)
            # There should be 6 NSsites classes: 0, 1, 2, 3, 7 & 8
            self.assertEqual(len(results["NSsites"]), 6, version_msg)
            # Each site class model should have 5 sub-items: 'lnL', 'tree',
            # 'description', 'parameters', & 'tree length'. It should
            # have the correct number of parameters also.
            for model_num in [0, 1, 2, 3, 7, 8]:
                model = results["NSsites"][model_num]
                self.assertEqual(len(model), 5, version_msg)
                self.assertTrue("parameters" in model, version_msg)
                params = model["parameters"]
                self.assertEqual(len(params), SITECLASS_PARAMS[model_num],
                                 version_msg)
                self.assertTrue("branches" in params, version_msg)
                branches = params["branches"]
                # There are 7 branches in the test case (specific to these
                # test cases)
                self.assertEqual(len(branches), 7, version_msg)
                if "site classes" in params:
                    self.assertEqual(len(params["site classes"]),
                                     SITECLASSES[model_num], version_msg)

    def testParseNSsite3(self):
        """Results in NSsite3 contain only the discrete model 3."""
        for _version, version_msg, results in self._read_results("NSsite3"):
            # There should be 5 top-level items: 'codon model', 'model',
            # 'version', 'NSsites' & site-class model, the last of which
            # is only there when only one NSsites class is used
            self.assertEqual(len(results), 5, version_msg)
            self.assertTrue('site-class model' in results, version_msg)
            self.assertEqual(results['site-class model'], 'discrete',
                             version_msg)
            self.assertTrue("NSsites" in results, version_msg)
            # There should be 1 NSsites class: 3
            self.assertEqual(len(results["NSsites"]), 1, version_msg)
            # Each site class model should have 5 sub-items: 'lnL', 'tree',
            # 'description', 'parameters', & 'tree length'. It should
            # have the correct number of parameters also.
            model = results["NSsites"][3]
            self.assertEqual(len(model), 5, version_msg)
            self.assertTrue("parameters" in model, version_msg)
            params = model["parameters"]
            # Pass the full message (not the bare version token) so a
            # failure reads like every other assertion here.
            self.assertEqual(len(params), SITECLASS_PARAMS[3], version_msg)
            self.assertTrue("site classes" in params, version_msg)
            site_classes = params["site classes"]
            self.assertEqual(len(site_classes), 4, version_msg)

    def testParseBranchSiteA(self):
        """Results in branchsiteA have four site classes under model 2."""
        for _version, version_msg, results in \
                self._read_results("branchsiteA"):
            # There are 5 top-level items in this case:
            # 'codon model', 'model', 'version', 'NSsites' & 'site-class model'
            self.assertEqual(len(results), 5, version_msg)
            self.assertTrue("NSsites" in results, version_msg)
            models = results["NSsites"]
            # Only site class model 2 is simulated for Branch Site A
            self.assertEqual(len(models), 1, version_msg)
            self.assertTrue(2 in models, version_msg)
            model = models[2]
            self.assertEqual(len(model), 5, version_msg)
            self.assertTrue("parameters" in model, version_msg)
            params = model["parameters"]
            # Branch Site A results lack a "branches" parameter
            self.assertEqual(len(params), SITECLASS_PARAMS[2] - 1,
                             version_msg)
            self.assertTrue("site classes" in params, version_msg)
            site_classes = params["site classes"]
            # Branch Site A adds another site class
            self.assertEqual(len(site_classes), SITECLASSES[2] + 1,
                             version_msg)
            for class_num in [0, 1, 2, 3]:
                self.assertTrue(class_num in site_classes, version_msg)
                site_class = site_classes[class_num]
                self.assertEqual(len(site_class), 2, version_msg)
                self.assertTrue("branch types" in site_class, version_msg)
                branches = site_class["branch types"]
                self.assertEqual(len(branches), 2, version_msg)

    def testParseCladeModelC(self):
        """Results in clademodelC have three site classes under model 2."""
        for _version, version_msg, results in \
                self._read_results("clademodelC"):
            # 5 top-level items again in this case
            self.assertEqual(len(results), 5, version_msg)
            self.assertTrue("NSsites" in results, version_msg)
            models = results["NSsites"]
            # Only site class model 2 is simulated for Clade Model C
            self.assertEqual(len(models), 1, version_msg)
            self.assertTrue(2 in models, version_msg)
            model = models[2]
            self.assertEqual(len(model), 5, version_msg)
            self.assertTrue("parameters" in model, version_msg)
            params = model["parameters"]
            # Clade Model C results lack a "branches" parameter
            self.assertEqual(len(params), SITECLASS_PARAMS[2] - 1,
                             version_msg)
            self.assertTrue("site classes" in params, version_msg)
            site_classes = params["site classes"]
            self.assertEqual(len(site_classes), SITECLASSES[2],
                             version_msg)
            for class_num in [0, 1, 2]:
                self.assertTrue(class_num in site_classes, version_msg)
                site_class = site_classes[class_num]
                self.assertEqual(len(site_class), 2, version_msg)
                self.assertTrue("branch types" in site_class, version_msg)
                branches = site_class["branch types"]
                self.assertEqual(len(branches), 2, version_msg)

    def testParseNgene2Mgene02(self):
        """Results in ngene2_mgene02 report two rates under model 0."""
        for _version, version_msg, results in \
                self._read_results("ngene2_mgene02"):
            self.assertEqual(len(results), 4, version_msg)
            self.assertTrue("NSsites" in results, version_msg)
            models = results["NSsites"]
            self.assertEqual(len(models), 1, version_msg)
            self.assertTrue(0 in models, version_msg)
            model = models[0]
            self.assertEqual(len(model), 5, version_msg)
            self.assertTrue("parameters" in model, version_msg)
            params = model["parameters"]
            # This type of model has fewer parameters for model 0
            self.assertEqual(len(params), 4, version_msg)
            self.assertTrue("rates" in params, version_msg)
            rates = params["rates"]
            self.assertEqual(len(rates), 2, version_msg)

    def testParseNgene2Mgene1(self):
        """Results in ngene2_mgene1 contain two top-level genes."""
        for _version, version_msg, results in \
                self._read_results("ngene2_mgene1"):
            self.assertEqual(len(results), 4, version_msg)
            self.assertTrue("genes" in results, version_msg)
            genes = results["genes"]
            self.assertEqual(len(genes), 2, version_msg)
            model = genes[0]
            self.assertEqual(len(model), 5, version_msg)
            self.assertTrue("parameters" in model, version_msg)
            params = model["parameters"]
            self.assertEqual(len(params), SITECLASS_PARAMS[0], version_msg)

    def testParseNgene2Mgene34(self):
        """Results in ngene2_mgene34 report two rates and two genes."""
        for _version, version_msg, results in \
                self._read_results("ngene2_mgene34"):
            self.assertEqual(len(results), 4, version_msg)
            self.assertTrue("NSsites" in results, version_msg)
            models = results["NSsites"]
            self.assertEqual(len(models), 1, version_msg)
            self.assertTrue(0 in models, version_msg)
            model = models[0]
            self.assertEqual(len(model), 5, version_msg)
            self.assertTrue("parameters" in model, version_msg)
            params = model["parameters"]
            # This type of model has fewer parameters for model 0
            self.assertEqual(len(params), 3, version_msg)
            self.assertTrue("rates" in params, version_msg)
            rates = params["rates"]
            self.assertEqual(len(rates), 2, version_msg)
            self.assertTrue("genes" in params, version_msg)
            genes = params["genes"]
            self.assertEqual(len(genes), 2, version_msg)

    def testParseFreeRatio(self):
        """Results in freeratio carry 8 model items and 7 omega values."""
        for _version, version_msg, results in \
                self._read_results("freeratio"):
            self.assertEqual(len(results), 4, version_msg)
            self.assertTrue("NSsites" in results, version_msg)
            models = results["NSsites"]
            self.assertEqual(len(models), 1, version_msg)
            self.assertTrue(0 in models, version_msg)
            model = models[0]
            # With the free ratio model, you get 3 extra trees: dN tree,
            # dS tree and omega tree
            self.assertEqual(len(model), 8, version_msg)
            self.assertTrue("parameters" in model, version_msg)
            params = model["parameters"]
            self.assertEqual(len(params), SITECLASS_PARAMS[0], version_msg)
            self.assertTrue("branches" in params, version_msg)
            # There should be 7 branches
            branches = params["branches"]
            self.assertEqual(len(branches), 7, version_msg)
            self.assertTrue("omega" in params, version_msg)
            omega = params["omega"]
            self.assertEqual(len(omega), 7, version_msg)

    def testParsePairwise(self):
        """Results in pairwise have a 5-entry "pairwise" item."""
        for _version, version_msg, results in self._read_results("pairwise"):
            # Pairwise models have an extra top-level item: pairwise
            self.assertEqual(len(results), 5, version_msg)
            self.assertTrue("pairwise" in results, version_msg)
            pairwise = results["pairwise"]
            self.assertEqual(len(pairwise), 5, version_msg)

    def testParseAA(self):
        """Results in aa_model0 have "lnL max" and, after 4.1, distances."""
        for version, version_msg, results in self._read_results("aa_model0"):
            # Amino Acid analysis has different top-levels:
            # 'NSsites', 'model', 'version', 'lnL max', 'distances'
            # Version 4.1 doesn't seem to produce distances in the results
            if version == "4_1":
                self.assertEqual(len(results), 4, version_msg)
                self.assertTrue("lnL max" in results, version_msg)
            else:
                self.assertEqual(len(results), 5, version_msg)
                self.assertTrue("lnL max" in results, version_msg)
                self.assertTrue("distances" in results, version_msg)
                distances = results["distances"]
                # non-pairwise AA analysis only gives raw distances
                self.assertEqual(len(distances), 1, version_msg)

    def testParseAAPairwise(self):
        """Results in aa_pairwise have two kinds of distances."""
        for _version, version_msg, results in \
                self._read_results("aa_pairwise"):
            # Pairwise AA analysis has one top-level fewer than non-pairwise
            self.assertEqual(len(results), 4, version_msg)
            self.assertTrue("lnL max" in results, version_msg)
            self.assertTrue("distances" in results, version_msg)
            distances = results["distances"]
            # Pairwise AA analysis has ML & raw distances
            self.assertEqual(len(distances), 2, version_msg)


if __name__ == "__main__":
    runner = unittest.TextTestRunner(verbosity=2)
    unittest.main(testRunner=runner)