# Copyright 2012 Lenna X. Peterson (arklenna@gmail.com). # All rights reserved. # # Tests adapted from test_PDB.py # # This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. """Unit tests for the MMCIF portion of the Bio.PDB module.""" import unittest try: import numpy from numpy import dot # Missing on old PyPy's micronumpy del dot from numpy.linalg import svd, det # Missing in PyPy 2.0 numpypy except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "Install NumPy if you want to use Bio.PDB.") from Bio.Seq import Seq from Bio.Alphabet import generic_protein from Bio.PDB.PDBExceptions import PDBConstructionException, PDBConstructionWarning from Bio.PDB import PPBuilder, CaPPBuilder from Bio.PDB.MMCIFParser import MMCIFParser class ParseReal(unittest.TestCase): """Testing with real CIF file(s).""" def test_parser(self): """Extract polypeptides from 1A80.""" parser = MMCIFParser() structure = parser.get_structure("example", "PDB/1A8O.cif") self.assertEqual(len(structure), 1) for ppbuild in [PPBuilder(), CaPPBuilder()]: #========================================================== # Check that serial_num (model column) is stored properly self.assertEqual(structure[0].serial_num, 1) #First try allowing non-standard amino acids, polypeptides = ppbuild.build_peptides(structure[0], False) self.assertEqual(len(polypeptides), 1) pp = polypeptides[0] # Check the start and end positions self.assertEqual(pp[0].get_id()[1], 151) self.assertEqual(pp[-1].get_id()[1], 220) # Check the sequence s = pp.get_sequence() self.assertTrue(isinstance(s, Seq)) self.assertEqual(s.alphabet, generic_protein) #Here non-standard MSE are shown as M self.assertEqual("MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ" "NANPDCKTILKALGPGATLEEMMTACQG", str(s)) #========================================================== #Now try strict version with only standard amino acids #Should ignore MSE 151 at start, and then break the chain #at MSE 185, and MSE 214,215 polypeptides = ppbuild.build_peptides(structure[0], True) self.assertEqual(len(polypeptides), 3) #First fragment pp = polypeptides[0] self.assertEqual(pp[0].get_id()[1], 152) self.assertEqual(pp[-1].get_id()[1], 184) s = pp.get_sequence() self.assertTrue(isinstance(s, Seq)) self.assertEqual(s.alphabet, generic_protein) self.assertEqual("DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW", str(s)) #Second fragment pp = polypeptides[1] self.assertEqual(pp[0].get_id()[1], 186) self.assertEqual(pp[-1].get_id()[1], 213) s = pp.get_sequence() self.assertTrue(isinstance(s, Seq)) self.assertEqual(s.alphabet, generic_protein) self.assertEqual("TETLLVQNANPDCKTILKALGPGATLEE", str(s)) #Third fragment pp = polypeptides[2] self.assertEqual(pp[0].get_id()[1], 216) self.assertEqual(pp[-1].get_id()[1], 220) s = pp.get_sequence() self.assertTrue(isinstance(s, Seq)) self.assertEqual(s.alphabet, generic_protein) self.assertEqual("TACQG", str(s)) def testModels(self): """Test file with multiple models""" parser = MMCIFParser() structure = parser.get_structure("example", "PDB/1LCD.cif") self.assertEqual(len(structure), 3) for ppbuild in [PPBuilder(), CaPPBuilder()]: #========================================================== # Check that serial_num (model column) is stored properly self.assertEqual(structure[0].serial_num, 1) self.assertEqual(structure[1].serial_num, 2) self.assertEqual(structure[2].serial_num, 3) #First try allowing non-standard amino acids, polypeptides = ppbuild.build_peptides(structure[0], False) self.assertEqual(len(polypeptides), 1) pp = polypeptides[0] # Check the start and end positions self.assertEqual(pp[0].get_id()[1], 1) self.assertEqual(pp[-1].get_id()[1], 51) # Check the sequence s = pp.get_sequence() self.assertTrue(isinstance(s, Seq)) self.assertEqual(s.alphabet, generic_protein) #Here non-standard MSE are shown as M self.assertEqual("MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR", str(s)) #========================================================== #Now try strict version with only standard amino acids polypeptides = ppbuild.build_peptides(structure[0], True) self.assertEqual(len(polypeptides), 1) pp = polypeptides[0] # Check the start and end positions self.assertEqual(pp[0].get_id()[1], 1) self.assertEqual(pp[-1].get_id()[1], 51) # Check the sequence s = pp.get_sequence() self.assertTrue(isinstance(s, Seq)) self.assertEqual(s.alphabet, generic_protein) self.assertEqual("MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR", str(s)) if __name__ == '__main__': runner = unittest.TextTestRunner(verbosity=2) unittest.main(testRunner=runner)