# Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) # This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. """Consumer class that builds a Structure object. This is used by the PDBParser and MMCIFparser classes. """ import warnings # SMCRA hierarchy from Bio.PDB.Structure import Structure from Bio.PDB.Model import Model from Bio.PDB.Chain import Chain from Bio.PDB.Residue import Residue, DisorderedResidue from Bio.PDB.Atom import Atom, DisorderedAtom from Bio.PDB.PDBExceptions import \ PDBConstructionException, PDBConstructionWarning class StructureBuilder: """ Deals with contructing the Structure object. The StructureBuilder class is used by the PDBParser classes to translate a file to a Structure object. """ def __init__(self): self.line_counter=0 self.header={} def _is_completely_disordered(self, residue): "Return 1 if all atoms in the residue have a non blank altloc." atom_list=residue.get_unpacked_list() for atom in atom_list: altloc=atom.get_altloc() if altloc==" ": return 0 return 1 # Public methods called by the Parser classes def set_header(self, header): self.header=header def set_line_counter(self, line_counter): """ The line counter keeps track of the line in the PDB file that is being parsed. Arguments: o line_counter - int """ self.line_counter=line_counter def init_structure(self, structure_id): """Initiate a new Structure object with given id. Arguments: o id - string """ self.structure=Structure(structure_id) def init_model(self, model_id, serial_num = None): """Initiate a new Model object with given id. Arguments: o id - int o serial_num - int """ self.model=Model(model_id,serial_num) self.structure.add(self.model) def init_chain(self, chain_id): """Initiate a new Chain object with given id. Arguments: o chain_id - string """ if self.model.has_id(chain_id): self.chain=self.model[chain_id] warnings.warn("WARNING: Chain %s is discontinuous at line %i." % (chain_id, self.line_counter), PDBConstructionWarning) else: self.chain=Chain(chain_id) self.model.add(self.chain) def init_seg(self, segid): """Flag a change in segid. Arguments: o segid - string """ self.segid=segid def init_residue(self, resname, field, resseq, icode): """ Initiate a new Residue object. Arguments: o resname - string, e.g. "ASN" o field - hetero flag, "W" for waters, "H" for hetero residues, otherwise blank. o resseq - int, sequence identifier o icode - string, insertion code """ if field!=" ": if field=="H": # The hetero field consists of H_ + the residue name (e.g. H_FUC) field="H_"+resname res_id=(field, resseq, icode) if field==" ": if self.chain.has_id(res_id): # There already is a residue with the id (field, resseq, icode). # This only makes sense in the case of a point mutation. warnings.warn("WARNING: Residue ('%s', %i, '%s') " "redefined at line %i." % (field, resseq, icode, self.line_counter), PDBConstructionWarning) duplicate_residue=self.chain[res_id] if duplicate_residue.is_disordered()==2: # The residue in the chain is a DisorderedResidue object. # So just add the last Residue object. if duplicate_residue.disordered_has_id(resname): # The residue was already made self.residue=duplicate_residue duplicate_residue.disordered_select(resname) else: # Make a new residue and add it to the already # present DisorderedResidue new_residue=Residue(res_id, resname, self.segid) duplicate_residue.disordered_add(new_residue) self.residue=duplicate_residue return else: # Make a new DisorderedResidue object and put all # the Residue objects with the id (field, resseq, icode) in it. # These residues each should have non-blank altlocs for all their atoms. # If not, the PDB file probably contains an error. if not self._is_completely_disordered(duplicate_residue): # if this exception is ignored, a residue will be missing self.residue=None raise PDBConstructionException(\ "Blank altlocs in duplicate residue %s ('%s', %i, '%s')" \ % (resname, field, resseq, icode)) self.chain.detach_child(res_id) new_residue=Residue(res_id, resname, self.segid) disordered_residue=DisorderedResidue(res_id) self.chain.add(disordered_residue) disordered_residue.disordered_add(duplicate_residue) disordered_residue.disordered_add(new_residue) self.residue=disordered_residue return residue=Residue(res_id, resname, self.segid) self.chain.add(residue) self.residue=residue def init_atom(self, name, coord, b_factor, occupancy, altloc, fullname, serial_number=None, element=None): """ Initiate a new Atom object. Arguments: o name - string, atom name, e.g. CA, spaces should be stripped o coord - Numeric array (Float0, size 3), atomic coordinates o b_factor - float, B factor o occupancy - float o altloc - string, alternative location specifier o fullname - string, atom name including spaces, e.g. " CA " o element - string, upper case, e.g. "HG" for mercury """ residue=self.residue # if residue is None, an exception was generated during # the construction of the residue if residue is None: return # First check if this atom is already present in the residue. # If it is, it might be due to the fact that the two atoms have atom # names that differ only in spaces (e.g. "CA.." and ".CA.", # where the dots are spaces). If that is so, use all spaces # in the atom name of the current atom. if residue.has_id(name): duplicate_atom=residue[name] # atom name with spaces of duplicate atom duplicate_fullname=duplicate_atom.get_fullname() if duplicate_fullname!=fullname: # name of current atom now includes spaces name=fullname warnings.warn("WARNING: atom names %s and %s differ " "only in spaces at line %i." % (duplicate_fullname, fullname, self.line_counter), PDBConstructionWarning) atom=self.atom=Atom(name, coord, b_factor, occupancy, altloc, fullname, serial_number, element) if altloc!=" ": # The atom is disordered if residue.has_id(name): # Residue already contains this atom duplicate_atom=residue[name] if duplicate_atom.is_disordered()==2: duplicate_atom.disordered_add(atom) else: # This is an error in the PDB file: # a disordered atom is found with a blank altloc # Detach the duplicate atom, and put it in a # DisorderedAtom object together with the current # atom. residue.detach_child(name) disordered_atom=DisorderedAtom(name) residue.add(disordered_atom) disordered_atom.disordered_add(atom) disordered_atom.disordered_add(duplicate_atom) residue.flag_disordered() warnings.warn("WARNING: disordered atom found " "with blank altloc before line %i.\n" % self.line_counter, PDBConstructionWarning) else: # The residue does not contain this disordered atom # so we create a new one. disordered_atom=DisorderedAtom(name) residue.add(disordered_atom) # Add the real atom to the disordered atom, and the # disordered atom to the residue disordered_atom.disordered_add(atom) residue.flag_disordered() else: # The atom is not disordered residue.add(atom) def set_anisou(self, anisou_array): "Set anisotropic B factor of current Atom." self.atom.set_anisou(anisou_array) def set_siguij(self, siguij_array): "Set standard deviation of anisotropic B factor of current Atom." self.atom.set_siguij(siguij_array) def set_sigatm(self, sigatm_array): "Set standard deviation of atom position of current Atom." self.atom.set_sigatm(sigatm_array) def get_structure(self): "Return the structure." # first sort everything # self.structure.sort() # Add the header dict self.structure.header=self.header return self.structure def set_symmetry(self, spacegroup, cell): pass