/*
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
* Created on 16.03.2004
*
*/
package org.biojava.bio.structure.io;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import org.biojava.bio.structure.AminoAcid;
import org.biojava.bio.structure.AminoAcidImpl;
import org.biojava.bio.structure.AtomImpl;
import org.biojava.bio.structure.Author;
import org.biojava.bio.structure.Chain;
import org.biojava.bio.structure.ChainImpl;
import org.biojava.bio.structure.DBRef;
import org.biojava.bio.structure.Group;
import org.biojava.bio.structure.GroupIterator;
import org.biojava.bio.structure.HetatomImpl;
import org.biojava.bio.structure.Compound;
import org.biojava.bio.structure.JournalArticle;
import org.biojava.bio.structure.NucleotideImpl;
import org.biojava.bio.structure.PDBHeader;
import org.biojava.bio.structure.SSBond;
import org.biojava.bio.structure.Structure;
import org.biojava.bio.structure.StructureException;
import org.biojava.bio.structure.StructureImpl;
import org.biojava.bio.structure.StructureTools;
/**
* This class implements the actual PDB file parsing. Do not access it directly, but
* via the PDBFileReader class.
*
*
* <h2>Parsing</h2>
*
* During parsing of the PDB file several flags can be set:
* <ul>
* <li> {@link #setParseCAOnly(boolean)} - parse only the ATOM records for C-alpha atoms</li>
* <li> {@link #setParseSecStruc(boolean)} - whether the secondary structure information from the PDB file (author's assignment) should be parsed.
* If true, the assignment can be accessed through {@link AminoAcid}.getSecStruc();</li>
* <li> {@link #setAlignSeqRes(boolean)} - should the AminoAcid sequences from the SEQRES
* and ATOM records of a PDB file be aligned? (default:yes)</li>
* </ul>
*
*
*
* To prevent excessive memory usage for large PDB files, there is the ATOM_CA_THRESHOLD.
* If more atoms than this threshold are parsed from a PDB file, the parser will automatically
* switch to a C-alpha only representation.
*
*
*
* The result of the parsing of the PDB file is a new {@link Structure} object.
*
*
*
* For more documentation on how to work with the Structure API please
* see
* http://biojava.org/wiki/BioJava:CookBook#Protein_Structure
*
*
*
*
* <h2>Example</h2>
*
* Q: How can I get a Structure object from a PDB file?
*
*
* A:
*
* <pre>
public {@link Structure} loadStructure(String pathToPDBFile){
    // The PDBFileParser is wrapped by the PDBFileReader
    {@link PDBFileReader} pdbreader = new {@link PDBFileReader}();
    {@link Structure} structure = null;
    try{
        structure = pdbreader.getStructure(pathToPDBFile);
        System.out.println(structure);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return structure;
}
* </pre>
*
*
* @author Andreas Prlic
* @author Jules Jacobsen
* @since 1.4
*/
public class PDBFileParser {
// Debug flag, hard-wired to false. Note it is declared per instance (final, not static).
private final boolean DEBUG = false;
// required for parsing:
private String pdbId; //the actual id of the entry
// NOTE(review): presumably the Structure being assembled while parsing - confirm against the parsing methods.
private Structure structure;
// Raw List: the element type is not declared here.
private List current_model; // contains the ATOM records for each model
// NOTE(review): current_chain / current_group look like cursors for the chain and group
// currently being read - confirm against the ATOM record handling.
private Chain current_chain;
private Group current_group;
// Raw List: the element type is not declared here.
private List seqResChains; // contains all the chains for the SEQRES records
// for printing
// Blank static final: there is no initializer on this declaration, so the value
// has to be assigned in a static initializer block of this class.
private static final String NEWLINE;
// Raw Map: key and value types are not declared here.
private Map header ;
private PDBHeader pdbHeader;
private JournalArticle journalArticle;
private List