/* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * * Created on 16.03.2004 * @author Andreas Prlic * * */ package org.biojava.bio.structure.io; import java.io.BufferedReader; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.PrintWriter; import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.zip.GZIPOutputStream; import org.biojava.bio.structure.AminoAcid; import org.biojava.bio.structure.Chain; import org.biojava.bio.structure.Group; import org.biojava.bio.structure.GroupIterator; import org.biojava.bio.structure.Structure; import org.biojava.utils.io.InputStreamProvider; /** *

* The wrapper class for parsing a PDB file. *

* * *

* Several flags can be set for this class *

{@link #setParseCAOnly} - parse only the Atom records for C-alpha atoms (default:false)
{@link #setParseSecStruc} - a flag if the secondary structure information from the PDB file (author's assignment) should be parsed. * If true the assignment can be accessed through {@link AminoAcid}.getSecStruc(); (default:false)
{@link #setAlignSeqRes(boolean)} - should the AminoAcid sequences from the SEQRES * and ATOM records of a PDB file be aligned? (default:true)
{@link #setAutoFetch(boolean)} - if the PDB file can not be found locally, should it be fetched * from the EBI - ftp server? (default:false)

* * * *

Example

* Q: How can I get a Structure object from a PDB file? *

* A: *

 public {@link Structure} loadStructure(String pathToPDBFile){
		{@link PDBFileReader} pdbreader = new {@link PDBFileReader}();

		{@link Structure} structure = null;
		try{
			structure = pdbreader.getStructure(pathToPDBFile);
			System.out.println(structure);
		} catch (IOException e) {
			e.printStackTrace();
		}
		return structure;
	}

* * Access PDB files from a directory, take care of compressed PDB files *

 * public {@link Structure} loadStructureById() {
		String path = "/path/to/PDB/directory/";

		{@link PDBFileReader} pdbreader = new {@link PDBFileReader}();
		pdbreader.setPath(path);
		{@link Structure} structure = null;
		try {
			structure = pdbreader.getStructureById("5pti");
		} catch (IOException e){
			e.printStackTrace();
		}
		return structure;

	}

* * * @author Andreas Prlic * */ public class PDBFileReader implements StructureIOFile { // a list of big pdb files for testing // "1htq", // "1c2w", // "1ffk", // "1giy", // "1j5a", // "1jj2", // "1jzx", // "1jzy", // "1jzz", // "1k01", // "1k73", // "1k8a", // "1k9m", // "1kc8", // "1kd1", // "1kqs", // "1m1k", // "1m90", // "1mkz", // "1ml5", // "1n8r", String path ; List extensions ; boolean parseSecStruc; boolean autoFetch; boolean parseCAOnly; boolean alignSeqRes; public static void main(String[] args){ String filename = "/path/to/PDBFile.pdb" ; // also see the demos PDBFileReader pdbreader = new PDBFileReader(); pdbreader.setParseSecStruc(true); pdbreader.setAlignSeqRes(true); pdbreader.setParseCAOnly(false); pdbreader.setAutoFetch(true); try{ Structure struc = pdbreader.getStructure(filename); System.out.println(struc); GroupIterator gi = new GroupIterator(struc); while (gi.hasNext()){ Group g = (Group) gi.next(); Chain c = g.getParent(); if ( g instanceof AminoAcid ){ AminoAcid aa = (AminoAcid)g; Map sec = aa.getSecStruc(); System.out.println(c.getName() + " " + g + " " + sec); } } } catch (Exception e) { e.printStackTrace(); } } public PDBFileReader() { extensions = new ArrayList(); path = "" ; extensions.add(".ent"); extensions.add(".pdb"); extensions.add(".ent.gz"); extensions.add(".pdb.gz"); extensions.add(".ent.Z"); extensions.add(".pdb.Z"); parseSecStruc = false; autoFetch = false; parseCAOnly = false; alignSeqRes = true; } /** return the flag if only the CA atoms should be parsed * * @return flag if CA only should be read */ public boolean isParseCAOnly() { return parseCAOnly; } /** only the CA atoms should be parsed from the PDB file * * @param parseCAOnly */ public void setParseCAOnly(boolean parseCAOnly) { this.parseCAOnly = parseCAOnly; } /** get the flag if the SEQRES and ATOM amino acids are going to be aligned * * @return flag */ public boolean isAlignSeqRes() { return alignSeqRes; } /** set the flag if the SEQRES and ATOM amino acids should be aligned and linked * * @param alignSeqRes */ public void setAlignSeqRes(boolean alignSeqRes) { this.alignSeqRes = alignSeqRes; } /** should the parser to fetch missing PDB files from the EBI FTP server automatically? * default is false * @return flag */ public boolean isAutoFetch() { return autoFetch; } /** tell the parser to fetch missing PDB files from the EBI FTP server automatically. * * default is false. If true, new PDB files will be automatically stored in the Path and gzip compressed. * * @param autoFetch */ public void setAutoFetch(boolean autoFetch) { this.autoFetch = autoFetch; } /* A flag to tell the parser to parse the Author's secondary structure assignment from the file * default is set to false, i.e. do NOT parse. * @param parseSecStruc */ public boolean isParseSecStruc() { return parseSecStruc; } /* A flag to tell the parser to parse the Author's secondary structure assignment from the file * */ public void setParseSecStruc(boolean parseSecStruc) { this.parseSecStruc = parseSecStruc; } /** directory where to find PDB files */ public void setPath(String p){ path = p ; } /** * Returns the path value. * @return a String representing the path value * @see #setPath * */ public String getPath() { return path ; } /** define supported file extensions * compressed extensions .Z,.gz do not need to be specified * they are dealt with automatically. */ public void addExtension(String s){ //System.out.println("add Extension "+s); extensions.add(s); } /** clear the supported file extensions * */ public void clearExtensions(){ extensions.clear(); } /** try to find the file in the filesystem and return a filestream in order to parse it * rules how to find file * - first check: if file is in path specified by PDBpath * - secnd check: if not found check in PDBpath/xy/ where xy is second and third char of PDBcode. * if autoFetch is set it will try to download missing PDB files automatically. */ private InputStream getInputStream(String pdbId) throws IOException { //System.out.println("checking file"); // compression formats supported // this has been moved to InputStreamProvider ... //String[] str = {".gz",".zip",".Z"}; //ArrayList compressions = new ArrayList( Arrays.asList( str ) ); InputStream inputStream =null; String pdbFile = null ; File f = null ; // this are the possible PDB file names... String fpath = path+"/"+pdbId; String ppath = path +"/pdb"+pdbId; String[] paths = new String[]{fpath,ppath}; for ( int p=0;p