/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
* created at Mar 4, 2008
*/
package org.biojava.bio.structure.io.mmcif;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
import org.biojava.bio.structure.Structure;
import org.biojava.bio.structure.io.MMCIFFileReader;
import org.biojava.bio.structure.io.StructureIOFile;
import org.biojava.bio.structure.io.mmcif.model.AtomSite;
import org.biojava.bio.structure.io.mmcif.model.ChemComp;
import org.biojava.bio.structure.io.mmcif.model.DatabasePDBremark;
import org.biojava.bio.structure.io.mmcif.model.DatabasePDBrev;
import org.biojava.bio.structure.io.mmcif.model.Entity;
import org.biojava.bio.structure.io.mmcif.model.EntityPolySeq;
import org.biojava.bio.structure.io.mmcif.model.Exptl;
import org.biojava.bio.structure.io.mmcif.model.PdbxEntityNonPoly;
import org.biojava.bio.structure.io.mmcif.model.PdbxNonPolyScheme;
import org.biojava.bio.structure.io.mmcif.model.PdbxPolySeqScheme;
import org.biojava.bio.structure.io.mmcif.model.Refine;
import org.biojava.bio.structure.io.mmcif.model.Struct;
import org.biojava.bio.structure.io.mmcif.model.StructAsym;
import org.biojava.bio.structure.io.mmcif.model.StructKeywords;
import org.biojava.bio.structure.io.mmcif.model.StructRef;
import org.biojava.bio.structure.io.mmcif.model.StructRefSeq;
/** A simple mmCif file parser
*
* @author Andreas Prlic
* @since 1.7
* Usage:
*
String file = "path/to/mmcif/file";
StructureIOFile pdbreader = new MMCIFFileReader();
try {
Structure s = pdbreader.getStructure(file);
System.out.println(s);
// you can convert it to a PDB file...
System.out.println(s.toPDB());
} catch (IOException e) {
e.printStackTrace();
}
*
* For more documentation see http://biojava.org/wiki/BioJava:CookBook#Protein_Structure.
*/
public class SimpleMMcifParser implements MMcifParser {
List consumers ;
public static final String LOOP_END = "#";
public static final String LOOP_START = "loop_";
public static final String FIELD_LINE = "_";
public static final String STRING_LIMIT = ";";
private static final char s1 = '\'';
private static final char s2 = '\"';
Struct struct ;
public static Logger logger = Logger.getLogger("org.biojava.bio.structure");
public SimpleMMcifParser(){
consumers = new ArrayList();
struct = null;
}
public void addMMcifConsumer(MMcifConsumer consumer) {
consumers.add(consumer);
}
public void clearConsumers() {
consumers.clear();
}
public void removeMMcifConsumer(MMcifConsumer consumer) {
consumers.remove(consumer);
}
public static void main(String[] args){
String file = "/Users/andreas/WORK/PDB/mmCif/a9/1a9n.cif.gz";
//String file = "/Users/andreas/WORK/PDB/MMCIF/1gav.mmcif";
//String file = "/Users/andreas/WORK/PDB/MMCIF/100d.cif";
//String file = "/Users/andreas/WORK/PDB/MMCIF/1a4a.mmcif";
System.out.println("parsing " + file);
StructureIOFile pdbreader = new MMCIFFileReader();
try {
Structure s = pdbreader.getStructure(file);
System.out.println(s);
// convert it to a PDB file...
System.out.println(s.toPDB());
} catch (IOException e) {
e.printStackTrace();
}
}
public void parse(InputStream inStream) throws IOException {
parse(new BufferedReader(new InputStreamReader(inStream)));
}
public void parse(BufferedReader buf)
throws IOException {
triggerDocumentStart();
// init container objects...
struct = new Struct();
String line = null;
boolean inLoop = false;
List loopFields = new ArrayList();
List lineData = new ArrayList();
String category = null;
// the first line is a data_PDBCODE line, test if this looks like a mmcif file
line = buf.readLine();
if (!line.startsWith("data_")){
System.err.println("this does not look like a valid MMcif file! The first line should be data_1XYZ, but is " + line);
triggerDocumentEnd();
return;
}
while ( (line = buf.readLine ()) != null ){
//System.out.println(inLoop + " " + line);
if ( inLoop){
if (line.startsWith(LOOP_END)){
// reset all data
inLoop = false;
lineData.clear();
category=null;
loopFields.clear();
continue;
}
if ( line.startsWith(FIELD_LINE)){
// found another field.
String txt = line.trim();
//System.out.println("line: " + txt);
if ( txt.indexOf('.') > -1){
String[] spl = txt.split("\\.");
//System.out.println(spl.length);
category = spl[0];
String attribute = spl[1];
loopFields.add(attribute);
if ( spl.length > 2){
System.err.println("found nested attribute, not supported, yet!");
}
} else {
category = txt;
}
} else {
// in loop and we found a data line
lineData = processLine(line, buf, loopFields.size());
if ( lineData.size() != loopFields.size()){
System.err.println("did not find enough data fields...");
}
endLineChecks(category, loopFields,lineData);
lineData.clear();
}
} else {
// not in loop
if ( line.startsWith(LOOP_START)){
loopFields.clear();
inLoop = true;
category=null;
lineData.clear();
continue;
} else if (line.startsWith(LOOP_END)){
inLoop = false;
if ( category != null)
endLineChecks(category, loopFields, lineData);
category = null;
loopFields.clear();
lineData.clear();
} else {
// a boring normal line
//System.out.println("boring data line: " + line + " " + inLoop + " " );
List data = processLine(line, buf, 2);
//System.out.println("got a single line " + data);
String key = data.get(0);
int pos = key.indexOf(".");
if ( pos < 0 ) {
// looks like a chem_comp file
// line should start with data, otherwise something is wrong!
if (! line.startsWith("data_")){
System.err.println("this does not look like a valid MMcif file! The first line should be data_1XYZ, but is " + line);
triggerDocumentEnd();
return;
}
// ignore the first line...
category=null;
lineData.clear();
continue;
}
category = key.substring(0,pos);
String value = data.get(1);
loopFields.add(key.substring(pos+1,key.length()));
lineData.add(value);
}
}
}
if (struct != null){
triggerStructData(struct);
}
triggerDocumentEnd();
}
private List processSingleLine(String line){
//System.out.println("SS processSingleLine " + line);
List data = new ArrayList();
if ( line.trim().length() == 1){
if ( line.startsWith(STRING_LIMIT))
return data;
}
boolean inString = false;
boolean inS1 = false;
boolean inS2 = false;
String word = "";
//System.out.println(line);
for (int i=0; i< line.length(); i++ ){
//System.out.println(word);
Character c = line.charAt(i);
Character nextC = null;
if (i < line.length() - 1)
nextC = line.charAt(i+1);
if (c == ' ') {
if ( ! inString){
if ( ! word.equals(""))
data.add(word);
word = "";
} else {
// we are in a string, add the space
word += c;
}
} else if (c == s1 ) {
if ( inString){
boolean wordEnd = false;
if (! inS2) {
if (nextC != null){
//System.out.println("nextC: >"+nextC+"<");
if ( Character.isWhitespace(nextC)){
i++;
wordEnd = true;
}
}
}
if ( wordEnd ) {
// at end of string
if ( ! word.equals(""))
data.add(word);
word = "";
inString = false;
inS1 = false;
} else {
word += c;
}
} else {
// the beginning of a new string
inString = true;
inS1 = true;
}
} else if ( c == s2 ){
if ( inString){
boolean wordEnd = false;
if (! inS1) {
if (nextC != null){
//System.out.println("nextC: >"+nextC+"<");
if ( Character.isWhitespace(nextC)){
i++;
wordEnd = true;
}
}
}
if ( wordEnd ) {
// at end of string
if ( ! word.equals(""))
data.add(word);
word = "";
inString = false;
inS2 = false;
} else {
word += c;
}
} else {
// the beginning of a new string
inString = true;
inS2 = true;
}
} else {
word += c;
}
}
if ( ! word.trim().equals(""))
data.add(word);
return data;
}
/** get the content of a cif entry
*
* @param line
* @param buf
* @return
*/
private List processLine(String line,
BufferedReader buf,
int fieldLength)
throws IOException{
//System.out.println("XX processLine " + fieldLength + " " + line);
// go through the line and process each character
List lineData = new ArrayList();
boolean inString = false;
String bigWord = null;
while ( true ){
if ( line.startsWith(STRING_LIMIT)){
if (! inString){
inString = true;
if ( line.length() > 1)
bigWord = line.substring(1);
else
bigWord = "";
} else {
// the end of a word
lineData.add(bigWord);
bigWord = null;
inString = false;
}
} else {
if ( inString )
//TODO: make bigWord a stringbuffer...
bigWord += (line);
else {
List dat = processSingleLine(line);
for (String d : dat){
lineData.add(d);
}
}
}
//System.out.println("in process line : " + lineData.size() + " " + fieldLength);
if ( lineData.size() > fieldLength){
System.err.println("wrong data length ("+lineData.size()+
") should be ("+fieldLength+") at line " + line + " got lineData: " + lineData);
return lineData;
}
if ( lineData.size() == fieldLength)
return lineData;
line = buf.readLine();
if ( line == null)
break;
}
return lineData;
}
private void endLineChecks(String category,List loopFields, List lineData ) throws IOException{
/*System.out.println("parsed the following data: " +category + " fields: "+
loopFields + " DATA: " +
lineData);
if (category.equals("_struct")){
System.exit(0);
}*/
if ( loopFields.size() != lineData.size()){
System.err.println("looks like we got a problem with nested string quote characters:");
throw new IOException("data length ("+ lineData.size() +
") != fields length ("+loopFields.size()+
") category: " +category + " fields: "+
loopFields + " DATA: " +
lineData );
}
if ( category.equals("_entity")){
Entity e = (Entity) buildObject(
"org.biojava.bio.structure.io.mmcif.model.Entity",
loopFields,lineData);
triggerNewEntity(e);
} else if ( category.equals("_struct")){
struct = (Struct) buildObject(
"org.biojava.bio.structure.io.mmcif.model.Struct",
loopFields, lineData);
} else if ( category.equals("_atom_site")){
AtomSite a = (AtomSite) buildObject(
"org.biojava.bio.structure.io.mmcif.model.AtomSite",
loopFields, lineData);
triggerNewAtomSite(a);
} else if ( category.equals("_database_PDB_rev")){
DatabasePDBrev dbrev = (DatabasePDBrev) buildObject(
"org.biojava.bio.structure.io.mmcif.model.DatabasePDBrev",
loopFields, lineData);
triggerNewDatabasePDBrev(dbrev);
} else if ( category.equals("_database_PDB_remark")){
DatabasePDBremark remark = (DatabasePDBremark) buildObject(
"org.biojava.bio.structure.io.mmcif.model.DatabasePDBremark",
loopFields, lineData);
triggerNewDatabasePDBremark(remark);
} else if ( category.equals("_exptl")){
Exptl exptl = (Exptl) buildObject(
"org.biojava.bio.structure.io.mmcif.model.Exptl",
loopFields,lineData);
triggerExptl(exptl);
} else if ( category.equals("_struct_ref")){
StructRef sref = (StructRef) buildObject(
"org.biojava.bio.structure.io.mmcif.model.StructRef",
loopFields,lineData);
triggerNewStrucRef(sref);
} else if ( category.equals("_struct_ref_seq")){
StructRefSeq sref = (StructRefSeq) buildObject(
"org.biojava.bio.structure.io.mmcif.model.StructRefSeq",
loopFields,lineData);
triggerNewStrucRefSeq(sref);
} else if ( category.equals("_entity_poly_seq")){
EntityPolySeq exptl = (EntityPolySeq) buildObject(
"org.biojava.bio.structure.io.mmcif.model.EntityPolySeq",
loopFields,lineData);
triggerNewEntityPolySeq(exptl);
} else if ( category.equals("_struct_asym")){
StructAsym sasym = (StructAsym) buildObject(
"org.biojava.bio.structure.io.mmcif.model.StructAsym",
loopFields,lineData);
triggerNewStructAsym(sasym);
} else if ( category.equals("_pdbx_poly_seq_scheme")){
PdbxPolySeqScheme ppss = (PdbxPolySeqScheme) buildObject(
"org.biojava.bio.structure.io.mmcif.model.PdbxPolySeqScheme",
loopFields,lineData);
triggerNewPdbxPolySeqScheme(ppss);
} else if ( category.equals("_pdbx_nonpoly_scheme")){
PdbxNonPolyScheme ppss = (PdbxNonPolyScheme) buildObject(
"org.biojava.bio.structure.io.mmcif.model.PdbxNonPolyScheme",
loopFields,lineData);
triggerNewPdbxNonPolyScheme(ppss);
} else if ( category.equals("_pdbx_entity_nonpoly")){
PdbxEntityNonPoly pen = (PdbxEntityNonPoly) buildObject(
"org.biojava.bio.structure.io.mmcif.model.PdbxEntityNonPoly",
loopFields,lineData
);
triggerNewPdbxEntityNonPoly(pen);
} else if ( category.equals("_struct_keywords")){
StructKeywords kw = (StructKeywords)buildObject(
"org.biojava.bio.structure.io.mmcif.model.StructKeywords",
loopFields,lineData
);
triggerNewStructKeywords(kw);
} else if (category.equals("_refine")){
Refine r = (Refine)buildObject(
"org.biojava.bio.structure.io.mmcif.model.Refine",
loopFields,lineData
);
triggerNewRefine(r);
} else if (category.equals("_chem_comp")){
ChemComp c = (ChemComp)buildObject(
"org.biojava.bio.structure.io.mmcif.model.ChemComp",
loopFields, lineData
);
triggerNewChemComp(c);
} else {
// trigger a generic bean that can deal with all missing data types...
triggerGeneric(category,loopFields,lineData);
}
}
private void setPair(Object o, List lineData){
Class c = o.getClass();
if (lineData.size() == 2){
String key = lineData.get(0);
String val = lineData.get(1);
int dotPos = key.indexOf('.');
if ( dotPos > -1){
key = key.substring(dotPos+1,key.length());
}
String u = key.substring(0,1).toUpperCase();
try {
Method m = c.getMethod("set" + u + key.substring(1,key.length()) , String.class);
m.invoke(o,val);
}
catch (InvocationTargetException iex){
iex.printStackTrace();
}
catch (IllegalAccessException aex){
aex.printStackTrace();
}
catch( NoSuchMethodException nex){
if ( val.equals("?") || val.equals(".")) {
logger.info("trying to set field >" + key + "< in >"+ c.getName() + "<, but not found. Since value is >"+val+"< most probably just ignore this.");
} else {
logger.warning("trying to set field >" + key + "< in >"+ c.getName() + "<, but not found! (value:" + val + ")");
}
}
} else {
System.err.println("trying to set key/value pair on object " +o.getClass().getName() + " but did not find in " + lineData);
}
}
private void setArray(Class c, Object o, String key, String val){
// TODO: not implemented yet!
//logger.info("Setting of array not implemented at the present for " + key + " " + val);
/*
int pos = key.indexOf("[");
String varName = key.substring(0,pos);
String u = varName.substring(0,1).toUpperCase();
try {
Method m = c.getMethod("set" + u + varName.substring(1,varName.length()) , String.class);
m.invoke(o,val);
} catch (Exception e){
e.printStackTrace();
}
*/
}
private Object buildObject(String className, List loopFields, List lineData) {
Object o = null;
try {
// build up the Entity object from the line data...
Class c = Class.forName(className);
o = c.newInstance();
int pos = -1 ;
for (String key: loopFields){
pos++;
String val = lineData.get(pos);
//System.out.println(key + " " + val);
String u = key.substring(0,1).toUpperCase();
try {
Method m = c.getMethod("set" + u + key.substring(1,key.length()) , String.class);
m.invoke(o,val);
}
catch( NoSuchMethodException nex){
if (key.indexOf("[") > -1) {
setArray(c,o,key,val);
} else {
System.err.println("Trying to set field " + key + " in "+ c.getName() +", but not found! (value:" + val + ")");
}
}
}
} catch (InstantiationException eix){
eix.printStackTrace();
} catch (InvocationTargetException etx){
etx.printStackTrace();
} catch (IllegalAccessException eax){
eax.printStackTrace();
} catch (ClassNotFoundException ex){
ex.printStackTrace();
}
return o;
}
public void triggerGeneric(String category, List loopFields, List lineData){
for(MMcifConsumer c : consumers){
c.newGenericData(category,loopFields, lineData);
}
}
public void triggerNewEntity(Entity entity){
for(MMcifConsumer c : consumers){
c.newEntity(entity);
}
}
public void triggerNewEntityPolySeq(EntityPolySeq epolseq){
for(MMcifConsumer c : consumers){
c.newEntityPolySeq(epolseq);
}
}
public void triggerNewChemComp(ChemComp cc){
for(MMcifConsumer c : consumers){
c.newChemComp(cc);
}
}
public void triggerNewStructAsym(StructAsym sasym){
for(MMcifConsumer c : consumers){
c.newStructAsym(sasym);
}
}
private void triggerStructData(Struct struct){
for(MMcifConsumer c : consumers){
c.setStruct(struct);
}
}
private void triggerNewAtomSite(AtomSite atom){
for(MMcifConsumer c : consumers){
c.newAtomSite(atom);
}
}
private void triggerNewDatabasePDBrev(DatabasePDBrev dbrev){
for(MMcifConsumer c : consumers){
c.newDatabasePDBrev(dbrev);
}
}
private void triggerNewDatabasePDBremark(DatabasePDBremark remark){
for(MMcifConsumer c : consumers){
c.newDatabasePDBremark(remark);
}
}
private void triggerExptl(Exptl exptl){
for(MMcifConsumer c : consumers){
c.newExptl(exptl);
}
}
private void triggerNewStrucRef(StructRef sref){
for(MMcifConsumer c : consumers){
c.newStructRef(sref);
}
}
private void triggerNewStrucRefSeq(StructRefSeq sref){
for(MMcifConsumer c : consumers){
c.newStructRefSeq(sref);
}
}
private void triggerNewPdbxPolySeqScheme(PdbxPolySeqScheme ppss){
for(MMcifConsumer c : consumers){
c.newPdbxPolySeqScheme(ppss);
}
}
private void triggerNewPdbxNonPolyScheme(PdbxNonPolyScheme ppss){
for(MMcifConsumer c : consumers){
c.newPdbxNonPolyScheme(ppss);
}
}
public void triggerNewPdbxEntityNonPoly(PdbxEntityNonPoly pen){
for (MMcifConsumer c: consumers){
c.newPdbxEntityNonPoly(pen);
}
}
public void triggerNewStructKeywords(StructKeywords kw){
for (MMcifConsumer c: consumers){
c.newStructKeywords(kw);
}
}
public void triggerNewRefine(Refine r){
for (MMcifConsumer c: consumers){
c.newRefine(r);
}
}
public void triggerDocumentStart(){
for(MMcifConsumer c : consumers){
c.documentStart();
}
}
public void triggerDocumentEnd(){
for(MMcifConsumer c : consumers){
c.documentEnd();
}
}
}