/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package search;
import java.lang.System;
import java.io.*;
import java.util.*;
import org.biojava.bio.BioException;
import org.biojava.bio.seq.*;
import org.biojava.bio.seq.db.*;
import org.biojava.bio.seq.io.*;
import org.biojava.bio.symbol.*;
/**
* <code>IndexFastaDB</code> will perform BioJava indexing of DNA or
* protein Fasta format files, creating an IndexedSequenceDB-compliant
* database. Setting the database name is optional; if not set, it
* defaults to the name of the first file indexed. The two files
* backing the index are given the database name with the extensions
* '.index' and '.list' respectively.
*
* @author Keith James
* @version 1.0
* @since 1.1
*/
public class IndexFastaDB
{
    /**
     * Usage message carried by the exception thrown when no input
     * file is given or the 'type' system property is missing or
     * not recognised.
     */
    static final String USAGE =
        "\nUsage: java -Dtype=(aa|nt) [-Dname=] "
        + "IndexFastaDB file(s)";

    /**
     * Suffix appended to the database name to form the first file
     * handed to the TabIndexStore.
     */
    static final String INDEX = ".index";

    /**
     * Suffix appended to the database name to form the second file
     * handed to the TabIndexStore.
     */
    static final String LIST = ".list";

    /**
     * Indexes one or more Fasta files into a single indexed sequence
     * database.
     *
     * <p>The sequence type is read from the system property
     * <code>type</code> and must be <code>aa</code> (protein) or
     * <code>nt</code> (DNA), compared case-insensitively. The
     * database name is read from the optional system property
     * <code>name</code>; when absent, the first file name given on
     * the command line is used instead.</p>
     *
     * <p>A <code>BioException</code> or <code>IOException</code>
     * raised while building the index is reported with
     * <code>printStackTrace()</code> and is not rethrown.</p>
     *
     * @param args the Fasta files to index; at least one is required.
     * @exception Exception carrying {@link #USAGE} if no file is
     * given, or if the <code>type</code> property is unset or is
     * neither <code>aa</code> nor <code>nt</code>.
     */
    public static void main (String [] args)
        throws Exception
    {
        // Check arguments and system properties
        if (args.length < 1)
        {
            throw new Exception(USAGE);
        }

        Properties props = System.getProperties();
        String seqType = props.getProperty("type");

        if (seqType == null)
        {
            throw new Exception(USAGE);
        }

        boolean isProtein = seqType.equalsIgnoreCase("aa");
        boolean isDNA     = seqType.equalsIgnoreCase("nt");

        // Reject only when the type is NEITHER "aa" NOR "nt". The
        // previous test (!aa || !nt) was true for every possible
        // value, so every invocation failed with the usage message.
        if (! (isProtein || isDNA))
        {
            throw new Exception(USAGE);
        }

        // Set the appropriate alphabet
        Alphabet alpha;
        if (isProtein)
        {
            alpha = ProteinTools.getAlphabet();
        }
        else
        {
            alpha = DNATools.getDNA();
        }

        // May be null here; the default is applied once the first
        // file name is known (see below)
        String dbName = props.getProperty("name");

        SequenceFormat         seqFormat = new FastaFormat();
        IDMaker                idMaker   = new IDMaker.ByName();
        SequenceBuilderFactory sbFactory =
            new FastaDescriptionLineParser.Factory(SimpleSequenceBuilder.FACTORY);

        try
        {
            SymbolTokenization parser = alpha.getTokenization("token");

            String fastaFileName = args[0];

            // If database name is not set, use the name of the Fasta
            // file
            if (dbName == null)
            {
                dbName = fastaFileName;
            }

            File fastaFile = new File(fastaFileName);
            File indexFile = new File(dbName + INDEX);
            File indexList = new File(dbName + LIST);

            System.out.println("Creating database with name '"
                               + dbName
                               + "' from "
                               + fastaFileName);

            // Instantiate the store and index the first file
            TabIndexStore indexStore = new TabIndexStore(indexFile,
                                                         indexList,
                                                         dbName,
                                                         seqFormat,
                                                         sbFactory,
                                                         parser);
            IndexedSequenceDB indexedDB =
                new IndexedSequenceDB(idMaker, indexStore);
            indexedDB.addFile(fastaFile);

            // Add any subsequent files to the index
            for (int i = 1; i < args.length; i++)
            {
                fastaFileName = args[i];
                fastaFile     = new File(fastaFileName);

                System.out.println("Indexing and adding file "
                                   + fastaFileName);
                indexedDB.addFile(fastaFile);
            }
        }
        catch (BioException be)
        {
            be.printStackTrace();
        }
        catch (IOException ioe)
        {
            ioe.printStackTrace();
        }
    }
}