/* * Created on September 10, 2004 * * leashw is a wrapper to automate running large number of leash from GDE. * Input is always a comma-separated list, in GDE * text-flatfile format: * * "555.dna.gi * 50402582,48526329,48686672,30349262,18624161 * 34500865,32351189,32351188,30691948,30691946 * "4932.dna.gi * 50593503,42742309,50593505,44829554,51315355 * 51014182,51014180,51014178,51014176,51014174 * * Output is, by default a list in the same format. * eg. java leashw -e .acc -mn SHoundAccFromGiList -mpf ex1.flat -of ex1.acc.flat * * "555.dna.acc * AF025769,AY263147,AY264798,AY264799,AY293288, * AY309068,AY309069,AY622309,AY673646,E33750 * "4932.dna.acc * AB182994,AY693362,AY693363,AY693364,AY693365, * AY693366,NC_001145,NC_001146,NC_001147,NC_001148 * * If -s is used, * output is a String [], usually each String is a multiline * flatfile in GenBank or fasta format, but any String [] would be valid. * In that case, names (eg. "555.dna.gi) would be omitted from the output, * and results would NOT be comma-separated. * -e : replaces the current extension for each name with * for example, if the name was 555.dna.gi and SHoundTaxIDFromGi was the method, * -e .taxid would change the name to 555.dna.taxid * * Another issue addressed by leashw is the fact that some SeqHound * methods accept lists as input (eg. int [] gilist) while others only * accept a single ID number at a time (eg. int gi). Where large numbers of * requests must be processed, we want to eliminate the overhead of launching * a JavaVM and then a new instance of Leash, for, potentially, tens of thousands * of ID's. * * My idea was to call Leash.main within a loop, but this would, currently, * mean saving each result in a temporary file and reading that file * in again, which also generates unnecessary overhead. One very simple * way to get around this would be to eliminate ALL text messages from * Leash (could be turned on with a -v (ie. verbose) option). The ONLY thing * returned by leash would be the result, and not the terminal messages. What I mean * is something like * * result = L.main(args); * * Maybe even better would be if Leash just automatically recognized that * if the input is int [] and the method expects int, then it just runs * the commands iteratively, storing the output in an int []. * * These two options are not necessarily mutually exclusive. It might be * useful to have both. */ import java.io.IOException; import java.util.*; import ca.umanitoba.command.*; //------------------------------------------------------------ /**Type wrapper for "global parameters" */ class Params { String IFN, OFN = ""; BufferedTextInputFile ListFile = new BufferedTextInputFile(); BufferedTextOutputFile OutFile = new BufferedTextOutputFile(); String FileExtension = ""; String SHMethodName = ""; String [] SHargs; boolean LISTIN,SEQOUT = false; int MAXLIST = 100000; // **** WE NEED TO GET THIS NUMBER FROM SEQHOUND // String [] IDList; // int [] GIList; } // Params //------------------------------------------------------------ /** Type wrapper containing name and list of IDs read from file */ class IDList { String Name; String List; boolean hasList; String FLAG = "\""; void ReadList(Params P) { String BIGLINE = ""; hasList = false; StringTokenizer TLINE; System.out.println("Reading list..."); // Read a name Name = ""; if (!P.ListFile.EOF && !P.ListFile.currentLine.startsWith(FLAG)) {P.ListFile.nextLine();}; while (Name == "" && !P.ListFile.EOF) { if (P.ListFile.currentLine.startsWith(FLAG)) { TLINE = new StringTokenizer(P.ListFile.currentLine); Name = TLINE.nextToken(); hasList = true; } // if P.ListFile.nextLine(); } // while // Read a list while (!P.ListFile.EOF && !P.ListFile.currentLine.startsWith(FLAG)) { BIGLINE = BIGLINE.concat(P.ListFile.currentLine); P.ListFile.nextLine(); } // while List = BIGLINE; //System.out.println(Name); //System.out.println(List); } // ReadList } // IDList //----------------------- leashw ----------------------------- public class leashw { static int i; // read a parameter from command line and increment i static String ReadParam (String[] args) { String S; if (i < args.length) { S = args[i]; i++; } else {S = "";} return S; } //ReadParam // - - - - - - - - - - - - - - - - - - - - - - - - - /** Files are tested for read- or write-access, as appropriate. ParamsOkay = false unless all files exist and can be opened. */ static boolean ParamsOkay (String[] args, Params P) { boolean OKAY = true; String S; Vector V = new Vector (); i = 0; while (i < args.length) { S = ReadParam(args); if (S.equals("-s")) {P.SEQOUT=true;} else if (S.equals("-1")) {P.LISTIN = true;} else if (S.equals("-e")) {P.FileExtension = ReadParam(args);} else if (S.equals("-mn")) { V.add(S); P.SHMethodName = ReadParam(args); V.add(P.SHMethodName);} else if (S.equals("-mpf")) { // V.add(S); P.IFN = ReadParam(args); if (!P.ListFile.OpenOkay(P.IFN)) {OKAY = false; }; // V.add(P.IFN); } else if (S.equals("-of")) { V.add(S); P.OFN = ReadParam(args); V.add(P.OFN); if (!P.OutFile.WriteOkay(P.OFN)) {OKAY = false; } } } // while System.out.println(P.SEQOUT); System.out.println(P.LISTIN); System.out.println(P.FileExtension); System.out.println(P.SHMethodName); System.out.println(P.IFN); System.out.println(P.OFN); if (!OKAY) PrintHelp(); return OKAY; } // ParamsOkay // - - - - - - - - - - -- - - - - - - - - - - - - - - - /**If command line parameters aren't correct, or files do not open successfully, print a help message.*/ static void PrintHelp () { System.out.println("Usage: leashw.py leashw_options leash_options"); System.out.println(""); System.out.println(" leashw options:"); System.out.println(" -s SeqHound function generates sequence output"); System.out.println(" -e file_extension - for each name in GDE flat file,"); System.out.println(" replace existing extensio with file_extension"); System.out.println(" -1 SeqHound function takes only single ID numbers"); System.out.println(" as input, and not lists"); System.out.println(""); System.out.println(" leash options:"); System.out.println(" -mn method_name - SeqHound method name"); // System.out.println(" -mpi item1,item2... - SeqHound parameters given on command line"); System.out.println(" -mpf filename - SeqHound parameters read from file"); System.out.println(" -of filename - output filename"); } //PrintHelp // - - - - - - - - - - -- - - - - - - - - - - - - - - - /** Write the name for an ID list to the output file. If -e option is used, replace the extension with the one specified on the command line.*/ static void WriteName (Params P, String Name) { int I; if (P.FileExtension != "") { I = Name.lastIndexOf("."); if (I != -1) { Name = Name.substring(0,I); } // if Name = Name + P.FileExtension; } //if P.OutFile.PW.println(Name); } //WriteName //******************************************************* //********************** MAIN METHOD ******************** public static void main(String[] args) { // Read command line parameters. Params P = new Params(); // Read parameters and file names from args array and try to open files. boolean OKAY; OKAY = ParamsOkay(args,P); if (OKAY) { Leash L = new Leash(); // ParamV is a vector used to build a String [] to send // to L.main. Before calling L.main, ParamV is converted into // the String [] SHargs. The -mn argument is always the same, // but the -mpi argument is different for each new list we // read from the P.InFile. // I have also added, in comments, code to specify a temporary // output file (-of). If Leash can be rewritten to return // results to an object, rather than a file, we don't need // these lines. Vector ParamV = new Vector (); ParamV.add("-mn"); ParamV.add(P.SHMethodName); // ParamV.add("-of"); ParamV.add("-mpi"); String [] SHargs; IDList IDL = new IDList(); IDL.ReadList(P); while (IDL.hasList) { if (!P.SEQOUT) {WriteName(P,IDL.Name);}; ParamV.add(IDL.List); // Re-cast Vector into a String [] SHargs = new String[ParamV.size()]; ParamV.toArray(SHargs); // Run SeqHound to retrieve all hits // if -l then // Leash // else // for each ID // Leash L.main(SHargs); // Read a list IDL.ReadList(P); // Remove list from ParamV, so that the next one // can be added. ParamV.remove(3); // ParamV.remove(5); } // while try {P.OutFile.BW.flush(); P.OutFile.BW.close();} catch (IOException e) {System.out.println("Error closing " + P.OutFile.F.getName()); System.err.println(e.getMessage());}; } // OKAY } // main } // leashw