/* @source infoseq application
**
** Displays some simple information about sequences
**
** @author Copyright (C) Jon Ison (jison@ebi.ac.uk) 2006
** @author Copyright (C) Gary Williams (gwilliam@hgmp.mrc.ac.uk)
** @modified 29 June 2006 Jon Ison (major rewrite)
** @modified 04/02/2000 rbsk@sanger - added 'percent GC' computation
** @@
**
** This program is free software; you can redistribute it and/or
** modify it under the terms of the GNU General Public License
** as published by the Free Software Foundation; either version 2
** of the License, or (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
******************************************************************************/

#include "emboss.h"




/* Column widths used when -columns is in effect.
**
** Each width is used for both the heading and the matching data value so
** that the heading line and the data lines stay aligned.  The description
** width only applies to its heading: the description value itself is
** printed unpadded because it is the last column.
*/
enum
{
    INFOSEQ_WID_USA    = 25,
    INFOSEQ_WID_DB     = 15,
    INFOSEQ_WID_NAME   = 15,
    INFOSEQ_WID_ACC    = 15,
    INFOSEQ_WID_GI     = 15,
    INFOSEQ_WID_SV     = 8,
    INFOSEQ_WID_TYPE   = 5,
    INFOSEQ_WID_LENGTH = 7,
    INFOSEQ_WID_PGC    = 7,
    INFOSEQ_WID_ORG    = 20,
    INFOSEQ_WID_DESC   = 12
};




static AjBool infoseq_printheader(AjBool html, AjBool instring,
                                  const char *text, ajuint wid,
                                  AjBool columns, const AjPStr delimiter,
                                  AjPFile outfile);

static AjBool infoseq_print(AjBool html, AjBool instring,
                            const AjPStr str, AjBool usewid, ajuint wid,
                            AjBool columns, const AjPStr delimiter,
                            AjPFile outfile);




/* @prog infoseq **************************************************************
**
** Displays some simple information about sequences
**
** For every input sequence one line is written to the output file holding
** the attributes selected on the command line.  An optional heading line is
** written once, before the first sequence.  Output is an HTML table, fixed
** width columns, or delimiter-separated tokens.
**
******************************************************************************/

int main(int argc, char **argv)
{
    /* VARIABLE DECLARATIONS */
    AjPSeqall seqall = NULL;
    AjPSeq seq       = NULL;
    AjBool html;
    AjBool doheader;
    AjBool dotype;
    AjBool dousa;
    AjBool dodb;
    AjBool doname;
    AjBool doacc;
    AjBool dogi;
    AjBool dosv;
    AjBool dolength;
    AjBool doorg;
    AjBool dodesc;
    AjBool dopgc;
    AjPFile outfile = NULL;

    /* Placeholders printed when the real value is empty */
    AjPStr altusa  = NULL;
    AjPStr altname = NULL;
    AjPStr altacc  = NULL;
    AjPStr altgi   = NULL;
    AjPStr altsv   = NULL;
    AjPStr altdb   = NULL;

    ajint length;
    AjBool type = ajTrue;         /* ajTrue if Protein */
    float pgc   = 0.0;
    AjBool firsttime = ajTrue;

    const AjPStr usa  = NULL;
    const AjPStr name = NULL;
    const AjPStr acc  = NULL;
    const AjPStr gi   = NULL;
    const AjPStr sv   = NULL;
    const AjPStr desc = NULL;
    const AjPStr org  = NULL;
    const AjPStr db   = NULL;

    AjBool columns   = ajFalse;
    AjPStr delimiter = NULL;
    AjPStr tempstr   = NULL;

    /* ajTrue once a token has been printed on the current output line */
    AjBool instring = ajFalse;


    /* ACD PROCESSING */
    embInit("infoseq", argc, argv);

    outfile   = ajAcdGetOutfile("outfile");
    seqall    = ajAcdGetSeqall("sequence");
    html      = ajAcdGetBoolean("html");
    doheader  = ajAcdGetBoolean("heading");
    dousa     = ajAcdGetBoolean("usa");
    dodb      = ajAcdGetBoolean("database");
    doname    = ajAcdGetBoolean("name");
    doacc     = ajAcdGetBoolean("accession");
    dogi      = ajAcdGetBoolean("gi");
    dosv      = ajAcdGetBoolean("seqversion");
    dotype    = ajAcdGetBoolean("type");
    dolength  = ajAcdGetBoolean("length");
    dopgc     = ajAcdGetBoolean("pgc");
    dodesc    = ajAcdGetBoolean("description");
    doorg     = ajAcdGetBoolean("organism");
    columns   = ajAcdGetBoolean("columns");
    delimiter = ajAcdGetString("delimiter");

    altusa  = ajStrNewC("-");
    altname = ajStrNewC("-");
    altacc  = ajStrNewC("-");
    altgi   = ajStrNewC("-");
    altsv   = ajStrNewC("-");
    altdb   = ajStrNewC("-");
    tempstr = ajStrNew();

    /* A delimiter given as the two characters '\' 't' means a real tab */
    if(ajStrMatchC(delimiter, "\\t"))
        ajStrAssignK(&delimiter, '\t');


    /* PRINT START OF HTML TABLE */
    if(html)
        ajFmtPrintF(outfile, "<table>\n");


    /* MAIN APPLICATION LOOP */
    while(ajSeqallNext(seqall, &seq))
    {
        ajSeqTrim(seq);
        ajSeqTrace(seq);

        /* is this a protein or nucleic sequence? */
        type = ajSeqIsProt(seq);

        if(firsttime)
        {
            /* Print the header information.
            ** NOTE: the %GC heading is decided by the type of the first
            ** sequence only, whereas the %GC value below is decided per
            ** sequence. */
            if(doheader)
            {
                /* Start the HTML table title line */
                if(html)
                    ajFmtPrintF(outfile, "<tr>");

                if(dousa)
                    instring = infoseq_printheader(html, instring, "USA",
                                                   INFOSEQ_WID_USA, columns,
                                                   delimiter, outfile);

                if(dodb)
                    instring = infoseq_printheader(html, instring, "Database",
                                                   INFOSEQ_WID_DB, columns,
                                                   delimiter, outfile);

                if(doname)
                    instring = infoseq_printheader(html, instring, "Name",
                                                   INFOSEQ_WID_NAME, columns,
                                                   delimiter, outfile);

                if(doacc)
                    instring = infoseq_printheader(html, instring, "Accession",
                                                   INFOSEQ_WID_ACC, columns,
                                                   delimiter, outfile);

                if(dogi)
                    instring = infoseq_printheader(html, instring, "GI",
                                                   INFOSEQ_WID_GI, columns,
                                                   delimiter, outfile);

                if(dosv)
                    instring = infoseq_printheader(html, instring, "Version",
                                                   INFOSEQ_WID_SV, columns,
                                                   delimiter, outfile);

                if(dotype)
                    instring = infoseq_printheader(html, instring, "Type",
                                                   INFOSEQ_WID_TYPE, columns,
                                                   delimiter, outfile);

                if(dolength)
                    instring = infoseq_printheader(html, instring, "Length",
                                                   INFOSEQ_WID_LENGTH, columns,
                                                   delimiter, outfile);

                if(!type && dopgc)
                    instring = infoseq_printheader(html, instring, "%GC",
                                                   INFOSEQ_WID_PGC, columns,
                                                   delimiter, outfile);

                if(doorg)
                    instring = infoseq_printheader(html, instring, "Organism",
                                                   INFOSEQ_WID_ORG, columns,
                                                   delimiter, outfile);

                if(dodesc)
                    instring = infoseq_printheader(html, instring,
                                                   "Description",
                                                   INFOSEQ_WID_DESC, columns,
                                                   delimiter, outfile);

                /* End the HTML table title line */
                if(html)
                    ajFmtPrintF(outfile, "</tr>\n");
                else
                    ajFmtPrintF(outfile, "\n");

                instring = ajFalse;
            }

            firsttime = ajFalse;
        }


        /* GET SEQUENCE ATTRIBUTES
        ** usa, db, name, acc, gi and sv are replaced by '-' when empty;
        ** description and organism are printed exactly as returned. */

        /* usa */
        usa = ajSeqGetUsaS(seq);
        if(ajStrGetLen(usa) == 0)
            usa = altusa;

        /* db */
        db = ajSeqGetDbS(seq);
        if(ajStrGetLen(db) == 0)
            db = altdb;

        /* name */
        name = ajSeqGetNameS(seq);
        if(ajStrGetLen(name) == 0)
            name = altname;

        /* accession number */
        acc = ajSeqGetAccS(seq);
        if(ajStrGetLen(acc) == 0)
            acc = altacc;

        /* GI number */
        gi = ajSeqGetGiS(seq);
        if(ajStrGetLen(gi) == 0)
            gi = altgi;

        /* version number */
        sv = ajSeqGetSvS(seq);
        if(ajStrGetLen(sv) == 0)
            sv = altsv;

        /* length */
        length = ajSeqGetLen(seq);

        /* GC content is only computed for non-protein sequences */
        if(dopgc && !type)
        {
            pgc = ajMeltGC(ajSeqGetSeqS(seq), length);
            pgc *= 100;           /* percentage */
        }

        /* description and organism */
        desc = ajSeqGetDescS(seq);
        org  = ajSeqGetTaxS(seq);


        /* start table line */
        if(html)
            ajFmtPrintF(outfile, "<tr>");

        if(dousa)
            instring = infoseq_print(html, instring, usa, ajTrue,
                                     INFOSEQ_WID_USA, columns, delimiter,
                                     outfile);

        if(dodb)
            instring = infoseq_print(html, instring, db, ajTrue,
                                     INFOSEQ_WID_DB, columns, delimiter,
                                     outfile);

        if(doname)
            instring = infoseq_print(html, instring, name, ajTrue,
                                     INFOSEQ_WID_NAME, columns, delimiter,
                                     outfile);

        if(doacc)
            instring = infoseq_print(html, instring, acc, ajTrue,
                                     INFOSEQ_WID_ACC, columns, delimiter,
                                     outfile);

        if(dogi)
            instring = infoseq_print(html, instring, gi, ajTrue,
                                     INFOSEQ_WID_GI, columns, delimiter,
                                     outfile);

        if(dosv)
            instring = infoseq_print(html, instring, sv, ajTrue,
                                     INFOSEQ_WID_SV, columns, delimiter,
                                     outfile);

        if(dotype)
        {
            if(type)
                ajFmtPrintS(&tempstr, "%c", 'P');
            else
                ajFmtPrintS(&tempstr, "%c", 'N');

            instring = infoseq_print(html, instring, tempstr, ajTrue,
                                     INFOSEQ_WID_TYPE, columns, delimiter,
                                     outfile);
        }

        if(dolength)
        {
            ajFmtPrintS(&tempstr, "%d", length);
            instring = infoseq_print(html, instring, tempstr, ajTrue,
                                     INFOSEQ_WID_LENGTH, columns, delimiter,
                                     outfile);
        }

        if(!type && dopgc)
        {
            ajFmtPrintS(&tempstr, "%.2f", pgc);
            instring = infoseq_print(html, instring, tempstr, ajTrue,
                                     INFOSEQ_WID_PGC, columns, delimiter,
                                     outfile);
        }

        if(doorg)
            instring = infoseq_print(html, instring, org, ajTrue,
                                     INFOSEQ_WID_ORG, columns, delimiter,
                                     outfile);

        /* Last column: printed without padding */
        if(dodesc)
            instring = infoseq_print(html, instring, desc, ajFalse, 0,
                                     columns, delimiter, outfile);

        /* end table line */
        if(html)
            ajFmtPrintF(outfile, "</tr>\n");
        else
            ajFmtPrintF(outfile, "\n");

        instring = ajFalse;
    }


    /* end the HTML table */
    if(html)
        ajFmtPrintF(outfile, "</table>\n");

    ajFileClose(&outfile);

    ajStrDel(&altusa);
    ajStrDel(&altname);
    ajStrDel(&altacc);
    ajStrDel(&altsv);
    ajStrDel(&altdb);
    ajStrDel(&altgi);
    ajStrDel(&delimiter);
    ajStrDel(&tempstr);

    ajSeqallDel(&seqall);
    ajSeqDel(&seq);

    embExit();

    return 0;
}




/* @funcstatic infoseq_printheader ********************************************
**
** Prints one column heading to the html or text output file.
**
** In HTML mode the text is written as a table heading cell.  In column mode
** the text is left-justified in a field of the given width; if the previous
** heading on the same line filled or overflowed its field, one space is
** written first so that the two do not run together.  Otherwise the text is
** written preceded by the delimiter, except for the first heading on a line.
**
** The "first on the line" and "previous field was full" states are kept in
** function-local static variables and are reset whenever instring is false.
**
** @param [r] html [AjBool] HTML output if true, else plain text
** @param [r] instring [AjBool] Some token already printed on this line
** @param [r] text [const char *] Heading text to print
** @param [r] wid [ajuint] Field width (column mode only)
** @param [r] columns [AjBool] Print in columns if true
** @param [r] delimiter [const AjPStr] Delimiter between tokens
**                                     (non-column text mode only)
** @param [u] outfile [AjPFile] Output file
** @return [AjBool] Always true; the caller stores it as the new value of
**                  instring
** @@
******************************************************************************/

static AjBool infoseq_printheader(AjBool html, AjBool instring,
                                  const char *text, ajuint wid,
                                  AjBool columns, const AjPStr delimiter,
                                  AjPFile outfile)
{
    /* Suppress delimiter on first call (for first string printed out) */
    static AjBool nodelim = AJTRUE;
    /* True when the previous heading left no padding after it */
    static AjBool colfull = AJFALSE;
    ajuint tlen = (ajuint) strlen(text);

    /* Reset for each new line */
    if(!instring)
    {
        nodelim = ajTrue;
        colfull = ajFalse;
    }

    if(html)
        ajFmtPrintF(outfile, "<th>%s</th>", text);
    else if(columns)
    {
        if(colfull)
            ajFmtPrintF(outfile, " %-*s", wid, text);
        else
            ajFmtPrintF(outfile, "%-*s", wid, text);

        colfull = (tlen >= wid) ? ajTrue : ajFalse;
    }
    else
    {
        if(nodelim)
            ajFmtPrintF(outfile, "%s", text);
        else
            ajFmtPrintF(outfile, "%S%s", delimiter, text);
    }

    nodelim = ajFalse;

    return ajTrue;
}




/* @funcstatic infoseq_print **************************************************
**
** Prints one value of a sequence information record to the html or text
** output file.
**
** In HTML mode the value is written as a table data cell.  In column mode
** the value is left-justified in a field of the given width when usewid is
** true, or written unpadded when it is false; if the previous padded value
** on the same line filled or overflowed its field, one space is written
** first.  Otherwise the value is written preceded by the delimiter, except
** for the first value on a line.
**
** The "first on the line" and "previous field was full" states are kept in
** function-local static variables and are reset whenever instring is false.
**
** NOTE(review): in HTML mode the value is inserted as-is; confirm whether
** characters such as '<' and '&' need escaping for the expected inputs.
**
** @param [r] html [AjBool] HTML output if true, else plain text
** @param [r] instring [AjBool] Some token already printed on this line
** @param [r] str [const AjPStr] Value to print
** @param [r] usewid [AjBool] Pad the value to wid (column mode only)
** @param [r] wid [ajuint] Field width (column mode with usewid only)
** @param [r] columns [AjBool] Print in columns if true
** @param [r] delimiter [const AjPStr] Delimiter between tokens
**                                     (non-column text mode only)
** @param [u] outfile [AjPFile] Output file
** @return [AjBool] Always true; the caller stores it as the new value of
**                  instring
** @@
******************************************************************************/

static AjBool infoseq_print(AjBool html, AjBool instring,
                            const AjPStr str, AjBool usewid, ajuint wid,
                            AjBool columns, const AjPStr delimiter,
                            AjPFile outfile)
{
    /* Suppress delimiter on first call (for first string printed out) */
    static AjBool nodelim = AJTRUE;
    /* True when the previous padded value left no padding after it */
    static AjBool colfull = AJFALSE;
    ajuint tlen = ajStrGetLen(str);

    /* Reset for each new line */
    if(!instring)
    {
        nodelim = ajTrue;
        colfull = ajFalse;
    }

    if(html)
        ajFmtPrintF(outfile, "<td>%S</td>", str);
    else if(columns)
    {
        if(colfull)
            ajFmtPrintF(outfile, " ");

        if(usewid)
        {
            ajFmtPrintF(outfile, "%-*S", wid, str);
            colfull = (tlen >= wid) ? ajTrue : ajFalse;
        }
        else
            ajFmtPrintF(outfile, "%S", str);
    }
    else
    {
        if(nodelim)
            ajFmtPrintF(outfile, "%S", str);
        else
            ajFmtPrintF(outfile, "%S%S", delimiter, str);
    }

    nodelim = ajFalse;

    return ajTrue;
}