/* @source redata application ** ** Reports isoschizomers, references and suppliers for restriction enzymes ** @author Copyright (C) Alan Bleasby (ableasby@hgmp.mrc.ac.uk) ** @@ ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the GNU General Public License ** as published by the Free Software Foundation; either version 2 ** of the License, or (at your option) any later version. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** GNU General Public License for more details. ** ** You should have received a copy of the GNU General Public License ** along with this program; if not, write to the Free Software ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ******************************************************************************/ #include "emboss.h" #define ENZDATA "REBASE/embossre.enz" #define REFDATA "REBASE/embossre.ref" #define SUPDATA "REBASE/embossre.sup" #define SUPPGUESS 50 /* Estimate of number of suppliers. */ static AjPTable redata_supply_table(AjPFile inf); /* @prog redata *************************************************************** ** ** Search REBASE for enzyme name, references, suppliers etc ** ******************************************************************************/ int main(int argc, char **argv) { AjPStr enzyme = NULL; AjPFile outf = NULL; AjPFile enzfile = NULL; AjPFile reffile = NULL; AjPFile supfile = NULL; AjBool isoschizomers; AjBool references; AjBool suppliers; AjPTable t; AjPStr key = NULL; const AjPStr value = NULL; AjPStr line = NULL; AjPStr enzline = NULL; const char *p; AjPStr str; AjPStr iso; AjPStr *ea; ajint ne = 0; ajint len; ajint ncuts; AjBool blunt; ajint cut1; ajint cut2; ajint cut3; ajint cut4; ajint i; ajint n; embInit("redata", argc, argv); enzfile = ajDatafileNewInNameC(ENZDATA); reffile = ajDatafileNewInNameC(REFDATA); supfile = ajDatafileNewInNameC(SUPDATA); if(!enzfile || !reffile || !supfile) ajFatal("EMBOSS_DATA undefined or REBASEEXTRACT needs running"); enzyme = ajAcdGetString("enzyme"); outf = ajAcdGetOutfile("outfile"); isoschizomers = ajAcdGetBoolean("isoschizomers"); references = ajAcdGetBoolean("references"); suppliers = ajAcdGetBoolean("suppliers"); ajStrRemoveWhite(&enzyme); line = ajStrNew(); enzline = ajStrNew(); str = ajStrNew(); key = ajStrNewC("."); iso = ajStrNew(); /* Read in and close supplier file for later use */ t=redata_supply_table(supfile); ajFileClose(&supfile); /* Read the enzyme line */ while(ajReadlineTrim(enzfile,&enzline)) { p=ajStrGetPtr(enzline); if(*p=='#' || *p=='\n' || *p=='!') continue; p = ajSysFuncStrtok(p," \t\n"); ajStrAssignC(&str,p); /* while(*p) ++p; *p = ' ';*/ if(ajStrMatchCaseS(str,enzyme)) break; } /* Only do the rest if a matching enzyme was found */ if(ajStrMatchCaseS(str,enzyme)) { ajFmtPrintF(outf,"%S\n\n",str); while(ajStrMatchCaseS(str,enzyme)) { p = ajStrGetPtr(enzline); p = ajSysFuncStrtok(p," \t\n"); ajStrAssignC(&str,p); p = ajSysFuncStrtok(NULL," \t\n"); ajStrAssignC(&line,p); p = ajSysFuncStrtok(NULL,"\n"); sscanf(p,"%d%d",&len,&ncuts); if(ncuts==2) sscanf(p,"%d%d%d%d%d",&len,&ncuts,&blunt,&cut1,&cut2); else sscanf(p,"%d%d%d%d%d%d%d",&len,&ncuts,&blunt,&cut1,&cut2, &cut3,&cut4); ajStrFmtUpper(&line); ajFmtPrintF(outf,"Recognition site is %s leaving ", ajStrGetPtr(line)); if(blunt) ajFmtPrintF(outf,"blunt ends\n"); else ajFmtPrintF(outf,"sticky ends\n"); if(ncuts==2) ajFmtPrintF(outf," Cut positions 5':%d 3':%d\n",cut1,cut2); else ajFmtPrintF(outf," Cut positions 5':%d 3':%d [5':%d 3':%d]\n", cut1,cut2,cut3,cut4); if(!ajReadlineTrim(enzfile,&enzline)) break; p = ajStrGetPtr(enzline); p = ajSysFuncStrtok(p," \t\n"); ajStrAssignC(&str,p); } /* Read the reference file */ while(ajReadlineTrim(reffile,&line)) { p = ajStrGetPtr(line); if(*p=='#' || *p=='\n' || *p=='!') continue; if(ajStrMatchCaseS(line,enzyme)) break; while(!ajStrMatchC(line,"//")) ajReadlineTrim(reffile,&line); } ajReadlineTrim(reffile,&line); ajFmtPrintF(outf,"Organism: %s\n",ajStrGetPtr(line)); ajReadlineTrim(reffile,&iso); if(ajStrGetLen(iso)) ne = ajArrCommaList(iso,&ea); ajReadlineTrim(reffile,&line); if(ajStrGetLen(line)) ajFmtPrintF(outf,"Methylated: %s\n",ajStrGetPtr(line)); ajReadlineTrim(reffile,&line); if(ajStrGetLen(line)) ajFmtPrintF(outf,"Source: %s\n",ajStrGetPtr(line)); if(isoschizomers && ajStrGetLen(iso)) { ajFmtPrintF(outf,"\nIsoschizomers:\n"); n = 0; ajFmtPrintF(outf," "); for(i=0;i