/******************************COPYRIGHT NOTICE*******************************/ /* (c) Centro de Regulacio Genomica */ /* and */ /* Cedric Notredame */ /* 12 Aug 2014 - 22:07. */ /*All rights reserved. */ /*This file is part of T-COFFEE. */ /* */ /* T-COFFEE is free software; you can redistribute it and/or modify */ /* it under the terms of the GNU General Public License as published by */ /* the Free Software Foundation; either version 2 of the License, or */ /* (at your option) any later version. */ /* */ /* T-COFFEE is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with Foobar; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /*............................................... */ /* If you need some more information */ /* cedric.notredame@europe.com */ /*............................................... */ /******************************COPYRIGHT NOTICE*******************************/ /********* Sequence input routines for CLUSTAL W *******************/ /* DES was here. FEB. 
1994 */ /* Now reads PILEUP/MSF and CLUSTAL alignment files */ #include #include #include #include #include "io_lib_header.h" #include "util_lib_header.h" #include "define_header.h" /* * Prototypes */ extern Boolean linetype(char *,char *); extern Boolean blankline(char *); extern void warning(char *,...); extern void error(char *,...); extern char * rtrim(char *); extern char * blank_to_(char *); extern void getstr(char *,char *); void fill_chartab(void); static void get_seq(char *,char *,int *,char *); static void get_clustal_seq(char *,char *,int *,char *,int); static void get_msf_seq(char *,char *,int *,char *,int); static void check_infile(int *); static int count_clustal_seqs(void); static int count_msf_seqs(void); /* * Global variables */ static FILE *fin; char *amino_acid_codes = "ABCDEFGHIKLMNPQRSTUVWXYZ-"; /* DES */ char *nucleic_acid_order = "ACGTUN"; static int seqFormat; static char chartab[128]; void fill_chartab(void) /* Create translation and check table */ { register int i; register int c; for(i=0;i<128;chartab[i++]=0); for(i=0,c=0;c<=amino_acid_codes[i];i++) chartab[c]=chartab[tolower(c)]=c; } static void get_msf_seq(char *sname,char *seq,int *len,char *tit,int seqno) /* read the seqno_th. 
sequence from a PILEUP multiple alignment file */
{
	/* Line buffer: the pointer is static, so it is allocated on the first call only and reused afterwards. */
	static char *line;
	int i,j,k;
	unsigned char c;
	if ( !line)line=(char*)vcalloc ( (MAXLINE+1), sizeof (char));
	fseek(fin,0,0); /* start at the beginning */
	*len=0; /* initialise length to zero */
	/* Skip the header: read lines until linetype() accepts one for the / marker. If the file ends first, return with *len still 0. */
	for(i=0;;i++) {
		if(fgets(line,MAXLINE+1,fin)==NULL) return; /* read the title*/
		if(linetype(line,"/") ) break; /* lines...ignore*/
	}
	/* Walk the remaining lines of the file; blank lines are skipped. */
	while (fgets(line,MAXLINE+1,fin) != NULL) {
		if(!blankline(line)) {
			/* NOTE(review): from this point the text of this line is damaged. The rest of this function, the start of the following one and later pieces appear to have lost everything between a less-than sign and the next greater-than sign. Restore it from the upstream file before changing any logic here. One thing that is visible in the surviving GDE branch: its while condition joins two != tests on the same character with ||, which is always true -- presumably && was intended; confirm once the text is restored. */
			for(i=1;i') break; /* EOL */ if( (c=chartab[c])) {seq[++(*len)]=c; } } if(*len == SEQ_MAX_LEN || c == '>') break; } break; /**********************************************/ case GDE: while(*line != '#' ||*line != '%' ) fgets(line,MAXLINE+1,fin); for (i=1;i<=MAXNAMES;i++) { if (line[i] == '(' || line[i] == '\n') { i--; break; } sname[i-1] = line[i]; } sname[i]=EOS; offset=0; if (sname[i-1] == '(') sscanf(&line[i],"%d",&offset); else offset = 0; for(i=MAXNAMES-1;i > 0;i--) if(isspace(sname[i])) { sname[i]=EOS; break; } blank_to_(sname); *tit=EOS; *len=0; for (i=0;i SEQ_MAX_LEN) { error("Sequence too long. 
Maximum is %d",(pint)SEQ_MAX_LEN); return 0; /* also return zero if too many */ } for ( a=0; a') { /* no */ seqFormat=(line[3] == ';')?PIR:PEARSON; /* distinguish PIR and Pearson */ (*nseqs)++; } else if((*line == '"') || (*line == '%') || (*line == '#')) { seqFormat=GDE; /* GDE format */ if (*line == '%') { (*nseqs)++; } else if (*line == '#') { (*nseqs)++; } } else { seqFormat=UNKNOWN; return; } while(fgets(line,MAXLINE+1,fin) != NULL) { switch(seqFormat) { case EMBLSWISS: if( linetype(line,"ID") ) (*nseqs)++; break; case PIR: case PEARSON: if( *line == '>' ) (*nseqs)++; break; case GDE: if(( *line == '%' ) ) (*nseqs)++; else if (( *line == '#') ) (*nseqs)++; break; case CLUSTAL: *nseqs = count_clustal_seqs(); /* DES */ /* fprintf(stdout,"\nnseqs = %d\n",(pint)*nseqs); */ fseek(fin,0,0); return; break; case MSF: *nseqs = count_msf_seqs(); fseek(fin,0,0); return; break; case USER: default: break; } } fseek(fin,0,0); } static int count_clustal_seqs(void) /* count the number of sequences in a clustal alignment file */ { static char *line; int nseqs; if ( !line)line=(char*)vcalloc ( (MAXLINE+1), sizeof (char)); while (fgets(line,MAXLINE+1,fin) != NULL) { if(!blankline(line)) break; /* Look for next non- */ } /* blank line */ nseqs = 1; while (fgets(line,MAXLINE+1,fin) != NULL) { if(blankline(line)) return nseqs; nseqs++; } return 0; /* if you got to here-funny format/no seqs.*/ } static int count_msf_seqs(void) { /* count the number of sequences in a PILEUP alignment file */ static char *line; int nseqs; if ( !line)line=(char*)vcalloc ( (MAXLINE+1), sizeof (char)); while (fgets(line,MAXLINE+1,fin) != NULL) { if(linetype(line,"/")) break; } while (fgets(line,MAXLINE+1,fin) != NULL) { if(!blankline(line)) break; /* Look for next non- */ } /* blank line */ nseqs = 1; while (fgets(line,MAXLINE+1,fin) != NULL) { if(blankline(line)) return nseqs; nseqs++; } return 0; /* if you got to here-funny format/no seqs.*/ }