/* @source distmat application ** ** Calculates the evolutionary distance matrix for a set of ** aligned sequences. Measures the pairwise evolutionary ** distances between aligned sequences. Distances are expressed ** as substitutions per 100 bases or a.a.'s. ** ** Methods to correct for multiple substitutions at a site: ** Nucleic Acid- Kimura's 2 parameter, Tajima-Nei, Jin-Nei ** Gamma distance or Tamura methods. ** Protein - Kimura method. ** Nucleic Acid or Protein - Jukes-Cantor method ** ** ** @author Copyright (C) Tim Carver (tcarver@hgmp.mrc.ac.uk) ** @@ ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the GNU General Public License ** as published by the Free Software Foundation; either version 2 ** of the License, or (at your option) any later version. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** GNU General Public License for more details. ** ** You should have received a copy of the GNU General Public License ** along with this program; if not, write to the Free Software ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ******************************************************************************/ #include "emboss.h" #include static AjPFloat2d distmat_calc_match(char* const * seqcharptr, ajint len, ajint nseqs, AjBool ambig, AjBool nuc, AjPFloat2d* gap); static AjPFloat2d distmat_uncorrected(const AjPFloat2d match, const AjPFloat2d gap, float gapwt, ajint len, ajint nseqs); /* correction methods for multiple subst. */ static AjPFloat2d distmat_JukesCantor(const AjPFloat2d match, const AjPFloat2d gap, float gapwt, ajint mlen, ajint nseqs, AjBool nuc); static AjPFloat2d distmat_Kimura(char* const * seqcharptr, ajint len, ajint nseqs); static AjPFloat2d distmat_KimuraProt(char* const * seqcharptr, ajint mlen, ajint nseqs); static AjPFloat2d distmat_Tamura(char* const * seqcharptr, ajint len, ajint nseqs); static AjPFloat2d distmat_TajimaNei(char* const * seqcharptr, const AjPFloat2d match, ajint mlen, ajint nseqs); static AjPFloat2d distmat_JinNei(char* const * seqcharptr, ajint len, ajint nseqs, AjBool calc_a, float var_a); /* output routine and misc routines */ static void distmat_outputDist(AjPFile outf, ajint nseqs, const AjPSeqset seqset, const AjPFloat2d match, float gapwt, ajint method, AjBool ambig, AjBool nuc, ajint posn, ajint incr); static float distmat_checkambigProt(ajint t1, ajint t2); static void distmat_checkSubs(ajint t1, ajint t2, ajint* trans, ajint* tranv); static void distmat_checkRY(ajint t1, ajint t2, ajint* trans, ajint* tranv); static char** distmat_getSeq(const AjPSeqset seqset, ajint nseqs, ajint mlen, ajint incr, ajint posn, ajint* len); int main (int argc, char * argv[]) { ajint nseqs; ajuint mlen; ajint len; ajint i; ajint method; ajint incr; ajint posn; float gapwt; float var_a; const char *p; char **seqcharptr; AjPSeqset seqset = NULL; AjPFloat2d match = NULL; AjPFloat2d matchDist = NULL; AjPFloat2d gap = NULL; AjPStr methodlist = NULL; AjPStr methodlist2 = NULL; AjPFile outf = NULL; AjBool nuc = ajFalse; AjBool ambig; AjBool calc_a; embInit("distmat", argc, argv); seqset = ajAcdGetSeqset("sequence"); if(ajSeqsetIsNuc(seqset)) /* nucleic acid seq */ nuc = ajTrue; else if( ajSeqsetIsProt(seqset)) nuc = ajFalse; else embExit(); outf = ajAcdGetOutfile("outfile"); /* output filename */ ambig = ajAcdGetBoolean("ambiguous"); gapwt = ajAcdGetFloat("gapweight"); if(nuc) { methodlist = ajAcdGetListSingle("nucmethod"); methodlist2 = ajAcdGetListSingle("protmethod"); } else { methodlist2 = ajAcdGetListSingle("nucmethod"); methodlist = ajAcdGetListSingle("protmethod"); } posn = ajAcdGetInt("position"); calc_a = ajAcdGetBoolean("calculatea"); var_a = ajAcdGetFloat("parametera"); incr = 1; /* codons to analyse */ if(posn >= 1 && posn <= 3 && nuc) { incr = 3; posn--; } else if(posn == 123) posn = 0; else if(posn == 23 || posn == 13 || posn != 12) ajFatal("Choose base positions 1, 2, 3, 12, or 123"); ajStrToInt(methodlist, &method); nseqs = ajSeqsetGetSize(seqset); if(nseqs<2) ajFatal("Insufficient sequences (%d) to create a matrix",nseqs); mlen = ajSeqsetGetLen(seqset); for(i=0;i= j) { if(i==j) D = 0.; else D=ajFloat2dGet(match,j,i); D=D*(float)100.; if(D < 10.) ajFmtPrintF(outf," %.2f\t",D); else if(D < 100.) ajFmtPrintF(outf," %.2f\t",D); else ajFmtPrintF(outf,"%.2f\t",D); } else ajFmtPrintF(outf,"\t"); ajFmtPrintF(outf,"\t%S %d",ajSeqsetGetseqNameS(seqset,j),j+1); ajFmtPrintF(outf,"\n"); } return; }