/* * MrBayes 3.1.2 * * copyright 2002-2005 * * John P. Huelsenbeck * Section of Ecology, Behavior and Evolution * Division of Biological Sciences * University of California, San Diego * La Jolla, CA 92093-0116 * * johnh@biomail.ucsd.edu * * Fredrik Ronquist * School of Computational Science * Florida State University * Tallahassee, FL 32306-4120 * * ronquist@csit.fsu.edu * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details (www.gnu.org). * */ #include #include #include #include #include #include #include "mb.h" #include "globals.h" #include "command.h" #include "bayes.h" #include "sump.h" #include "mcmc.h" #if defined(__MWERKS__) #include "SIOUX.h" #endif /* local prototypes */ void GetSumpToken (int *tokenType); int HarmonicArithmeticMean (MrBFlt *paramVals, int nRows, int nCols, int col, MrBFlt *mean, MrBFlt *harm_mean); void MeanVariance(MrBFlt *paramVals, int nRows, int nCols, int col, MrBFlt *m, MrBFlt *v); MrBFlt PotentialScaleReduction (MrBFlt *paramVals, int nRows, int nCols, int nRuns, int col); int PrintAATable (FILE *fp, MrBFlt *parmVals, int nRows, int nCols, int nRuns); int PrintOverlayPlot (FILE *fp); int PrintParamTable (FILE *fp, MrBFlt *parmVals, int nRows, int nCols, int nRuns); int PrintPlot (FILE *fp, MrBFlt *paramVals); void PrintPlotHeader (FILE *fp); void Sort2 (MrBFlt *item, int left, int right); int SortColumn (MrBFlt *paramValues, int nRows, int nCols, int column); int SortParameters (MrBFlt *paramValues); /* local (but global to this file) parameters */ int numColumns, numRows; MrBFlt *parameterValues; char *sumpTokenP, sumpToken[CMD_STRING_LENGTH], *headerNames; int DoSump (void) { int i, j, n, lineTerm=LINETERM_UNIX, longestLineLength=0, tokenType, lineNum, lastTokenWasDash, inSumpComment, allDigitLine, nNumbersOnThisLine, lastNonDigitLine, numParamLines, numLinesToRead, numLinesRead, firstNumCols=0, nLines, nHeaders=0, len, longestHeader, firstFileNumParamLines=0, firstFileNumRows=0, firstFileNumColumns=0, unreliable, oneUnreliable; MrBFlt tempD, *parameterPointer, mean, harm_mean; char *s=NULL, *s1, *headerLine=NULL, temp[100]; FILE *fp=NULL, *fpOut=NULL; # if defined (MPI_ENABLED) int ierror; # endif # if defined (MPI_ENABLED) if (proc_id == 0) { # endif /* set file pointers to NULL */ fp = fpOut = NULL; /* check if there is anything to do */ if (sumpParams.margLike == NO && sumpParams.plot == NO && sumpParams.table == NO) { MrBayesPrint ("%s Nothing to do for sump. Try setting Plot, Marglike or Table to yes.\n", spacer); return NO_ERROR; } /* tell user we are ready to go */ if (sumpParams.numRuns == 1) MrBayesPrint ("%s Summarizing parameters in file %s.p\n", spacer, sumpParams.sumpFileName); else if (sumpParams.numRuns == 2) MrBayesPrint ("%s Summarizing parameters in files %s.run1.p and %s.run2.p\n", spacer, sumpParams.sumpFileName, sumpParams.sumpFileName); else /* if (sumpParams.numRuns > 2) */ { MrBayesPrint ("%s Summarizing parameters in %d files (%s.run1.p,\n", spacer, sumpParams.numRuns, sumpParams.sumpFileName); MrBayesPrint ("%s %s.run2.p, etc)\n", spacer, sumpParams.sumpFileName); } if (sumpParams.printToFile == NO) MrBayesPrint ("%s Writing output to screen but not to file ('Printtofile = No')\n", spacer); else MrBayesPrint ("%s Writing output to file %s\n", spacer, sumpParams.sumpOutfile); /* open output text file */ if (sumpParams.printToFile == YES) { if ((fpOut = OpenNewMBPrintFile (sumpParams.sumpOutfile)) == NULL) goto errorExit; /* print unique identifier to the output file */ len = (int) strlen (stamp); if (len <= 1) fprintf (fpOut, "[ID: None Available]\n"); else fprintf (fpOut, "[ID: %s]\n", stamp); } /* open input file(s) */ for (n=0; n longestLineLength) { longestLineLength = i; DEBUG("reallocating s new size: %u\n",longestLineLength*sizeof(char)); s = (char *) realloc((void *) s, (size_t) (longestLineLength * sizeof(char))); if (!s) { MrBayesPrint ("%s Problem reallocating string for reading sump file\n", spacer); goto errorExit; } DEBUG("reallocating headerLine newsize: %u\n",longestLineLength*sizeof(char)); headerLine = (char *)realloc((void *) headerLine, (size_t) ((longestLineLength)* sizeof(char))); if (!headerLine) { MrBayesPrint ("%s Problem reallocating headerLine for reading sump file\n", spacer); goto errorExit; } DEBUG("reallocating headerNames newsize: %u\n",(longestLineLength+40) * sizeof(char)); headerNames = (char *)realloc(headerNames, (size_t) ((longestLineLength+40) * sizeof(char))); if (!headerNames) { MrBayesPrint ("%s Problem allocating headerNames for reading sump file\n", spacer); goto errorExit; } for (i=0; i 0) numParamLines++; } } /* Now, check some aspects of the .p file. */ if (inSumpComment == YES) { if (sumpParams.numRuns > 1) MrBayesPrint ("%s Unterminated comment in file \"%s.run%d.p\"\n", spacer, sumpParams.sumpFileName, n+1); else MrBayesPrint ("%s Unterminated comment in file \"%s\"\n", spacer, sumpParams.sumpFileName); goto errorExit; } if (numParamLines <= 0) { if (sumpParams.numRuns > 1) MrBayesPrint ("%s No parameters were found in file \"%s.run%d.p\"\n", spacer, sumpParams.sumpFileName, n+1); else MrBayesPrint ("%s No parameters were found in file \"%s\"\n", spacer, sumpParams.sumpFileName); goto errorExit; } if (n == 0) { if (sumpParams.sumpBurnIn > numParamLines) { MrBayesPrint ("%s No parameters are sampled as the burnin exceeds the number of lines in last block\n", spacer); MrBayesPrint ("%s Try setting burnin to a number less than %d\n", spacer, numParamLines); goto errorExit; } /* tell the user that everything is fine */ if (sumpParams.numRuns > 1) MrBayesPrint ("%s Found %d parameter lines in file \"%s.run%d.p\"\n", spacer, numParamLines, sumpParams.sumpFileName, n+1); else MrBayesPrint ("%s Found %d parameter lines in file \"%s\"\n", spacer, numParamLines, sumpParams.sumpFileName); if (sumpParams.sumpBurnIn > 0) MrBayesPrint ("%s Of the %d lines, %d of them will be summarized (starting at line %d)\n", spacer, numParamLines, numParamLines - sumpParams.sumpBurnIn, lastNonDigitLine + sumpParams.sumpBurnIn + 1); else MrBayesPrint ("%s All %d lines will be summarized (starting at line %d)\n", spacer, numParamLines, lastNonDigitLine+1); MrBayesPrint ("%s (Only the last set of lines will be read, in case multiple\n", spacer); MrBayesPrint ("%s parameter blocks are present in the same file.)\n", spacer); firstFileNumParamLines = numParamLines; } else { /* check that we find the same number of parameter lines in all files */ if (numParamLines != firstFileNumParamLines) { MrBayesPrint ("%s Incompatible files: First file had %d while file %d had %d post-burnin samples\n", spacer, firstFileNumParamLines, n+1, numParamLines); } } /* Calculate and check the number of columns and rows for the file */ (void)fseek(fp, 0L, 0); for (lineNum=0; lineNum 1) MrBayesPrint ("%s Found a line with non-digit characters (line %d) in file %d\n", spacer, lineNum, n+1); else MrBayesPrint ("%s Found a line with non-digit characters (line %d)\n", spacer, lineNum); goto errorExit; } else { if (nNumbersOnThisLine > 0) { nLines++; if (nLines == 1 && n == 0) firstNumCols = nNumbersOnThisLine; else { if (nNumbersOnThisLine != firstNumCols) { if (sumpParams.numRuns == 1) MrBayesPrint ("%s Number of columns is not even (%d in first line and %d in %d line)\n", spacer, firstNumCols, nNumbersOnThisLine, lineNum); else MrBayesPrint ("%s Number of columns is not even (%d in first line of first file and %d in %d line of file %d)\n", spacer, firstNumCols, nNumbersOnThisLine, lineNum, i+1); goto errorExit; } } } } } numRows = nLines; numColumns = firstNumCols; if (n==0) { MrBayesPrint ("%s %d rows and %d columns in each row\n", spacer, numRows, numColumns); if (sumpParams.numRuns > 1) MrBayesPrint ("%s Expecting the same layout in all subsequent files\n", spacer); /* allocate space to hold parameter information */ if (numRows == 0 || numColumns == 0) { MrBayesPrint ("%s The number of rows or columns is equal to zero\n", spacer); goto errorExit; } if (memAllocs[ALLOC_SUMPINFO] == YES) { MrBayesPrint ("%s Sump string is already allocated\n", spacer); goto errorExit; } parameterValues = (MrBFlt *)SafeMalloc((size_t) (sumpParams.numRuns * numRows * numColumns * sizeof(MrBFlt))); if (!parameterValues) { MrBayesPrint ("%s Problem allocating parameterValues\n", spacer); goto errorExit; } memAllocs[ALLOC_SUMPINFO] = YES; for (i=0; i= numRows * numColumns) { if (sumpParams.numRuns > 1) MrBayesPrint ("%s Too many parameter values read in (%d) in file %d\n", spacer, j, n+1); else MrBayesPrint ("%s Too many parameter values read in (%d)\n", spacer, j); goto errorExit; } sscanf (sumpToken, "%lf", &tempD); if (lastTokenWasDash == YES) tempD *= -1.0; parameterPointer[numLinesRead * numColumns + nNumbersOnThisLine] = tempD; j++; nNumbersOnThisLine++; lastTokenWasDash = NO; } else if (tokenType == DASH) { lastTokenWasDash = YES; } else if (tokenType != UNKNOWN_TOKEN_TYPE) { /* we have a problem */ if (sumpParams.numRuns > 1) MrBayesPrint ("%s Found a line with non-digit characters (line %d) in file %d\n", spacer, lineNum, n+1); else MrBayesPrint ("%s Found a line with non-digit characters (line %d)\n", spacer, lineNum); goto errorExit; } } } while (*sumpToken); lineNum++; if (nNumbersOnThisLine > 0) numLinesRead++; } /* tell user how many lines were successfully read */ if (sumpParams.numRuns == 1) MrBayesPrint ("%s Successfully read %d lines from last parameter block\n", spacer, numLinesRead); else MrBayesPrint ("%s Successfully read %d lines from last parameter block of file %d\n", spacer, numLinesRead, n+1); /* Check how many parameter line was read in. */ if (numLinesRead != numParamLines - sumpParams.sumpBurnIn) { MrBayesPrint ("%s Unable to read all lines that should contain parameter samples\n", spacer); goto errorExit; } /* separate header line into titles for each column and/or check for compliance with first file */ if (n == 0) { if (GetHeaders (headerLine, &nHeaders) == ERROR) goto errorExit; /* get length of longest header */ longestHeader = 9; /* 9 is the length of the word "parameter" (for printing table) */ for (i=0; i longestHeader) longestHeader = len; } } else { for (i=0; i 1) { if (sumpParams.allRuns == YES) { for (i=0; i 2) MrBayesPrint ("%s \"%s.run1.p\", \"%s.run2.p\", etc:\n", spacer, sumpParams.sumpFileName, sumpParams.sumpFileName); else /* if (sumpParams.numRuns == 2) */ MrBayesPrint ("%s \"%s.run1.p\" and \"%s.run2.p\":\n", spacer, sumpParams.sumpFileName, sumpParams.sumpFileName); MrBayesPrint ("%s (Use the harmonic mean for Bayes factor comparisons of models)\n\n", spacer, sumpParams.sumpFileName); MrBayesPrint ("%s Run Arithmetic mean Harmonic mean\n", spacer); MrBayesPrint ("%s --------------------------------------\n", spacer); } if (unreliable == NO) MrBayesPrint ("%s %3d %10.2lf %10.2lf\n", spacer, n+1, mean, harm_mean); else MrBayesPrint ("%s %3d %10.2lf * %10.2lf *\n", spacer, n+1, mean, harm_mean); } if (sumpParams.printToFile == YES) { if (sumpParams.numRuns == 1) { MrBayesPrintf (fpOut, "\n%s Estimated marginal likelihoods for run sampled in file \"%s\"\n\n", spacer, sumpParams.sumpFileName); MrBayesPrintf (fpOut, "%s Arithmetic mean Harmonic mean\n", spacer); MrBayesPrintf (fpOut, "%s --------------------------------\n", spacer); if (unreliable == NO) MrBayesPrintf (fpOut, "%s %10.2lf %10.2lf\n", spacer, mean, harm_mean); else MrBayesPrintf (fpOut, "%s %10.2lf * %10.2lf *\n", spacer, mean, harm_mean); } else { if (n == 0) { MrBayesPrintf (fpOut, "%s Estimated marginal likelihoods for runs sampled in files\n", spacer); if (sumpParams.numRuns > 2) MrBayesPrintf (fpOut, "%s \"%s.run1.p\", \"%s.run2.p\", etc:\n", spacer, sumpParams.sumpFileName, sumpParams.sumpFileName); else /* if (sumpParams.numRuns == 2) */ MrBayesPrintf (fpOut, "%s \"%s.run1.p\" and \"%s.run2.p\":\n", spacer, sumpParams.sumpFileName, sumpParams.sumpFileName); MrBayesPrintf (fpOut, "%s Run Arithmetic mean Harmonic mean\n", spacer); MrBayesPrintf (fpOut, "%s --------------------------------------\n", spacer); } if (unreliable == NO) MrBayesPrintf (fpOut, "%s %3d %10.2lf %10.2lf\n", spacer, n+1, mean, harm_mean); else MrBayesPrintf (fpOut, "%s %3d %10.2lf * %10.2lf *\n", spacer, n+1, mean, harm_mean); } } } /* next run */ if (sumpParams.numRuns == 1) { MrBayesPrint ("%s --------------------------------\n", spacer); if (sumpParams.printToFile == YES) MrBayesPrintf (fpOut, " --------------------------------\n"); } else { if (SortColumn (parameterValues, sumpParams.numRuns*numRows, numColumns, i) == ERROR) goto errorExit; if (HarmonicArithmeticMean (parameterValues, sumpParams.numRuns*numRows, numColumns, i, &mean, &harm_mean) == ERROR) { unreliable = YES; oneUnreliable = YES; } else unreliable = NO; MrBayesPrint ("%s --------------------------------------\n", spacer); if (unreliable == YES) MrBayesPrint ("%s TOTAL %10.2lf * %10.2lf *\n", spacer, mean, harm_mean); else MrBayesPrint ("%s TOTAL %10.2lf %10.2lf\n", spacer, mean, harm_mean); MrBayesPrint ("%s --------------------------------------\n", spacer); if (sumpParams.printToFile == YES) { MrBayesPrintf (fpOut, "%s --------------------------------------\n", spacer); MrBayesPrintf (fpOut, "%s --------------------------------------\n", spacer); if (unreliable == YES) MrBayesPrintf (fpOut, "%s TOTAL %10.2lf * %10.2lf *\n", spacer, mean, harm_mean); else MrBayesPrintf (fpOut, "%s TOTAL %10.2lf %10.2lf\n", spacer, mean, harm_mean); MrBayesPrintf (fpOut, "%s --------------------------------------\n", spacer); } } if (oneUnreliable == YES) { MrBayesPrint ("%s * These estimates may be unreliable because \n", spacer); MrBayesPrint ("%s some extreme values were excluded\n\n", spacer); if (sumpParams.printToFile == YES) { MrBayesPrintf (fpOut, "%s * These estimates may be unreliable because \n", spacer); MrBayesPrintf (fpOut, "%s some extreme values were excluded\n\n", spacer); } } else { MrBayesPrint ("\n"); if (sumpParams.printToFile == YES) { MrBayesPrintf (fpOut, "\n"); } } } /* check next header */ } if (sumpParams.table == YES) { /* Print parameter information to screen and possibly to file. */ if (sumpParams.numRuns > 1 && sumpParams.allRuns == YES) { for (i=0; i %s\n", i, temp); } # endif return (NO_ERROR); } void GetSumpToken (int *tokenType) { int allNumbers; register char *temp; (*tokenType) = 0; temp = sumpToken; while (IsWhite(*sumpTokenP) == 1 || IsWhite(*sumpTokenP) == 2) { if (IsWhite(*sumpTokenP) == 2) { *tokenType = RETURNSYMBOL; foundNewLine = YES; /* MrBayesPrint ("RETURN\n"); */ } ++sumpTokenP; } *tokenType = UNKNOWN_TOKEN_TYPE; if (IsIn(*sumpTokenP,"=")) { *temp++ = *sumpTokenP++; *tokenType = EQUALSIGN; } else if (IsIn(*sumpTokenP,";")) { *temp++ = *sumpTokenP++; *tokenType = SEMICOLON; } else if (IsIn(*sumpTokenP,":")) { *temp++ = *sumpTokenP++; *tokenType = COLON; } else if (IsIn(*sumpTokenP,",")) { *temp++ = *sumpTokenP++; *tokenType = COMMA; } else if (IsIn(*sumpTokenP,"#")) { *temp++ = *sumpTokenP++; *tokenType = POUNDSIGN; } else if (IsIn(*sumpTokenP,"(")) { *temp++ = *sumpTokenP++; *tokenType = LEFTPAR; } else if (IsIn(*sumpTokenP,")")) { *temp++ = *sumpTokenP++; *tokenType = RIGHTPAR; } else if (IsIn(*sumpTokenP,"{")) { *temp++ = *sumpTokenP++; *tokenType = LEFTCURL; } else if (IsIn(*sumpTokenP,"}")) { *temp++ = *sumpTokenP++; *tokenType = RIGHTCURL; } else if (IsIn(*sumpTokenP,"[")) { *temp++ = *sumpTokenP++; *tokenType = LEFTCOMMENT; } else if (IsIn(*sumpTokenP,"]")) { *temp++ = *sumpTokenP++; *tokenType = RIGHTCOMMENT; } else if (IsIn(*sumpTokenP,"?")) { *temp++ = *sumpTokenP++; *tokenType = QUESTIONMARK; } else if (IsIn(*sumpTokenP,"-")) { *temp++ = *sumpTokenP++; *tokenType = DASH; } else if (IsIn(*sumpTokenP,"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789.")) { allNumbers = TRUE; if (!IsIn(*sumpTokenP,"0123456789.")) allNumbers = FALSE; *temp++ = *sumpTokenP++; while(IsIn(*sumpTokenP,"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789.")) { if (!IsIn(*sumpTokenP,"0123456789.")) allNumbers = FALSE; *temp++ = *sumpTokenP++; } if (allNumbers == TRUE) *tokenType = NUMBER; else *tokenType = ALPHA; } else if (IsIn(*sumpTokenP,"*")) { *temp++ = *sumpTokenP++; *tokenType = ASTERISK; } else if (IsIn(*sumpTokenP,"/")) { *temp++ = *sumpTokenP++; *tokenType = FORWARDSLASH; } else if (IsIn(*sumpTokenP,"'\\'")) { *temp++ = *sumpTokenP++; *tokenType = BACKSLASH; } else if (IsIn(*sumpTokenP,"!")) { *temp++ = *sumpTokenP++; *tokenType = EXCLAMATIONMARK; } else if (IsIn(*sumpTokenP,"%")) { *temp++ = *sumpTokenP++; *tokenType = PERCENT; } else if (IsIn(*sumpTokenP,"\"")) { *temp++ = *sumpTokenP++; *tokenType = QUOTATIONMARK; } else if (IsIn(*sumpTokenP,"&~+^$@|{}`><")) { *temp++ = *sumpTokenP++; *tokenType = WEIRD; } *temp = '\0'; } int HarmonicArithmeticMean (MrBFlt *paramVals, int nRows, int nCols, int col, MrBFlt *mean, MrBFlt *harm_mean) { int i, reliable; MrBFlt a, aOld, x, y, scaler, n; reliable = YES; scaler = paramVals[(nRows-1) * nCols + col] - 100.0; a = aOld = n = 0.0; for (i=0; i 0.0) { R = ((MrBFlt)(nRows - 1) / (MrBFlt) (nRows)) + ((MrBFlt)(nRuns + 1) / (MrBFlt) (nRuns)) * (sB / sW); return sqrt(R); } else return -1.0; } /* PrintAATable: Print amino acid table to file fp */ int PrintAATable (FILE *fp, MrBFlt *parmVals, int nRows, int nCols, int nRuns) { int i, j, k, len, startReading; int aaModelCounts[20]; MrBFlt f, sum[20], ssq[20], stddev[20], prob[20]; char parts[100], temp[100]; for (i=0; GetNameFromString (headerNames, temp, i+1) != ERROR; i++) { len = (int) strlen(temp); if (IsSame (temp, "Aamodel") == SAME || IsSame (temp, "Aamodel") == CONSISTENT_WITH) { MrBayesPrintf (fp, "\n\n"); if (IsSame (temp, "Aamodel") == SAME) MrBayesPrintf (fp, "%s Amino acid model probabilities:\n\n", spacer); else { startReading = NO; j = k = 0; while (temp[j] != '\0') { if (startReading == YES && temp[j] != '}') parts[k++] = temp[j]; if (temp[j] == '{') startReading = YES; else if (temp[j] == '}') startReading = NO; j++; } parts[k] = '\0'; MrBayesPrintf (fp, "%s Amino acid model probabilities for partition(s) %s:\n\n", spacer, parts); } if (nRuns == 1) { for (j=0; j<20; j++) aaModelCounts[j] = 0; for (j=0; j 1) */ { for (j=0; j<20; j++) prob[j] = sum[j] = ssq[j] = 0.0; for (j=0; j 9) MrBayesPrintf (fp, "%s (1 = Run number 1; 2 = Run number 2 etc.; x = Run number 10 or above; * = Several runs)\n", spacer); else if (sumpParams.numRuns > 2) MrBayesPrintf (fp, "%s (1 = Run number 1; 2 = Run number 2 etc.; * = Several runs)\n", spacer); else MrBayesPrintf (fp, "%s (1 = Run number 1; 2 = Run number 2; * = Both runs)\n", spacer); /* print overlay x-y plot of log likelihood vs. generation for all runs */ screenWidth = 60; /* don't change this without changing numY, meanY, and plotSymbol declared above */ screenHeight = 15; /* find the x and y columns of the first run */ whichIsX = whichIsY = -1; for (i=0; GetNameFromString (headerNames, temp, i+1) != ERROR; i++) { len = (int) strlen(temp); if (IsSame (temp, "Gen") == SAME) whichIsX = i; if (IsSame (temp, "lnL") == SAME) whichIsY = i; } if (whichIsX < 0 || whichIsY < 0) { MrBayesPrint ("%s Could not find the 'Gen' and 'lnL' columns of the first file\n", spacer); return (ERROR); } /* find minX, minY, maxX, and maxY over all runs */ minX = minY = 1000000000.0; maxX = maxY = -1000000000.0; for (n=0; n maxX) maxX = x; } } for (n=0; n= screenWidth) k = screenWidth - 1; if (k == j) { y += parameterValues[(n*numRows + i) * numColumns + whichIsY]; k2 ++; } else { y /= k2; if (y < minY) minY = y; if (y > maxY) maxY = y; k2 = 1; y = parameterValues[(n*numRows + i) * numColumns + whichIsY]; j++; } } if (k2 > 0) { y /= k2; if (y < minY) minY = y; if (y > maxY) maxY = y; } } /* initialize the plot symbols */ for (i=0; i= screenWidth) k = screenWidth - 1; if (k <= 0) k = 0; meanY[k] += y; numY[k]++; } /* transfer these points to the overlay */ for (i=0; i 0) { k = (int) ((((meanY[i] / numY[i]) - minY)/ (maxY - minY)) * screenHeight); if (k < 0) k = 0; else if (k >= screenHeight) k = screenHeight - 1; if (plotSymbol[k][i] == ' ') { if (n <= 8) plotSymbol[k][i] = '1' + n; else plotSymbol[k][i] = 'x'; } else plotSymbol[k][i] = '*'; } } } /* next run */ /* now print the overlay plot */ MrBayesPrintf (fp, "\n +"); for (i=0; i=0; i--) { MrBayesPrintf (fp, " |"); for (j=0; j longestHeader) longestHeader = len; } /* print the header rows */ MrBayesPrintf (fp, "%s %*c 95%% Cred. Interval\n", spacer, longestHeader, ' '); MrBayesPrintf (fp, "%s %*c ----------------------\n", spacer, longestHeader, ' '); MrBayesPrintf (fp, "%s Parameter%*c Mean Variance Lower Upper Median", spacer, longestHeader-9, ' '); if (nRuns > 1) MrBayesPrintf (fp, " PSRF *"); MrBayesPrintf (fp, "\n"); MrBayesPrintf (fp, "%s ", spacer); for (j=0; j 1) MrBayesPrintf (fp, "-----------"); MrBayesPrintf (fp, "\n"); /* print table values */ for (i=0; GetNameFromString (headerNames, temp, i+1) != ERROR; i++) { len = (int) strlen(temp); if (IsSame (temp, "Gen") == SAME) continue; if (IsSame (temp, "Aamodel") == SAME || IsSame (temp, "Aamodel") == CONSISTENT_WITH) continue; if (IsSame (temp, "lnL") == SAME) continue; if (nRuns > 1) sqrt_R = PotentialScaleReduction (parmVals, nRows, nCols, nRuns, i); parmVals2 = (MrBFlt *)SafeMalloc((size_t) (nRuns * numRows * numColumns * sizeof(MrBFlt))); if (!parmVals2) { MrBayesPrint ("%s Problem allocating parameterValues\n", spacer); return ERROR; } memcpy(parmVals2, parmVals, (nRuns * numRows * numColumns * sizeof(MrBFlt))); if (SortColumn (parmVals2, nRuns*nRows, nCols, i) == ERROR) { free(parmVals2); return ERROR; } MeanVariance (parmVals2, nRuns*nRows, nCols, i, &mean, &var); lower = parmVals2[((int)(nRuns * nRows * 0.025) * nCols + i)]; upper = parmVals2[((int)(nRuns * nRows * 0.975) * nCols + i)]; median = parmVals2[((int)(nRuns * nRows * 0.500) * nCols + i)]; free(parmVals2); MrBayesPrintf (fp, "%s %-*s ", spacer, longestHeader, temp); MrBayesPrintf (fp, "%12.6lf %12.6lf %12.6lf %12.6lf %12.6lf", mean, var, lower, upper, median); if (nRuns > 1) { if (sqrt_R < 0.0) MrBayesPrintf (fp, " N/A "); else MrBayesPrintf (fp, " %9.3lf", sqrt_R); } MrBayesPrintf (fp, "\n"); } MrBayesPrintf (fp, "%s ", spacer); for (j=0; j 1) MrBayesPrintf (fp, "-----------"); MrBayesPrintf (fp, "\n"); if (nRuns > 1) { MrBayesPrintf (fp, "%s * Convergence diagnostic (PSRF = Potential scale reduction factor [Gelman\n", spacer); MrBayesPrintf (fp, "%s and Rubin, 1992], uncorrected) should approach 1 as runs converge. The\n", spacer); MrBayesPrintf (fp, "%s values may be unreliable if you have a small number of samples. PSRF should\n", spacer); MrBayesPrintf (fp, "%s only be used as a rough guide to convergence since all the assumptions\n", spacer); MrBayesPrintf (fp, "%s that allow one to interpret it as a scale reduction factor are not met in\n", spacer); MrBayesPrintf (fp, "%s the phylogenetic context.\n", spacer); } return (NO_ERROR); } /* PrintPlot: Print x-y plot of log likelihood vs. generation */ int PrintPlot (FILE *fp, MrBFlt *paramVals) { int i, j, k, len, numY[60], screenWidth, screenHeight, whichIsX, whichIsY; char temp[100]; MrBFlt x, y, minX, maxX, minY, maxY, meanY[60]; /* print x-y plot of log likelihood vs. generation */ screenWidth = 60; /* don't change this without changing numY and meanY, declared above */ screenHeight = 15; whichIsX = whichIsY = -1; for (i=0; GetNameFromString (headerNames, temp, i+1) != ERROR; i++) { len = (int) strlen(temp); if (IsSame (temp, "Gen") == SAME) whichIsX = i; else if (IsSame (temp, "lnL") == SAME) whichIsY = i; } if (whichIsX < 0 || whichIsY < 0) { MrBayesPrint ("%s Could not find the 'Gen' and 'lnL' columns\n", spacer); return (ERROR); } /* find minX and maxX */ minX = 1000000000.0; maxX = -1000000000.0; for (i=0; i maxX) maxX = x; } /* collect Y data */ for (i=0; i= screenWidth) k = screenWidth - 1; if (k < 0) k = 0; meanY[k] += y; numY[k]++; } /* find minY and maxY */ minY = 1000000000.0; maxY = -1000000000.0; for (i=0; i maxY) maxY = meanY[i]; } /* print plot */ MrBayesPrintf (fp, "\n +"); for (i=0; i=0; j--) { MrBayesPrintf (fp, " |"); for (i=0; i 0) { k = (int) (((meanY[i] - minY) / (maxY - minY)) * screenHeight); if (k < 0) k = 0; if (k >= screenHeight) k = screenHeight - 1; if (k == j) MrBayesPrintf (fp, "*"); else MrBayesPrintf (fp, " "); } else { MrBayesPrintf (fp, " "); } } MrBayesPrintf (fp, "|\n"); } MrBayesPrintf (fp, " +"); for (i=0; i 1) { MrBayesPrintf (fp, "%s Below are rough plots of the generation (x-axis) versus the log \n", spacer); MrBayesPrintf (fp, "%s probability of observing the data (y-axis). You can use these \n", spacer); MrBayesPrintf (fp, "%s graphs to determine what the burn in for your analysis should be. \n", spacer); MrBayesPrintf (fp, "%s When the log probability starts to plateau you may be at station- \n", spacer); MrBayesPrintf (fp, "%s arity. Sample trees and parameters after the log probability \n", spacer); MrBayesPrintf (fp, "%s plateaus. Of course, this is not a guarantee that you are at sta- \n", spacer); MrBayesPrintf (fp, "%s tionarity. Also examine the convergence diagnostics provided by \n", spacer); MrBayesPrintf (fp, "%s the 'sump' and 'sumt' commands for all the parameters in your \n", spacer); MrBayesPrintf (fp, "%s model. Remember that the burn in is the number of samples to dis- \n", spacer); MrBayesPrintf (fp, "%s card. There are a total of ngen / samplefreq samples taken during \n", spacer); MrBayesPrintf (fp, "%s a MCMC analysis. \n", spacer); } else { MrBayesPrintf (fp, "%s Below is a rough plot of the generation (x-axis) versus the log \n", spacer); MrBayesPrintf (fp, "%s probability of observing the data (y-axis). You can use this \n", spacer); MrBayesPrintf (fp, "%s graph to determine what the burn in for your analysis should be. \n", spacer); MrBayesPrintf (fp, "%s When the log probability starts to plateau you may be at station- \n", spacer); MrBayesPrintf (fp, "%s arity. Sample trees and parameters after the log probability \n", spacer); MrBayesPrintf (fp, "%s plateaus. Of course, this is not a guarantee that you are at sta- \n", spacer); MrBayesPrintf (fp, "%s analysis should be. When the log probability starts to plateau \n", spacer); MrBayesPrintf (fp, "%s tionarity. When possible, run multiple analyses starting from dif-\n", spacer); MrBayesPrintf (fp, "%s ferent random trees; if the inferences you make for independent \n", spacer); MrBayesPrintf (fp, "%s analyses are the same, this is reasonable evidence that the chains\n", spacer); MrBayesPrintf (fp, "%s have converged. You can use MrBayes to run several independent \n", spacer); MrBayesPrintf (fp, "%s analyses simultaneously. During such a run, MrBayes will monitor \n", spacer); MrBayesPrintf (fp, "%s the convergence of topologies. After the run has been completed, \n", spacer); MrBayesPrintf (fp, "%s the 'sumt' and 'sump' functions will provide additional conver- \n", spacer); MrBayesPrintf (fp, "%s gence diagnostics for all the parameters in your model. Remember \n", spacer); MrBayesPrintf (fp, "%s that the burn in is the number of samples to discard. There are \n", spacer); MrBayesPrintf (fp, "%s a total of ngen / samplefreq samples taken during a MCMC analysis.\n", spacer); } } void Sort (MrBFlt *item, int count) { Sort2 (item, 0, count-1); } void Sort2 (MrBFlt *item, int left, int right) { register int i, j; MrBFlt x, y; i = left; j = right; x = item[(left+right)/2]; do { while (item[i] < x && i < right) i++; while (x < item[j] && j > left) j--; if (i <= j) { y = item[i]; item[i] = item[j]; item[j] = y; i++; j--; } } while (i <= j); if (left < j) Sort2 (item, left, j); if (i < right) Sort2 (item, i, right); } int SortColumn (MrBFlt *paramValues, int nRows, int nCols, int column) { int i; MrBFlt *tempParamValues; /* allocate information for sorting */ tempParamValues = (MrBFlt *)SafeMalloc((size_t) (nRows * sizeof(MrBFlt))); if (!tempParamValues) { MrBayesPrint ("%s Problem allocating tempParamValues\n", spacer); return ERROR; } for (i=0; i