#include "phylip.h" #include "seq.h" /* version 3.6. (c) Copyright 1993-2004 by the University of Washington. Written by Joseph Felsenstein, Lucas Mix, Akiko Fuseki, Sean Lamont, Andrew Keeffe, Dan Fineman, and Patrick Colacurcio. Permission is granted to copy and use this program provided no fee is charged for it and provided that this copyright notice is not removed. */ typedef long vall[maxcategs]; typedef double contribarr[maxcategs]; #ifndef OLDC /* function prototypes */ void init_protmats(void); void getoptions(void); void makeprotfreqs(void); void allocrest(void); void doinit(void); void inputoptions(void); void input_protdata(long); void makeweights(void); void prot_makevalues(long, pointarray, long, long, sequence, steptr); void prot_inittable(void); void alloc_pmatrix(long); void getinput(void); void inittravtree(node *); void prot_nuview(node *); void prot_slopecurv(node *, double, double *, double *, double *); void makenewv(node *); void update(node *); void smooth(node *); void make_pmatrix(double **, double **, double **, long, double, double, double *, double **); double prot_evaluate(node *, boolean); void treevaluate(void); void promlcopy(tree *, tree *, long, long); void proml_re_move(node **, node **); void insert_(node *, node *, boolean); void addtraverse(node *, node *, boolean); void rearrange(node *, node *); void proml_coordinates(node *, double, long *, double *); void proml_printree(void); void sigma(node *, double *, double *, double *); void describe(node *); void prot_reconstr(node *, long); void rectrav(node *, long, long); void summarize(void); void initpromlnode(node **, node **, node *, long, long, long *, long *, initops, pointarray, pointarray, Char *, Char *, FILE *); void dnaml_treeout(node *); void buildnewtip(long, tree *); void buildsimpletree(tree *); void free_all_protx (long, pointarray); void maketree(void); void clean_up(void); void globrearrange(void); void proml_unroot(node* root, node** nodep, long nonodes) ; void reallocsites(void); void prot_freetable(void); void free_pmatrix(long sib); void alloclrsaves(void); void freelrsaves(void); void resetlrsaves(void); /* function prototypes */ #endif long rcategs; boolean haslengths; long oldendsite=0; Char infilename[100], outfilename[100], intreename[100], outtreename[100], catfilename[100], weightfilename[100]; double *rate, *rrate, *probcat; long nonodes2, sites, weightsum, categs, datasets, ith, njumble, jumb; long inseed, inseed0, parens; boolean global, jumble, weights, trout, usertree, inserting = false, ctgry, rctgry, auto_, hypstate, progress, mulsets, justwts, firstset, improve, smoothit, polishing, lngths, gama, invar, usepmb, usepam, usejtt; tree curtree, bestree, bestree2, priortree; node *qwhere, *grbg, *addwhere; double cv, alpha, lambda, invarfrac, bestyet; long *enterorder; steptr aliasweight; contribarr *contribution, like, nulike, clai; double **term, **slopeterm, **curveterm; longer seed; char *progname; char aachar[26]="ARNDCQEGHILKMFPSTWYVBZX?*-"; node **lrsaves; /* Local variables for maketree, propagated globally for c version: */ long k, nextsp, numtrees, maxwhich, mx, mx0, mx1, shimotrees; double dummy, maxlogl; boolean succeeded, smoothed; double **l0gf; double *l0gl; double **tbl; Char ch, ch2; long col; vall *mp; /* Variables introduced to allow for protein probability calculations */ long max_num_sibs; /* maximum number of siblings used in a */ /* nuview calculation. determines size */ /* final size of pmatrices */ double *eigmat; /* eig matrix variable */ double **probmat; /* prob matrix variable */ double ****dpmatrix; /* derivative of pmatrix */ double ****ddpmatrix; /* derivative of xpmatrix */ double *****pmatrices; /* matrix of probabilities of protien */ /* conversion. The 5 subscripts refer */ /* to sibs, rcategs, categs, final and */ /* initial states, respectively. */ double freqaa[20]; /* amino acid frequencies */ /* this JTT matrix decomposition thanks to Elisabeth Tillier */ static double jtteigmat[] = {+0.00000000000000,-1.81721720738768,-1.87965834528616,-1.61403121885431, -1.53896608443751,-1.40486966367848,-1.30995061286931,-1.24668414819041, -1.17179756521289,-0.31033320987464,-0.34602837857034,-1.06031718484613, -0.99900602987105,-0.45576774888948,-0.86014403434677,-0.54569432735296, -0.76866956571861,-0.60593589295327,-0.65119724379348,-0.70249806480753}; static double jttprobmat[20][20] = {{+0.07686196156903,+0.05105697447152,+0.04254597872702,+0.05126897436552, +0.02027898986051,+0.04106097946952,+0.06181996909002,+0.07471396264303, +0.02298298850851,+0.05256897371552,+0.09111095444453,+0.05949797025102, +0.02341398829301,+0.04052997973502,+0.05053197473402,+0.06822496588753, +0.05851797074102,+0.01433599283201,+0.03230298384851,+0.06637396681302}, {-0.04445795120462,-0.01557336502860,-0.09314817363516,+0.04411372100382, -0.00511178725134,+0.00188472427522,-0.02176250428454,-0.01330231089224, +0.01004072641973,+0.02707838224285,-0.00785039050721,+0.02238829876349, +0.00257470703483,-0.00510311699563,-0.01727154263346,+0.20074235330882, -0.07236268502973,-0.00012690116016,-0.00215974664431,-0.01059243778174}, {+0.09480046389131,+0.00082658405814,+0.01530023104155,-0.00639909042723, +0.00160605602061,+0.00035896642912,+0.00199161318384,-0.00220482855717, -0.00112601328033,+0.14840201765438,-0.00344295714983,-0.00123976286718, -0.00439399942758,+0.00032478785709,-0.00104270266394,-0.02596605592109, -0.05645800566901,+0.00022319903170,-0.00022792271829,-0.16133258048606}, {-0.06924141195400,-0.01816245289173,-0.08104005811201,+0.08985697111009, +0.00279659017898,+0.01083740322821,-0.06449599336038,+0.01794514261221, +0.01036809141699,+0.04283504450449,+0.00634472273784,+0.02339134834111, -0.01748667848380,+0.00161859106290,+0.00622486432503,-0.05854130195643, +0.15083728660504,+0.00030733757661,-0.00143739522173,-0.05295810171941}, {-0.14637948915627,+0.02029296323583,+0.02615316895036,-0.10311538564943, -0.00183412744544,-0.02589124656591,+0.11073673851935,+0.00848581728407, +0.00106057791901,+0.05530240732939,-0.00031533506946,-0.03124002869407, -0.01533984125301,-0.00288717337278,+0.00272787410643,+0.06300929916280, +0.07920438311152,-0.00041335282410,-0.00011648873397,-0.03944076085434}, {-0.05558229086909,+0.08935293782491,+0.04869509588770,+0.04856877988810, -0.00253836047720,+0.07651693957635,-0.06342453535092,-0.00777376246014, -0.08570270266807,+0.01943016473512,-0.00599516526932,-0.09157595008575, -0.00397735155663,-0.00440093863690,-0.00232998056918,+0.02979967701162, -0.00477299485901,-0.00144011795333,+0.01795114942404,-0.00080059359232}, {+0.05807741644682,+0.14654292420341,-0.06724975334073,+0.02159062346633, -0.00339085518294,-0.06829036785575,+0.03520631903157,-0.02766062718318, +0.03485632707432,-0.02436836692465,-0.00397566003573,-0.10095488644404, +0.02456887654357,+0.00381764117077,-0.00906261340247,-0.01043058066362, +0.01651199513994,-0.00210417220821,-0.00872508520963,-0.01495915462580}, {+0.02564617106907,+0.02960554611436,-0.00052356748770,+0.00989267817318, -0.00044034172141,-0.02279910634723,-0.00363768356471,-0.01086345665971, +0.01229721799572,+0.02633650142592,+0.06282966783922,-0.00734486499924, -0.13863936313277,-0.00993891943390,-0.00655309682350,-0.00245191788287, -0.02431633805559,-0.00068554031525,-0.00121383858869,+0.06280025239509}, {+0.11362428251792,-0.02080375718488,-0.08802750967213,-0.06531316372189, -0.00166626058292,+0.06846081717224,+0.07007301248407,-0.01713112936632, -0.05900588794853,-0.04497159138485,+0.04222484636983,+0.00129043178508, -0.01550337251561,-0.01553102163852,-0.04363429852047,+0.01600063777880, +0.05787328925647,-0.00008265841118,+0.02870014572813,-0.02657681214523}, {+0.01840541226842,+0.00610159018805,+0.01368080422265,+0.02383751807012, -0.00923516894192,+0.01209943150832,+0.02906782189141,+0.01992384905334, +0.00197323568330,+0.00017531415423,-0.01796698381949,+0.01887083962858, -0.00063335886734,-0.02365277334702,+0.01209445088200,+0.01308086447947, +0.01286727242301,-0.11420358975688,-0.01886991700613,+0.00238338728588}, {-0.01100105031759,-0.04250695864938,-0.02554356700969,-0.05473632078607, +0.00725906469946,-0.03003724918191,-0.07051526125013,-0.06939439879112, -0.00285883056088,+0.05334304124753,+0.12839241846919,-0.05883473754222, +0.02424304967487,+0.09134510778469,-0.00226003347193,-0.01280041778462, -0.00207988305627,-0.02957493909199,+0.05290385686789,+0.05465710875015}, {-0.01421274522011,+0.02074863337778,-0.01006411985628,+0.03319995456446, -0.00005371699269,-0.12266046460835,+0.02419847062899,-0.00441168706583, -0.08299118738167,-0.00323230913482,+0.02954035119881,+0.09212856795583, +0.00718635627257,-0.02706936115539,+0.04473173279913,-0.01274357634785, -0.01395862740618,-0.00071538848681,+0.04767640012830,-0.00729728326990}, {-0.03797680968123,+0.01280286509478,-0.08614616553187,-0.01781049963160, +0.00674319990083,+0.04208667754694,+0.05991325707583,+0.03581015660092, -0.01529816709967,+0.06885987924922,-0.11719120476535,-0.00014333663810, +0.00074336784254,+0.02893416406249,+0.07466151360134,-0.08182016471377, -0.06581536577662,-0.00018195976501,+0.00167443595008,+0.09015415667825}, {+0.03577726799591,-0.02139253448219,-0.01137813538175,-0.01954939202830, -0.04028242801611,-0.01777500032351,-0.02106862264440,+0.00465199658293, -0.02824805812709,+0.06618860061778,+0.08437791757537,-0.02533125946051, +0.02806344654855,-0.06970805797879,+0.02328376968627,+0.00692992333282, +0.02751392122018,+0.01148722812804,-0.11130404325078,+0.07776346000559}, {-0.06014297925310,-0.00711674355952,-0.02424493472566,+0.00032464353156, +0.00321221847573,+0.03257969053884,+0.01072805771161,+0.06892027923996, +0.03326534127710,-0.01558838623875,+0.13794237677194,-0.04292623056646, +0.01375763233229,-0.11125153774789,+0.03510076081639,-0.04531670712549, -0.06170413486351,-0.00182023682123,+0.05979891871679,-0.02551802851059}, {-0.03515069991501,+0.02310847227710,+0.00474493548551,+0.02787717003457, -0.12038329679812,+0.03178473522077,+0.04445111601130,-0.05334957493090, +0.01290386678474,-0.00376064171612,+0.03996642737967,+0.04777677295520, +0.00233689200639,+0.03917715404594,-0.01755598277531,-0.03389088626433, -0.02180780263389,+0.00473402043911,+0.01964539477020,-0.01260807237680}, {-0.04120428254254,+0.00062717164978,-0.01688703578637,+0.01685776910152, +0.02102702093943,+0.01295781834163,+0.03541815979495,+0.03968150445315, -0.02073122710938,-0.06932247350110,+0.11696314241296,-0.00322523765776, -0.01280515661402,+0.08717664266126,+0.06297225078802,-0.01290501780488, -0.04693925076877,-0.00177653675449,-0.08407812137852,-0.08380714022487}, {+0.03138655228534,-0.09052573757196,+0.00874202219428,+0.06060593729292, -0.03426076652151,-0.04832468257386,+0.04735628794421,+0.14504653737383, -0.01709111334001,-0.00278794215381,-0.03513813820550,-0.11690294831883, -0.00836264902624,+0.03270980973180,-0.02587764129811,+0.01638786059073, +0.00485499822497,+0.00305477087025,+0.02295754527195,+0.00616929722958}, {-0.04898722042023,-0.01460879656586,+0.00508708857036,+0.07730497806331, +0.04252420017435,+0.00484232580349,+0.09861807969412,-0.05169447907187, -0.00917820907880,+0.03679081047330,+0.04998537112655,+0.00769330211980, +0.01805447683564,-0.00498723245027,-0.14148416183376,-0.05170281760262, -0.03230723310784,-0.00032890672639,-0.02363523071957,+0.03801365471627}, {-0.02047562162108,+0.06933781779590,-0.02101117884731,-0.06841945874842, -0.00860967572716,-0.00886650271590,-0.07185241332269,+0.16703684361030, -0.00635847581692,+0.00811478913823,+0.01847205842216,+0.06700967948643, +0.00596607376199,+0.02318239240593,-0.10552958537847,-0.01980199747773, -0.02003785382406,-0.00593392430159,-0.00965391033612,+0.00743094349652}}; /* this PMB matrix decomposition due to Elisabeth Tillier */ static double pmbeigmat[20] = {0.0000001586972220,-1.8416770496147100, -1.6025046986139100,-1.5801012515121300, -1.4987794099715900,-1.3520794233801900,-1.3003469390479700,-1.2439503327631300, -1.1962574080244200,-1.1383730501367500,-1.1153278910708000,-0.4934843510654760, -0.5419014550215590,-0.9657997830826700,-0.6276075673757390,-0.6675927795018510, -0.6932641383465870,-0.8897872681859630,-0.8382698977371710,-0.8074694642446040}; static double pmbprobmat[20][20] = {{0.0771762457248147,0.0531913844998640,0.0393445076407294,0.0466756566755510, 0.0286348361997465,0.0312327748383639,0.0505410248721427,0.0767106611472993, 0.0258916271688597,0.0673140562194124,0.0965705469252199,0.0515979465932174, 0.0250628079438675,0.0503492018628350,0.0399908189418273,0.0641898881894471, 0.0517539616710987,0.0143507440546115,0.0357994592438322,0.0736218495862984}, {0.0368263046116572,-0.0006728917107827,0.0008590805287740,-0.0002764255356960, 0.0020152937187455,0.0055743720652960,0.0003213317669367,0.0000449190281568, -0.0004226254397134,0.1805040629634510,-0.0272246813586204,0.0005904606533477, -0.0183743200073889,-0.0009194625608688,0.0008173657533167,-0.0262629806302238, 0.0265738757209787,0.0002176606241904,0.0021315644838566,-0.1823229927207580}, {-0.0194800075560895,0.0012068088610652,-0.0008803318319596,-0.0016044273960017, -0.0002938633803197,-0.0535796754602196,0.0155163896648621,-0.0015006360762140, 0.0021601372013703,0.0268513218744797,-0.1085292493742730,0.0149753083138452, 0.1346457366717310,-0.0009371698759829,0.0013501708044116,0.0346352293103622, -0.0276963770242276,0.0003643142783940,0.0002074817333067,-0.0174108903914110}, {0.0557839400850153,0.0023271577185437,0.0183481103396687,0.0023339480096311, 0.0002013267015151,-0.0227406863569852,0.0098644845475047,0.0064721276774396, 0.0001389408104210,-0.0473713878768274,-0.0086984445005797,0.0026913674934634, 0.0283724052562196,0.0001063665179457,0.0027442574779383,-0.1875312134708470, 0.1279864877057640,0.0005103347834563,0.0003155113168637,0.0081451082759554}, {0.0037510125027265,0.0107095920636885,0.0147305410328404,-0.0112351252180332, -0.0001500408626446,-0.1523450933729730,0.0611532413339872,-0.0005496748939503, 0.0048714378736644,-0.0003826320053999,0.0552010244407311,0.0482555671001955, -0.0461664995115847,-0.0021165008617978,-0.0004574454232187,0.0233755883688949, -0.0035484915422384,0.0009090698422851,0.0013840637687758,-0.0073895139302231}, {-0.0111512564930024,0.1025460064723080,0.0396772456883791,-0.0298408501361294, -0.0001656742634733,-0.0079876311843289,0.0712644184507945,-0.0010780604625230, -0.0035880882043592,0.0021070399334252,0.0016716329894279,-0.1810123023850110, 0.0015141703608724,-0.0032700852781804,0.0035503782441679,0.0118634302028026, 0.0044561606458028,-0.0001576678495964,0.0023470722225751,-0.0027457045397157}, {0.1474525743949170,-0.0054432538500293,0.0853848892349828,-0.0137787746207348, -0.0008274830358513,0.0042248844582553,0.0019556229305563,-0.0164191435175148, -0.0024501858854849,0.0120908948084233,-0.0381456105972653,0.0101271614855119, -0.0061945941321859,0.0178841099895867,-0.0014577779202600,-0.0752120602555032, -0.1426985695849920,0.0002862275078983,-0.0081191734261838,0.0313401149422531}, {0.0542034611735289,-0.0078763926211829,0.0060433542506096,0.0033396210615510, 0.0013965072374079,0.0067798903832256,-0.0135291136622509,-0.0089982442731848, -0.0056744537593887,-0.0766524225176246,0.1881210263933930,-0.0065875518675173, 0.0416627569300375,-0.0953804133524747,-0.0012559228448735,0.0101622644292547, -0.0304742453119050,0.0011702318499737,0.0454733434783982,-0.1119239362388150}, {0.1069409037912470,0.0805064400880297,-0.1127352030714600,0.1001181253523260, -0.0021480427488769,-0.0332884841459003,-0.0679837575848452,-0.0043812841356657, 0.0153418716846395,-0.0079441315103188,-0.0121766182046363,-0.0381127991037620, -0.0036338726532673,0.0195324059593791,-0.0020165963699984,-0.0061222685010268, -0.0253761448771437,-0.0005246410999057,-0.0112205170502433,0.0052248485517237}, {-0.0325247648326262,0.0238753651653669,0.0203684886605797,0.0295666232678825, -0.0003946714764213,-0.0157242718469554,-0.0511737848084862,0.0084725632040180, -0.0167068828528921,0.0686962159427527,-0.0659702890616198,-0.0014289912494271, -0.0167000964093416,-0.1276689083678200,0.0036575057830967,-0.0205958145531018, 0.0000368919612829,0.0014413626622426,0.1064360941926030,0.0863372661517408}, {-0.0463777468104402,0.0394712148670596,0.1118686750747160,0.0440711686389031, -0.0026076286506751,-0.0268454015202516,-0.1464943067133240,-0.0137514051835380, -0.0094395514284145,-0.0144124844774228,0.0249103379323744,-0.0071832157138676, 0.0035592787728526,0.0415627419826693,0.0027040097365669,0.0337523666612066, 0.0316121324137152,-0.0011350177559026,-0.0349998884574440,-0.0302651879823361}, {0.0142360925194728,0.0413145623127025,0.0324976427846929,0.0580930922002398, -0.0586974207121084,0.0202001168873069,0.0492204086749069,0.1126593173463060, 0.0116620013776662,-0.0780333711712066,-0.1109786767320410,0.0407775100936731, -0.0205013161312652,-0.0653458585025237,0.0347351829703865,0.0304448983224773, 0.0068813748197884,-0.0189002309261882,-0.0334507528405279,-0.0668143558699485}, {-0.0131548829657936,0.0044244322828034,-0.0050639951827271,-0.0038668197633889, -0.1536822386530220,0.0026336969165336,0.0021585651200470,-0.0459233839062969, 0.0046854727140565,0.0393815434593599,0.0619554007991097,0.0027456299925622, 0.0117574347936383,0.0373018612990383,0.0024818527553328,-0.0133956606027299, -0.0020457128424105,0.0154178819990401,0.0246524142683911,0.0275363065682921}, {-0.1542307272455030,0.0364861558267547,-0.0090880407008181,0.0531673937889863, 0.0157585615170580,0.0029986538457297,0.0180194047699875,0.0652152443589317, 0.0266842840376180,0.0388457366405908,0.0856237634510719,0.0126955778952183, 0.0099593861698250,-0.0013941794862563,0.0294065511237513,-0.1151906949298290, -0.0852991447389655,0.0028699120202636,-0.0332087026659522,0.0006811857297899}, {0.0281300736924501,-0.0584072081898638,-0.0178386569847853,-0.0536470338171487, -0.0186881656029960,-0.0240008730656106,-0.0541064820498883,0.2217137098936020, -0.0260500001542033,0.0234505236798375,0.0311127151218573,-0.0494139126682672, 0.0057093465049849,0.0124937286655911,-0.0298322975915689,0.0006520211333102, -0.0061018680727128,-0.0007081999479528,-0.0060523759094034,0.0215845995364623}, {0.0295321046399105,-0.0088296411830544,-0.0065057049917325,-0.0053478115612781, -0.0100646496794634,-0.0015473619084872,0.0008539960632865,-0.0376381933046211, -0.0328135588935604,0.0672161874239480,0.0667626853916552,-0.0026511651464901, 0.0140451514222062,-0.0544836996133137,0.0427485157912094,0.0097455780205802, 0.0177309072915667,-0.0828759701187452,-0.0729504795471370,0.0670731961252313}, {0.0082646581043963,-0.0319918630534466,-0.0188454445200422,-0.0374976353856606, 0.0037131290686848,-0.0132507796987883,-0.0306958830735725,-0.0044119395527308, -0.0140786756619672,-0.0180512599925078,-0.0208243802903953,-0.0232202769398931, -0.0063135878270273,0.0110442171178168,0.1824538048228460,-0.0006644614422758, -0.0069909097436659,0.0255407650654681,0.0099119399501151,-0.0140911517070698}, {0.0261344441524861,-0.0714454044548650,0.0159436926233439,0.0028462736216688, -0.0044572637889080,-0.0089474834434532,-0.0177570282144517,-0.0153693244094452, 0.1160919467206400,0.0304911481385036,0.0047047513411774,-0.0456535116423972, 0.0004491494948617,-0.0767108879444462,-0.0012688533741441,0.0192445965934123, 0.0202321954782039,0.0281039933233607,-0.0590403018490048,0.0364080426546883}, {0.0115826306265004,0.1340228176509380,-0.0236200652949049,-0.1284484655137340, -0.0004742338006503,0.0127617346949511,-0.0428560878860394,0.0060030732454125, 0.0089182609926781,0.0085353834972860,0.0048464809638033,0.0709740071429510, 0.0029940462557054,-0.0483434904493132,-0.0071713680727884,-0.0036840391887209, 0.0031454003250096,0.0246243550241551,-0.0449551277644180,0.0111449232769393}, {0.0140356721886765,-0.0196518236826680,0.0030517022326582,0.0582672093364850, -0.0000973895685457,0.0021704767224292,0.0341806268602705,-0.0152035987563018, -0.0903198657739177,0.0259623214586925,0.0155832497882743,-0.0040543568451651, 0.0036477631918247,-0.0532892744763217,-0.0142569373662724,0.0104500681408622, 0.0103483945857315,0.0679534422398752,-0.0768068882938636,0.0280289727046158}} ; /* dcmut version of PAM model from http://www.ebi.ac.uk/goldman-srv/dayhoff/ */ static double pameigmat[] = {0,-1.93321786301018,-2.20904642493621,-1.74835983874903, -1.64854548332072,-1.54505559488222,-1.33859384676989,-1.29786201193594, -0.235548517495575,-0.266951066089808,-0.28965813670665,-1.10505826965282, -1.04323310568532,-0.430423720979904,-0.541719761016713,-0.879636093986914, -0.711249353378695,-0.725050487280602,-0.776855937389452,-0.808735559461343}; static double pamprobmat[20][20] ={ {0.08712695644, 0.04090397955, 0.04043197978, 0.04687197656, 0.03347398326, 0.03825498087, 0.04952997524, 0.08861195569, 0.03361898319, 0.03688598156, 0.08535695732, 0.08048095976, 0.01475299262, 0.03977198011, 0.05067997466, 0.06957696521, 0.05854197073, 0.01049399475, 0.02991598504, 0.06471796764}, {0.07991048383, 0.006888314018, 0.03857806206, 0.07947073194, 0.004895492884, 0.03815829405, -0.1087562465, 0.008691167141, -0.0140554828, 0.001306404001, -0.001888411299, -0.006921303342, 0.0007655604228, 0.001583298443, 0.006879590446, -0.171806883, 0.04890917949, 0.0006700432804, 0.0002276237277, -0.01350591875}, {-0.01641514483, -0.007233933239, -0.1377830621, 0.1163201333, -0.002305138017, 0.01557250366, -0.07455879489, -0.003225343503, 0.0140630487, 0.005112274204, 0.001405731862, 0.01975833782, -0.001348402973, -0.001085733262, -0.003880514478, 0.0851493313, -0.01163526615, -0.0001197903399, 0.002056153393, 0.0001536095643}, {0.009669278686, -0.006905863869, 0.101083544, 0.01179903104, -0.003780967591, 0.05845105878, -0.09138357299, -0.02850503638, -0.03233951408, 0.008708065876, -0.004700705411, -0.02053221579, 0.001165851398, -0.001366585849, -0.01317695074, 0.1199985703, -0.1146346193, -0.0005953021314, -0.0004297615194, 0.007475695618}, {0.1722243502, -0.003737582995, -0.02964873222, -0.02050116381, -0.0004530478465, -0.02460043205, 0.02280768412, -0.02127364909, 0.01570095258, 0.1027744285, -0.005330539586, 0.0179697651, -0.002904077286, -0.007068126663, -0.0142869583, -0.01444241844, -0.08218861544, 0.0002069181629, 0.001099671379, -0.1063484263}, {-0.1553433627, -0.001169168032, 0.02134785337, 0.0007602305436, 0.0001395330122, 0.03194992019, -0.01290252206, 0.03281720789, -0.01311103735, 0.1177254769, -0.008008783885, -0.02375317548, -0.002817809762, -0.008196682776, 0.01731267617, 0.01853526375, 0.08249908546, -2.788771776e-05, 0.001266182191, -0.09902299976}, {-0.03671080341, 0.0274168035, 0.04625877597, 0.07520706414, -0.0001833803619, -0.1207833161, -0.006415807779, -0.005465629648, 0.02778273972, 0.007589688485, -0.02945266034, -0.03797542064, 0.07044042052, -0.002018573865, 0.01845277071, 0.006901513991, -0.02430934639, -0.0005919635873, -0.001266962331, -0.01487591261}, {-0.03060317816, 0.01182361623, 0.04200270053, 0.05406235279, -0.0003920498815, -0.09159709348, -0.009602690652, -0.00382944418, 0.01761361993, 0.01605684317, 0.05198878008, 0.02198696949, -0.09308930025, -0.00102622863, 0.01477637127, 0.0009314065393, -0.01860959472, -0.0005964703968, -0.002694284083, 0.02079767439}, {0.0195976494, -0.005104484936, 0.007406728707, 0.01236244954, 0.0201446796, 0.007039564785, 0.01276942134, 0.02641595685, 0.002764624354, 0.001273314658, -0.01335316035, 0.01105658671, 2.148773499e-05, -0.02692205639, 0.0118684991, 0.01212624708, 0.01127770094, -0.09842754796, -0.01942336432, 0.007105703151}, {-0.01819461888, -0.01509348507, -0.01297636935, -0.01996453439, 0.1715705905, -0.01601550692, -0.02122706144, -0.02854628494, -0.009351082371, -0.001527995472, -0.010198224, -0.03609537551, -0.003153182095, 0.02395980501, -0.01378664626, -0.005992611421, -0.01176810875, 0.003132361603, 0.03018439539, -0.004956065656}, {-0.02733614784, -0.02258066705, -0.0153112506, -0.02475728664, -0.04480525045, -0.01526640341, -0.02438517425, -0.04836914601, -0.00635964824, 0.02263169831, 0.09794101931, -0.04004304158, 0.008464393478, 0.1185443142, -0.02239294163, -0.0281550321, -0.01453581604, -0.0246742804, 0.0879619849, 0.02342867605}, {0.06483718238, 0.1260012082, -0.006496013283, 0.009914915531, -0.004181603532, 0.0003493226286, 0.01408035752, -0.04881663016, -0.03431167356, -0.01768005602, 0.02362447761, -0.1482364784, -0.01289035619, -0.001778893279, -0.05240099752, 0.05536174567, 0.06782165352, -0.003548568717, 0.001125301173, -0.03277489363}, {0.06520296909, -0.0754802543, 0.03139281903, -0.03266449554, -0.004485188002, -0.03389072036, -0.06163274338, -0.06484769882, 0.05722658289, -0.02824079619, 0.01544837349, 0.03909752708, 0.002029218884, 0.003151939572, -0.05471208363, 0.07962008342, 0.125916047, 0.0008696184937, -0.01086027514, -0.05314092355}, {0.004543119081, 0.01935177735, 0.01905511007, 0.02682993409, -0.01199617967, 0.01426278655, 0.02472521255, 0.03864795501, 0.02166224804, -0.04754243479, -0.1921545477, 0.03621321546, -0.02120627881, 0.04928097895, 0.009396088815, 0.01748042052, -6.173742851e-05, -0.003168033098, 0.07723565812, -0.08255529309}, {0.06710378668, -0.09441410284, -0.004801776989, 0.008830272165, -0.01021645042, -0.02764365608, 0.004250361851, 0.1648777542, -0.037446109, 0.004541057635, -0.0296980702, -0.1532325189, -0.008940580901, 0.006998050812, 0.02338809379, 0.03175059182, 0.02033965512, 0.006388075608, 0.001762762044, 0.02616280361}, {0.01915943021, -0.05432967274, 0.01249342683, 0.06836622457, 0.002054462161, -0.01233535859, 0.07087282652, -0.08948637051, -0.1245896013, -0.02204522882, 0.03791481736, 0.06557467874, 0.005529294156, -0.006296644235, 0.02144530752, 0.01664230081, 0.02647078439, 0.001737725271, 0.01414149877, -0.05331990116}, {0.0266659303, 0.0564142853, -0.0263767738, -0.08029726006, -0.006059357163, -0.06317558457, -0.0911894019, 0.05401487057, -0.08178072458, 0.01580699778, -0.05370550396, 0.09798653264, 0.003934944022, 0.01977291947, 0.0441198541, 0.02788220393, 0.03201877081, -0.00206161759, -0.005101423308, 0.03113033802}, {0.02980360751, -0.009513246268, -0.009543527165, -0.02190644172, -0.006146440672, 0.01207009085, -0.0126989156, -0.1378266418, 0.0275235217, 0.00551720592, -0.03104791544, -0.07111701247, -0.006081754489, -0.01337494521, 0.1783961085, 0.01453225059, 0.01938736048, 0.0004488631071, 0.0110844398, 0.02049339243}, {-0.01433508581, 0.01258858175, -0.004294252236, -0.007146532854, 0.009541628809, 0.008040155729, -0.006857781832, 0.05584120066, 0.007749418365, -0.05867835844, 0.08008131283, -0.004877854222, -0.0007128540743, 0.09489058424, 0.06421121962, 0.00271493526, -0.03229944773, -0.001732026038, -0.08053448316, -0.1241903609}, {-0.009854113227, 0.01294129929, -0.00593064392, -0.03016833115, -0.002018439732, -0.00792418722, -0.03372768732, 0.07828561288, 0.007722254639, -0.05067377561, 0.1191848621, 0.005059475202, 0.004762387166, -0.1029870175, 0.03537190114, 0.001089956203, -0.02139157573, -0.001015245062, 0.08400521847, -0.08273195059}}; void init_protmats() { long l; eigmat = (double *) Malloc (20 * sizeof(double)); for (l = 0; l <= 19; l++) if (usejtt) eigmat[l] = jtteigmat[l]; else { if (usepmb) eigmat[l] = pmbeigmat[l]; else eigmat[l] = pameigmat[l]; } probmat = (double **) Malloc (20 * sizeof(double *)); for (l = 0; l <= 19; l++) if (usejtt) probmat[l] = jttprobmat[l]; else { if (usepmb) probmat[l] = pmbprobmat[l]; else probmat[l] = pamprobmat[l]; } } /* init_protmats */ void getoptions() { /* interactively set options */ long i, loopcount, loopcount2; Char ch; boolean didchangecat, didchangercat; double probsum; fprintf(outfile, "\nAmino acid sequence Maximum Likelihood"); fprintf(outfile, " method, version %s\n\n",VERSION); putchar('\n'); ctgry = false; didchangecat = false; rctgry = false; didchangercat = false; categs = 1; rcategs = 1; auto_ = false; gama = false; global = false; hypstate = false; improve = false; invar = false; jumble = false; njumble = 1; lngths = false; lambda = 1.0; outgrno = 1; outgropt = false; trout = true; usertree = false; weights = false; printdata = false; progress = true; treeprint = true; usejtt = true; usepmb = false; usepam = false; interleaved = true; loopcount = 0; for (;;){ cleerhome(); printf("Amino acid sequence Maximum Likelihood"); printf(" method, version %s\n\n",VERSION); printf("Settings for this run:\n"); printf(" U Search for best tree? %s\n", (usertree ? "No, use user trees in input file" : "Yes")); if (usertree) { printf(" L Use lengths from user trees? %s\n", (lngths ? "Yes" : "No")); } printf(" P JTT, PMB or PAM probability model? %s\n", usejtt ? "Jones-Taylor-Thornton" : usepmb ? "Henikoff/Tillier PMB" : "Dayhoff PAM"); printf(" C One category of sites?"); if (!ctgry || categs == 1) printf(" Yes\n"); else printf(" %ld categories of sites\n", categs); printf(" R Rate variation among sites?"); if (!rctgry) printf(" constant rate of change\n"); else { if (gama) printf(" Gamma distributed rates\n"); else { if (invar) printf(" Gamma+Invariant sites\n"); else printf(" user-defined HMM of rates\n"); } printf(" A Rates at adjacent sites correlated?"); if (!auto_) printf(" No, they are independent\n"); else printf(" Yes, mean block length =%6.1f\n", 1.0 / lambda); } printf(" W Sites weighted? %s\n", (weights ? "Yes" : "No")); if (!usertree) { printf(" S Speedier but rougher analysis? %s\n", (improve ? "No, not rough" : "Yes")); printf(" G Global rearrangements? %s\n", (global ? "Yes" : "No")); } if (!usertree) { printf(" J Randomize input order of sequences?"); if (jumble) printf(" Yes (seed =%8ld,%3ld times)\n", inseed0, njumble); else printf(" No. Use input order\n"); } printf(" O Outgroup root? %s%3ld\n", (outgropt ? "Yes, at sequence number" : "No, use as outgroup species"),outgrno); printf(" M Analyze multiple data sets?"); if (mulsets) printf(" Yes, %2ld %s\n", datasets, (justwts ? "sets of weights" : "data sets")); else printf(" No\n"); printf(" I Input sequences interleaved? %s\n", (interleaved ? "Yes" : "No, sequential")); printf(" 0 Terminal type (IBM PC, ANSI, none)? %s\n", (ibmpc ? "IBM PC" : ansi ? "ANSI" : "(none)")); printf(" 1 Print out the data at start of run %s\n", (printdata ? "Yes" : "No")); printf(" 2 Print indications of progress of run %s\n", (progress ? "Yes" : "No")); printf(" 3 Print out tree %s\n", (treeprint ? "Yes" : "No")); printf(" 4 Write out trees onto tree file? %s\n", (trout ? "Yes" : "No")); printf(" 5 Reconstruct hypothetical sequences? %s\n", (hypstate ? "Yes" : "No")); printf("\n Y to accept these or type the letter for one to change\n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch); getchar(); if (ch == '\n') ch = ' '; uppercase(&ch); if (ch == 'Y') break; if (((!usertree) && (strchr("UPCRAWSGJOMI012345", ch) != NULL)) || (usertree && ((strchr("UPLCRAWSOMI012345", ch) != NULL)))){ switch (ch) { case 'C': ctgry = !ctgry; if (ctgry) { printf("\nSitewise user-assigned categories:\n\n"); initcatn(&categs); if (rate){ free(rate); } rate = (double *) Malloc(categs * sizeof(double)); didchangecat = true; initcategs(categs, rate); } break; case 'P': if (usejtt) { usejtt = false; usepmb = true; } else { if (usepmb) { usepmb = false; usepam = true; } else { usepam = false; usejtt = true; } } break; case 'R': if (!rctgry) { rctgry = true; gama = true; } else { if (gama) { gama = false; invar = true; } else { if (invar) invar = false; else rctgry = false; } } break; case 'A': auto_ = !auto_; if (auto_) initlambda(&lambda); break; case 'W': weights = !weights; break; case 'S': improve = !improve; break; case 'G': global = !global; break; case 'J': jumble = !jumble; if (jumble) initjumble(&inseed, &inseed0, seed, &njumble); else njumble = 1; break; case 'L': lngths = !lngths; break; case 'O': outgropt = !outgropt; if (outgropt) initoutgroup(&outgrno, spp); break; case 'U': usertree = !usertree; break; case 'M': mulsets = !mulsets; if (mulsets) { printf("Multiple data sets or multiple weights?"); loopcount2 = 0; do { printf(" (type D or W)\n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%c%*[^\n]", &ch2); getchar(); if (ch2 == '\n') ch2 = ' '; uppercase(&ch2); countup(&loopcount2, 10); } while ((ch2 != 'W') && (ch2 != 'D')); justwts = (ch2 == 'W'); if (justwts) justweights(&datasets); else initdatasets(&datasets); if (!jumble) { jumble = true; initjumble(&inseed, &inseed0, seed, &njumble); } } break; case 'I': interleaved = !interleaved; break; case '0': initterminal(&ibmpc, &ansi); break; case '1': printdata = !printdata; break; case '2': progress = !progress; break; case '3': treeprint = !treeprint; break; case '4': trout = !trout; break; case '5': hypstate = !hypstate; break; } } else printf("Not a possible option!\n"); countup(&loopcount, 100); } if (gama || invar) { loopcount = 0; do { printf( "\nCoefficient of variation of substitution rate among sites (must be positive)\n"); printf( " In gamma distribution parameters, this is 1/(square root of alpha)\n"); #ifdef WIN32 phyFillScreenColor(); #endif fflush(stdout); scanf("%lf%*[^\n]", &cv); getchar(); countup(&loopcount, 10); } while (cv <= 0.0); alpha = 1.0 / (cv * cv); } if (!rctgry) auto_ = false; if (rctgry) { printf("\nRates in HMM"); if (invar) printf(" (including one for invariant sites)"); printf(":\n"); initcatn(&rcategs); if (probcat){ free(probcat); free(rrate); } probcat = (double *) Malloc(rcategs * sizeof(double)); rrate = (double *) Malloc(rcategs * sizeof(double)); didchangercat = true; if (gama) initgammacat(rcategs, alpha, rrate, probcat); else { if (invar) { loopcount = 0; do { printf("Fraction of invariant sites?\n"); fflush(stdout); scanf("%lf%*[^\n]", &invarfrac); getchar(); countup (&loopcount, 10); } while ((invarfrac <= 0.0) || (invarfrac >= 1.0)); initgammacat(rcategs-1, alpha, rrate, probcat); for (i = 0; i < rcategs-1; i++) probcat[i] = probcat[i]*(1.0-invarfrac); probcat[rcategs-1] = invarfrac; rrate[rcategs-1] = 0.0; } else { initcategs(rcategs, rrate); initprobcat(rcategs, &probsum, probcat); } } } if (!didchangercat){ rrate = (double *) Malloc(rcategs*sizeof(double)); probcat = (double *) Malloc(rcategs*sizeof(double)); rrate[0] = 1.0; probcat[0] = 1.0; } if (!didchangecat) { rate = (double *) Malloc(categs*sizeof(double)); rate[0] = 1.0; } init_protmats(); } /* getoptions */ void makeprotfreqs() { /* calculate amino acid frequencies based on eigmat */ long i, mineig; mineig = 0; for (i = 0; i <= 19; i++) if (fabs(eigmat[i]) < fabs(eigmat[mineig])) mineig = i; memcpy(freqaa, probmat[mineig], 20 * sizeof(double)); for (i = 0; i <= 19; i++) freqaa[i] = fabs(freqaa[i]); } /* makeprotfreqs */ void reallocsites() { long i; for (i = 0; i < spp; i++) y[i] = (Char *) Malloc(sites*sizeof(Char)); free(category); free(weight); free(alias); free(ally); free(location); free(aliasweight); category = (long *) Malloc(sites*sizeof(long)); weight = (long *) Malloc(sites*sizeof(long)); alias = (long *) Malloc(sites*sizeof(long)); ally = (long *) Malloc(sites*sizeof(long)); location = (long *) Malloc(sites*sizeof(long)); aliasweight = (long *) Malloc(sites*sizeof(long)); for (i = 0; i < sites; i++) category[i] = 1; for (i = 0; i < sites; i++) weight[i] = 1; makeweights(); } void allocrest() { long i; y = (Char **) Malloc(spp*sizeof(Char *)); for (i = 0; i < spp; i++) y[i] = (Char *) Malloc(sites*sizeof(Char)); nayme = (naym *) Malloc(spp*sizeof(naym)); enterorder = (long *) Malloc(spp*sizeof(long)); category = (long *) Malloc(sites*sizeof(long)); weight = (long *) Malloc(sites*sizeof(long)); alias = (long *) Malloc(sites*sizeof(long)); ally = (long *) Malloc(sites*sizeof(long)); location = (long *) Malloc(sites*sizeof(long)); aliasweight = (long *) Malloc(sites*sizeof(long)); } /* allocrest */ void doinit() { /* initializes variables */ inputnumbers(&spp, &sites, &nonodes2, 1); getoptions(); if (!usertree) nonodes2--; makeprotfreqs(); if (printdata) fprintf(outfile, "%2ld species, %3ld sites\n", spp, sites); alloctree(&curtree.nodep, nonodes2, usertree); allocrest(); if (usertree) return; alloctree(&bestree.nodep, nonodes2, 0); alloctree(&priortree.nodep, nonodes2, 0); if (njumble <= 1) return; alloctree(&bestree2.nodep, nonodes2, 0); } /* doinit */ void inputoptions() { long i; if (!firstset) { samenumsp(&sites, ith); reallocsites(); } if (firstset) { for (i = 0; i < sites; i++) category[i] = 1; for (i = 0; i < sites; i++) weight[i] = 1; } if (justwts || weights) inputweights(sites, weight, &weights); weightsum = 0; for (i = 0; i < sites; i++) weightsum += weight[i]; if ((ctgry && categs > 1) && (firstset || !justwts)) { inputcategs(0, sites, category, categs, "ProML"); if (printdata) printcategs(outfile, sites, category, "Site categories"); } if (weights && printdata) printweights(outfile, 0, sites, weight, "Sites"); fprintf(outfile, "%s model of amino acid change\n\n", (usejtt ? "Jones-Taylor-Thornton" : usepmb ? "Henikoff/Tillier PMB" : "Dayhoff PAM")); } /* inputoptions */ void input_protdata(long chars) { /* input the names and sequences for each species */ /* used by proml */ long i, j, k, l, basesread, basesnew; Char charstate; boolean allread, done; if (printdata) headings(chars, "Sequences", "---------"); basesread = 0; basesnew = 0; allread = false; while (!(allread)) { /* eat white space -- if the separator line has spaces on it*/ do { charstate = gettc(infile); } while (charstate == ' ' || charstate == '\t'); ungetc(charstate, infile); if (eoln(infile)) scan_eoln(infile); i = 1; while (i <= spp) { if ((interleaved && basesread == 0) || !interleaved) initname(i - 1); j = (interleaved) ? basesread : 0; done = false; while (!done && !eoff(infile)) { if (interleaved) done = true; while (j < chars && !(eoln(infile) || eoff(infile))) { charstate = gettc(infile); if (charstate == '\n' || charstate == '\t') charstate = ' '; if (charstate == ' ' || (charstate >= '0' && charstate <= '9')) continue; uppercase(&charstate); if ((strchr("ABCDEFGHIKLMNPQRSTVWXYZ*?-", charstate)) == NULL) { printf("ERROR: bad amino acid: %c at position %ld of species %ld\n", charstate, j+1, i); if (charstate == '.') { printf(" Periods (.) may not be used as gap characters.\n"); printf(" The correct gap character is (-)\n"); } exxit(-1); } j++; y[i - 1][j - 1] = charstate; } if (interleaved) continue; if (j < chars) scan_eoln(infile); else if (j == chars) done = true; } if (interleaved && i == 1) basesnew = j; scan_eoln(infile); if ((interleaved && j != basesnew) || (!interleaved && j != chars)) { printf("ERROR: SEQUENCES OUT OF ALIGNMENT AT POSITION %ld.\n", j); exxit(-1); } i++; } if (interleaved) { basesread = basesnew; allread = (basesread == chars); } else allread = (i > spp); } if (!printdata) return; for (i = 1; i <= ((chars - 1) / 60 + 1); i++) { for (j = 1; j <= spp; j++) { for (k = 0; k < nmlngth; k++) putc(nayme[j - 1][k], outfile); fprintf(outfile, " "); l = i * 60; if (l > chars) l = chars; for (k = (i - 1) * 60 + 1; k <= l; k++) { if (j > 1 && y[j - 1][k - 1] == y[0][k - 1]) charstate = '.'; else charstate = y[j - 1][k - 1]; putc(charstate, outfile); if (k % 10 == 0 && k % 60 != 0) putc(' ', outfile); } putc('\n', outfile); } putc('\n', outfile); } putc('\n', outfile); } /* input_protdata */ void makeweights() { /* make up weights vector to avoid duplicate computations */ long i; for (i = 1; i <= sites; i++) { alias[i - 1] = i; ally[i - 1] = 0; aliasweight[i - 1] = weight[i - 1]; location[i - 1] = 0; } sitesort2 (sites, aliasweight); sitecombine2(sites, aliasweight); sitescrunch2(sites, 1, 2, aliasweight); for (i = 1; i <= sites; i++) { if (aliasweight[i - 1] > 0) endsite = i; } for (i = 1; i <= endsite; i++) { location[alias[i - 1] - 1] = i; ally[alias[i - 1] - 1] = alias[i - 1]; } term = (double **) Malloc(endsite * sizeof(double *)); for (i = 0; i < endsite; i++) term[i] = (double *) Malloc(rcategs * sizeof(double)); slopeterm = (double **) Malloc(endsite * sizeof(double *)); for (i = 0; i < endsite; i++) slopeterm[i] = (double *) Malloc(rcategs * sizeof(double)); curveterm = (double **) Malloc(endsite * sizeof(double *)); for (i = 0; i < endsite; i++) curveterm[i] = (double *) Malloc(rcategs * sizeof(double)); mp = (vall *) Malloc(sites*sizeof(vall)); contribution = (contribarr *) Malloc(endsite*sizeof(contribarr)); } /* makeweights */ void prot_makevalues(long categs, pointarray treenode, long endsite, long spp, sequence y, steptr alias) { /* set up fractional likelihoods at tips */ /* a version of makevalues2 found in seq.c */ /* used by proml */ long i, j, k, l; long b; for (k = 0; k < endsite; k++) { j = alias[k]; for (i = 0; i < spp; i++) { for (l = 0; l < categs; l++) { memset(treenode[i]->protx[k][l], 0, sizeof(double)*20); switch (y[i][j - 1]) { case 'A': treenode[i]->protx[k][l][0] = 1.0; break; case 'R': treenode[i]->protx[k][l][(long)arginine - (long)alanine] = 1.0; break; case 'N': treenode[i]->protx[k][l][(long)asparagine - (long)alanine] = 1.0; break; case 'D': treenode[i]->protx[k][l][(long)aspartic - (long)alanine] = 1.0; break; case 'C': treenode[i]->protx[k][l][(long)cysteine - (long)alanine] = 1.0; break; case 'Q': treenode[i]->protx[k][l][(long)glutamine - (long)alanine] = 1.0; break; case 'E': treenode[i]->protx[k][l][(long)glutamic - (long)alanine] = 1.0; break; case 'G': treenode[i]->protx[k][l][(long)glycine - (long)alanine] = 1.0; break; case 'H': treenode[i]->protx[k][l][(long)histidine - (long)alanine] = 1.0; break; case 'I': treenode[i]->protx[k][l][(long)isoleucine - (long)alanine] = 1.0; break; case 'L': treenode[i]->protx[k][l][(long)leucine - (long)alanine] = 1.0; break; case 'K': treenode[i]->protx[k][l][(long)lysine - (long)alanine] = 1.0; break; case 'M': treenode[i]->protx[k][l][(long)methionine - (long)alanine] = 1.0; break; case 'F': treenode[i]->protx[k][l][(long)phenylalanine - (long)alanine] = 1.0; break; case 'P': treenode[i]->protx[k][l][(long)proline - (long)alanine] = 1.0; break; case 'S': treenode[i]->protx[k][l][(long)serine - (long)alanine] = 1.0; break; case 'T': treenode[i]->protx[k][l][(long)threonine - (long)alanine] = 1.0; break; case 'W': treenode[i]->protx[k][l][(long)tryptophan - (long)alanine] = 1.0; break; case 'Y': treenode[i]->protx[k][l][(long)tyrosine - (long)alanine] = 1.0; break; case 'V': treenode[i]->protx[k][l][(long)valine - (long)alanine] = 1.0; break; case 'B': treenode[i]->protx[k][l][(long)asparagine - (long)alanine] = 1.0; treenode[i]->protx[k][l][(long)aspartic - (long)alanine] = 1.0; break; case 'Z': treenode[i]->protx[k][l][(long)glutamine - (long)alanine] = 1.0; treenode[i]->protx[k][l][(long)glutamic - (long)alanine] = 1.0; break; case 'X': /* unknown aa */ for (b = 0; b <= 19; b++) treenode[i]->protx[k][l][b] = 1.0; break; case '?': /* unknown aa */ for (b = 0; b <= 19; b++) treenode[i]->protx[k][l][b] = 1.0; break; case '*': /* stop codon symbol */ for (b = 0; b <= 19; b++) treenode[i]->protx[k][l][b] = 1.0; break; case '-': /* deletion event-absent data or aa */ for (b = 0; b <= 19; b++) treenode[i]->protx[k][l][b] = 1.0; break; } } } } } /* prot_makevalues */ void free_pmatrix(long sib) { long j,k,l; for (j = 0; j < rcategs; j++) { for (k = 0; k < categs; k++) { for (l = 0; l < 20; l++) free(pmatrices[sib][j][k][l]); free(pmatrices[sib][j][k]); } free(pmatrices[sib][j]); } free(pmatrices[sib]); } void alloc_pmatrix(long sib) { /* Allocate memory for a new pmatrix. Called iff num_sibs>max_num_sibs */ long j, k, l; double ****temp_matrix; temp_matrix = (double ****) Malloc (rcategs * sizeof(double ***)); for (j = 0; j < rcategs; j++) { temp_matrix[j] = (double ***) Malloc(categs * sizeof(double **)); for (k = 0; k < categs; k++) { temp_matrix[j][k] = (double **) Malloc(20 * sizeof (double *)); for (l = 0; l < 20; l++) temp_matrix[j][k][l] = (double *) Malloc(20 * sizeof(double)); } } pmatrices[sib] = temp_matrix; max_num_sibs++; } /* alloc_pmatrix */ void prot_freetable() { long i,j,k,l; for (j = 0; j < rcategs; j++) { for (k = 0; k < categs; k++) { for (l = 0; l < 20; l++) free(ddpmatrix[j][k][l]); free(ddpmatrix[j][k]); } free(ddpmatrix[j]); } free(ddpmatrix); for (j = 0; j < rcategs; j++) { for (k = 0; k < categs; k++) { for (l = 0; l < 20; l++) free(dpmatrix[j][k][l]); free(dpmatrix[j][k]); } free(dpmatrix[j]); } free(dpmatrix); for (j = 0; j < rcategs; j++) free(tbl[j]); free(tbl); for ( i = 0 ; i < max_num_sibs ; i++ ) free_pmatrix(i); free(pmatrices); } void prot_inittable() { /* Define a lookup table. Precompute values and print them out in tables */ /* Allocate memory for the pmatrices, dpmatices and ddpmatrices */ long i, j, k, l; double sumrates; /* Allocate memory for pmatrices, the array of pointers to pmatrices */ pmatrices = (double *****) Malloc ( spp * sizeof(double ****)); /* Allocate memory for the first 2 pmatrices, the matrix of conversion */ /* probabilities, but only once per run (aka not on the second jumble. */ alloc_pmatrix(0); alloc_pmatrix(1); /* Allocate memory for one dpmatrix, the first derivative matrix */ dpmatrix = (double ****) Malloc( rcategs * sizeof(double ***)); for (j = 0; j < rcategs; j++) { dpmatrix[j] = (double ***) Malloc( categs * sizeof(double **)); for (k = 0; k < categs; k++) { dpmatrix[j][k] = (double **) Malloc( 20 * sizeof(double *)); for (l = 0; l < 20; l++) dpmatrix[j][k][l] = (double *) Malloc( 20 * sizeof(double)); } } /* Allocate memory for one ddpmatrix, the second derivative matrix */ ddpmatrix = (double ****) Malloc( rcategs * sizeof(double ***)); for (j = 0; j < rcategs; j++) { ddpmatrix[j] = (double ***) Malloc( categs * sizeof(double **)); for (k = 0; k < categs; k++) { ddpmatrix[j][k] = (double **) Malloc( 20 * sizeof(double *)); for (l = 0; l < 20; l++) ddpmatrix[j][k][l] = (double *) Malloc( 20 * sizeof(double)); } } /* Allocate memory and assign values to tbl, the matrix of possible rates*/ tbl = (double **) Malloc( rcategs * sizeof(double *)); for (j = 0; j < rcategs; j++) tbl[j] = (double *) Malloc( categs * sizeof(double)); for (j = 0; j < rcategs; j++) for (k = 0; k < categs; k++) tbl[j][k] = rrate[j]*rate[k]; sumrates = 0.0; for (i = 0; i < endsite; i++) { for (j = 0; j < rcategs; j++) sumrates += aliasweight[i] * probcat[j] * tbl[j][category[alias[i] - 1] - 1]; } sumrates /= (double)sites; for (j = 0; j < rcategs; j++) for (k = 0; k < categs; k++) { tbl[j][k] /= sumrates; } if(jumb > 1) return; if (gama) { fprintf(outfile, "\nDiscrete approximation to gamma distributed rates\n"); fprintf(outfile, " Coefficient of variation of rates = %f (alpha = %f)\n", cv, alpha); } if (rcategs > 1) { fprintf(outfile, "\nStates in HMM Rate of change Probability\n\n"); for (i = 0; i < rcategs; i++) if (probcat[i] < 0.0001) fprintf(outfile, "%9ld%16.3f%20.6f\n", i+1, rrate[i], probcat[i]); else if (probcat[i] < 0.001) fprintf(outfile, "%9ld%16.3f%19.5f\n", i+1, rrate[i], probcat[i]); else if (probcat[i] < 0.01) fprintf(outfile, "%9ld%16.3f%18.4f\n", i+1, rrate[i], probcat[i]); else fprintf(outfile, "%9ld%16.3f%17.3f\n", i+1, rrate[i], probcat[i]); putc('\n', outfile); if (auto_) fprintf(outfile, "Expected length of a patch of sites having the same rate = %8.3f\n", 1/lambda); putc('\n', outfile); } if (categs > 1) { fprintf(outfile, "\nSite category Rate of change\n\n"); for (k = 0; k < categs; k++) fprintf(outfile, "%9ld%16.3f\n", k+1, rate[k]); } if ((rcategs > 1) || (categs >> 1)) fprintf(outfile, "\n\n"); } /* prot_inittable */ void getinput() { /* reads the input data */ /* void debugtree(tree*, FILE*) */ if (!justwts || firstset) inputoptions(); if (!justwts || firstset) input_protdata(sites); if ( !firstset ) freelrsaves(); makeweights(); alloclrsaves(); setuptree2(&curtree); if (!usertree) { setuptree2(&bestree); setuptree2(&priortree); if (njumble > 1) setuptree2(&bestree2); } prot_allocx(nonodes2, rcategs, curtree.nodep, usertree); if (!usertree) { prot_allocx(nonodes2, rcategs, bestree.nodep, 0); prot_allocx(nonodes2, rcategs, priortree.nodep, 0); if (njumble > 1) prot_allocx(nonodes2, rcategs, bestree2.nodep, 0); } prot_makevalues(rcategs, curtree.nodep, endsite, spp, y, alias); } /* getinput */ void inittravtree(node *p) { /* traverse tree to set initialized and v to initial values */ node* q; p->initialized = false; p->back->initialized = false; if ( usertree && (!lngths || p->iter) ) { p->v = initialv; p->back->v = initialv; } if ( !p->tip ) { q = p->next; while ( q != p ) { inittravtree(q->back); q = q->next; } } } /* inittravtree */ void prot_nuview(node *p) { long i, j, k, l, m, num_sibs, sib_index; node *sib_ptr, *sib_back_ptr; psitelike prot_xx, x2; double lw, prod7; double **pmat; double maxx; double correction; /* Figure out how many siblings the current node has */ /* and be sure that pmatrices is large enough */ num_sibs = count_sibs(p); for (i = 0; i < num_sibs; i++) if (pmatrices[i] == NULL) alloc_pmatrix(i); /* Recursive calls, should be called for all children */ sib_ptr = p; for (i=0 ; i < num_sibs; i++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; if (!sib_back_ptr->tip && !sib_back_ptr->initialized) prot_nuview(sib_back_ptr); } /* Make pmatrices for all possible combinations of category, rcateg */ /* and sib */ sib_ptr = p; /* return to p */ for (sib_index=0; sib_index < num_sibs; sib_index++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; lw = sib_back_ptr->v; for (j = 0; j < rcategs; j++) for (k = 0; k < categs; k++) make_pmatrix(pmatrices[sib_index][j][k], NULL, NULL, 0, lw, tbl[j][k], eigmat, probmat); } for (i = 0; i < endsite; i++) { maxx = 0; correction = 0; k = category[alias[i]-1] - 1; for (j = 0; j < rcategs; j++) { /* initialize to 1 all values of prot_xx */ for (m = 0; m <= 19; m++) prot_xx[m] = 1; sib_ptr = p; /* return to p */ /* loop through all sibs and calculate likelihoods for all possible*/ /* amino acid combinations */ for (sib_index=0; sib_index < num_sibs; sib_index++) { sib_ptr = sib_ptr->next; sib_back_ptr = sib_ptr->back; if ( j == 0) correction += sib_back_ptr->underflows[i]; memcpy(x2, sib_back_ptr->protx[i][j], sizeof(psitelike)); pmat = pmatrices[sib_index][j][k]; for (m = 0; m <= 19; m++) { prod7 = 0; for (l = 0; l <= 19; l++) prod7 += (pmat[m][l] * x2[l]); prot_xx[m] *= prod7; if ( prot_xx[m] > maxx && sib_index == (num_sibs - 1)) maxx = prot_xx[m]; } } /* And the final point of this whole function: */ memcpy(p->protx[i][j], prot_xx, sizeof(psitelike)); } p->underflows[i] = 0; if ( maxx < MIN_DOUBLE ) fix_protx(p,i,maxx,rcategs); p->underflows[i] += correction; } p->initialized = true; } /* prot_nuview */ void prot_slopecurv(node *p,double y,double *like,double *slope,double *curve) { /* compute log likelihood, slope and curvature at node p */ long i, j, k, l, m, lai; double sum, sumc, sumterm, lterm, sumcs, sumcc, sum2, slope2, curve2; double frexm = 0; /* frexm = freqaa[m]*x1[m] */ /* frexml = frexm*x2[l] */ double prod4m, prod5m, prod6m; /* elements of prod4-5 for */ /* each m */ double **pmat, **dpmat, **ddpmat; /* local pointers to global*/ /* matrices */ double prod4, prod5, prod6; contribarr thelike, nulike, nuslope, nucurve, theslope, thecurve, clai, cslai, cclai; node *q; psitelike x1, x2; q = p->back; sum = 0.0; for (j = 0; j < rcategs; j++) { for (k = 0; k < categs; k++) { make_pmatrix(pmatrices[0][j][k], dpmatrix[j][k], ddpmatrix[j][k], 2, y, tbl[j][k], eigmat, probmat); } } for (i = 0; i < endsite; i++) { k = category[alias[i]-1] - 1; for (j = 0; j < rcategs; j++) { memcpy(x1, p->protx[i][j], sizeof(psitelike)); memcpy(x2, q->protx[i][j], sizeof(psitelike)); pmat = pmatrices[0][j][k]; dpmat = dpmatrix[j][k]; ddpmat = ddpmatrix[j][k]; prod4 = 0.0; prod5 = 0.0; prod6 = 0.0; for (m = 0; m <= 19; m++) { prod4m = 0.0; prod5m = 0.0; prod6m = 0.0; frexm = x1[m] * freqaa[m]; for (l = 0; l <= 19; l++) { prod4m += x2[l] * pmat[m][l]; prod5m += x2[l] * dpmat[m][l]; prod6m += x2[l] * ddpmat[m][l]; } prod4 += frexm * prod4m; prod5 += frexm * prod5m; prod6 += frexm * prod6m; } term[i][j] = prod4; slopeterm[i][j] = prod5; curveterm[i][j] = prod6; } sumterm = 0.0; for (j = 0; j < rcategs; j++) sumterm += probcat[j] * term[i][j]; if (sumterm <= 0.0) sumterm = 0.000000001; /* ? shouldn't get here ?? */ lterm = log(sumterm) + p->underflows[i] + q->underflows[i]; for (j = 0; j < rcategs; j++) { term[i][j] = term[i][j] / sumterm; slopeterm[i][j] = slopeterm[i][j] / sumterm; curveterm[i][j] = curveterm[i][j] / sumterm; } sum += (aliasweight[i] * lterm); } for (i = 0; i < rcategs; i++) { thelike[i] = 1.0; theslope[i] = 0.0; thecurve[i] = 0.0; } for (i = 0; i < sites; i++) { sumc = 0.0; sumcs = 0.0; sumcc = 0.0; for (k = 0; k < rcategs; k++) { sumc += probcat[k] * thelike[k]; sumcs += probcat[k] * theslope[k]; sumcc += probcat[k] * thecurve[k]; } sumc *= lambda; sumcs *= lambda; sumcc *= lambda; if ((ally[i] > 0) && (location[ally[i]-1] > 0)) { lai = location[ally[i] - 1]; memcpy(clai, term[lai - 1], rcategs*sizeof(double)); memcpy(cslai, slopeterm[lai - 1], rcategs*sizeof(double)); memcpy(cclai, curveterm[lai - 1], rcategs*sizeof(double)); if (weight[i] > 1) { for (j = 0; j < rcategs; j++) { if (clai[j] > 0.0) clai[j] = exp(weight[i]*log(clai[j])); else clai[j] = 0.0; if (cslai[j] > 0.0) cslai[j] = exp(weight[i]*log(cslai[j])); else cslai[j] = 0.0; if (cclai[j] > 0.0) cclai[j] = exp(weight[i]*log(cclai[j])); else cclai[j] = 0.0; } } for (j = 0; j < rcategs; j++) { nulike[j] = ((1.0 - lambda) * thelike[j] + sumc) * clai[j]; nuslope[j] = ((1.0 - lambda) * theslope[j] + sumcs) * clai[j] + ((1.0 - lambda) * thelike[j] + sumc) * cslai[j]; nucurve[j] = ((1.0 - lambda) * thecurve[j] + sumcc) * clai[j] + 2.0 * ((1.0 - lambda) * theslope[j] + sumcs) * cslai[j] + ((1.0 - lambda) * thelike[j] + sumc) * cclai[j]; } } else { for (j = 0; j < rcategs; j++) { nulike[j] = ((1.0 - lambda) * thelike[j] + sumc); nuslope[j] = ((1.0 - lambda) * theslope[j] + sumcs); nucurve[j] = ((1.0 - lambda) * thecurve[j] + sumcc); } } memcpy(thelike, nulike, rcategs*sizeof(double)); memcpy(theslope, nuslope, rcategs*sizeof(double)); memcpy(thecurve, nucurve, rcategs*sizeof(double)); } sum2 = 0.0; slope2 = 0.0; curve2 = 0.0; for (i = 0; i < rcategs; i++) { sum2 += probcat[i] * thelike[i]; slope2 += probcat[i] * theslope[i]; curve2 += probcat[i] * thecurve[i]; } sum += log(sum2); (*like) = sum; (*slope) = slope2 / sum2; /* Expressed in terms of *slope to prevent overflow */ (*curve) = curve2 / sum2 - *slope * *slope; } /* prot_slopecurv */ void makenewv(node *p) { /* Newton-Raphson algorithm improvement of a branch length */ long it, ite; double y, yold=0, yorig, like, slope, curve, oldlike=0; boolean done, firsttime, better; node *q; q = p->back; y = p->v; yorig = y; done = false; firsttime = true; it = 1; ite = 0; while ((it < iterations) && (ite < 20) && (!done)) { prot_slopecurv(p, y, &like, &slope, &curve); better = false; if (firsttime) { /* if no older value of y to compare with */ yold = y; oldlike = like; firsttime = false; better = true; } else { if (like > oldlike) { /* update the value of yold if it was better */ yold = y; oldlike = like; better = true; it++; } } if (better) { y = y + slope/fabs(curve); /* Newton-Raphson, forced uphill-wards */ if (y < epsilon) y = epsilon; } else { if (fabs(y - yold) < epsilon) ite = 20; y = (y + (7 * yold)) / 8; /* retract 87% of way back */ } ite++; done = fabs(y-yold) < 0.1*epsilon; } smoothed = (fabs(yold-yorig) < epsilon) && (yorig > 1000.0*epsilon); p->v = yold; /* the last one that had better likelihood */ q->v = yold; curtree.likelihood = oldlike; } /* makenewv */ void update(node *p) { node *q; if (!p->tip && !p->initialized) prot_nuview(p); if (!p->back->tip && !p->back->initialized) prot_nuview(p->back); if ((!usertree) || (usertree && !lngths) || p->iter) { makenewv(p); if ( smoothit ) { inittrav(p); inittrav(p->back); } else if ( inserting && !p->tip ) { for ( q = p->next; q != p; q = q->next ) q->initialized = false; } } } /* update */ void smooth(node *p) { long i, num_sibs; node *sib_ptr; smoothed = false; update(p); if (p->tip) return; num_sibs = count_sibs(p); sib_ptr = p; for (i=0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; if (polishing || (smoothit && !smoothed)) { smooth(sib_ptr->back); p->initialized = false; sib_ptr->initialized = false; } } } /* smooth */ void make_pmatrix(double **matrix, double **dmat, double **ddmat, long derivative, double lz, double rat, double *eigmat, double **probmat) { /* Computes the R matrix such that matrix[m][l] is the joint probability */ /* of m and l. */ /* Computes a P matrix such that matrix[m][l] is the conditional */ /* probability of m given l. This is accomplished by dividing all terms */ /* in the R matrix by freqaa[m], the frequency of l. */ long k, l, m; /* (l) original character state */ /* (m) final character state */ /* (k) lambda counter */ double p0, p1, p2, q; double elambdat[20], delambdat[20], ddelambdat[20]; /* exponential term for matrix */ /* and both derivative matrices */ for (k = 0; k <= 19; k++) { elambdat[k] = exp(lz * rat * eigmat[k]); if(derivative != 0) { delambdat[k] = (elambdat[k] * rat * eigmat[k]); ddelambdat[k] = (delambdat[k] * rat * eigmat[k]); } } for (m = 0; m <= 19; m++) { for (l = 0; l <= 19; l++) { p0 = 0.0; p1 = 0.0; p2 = 0.0; for (k = 0; k <= 19; k++) { q = probmat[k][m] * probmat[k][l]; p0 += (q * elambdat[k]); if(derivative !=0) { p1 += (q * delambdat[k]); p2 += (q * ddelambdat[k]); } } matrix[m][l] = p0 / freqaa[m]; if(derivative != 0) { dmat[m][l] = p1 / freqaa[m]; ddmat[m][l] = p2 / freqaa[m]; } } } } /* make_pmatrix */ double prot_evaluate(node *p, boolean saveit) { contribarr tterm; double sum, sum2, sumc, y, prod4, prodl, frexm, sumterm, lterm; double **pmat; long i, j, k, l, m, lai; node *q; psitelike x1, x2; sum = 0.0; q = p->back; y = p->v; for (j = 0; j < rcategs; j++) for (k = 0; k < categs; k++) make_pmatrix(pmatrices[0][j][k],NULL,NULL,0,y,tbl[j][k],eigmat,probmat); for (i = 0; i < endsite; i++) { k = category[alias[i]-1] - 1; for (j = 0; j < rcategs; j++) { memcpy(x1, p->protx[i][j], sizeof(psitelike)); memcpy(x2, q->protx[i][j], sizeof(psitelike)); prod4 = 0.0; pmat = pmatrices[0][j][k]; for (m = 0; m <= 19; m++) { prodl = 0.0; for (l = 0; l <= 19; l++) prodl += (pmat[m][l] * x2[l]); frexm = x1[m] * freqaa[m]; prod4 += (prodl * frexm); } tterm[j] = prod4; } sumterm = 0.0; for (j = 0; j < rcategs; j++) sumterm += probcat[j] * tterm[j]; if (sumterm < 0.0) sumterm = 0.00000001; /* ??? */ lterm = log(sumterm) + p->underflows[i] + q->underflows[i]; for (j = 0; j < rcategs; j++) clai[j] = tterm[j] / sumterm; memcpy(contribution[i], clai, rcategs*sizeof(double)); if (saveit && !auto_ && usertree && (which <= shimotrees)) l0gf[which - 1][i] = lterm; sum += aliasweight[i] * lterm; } for (j = 0; j < rcategs; j++) like[j] = 1.0; for (i = 0; i < sites; i++) { sumc = 0.0; for (k = 0; k < rcategs; k++) sumc += probcat[k] * like[k]; sumc *= lambda; if ((ally[i] > 0) && (location[ally[i]-1] > 0)) { lai = location[ally[i] - 1]; memcpy(clai, contribution[lai - 1], rcategs*sizeof(double)); for (j = 0; j < rcategs; j++) nulike[j] = ((1.0 - lambda) * like[j] + sumc) * clai[j]; } else { for (j = 0; j < rcategs; j++) nulike[j] = ((1.0 - lambda) * like[j] + sumc); } memcpy(like, nulike, rcategs*sizeof(double)); } sum2 = 0.0; for (i = 0; i < rcategs; i++) sum2 += probcat[i] * like[i]; sum += log(sum2); curtree.likelihood = sum; if (!saveit || auto_ || !usertree) return sum; if(which <= shimotrees) l0gl[which - 1] = sum; if (which == 1) { maxwhich = 1; maxlogl = sum; return sum; } if (sum > maxlogl) { maxwhich = which; maxlogl = sum; } return sum; } /* prot_evaluate */ void treevaluate() { /* evaluate a user tree */ long i; inittravtree(curtree.start); polishing = true; smoothit = true; for (i = 1; i <= smoothings * 4; i++) smooth (curtree.start); dummy = prot_evaluate(curtree.start, true); } /* treevaluate */ void promlcopy(tree *a, tree *b, long nonodes, long categs) { /* copy tree a to tree b */ long i, j=0; node *p, *q; for (i = 0; i < spp; i++) { prot_copynode(a->nodep[i], b->nodep[i], categs); if (a->nodep[i]->back) { if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]) b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]; else if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]->next ) b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next; else b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next->next; } else b->nodep[i]->back = NULL; } for (i = spp; i < nonodes; i++) { p = a->nodep[i]; q = b->nodep[i]; for (j = 1; j <= 3; j++) { prot_copynode(p, q, categs); if (p->back) { if (p->back == a->nodep[p->back->index - 1]) q->back = b->nodep[p->back->index - 1]; else if (p->back == a->nodep[p->back->index - 1]->next) q->back = b->nodep[p->back->index - 1]->next; else q->back = b->nodep[p->back->index - 1]->next->next; } else q->back = NULL; p = p->next; q = q->next; } } b->likelihood = a->likelihood; b->start = a->start; /* start used in dnaml only */ b->root = a->root; /* root used in dnamlk only */ } /* promlcopy */ void proml_re_move(node **p, node **q) { /* remove p and record in q where it was */ long i; /* assumes bifurcation (OK) */ *q = (*p)->next->back; hookup(*q, (*p)->next->next->back); (*p)->next->back = NULL; (*p)->next->next->back = NULL; (*q)->v += (*q)->back->v; (*q)->back->v = (*q)->v; if ( smoothit ) { inittrav((*q)); inittrav((*q)->back); inittrav((*p)->back); } if ( smoothit ) { for ( i = 0 ; i < smoothings ; i++ ) { smooth(*q); smooth((*q)->back); } } else smooth(*q); } /* proml_re_move */ void insert_(node *p, node *q, boolean dooinit) { /* Insert q near p */ long i, j, num_sibs; node *r, *sib_ptr; r = p->next->next; hookup(r, q->back); hookup(p->next, q); q->v = 0.5 * q->v; q->back->v = q->v; r->v = q->v; r->back->v = r->v; p->initialized = false; if (dooinit) { inittrav(p); inittrav(q); inittrav(q->back); } i = 1; inserting = true; while (i <= smoothings) { smooth(p); if (!p->tip) { num_sibs = count_sibs(p); sib_ptr = p; for (j=0; j < num_sibs; j++) { smooth(sib_ptr->next->back); sib_ptr = sib_ptr->next; } } i++; } inserting = false; } /* insert_ */ void addtraverse(node *p, node *q, boolean contin) { /* try adding p at q, proceed recursively through tree */ long i, num_sibs; double like, vsave = 0; node *qback = NULL, *sib_ptr; if (!smoothit) { vsave = q->v; qback = q->back; } insert_(p, q, false); like = prot_evaluate(p, false); if (like > bestyet + LIKE_EPSILON || bestyet == UNDEFINED) { bestyet = like; if (smoothit) { addwhere = q; promlcopy(&curtree, &bestree, nonodes2, rcategs); } else qwhere = q; succeeded = true; } if (smoothit) promlcopy(&priortree, &curtree, nonodes2, rcategs); else { hookup (q, qback); q->v = vsave; q->back->v = vsave; curtree.likelihood = bestyet; } if (!q->tip && contin) { num_sibs = count_sibs(q); if (q == curtree.start) num_sibs++; sib_ptr = q; for (i=0; i < num_sibs; i++) { addtraverse(p, sib_ptr->next->back, contin); sib_ptr = sib_ptr->next; } } } /* addtraverse */ void globrearrange() { /* does global rearrangements */ tree globtree; tree oldtree; int i,j,k,l,num_sibs,num_sibs2; node *where,*sib_ptr,*sib_ptr2; double oldbestyet = curtree.likelihood; int success = false; alloctree(&globtree.nodep,nonodes2,0); alloctree(&oldtree.nodep,nonodes2,0); setuptree2(&globtree); setuptree2(&oldtree); prot_allocx(nonodes2, rcategs, globtree.nodep, 0); prot_allocx(nonodes2, rcategs, oldtree.nodep, 0); promlcopy(&curtree,&globtree,nonodes2,rcategs); promlcopy(&curtree,&oldtree,nonodes2,rcategs); bestyet = curtree.likelihood; for ( i = spp ; i < nonodes2 ; i++ ) { num_sibs = count_sibs(curtree.nodep[i]); sib_ptr = curtree.nodep[i]; if ( (i - spp) % (( nonodes2 / 72 ) + 1 ) == 0 ) putchar('.'); fflush(stdout); for ( j = 0 ; j <= num_sibs ; j++ ) { proml_re_move(&sib_ptr,&where); promlcopy(&curtree,&priortree,nonodes2,rcategs); qwhere = where; if (where->tip) { promlcopy(&oldtree,&curtree,nonodes2,rcategs); promlcopy(&oldtree,&bestree,nonodes2,rcategs); sib_ptr=sib_ptr->next; continue; } else num_sibs2 = count_sibs(where); sib_ptr2 = where; for ( k = 0 ; k < num_sibs2 ; k++ ) { addwhere = NULL; addtraverse(sib_ptr,sib_ptr2->back,true); if ( !smoothit ) { if (succeeded && qwhere != where && qwhere != where->back) { insert_(sib_ptr,qwhere,true); smoothit = true; for (l = 1; l<=smoothings; l++) { smooth (where); smooth (where->back); } smoothit = false; success = true; promlcopy(&curtree,&globtree,nonodes2,rcategs); promlcopy(&priortree,&curtree,nonodes2,rcategs); } } else if ( addwhere && where != addwhere && where->back != addwhere && bestyet > globtree.likelihood) { promlcopy(&bestree,&globtree,nonodes2,rcategs); success = true; } sib_ptr2 = sib_ptr2->next; } promlcopy(&oldtree,&curtree,nonodes2,rcategs); promlcopy(&oldtree,&bestree,nonodes2,rcategs); sib_ptr = sib_ptr->next; } } promlcopy(&globtree,&curtree,nonodes2,rcategs); promlcopy(&globtree,&bestree,nonodes2,rcategs); if (success && globtree.likelihood > oldbestyet) { succeeded = true; } else { succeeded = false; } bestyet = globtree.likelihood; prot_freex(nonodes2,oldtree.nodep); prot_freex(nonodes2,globtree.nodep); freetree2(globtree.nodep,nonodes2); freetree2(oldtree.nodep,nonodes2); } /* globrearrange */ void freelrsaves() { long i,j; for ( i = 0 ; i < NLRSAVES ; i++ ) { for (j = 0; j < oldendsite; j++) free(lrsaves[i]->protx[j]); free(lrsaves[i]->protx); free(lrsaves[i]->underflows); free(lrsaves[i]); } free(lrsaves); } void alloclrsaves() { long i,j; lrsaves = Malloc(NLRSAVES * sizeof(node*)); oldendsite = endsite; for ( i = 0 ; i < NLRSAVES ; i++ ) { lrsaves[i] = Malloc(sizeof(node)); lrsaves[i]->protx = Malloc(endsite*sizeof(ratelike)); lrsaves[i]->underflows = Malloc(endsite * sizeof (double)); for (j = 0; j < endsite; j++) lrsaves[i]->protx[j] = (pratelike)Malloc(rcategs*sizeof(psitelike)); } } /* alloclrsaves */ void rearrange(node *p, node *pp) { /* rearranges the tree locally moving pp around near p */ long i, num_sibs; node *q, *r, *sib_ptr; node *rnb, *rnnb; /* assumes bifurcations (OK) */ if (!p->tip && !p->back->tip) { curtree.likelihood = bestyet; if (p->back->next != pp) r = p->back->next; else r = p->back->next->next; if (!smoothit) { rnb = r->next->back; rnnb = r->next->next->back; prot_copynode(r,lrsaves[0],rcategs); prot_copynode(r->next,lrsaves[1],rcategs); prot_copynode(r->next->next,lrsaves[2],rcategs); prot_copynode(p->next,lrsaves[3],rcategs); prot_copynode(p->next->next,lrsaves[4],rcategs); } else promlcopy(&curtree, &bestree, nonodes2, rcategs); proml_re_move(&r, &q); if (smoothit) promlcopy(&curtree, &priortree, nonodes2, rcategs); else qwhere = q; num_sibs = count_sibs (p); sib_ptr = p; for (i=0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; addtraverse(r, sib_ptr->back, false); } if (smoothit) promlcopy(&bestree, &curtree, nonodes2, rcategs); else { if (qwhere == q) { hookup(rnb,r->next); hookup(rnnb,r->next->next); prot_copynode(lrsaves[0],r,rcategs); prot_copynode(lrsaves[1],r->next,rcategs); prot_copynode(lrsaves[2],r->next->next,rcategs); prot_copynode(lrsaves[3],p->next,rcategs); prot_copynode(lrsaves[4],p->next->next,rcategs); rnb->v = r->next->v; rnnb->v = r->next->next->v; r->back->v = r->v; curtree.likelihood = bestyet; } else { insert_(r, qwhere, true); smoothit = true; for (i = 1; i<=smoothings; i++) { smooth(r); smooth(r->back); } smoothit = false; promlcopy(&curtree, &bestree, nonodes2, rcategs); } } } if (!p->tip) { num_sibs = count_sibs(p); if (p == curtree.start) num_sibs++; sib_ptr = p; for (i=0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; rearrange(sib_ptr->back, p); } } } /* rearrange */ void proml_coordinates(node *p, double lengthsum, long *tipy, double *tipmax) { /* establishes coordinates of nodes */ node *q, *first, *last; double xx; if (p->tip) { p->xcoord = (long)(over * lengthsum + 0.5); p->ycoord = (*tipy); p->ymin = (*tipy); p->ymax = (*tipy); (*tipy) += down; if (lengthsum > (*tipmax)) (*tipmax) = lengthsum; return; } q = p->next; do { xx = q->v; if (xx > 100.0) xx = 100.0; proml_coordinates(q->back, lengthsum + xx, tipy,tipmax); q = q->next; } while ((p == curtree.start || p != q) && (p != curtree.start || p->next != q)); first = p->next->back; q = p; while (q->next != p) q = q->next; last = q->back; p->xcoord = (long)(over * lengthsum + 0.5); if (p == curtree.start) p->ycoord = p->next->next->back->ycoord; else p->ycoord = (first->ycoord + last->ycoord) / 2; p->ymin = first->ymin; p->ymax = last->ymax; } /* proml_coordinates */ void proml_printree() { /* prints out diagram of the tree2 */ long tipy; double scale, tipmax; long i; if (!treeprint) return; putc('\n', outfile); tipy = 1; tipmax = 0.0; proml_coordinates(curtree.start, 0.0, &tipy, &tipmax); scale = 1.0 / (long)(tipmax + 1.000); for (i = 1; i <= (tipy - down); i++) drawline2(i, scale, curtree); putc('\n', outfile); } /* proml_printree */ void sigma(node *p, double *sumlr, double *s1, double *s2) { /* compute standard deviation */ double tt, aa, like, slope, curv; prot_slopecurv(p, p->v, &like, &slope, &curv); tt = p->v; p->v = epsilon; p->back->v = epsilon; aa = prot_evaluate(p, false); p->v = tt; p->back->v = tt; (*sumlr) = prot_evaluate(p, false) - aa; if (curv < -epsilon) { (*s1) = p->v + (-slope - sqrt(slope * slope - 3.841 * curv)) / curv; (*s2) = p->v + (-slope + sqrt(slope * slope - 3.841 * curv)) / curv; } else { (*s1) = -1.0; (*s2) = -1.0; } } /* sigma */ void describe(node *p) { /* print out information for one branch */ long i, num_sibs; node *q, *sib_ptr; double sumlr, sigma1, sigma2; if (!p->tip && !p->initialized) prot_nuview(p); if (!p->back->tip && !p->back->initialized) prot_nuview(p->back); q = p->back; if (q->tip) { fprintf(outfile, " "); for (i = 0; i < nmlngth; i++) putc(nayme[q->index-1][i], outfile); fprintf(outfile, " "); } else fprintf(outfile, " %4ld ", q->index - spp); if (p->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[p->index-1][i], outfile); } else fprintf(outfile, "%4ld ", p->index - spp); fprintf(outfile, "%15.5f", q->v); if (!usertree || (usertree && !lngths) || p->iter) { sigma(q, &sumlr, &sigma1, &sigma2); if (sigma1 <= sigma2) fprintf(outfile, " ( zero, infinity)"); else { fprintf(outfile, " ("); if (sigma2 <= 0.0) fprintf(outfile, " zero"); else fprintf(outfile, "%9.5f", sigma2); fprintf(outfile, ",%12.5f", sigma1); putc(')', outfile); } if (sumlr > 1.9205) fprintf(outfile, " *"); if (sumlr > 2.995) putc('*', outfile); } putc('\n', outfile); if (!p->tip) { num_sibs = count_sibs(p); sib_ptr = p; for (i=0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; describe(sib_ptr->back); } } } /* describe */ void prot_reconstr(node *p, long n) { /* reconstruct and print out acid at site n+1 at node p */ long i, j, k, first, num_sibs = 0; double f, sum, xx[20]; node *q = NULL; if (p->tip) putc(y[p->index-1][n], outfile); else { num_sibs = count_sibs(p); if ((ally[n] == 0) || (location[ally[n]-1] == 0)) putc('.', outfile); else { j = location[ally[n]-1] - 1; sum = 0; for (i = 0; i <= 19; i++) { f = p->protx[j][mx-1][i]; if (!p->tip) { q = p; for (k = 0; k < num_sibs; k++) { q = q->next; f *= q->protx[j][mx-1][i]; } } f = sqrt(f); xx[i] = f * freqaa[i]; sum += xx[i]; } for (i = 0; i <= 19; i++) xx[i] /= sum; first = 0; for (i = 0; i <= 19; i++) if (xx[i] > xx[first]) first = i; if (xx[first] > 0.95) putc(aachar[first], outfile); else putc(tolower(aachar[first]), outfile); if (rctgry && rcategs > 1) mx = mp[n][mx - 1]; else mx = 1; } } } /* prot_reconstr */ void rectrav(node *p, long m, long n) { /* print out segment of reconstructed sequence for one branch */ long i; node *q; putc(' ', outfile); if (p->tip) { for (i = 0; i < nmlngth; i++) putc(nayme[p->index-1][i], outfile); } else fprintf(outfile, "%4ld ", p->index - spp); fprintf(outfile, " "); mx = mx0; for (i = m; i <= n; i++) { if ((i % 10 == 0) && (i != m)) putc(' ', outfile); prot_reconstr(p, i); } putc('\n', outfile); if (!p->tip) for ( q = p->next; q != p; q = q->next ) rectrav(q->back, m, n); mx1 = mx; } /* rectrav */ void summarize() { /* print out branch length information and node numbers */ long i, j, mm, num_sibs; double mode, sum; double like[maxcategs],nulike[maxcategs]; double **marginal; node *sib_ptr; if (!treeprint) return; fprintf(outfile, "\nremember: "); if (outgropt) fprintf(outfile, "(although rooted by outgroup) "); fprintf(outfile, "this is an unrooted tree!\n\n"); fprintf(outfile, "Ln Likelihood = %11.5f\n", curtree.likelihood); fprintf(outfile, "\n Between And Length"); if (!(usertree && lngths && haslengths)) fprintf(outfile, " Approx. Confidence Limits"); fprintf(outfile, "\n"); fprintf(outfile, " ------- --- ------"); if (!(usertree && lngths && haslengths)) fprintf(outfile, " ------- ---------- ------"); fprintf(outfile, "\n\n"); for (i = spp; i < nonodes2; i++) { /* So this works with arbitrary multifurcations */ if (curtree.nodep[i]) { num_sibs = count_sibs (curtree.nodep[i]); sib_ptr = curtree.nodep[i]; for (j = 0; j < num_sibs; j++) { sib_ptr->initialized = false; sib_ptr = sib_ptr->next; } } } describe(curtree.start->back); /* So this works with arbitrary multifurcations */ num_sibs = count_sibs(curtree.start); sib_ptr = curtree.start; for (i=0; i < num_sibs; i++) { sib_ptr = sib_ptr->next; describe(sib_ptr->back); } fprintf(outfile, "\n"); if (!(usertree && lngths && haslengths)) { fprintf(outfile, " * = significantly positive, P < 0.05\n"); fprintf(outfile, " ** = significantly positive, P < 0.01\n\n"); } dummy = prot_evaluate(curtree.start, false); if (rctgry && rcategs > 1) { for (i = 0; i < rcategs; i++) like[i] = 1.0; for (i = sites - 1; i >= 0; i--) { sum = 0.0; for (j = 0; j < rcategs; j++) { nulike[j] = (1.0 - lambda + lambda * probcat[j]) * like[j]; mp[i][j] = j + 1; for (k = 1; k <= rcategs; k++) { if (k != j + 1) { if (lambda * probcat[k - 1] * like[k - 1] > nulike[j]) { nulike[j] = lambda * probcat[k - 1] * like[k - 1]; mp[i][j] = k; } } } if ((ally[i] > 0) && (location[ally[i]-1] > 0)) nulike[j] *= contribution[location[ally[i] - 1] - 1][j]; sum += nulike[j]; } for (j = 0; j < rcategs; j++) nulike[j] /= sum; memcpy(like, nulike, rcategs * sizeof(double)); } mode = 0.0; mx = 1; for (i = 1; i <= rcategs; i++) { if (probcat[i - 1] * like[i - 1] > mode) { mx = i; mode = probcat[i - 1] * like[i - 1]; } } mx0 = mx; fprintf(outfile, "Combination of categories that contributes the most to the likelihood:\n\n"); for (i = 1; i <= nmlngth + 3; i++) putc(' ', outfile); for (i = 1; i <= sites; i++) { fprintf(outfile, "%ld", mx); if (i % 10 == 0) putc(' ', outfile); if (i % 60 == 0 && i != sites) { putc('\n', outfile); for (j = 1; j <= nmlngth + 3; j++) putc(' ', outfile); } mx = mp[i - 1][mx - 1]; } fprintf(outfile, "\n\n"); marginal = (double **) Malloc(sites*sizeof(double *)); for (i = 0; i < sites; i++) marginal[i] = (double *) Malloc(rcategs*sizeof(double)); for (i = 0; i < rcategs; i++) like[i] = 1.0; for (i = sites - 1; i >= 0; i--) { sum = 0.0; for (j = 0; j < rcategs; j++) { nulike[j] = (1.0 - lambda + lambda * probcat[j]) * like[j]; for (k = 1; k <= rcategs; k++) { if (k != j + 1) nulike[j] += lambda * probcat[k - 1] * like[k - 1]; } if ((ally[i] > 0) && (location[ally[i]-1] > 0)) nulike[j] *= contribution[location[ally[i] - 1] - 1][j]; sum += nulike[j]; } for (j = 0; j < rcategs; j++) { nulike[j] /= sum; marginal[i][j] = nulike[j]; } memcpy(like, nulike, rcategs * sizeof(double)); } for (i = 0; i < rcategs; i++) like[i] = 1.0; for (i = 0; i < sites; i++) { sum = 0.0; for (j = 0; j < rcategs; j++) { nulike[j] = (1.0 - lambda + lambda * probcat[j]) * like[j]; for (k = 1; k <= rcategs; k++) { if (k != j + 1) nulike[j] += lambda * probcat[k - 1] * like[k - 1]; } marginal[i][j] *= like[j] * probcat[j]; sum += nulike[j]; } for (j = 0; j < rcategs; j++) nulike[j] /= sum; memcpy(like, nulike, rcategs * sizeof(double)); sum = 0.0; for (j = 0; j < rcategs; j++) sum += marginal[i][j]; for (j = 0; j < rcategs; j++) marginal[i][j] /= sum; } fprintf(outfile, "Most probable category at each site if > 0.95"); fprintf(outfile, " probability (\".\" otherwise)\n\n"); for (i = 1; i <= nmlngth + 3; i++) putc(' ', outfile); for (i = 0; i < sites; i++) { sum = 0.0; mm = 0; for (j = 0; j < rcategs; j++) if (marginal[i][j] > sum) { sum = marginal[i][j]; mm = j; } if (sum >= 0.95) fprintf(outfile, "%ld", mm+1); else putc('.', outfile); if ((i+1) % 60 == 0) { if (i != 0) { putc('\n', outfile); for (j = 1; j <= nmlngth + 3; j++) putc(' ', outfile); } } else if ((i+1) % 10 == 0) putc(' ', outfile); } putc('\n', outfile); for (i = 0; i < sites; i++) free(marginal[i]); free(marginal); } putc('\n', outfile); if (hypstate) { fprintf(outfile, "Probable sequences at interior nodes:\n\n"); fprintf(outfile, " node "); for (i = 0; (i < 13) && (i < ((sites + (sites-1)/10 - 39) / 2)); i++) putc(' ', outfile); fprintf(outfile, "Reconstructed sequence (caps if > 0.95)\n\n"); if (!rctgry || (rcategs == 1)) mx0 = 1; for (i = 0; i < sites; i += 60) { k = i + 59; if (k >= sites) k = sites - 1; rectrav(curtree.start, i, k); rectrav(curtree.start->back, i, k); putc('\n', outfile); mx0 = mx1; } } } /* summarize */ void initpromlnode(node **p, node **grbg, node *q, long len, long nodei, long *ntips, long *parens, initops whichinit, pointarray treenode, pointarray nodep, Char *str, Char *ch, FILE *intree) { /* initializes a node */ boolean minusread; double valyew, divisor; switch (whichinit) { case bottom: gnu(grbg, p); (*p)->index = nodei; (*p)->tip = false; malloc_ppheno((*p), endsite, rcategs); nodep[(*p)->index - 1] = (*p); break; case nonbottom: gnu(grbg, p); malloc_ppheno(*p, endsite, rcategs); (*p)->index = nodei; break; case tip: match_names_to_data(str, nodep, p, spp); break; case iter: (*p)->initialized = false; (*p)->v = initialv; (*p)->iter = true; if ((*p)->back != NULL){ (*p)->back->iter = true; (*p)->back->v = initialv; (*p)->back->initialized = false; } break; case length: processlength(&valyew, &divisor, ch, &minusread, intree, parens); (*p)->v = valyew / divisor; (*p)->iter = false; if ((*p)->back != NULL) { (*p)->back->v = (*p)->v; (*p)->back->iter = false; } break; case hsnolength: haslengths = false; break; default: /* cases hslength, treewt, unittrwt */ break; /* should never occur */ } } /* initpromlnode */ void dnaml_treeout(node *p) { /* write out file with representation of final tree2 */ /* Only works for bifurcations! */ long i, n, w; Char c; double x; node *q; boolean inloop; if (p->tip) { n = 0; for (i = 1; i <= nmlngth; i++) { if (nayme[p->index-1][i - 1] != ' ') n = i; } for (i = 0; i < n; i++) { c = nayme[p->index-1][i]; if (c == ' ') c = '_'; putc(c, outtree); } col += n; } else { putc('(', outtree); col++; inloop = false; q = p->next; do { if (inloop) { putc(',', outtree); col++; if (col > 45) { putc('\n', outtree); col = 0; } } inloop = true; dnaml_treeout(q->back); q = q->next; } while ((p == curtree.start || p != q) && (p != curtree.start || p->next != q)); putc(')', outtree); col++; } x = p->v; if (x > 0.0) w = (long)(0.43429448222 * log(x)); else if (x == 0.0) w = 0; else w = (long)(0.43429448222 * log(-x)) + 1; if (w < 0) w = 0; if (p == curtree.start) fprintf(outtree, ";\n"); else { fprintf(outtree, ":%*.5f", (int)(w + 7), x); col += w + 8; } } /* dnaml_treeout */ void buildnewtip(long m, tree *tr) { node *p; p = tr->nodep[nextsp + spp - 3]; hookup(tr->nodep[m - 1], p); p->v = initialv; p->back->v = initialv; } /* buildnewtip */ void buildsimpletree(tree *tr) { hookup(tr->nodep[enterorder[0] - 1], tr->nodep[enterorder[1] - 1]); tr->nodep[enterorder[0] - 1]->v = 1.0; tr->nodep[enterorder[0] - 1]->back->v = 1.0; tr->nodep[enterorder[1] - 1]->v = 1.0; tr->nodep[enterorder[1] - 1]->back->v = 1.0; buildnewtip(enterorder[2], tr); insert_(tr->nodep[enterorder[2] - 1]->back, tr->nodep[enterorder[0] - 1], false); } /* buildsimpletree */ void free_all_protx (long nonodes, pointarray treenode) { /* used in proml */ long i, j, k; node *p; /* Zero thru spp are tips, */ for (i = 0; i < spp; i++) { for (j = 0; j < endsite; j++) free(treenode[i]->protx[j]); free(treenode[i]->protx); } /* The rest are rings (i.e. triads) */ for (i = spp; i < nonodes; i++) { if (treenode[i] != NULL) { p = treenode[i]; do { for (k = 0; k < endsite; k++) free(p->protx[k]); free(p->protx); p = p->next; } while (p != treenode[i]); } } } /* free_all_protx */ void proml_unroot(node* root, node** nodep, long nonodes) { node *p,*r,*q; double newl; long i; long numsibs; numsibs = count_sibs(root); if ( numsibs > 2 ) { q = root; r = root; while (!(q->next == root)) q = q->next; q->next = root->next; for(i=0 ; i < endsite ; i++){ free(r->protx[i]); r->protx[i] = NULL; } free(r->protx); r->protx = NULL; chuck(&grbg, r); curtree.nodep[spp] = q; } else { /* Bifurcating root - remove entire root fork */ /* Join oldlen on each side of root */ newl = root->next->oldlen + root->next->next->oldlen; root->next->back->oldlen = newl; root->next->next->back->oldlen = newl; /* Join v on each side of root */ newl = root->next->v + root->next->next->v; root->next->back->v = newl; root->next->next->back->v = newl; /* Connect root's children */ root->next->back->back=root->next->next->back; root->next->next->back->back = root->next->back; /* Move nodep entries down one and set indices */ for ( i = spp; i < nonodes-1; i++ ) { p = nodep[i+1]; nodep[i] = p; nodep[i+1] = NULL; if ( nodep[i] == NULL ) /* This may happen in a multifurcating intree */ break; do { p->index = i+1; p = p->next; } while (p != nodep[i]); } /* Free protx arrays from old root */ for(i=0 ; i < endsite ; i++){ free(root->protx[i]); free(root->next->protx[i]); free(root->next->next->protx[i]); root->protx[i] = NULL; root->next->protx[i] = NULL; root->next->next->protx[i] = NULL; } free(root->protx); free(root->next->protx); free(root->next->next->protx); chuck(&grbg,root->next->next); chuck(&grbg,root->next); chuck(&grbg,root); } } /* proml_unroot */ void maketree() { long i, j; boolean dummy_first, goteof; pointarray dummy_treenode=NULL; long nextnode; node *root; prot_inittable(); if (usertree) { openfile(&intree,INTREE,"input tree file", "r",progname,intreename); numtrees = countsemic(&intree); if(numtrees > MAXSHIMOTREES) shimotrees = MAXSHIMOTREES; else shimotrees = numtrees; if (numtrees > 2) initseed(&inseed, &inseed0, seed); l0gl = (double *) Malloc(shimotrees * sizeof(double)); l0gf = (double **) Malloc(shimotrees * sizeof(double *)); for (i=0; i < shimotrees; ++i) l0gf[i] = (double *) Malloc(endsite * sizeof(double)); if (treeprint) { fprintf(outfile, "User-defined tree"); if (numtrees > 1) putc('s', outfile); fprintf(outfile, ":\n\n"); } which = 1; /* This taken out of tree read, used to be [spp-1], but referring to [0] produces output identical to what the pre-modified dnaml produced. */ while (which <= numtrees) { /* These initializations required each time through the loop since multiple trees require re-initialization */ haslengths = true; nextnode = 0; dummy_first = true; goteof = false; treeread(intree, &root, dummy_treenode, &goteof, &dummy_first, curtree.nodep, &nextnode, &haslengths, &grbg, initpromlnode,false,nonodes2); proml_unroot(root,curtree.nodep,nonodes2); if (goteof && (which <= numtrees)) { /* if we hit the end of the file prematurely */ printf ("\n"); printf ("ERROR: trees missing at end of file.\n"); printf ("\tExpected number of trees:\t\t%ld\n", numtrees); printf ("\tNumber of trees actually in file:\t%ld.\n\n", which - 1); exxit (-1); } curtree.start = curtree.nodep[0]->back; if ( outgropt ) curtree.start = curtree.nodep[outgrno - 1]->back; treevaluate(); proml_printree(); summarize(); if (trout) { col = 0; dnaml_treeout(curtree.start); } if(which < numtrees){ prot_freex_notip(nextnode, curtree.nodep); gdispose(curtree.start, &grbg, curtree.nodep); } else nonodes2 = nextnode; which++; } FClose(intree); putc('\n', outfile); if (!auto_ && numtrees > 1 && weightsum > 1 ) standev2(numtrees, maxwhich, 0, endsite-1, maxlogl, l0gl, l0gf, aliasweight, seed); } else { /* no user tree */ for (i = 1; i <= spp; i++) enterorder[i - 1] = i; if (jumble) randumize(seed, enterorder); if (progress) { printf("\nAdding species:\n"); writename(0, 3, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } nextsp = 3; polishing = false; smoothit = improve; buildsimpletree(&curtree); curtree.start = curtree.nodep[enterorder[0] - 1]->back; nextsp = 4; while (nextsp <= spp) { buildnewtip(enterorder[nextsp - 1], &curtree); bestyet = UNDEFINED; if (smoothit) promlcopy(&curtree, &priortree, nonodes2, rcategs); addtraverse(curtree.nodep[enterorder[nextsp - 1] - 1]->back, curtree.start, true); if (smoothit) promlcopy(&bestree, &curtree, nonodes2, rcategs); else { insert_(curtree.nodep[enterorder[nextsp - 1] - 1]->back, qwhere, true); smoothit = true; for (i = 1; i<=smoothings; i++) { smooth(curtree.start); smooth(curtree.start->back); } smoothit = false; promlcopy(&curtree, &bestree, nonodes2, rcategs); bestyet = curtree.likelihood; } if (progress) { writename(nextsp - 1, 1, enterorder); #ifdef WIN32 phyFillScreenColor(); #endif } if (global && nextsp == spp && progress) { printf("Doing global rearrangements\n"); printf(" !"); for (j = spp ; j < nonodes2 ; j++) if ( (j - spp) % (( nonodes2 / 72 ) + 1 ) == 0 ) putchar('-'); printf("!\n"); #ifdef WIN32 phyFillScreenColor(); #endif } succeeded = true; while (succeeded) { succeeded = false; if (global && nextsp == spp && progress) { printf(" "); fflush(stdout); } if (global && nextsp == spp) globrearrange(); else rearrange(curtree.start, curtree.start->back); if (global && nextsp == spp && progress) putchar('\n'); } nextsp++; } if (global && progress) { putchar('\n'); fflush(stdout); #ifdef WIN32 phyFillScreenColor(); #endif } promlcopy(&curtree, &bestree, nonodes2, rcategs); if (njumble > 1) { if (jumb == 1) promlcopy(&bestree, &bestree2, nonodes2, rcategs); else if (bestree2.likelihood < bestree.likelihood) promlcopy(&bestree, &bestree2, nonodes2, rcategs); } if (jumb == njumble) { if (njumble > 1) promlcopy(&bestree2, &curtree, nonodes2, rcategs); curtree.start = curtree.nodep[outgrno - 1]->back; for (i = 0; i < nonodes2; i++) { if (i < spp) curtree.nodep[i]->initialized = false; else { curtree.nodep[i]->initialized = false; curtree.nodep[i]->next->initialized = false; curtree.nodep[i]->next->next->initialized = false; } } treevaluate(); proml_printree(); summarize(); if (trout) { col = 0; dnaml_treeout(curtree.start); } } } if (usertree) { free(l0gl); for (i=0; i < shimotrees; i++) free(l0gf[i]); free(l0gf); } prot_freetable(); if (jumb < njumble) return; free(contribution); free(mp); for (i=0; i < endsite; i++) free(term[i]); free(term); for (i=0; i < endsite; i++) free(slopeterm[i]); free(slopeterm); for (i=0; i < endsite; i++) free(curveterm[i]); free(curveterm); free_all_protx(nonodes2, curtree.nodep); if (!usertree) { free_all_protx(nonodes2, bestree.nodep); free_all_protx(nonodes2, priortree.nodep); if (njumble > 1) free_all_protx(nonodes2, bestree2.nodep); } if (progress) { printf("\nOutput written to file \"%s\"\n", outfilename); if (trout) printf("\nTree also written onto file \"%s\"\n", outtreename); } } /* maketree */ void clean_up() { /* Free and/or close stuff */ long i; free (rrate); free (probcat); free (rate); /* Seems to require freeing every time... */ for (i = 0; i < spp; i++) { free (y[i]); } free (y); free (nayme); free (enterorder); free (category); free (weight); free (alias); free (ally); free (location); free (aliasweight); free (probmat); free (eigmat); FClose(infile); FClose(outfile); FClose(outtree); #ifdef MAC fixmacfile(outfilename); fixmacfile(outtreename); #endif } /* clean_up */ int main(int argc, Char *argv[]) { /* Protein Maximum Likelihood */ #ifdef MAC argc = 1; /* macsetup("ProML",""); */ argv[0] = "ProML"; #endif init(argc,argv); progname = argv[0]; openfile(&infile,INFILE,"input file","r",argv[0],infilename); openfile(&outfile,OUTFILE,"output file","w",argv[0],outfilename); mulsets = false; datasets = 1; firstset = true; ibmpc = IBMCRT; ansi = ANSICRT; grbg = NULL; doinit(); if (ctgry) openfile(&catfile,CATFILE,"categories file","r",argv[0],catfilename); if (weights || justwts) openfile(&weightfile,WEIGHTFILE,"weights file","r",argv[0],weightfilename); if (trout) openfile(&outtree,OUTTREE,"output tree file","w",argv[0],outtreename); for (ith = 1; ith <= datasets; ith++) { if (datasets > 1) { fprintf(outfile, "Data set # %ld:\n", ith); printf("\nData set # %ld:\n", ith); } getinput(); if (ith == 1) firstset = false; if (usertree) { max_num_sibs = 0; maketree(); } else for (jumb = 1; jumb <= njumble; jumb++) { max_num_sibs = 0; maketree(); } } clean_up(); printf("\nDone.\n\n"); #ifdef WIN32 phyRestoreConsoleAttributes(); #endif return 0; } /* Protein Maximum Likelihood */