#include "mltaln.h" #define SMALLMEMORY 1 #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 static int treein; static int topin; static int treeout; static int distout; static int noalign; static int multidist; static int maxdist = 2; // scale -> 2bai static int allowlongadds; static int addtotop; static int keeplength; static int diffout; // Incomplete. Not used static int ndeleted; static int mapout; static int smoothing; static double hitout; static int tuplesize; #define PLENFACA 0.01 #define PLENFACB 10000 #define PLENFACC 10000 #define PLENFACD 0.1 #define D6LENFACA 0.01 #define D6LENFACB 2500 #define D6LENFACC 2500 #define D6LENFACD 0.1 #define D10LENFACA 0.01 #define D10LENFACB 1000000 #define D10LENFACC 1000000 #define D10LENFACD 0.0 typedef struct _thread_arg { int njob; int nadd; int *nlen; int *follows; char **name; char **seq; LocalHom **localhomtable; double **iscore; double **nscore; int *istherenewgap; int **newgaplist; RNApair ***singlerna; double *eff_kozo_mapped; int alloclen; Treedep *dep; int ***topol; double **len; Addtree *addtree; GapPos **deletelist; GapPos **difflist; #ifdef enablemultithread int *iaddshare; int thread_no; pthread_mutex_t *mutex_counter; #endif } thread_arg_t; #ifdef enablemultithread typedef struct _gaplist2alnxthread_arg { // int thread_no; int ncycle; int *jobpospt; int tmpseqlen; int lenfull; char **seq; int *newgaplist; int *posmap; pthread_mutex_t *mutex; } gaplist2alnxthread_arg_t; typedef struct _distancematrixthread_arg { int thread_no; int njob; int norg; int *jobpospt; int **pointt; int *nogaplen; double **imtx; double **nmtx; double *selfscore; pthread_mutex_t *mutex; } distancematrixthread_arg_t; typedef struct _jobtable2d { int i; int j; } Jobtable2d; typedef struct _dndprethread_arg { int njob; int thread_no; double *selfscore; double **mtx; char **seq; Jobtable2d *jobpospt; pthread_mutex_t *mutex; } dndprethread_arg_t; #endif typedef struct _blocktorealign { int start; int end; int nnewres; } Blocktorealign; static void cnctintvec( int *res, int *o1, int *o2 ) { while( *o1 != -1 ) *res++ = *o1++; while( *o2 != -1 ) *res++ = *o2++; *res = -1; } static void countnewres( int len, Blocktorealign *realign, int *posmap, int *gaplist ) { int i, regstart, regend, len1; regstart = 0; len1 = len+1; for( i=0; i lenb ) return -1; else if( lena < lenb ) return 1; else return( 0 ); } static int dorealignment_tree( Blocktorealign *block, char **fullseq, int *fullseqlenpt, int norg, int ***topol, int *follows ) { int i, j, k, posinold, newlen, *nmem; int n0, n1, localloclen, nhit, hit1, hit2; int *pickhistory; int nprof1, nprof2, pos, zure; char **prof1, **prof2; int *iinf0, *iinf1; int *group, *nearest, *g2n, ngroup; char ***mem; static char **tmpaln0 = NULL; static char **tmpaln1 = NULL; static char **tmpseq; int ***topolpick; int *tmpint; int *intptr, *intptrx; char *tmpseq0, *cptr, **cptrptr; localloclen = 4 * ( block->end - block->start + 1 ); // ookisugi? tmpaln0 = AllocateCharMtx( njob, localloclen ); tmpaln1 = AllocateCharMtx( njob, localloclen ); tmpseq = AllocateCharMtx( 1, *fullseqlenpt * 4 ); iinf0 = AllocateIntVec( njob ); iinf1 = AllocateIntVec( njob ); nearest = AllocateIntVec( njob ); // oosugi posinold = block->start; n0 = 0; n1 = 0; for( i=0; istart, block->end - block->start + 1 ); tmpseq[0][block->end - block->start + 1] = 0; commongappick( 1, tmpseq ); if( tmpseq[0][0] != 0 ) { if( i < norg ) { fprintf( stderr, "BUG!!!!\n" ); exit( 1 ); } strcpy( tmpaln0[n0], tmpseq[0] ); iinf0[n0] = i; nearest[n0] = follows[i-norg]; n0++; } else { strcpy( tmpaln1[n0], "" ); iinf1[n1] = i; n1++; } } mem = AllocateCharCub( n0, n0+1, 0 ); // oosugi nmem = AllocateIntVec( n0 ); // oosugi g2n = AllocateIntVec( n0 ); // oosugi group = AllocateIntVec( n0 ); // oosugi for( i=0; i %d -> group%d\n", i, nearest[i], group[i] ); // fprintf( stderr, "mem[%d][%d] = %s\n", group[i], j, mem[group[i]][j] ); } for( i=0; i newlen ) newlen = j; for( j=0; j<=i; j++ ) { for( k=0; mem[j][k]; k++ ) fillgap( mem[j][k], newlen ); } #endif } #if 0 fprintf( stderr, "After ingroupalignment (original order):\n" ); for( i=0; i-1; intptr++ ) { for( intptrx=g2n,k=0; k %d\n", k, topol[i][0][j] ); for( intptr=topol[i][1]; *intptr>-1; intptr++ ) { for( intptrx=g2n,k=0; k %d\n", k, topol[i][1][j] ); #if 0 fprintf( stderr, "\nHIT!!! \n" ); fprintf( stderr, "\nSTEP %d\n", i ); for( j=0; topol[i][0][j]>-1; j++ ) fprintf( stderr, "%3d ", topol[i][0][j] ); fprintf( stderr, "\n" ); for( j=0; topol[i][1][j]>-1; j++ ) fprintf( stderr, "%3d ", topol[i][1][j] ); fprintf( stderr, "\n" ); #endif } for( i=0; i-1; j++ ) fprintf( stderr, "%3d ", topolpick[i][0][j] ); fprintf( stderr, "\n" ); for( j=0; topolpick[i][1][j]>-1; j++ ) fprintf( stderr, "%3d ", topolpick[i][1][j] ); fprintf( stderr, "\n" ); #endif pos = 0; // for( j=0; topolpick[i][0][j]>-1; j++ ) for( k=0; (cptr=mem[topolpick[i][0][j]][k]); k++ ) prof1[pos++] = cptr; for( intptr=topolpick[i][0]; *intptr>-1; intptr++ ) for( cptrptr=mem[*intptr]; (cptr=*cptrptr); cptrptr++ ) prof1[pos++] = cptr; nprof1 = pos; pos = 0; // for( j=0; topolpick[i][1][j]>-1; j++ ) for( k=0; (cptr=mem[topolpick[i][1][j]][k]); k++ ) prof2[pos++] = cptr; for( intptr=topolpick[i][1]; *intptr>-1; intptr++ ) for( cptrptr=mem[*intptr]; (cptr=*cptrptr); cptrptr++ ) prof2[pos++] = cptr; nprof2 = pos; profilealignment2( nprof1, nprof2, prof1, prof2, localloclen, alg ); #if 0 for( j=0; jend - block->start + 1 - newlen ); // fprintf( stderr, "zure = %d, localloclen=%d, newlen=%d\n", zure, localloclen, newlen ); if( *fullseqlenpt < strlen( fullseq[0] ) - (block->end-block->start+1) + newlen + 1 ) { *fullseqlenpt = strlen( fullseq[0] ) * 2; fprintf( stderr, "reallocating..." ); for( i=0; iend+1; for( i=0; istart, tmpseq0 ); } for( i=0; istart, tmpseq0 ); } FreeCharMtx( tmpaln0 ); FreeCharMtx( tmpaln1 ); FreeCharMtx( tmpseq ); for( i=0; istart; n0 = 0; n1 = 0; for( i=0; istart, block->end - block->start + 1 ); tmpseq[0][block->end - block->start + 1] = 0; commongappick( 1, tmpseq ); // if( strlen( tmpseq[0] ) > 0 ) if( tmpseq[0][0] != 0 ) { if( i < norg ) { fprintf( stderr, "BUG!!!!\n" ); exit( 1 ); } strcpy( tmpaln0[n0], tmpseq[0] ); iinf0[n0] = i; n0++; } else { strcpy( tmpaln1[n0], "" ); iinf1[n1] = i; n1++; } } for( i=1; istart, tmpaln0[i], newlen ); for( i=0; istart, tmpaln1[i], newlen ); } posinold = block->end+1; posinnew = block->start + newlen; zure = ( block->end - block->start + 1 - strlen( tmpaln0[0] ) ); for( i=0; i 0 && l[i] > 0 ) { if( pg < l[i] ) { c[i] = l[i] - pg; } else { c[i] = 0; } } else { c[i] = l[i]; } prep = p[i]; } } void gaplist2alnx( int len, char *a, char *s, int *l, int *p, int lenlimit ) { int gaplen; int pos, pi, posl; int prevp = -1; int reslen = 0; char *sp; // char *abk = a; #if 0 int i; char *abk = a; fprintf( stderr, "s = %s\n", s ); fprintf( stderr, "posmap = " ); for( i=0; i lenlimit ) { fprintf( stderr, "Length over. Please recompile!\n" ); exit( 1 ); } while( gaplen-- ) *a++ = '-'; pos = prevp + 1; sp = s + pos; if( ( posl = pi - pos ) ) { if( ( reslen += posl ) > lenlimit ) { fprintf( stderr, "Length over. Please recompile\n" ); exit( 1 ); } while( posl-- ) *a++ = *sp++; } if( reslen++ > lenlimit ) { fprintf( stderr, "Length over. Please recompile\n" ); exit( 1 ); } *a++ = *sp; prevp = pi; } gaplen = *l; pi = *p; if( (reslen+=gaplen) > lenlimit ) { fprintf( stderr, "Length over. Please recompile\n" ); exit( 1 ); } while( gaplen-- ) *a++ = '-'; pos = prevp + 1; sp = s + pos; if( ( posl = pi - pos ) ) { if( ( reslen += posl ) > lenlimit ) { fprintf( stderr, "Length over. Please recompile\n" ); exit( 1 ); } while( posl-- ) *a++ = *sp++; } *a = 0; // fprintf( stderr, "reslen = %d, strlen(a) = %d\n", reslen, strlen( abk ) ); // fprintf( stderr, "a = %s\n", abk ); } static void makenewgaplist( int *l, char *a ) { while( 1 ) { while( *a == '=' ) { a++; (*l)++; // fprintf( stderr, "a[] (i) = %s, *l=%d\n", a, *(l) ); } *++l = 0; if( *a == 0 ) break; a++; } *l = -1; } void arguments( int argc, char *argv[] ) { int c; nthread = 1; codonpos = 0; codonscore = 0; outnumber = 0; scoreout = 0; treein = 0; topin = 0; rnaprediction = 'm'; rnakozo = 0; nevermemsave = 0; inputfile = NULL; addfile = NULL; addprofile = 1; fftkeika = 0; constraint = 0; nblosum = 62; fmodel = 0; calledByXced = 0; devide = 0; use_fft = 0; // chuui force_fft = 0; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'A'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'X'; sueff_global = 0.1; contin = 0; scoremtx = 1; kobetsubunkatsu = 0; dorp = NOTSPECIFIED; ppenalty = NOTSPECIFIED; penalty_shift_factor = 1000.0; ppenalty_ex = NOTSPECIFIED; poffset = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; RNAppenalty = NOTSPECIFIED; RNAppenalty_ex = NOTSPECIFIED; RNApthr = NOTSPECIFIED; TMorJTT = JTT; consweight_multi = 1.0; consweight_rna = 0.0; nadd = 0; multidist = 0; tuplesize = -1; legacygapcost = 0; allowlongadds = 0; addtotop = 0; keeplength = 0; diffout = 0; mapout = 0; smoothing = 0; distout = 0; hitout = 0.0; nwildcard = 0; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'I': nadd = myatoi( *++argv ); fprintf( stderr, "nadd = %d\n", nadd ); --argc; goto nextoption; case 'e': RNApthr = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'o': RNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'Q': penalty_shift_factor = atof( *++argv ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); fprintf( stderr, "kappa = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; fprintf( stderr, "blosum %d / kimura 200\n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt/kimura %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "tm %d\n", pamN ); --argc; goto nextoption; case 'l': fastathreshold = atof( *++argv ); constraint = 2; --argc; goto nextoption; case 'r': consweight_rna = atof( *++argv ); rnakozo = 1; --argc; goto nextoption; case 'c': consweight_multi = atof( *++argv ); --argc; goto nextoption; case 'C': nthread = myatoi( *++argv ); fprintf( stderr, "nthread = %d\n", nthread ); --argc; goto nextoption; case 'R': codonpos = 1; break; case 'S': codonscore = 1; break; #if 0 case 'R': rnaprediction = 'r'; break; case 's': RNAscoremtx = 'r'; break; #endif #if 1 case 'a': fmodel = 1; break; #endif case 'K': addprofile = 0; break; case 'y': distout = 1; break; case '^': hitout = atof( *++argv ); --argc; goto nextoption; case 't': treeout = 1; break; case 'T': noalign = 1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; #if 1 case 'O': outgap = 0; break; #else case 'O': fftNoAnchStop = 1; break; #endif // case 'S': // scoreout = 1; // break; #if 0 case 'e': fftscore = 0; break; case 'r': fmodel = -1; break; case 'R': fftRepeatStop = 1; break; case 's': treemethod = 's'; break; #endif case 'X': treemethod = 'X'; sueff_global = atof( *++argv ); fprintf( stderr, "sueff_global = %f\n", sueff_global ); --argc; goto nextoption; case 'E': treemethod = 'E'; break; case 'q': treemethod = 'q'; break; case 'n' : outnumber = 1; break; #if 0 case 'a': alg = 'a'; break; case 'Q': alg = 'Q'; break; #endif case 'H': alg = 'H'; break; case 'A': alg = 'A'; break; case 'M': alg = 'M'; break; case 'N': nevermemsave = 1; break; case 'B': // hitsuyou! memopt -M -B no tame break; case 'F': use_fft = 1; break; case 'G': force_fft = 1; use_fft = 1; break; case 'U': treein = 1; break; case 'x': addtotop = 1; break; case 'V': allowlongadds = 1; break; case 'p': smoothing = 1; break; #if 0 case 'V': topin = 1; break; #endif case 'u': tbrweight = 0; weight = 0; break; case 'v': tbrweight = 3; break; case 'd': multidist = 1; break; case 'W': tuplesize = myatoi( *++argv ); --argc; goto nextoption; #if 0 case 'd': disp = 1; break; #endif /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* modification end. */ #if 0 case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; #endif case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; #if 0 case 'Z': checkC = 1; break; #endif case 'L': legacygapcost = 1; break; case 'Y': keeplength = 1; break; case 'z': mapout = 2; break; case 'Z': mapout = 1; break; case '%': diffout = 1; break; case ':': nwildcard = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } } static double treebase( int nseq, int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, double *effarr, int *alloclen, LocalHom **localhomtable, RNApair ***singlerna, double *effarr_kozo ) { int i, l, m; int len1nocommongap, len2nocommongap; int len1, len2; int clus1, clus2; double pscore, tscore; char *indication1, *indication2; double *effarr1 = NULL; double *effarr2 = NULL; double *effarr1_kozo = NULL; double *effarr2_kozo = NULL; LocalHom ***localhomshrink = NULL; int *fftlog; int m1, m2; int *gaplen; int *gapmap; int *alreadyaligned; // double dumfl = 0.0; double dumdb = 0.0; int ffttry; RNApair ***grouprna1, ***grouprna2; if( rnakozo && rnaprediction == 'm' ) { grouprna1 = (RNApair ***)calloc( nseq, sizeof( RNApair ** ) ); grouprna2 = (RNApair ***)calloc( nseq, sizeof( RNApair ** ) ); } else { grouprna1 = grouprna2 = NULL; } fftlog = AllocateIntVec( nseq ); effarr1 = AllocateDoubleVec( nseq ); effarr2 = AllocateDoubleVec( nseq ); indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); alreadyaligned = AllocateIntVec( nseq ); if( constraint ) { localhomshrink = (LocalHom ***)calloc( nseq, sizeof( LocalHom ** ) ); #if SMALLMEMORY if( multidist ) { for( i=0; i 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); fprintf( stderr, "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); #endif // for( i=0; i 50000 || len2 > 50000 ) ) ) { fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 ); alg = 'M'; if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; // 2013/Jul17 commonAlloc1 = 0; commonAlloc2 = 0; } if( alg == 'M' ) // hoka no thread ga M ni shitakamo shirenainode { if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; commonAlloc1 = 0; commonAlloc2 = 0; } // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000 ); if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); else ffttry = 0; // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000 ); // v6.708 // fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (double)len1/fftlog[m1], clus1, (double)len2/fftlog[m2], clus2 ); // fprintf( stderr, "f=%d, clus1=%d, fftlog[m1]=%d, clus2=%d, fftlog[m2]=%d\n", ffttry, clus1, fftlog[m1], clus2, fftlog[m2] ); if( constraint == 2 ) { if( codonpos ) { reporterr( "\n\nThe --codonpos option is supported only for --6merpair --addfragments at this point. Add the --6merpair flag, for now.\n\n\n" ); exit( 1 ); } if( alg == 'M' ) { fprintf( stderr, "\n\nMemory saving mode is not supported.\n\n" ); exit( 1 ); } fprintf( stderr, "c" ); if( alg == 'A' ) { imp_match_init_strict( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, NULL, 1, topol[l][0], topol[l][1], NULL, NULL, NULL, -1, 0 ); if( rnakozo ) imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL ); pscore = A__align( n_dis_consweight_multi, penalty, penalty_ex, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, constraint, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); // cpmxchild0 tsukaeru?? } else if( alg == 'Q' ) { fprintf( stderr, "Q has been disabled.\n" ); exit( 1 ); } } else if( force_fft || ( use_fft && ffttry ) ) { fprintf( stderr, "f" ); if( alg == 'M' ) { fprintf( stderr, "m" ); pscore = Falign_udpari_long( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 ); } else pscore = Falign( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL ); } else { fprintf( stderr, "d" ); fftlog[m1] = 0; switch( alg ) { case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen ); break; case( 'M' ): fprintf( stderr, "m" ); pscore = MSalignmm( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, NULL, NULL, NULL, 0.0, 0.0 ); break; case( 'A' ): pscore = A__align( n_dis_consweight_multi, penalty, penalty_ex, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); // cpmxchild0 tsukaeru?? break; default: ErrorExit( "ERROR IN SOURCE FILE" ); } } nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] ); // fprintf( stderr, "aseq[last] = %s\n", aseq[nseq-1] ); #if SCOREOUT fprintf( stderr, "score = %10.2f\n", pscore ); #endif tscore += pscore; #if 0 // New gaps = '=' fprintf( stderr, "Original msa\n" ); for( i=0; i-1; i++ ) alreadyaligned[m] = 1; } if( mergeoralign[l] == '2' ) { // if( deleteadditionalinsertions ) ndeleted += deletenewinsertions( clus1, clus2, mseq1, mseq2, deleterecord ); adjustgapmap( strlen( mseq1[0] )-len1nocommongap+len1, gapmap, mseq1[0] ); restorecommongaps( nseq, nseq-(clus1+clus2), aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' ); findnewgaps( clus1, 0, mseq1, gaplen ); insertnewgaps( nseq, alreadyaligned, aseq, topol[l][0], topol[l][1], gaplen, gapmap, *alloclen, alg, '-' ); // for( i=0; i-1; i++ ) alreadyaligned[m] = 1; } #if 0 free( topol[l][0] ); free( topol[l][1] ); free( topol[l] ); #endif } //for( i=0; ithread_no; int *iaddshare = targ->iaddshare; #endif int njob = targ->njob; int *follows = targ->follows; int nadd = targ->nadd; int *nlen = targ->nlen; char **name = targ->name; char **seq = targ->seq; LocalHom **localhomtable = targ->localhomtable; double **iscore = targ->iscore; double **nscore = targ->nscore; int *istherenewgap = targ->istherenewgap; int **newgaplist = targ->newgaplist; RNApair ***singlerna = targ->singlerna; double *eff_kozo_mapped = targ->eff_kozo_mapped; int alloclen = targ->alloclen; Treedep *dep = targ->dep; int ***topol = targ->topol; double **len = targ->len; Addtree *addtree = targ->addtree; GapPos **deletelist = targ->deletelist; GapPos **difflist = targ->difflist; double pscore; int *alnleninnode = NULL; char *targetseq; // fprintf( stderr, "\nPreparing thread %d\n", thread_no ); norg = njob - nadd; njobc = norg+1; #if 0 alnleninnode = AllocateIntVec( norg ); addmem = AllocateIntVec( nadd+1 ); depc = (Treedep *)calloc( njobc, sizeof( Treedep ) ); mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); bseq = AllocateCharMtx( njobc, alloclen ); namec = AllocateCharMtx( njob, 0 ); nlenc = AllocateIntVec( njob ); mergeoralign = AllocateCharVec( njob ); nogaplenjusttodecideaddhereornot = AllocateIntVec( njobc ); tmpseq = calloc( alloclen, sizeof( char ) ); #else alnleninnode = AllocateIntVec( norg ); addmem = AllocateIntVec( nadd+1 ); depc = (Treedep *)calloc( njobc, sizeof( Treedep ) ); mseq1 = AllocateCharMtx( njobc, 0 ); mseq2 = AllocateCharMtx( njobc, 0 ); bseq = AllocateCharMtx( njobc, alloclen ); namec = AllocateCharMtx( njobc, 0 ); nlenc = AllocateIntVec( njobc ); mergeoralign = AllocateCharVec( njobc ); nogaplenjusttodecideaddhereornot = AllocateIntVec( njobc ); tmpseq = calloc( alloclen, sizeof( char ) ); #endif if( allowlongadds ) // hontou ha iranai. { for( i=0; i=0; i-- ) // for( i=norg-2; i; i-- ) // BUG!!!! { // reporterr( "\nstep %d\n", i ); k = 0; for( j=0; (m=topol[i][0][j])!=-1; j++ ) { mseq1[k++] = bseq[m]; // reporterr( "%d ", m ); } for( j=0; (m=topol[i][1][j])!=-1; j++ ) { mseq1[k++] = bseq[m]; // reporterr( "%d ", m ); } // reporterr( "\n" ); commongappick( k, mseq1 ); alnleninnode[i] = strlen( mseq1[0] ); // fprintf( stderr, "alnleninnode[%d] = %d\n", i, alnleninnode[i] ); } } // for( i=0; imutex_counter ); iadd = *iaddshare; if( iadd == nadd ) { pthread_mutex_unlock( targ->mutex_counter ); break; } if( iadd < 500 ) fprintf( stderr, "\rSTEP %d / %d (thread %d) \r", iadd, nadd, thread_no ); else if( iadd % 100 == 0 ) fprintf( stderr, "\nSTEP %d / %d (thread %d) \n", iadd, nadd, thread_no ); ++(*iaddshare); targetseq = seq[norg+iadd]; pthread_mutex_unlock( targ->mutex_counter ); } else #endif { iadd++; if( iadd == nadd ) break; targetseq = seq[norg+iadd]; if( iadd < 500 ) fprintf( stderr, "\rSTEP %d / %d \r", iadd, nadd ); else if( iadd % 100 == 0 ) fprintf( stderr, "\nSTEP %d / %d \n", iadd, nadd ); } for( i=0; i 0 ) { for( i=0; imutex_counter ); fprintf( stdout, "\nmergeoralign (iadd=%d) = ", iadd ); for( i=0; imutex_counter ); #endif singlerna = NULL; pscore = treebase( njobc, nlenc, bseq, 1, mergeoralign, mseq1, mseq2, topolc, effc, &alloclen, localhomtablec, singlerna, eff_kozo_mapped ); #if 0 pthread_mutex_lock( targ->mutex_counter ); // fprintf( stdout, "res (iadd=%d) = %s, pscore=%f\n", iadd, bseq[norg], pscore ); // fprintf( stdout, "effc (iadd=%d) = ", iadd ); // for( i=0; imutex_counter ); #endif #if 0 fprintf( trap_g, "done.\n" ); fclose( trap_g ); #endif // fprintf( stdout, "\n>seq[%d, iadd=%d] = \n%s\n", norg+iadd, iadd, seq[norg+iadd] ); // fprintf( stdout, "\n>bseq[%d, iadd=%d] = \n%s\n", norg, iadd, bseq[norg] ); // strcpy( seq[norg+iadd], bseq[norg] ); if( diffout ) { reporterr( "Not yet written\n" ); exit( 1 ); // deletenewinsertions_difflist( norg, 1, bseq, bseq+norg, difflist+iadd ); // strcpy( targetseq, bseq[norg] ); // i = norg; // no new gap!! } else if( keeplength ) { // reporterr( "deletelist = %p\n", deletelist ); // reporterr( "deletelist+iadd = %p\n", deletelist+iadd ); ndeleted += deletenewinsertions_whole_eq( norg, 1, bseq, bseq+norg, deletelist+iadd ); // for( i=0; i\n%s\n", bseq[i] ); strcpy( targetseq, bseq[norg] ); i = norg; // no new gap!! } else { strcpy( targetseq, bseq[norg] ); rep = -1; for( i=0; i maxl ) maxl = nogaplen[i]; if( dorp == 'd' ) /* nuc */ { seq_grp_nuc( grpseq, tmpseq ); // makepointtable_nuc( pointt[i], grpseq ); // makepointtable_nuc_octet( pointt[i], grpseq ); if( tuplesize == 10 ) makepointtable_nuc_dectet( pointt[i], grpseq ); else if( tuplesize == 6 ) makepointtable_nuc( pointt[i], grpseq ); else { fprintf( stderr, "tuplesize=%d: not supported\n", tuplesize ); exit( 1 ); } } else /* amino */ { seq_grp( grpseq, tmpseq ); makepointtable( pointt[i], grpseq ); } } if( nunknown ) fprintf( stderr, "\nThere are %d ambiguous characters\n", nunknown ); for( i=0; i 0 ) { distancematrixthread_arg_t *targ; int jobpos; pthread_t *handle; pthread_mutex_t mutex; jobpos = 0; targ = calloc( nthread, sizeof( distancematrixthread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; i nogaplen[j] ) { longer=(double)nogaplen[i]; shorter=(double)nogaplen[j]; } else { longer=(double)nogaplen[j]; shorter=(double)nogaplen[i]; } lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); bunbo = MIN( selfscore[i], selfscore[j] ); if( j < norg ) { if( bunbo == 0.0 ) imtx[i][j-i] = maxdist; else imtx[i][j-i] = maxdist * ( 1.0 - mtxv / bunbo ) * lenfac; } else { if( bunbo == 0.0 ) nmtx[i][j-norg] = maxdist; else nmtx[i][j-norg] = maxdist * ( 1.0 - mtxv / bunbo ) * lenfac; } } free( table1 ); } } fprintf( stderr, "\ndone.\n\n" ); fflush( stderr ); free( grpseq ); free( tmpseq ); FreeIntMtx( pointt ); free( nogaplen ); free( selfscore ); if( hitout<0.0 ) { fprintf( stdout, "Threshold=%f\n\n", -hitout ); for( i=0; i0.0 ) { fprintf( stdout, "Threshold=%f\n\n", hitout ); for( i=norg; i 0 ) { dndprethread_arg_t *targ; Jobtable2d jobpos; pthread_t *handle; pthread_mutex_t mutex; jobpos.i = 0; jobpos.j = 0; targ = calloc( nthread, sizeof( dndprethread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; i 9.9 ) { fprintf( stderr, "WARNING: distance %d-%d is strange, %f.\n", i, j, mtxv ); mtxv = 9.9; // exit( 1 ); // 2016/Aug/3 } #else // CHUUI!!! 2012/05/16 if( mtxv > 2.0 ) { mtxv = 2.0; } if( mtxv < 0.0 ) { fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv ); exit( 1 ); } #endif mtx[i][j-i] = mtxv; } } } #if TEST for( i=0; i %d\n", postoshiftfrom, postoshiftto ); for( j=0; j %d\n", postoshiftfrom, postoshiftto ); for( j=0; j 1000 || nadd > 1000 ) use_fft = 0; // if( norg > 1000 ) use_fft = 0; fullseqlen = alloclen = nlenmax*4+1; //chuui! seq = AllocateCharMtx( njob, alloclen ); name = AllocateCharMtx( njob, B+1 ); nlen = AllocateIntVec( njob ); ndeleted = 0; if( multidist || tuplesize > 0 ) { iscore = AllocateFloatHalfMtx( norg ); nscore = AllocateFloatMtx( norg, nadd ); } else { iscore = AllocateFloatHalfMtx( njob ); nscore = NULL; } kozoarivec = AllocateCharVec( njob ); ordertable = AllocateIntVec( norg+1 ); if( constraint ) { #if SMALLMEMORY if( multidist ) { localhomtable = (LocalHom **)calloc( norg, sizeof( LocalHom *) ); for( i=0; i 0 ) // if mtx is internally computed { if( multidist == 1 ) { ktupledistancematrix( njob, norg, nlenmax, seq, name, iscore, nscore ); // iscore ha muda. // hat2p = fopen( "hat2-1", "w" ); // WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, iscore ); // fclose( hat2p ); dndpre( norg, seq, iscore ); // fprintf( stderr, "Loading 'hat2i' (aligned sequences) ... " ); // prep = fopen( "hat2i", "r" ); // if( prep == NULL ) ErrorExit( "Make hat2i." ); // readhat2_doublehalf_pointer( prep, njob-nadd, name, iscore ); // fclose( prep ); // fprintf( stderr, "done.\n" ); // hat2p = fopen( "hat2-2", "w" ); // WriteFloatHat2_pointer_halfmtx( hat2p, norg, name, iscore ); // fclose( hat2p ); } else { ktupledistancematrix( njob, norg, nlenmax, seq, name, iscore, nscore ); } } else { if( multidist == 1 ) { fprintf( stderr, "Loading 'hat2n' (aligned sequences - new sequences) ... " ); prep = fopen( "hat2n", "r" ); if( prep == NULL ) ErrorExit( "Make hat2n." ); readhat2_doublehalf_part_pointer( prep, njob, nadd, name, nscore ); fclose( prep ); fprintf( stderr, "done.\n" ); fprintf( stderr, "Loading 'hat2i' (aligned sequences) ... " ); prep = fopen( "hat2i", "r" ); if( prep == NULL ) ErrorExit( "Make hat2i." ); readhat2_doublehalf_pointer( prep, njob-nadd, name, iscore ); fclose( prep ); fprintf( stderr, "done.\n" ); } else { fprintf( stderr, "Loading 'hat2' ... " ); prep = fopen( "hat2", "r" ); if( prep == NULL ) ErrorExit( "Make hat2." ); readhat2_doublehalf_pointer( prep, njob, name, iscore ); fclose( prep ); fprintf( stderr, "done.\n" ); } } #if 1 if( distout ) { fprintf( stderr, "Writing distances between new sequences and existing msa.\n" ); hat2p = fopen( "hat2", "w" ); if( multidist || tuplesize > 0 ) { for( iadd=0; iadd 0.03 ) { fprintf( stderr, "################################################################################\n" ); fprintf( stderr, "# \n" ); fprintf( stderr, "# The reference MSA has >3%% ambiguous columns.\n" ); fprintf( stderr, "# Please prepare a better reference.\n" ); fprintf( stderr, "# \n" ); fprintf( stderr, "################################################################################\n" ); exit( 1 ); } if( keeplength && mapout ) { addbk = (char **)calloc( nadd+1, sizeof( char * ) ); for( i=0; i 1 ) cnctintvec( ordertable, topol[norg-2][0], topol[norg-2][1] ); else { ordertable[0] = 0; ordertable[1] = -1; } FreeFloatHalfMtx( iscoreo, norg ); #ifdef enablemultithread if( nthread ) { pthread_t *handle; pthread_mutex_t mutex_counter; thread_arg_t *targ; int *iaddsharept; targ = calloc( nthread, sizeof( thread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex_counter, NULL ); iaddsharept = calloc( 1, sizeof(int) ); *iaddsharept = 0; for( i=0; i 0 ) { FreeFloatHalfMtx( iscore, norg ); FreeFloatMtx( nscore ); } else { FreeFloatHalfMtx( iscore, njob ); } // for( i=0; i%s (%d) \n%s\n", name[norg+i], norg+i, seq[norg+i] ); if( treeout ) { fp = fopen( "infile.tree", "a" ); if( fp == 0 ) { fprintf( stderr, "File error!\n" ); exit( 1 ); } for( i=0; i %d\n", follower[i][j]+norg, i ); } fclose( orderfp ); posmap = AllocateIntVec( lenfull+2 ); realign = calloc( lenfull+2, sizeof( Blocktorealign ) ); for( i=0; i= fullseqlen ) { fullseqlen = tmplen * 2+1; // fprintf( stderr, "Length over!\n" ); // fprintf( stderr, "strlen(tmpseq1)=%d\n", (int)strlen( tmpseq1 ) ); fprintf( stderr, "reallocating..." ); // fprintf( stderr, "alloclen=%d\n", alloclen ); // fprintf( stderr, "Please recompile!\n" ); // exit( 1 ); for( i=0; i 0 && ien > 500 ) { gaplist2alnxthread_arg_t *targ; int jobpos; pthread_t *handle; pthread_mutex_t mutex; fprintf( stderr, "%d / %d (threads %d-%d)\r", iadd, nadd, 0, nthread ); targ = calloc( nthread, sizeof( gaplist2alnxthread_arg_t ) ); handle = calloc( nthread, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); jobpos = 1; for( i=0; i%s (iadd=%d)\n%s\n", name[i], iadd, tmpseq1 ); strcpy( seq[i], tmpseq1 ); } } } tmpseq1 = tmpseq[0]; // insertgapsbyotherfragments_simple( lenfull, tmpseq1, seq[norg+iadd], newgaplist_o[iadd], posmap ); insertgapsbyotherfragments_compact( lenfull, tmpseq1, seq[norg+iadd], newgaplist_o[iadd], posmap ); // fprintf( stderr, "%d = %s\n", iadd, tmpseq1 ); eq2dash( tmpseq1 ); strcpy( seq[norg+iadd], tmpseq1 ); // adjustposmap( lenfull, posmap, newgaplist_o[iadd] ); adjustposmap( lenfull, posmap, newgaplist_compact ); countnewres( lenfull, realign, posmap, newgaplist_o[iadd] ); // muda? // countnewres( lenfull, realign, posmap, newgaplist_compact ); // muda? } fprintf( stderr, "\r done. \n\n" ); #if 0 for( i=0; i%s\n", name[i] ); fprintf( stderr, "%s\n", seq[i] ); } #endif #if 0 fprintf( stderr, "realign[].nnewres = " ); for( i=0; i 1 ) { // fprintf( stderr, "i=%d: %d-%d\n", i, realign[i].start, realign[i].end ); fprintf( stderr, "\rRealigning %d/%d \r", i, lenfull ); // zure = dorealignment_compact( realign+i, seq, &fullseqlen, norg ); // zure = dorealignment_order( realign+i, seq, &fullseqlen, norg, ordertable, follows ); zure = dorealignment_tree( realign+i, seq, &fullseqlen, norg, topol, follows ); #if 0 gappick0( check1, seq[0] ); fprintf( stderr, "check1 = %s\n", check1 ); if( strcmp( check1, check2 ) ) { fprintf( stderr, "CHANGED!!!!!\n" ); exit( 1 ); } #endif for( j=i+1; j 0 ) { reporterr( "\nTo keep the alignment length, %d letters were DELETED.\n", ndeleted ); if( mapout ) reporterr( "The deleted letters are shown in the (filename).map file.\n" ); else reporterr( "To know the positions of deleted letters, rerun the same command with the --mapout option.\n" ); } return( 0 ); }