#include "mltaln.h" #define REPORTCOSTS 0 #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 #define SKIP 1 #define ITERATIVECYCLE 2 #define END_OF_VEC -1 static int treein; static int topin; static int treeout; static int noalign; static int distout; static int tuplesize; static int subalignment; static int subalignmentoffset; static int nguidetree; static int sparsepickup; static int keeplength; static int ndeleted; static int mapout; static int smoothing; static double maxdistmtxsize; static int nthreadtb; static int useexternalanchors; static int oneiteration; static double maxanchorseparation; #if 0 #define PLENFACA 0.0123 #define PLENFACB 10252 #define PLENFACC 10822 #define PLENFACD 0.5 #define DLENFACA 0.01 #define DLENFACB 2445 #define DLENFACC 2412 #define DLENFACD 0.1 #else #define PLENFACA 0.01 #define PLENFACB 10000 #define PLENFACC 10000 #define PLENFACD 0.1 #define D6LENFACA 0.01 #define D6LENFACB 2500 #define D6LENFACC 2500 #define D6LENFACD 0.1 #define D10LENFACA 0.01 #define D10LENFACB 1000000 #define D10LENFACC 1000000 #define D10LENFACD 0.0 #endif typedef struct _jobtable { int i; int j; } Jobtable; typedef struct _msacompactdistmtxthread_arg { int njob; int thread_no; int *selfscore; double **partmtx; char **seq; int **skiptable; double *mindist; int *mindistfrom; int *jobpospt; #ifdef enablemultithread pthread_mutex_t *mutex; #endif } msacompactdistmtxthread_arg_t; typedef struct _compactdistmtxthread_arg { int njob; int thread_no; int *nogaplen; int **pointt; int *selfscore; double **partmtx; int *jobpospt; double *mindist; int *mindistfrom; #ifdef enablemultithread pthread_mutex_t *mutex; #endif } compactdistmtxthread_arg_t; typedef struct _msadistmtxthread_arg { int njob; int thread_no; int *selfscore; double **iscore; double **partmtx; char **seq; int **skiptable; Jobtable *jobpospt; #ifdef enablemultithread pthread_mutex_t *mutex; #endif } msadistmtxthread_arg_t; #ifdef enablemultithread // ue futatsu ha singlethread demo tsukau typedef struct _treebasethread_arg { int thread_no; int njob; int *nrunpt; int *nlen; int *jobpospt; int ***topol; Treedep *dep; double ***cpmxhist; int **memhist; char **aseq; double *effarr; int *alloclenpt; int *fftlog; char *mergeoralign; double **newdistmtx; int *selfscore; ExtAnch *extanch; int **anchindex; pthread_mutex_t *mutex; pthread_cond_t *treecond; } treebasethread_arg_t; typedef struct _distancematrixthread_arg { int thread_no; int njob; int *jobpospt; int **pointt; double **mtx; pthread_mutex_t *mutex; } distancematrixthread_arg_t; #endif void arguments( int argc, char *argv[] ) { int c; nthread = 1; nthreadpair = 1; nthreadtb = 1; outnumber = 0; topin = 0; treein = 0; treeout = 0; distout = 0; noalign = 0; nevermemsave = 0; inputfile = NULL; nadd = 0; addprofile = 1; fftkeika = 0; constraint = 0; nblosum = 62; fmodel = 0; calledByXced = 0; devide = 0; use_fft = 0; useexternalanchors = 0; oneiteration = 0; force_fft = 0; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'A'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'X'; sueff_global = 0.1; contin = 0; scoremtx = 1; kobetsubunkatsu = 0; dorp = NOTSPECIFIED; ppenalty_dist = NOTSPECIFIED; ppenalty = -1530; ppenalty_ex = NOTSPECIFIED; penalty_shift_factor = 1000.0; poffset = -123; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; TMorJTT = JTT; scoreout = 0; spscoreout = 0; tuplesize = 6; subalignment = 0; subalignmentoffset = 0; legacygapcost = 0; specificityconsideration = 0.0; nguidetree = 1; sparsepickup = 0; keeplength = 0; mapout = 0; smoothing = 0; nwildcard = 0; maxanchorseparation = 1000.0; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( (c = *++argv[0]) ) { switch( c ) { case 'i': inputfile = *++argv; reporterr( "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'I': nadd = myatoi( *++argv ); reporterr( "nadd = %d\n", nadd ); --argc; goto nextoption; case 'V': ppenalty_dist = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); // reporterr( "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'Q': penalty_shift_factor = atof( *++argv ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); reporterr( "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); // reporterr( "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = myatoi( *++argv ); reporterr( "kappa = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = myatoi( *++argv ); scoremtx = 1; // reporterr( "blosum %d / kimura 200 \n", nblosum ); --argc; goto nextoption; case 'j': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = JTT; reporterr( "jtt/kimura %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = myatoi( *++argv ); scoremtx = 0; TMorJTT = TM; reporterr( "tm %d\n", pamN ); --argc; goto nextoption; case 'C': nthreadpair = nthread = myatoi( *++argv ); reporterr( "nthread = %d\n", nthread ); reporterr( "nthreadpair = %d\n", nthread ); if( strchr( *argv, '-' ) ) nthreadtb = myatoi( strchr( *argv, '-' )+1 ); else nthreadtb = nthread; reporterr( "nthreadtb = %d\n", nthreadtb ); --argc; goto nextoption; case 's': specificityconsideration = (double)myatof( *++argv ); // reporterr( "specificityconsideration = %f\n", specificityconsideration ); --argc; goto nextoption; #if 1 case 'a': fmodel = 1; break; #endif case 'K': addprofile = 0; break; case 'y': distout = 1; break; case 't': treeout = 1; break; case '^': treeout = 2; break; case 'T': noalign = 1; break; case 'r': oneiteration = 1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; case 'L': legacygapcost = 1; break; case 'e': fftscore = 0; break; case 'x': maxanchorseparation = myatof( *++argv ); --argc; goto nextoption; case 'H': subalignment = 1; subalignmentoffset = myatoi( *++argv ); --argc; goto nextoption; #if 0 case 'R': fftRepeatStop = 1; break; #endif case 'n' : outnumber = 1; break; #if 0 case 's': treemethod = 's'; break; case 'q': treemethod = 'q'; // minimum break; #endif case 'q': sparsepickup = myatoi( *++argv ); // reporterr( "sparsepickup = %d\n", sparsepickup ); --argc; goto nextoption; case 'X': treemethod = 'X'; sueff_global = atof( *++argv ); // fprintf( stderr, "sueff_global = %f\n", sueff_global ); --argc; goto nextoption; case 'E': nguidetree = myatoi( *++argv ); // reporterr( "nguidetree = %d\n", nguidetree ); --argc; goto nextoption; #if 0 case 'a': alg = 'a'; break; case 'H': alg = 'H'; break; case 'R': alg = 'R'; break; #endif case 'A': alg = 'A'; break; case '&': alg = 'a'; break; case '@': alg = 'd'; break; case 'N': nevermemsave = 1; break; case 'M': alg = 'M'; break; #if 0 case 'S' : scoreout = 1; // for checking parallel calculation break; #else case 'S' : spscoreout = 1; // 2014/Dec/30, sp score break; #endif case 'B': // hitsuyou! memopt -M -B no tame break; case 'F': use_fft = 1; break; case 'l': useexternalanchors = 1; case 'G': use_fft = 1; force_fft = 1; break; #if 0 case 'V': topin = 1; break; #endif case 'U': treein = 1; break; case 'u': weight = 0; tbrweight = 0; break; case 'v': tbrweight = 3; break; #if 1 case 'd': disp = 1; break; #endif #if 1 case 'O': outgap = 0; break; #else case 'O': fftNoAnchStop = 1; break; #endif case 'J': tbutree = 0; break; #if 0 case 'z': fftThreshold = myatoi( *++argv ); --argc; goto nextoption; #endif case 'w': fftWinSize = myatoi( *++argv ); --argc; goto nextoption; case 'W': tuplesize = myatoi( *++argv ); --argc; goto nextoption; #if 0 case 'Z': checkC = 1; break; #endif case 'Y': keeplength = 1; break; case 'z': mapout = 2; break; case 'Z': mapout = 1; break; case 'p': smoothing = 1; break; case ':': nwildcard = 1; break; default: reporterr( "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { reporterr( "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { reporterr( "conflicting options : o, m or u\n" ); exit( 1 ); } } static int varpairscore( int nseq, int npick, int nlenmax, char **seq, int seed ) { int i, j, npair; int *slist; char **pickseq; double score; double scoreav; double scoreav2; double scorestd; double scorevar; slist = calloc( nseq, sizeof( int ) ); pickseq = AllocateCharMtx( npick, nlenmax ); reporterr( "nseq = %d, nlenmax=%d, seed=%d\n", nseq, nlenmax, seed ); srand( seed ); for( i=0; i longestlen[i][0] ) { longestlen[i][0] = seqlen[m]; longestseq[i][0] = m; } // reporterr( "%d ", topol[i][0][j] ); } // reporterr( "longest = %d (%d)\n", longestlen[i][0], longestseq[i][0] ); longestlen[i][1] = -1; longestseq[i][1] = -1; for( j=0; (m=topol[i][1][j])!=-1; j++ ) // sukoshi muda { if( seqlen[m] > longestlen[i][1] ) { longestlen[i][1] = seqlen[m]; longestseq[i][1] = m; } // reporterr( "%d ", topol[i][1][j] ); } // reporterr( "longest = %d (%d)\n", longestlen[i][1], longestseq[i][1] ); } m = 1; for( i=n-2; i>-1; i-- ) { // reporterr( "longest[%d][0] = %d (%d)\n", i, longestlen[i][0], longestseq[i][0] ); // reporterr( "longest[%d][1] = %d (%d)\n", i, longestlen[i][1], longestseq[i][1] ); select[longestseq[i][0]] = 1; select[longestseq[i][1]] = 1; m += 1; if( m >= sparsepickup ) break; } for( i=0, k=0, j=0; injob; int thread_no = targ->thread_no; int *selfscore = targ->selfscore; double **partmtx = targ->partmtx; int *nogaplen = targ->nogaplen; int **pointt = targ->pointt; int *jobpospt = targ->jobpospt; double *mindist = targ->mindist; int *mindistfrom = targ->mindistfrom; int i, j; double tmpdist, preference, tmpdistx; //, tmpdisty; int *table1; while( 1 ) { #ifdef enablemultithread if( nthreadpair ) { pthread_mutex_lock( targ->mutex ); i = *jobpospt; if( i == -1 ) { pthread_mutex_unlock( targ->mutex ); commonsextet_p( NULL, NULL ); return( NULL ); } *jobpospt = i-1; pthread_mutex_unlock( targ->mutex ); } else #endif { i = *jobpospt; if( i == -1 ) { commonsextet_p( NULL, NULL ); return( NULL ); } *jobpospt = i-1; } table1 = (int *)calloc( tsize, sizeof( int ) ); if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); if( i % 100 == 0 ) { if( nthreadpair ) reporterr( "\r% 5d / %d (thread %4d)", njob-i, njob, thread_no ); else reporterr( "\r% 5d / %d", njob-i, njob ); } makecompositiontable_p( table1, pointt[i] ); // for( j=i+1; j-1; j-- ) { tmpdist = distcompact( nogaplen[i], nogaplen[j], table1, pointt[j], selfscore[i], selfscore[j] ); preference = preferenceval( i, j, njob ); tmpdistx = tmpdist + preference; if( tmpdistx < mindist[i] ) { mindist[i] = tmpdistx; mindistfrom[i] = j; } // preference = preferenceval( j, i, njob ); // tmpdisty = tmpdist + preference; // if( tmpdisty < mindist[j] ) // { // mindist[j] = tmpdisty; // mindistfrom[j] = i; // } if( partmtx[i] ) partmtx[i][j] = tmpdist; if( partmtx[j] ) partmtx[j][i] = tmpdist; } free( table1 ); } } static void *ylcompactdisthalfmtxthread( void *arg ) // enablemultithread == 0 demo tsukau { compactdistmtxthread_arg_t *targ = (compactdistmtxthread_arg_t *)arg; int njob = targ->njob; int thread_no = targ->thread_no; int *selfscore = targ->selfscore; double **partmtx = targ->partmtx; int *nogaplen = targ->nogaplen; int **pointt = targ->pointt; int *jobpospt = targ->jobpospt; double *mindist = targ->mindist; int *mindistfrom = targ->mindistfrom; int i, j; double tmpdist, preference, tmpdistx, tmpdisty; int *table1; while( 1 ) { #ifdef enablemultithread if( nthreadpair ) { pthread_mutex_lock( targ->mutex ); i = *jobpospt; if( i == njob-1 ) { pthread_mutex_unlock( targ->mutex ); commonsextet_p( NULL, NULL ); return( NULL ); } *jobpospt = i+1; pthread_mutex_unlock( targ->mutex ); } else #endif { i = *jobpospt; if( i == njob-1 ) { commonsextet_p( NULL, NULL ); return( NULL ); } *jobpospt = i+1; } table1 = (int *)calloc( tsize, sizeof( int ) ); if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); if( i % 100 == 0 ) { if( nthreadpair ) reporterr( "\r% 5d / %d (thread %4d)", i+1, njob, thread_no ); else reporterr( "\r% 5d / %d", i+1, njob ); } makecompositiontable_p( table1, pointt[i] ); for( j=i+1; j-1; j-- ) { tmpdist = distcompact( nogaplen[i], nogaplen[j], table1, pointt[j], selfscore[i], selfscore[j] ); preference = preferenceval( i, j, njob ); tmpdistx = tmpdist + preference; if( tmpdistx < mindist[i] ) { mindist[i] = tmpdistx; mindistfrom[i] = j; } preference = preferenceval( j, i, njob ); tmpdisty = tmpdist + preference; if( tmpdisty < mindist[j] ) { mindist[j] = tmpdisty; mindistfrom[j] = i; } if( partmtx[i] ) partmtx[i][j] = tmpdist; if( partmtx[j] ) partmtx[j][i] = tmpdist; } free( table1 ); } } static void *msacompactdisthalfmtxthread( void *arg ) // enablemultithread == 0 demo tsukau { msacompactdistmtxthread_arg_t *targ = (msacompactdistmtxthread_arg_t *)arg; int njob = targ->njob; int thread_no = targ->thread_no; int *selfscore = targ->selfscore; double **partmtx = targ->partmtx; char **seq = targ->seq; int **skiptable = targ->skiptable; double *mindist = targ->mindist; int *mindistfrom = targ->mindistfrom; int *jobpospt = targ->jobpospt; double tmpdist, preference, tmpdistx; //, tmpdisty; int i, j; while( 1 ) { #ifdef enablemultithread if( nthreadpair ) { pthread_mutex_lock( targ->mutex ); i = *jobpospt; if( i == -1 ) { pthread_mutex_unlock( targ->mutex ); return( NULL ); } *jobpospt = i-1; pthread_mutex_unlock( targ->mutex ); } else #endif { i = *jobpospt; if( i == -1 ) { return( NULL ); } *jobpospt = i-1; } if( i % 100 == 0 ) { if( nthreadpair ) fprintf( stderr, "\r% 5d / %d (thread %4d)", njob-i, njob, thread_no ); else fprintf( stderr, "\r% 5d / %d", i, njob ); } for( j=i-1; j>-1; j-- ) // for( j=i+1; jnjob; int thread_no = targ->thread_no; int *selfscore = targ->selfscore; double **partmtx = targ->partmtx; char **seq = targ->seq; int **skiptable = targ->skiptable; double *mindist = targ->mindist; int *mindistfrom = targ->mindistfrom; int *jobpospt = targ->jobpospt; double tmpdist, preference, tmpdistx, tmpdisty; int i, j; while( 1 ) { #ifdef enablemultithread if( nthreadpair ) { pthread_mutex_lock( targ->mutex ); i = *jobpospt; if( i == njob-1 ) { pthread_mutex_unlock( targ->mutex ); return( NULL ); } *jobpospt = i+1; pthread_mutex_unlock( targ->mutex ); } else #endif { i = *jobpospt; if( i == njob-1 ) { return( NULL ); } *jobpospt = i+1; } if( i % 100 == 0 ) { if( nthreadpair ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no ); else fprintf( stderr, "\r% 5d / %d", i, njob ); } // for( j=i-1; j>-1; j-- ) for( j=i+1; jnjob; int thread_no = targ->thread_no; int *selfscore = targ->selfscore; double **iscore = targ->iscore; char **seq = targ->seq; int **skiptable = targ->skiptable; Jobtable *jobpospt = targ->jobpospt; double ssi, ssj, bunbo, iscoretmp; int i, j; int nlim = njob-1; while( 1 ) { #ifdef enablemultithread if( nthreadpair ) { pthread_mutex_lock( targ->mutex ); i = jobpospt->i; // (jobpospt-i)++ dato, shuuryou hantei no mae ni ++ surunode, tomaranakunaru. if( i == nlim ) { pthread_mutex_unlock( targ->mutex ); return( NULL ); } jobpospt->i += 1; pthread_mutex_unlock( targ->mutex ); if( i % 100 == 0 ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no ); } else #endif { i = (jobpospt->i)++; if( i == nlim ) return( NULL ); if( i % 100 == 0 ) fprintf( stderr, "\r% 5d / %d", i, njob ); } ssi = selfscore[i]; for( j=i+1; j 10 ) iscoretmp = 10.0; // 2015/Mar/17 } if( iscoretmp < 0.0 ) { reporterr( "WARNING: negative distance, iscoretmp = %f\n", iscoretmp ); iscoretmp = 0.0; } iscore[i][j-i] = iscoretmp; // printf( "i,j=%d,%d, iscoretmp=%f\n", i, j, iscoretmp ); } } } #else static void *msadistmtxthread( void *arg ) // enablemultithread == 0 demo tsukau { msadistmtxthread_arg_t *targ = (msadistmtxthread_arg_t *)arg; int njob = targ->njob; int thread_no = targ->thread_no; int *selfscore = targ->selfscore; double **iscore = targ->iscore; char **seq = targ->seq; int **skiptable = targ->skiptable; Jobtable *jobpospt = targ->jobpospt; double ssi, ssj, bunbo, iscoretmp; int i, j; while( 1 ) { #ifdef enablemultithread if( nthreadpair ) pthread_mutex_lock( targ->mutex ); #endif j = jobpospt->j; i = jobpospt->i; j++; if( j == njob ) { i++; j = i + 1; if( i == njob-1 ) { #ifdef enablemultithread if( nthreadpair ) pthread_mutex_unlock( targ->mutex ); #endif return( NULL ); } } jobpospt->j = j; jobpospt->i = i; #ifdef enablemultithread if( nthreadpair ) pthread_mutex_unlock( targ->mutex ); #endif if( nthreadpair ) { if( j==i+1 && i % 10 == 0 ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no ); } else { if( j==i+1 && i % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", i, njob ); } ssi = selfscore[i]; ssj = selfscore[j]; bunbo = MIN( ssi, ssj ); //fprintf( stderr, "bunbo = %f\n", bunbo ); //fprintf( stderr, "naivepairscorefast() = %f\n", naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty_dist ) ); if( bunbo == 0.0 ) iscoretmp = 2.0; // 2013/Oct/17 else { iscoretmp = ( 1.0 - naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty_dist ) / bunbo ) * 2.0; // 2014/Aug/15 fast if( iscoretmp > 10 ) iscoretmp = 10.0; // 2015/Mar/17 } iscore[i][j-i] = iscoretmp; } } #endif #ifdef enablemultithread static void *distancematrixthread( void *arg ) { distancematrixthread_arg_t *targ = (distancematrixthread_arg_t *)arg; int thread_no = targ->thread_no; int njob = targ->njob; int *jobpospt = targ->jobpospt; int **pointt = targ->pointt; double **mtx = targ->mtx; int *table1; int i, j; while( 1 ) { pthread_mutex_lock( targ->mutex ); i = *jobpospt; if( i == njob ) { pthread_mutex_unlock( targ->mutex ); commonsextet_p( NULL, NULL ); return( NULL ); } *jobpospt = i+1; pthread_mutex_unlock( targ->mutex ); table1 = (int *)calloc( tsize, sizeof( int ) ); if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); if( i % 100 == 0 ) { reporterr( "\r% 5d / %d (thread %4d)", i+1, njob, thread_no ); } makecompositiontable_p( table1, pointt[i] ); for( j=i; j-1; j++ ) { if( pairanch[j].i == k ) { // reporterr( "pairanch[%d].endi: %d->%d\n", j, pairanch[j].endi, map[pairanch[j].endi] ); pairanch[j].starti = map[pairanch[j].starti]; pairanch[j].endi = map[pairanch[j].endi]; } } } free( map ); len = strlen( seq2[0] )+1; map = calloc( sizeof( int ), len ); for( k=0; k-1; j++ ) { if( pairanch[j].j == k ) { // reporterr( "pairanch[%d].endj: %d->%d\n", j, pairanch[j].endj, map[pairanch[j].endj] ); pairanch[j].startj = map[pairanch[j].startj]; pairanch[j].endj = map[pairanch[j].endj]; } } } free( map ); } static int anchidcomp( const void *p, const void *q ) { if ( ((ExtAnch *)q)->i != ((ExtAnch *)p)->i ) return ((ExtAnch *)p)->i - ((ExtAnch *)q)->i; return ((ExtAnch *)p)->j - ((ExtAnch *)q)->j; } static int anchcomp( const void *p, const void *q ) { if ( ((ExtAnch *)q)->starti != ((ExtAnch *)p)->starti ) return ((ExtAnch *)p)->starti - ((ExtAnch *)q)->starti; return (int)((void *)p - (void *)q); } static int anchscorecomp( const void *p, const void *q ) { if ( ((ExtAnch *)q)->score != ((ExtAnch *)p)->score ) return ((ExtAnch *)q)->score - ((ExtAnch *)p)->score; return (int)((void *)q - (void *)p); } static void indexanchors( ExtAnch *a, int **idx ) { int n; for( n=0; a[n].i>-1; n++ ) ; qsort( a, n, sizeof( ExtAnch ), anchidcomp ); for( n=0; a[n].i>-1; n++ ) { // reporterr( "%d, %dx%d, %d-%d x %d-%d\n", n, a[n].i, a[n].j, a[n].starti, a[n].endi, a[n].startj, a[n].endj ); if( idx[a[n].i][a[n].j] == -1 ) idx[a[n].i][a[n].j] = n; } #if 0 int m; for( n=0; n %d\n", n, m, idx[n][m] ); exit( 1 ); #endif } #if 0 static void checkanchors_internal( ExtAnch *a ) { int p, q, r, s; int i, j; int consistent; int m; #if 0 reporterr( "before sortscore\n" ); for( p=0; a[p].i>-1; p++ ) { reporterr( "a[%d].starti,j=%d,%d, score=%d\n", p, a[p].starti, a[p].startj, a[p].score ); } #endif for( r=0; a[r].i>-1; ) { i = a[r].i; j = a[r].j; s = r; for( ; i==a[r].i && j==a[r].j; r++ ) ; // reporterr( "s=%d, r=%d\n", s, r ); qsort( a+s, r-s, sizeof( ExtAnch ), anchscorecomp ); #if 0 reporterr( "after sortscore, r=%d\n", r ); for( p=s; p m ) m = a[q].score; // reporterr( "INconsistent\n" ); // reporterr( "p=%d, q=%d\n", p, q ); // reporterr( "p: a[%d].regi,regj=%d-%d,%d-%d, score=%d\n", p, a[p].starti, a[p].endi, a[p].startj, a[p].endj, a[p].score ); // reporterr( "q: a[%d].regi,regj=%d-%d,%d-%d, score=%d\n", q, a[q].starti, a[q].endi, a[q].startj, a[q].endj, a[q].score ); // a[q].starti = a[q].startj = a[q].startj = a[q].endj = -1; // a[q].score = a[p].score - a[q].score; // ?? // a[q].score = ( a[p].score + a[q].score ) / 2; // ?? a[q].score = 0; } } if( !consistent ) // a[p].score = ( a[p].score + m ) / 2; // >= 0 a[p].score -= m; // >= 0 // a[p].score = 0; } } #if 0 reporterr( "after filtering\n" ); for( p=0; a[p].i>-1; p++ ) { reporterr( "a[%d].starti,j=%d,%d, score=%d\n", p, a[p].starti, a[p].startj, a[p].score ); } exit( 1 ); #endif } #endif static void checkanchors_strongestfirst( ExtAnch *a, int s, double gapratio1, double gapratio2 ) { int p, q; double zureij; double nogaplenestimation1; double nogaplenestimation2; #if 0 reporterr( "before sortscore\n" ); for( p=0; a[p].i>-1; p++ ) { reporterr( "a[%d].starti,j=%d,%d, score=%d\n", p, a[p].starti, a[p].startj, a[p].score ); } #endif qsort( a, s, sizeof( ExtAnch ), anchscorecomp ); nogaplenestimation1 = (double)a[0].starti / (1.0+gapratio1); nogaplenestimation2 = (double)a[0].startj / (1.0+gapratio2); zureij = nogaplenestimation1 - nogaplenestimation2; for( p=0; a[p].i>-1; p++ ) { if( a[p].starti == -1 ) continue; #if 0 nogaplenestimation1 = (double)a[p].starti / (1.0+gapratio1); nogaplenestimation2 = (double)a[p].startj / (1.0+gapratio2); if( fabs( zureij - ( nogaplenestimation1 - nogaplenestimation2 ) ) > maxanchorseparation ) { // reporterr( "warning: long internal gaps in %d-%d, |%5.2f-%5.2f - %5.2f| = %5.2f > %5.2f\n", a[p].i, a[p].j, nogaplenestimation1, nogaplenestimation2, zureij, fabs( zureij - ( nogaplenestimation1, nogaplenestimation2 ) ), maxanchorseparation ); a[p].starti = a[p].startj = a[p].startj = a[p].endj = -1; continue; } #else int nearest, mindist; double zurei, zurej; if( p ) { mindist = 999999999; for( q=0; q maxanchorseparation ) // if( fabs( zurei - zurej ) > maxanchorseparation || zurei > maxanchorseparation || zurej > maxanchorseparation ) // test { // reporterr( "warning: long internal gaps in %d-%d, |%5.2f-%5.2f - %5.2f| = %5.2f > %5.2f\n", a[p].i, a[p].j, nogaplenestimation1, nogaplenestimation2, zureij, fabs( zureij - ( nogaplenestimation1, nogaplenestimation2 ) ), maxanchorseparation ); a[p].starti = a[p].startj = a[p].startj = a[p].endj = -1; continue; } #endif // reporterr( "P score=%d, %d-%d, %d-%d\n", a[p].score, a[p].starti, a[p].endi, a[p].startj, a[p].endj ); for( q=p+1; a[q].i>-1; q++ ) { if( a[q].starti == -1 ) continue; // reporterr( "Q score=%d, %d-%d, %d-%d\n", a[q].score, a[q].starti, a[q].endi, a[q].startj, a[q].endj ); if( a[p].endi < a[q].starti && a[p].endj < a[q].startj ) { // reporterr( "consistent\n" ); ; } else if( a[p].endi == a[q].starti && a[p].endj < a[q].startj && a[q].starti-1; p++ ) { reporterr( "a[%d].starti,j=%d,%d, score=%d\n", p, a[p].starti, a[p].startj, a[p].score ); } #endif } static double gapnongapratio( int n, char **s ) { int i, j, len; char *seq, *pt1, *pt2; double fv, ng; len = strlen( s[0] ); seq = calloc( len+1, sizeof( char ) ); fv = 0.0; ng = 0.0; for( i=0; i jump to %d\n", i, j, m1[i], m2[j], anchindex[m1[i]][m2[j]] ); k = anchindex[m1[i]][m2[j]]; while( ( k!=-1 ) && ( extanch[k].i == m1[i] && extanch[k].j == m2[j] ) ) { s++; k++; } } else { // reporterr( "%dx%d, %dx%d -> jump to %d\n", j, i, m1[i], m2[j], anchindex[m2[j]][m1[i]] ); k = anchindex[m2[j]][m1[i]]; while( ( k!=-1 ) && ( extanch[k].i == m2[j] && extanch[k].j == m1[i] ) ) { s++; k++; } } #else k = 0; while( extanch[k].i > -1 ) // kanari muda { //reporterr( "m1[i],m2[j]=%d,%d ? extanch[k].i,j=%d,%d k=%d\n", m1[i], m2[j], extanch[k].i, extanch[k].j, k ); if( ( extanch[k].i == m1[i] && extanch[k].j == m2[j] ) || ( extanch[k].i == m2[j] && extanch[k].j == m1[i] ) ) { //reporterr( "hit, extanch[k].startj=%d\n", extanch[k].startj ); s++; } k++; } #endif } *pairanch = calloc( sizeof( ExtAnch ), s+1 ); s = 0; for( i=0; i -1 ) // kanari muda { if( extanch[k].i == m1[i] && extanch[k].j == m2[j] ) { (*pairanch)[s].i = i; (*pairanch)[s].j = j; (*pairanch)[s].starti = extanch[k].starti; // map mae (*pairanch)[s].endi = extanch[k].endi; // map mae (*pairanch)[s].startj = extanch[k].startj; // map mae (*pairanch)[s].endj = extanch[k].endj; // map mae (*pairanch)[s].score = extanch[k].score; s++; } if( extanch[k].j == m1[i] && extanch[k].i == m2[j] ) { (*pairanch)[s].i = i; (*pairanch)[s].j = j; (*pairanch)[s].starti = extanch[k].startj; // map mae (*pairanch)[s].endi = extanch[k].endj; // map mae (*pairanch)[s].startj = extanch[k].starti; // map mae (*pairanch)[s].endj = extanch[k].endi; // map mae (*pairanch)[s].score = extanch[k].score; s++; } k++; } #endif } (*pairanch)[s].i = (*pairanch)[s].j = -1; recountpositions( *pairanch, n1, n2, seq1, seq2 ); // truncateseq_group( *pairanch, seq1, seq2, n1, n2 ); // copybackanchors( *pairanch, ddn1, n2, seq1, seq2 ); // tabun dame #if 0 reporterr( "Before check\n" ); for( k=0; (*pairanch)[k].i>-1; k++ ) { if( (*pairanch)[k].starti!=-1) reporterr( "seq1-%d,seq2-%d %d-%d,%d-%d\n", (*pairanch)[k].i, (*pairanch)[k].j, (*pairanch)[k].starti, (*pairanch)[k].endi, (*pairanch)[k].startj, (*pairanch)[k].endj ); } #endif #if 0 reporterr( "\ngroup1=\n" ); for( i=0; m1[i]>-1; i++ ) reporterr( "%d ", m1[i] ); reporterr( "\n" ); reporterr( "\ngroup2=\n" ); for( i=0; m2[i]>-1; i++ ) reporterr( "%d ", m2[i] ); reporterr( "\n" ); #endif checkanchors_strongestfirst( *pairanch, s, gapnongapratio( n1, seq1 ), gapnongapratio( n2, seq2 ) ); // qsort( *pairanch, s, sizeof( ExtAnch ), anchcomp ); // checkanchors_new( *pairanch ); #if 0 reporterr( "After check\n" ); for( k=0; (*pairanch)[k].i>-1; k++ ) { if( (*pairanch)[k].starti!=-1) reporterr( "seq1-%d,seq2-%d %d-%d,%d-%d\n", (*pairanch)[k].i, (*pairanch)[k].j, (*pairanch)[k].starti, (*pairanch)[k].endi, (*pairanch)[k].startj, (*pairanch)[k].endj ); } #endif } static void *treebasethread( void *arg ) { treebasethread_arg_t *targ = (treebasethread_arg_t *)arg; int thread_no = targ->thread_no; int *nrunpt = targ->nrunpt; int njob = targ->njob; int *nlen = targ->nlen; int *jobpospt = targ->jobpospt; int ***topol = targ->topol; Treedep *dep = targ->dep; double ***cpmxhist = targ->cpmxhist; int **memhist = targ->memhist; char **aseq = targ->aseq; double *effarr = targ->effarr; int *alloclen = targ->alloclenpt; int *fftlog = targ->fftlog; char *mergeoralign = targ->mergeoralign; double **newdistmtx = targ->newdistmtx; int *selfscore = targ->selfscore; ExtAnch *extanch = targ->extanch; int **anchindex = targ->anchindex; char **mseq1, **mseq2; char **localcopy; int i, m, j, l; int immin, immax; int len1, len2; int clus1, clus2; double pscore, tscore; char *indication1, *indication2; double *effarr1 = NULL; double *effarr2 = NULL; // double dumfl = 0.0; double dumdb = 0.0; int ffttry; int m1, m2; double **dynamicmtx; int ssi, ssm, bunbo; int tm, ti; int **localmem = NULL; double ***cpmxchild0, ***cpmxchild1; double orieff1, orieff2; ExtAnch *pairanch = NULL; #if SKIP int **skiptable1 = NULL, **skiptable2 = NULL; #endif #if 0 int i, j; #endif tscore = 0; mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); localcopy = calloc( njob, sizeof( char * ) ); for( i=0; imutex ); l = *jobpospt; if( l == njob-1 ) { pthread_mutex_unlock( targ->mutex ); if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL ); Falign_udpari_long( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); Falign_givenanchors( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); A__align( NULL, 0, 0, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); D__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); G__align11( NULL, NULL, NULL, 0, 0, 0 ); // iru? free( mseq1 ); free( mseq2 ); free( localcopy ); free( effarr1 ); free( effarr2 ); free( indication1 ); free( indication2 ); if( specificityconsideration ) FreeDoubleMtx( dynamicmtx ); free( localmem ); return( NULL ); } *jobpospt = l+1; // reporterr( "l=%d, child0=%d, child1=%d\n", l, dep[l].child0, dep[l].child1 ); if( dep[l].child0 != -1 ) { while( dep[dep[l].child0].done == 0 ) pthread_cond_wait( targ->treecond, targ->mutex ); } if( dep[l].child1 != -1 ) { while( dep[dep[l].child1].done == 0 ) pthread_cond_wait( targ->treecond, targ->mutex ); } // while( *nrunpt >= nthread ) // bug while( *nrunpt >= nthreadtb ) // tabun iranai pthread_cond_wait( targ->treecond, targ->mutex ); // tabun iranai (*nrunpt)++; m1 = topol[l][0][0]; m2 = topol[l][1][0]; #if 0 localmem[0][0] = -1; posinmem=topolorderz( localmem[0], topol, dep, l, 0 ) - localmem[0]; localmem[1][0] = -1; posinmem=topolorderz( localmem[1], topol, dep, l, 1 ) - localmem[1]; #else if( dep[l].child0 == -1 ) { localmem[0] = calloc( sizeof( int ), 2 ); localmem[0][0] = m1; localmem[0][1] = -1; clus1 = 1; } else { localmem[0] = memhist[dep[l].child0]; clus1 = intlen( localmem[0] ); } if( dep[l].child1 == -1 ) { localmem[1] = calloc( sizeof( int ), 2 ); localmem[1][0] = m2; localmem[1][1] = -1; clus2 = 1; } else { localmem[1] = memhist[dep[l].child1]; clus2 = intlen( localmem[1] ); } if( l != njob-2 ) { memhist[l] = calloc( sizeof( int ), clus1+clus2+1 ); intcpy( memhist[l], localmem[0] ); intcpy( memhist[l]+clus1, localmem[1] ); memhist[l][clus1+clus2] = -1; } #endif // moved, 2018/Mar/10. Must be after changing memhist[l] if( mergeoralign[l] == 'n' ) { // reporterr( "SKIP!\n" ); dep[l].done = 1; (*nrunpt)--; pthread_cond_broadcast( targ->treecond ); // free( topol[l][0] ); topol[l][0] = NULL; // free( topol[l][1] ); topol[l][1] = NULL; // free( topol[l] ); topol[l] = NULL; pthread_mutex_unlock( targ->mutex ); free( localmem[0] ); free( localmem[1] ); continue; } // reporterr( "l=%d, dep[l].child0=%d, dep[l].child1=%d\n", l, dep[l].child0, dep[l].child1 ); if( dep[l].child0 == -1 ) cpmxchild0 = NULL; else cpmxchild0 = cpmxhist+dep[l].child0; if( dep[l].child1 == -1 ) cpmxchild1 = NULL; else cpmxchild1 = cpmxhist+dep[l].child1; // reporterr( "cpmxchild0=%p, cpmxchild1=%p\n", cpmxchild0, cpmxchild1 ); // reporterr( "\ndistfromtip = %f\n", dep[l].distfromtip ); if( specificityconsideration ) makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[l].distfromtip ); else dynamicmtx = n_dis_consweight_multi; // reporterr( "dynamicmtx[0][1] = %f\n", dynamicmtx[0][1] ); len1 = strlen( aseq[m1] ); len2 = strlen( aseq[m2] ); if( *alloclen <= len1 + len2 ) { reporterr( "\nReallocating.." ); *alloclen = ( len1 + len2 ) + 1000; ReallocateCharMtx( aseq, njob, *alloclen + 10 ); reporterr( "done. *alloclen = %d\n", *alloclen ); } for( i=0; (j=localmem[0][i])!=-1; i++ ) { localcopy[j] = calloc( *alloclen, sizeof( char ) ); strcpy( localcopy[j], aseq[j] ); // localcopy[j] = aseq[j]; } for( i=0; (j=localmem[1][i])!=-1; i++ ) { localcopy[j] = calloc( *alloclen, sizeof( char ) ); strcpy( localcopy[j], aseq[j] ); // localcopy[j] = aseq[j]; } if( !nevermemsave && ( alg != 'M' && ( len1 > 30000 || len2 > 30000 ) ) ) { reporterr( "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 ); alg = 'M'; } if( alg == 'M' ) // hoka no thread ga M ni shitakamo shirenainode { // reporterr( "Freeing commonIP (thread %d)\n", thread_no ); if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; commonAlloc1 = 0; commonAlloc2 = 0; } pthread_mutex_unlock( targ->mutex ); #if 1 // CHUUI@@@@ clus1 = fastconjuction_noname( localmem[0], localcopy, mseq1, effarr1, effarr, indication1, 0.0, &orieff1 ); clus2 = fastconjuction_noname( localmem[1], localcopy, mseq2, effarr2, effarr, indication2, 0.0, &orieff2 ); #else clus1 = fastconjuction_noweight( topol[l][0], localcopy, mseq1, effarr1, indication1 ); clus2 = fastconjuction_noweight( topol[l][1], localcopy, mseq2, effarr2, indication2 ); #endif #if 0 for( i=0; i 66 ) reporterr( "..." ); reporterr( "\n" ); reporterr( "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) reporterr( "..." ); reporterr( "\n" ); #endif /* reporterr( "before align all\n" ); display( aseq, njob ); reporterr( "\n" ); reporterr( "before align 1 %s \n", indication1 ); display( mseq1, clus1 ); reporterr( "\n" ); reporterr( "before align 2 %s \n", indication2 ); display( mseq2, clus2 ); reporterr( "\n" ); */ // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000); // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); // else ffttry = 0; ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000); // v6.708 // reporterr( "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (double)len1/fftlog[m1], clus1, (double)len2/fftlog[m2], clus2 ); // reporterr( "fftlog=%d,%d, ffttry=%d\n", fftlog[m1], fftlog[m2], ffttry ); if( useexternalanchors ) { // reporterr( "%%%% %d vs %d\n", m1, m2 ); pickpairanch( &pairanch, extanch, anchindex, clus1, clus2, localmem[0], localmem[1], mseq1, mseq2 ); // reporterr( "pairanch: %d:%d\n", pairanch[0].starti, pairanch[0].startj ); pscore = Falign_givenanchors( pairanch, NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 ); free( pairanch ); pairanch = NULL; } else if( force_fft || ( use_fft && ffttry ) ) { if( l < 500 || l % 100 == 0 ) reporterr( " f\b\b" ); if( alg == 'M' ) { if( l < 500 || l % 100 == 0 ) reporterr( "m" ); pscore = Falign_udpari_long( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 ); } else { pscore = Falign( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL ); } } else { if( l < 500 || l % 100 == 0 ) reporterr( " d\b\b" ); fftlog[m1] = 0; switch( alg ) { case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen ); break; case( 'M' ): if( l < 500 || l % 100 == 0 ) reporterr( "m" ); if( l < 500 || l % 100 == 0 ) if( ( cpmxchild1 && *cpmxchild1 ) || ( cpmxchild0 && *cpmxchild0 ) ) reporterr( " h" ); // reporterr( "%d-%d", clus1, clus2 ); pscore = MSalignmm( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, cpmxchild0, cpmxchild1, cpmxhist+l, orieff1, orieff2 ); break; case( 'd' ): if( 1 && clus1 == 1 && clus2 == 1 ) { // reporterr( "%d-%d", clus1, clus2 ); pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap ); } else { // reporterr( "%d-%d", clus1, clus2 ); pscore = D__align_ls( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); } break; case( 'A' ): if( clus1 == 1 && clus2 == 1 ) { // reporterr( "%d-%d", clus1, clus2 ); pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap ); } else { // reporterr( "%d-%d", clus1, clus2 ); if( l < 500 || l % 100 == 0 ) if( ( cpmxchild1 && *cpmxchild1 ) || ( cpmxchild0 && *cpmxchild0 ) ) reporterr( " h" ); pscore = A__align( dynamicmtx, penalty, penalty_ex, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, -1, -1, cpmxchild0, cpmxchild1, cpmxhist+l, orieff1, orieff2 ); } break; default: ErrorExit( "ERROR IN SOURCE FILE" ); } } #if SCOREOUT reporterr( "score = %10.2f\n", pscore ); #endif tscore += pscore; nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] ); if( disp ) display( localcopy, njob ); if( newdistmtx ) // tsukawanai { #if 0 reporterr( "group1 = " ); for( i=0; imutex ); dep[l].done = 1; (*nrunpt)--; pthread_cond_broadcast( targ->treecond ); for( i=0; (j=localmem[0][i])!=-1; i++ ) strcpy( aseq[j], localcopy[j] ); for( i=0; (j=localmem[1][i])!=-1; i++ ) strcpy( aseq[j], localcopy[j] ); // reporterr( "at step %d\n", l ); // use_getrusage(); pthread_mutex_unlock( targ->mutex ); for( i=0; (j=localmem[0][i])!=-1; i++ ) { if(localcopy[j] ) free( localcopy[j] ); localcopy[j] = NULL; } for( i=0; (j=localmem[1][i])!=-1; i++ ) { if( localcopy[j] ) free( localcopy[j] ); localcopy[j] = NULL; } // if( topol[l][0] ) free( topol[l][0] ); // topol[l][0] = NULL; // if( topol[l][1] ) free( topol[l][1] ); // topol[l][1] = NULL; // if( topol[l] ) free( topol[l] ); // topol[l] = NULL; // reporterr( "\n" ); free( localmem[0] ); free( localmem[1] ); } #if SCOREOUT reporterr( "totalscore = %10.2f\n\n", tscore ); #endif } #endif static int dooneiteration( int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, Treedep *dep, int **memhist, double ***cpmxhist, double *effarr, double **newdistmtx, int *selfscore, ExtAnch *extanch, int **anchindex, int *alloclen, int (*callback)(int, int, char*) ) { int l, ll, len1, len2, i, j; int clus1, clus2; double pscore; char *indication1 = NULL, *indication2 = NULL; double *effarr1 = NULL; double *effarr2 = NULL; int *fftlog = NULL; // fixed at 2006/07/26 // double dumfl = 0.0; double dumdb = 0.0; int ffttry; int m1, m2; int *alreadyaligned = NULL; double **dynamicmtx = NULL; int **localmem = NULL; double ***cpmxchild0, ***cpmxchild1; double orieff1, orieff2; double oscore, nscore; ExtAnch *pairanch; char **oseq1, **oseq2; #if SKIP int **skiptable1 = NULL, **skiptable2 = NULL; #endif #if 0 int i, j; #endif if( effarr1 == NULL ) { effarr1 = AllocateDoubleVec( njob ); effarr2 = AllocateDoubleVec( njob ); indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); fftlog = AllocateIntVec( njob ); alreadyaligned = AllocateIntVec( njob ); if( specificityconsideration ) dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets ); localmem = calloc( sizeof( int * ), 2 ); } for( i=0; i 66 ) reporterr( "..." ); reporterr( "\n" ); reporterr( "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) reporterr( "..." ); reporterr( "\n" ); #endif /* reporterr( "before align all\n" ); display( aseq, njob ); reporterr( "\n" ); reporterr( "before align 1 %s \n", indication1 ); display( mseq1, clus1 ); reporterr( "\n" ); reporterr( "before align 2 %s \n", indication2 ); display( mseq2, clus2 ); reporterr( "\n" ); */ if( !nevermemsave && ( alg != 'M' && ( len1 > 30000 || len2 > 30000 ) ) ) { reporterr( "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 ); alg = 'M'; if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; commonAlloc1 = 0; commonAlloc2 = 0; } // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000); ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000); // v6.708 // reporterr( "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (double)len1/fftlog[m1], clus1, (double)len2/fftlog[m2], clus2 ); if( useexternalanchors ) { pickpairanch( &pairanch, extanch, anchindex, clus1, clus2, localmem[0], localmem[1], mseq1, mseq2 ); // reporterr( "pairanch: %d:%d\n", pairanch[0].starti, pairanch[0].startj ); pscore = Falign_givenanchors( pairanch, NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 ); free( pairanch ); pairanch = NULL; } else if( force_fft || ( use_fft && ffttry ) ) { if( l < 500 || l % 100 == 0 ) reporterr( " f\b\b" ); if( alg == 'M' ) { if( l < 500 || l % 100 == 0 ) reporterr( "m" ); pscore = Falign_udpari_long( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 ); } else { pscore = Falign( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL ); // reporterr( "######### mseq1[0] = %s\n", mseq1[0] ); } } else { if( l < 500 || l % 100 == 0 ) reporterr( " d\b\b" ); fftlog[m1] = 0; switch( alg ) { case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen ); break; case( 'M' ): if( l < 500 || l % 100 == 0 ) reporterr( "m" ); if( l < 500 || l % 100 == 0 ) if( ( cpmxchild1 && *cpmxchild1 ) || ( cpmxchild0 && *cpmxchild0 ) ) reporterr( " h" ); // reporterr( "%d-%d", clus1, clus2 ); pscore = MSalignmm( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, cpmxchild0, cpmxchild1, cpmxhist+l, orieff1, orieff2 ); break; case( 'd' ): if( 1 && clus1 == 1 && clus2 == 1 ) { // reporterr( "%d-%d", clus1, clus2 ); pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap ); } else { // reporterr( "%d-%d", clus1, clus2 ); pscore = D__align_ls( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); } break; case( 'A' ): if( clus1 == 1 && clus2 == 1 ) { // reporterr( "%d-%d", clus1, clus2 ); pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap ); } else { if( l < 500 || l % 100 == 0 ) if( ( cpmxchild1 && *cpmxchild1 ) || ( cpmxchild0 && *cpmxchild0 ) ) reporterr( " h" ); // reporterr( "\n\n %d - %d (%d x %d) : \n", topol[l][0][0], topol[l][1][0], clus1, clus2 ); pscore = A__align( dynamicmtx, penalty, penalty_ex, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, localmem[0][0], 1, cpmxchild0, cpmxchild1, cpmxhist+l, orieff1, orieff2 ); } break; default: ErrorExit( "ERROR IN SOURCE FILE" ); } } intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, strlen( mseq1[0] ), &nscore ); #if SCOREOUT reporterr( "score = %10.2f\n", pscore ); #endif if( nscore < oscore ) { for( i=0; ig1\n%s\n", mseq1[i] ); for( i=0; ig2\n%s\n", mseq2[i] ); exit( 1 ); } #endif // free( topol[l][0] ); topol[l][0] = NULL; // free( topol[l][1] ); topol[l][1] = NULL; // free( topol[l] ); topol[l] = NULL; // reporterr( ">514\n%s\n", aseq[514] ); free( localmem[0] ); free( localmem[1] ); } Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL ); Falign_udpari_long( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); Falign_givenanchors( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); A__align( NULL, 0, 0, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); D__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); G__align11( NULL, NULL, NULL, 0, 0, 0 ); // iru? free( effarr1 ); free( effarr2 ); free( indication1 ); free( indication2 ); free( fftlog ); if( specificityconsideration ) FreeDoubleMtx( dynamicmtx ); free( alreadyaligned ); free( localmem ); effarr1 = NULL; return( 0 ); chudan_tbfast: Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL ); Falign_udpari_long( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); Falign_givenanchors( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); A__align( NULL, 0, 0, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); D__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); G__align11( NULL, NULL, NULL, 0, 0, 0 ); // iru? if( effarr1 ) free( effarr1 ); effarr1 = NULL; if( effarr2 ) free( effarr2 ); effarr2 = NULL; if( indication1 ) free( indication1 ); indication1 = NULL; if( indication2 ) free( indication2 ); indication2 = NULL; if( fftlog ) free( fftlog ); fftlog = NULL; if( alreadyaligned ) free( alreadyaligned ); alreadyaligned = NULL; if( specificityconsideration ) { if( dynamicmtx ) FreeDoubleMtx( dynamicmtx ); dynamicmtx = NULL; } if( localmem ) free( localmem ); localmem = NULL; #if SKIP if( skiptable1 ) FreeIntMtx( skiptable1 ); skiptable1 = NULL; if( skiptable2 ) FreeIntMtx( skiptable2 ); skiptable2 = NULL; #endif return( 1 ); } static int treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, Treedep *dep, int **memhist, double ***cpmxhist, double *effarr, double **newdistmtx, int *selfscore, ExtAnch *extanch, int **anchindex, int *alloclen, int (*callback)(int, int, char*) ) { int l, len1, len2, i, m, immin, immax; int len1nocommongap, len2nocommongap; int clus1, clus2; double pscore, tscore; char *indication1 = NULL, *indication2 = NULL; double *effarr1 = NULL; double *effarr2 = NULL; int *fftlog = NULL; // fixed at 2006/07/26 // double dumfl = 0.0; double dumdb = 0.0; int ffttry; int m1, m2; int *gaplen = NULL; int *gapmap = NULL; int *alreadyaligned = NULL; double **dynamicmtx = NULL; double ssi, ssm, bunbo; int tm, ti; int gapmaplen; int **localmem = NULL; double ***cpmxchild0, ***cpmxchild1; double orieff1, orieff2; ExtAnch *pairanch; #if SKIP int **skiptable1 = NULL, **skiptable2 = NULL; #endif #if 0 int i, j; #endif if( effarr1 == NULL ) { effarr1 = AllocateDoubleVec( njob ); effarr2 = AllocateDoubleVec( njob ); indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); fftlog = AllocateIntVec( njob ); gaplen = AllocateIntVec( *alloclen+10 ); gapmap = AllocateIntVec( *alloclen+10 ); alreadyaligned = AllocateIntVec( njob ); if( specificityconsideration ) dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets ); localmem = calloc( sizeof( int * ), 2 ); } for( i=0; i 0 && dep[l].child0 == l-1 && dep[l].child1 == -1 && dep[dep[l].child0].child1 == -1 ) { localmem[0][clus1] = topol[l-1][1][0]; localmem[0][clus1+1] = -1; localmem[1][0] = topol[l][1][0]; localmem[1][1] = -1; } else { localmem[0][0] = -1; posinmem = topolorderz( localmem[0], topol, dep, l, 0 ) - localmem[0]; localmem[1][0] = -1; posinmem = topolorderz( localmem[1], topol, dep, l, 1 ) - localmem[1]; } #else if( dep[l].child0 == -1 ) { localmem[0] = calloc( sizeof( int ), 2 ); localmem[0][0] = m1; localmem[0][1] = -1; clus1 = 1; } else { localmem[0] = memhist[dep[l].child0]; clus1 = intlen( localmem[0] ); } if( dep[l].child1 == -1 ) { localmem[1] = calloc( sizeof( int ), 2 ); localmem[1][0] = m2; localmem[1][1] = -1; clus2 = 1; } else { localmem[1] = memhist[dep[l].child1]; clus2 = intlen( localmem[1] ); } if( l != njob-2 ) { memhist[l] = calloc( sizeof( int ), clus1+clus2+1 ); intcpy( memhist[l], localmem[0] ); intcpy( memhist[l]+clus1, localmem[1] ); memhist[l][clus1+clus2] = -1; } #endif if( mergeoralign[l] == 'n' ) { // reporterr( "SKIP!\n" ); // free( topol[l][0] ); topol[l][0] = NULL; // free( topol[l][1] ); topol[l][1] = NULL; // free( topol[l] ); topol[l] = NULL; free( localmem[0] ); free( localmem[1] ); continue; } // reporterr( "\ndistfromtip = %f\n", dep[l].distfromtip ); if( specificityconsideration ) makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[l].distfromtip ); else dynamicmtx = n_dis_consweight_multi; // makedynamicmtx( dynamicmtx, n_dis_consweight_multi, ( dep[l].distfromtip - 0.2 ) * 3 ); len1 = strlen( aseq[m1] ); len2 = strlen( aseq[m2] ); if( *alloclen < len1 + len2 ) { reporterr( "\nReallocating.." ); *alloclen = ( len1 + len2 ) + 1000; ReallocateCharMtx( aseq, njob, *alloclen + 10 ); gaplen = realloc( gaplen, ( *alloclen + 10 ) * sizeof( int ) ); if( gaplen == NULL ) { reporterr( "Cannot realloc gaplen\n" ); exit( 1 ); } gapmap = realloc( gapmap, ( *alloclen + 10 ) * sizeof( int ) ); if( gapmap == NULL ) { reporterr( "Cannot realloc gapmap\n" ); exit( 1 ); } reporterr( "done. *alloclen = %d\n", *alloclen ); } #if 1 // CHUUI@@@@ clus1 = fastconjuction_noname( localmem[0], aseq, mseq1, effarr1, effarr, indication1, 0.0, &orieff1 ); clus2 = fastconjuction_noname( localmem[1], aseq, mseq2, effarr2, effarr, indication2, 0.0, &orieff2 ); #else clus1 = fastconjuction_noname( topol[l][0], aseq, mseq1, effarr1, effarr, indication1, 0.0 ); clus2 = fastconjuction_noname( topol[l][1], aseq, mseq2, effarr2, effarr, indication2, 0.0 ); // clus1 = fastconjuction_noweight( topol[l][0], aseq, mseq1, effarr1, indication1 ); // clus2 = fastconjuction_noweight( topol[l][1], aseq, mseq2, effarr2, indication2 ); #endif if( mergeoralign[l] == '1' || mergeoralign[l] == '2' ) { newgapstr = "="; } else newgapstr = "-"; len1nocommongap = len1; len2nocommongap = len2; if( mergeoralign[l] == '1' ) // nai { findcommongaps( clus2, mseq2, gapmap ); commongappick( clus2, mseq2 ); len2nocommongap = strlen( mseq2[0] ); } else if( mergeoralign[l] == '2' ) { findcommongaps( clus1, mseq1, gapmap ); commongappick( clus1, mseq1 ); len1nocommongap = strlen( mseq1[0] ); } #if 0 for( i=0; i 66 ) reporterr( "..." ); reporterr( "\n" ); reporterr( "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) reporterr( "..." ); reporterr( "\n" ); #endif /* reporterr( "before align all\n" ); display( aseq, njob ); reporterr( "\n" ); reporterr( "before align 1 %s \n", indication1 ); display( mseq1, clus1 ); reporterr( "\n" ); reporterr( "before align 2 %s \n", indication2 ); display( mseq2, clus2 ); reporterr( "\n" ); */ if( !nevermemsave && ( alg != 'M' && ( len1 > 30000 || len2 > 30000 ) ) ) { reporterr( "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 ); alg = 'M'; if( commonIP ) FreeIntMtx( commonIP ); commonIP = NULL; commonAlloc1 = 0; commonAlloc2 = 0; } // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000); // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); // else ffttry = 0; ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); // reporterr( "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (double)len1/fftlog[m1], clus1, (double)len2/fftlog[m2], clus2 ); // reporterr( "fftlog=%d,%d, ffttry=%d\n", fftlog[m1], fftlog[m2], ffttry ); if( useexternalanchors ) { pickpairanch( &pairanch, extanch, anchindex, clus1, clus2, localmem[0], localmem[1], mseq1, mseq2 ); // reporterr( "pairanch: %d:%d\n", pairanch[0].starti, pairanch[0].startj ); pscore = Falign_givenanchors( pairanch, NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 ); free( pairanch ); pairanch = NULL; } else if( force_fft || ( use_fft && ffttry ) ) { if( l < 500 || l % 100 == 0 ) reporterr( " f\b\b" ); if( alg == 'M' ) { if( l < 500 || l % 100 == 0 ) reporterr( "m" ); pscore = Falign_udpari_long( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 ); } else { pscore = Falign( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL ); // reporterr( "######### mseq1[0] = %s\n", mseq1[0] ); } } else { if( l < 500 || l % 100 == 0 ) reporterr( " d\b\b" ); fftlog[m1] = 0; switch( alg ) { case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen ); break; case( 'M' ): if( l < 500 || l % 100 == 0 ) reporterr( "m" ); if( l < 500 || l % 100 == 0 ) if( ( cpmxchild1 && *cpmxchild1 ) || ( cpmxchild0 && *cpmxchild0 ) ) reporterr( " h" ); // reporterr( "%d-%d", clus1, clus2 ); pscore = MSalignmm( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, cpmxchild0, cpmxchild1, cpmxhist+l, orieff1, orieff2 ); break; case( 'd' ): if( 1 && clus1 == 1 && clus2 == 1 ) { // reporterr( "%d-%d", clus1, clus2 ); pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap ); } else { // reporterr( "%d-%d", clus1, clus2 ); pscore = D__align_ls( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap ); } break; case( 'A' ): if( clus1 == 1 && clus2 == 1 ) { // reporterr( "%d-%d", clus1, clus2 ); pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap ); } else { if( l < 500 || l % 100 == 0 ) if( ( cpmxchild1 && *cpmxchild1 ) || ( cpmxchild0 && *cpmxchild0 ) ) reporterr( " h" ); // reporterr( "\n\n %d - %d (%d x %d) : \n", topol[l][0][0], topol[l][1][0], clus1, clus2 ); pscore = A__align( dynamicmtx, penalty, penalty_ex, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, 0, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, localmem[0][0], 1, cpmxchild0, cpmxchild1, cpmxhist+l, orieff1, orieff2 ); } break; default: ErrorExit( "ERROR IN SOURCE FILE" ); } } #if SCOREOUT reporterr( "score = %10.2f\n", pscore ); #endif tscore += pscore; nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] ); // writePre( njob, name, nlen, aseq, 0 ); if( disp ) display( aseq, njob ); // reporterr( "\n" ); if( mergeoralign[l] == '1' ) // jissainiha nai. atarashii hairetsu ha saigo dakara. { reporterr( "Check source!!!\n" ); exit( 1 ); } if( mergeoralign[l] == '2' ) { // if( localkeeplength ) ndeleted += deletenewinsertions( clus1, clus2, mseq1, mseq2, NULL ); // for( i=0; iSTEP0 mseq1[%d] = \n%s\n", i, mseq1[i] ); // for( i=0; iSTEP0 mseq2[%d] = \n%s\n", i, mseq2[i] ); gapmaplen = strlen( mseq1[0] )-len1nocommongap+len1; adjustgapmap( gapmaplen, gapmap, mseq1[0] ); #if 0 reporterr( "\n" ); for( i=0; iSTEP1 mseq1[%d] = \n%s\n", i, mseq1[i] ); for( i=0; iSTEP1 mseq2[%d] = \n%s\n", i, mseq2[i] ); #endif // if( clus1 + clus2 < njob ) restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' ); if( smoothing ) { restorecommongapssmoothly( njob, njob-(clus1+clus2), aseq, localmem[0], localmem[1], gapmap, *alloclen, '-' ); findnewgaps( clus1, 0, mseq1, gaplen ); insertnewgaps_bothorders( njob, alreadyaligned, aseq, localmem[0], localmem[1], gaplen, gapmap, gapmaplen, *alloclen, alg, '-' ); } else { restorecommongaps( njob, njob-(clus1+clus2), aseq, localmem[0], localmem[1], gapmap, *alloclen, '-' ); findnewgaps( clus1, 0, mseq1, gaplen ); insertnewgaps( njob, alreadyaligned, aseq, localmem[0], localmem[1], gaplen, gapmap, *alloclen, alg, '-' ); } #if 0 reporterr( "\n" ); for( i=0; iSTEP3 mseq1[%d] = \n%s\n", i, mseq1[i] ); for( i=0; iSTEP3 mseq2[%d] = \n%s\n", i, mseq2[i] ); #endif #if 0 for( i=0; i-1; i++ ) alreadyaligned[m] = 1; } if( newdistmtx ) // tsukawanai { #if 0 reporterr( "group1 = " ); for( i=0; ig1\n%s\n", mseq1[i] ); for( i=0; ig2\n%s\n", mseq2[i] ); exit( 1 ); } #endif // free( topol[l][0] ); topol[l][0] = NULL; // free( topol[l][1] ); topol[l][1] = NULL; // free( topol[l] ); topol[l] = NULL; // reporterr( ">514\n%s\n", aseq[514] ); free( localmem[0] ); free( localmem[1] ); } #if SCOREOUT reporterr( "totalscore = %10.2f\n\n", tscore ); #endif Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL ); Falign_udpari_long( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); Falign_givenanchors( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); A__align( NULL, 0, 0, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); D__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); G__align11( NULL, NULL, NULL, 0, 0, 0 ); // iru? free( effarr1 ); free( effarr2 ); free( indication1 ); free( indication2 ); free( fftlog ); free( gaplen ); free( gapmap ); if( specificityconsideration ) FreeDoubleMtx( dynamicmtx ); free( alreadyaligned ); free( localmem ); effarr1 = NULL; return( 0 ); chudan_tbfast: Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL ); Falign_udpari_long( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); Falign_givenanchors( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL ); A__align( NULL, 0, 0, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1, NULL, NULL, NULL, 0.0, 0.0 ); D__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); G__align11( NULL, NULL, NULL, 0, 0, 0 ); // iru? if( effarr1 ) free( effarr1 ); effarr1 = NULL; if( effarr2 ) free( effarr2 ); effarr2 = NULL; if( indication1 ) free( indication1 ); indication1 = NULL; if( indication2 ) free( indication2 ); indication2 = NULL; if( fftlog ) free( fftlog ); fftlog = NULL; if( gaplen ) free( gaplen ); gaplen = NULL; if( gapmap ) free( gapmap ); gapmap = NULL; if( alreadyaligned ) free( alreadyaligned ); alreadyaligned = NULL; if( specificityconsideration ) { if( dynamicmtx ) FreeDoubleMtx( dynamicmtx ); dynamicmtx = NULL; } if( localmem ) free( localmem ); localmem = NULL; #if SKIP if( skiptable1 ) FreeIntMtx( skiptable1 ); skiptable1 = NULL; if( skiptable2 ) FreeIntMtx( skiptable2 ); skiptable2 = NULL; #endif return( 1 ); } static void WriteOptions( FILE *fp ) { if( dorp == 'd' ) fprintf( fp, "DNA\n" ); else { if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum ); else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); } reporterr( "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( use_fft ) fprintf( fp, "FFT on\n" ); fprintf( fp, "tree-base method\n" ); if( tbrweight == 0 ) fprintf( fp, "unweighted\n" ); else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" ); if( tbitr || tbweight ) { fprintf( fp, "iterate at each step\n" ); if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" ); if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" ); if( tbweight ) fprintf( fp, " weighted\n" ); fprintf( fp, "\n" ); } fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( alg == 'a' ) fprintf( fp, "Algorithm A\n" ); else if( alg == 'A' ) fprintf( fp, "Algorithm A+\n" ); else fprintf( fp, "Unknown algorithm\n" ); if( treemethod == 'X' ) fprintf( fp, "Tree = UPGMA (mix).\n" ); else if( treemethod == 'E' ) fprintf( fp, "Tree = UPGMA (average).\n" ); else if( treemethod == 'q' ) fprintf( fp, "Tree = Minimum linkage.\n" ); else fprintf( fp, "Unknown tree.\n" ); if( use_fft ) { fprintf( fp, "FFT on\n" ); if( dorp == 'd' ) fprintf( fp, "Basis : 4 nucleotides\n" ); else { if( fftscore ) fprintf( fp, "Basis : Polarity and Volume\n" ); else fprintf( fp, "Basis : 20 amino acids\n" ); } fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold ); fprintf( fp, "window size of anchors = %dsites\n", fftWinSize ); } else fprintf( fp, "FFT off\n" ); fflush( fp ); } static double **preparepartmtx( int nseq ) { int i; double **val; double size; val = (double **)calloc( nseq, sizeof( double *) );; size = 0; if( compacttree == 1 ) { for( i=0; i maxdistmtxsize ) { reporterr( "\n\nThe size of full distance matrix is estimated to exceed %.2fGB.\n", maxdistmtxsize / 1000 / 1000 /1000 ); reporterr( "Will try the calculation using a %d x %d matrix.\n", nseq, i ); reporterr( "This calculation will be slow due to the limited RAM space.\n", i, nseq ); reporterr( "To avoid the slowdown, please try '--initialramusage xGB' (x>>%.2f),\n", maxdistmtxsize / 1000 / 1000 /1000 ); reporterr( "if larger RAM space is available.\n" ); reporterr( "Note that xGB is NOT the upper limit of RAM usage.\n" ); reporterr( "Two to three times larger space may be used for building a guide tree.\n" ); reporterr( "Memory usage of the MSA stage depends on similarity of input sequences.\n\n" ); // reporterr( "If the RAM is small, try '--initialramusage xGB' with a smaller x value.\n" ); reporterr( "The '--memsavetree' option uses smaller RAM space.\n" ); reporterr( "If tree-like relationship can be ignored, try '--pileup' or '--randomchain'.\n\n" ); reporterr( "The result of --initialramusage xGB is almost identical to the default, except for rounding differences.\n" ); reporterr( "In the cases of --memsavetree, --pileup and --randomchain, the result differs from the default.\n\n" ); break; } val[i] = (double *)calloc( nseq, sizeof( double ) ); } if( i == nseq ) reporterr( "The full matrix will be used.\n" ); for( ;i nlenmax ) nlenmax = ien; } infp = NULL; // stderr = fopen( "/dev/null", "a" ); // Windows???? tmpargv = AllocateCharMtx( argc, 0 ); for( i=0; i 1000000 ) { reporterr( "The number of sequences must be < %d\n", 1000000 ); reporterr( "Please try the --parttree option for such large data.\n" ); exit( 1 ); } if( njob < 2 ) { seq = AllocateCharMtx( 2, nlenmax*1+1 ); name = AllocateCharMtx( 2, B+1 ); nlen = AllocateIntVec( 2 ); readData_pointer( infp, name, nlen, seq ); fclose( infp ); gappick0( seq[1], seq[0] ); writeData_pointer( stdout, njob, name, nlen, seq+1 ); reporterr( "Warning: Only %d sequence found.\n", njob ); FreeCharMtx( seq ); FreeCharMtx( name ); free( nlen ); exit( 0 ); } if( specificityconsideration != 0.0 && nlenmax) { if( nlenmax > 100000 ) { reporterr( "\n" ); reporterr( "Too long to apply --allowshift or --unalignlevel>0\n" ); reporterr( "Please use the normal mode.\n" ); reporterr( "Please also note that MAFFT does not assume genomic rearrangements.\n" ); reporterr( "\n" ); exit( 1 ); } } #if !defined(mingw) && !defined(_MSC_VER) setstacksize( 200 * njob ); // topolorder() de ookime no stack wo shiyou. #endif if( subalignment ) { readsubalignmentstable( njob, NULL, NULL, &nsubalignments, &maxmem ); reporterr( "nsubalignments = %d\n", nsubalignments ); reporterr( "maxmem = %d\n", maxmem ); subtable = AllocateIntMtx( nsubalignments, maxmem+1 ); insubtable = AllocateIntVec( njob ); preservegaps = AllocateIntVec( njob ); for( i=0; i equivalent to v7.448 free( tmpseq ); } constants( njob, seq ); #if 0 reporterr( "params = %d, %d, %d\n", penalty, penalty_ex, offset ); #endif initSignalSM(); initFiles(); WriteOptions( trap_g ); c = seqcheck( seq ); if( c ) { reporterr( "Illegal character %c\n", c ); exit( 1 ); } reporterr( "\n" ); // reporterr( "tuplesize = %d, dorp = %c\n", tuplesize, dorp ); if( dorp == 'p' && tuplesize != 6 ) { reporterr( "tuplesize must be 6 for aa sequence\n" ); exit( 1 ); } if( dorp == 'd' && tuplesize != 6 && tuplesize != 10 ) { reporterr( "tuplesize must be 6 or 10 for dna sequence\n" ); exit( 1 ); } if( treein ) { int npickx; treein = check_guidetreefile( &randomseed, &npickx, &maxdistmtxsize ); if( treein == 't' ) { varpairscore( njob, npickx, nlenmax, seq, randomseed ); exit( 1 ); } else if( treein == 'c' ) { compacttree = 1; treein = 0; // use_fft = 0; // kankeinai? // maxdistmtxsize = 5 * 1000 * 1000; // 5GB. ato de kahen ni suru. // maxdistmtxsize = 1.0 * 1000 * 1000 * 1000; // 5GB. ato de kahen ni suru. } else if( treein == 'Y' ) { compacttree = 4; // youngest linkage, 3 ha tbfast de tsukaunode ichiou sakeru treein = 0; // use_fft = 0; // kankeinai? } else if( treein == 'S' || treein == 'C' ) { compacttree = 2; // 3 ha tbfast de tsukaunode ichiou sakeru treein = 0; // use_fft = 0; // kankeinai? } else if( treein == 'a' ) { // reporterr( "Compute pairwise scores\n" ); if( njob > 200000 ) { reporterr( "Chain?\n" ); treein = 's'; nguidetree = 1; } else if( njob < 100 || 't' == varpairscore( njob, npickx, nlenmax, seq, randomseed ) ) { if( treein == 'c' ) exit( 1 ); reporterr( "Tree!\n" ); treein = 0; nguidetree = 2; } else { reporterr( "Chain!\n" ); treein = 's'; nguidetree = 1; } } else if ( treein != 0 ) // auto no toki arieru nguidetree = 1; } # if 0 // tameshini if( sueff_global < 0.0001 || compacttree == 2 ) { nthread = 0; nthreadtb = 0; } #endif // if( njob > 10000 ) nthreadtb = 0; if( njob > 20000 ) nthreadtb = 0; // 2018/Jan. Hairetsu ga ooi toki // 1. topolorder_lessargs no stack ga tarinakunaru // 2. localcopy no tame kouritsu warui if( compacttree == 1 ) { if( maxdistmtxsize > (double)njob * (njob-1) * sizeof( double ) / 2 ) { reporterr( "Use conventional tree.\n" ); compacttree = 0; } } if( !treein ) { reporterr( "\n\nMaking a distance matrix ..\n" ); if( callback && callback( 0, 0, "Distance matrix" ) ) goto chudan; tmpseq = AllocateCharVec( nlenmax+1 ); grpseq = AllocateIntVec( nlenmax+1 ); pointt = AllocateIntMtx( njob, nlenmax+1 ); if( !compacttree ) mtx = AllocateFloatHalfMtx( njob ); if( dorp == 'd' ) tsize = (int)pow( 4, tuplesize ); else tsize = (int)pow( 6, 6 ); if( dorp == 'd' && tuplesize == 6 ) { lenfaca = D6LENFACA; lenfacb = D6LENFACB; lenfacc = D6LENFACC; lenfacd = D6LENFACD; } else if( dorp == 'd' && tuplesize == 10 ) { lenfaca = D10LENFACA; lenfacb = D10LENFACB; lenfacc = D10LENFACC; lenfacd = D10LENFACD; } else { lenfaca = PLENFACA; lenfacb = PLENFACB; lenfacc = PLENFACC; lenfacd = PLENFACD; } maxl = 0; for( i=0; i maxl ) maxl = nogaplen[i]; if( dorp == 'd' ) /* nuc */ { seq_grp_nuc( grpseq, tmpseq ); // makepointtable_nuc( pointt[i], grpseq ); // makepointtable_nuc_octet( pointt[i], grpseq ); if( tuplesize == 10 ) makepointtable_nuc_dectet( pointt[i], grpseq ); else if( tuplesize == 6 ) makepointtable_nuc( pointt[i], grpseq ); else { reporterr( "tuplesize=%d: not supported\n", tuplesize ); exit( 1 ); } } else /* amino */ { seq_grp( grpseq, tmpseq ); makepointtable( pointt[i], grpseq ); } } if( nunknown ) reporterr( "\nThere are %d ambiguous characters.\n", nunknown ); if( compacttree ) { reporterr( "Compact tree, step 1\n" ); mindistfrom = (int *)calloc( njob, sizeof( int ) ); mindist = (double *)calloc( njob, sizeof( double ) ); selfscore = (int *)calloc( njob, sizeof( int ) ); partmtx = preparepartmtx( njob ); for( i=0; i 0 ) { compactdistmtxthread_arg_t *targ; int jobpos; pthread_t *handle; pthread_mutex_t mutex; double **mindistthread; int **mindistfromthread; if( compacttree == 4 ) jobpos = 0; else jobpos = njob-1; targ = calloc( nthreadpair, sizeof( compactdistmtxthread_arg_t ) ); handle = calloc( nthreadpair, sizeof( pthread_t ) ); mindistthread = AllocateDoubleMtx( nthreadpair, njob ); mindistfromthread = AllocateIntMtx( nthreadpair, njob ); pthread_mutex_init( &mutex, NULL ); for( j=0; j 0 ) { distancematrixthread_arg_t *targ; int jobpos; pthread_t *handle; pthread_mutex_t mutex; jobpos = 0; targ = calloc( nthreadpair, sizeof( distancematrixthread_arg_t ) ); handle = calloc( nthreadpair, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; i nogaplen[j] ) { longer=(double)nogaplen[i]; shorter=(double)nogaplen[j]; } else { longer=(double)nogaplen[j]; shorter=(double)nogaplen[i]; } // if( tuplesize == 6 ) lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); // else // lenfac = 1.0; // reporterr( "lenfac = %f (%.0f,%.0f)\n", lenfac, longer, shorter ); bunbo = MIN( mtx[i][0], mtx[j][0] ); if( bunbo == 0.0 ) mtx[i][j-i] = 2.0; // 2013/Oct/17 -> 2bai else mtx[i][j-i] = ( 1.0 - mtx[i][j-i] / bunbo ) * lenfac * 2.0; // 2013/Oct/17 -> 2bai // reporterr( "##### mtx = %f, mtx[i][0]=%f, mtx[j][0]=%f, bunbo=%f\n", mtx[i][j-i], mtx[i][0], mtx[j][0], bunbo ); } } if( disopt ) { for( i=0; i iguidetree loop nai ni idou if( distout ) { hat2p = fopen( "hat2", "w" ); WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, mtx ); fclose( hat2p ); } #endif } #if 0 else { reporterr( "Loading 'hat2' ... " ); prep = fopen( "hat2", "r" ); if( prep == NULL ) ErrorExit( "Make hat2." ); readhat2_double( prep, njob, name, mtx ); // name chuui fclose( prep ); reporterr( "done.\n" ); } #endif // reporterr( "after computing distance matrix," ); // use_getrusage(); if( nadd && keeplength ) { originalgaps = (char *)calloc( nlenmax+1, sizeof( char) ); recordoriginalgaps( originalgaps, njob-nadd, seq ); if( mapout ) { addbk = (char **)calloc( nadd+1, sizeof( char * ) ); for( i=0; i= njob ) // check sumi { reporterr( "No such sequence, %d.\n", subtable[i][j]+1 ); exit( 1 ); } if( alignmentlength != strlen( seq[subtable[i][j]] ) ) { reporterr( "\n" ); reporterr( "###############################################################################\n" ); reporterr( "# ERROR!\n" ); reporterr( "# Subalignment %d must be aligned.\n", i+1 ); reporterr( "# Please check the alignment lengths of following sequences.\n" ); reporterr( "#\n" ); reporterr( "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][0]+1, name[subtable[i][0]]+1, alignmentlength ); reporterr( "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][j]+1, name[subtable[i][j]]+1, (int)strlen( seq[subtable[i][j]] ) ); reporterr( "#\n" ); reporterr( "# See http://mafft.cbrc.jp/alignment/software/merge.html for details.\n" ); if( subalignmentoffset ) { reporterr( "#\n" ); reporterr( "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset ); reporterr( "# In this case, the rule of numbering is:\n" ); reporterr( "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset ); reporterr( "# The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob ); } reporterr( "###############################################################################\n" ); reporterr( "\n" ); goto chudan; // TEST!! //exit( 1 ); } insubtable[subtable[i][j]] = 1; } for( j=0; j OK\n" ); break; } } if( !foundthebranch ) { system( "cp infile.tree GuideTree" ); // tekitou reporterr( "\n" ); reporterr( "###############################################################################\n" ); reporterr( "# ERROR!\n" ); reporterr( "# Subalignment %d does not seem to form a monophyletic cluster\n", i+1 ); reporterr( "# in the guide tree ('GuideTree' in this directory) internally computed.\n" ); reporterr( "# If you really want to use this subalignment, pelase give a tree with --treein \n" ); reporterr( "# http://mafft.cbrc.jp/alignment/software/treein.html\n" ); reporterr( "# http://mafft.cbrc.jp/alignment/software/merge.html\n" ); if( subalignmentoffset ) { reporterr( "#\n" ); reporterr( "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset ); reporterr( "# In this case, the rule of numbering is:\n" ); reporterr( "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset ); reporterr( "# The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob ); } reporterr( "############################################################################### \n" ); reporterr( "\n" ); goto chudan; // TEST!! //exit( 1 ); } // commongappick( seq[subtable[i]], subalignment[i] ); // irukamo } #if 0 for( i=0; i %c\n\n", i, mergeoralign[i] ); } #endif for( i=0; i iguidetree loop no soto he FreeIntMtx( subtable ); free( insubtable ); for( i=0; i 0 && nadd == 0 ) // nthreadpair ha minai { treebasethread_arg_t *targ; int jobpos; pthread_t *handle; pthread_mutex_t mutex; pthread_cond_t treecond; int *fftlog; int nrun; int nthread_yoyu; nthread_yoyu = nthreadtb * 1; nrun = 0; jobpos = 0; targ = calloc( nthread_yoyu, sizeof( treebasethread_arg_t ) ); fftlog = AllocateIntVec( njob ); handle = calloc( nthread_yoyu, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); pthread_cond_init( &treecond, NULL ); for( i=0; i0 && nadd==0 ) if( calcpairdists ) if( calcpairdists && !compacttree ) #else // if( 0 && nadd==0 ) if( calcpairdists ) // zettai nai if( calcpairdists && !compacttree ) #endif { reporterr( "Making a distance matrix from msa.. \n" ); skiptable = AllocateIntMtx( njob, 0 ); makeskiptable( njob, skiptable, bseq ); // allocate suru. #ifdef enablemultithread if( nthreadpair > 0 ) { msadistmtxthread_arg_t *targ; Jobtable jobpos; pthread_t *handle; pthread_mutex_t mutex; jobpos.i = 0; jobpos.j = 0; targ = calloc( nthreadpair, sizeof( msadistmtxthread_arg_t ) ); handle = calloc( nthreadpair, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); for( i=0; i 0 ) { msacompactdistmtxthread_arg_t *targ; int jobpos; pthread_t *handle; pthread_mutex_t mutex; double **mindistthread; int **mindistfromthread; mindistthread = AllocateDoubleMtx( nthreadpair, njob ); mindistfromthread = AllocateIntMtx( nthreadpair, njob ); targ = calloc( nthreadpair, sizeof( msacompactdistmtxthread_arg_t ) ); handle = calloc( nthreadpair, sizeof( pthread_t ) ); pthread_mutex_init( &mutex, NULL ); if( compacttree == 4 ) jobpos = 0; else jobpos = njob-1; for( i=0; i lgui ) { reporterr( "alignmentlength = %d, gui allocated %d", ien, lgui ); val = GUI_LENGTHOVER; } else { for( i=0; i 0 ) { reporterr( "\nTo keep the alignment length, %d letters were DELETED.\n", ndeleted ); if( mapout ) reporterr( "The deleted letters are shown in the (filename).map file.\n" ); else reporterr( "To know the positions of deleted letters, rerun the same command with the --mapout option.\n" ); } if( subalignment ) { FreeIntMtx( subtable ); free( insubtable ); for( i=0; i