/* file: smap.c * Author: Richard Durbin (rd@sanger.ac.uk) * Copyright (c) J Thierry-Mieg and R Durbin, 1999 * ------------------------------------------------------------------- * Acedb is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * or see the on-line version at http://www.gnu.org/copyleft/gpl.txt * ------------------------------------------------------------------- * This file is part of the ACEDB genome database package, written by * Richard Durbin (Sanger Centre, UK) rd@sanger.ac.uk, and * Jean Thierry-Mieg (CRBM du CNRS, France) mieg@kaa.crbm.cnrs-mop.fr * * Description: * Exported functions: See smap.h * HISTORY: * Last edited: Sep 11 10:51 2002 (edgrif) * * Aug 26 20:20 (rd): added support for Source_exons * * Aug 23 21:15 (rd): added sMapDNA and changed mismatch semantics * * Aug 23 19:50 (rd): added bsIndexFind calls for efficiency * these are VERY IMPORTANT * * Aug 23 19:34 (rd): added sMapKeySet() * Created: Wed Jul 28 22:11:20 1999 (rd) * CVS info: $Id: smap.c,v 1.37 2002/11/06 18:57:29 srk Exp $ *------------------------------------------------------------------- */ #include #include #include #include #include #include #include #include #include #include /* The SMap package supports building a sequence coordinate system out of multiple objects according to the following model: in parent object S_Child XREF UNIQUE Int UNIQUE Int #SMap_info // Ints are start, end in parent #SMap_info Align Int UNIQUE Int UNIQUE Int // for each ungapped block, parent_start child_start [length] // starts are starts with respect to child (relevant if sequences // are opposite orientations // if no length then until next block (so no double gap) // if no Align assume ungapped starting at 1 in child AlignDNAPep Int UNIQUE Int UNIQUE Int AlignPepDNA Int UNIQUE Int UNIQUE Int // These two tags are analogous to Align, but scale length // for the case of a dna alignment to peptide or vice-versa. Mismatch Int UNIQUE Int // start end of mismatch region in child coords // mismatches from this sequence or children are ignored // if no end then only the specified base // if no Ints then mismatches OK anywhere in this sequence Contents Homols_only Features_only No_DNA in child object SMap S_Parent UNIQUE // must be just _one_ parent. map and ->mismatch arrays are sorted in increasing order of s1, and s2 >= s1 always. We also assume here that the sequences are colinear. */ /* Used by internal exon sorting routines. */ typedef struct { int x, y ; } ExonStruct ; /*********************************************************************/ static BOOL sMapAddDNA(Array dna, SMapKeyInfo *info, sMapDNAErrorCallback callback, consMapDNAErrorReturn *errorLocation); static void exonsSort (Array a) ; static Array getExonMap(KEY key); static BOOL hasNoParent(KEY key) ; static SMapStatus sMapMapEx(Array map, int x1, int x2, struct SMapMapReturn *ret); static BOOL sMapAddNode(SMap *smap, KEY parent, KEY key, /* key to add */ int pStart, /* start coord in parent */ int pEnd, /* end coord in parent */ OBJ parentObj, /* located at #SmapInfo */ BOOL invert); static Array sMapComposeMaps (Array localMap, Array parentMap, STORE_HANDLE handle); void sMapInvertMap (Array map); static void sMapAddNeighbours (SMap *smap, KEY key, char *aqlCondition, BOOL parentDone); /*********************************************************************/ static void sMapFinalize (void *ptr) { messfree (((SMap*) ptr)->handle) ; return ; } static SMap* sMapSimpleCreate (STORE_HANDLE handle) { SMap* smap = (SMap*) handleAlloc (sMapFinalize, handle, sizeof(SMap)) ; smap->handle = handleCreate () ; smap->key2index = assHandleCreate (smap->handle) ; return smap ; } void sMapDestroy (SMap* smap) { messfree (smap) ; /* will call sMapFinalize */ return ; } /******************************************************************/ int sMapMax (SMap* smap) { return smap->length ; } /******************************************************************/ SMapKeyInfo* sMapKeyInfo (SMap* smap, KEY key) { SMapKeyInfo *info ; if (smap->lastKeyInfo && smap->lastKeyInfo->key == key) return smap->lastKeyInfo ; if (!assFind (smap->key2index, assVoid(key), &info)) return 0 ; smap->lastKeyInfo = info ; #ifdef ACEDB4 if (smap->lastKeyUnspliced) arrayDestroy(smap->lastKeyUnspliced); #endif return info ; } /******************************************************************/ /* binary searches to find segments used in sMapMap and sMapComposeMaps */ static SMapMap *lowestUpperSeg (Array map, int x) { int i = 0, j = arrayMax(map)-1 ; while (i < j) if (x > arrp(map, (i+j)/2, SMapMap)->s2) i = (i+j)/2 + 1 ; else j = (i+j)/2 ; return arrp(map, i, SMapMap) ; } static SMapMap *highestLowerSeg (Array map, int x) { int i = 0, j = arrayMax(map)-1 ; while (i < j) if (x < arrp(map, (i+j)/2+1, SMapMap)->s1) j = (i+j)/2 ; else i = (i+j)/2 + 1 ; return arrp(map, j, SMapMap) ; } /******************************************************************/ KEY sMapParent(SMapKeyInfo* info) { return info->parent; } Array sMapMapArray(SMapKeyInfo* info) { return info->map; } /* fundamental coordinate conversion routine */ SMapStatus sMapMap(SMapKeyInfo* info, int x1, int x2, int *y1, int *y2, int *nx1, int *nx2) { struct SMapMapReturn ret; SMapStatus status = sMapMapEx(info->map, x1, x2, &ret); if (y1) *y1 = ret.y1; if (y2) *y2 = ret.y2; if (nx1) *nx1 = ret.nx1; if (nx2) *nx2 = ret.nx2; return status; } BOOL sMapIsReverse(SMapKeyInfo *info, int x1, int x2) { Strand strand ; /* interpret zeroes */ if (x1 == 0) x1 = x2+1; else if (x2 == 0) x2 = x1+1; strand = x1 <= x2 ? strand_up : strand_down; return info->strand == strand; } /* and its equivalent for mapping the other way */ SMapStatus sMapInverseMap (SMapKeyInfo* info, int x1, int x2, int *y1, int *y2, int *nx1, int *nx2) { struct SMapMapReturn ret; SMapStatus status ; if (!info->inverseMap) { info->inverseMap = arrayHandleCopy (info->map, info->smap->handle) ; sMapInvertMap (info->inverseMap) ; } status = sMapMapEx(info->inverseMap, x1, x2, &ret); if (y1) *y1 = ret.y1; if (y2) *y2 = ret.y2; if (nx1) *nx1 = ret.nx1; if (nx2) *nx2 = ret.nx2; return status; } static SMapStatus sMapMapEx(Array map, int x1, int x2, struct SMapMapReturn *ret) { int z1, z2 ; SMapMap *m1, *m2 ; BOOL isReverse = FALSE ; SMapStatus status = SMAP_STATUS_PERFECT_MAP ; /* default is perfect map without gaps */ /* Passing a coord of zero gives the maximum extent of this object */ if (x1 == 0) x1 = arrp(map, arrayMax(map)-1, SMapMap)->s2; if (x2 == 0) x2 = arrp(map, arrayMax(map)-1, SMapMap)->s2; /* set default nx1, nx2 before possible reverse */ /* this means they are set even if NO_OVERLAP, but that seems OK */ ret->nx1 = x1 ; ret->nx2 = x2 ; if (x1 > x2) { int t = x1 ; x1 = x2 ; x2 = t ; isReverse = TRUE ; } if (x1 > arrp(map, arrayMax(map)-1, SMapMap)->s2) /* off end of last mapped segment */ return SMAP_STATUS_X2_NO_OVERLAP_EXTERNAL ; else if (x2 < arrp(map, 0, SMapMap)->s1) /* off start of first mapped segment */ return SMAP_STATUS_X1_NO_OVERLAP_EXTERNAL ; /* find map segment for x1 by binary search */ /* could cache in KeyInfo for rapid return on dense sorted features? */ m1 = lowestUpperSeg (map, x1) ; if (x1 < m1->s1) /* clip */ { z1 = m1->r1 ; if (m1 == arrp(map, 0, SMapMap)) status |= SMAP_STATUS_X1_EXTERNAL_CLIP ; else status |= SMAP_STATUS_X1_INTERNAL_CLIP ; if (isReverse) ret->nx2 = m1->s1; else ret->nx1 = m1->s1 ; } else if (m1->r2 > m1->r1) z1 = m1->r1 + (x1 - m1->s1) ; else z1 = m1->r1 - (x1 - m1->s1) ; /* x2 likewise */ m2 = highestLowerSeg (map, x2) ; if (m2 < m1) /* completely contained in internal gap */ return SMAP_STATUS_NO_OVERLAP_INTERNAL ; if (m1 < m2) status |= SMAP_STATUS_INTERNAL_GAPS ; if (x2 > m2->s2) /* clip */ { z2 = m2->r2 ; if (m2 == arrp(map, arrayMax(map)-1, SMapMap)) status |= SMAP_STATUS_X2_EXTERNAL_CLIP ; else status |= SMAP_STATUS_X2_INTERNAL_CLIP ; if (isReverse) ret->nx1 = m2->s2 ; else ret->nx2 = m2->s2 ; } else if (m2->r2 > m2->r1) z2 = m2->r2 - (m2->s2 - x2) ; else z2 = m2->r2 + (m2->s2 - x2) ; /* done, now fill in results and return */ if (isReverse) { ret->y1 = z2 ; ret->y2 = z1 ; } else { ret->y1 = z1 ; ret->y2 = z2 ; } return status; } char *sMapErrorString(SMapStatus status) { switch (status) { case SMAP_STATUS_PERFECT_MAP: return "PERFECT_MAP"; case SMAP_STATUS_ERROR: return "ERROR"; case SMAP_STATUS_INTERNAL_GAPS: return "INTERNAL_GAPS"; case SMAP_STATUS_X1_NO_OVERLAP_EXTERNAL: return "X1_NO_OVERLAP_EXTERNAL"; case SMAP_STATUS_X2_NO_OVERLAP_EXTERNAL: return "X2_NO_OVERLAP_EXTERNAL"; case SMAP_STATUS_NO_OVERLAP_INTERNAL: return "NO_OVERLAP_INTERNAL"; case SMAP_STATUS_X1_EXTERNAL_CLIP: return "X1_EXTERNAL_CLIP"; case SMAP_STATUS_X1_INTERNAL_CLIP: return "X1_INTERNAL_CLIP"; case SMAP_STATUS_X2_EXTERNAL_CLIP: return "X2_EXTERNAL_CLIP"; case SMAP_STATUS_X2_INTERNAL_CLIP: return "X2_INTERNAL_CLIP"; default: return NULL; } } /************************************************************/ /* standard coordinate conversion routine for a full SMap object */ BOOL sMapConvert (SMap* smap, KEY key, int x1, int x2, int *y1, int *y2) { SMapKeyInfo *info; struct SMapMapReturn ret; if ((info = sMapKeyInfo (smap, key)) && (sMapMapEx(info->map, x1, x2, &ret) & SMAP_STATUS_NO_OVERLAP) == 0) { if (y1) *y1 = ret.y1; if (y2) *y2 = ret.y2; return TRUE; } return FALSE; } #ifdef ACEDB4 /* Map into unspliced co-ordinates even is an object has source exons. Currently first call on each object costs a bsCreate/Destroy and two arrayCreate/Destroys. We could make this cheaper at the cost of holding more data in every node. */ SMapStatus sMapUnsplicedMap(SMapKeyInfo* info, int x1, int x2, int *y1, int *y2, int *nx1, int *nx2) { SMap *smap = info->smap; SMapStatus result = SMAP_STATUS_ERROR ; struct SMapMapReturn ret; Array exonMap, unsplicedMap = smap->lastKeyUnspliced ; if (!unsplicedMap) { /* If no source exons we just do ordinary map */ if (!(exonMap = getExonMap(info->key))) unsplicedMap = info->map; else { /* spliced -> unspliced */ sMapInvertMap(exonMap); /* root -> spliced -> unspliced */ unsplicedMap = smap->lastKeyUnspliced = sMapComposeMaps(exonMap, info->map, smap->handle); arrayDestroy(exonMap); } } result = sMapMapEx(unsplicedMap, x1, x2, &ret) ; if (y1) *y1 = ret.y1; if (y2) *y2 = ret.y2; if (nx1) *nx1 = ret.nx1; if (nx2) *nx2 = ret.nx2; return result ; } #endif /* ACEDB4 */ /*************************************************************/ static void sMapKeyAdd (KEYSET kset, SMapKeyInfo *info) { int i ; keySet(kset, keySetMax(kset)) = info->key ; for (i = 0 ; i < arrayMax (info->children) ; ++i) sMapKeyAdd (kset, arr(info->children, i, SMapKeyInfo*)) ; } KEYSET sMapKeys (SMap* smap, STORE_HANDLE handle) { KEYSET kset = keySetHandleCreate (handle) ; sMapKeyAdd (kset, smap->root) ; return kset ; } /*************************************************************/ /* Given an smap, will return the dna sequence for that smap. */ /* If the dna can be found it is returned as an array allocated on handle, */ /* otherwise NULL is returned. */ /* */ Array sMapDNA (SMap* smap, STORE_HANDLE handle, sMapDNAErrorCallback callback) { consMapDNAErrorReturn errorLocation = sMapErrorReturnContinue; /* this is where we build the result */ Array dna = arrayHandleCreate(smap->length, unsigned char, handle) ; /* recursively add DNA */ if (sMapAddDNA(dna, smap->root, callback, &errorLocation) && errorLocation != sMapErrorReturnFail && errorLocation != sMapErrorReturnContinueFail) { /* now fill in low byte from high byte if necessary */ int i ; unsigned char *cp = arrp(dna, 0, unsigned char) ; for (i = 0 ; i < arrayMax(dna) ; i++, cp++) { if (*cp & 0xf) *cp &= 0xf ; else *cp >>= 4 ; } return dna; } arrayDestroy(dna); return FALSE; } /* NB sMapDNA is called by dnaNewGet() if there is no directly attached DNA, and it calls dnaGet to get the dna for keys with directly attached DNA. We have been careful to ensure there is no loop here. */ /* We return TRUE if the object itself contains dna or if any of its */ /* children contain dna, otherwise FALSE. */ /* */ static BOOL sMapAddDNA(Array dna, SMapKeyInfo *info, sMapDNAErrorCallback callback, consMapDNAErrorReturn *errorLocation) { BOOL result = FALSE ; if (!bIndexTag(info->key, str2tag("DNA"))) { /* This sequence has no dna directly attached so look in children. */ int i ; for (i = 0 ; i < arrayMax(info->children) ; ++i) { /* N.B. we return TRUE if one or more of the children contain dna. */ if (sMapAddDNA(dna, arr(info->children, i, SMapKeyInfo*), callback, errorLocation)) result = TRUE ; } } else { /* Try and get this objects dna. */ OBJ obj = NULL ; Array a = NULL ; KEY key = KEY_UNDEFINED ; if ((obj = bsCreate (info->key)) && bsGetKey (obj, str2tag("DNA"), &key) && (a = dnaRawGet(key))) { unsigned char *cp, *cq ; SMapMismatch *mis ; SMapMap *map ; int i, j, length ; /* simplest dnaGet(), sequence points directly to dna array */ /* take minimum of declared and actual lengths of DNA */ if (!bsGetData (obj, _bsRight, _Int, &length) || (length > arrayMax(a))) length = arrayMax(a); /* shift sequence allowed to mismatch into high byte */ if (info->mismatch) for (i = 0 ; i < arrayMax (info->mismatch) ; ++i) { mis = arrp(info->mismatch, i, SMapMismatch) ; for (j = mis->s1 - 1 ; (j < mis->s2 - 1) && (j < length) ; ++j) { arr(a,j,unsigned char) <<= 4 ; } } /* strategy is to complain if low half-bytes don't match then OR with new sequence, allowing high byte ambiguity codes */ for (i = 0 ; i < arrayMax (info->map) ; ++i) { int dna_pos, curr_dna_length ; /* n.b. s2 >= s1 always... */ map = arrp(info->map, i, SMapMap) ; dna_pos = info->strand == strand_up ? map->r2 - 1 : map->r1 - 1 ; curr_dna_length = map->s2 - map->s1 + 1 ; /* We need to take into account that we might not have enough DNA */ if (map->s1 - 1 < length) { if (arrayMax(dna) < dna_pos + curr_dna_length) arrayMax(dna) = dna_pos + curr_dna_length ; if (map->s1 - 1 + curr_dna_length > length) curr_dna_length = length - (map->s1 - 1); /* Copy across the dna from the object into our dna array, */ /* reversing and complementing for dna on the opposite strand. */ /* */ /* NOTE that we don't cover what happens if either cp or cq */ /* is blank in any of the positions. */ cp = arrp(dna, (map->r1 - 1), unsigned char) ; cq = arrp(a, (map->s1 - 1), unsigned char) ; if (info->strand == strand_up) { for (j = curr_dna_length - 1 ; j >= 0 ; j--) { unsigned char base = complementBase[(int)(*cq & 0xf)] ; if ((*cp & 0xf) && (*cp & 0xf) != base && (*errorLocation == sMapErrorReturnContinue || *errorLocation == sMapErrorReturnContinueFail)) *errorLocation = callback ? (*callback)(key, map->s2 - j) : sMapErrorReturnFail; *cp-- |= (complementBase[(int)(*cq++ >> 4)] << 4) | base; } } else { for (j = curr_dna_length - 1 ; j >= 0 ; j--) { if ((*cp & 0xf) && (*cp & 0xf) != (*cq & 0xf) && (*errorLocation == sMapErrorReturnContinue || *errorLocation == sMapErrorReturnContinueFail)) *errorLocation = callback ? (*callback)(key, map->s2 - j) : sMapErrorReturnFail; *cp++ |= *cq++ ; } } } } result = TRUE ; } if (obj) bsDestroy (obj) ; if (a) arrayDestroy (a) ; } return result ; } /*************************************************************/ /************ now the stuff to build smaps *******************/ static int SMapMapOrder (void *va, void *vb) { SMapMap *a = (SMapMap*)va ; SMapMap *b = (SMapMap*)vb ; return (a->s1 - b->s1) ; } /******************************/ /* Finds smallest containing smap'd object that fits the predicate specified * by predicate function, or any smallest containing smap'd object if no * predicate. * * This is richards comment about this routine when it was in fmapcontrol.c * * "Mar 13 22:55 2002 (rd): fixed fMapFindSpan() to use sMapInverseMap() not SEGs * and removed fMapFindZoneFather(), replacing calls with fMapFindSpan()" * * You should NOTE WELL that smap coords run 1 -> length (i.e. do not start at 0) * */ BOOL sMapFindSpan(SMap *smap, KEY *keyp, int *start, int *end, sMapSpanPredicate predicate) { BOOL found ; KEYSET ks ; int i, min = INT_MAX ; int x, y, xResult=0, yResult=0 ; KEY key, result = KEY_UNDEFINED ; SMapStatus status ; SMapKeyInfo *info ; messAssert(smap != NULL && keyp != NULL && start != NULL && end != NULL) ; ks = sMapKeys(smap, 0) ; for (i = 0 ; i < keySetMax(ks) ; ++i) { int key_len ; key = keySet (ks, i) ; if (predicate && !(*predicate)(key)) continue ; info = sMapKeyInfo (smap, key) ; status = sMapInverseMap (info, *start, *end, &x, &y, 0, 0) ; if (status == SMAP_STATUS_PERFECT_MAP && (key_len = sMapLength(key)) < min) { result = key ; xResult = x ; yResult = y ; min = key_len ; } } keySetDestroy(ks) ; if (!result) found = FALSE ; else { *keyp = result ; *start = xResult ; *end = yResult ; found = TRUE ; } return found ; } /******************************/ static Array sMapLocalMapEx (OBJ obj, /* in parent located at #SmapInfo */ int pStart, /* start coord in parent */ int pEnd, /* end coord in parent */ STORE_HANDLE handle, /* return array on this. */ Strand *strand) /* fill in for direction.*/ { SMapMap *m ; Array u = arrayCreate (16, BSunit) ; BSunit *ui ; int i ; Array localMap ; int DNAfac, pepFac; BSMARK mark ; Strand alignStrand = strand_down; /* first get Align info into u */ mark = bsMark (obj, 0) ; if (bsGetArray (obj, str2tag("Align"), u, 3)) { DNAfac = 1 ; pepFac = 1 ; } else if (bsGetArray (obj, str2tag("AlignDNAPep"), u, 3)) { DNAfac = 3 ; pepFac = 1 ; } else if (bsGetArray (obj, str2tag("AlignPepDNA"), u, 3)) { DNAfac = 1 ; pepFac = 3 ; } else { arrayDestroy(u); bsGoto (obj, mark) ; bsMarkFree (mark) ; return 0; } bsGoto (obj, mark) ; bsMarkFree (mark) ; /* next convert u into localMap */ localMap = arrayHandleCreate (arrayMax(u)/3, SMapMap, handle) ; for (i = 0 ; i < arrayMax(u)/3 ; ++i) { m = arrayp(localMap, i, SMapMap) ; ui = arrp(u, 3*i, BSunit) ; m->s1 = ui[1].i ; m->r1 = ui[0].i ; if (ui[2].i) { /* negative length means invert - but only when pStart == pEnd */ /* s_child s_sequence 100 100 Align 100 1 -1 gives a single base sequence on reverse strand. */ if (ui[2].i < 0) { alignStrand = strand_up; m->s2 = m->s1 - ui[2].i - 1 ; } else m->s2 = m->s1 + ui[2].i - 1 ; } } arraySort (localMap, SMapMapOrder) ; /** could/should check colinearity here? **/ /* fill in blank ->s1 and all ->r2 values; must come after sort */ for (i = 0 ; i < arrayMax(localMap)-1 ; ++i) { m = arrp(localMap, i, SMapMap) ; if (pEnd > pStart) /* forward alignment */ { if (!m->s2) { if (((m+1)->s1 - m->s1) / pepFac < (((m+1)->r1 - m->r1) / DNAfac)) m->s2 = (m+1)->s1 - 1 ; else m->s2 = m->s1 + ((((m+1)->r1 - m->r1) / DNAfac) - 1) * pepFac ; } m->r2 = m->r1 + ((m->s2 - m->s1) * DNAfac) / pepFac ; } else /* reversed alignment */ { if (!m->s2) { if (((m+1)->s1 - m->s1) / pepFac < ((m->r1 - (m+1)->r1) / DNAfac)) m->s2 = (m+1)->s1 - 1 ; else m->s2 = m->s1 + (((m->r1 - (m+1)->r1) / DNAfac) - 1) * pepFac ; } m->r2 = m->r1 - ((m->s2 - m->s1) * DNAfac) / pepFac ; } } m = arrp(localMap, arrayMax(localMap)-1, SMapMap) ; m->r2 = pEnd ; if (!m->s2) { if (pEnd > pStart) m->s2 = m->s1 + ((m->r2 - m->r1) / DNAfac) * pepFac ; else m->s2 = m->s1 + ((m->r1 - m->r2) / DNAfac) * pepFac ; } /** could check total lengths match here if given **/ if (strand) { if (pEnd > pStart) /* forward alignment */ *strand = strand_down; else if (pEnd < pStart) *strand = strand_up; else /* pStart == pEnd */ *strand = alignStrand; } arrayDestroy (u) ; return localMap ; } Array sMapLocalMap (OBJ obj, /* in parent located at #SmapInfo */ int pStart, /* start coord in parent */ int pEnd, /* end coord in parent */ STORE_HANDLE handle) /* return array on this. */ { return sMapLocalMapEx(obj, pStart, pEnd, handle, NULL); } /******************************/ static Array sMapComposeMaps (Array localMap, Array parentMap, STORE_HANDLE handle) { Array productMap ; SMapMap *m1, *m2, *m3 ; SMapStatus status; struct SMapMapReturn mapRet ; int i; /* Warning: a picture helps A LOT if you want to follow this function */ /* Notation: m1 in localMap maps x-space to y-space m2 in parentMap maps y-space to z-space m3 in the productMap maps x-space to z-space */ productMap = arrayHandleCreate (arrayMax(localMap) + arrayMax(parentMap) - 1, SMapMap, handle) ; /* First loop over localMap, creating 0, 1 or more segments in productMap for each localMap segment. Treat separately the two cases where localMap is reversed or not reversed. */ if (arrp(localMap,0,SMapMap)->r1 < arrp(localMap,arrayMax(localMap)-1,SMapMap)->r2) /* localMap is not reversed - easier to think about */ for (i = 0 ; i < arrayMax(localMap) ; ++i) { m1 = arrp(localMap, i, SMapMap) ; status = sMapMapEx (parentMap, m1->r1, m1->r2, &mapRet) ; if (status & SMAP_STATUS_NO_OVERLAP) continue ; m3 = arrayp(productMap, arrayMax(productMap), SMapMap) ; /* make at least one segment */ if (!(status & SMAP_STATUS_INTERNAL_GAPS)) /* easy case */ { m3->s1 = m1->s1 + (mapRet.nx1 - m1->r1) ; m3->r1 = mapRet.y1 ; m3->s2 = m1->s2 - (m1->r2 - mapRet.nx2) ; m3->r2 = mapRet.y2 ; } else /* complex case when INTERNAL_GAPS */ { /* start first segment as above */ m3->s1 = m1->s1 + (mapRet.nx1 - m1->r1) ; m3->r1 = mapRet.y1 ; m2 = lowestUpperSeg (parentMap, m1->r1) ; while (m2->s2 < mapRet.nx2) /* all the rest of m2 is in the m1 alignment */ { m3->s2 = m1->s1 + (m2->s2 - m1->r1) ; m3->r2 = m2->r2 ; /* finish this segment */ ++m2 ; m3 = arrayp(productMap, arrayMax(productMap), SMapMap) ; /* and start another */ m3->s1 = m1->s1 + (m2->s1 - m1->r1) ; m3->r1 = m2->r1 ; } m3->s2 = m1->s2 - (m1->r2 - mapRet.nx2) ; m3->r2 = mapRet.y2 ; /* finish final segment */ } } else /* localMap is reversed */ for (i = 0 ; i < arrayMax(localMap) ; ++i) { m1 = arrp(localMap, i, SMapMap) ; status = sMapMapEx (parentMap, m1->r1, m1->r2, &mapRet) ; if (status & SMAP_STATUS_NO_OVERLAP) continue ; m3 = arrayp(productMap, arrayMax(productMap), SMapMap) ; /* make at least one segment */ if (!(status & SMAP_STATUS_INTERNAL_GAPS)) /* easy case */ { m3->s1 = m1->s1 + (m1->r1 - mapRet.nx1) ; m3->r1 = mapRet.y1 ; m3->s2 = m1->s2 - (mapRet.nx2 - m1->r2) ; m3->r2 = mapRet.y2 ; } else /* complex case when INTERNAL_GAPS */ { /* start first segment as above */ m3->s1 = m1->s1 + (m1->r1 - mapRet.nx1) ; m3->r1 = mapRet.y1 ; m2 = highestLowerSeg (parentMap, m1->r1) ; while (m2->s1 > mapRet.nx2) /* all the rest of m2 is in the m1 alignment */ { /* finish this segment */ m3->s2 = m1->s1 + (m1->r1 - m2->s1) ; m3->r2 = m2->r1 ; --m2 ; /* and start another */ m3 = arrayp(productMap, arrayMax(productMap), SMapMap) ; m3->s1 = m1->s1 + (m1->r1 - m2->s2) ; m3->r1 = m2->r2 ; } m3->s2 = m1->s2 - (mapRet.nx2 - m1->r2) ; m3->r2 = mapRet.y2 ; /* finish final segment */ } } /* finally must go through and check that we don't have abutting segments - if so join them */ /* reuse m1, m2, m3 all in productMap */ m2 = m1 = arrp(productMap, 0, SMapMap) ; for (i = 0 ; i < arrayMax(productMap)-1 ; ++m2, ++i) { m3 = m2 + 1 ; if (m3->s1 > m2->s2 + 1 || m3->r1 > m2->r2 + 1 || m3->r1 < m2->r2 - 1) { if (m1 != m2) { m1->s2 = m2->s2 ; m1->r2 = m2->r2 ; } ++m1 ; if (m1 != m3) { m1->s1 = m3->s1 ; m1->r1 = m3->r1 ; } } } if (m1 != m2) { m1->s2 = m2->s2 ; m1->r2 = m2->r2 ; } arrayMax(productMap) = (m1 - arrp(productMap, 0, SMapMap)) + 1 ; return productMap ; } /******************************/ void sMapInvertMap (Array map) { int i, t ; SMapMap *m, *n ; /* N.B. we compare the first segment with the _last_ to detect reverse */ /* coords, comparing within a segment does not if r1 == r2 ! */ if (arrp(map, 0, SMapMap)->r1 < arrp(map, (arrayMax(map) - 1), SMapMap)->r2) for (i = 0 ; i < arrayMax(map) ; ++i) { m = arrp(map, i, SMapMap) ; t = m->s1 ; m->s1 = m->r1 ; m->r1 = t ; t = m->s2 ; m->s2 = m->r2 ; m->r2 = t ; } else for (i = 0 ; i < arrayMax(map) ; ++i) { m = arrp(map, i, SMapMap) ; n = arrp(map, arrayMax(map)-1-i, SMapMap) ; t = m->s1 ; m->s1 = n->r2 ; n->r2 = t ; t = m->s2 ; m->s2 = n->r1 ; n->r1 = t ; } } /*********************************************************/ static SMapKeyInfo *sMapNewInfo (SMap* smap, KEY key, KEY parent) { SMapKeyInfo *info = (SMapKeyInfo*) halloc (sizeof(SMapKeyInfo), smap->handle) ; info->smap = smap ; info->key = key ; info->parent = parent; assInsert (smap->key2index, assVoid(key), info) ; info->children = arrayHandleCreate (8, SMapKeyInfo*, smap->handle) ; return info ; } /*****************/ static BOOL sMapAddNode(SMap *smap, KEY parent, KEY key, /* key to add */ int pStart, /* start coord in parent */ int pEnd, /* end coord in parent */ OBJ parentObj, /* located at #SmapInfo */ BOOL invert) { SMapKeyInfo *info = NULL, *parentInfo = NULL ; Array localMap = NULL, u = NULL ; SMapMismatch *mis = NULL ; SMapMap *m = NULL ; int i ; Strand strand; if (assFind (smap->key2index, assVoid(key), 0)) /* we have it already */ return FALSE; info = sMapNewInfo (smap, key, parent) ; parentInfo = sMapKeyInfo(smap, parent) ; array(parentInfo->children, arrayMax(parentInfo->children), SMapKeyInfo*) = info ; /* make local map then compose with parent map to give map */ if (!(localMap = sMapLocalMapEx(parentObj, pStart, pEnd, 0, &strand))) { localMap = arrayCreate (1, SMapMap) ; m = arrayp (localMap, 0, SMapMap) ; m->s1 = 1 ; m->r1 = pStart ; m->r2 = pEnd ; if (pEnd >= pStart) /* default for 1 base block is down strand */ { m->s2 = pEnd - pStart + 1; strand = strand_down; } else { m->s2 = pStart - pEnd + 1; strand = strand_up; } } /* Not that ExonMap is always forward so we can ignore that */ if (strand == strand_up) info->strand = invertStrand(parentInfo->strand); else info->strand = parentInfo->strand; #ifdef ACEDB4 if (!invert) { Array exonMap; if ((exonMap = getExonMap(key))) { Array unspliceMap = sMapComposeMaps(localMap, parentInfo->map, smap->handle); info->map = sMapComposeMaps (exonMap, unspliceMap, smap->handle) ; arrayDestroy(exonMap); arrayDestroy(unspliceMap); } else info->map = sMapComposeMaps(localMap, parentInfo->map, smap->handle); } else { /* The next bit is REALLY TRICKY. If we are following a mapping backwards and the parent (really child) has source exons, then the mapping we have for the parent will include the mapping from the source exons. But the child (really parent) maps to the unspliced (ie no exons) version. So before doing anything else we have to remove the source exons mapping. Then we apply the reverse SE mapping (if any) for the child (really parent) and finally the smap mapping. You are not expected to understand this. */ Array parentExonMap; sMapInvertMap(localMap); if ((parentExonMap = getExonMap(parent))) { Array unspliceMap; sMapInvertMap(parentExonMap); unspliceMap = sMapComposeMaps(parentExonMap, parentInfo->map, smap->handle); info->map = sMapComposeMaps (localMap, unspliceMap, smap->handle); arrayDestroy(parentExonMap); arrayDestroy(unspliceMap); } else info->map = sMapComposeMaps(localMap, parentInfo->map, smap->handle); } #else if (invert) sMapInvertMap(localMap); info->map = sMapComposeMaps (localMap, parentInfo->map, smap->handle) ; #endif /* !ACEDB4 */ arrayDestroy(localMap); /* get and store info->mismatch */ u = arrayCreate (16, BSunit) ; if (bsGetArray (parentObj, str2tag("Mismatch"), u, 2)) { info->mismatch = arrayHandleCreate (arrayMax(u)/2, SMapMismatch, smap->handle) ; for (i = 0 ; i < arrayMax(u) ; i += 2) { mis = arrayp(info->mismatch, arrayMax(info->mismatch), SMapMismatch) ; mis->s1 = arr(u,i,BSunit).i ; mis->s2 = arr(u,i+1,BSunit).i ? arr(u,i+1,BSunit).i : mis->s1 ; } } else if (bsFindTag (parentObj, str2tag("Mismatch"))) /* whole sequence */ { info->mismatch = arrayHandleCreate (1, SMapMismatch, smap->handle) ; mis = arrayp(info->mismatch, 0, SMapMismatch) ; mis->s1 = arrp(info->map, 0, SMapMap)->s1 ; mis->s2 = arrp(info->map, arrayMax(info->map), SMapMap)->s2 ; } else info->mismatch = NULL; arrayDestroy (u) ; if (info->mismatch && !arrayMax (info->mismatch)) arrayDestroy (info->mismatch) ; return TRUE; } /* ParentDone flag for efficiency, set when going down tree, cleared going up. */ static void sMapAddNeighbours (SMap *smap, KEY key, char *aqlCondition, BOOL parentDone) /* aqlCondition does nothing yet */ { int pos1, pos2 ; BSMARK mark1 = 0, mark2 = 0 ; OBJ obj ; KEY child ; /* check if any children first before opening */ if ( bIndexTag (key, str2tag("S_Child")) || (!parentDone && bIndexTag(key, str2tag("S_Parent"))) #ifdef ACEDB4 || bIndexTag (key, str2tag("Subsequence")) || (!parentDone && bIndexTag(key, str2tag("Source"))) #endif ) { if (!(obj = bsCreate (key))) return ; if (bsGetKeyTags (obj, str2tag("S_Child"), 0)) do { mark1 = bsMark (obj, mark1) ; if (bsGetKey (obj, _bsRight, &child)) do { mark2 = bsMark (obj, mark2) ; if (bsGetData (obj, _bsRight, _Int, &pos1) && bsGetData (obj, _bsRight, _Int, &pos2) && sMapConvert (smap, key, pos1, pos2, 0, 0)) /* sMapConvert() call checks this key overlaps area of smap */ { bsPushObj (obj) ; if (sMapAddNode(smap, key, child, pos1, pos2, obj, FALSE)) sMapAddNeighbours (smap, child, aqlCondition, TRUE) ; } bsGoto (obj, mark2) ; } while (bsGetKey (obj, _bsDown, &child)) ; bsGoto (obj, mark1) ; } while (bsGetKeyTags (obj, _bsDown, 0)) ; #ifdef ACEDB4 /* support Subsequence not under S_Child temporarily */ bsGoto (obj, 0) ; if (bsGetKey (obj, str2tag("Subsequence"), &child)) do { mark1 = bsMark (obj, mark1) ; if (bsGetData (obj, _bsRight, _Int, &pos1) && bsGetData (obj, _bsRight, _Int, &pos2) && sMapConvert (smap, key, pos1, pos2, 0, 0)) { bsPushObj (obj) ; if (sMapAddNode(smap, key, child, pos1, pos2, obj, FALSE)) sMapAddNeighbours (smap, child, aqlCondition, TRUE) ; } bsGoto (obj, mark1) ; } while (bsGetKey (obj, _bsDown, &child)) ; #endif /* ACEDB4 */ if (!parentDone) { KEY parent = 0; OBJ parentObj; if (bsGetKeyTags(obj, str2tag("S_Parent"), 0) && bsGetKey(obj, _bsRight, &parent)) { if ((parentObj = bsCreate(parent))) { if (bsFindKey2 (parentObj, str2tag("S_Child"), key, 0) && bsGetData (parentObj, _bsRight, _Int, &pos1) && bsGetData (parentObj, _bsRight, _Int, &pos2) && sMapAddNode(smap, key, parent, pos1, pos2, parentObj, TRUE)) /* Note child, parent swap here */ sMapAddNeighbours (smap, parent, aqlCondition, FALSE) ; bsDestroy(parentObj); } } #ifdef ACEDB4 else if (bsGetKey (obj, str2tag("Source"), &parent)) { if ((parentObj = bsCreate(parent))) { if (bsFindKey (parentObj, str2tag("Subsequence"), key) && bsGetData (parentObj, _bsRight, _Int, &pos1) && bsGetData (parentObj, _bsRight, _Int, &pos2) && sMapAddNode(smap, key, parent, pos1, pos2, parentObj, TRUE)) /* Note child, parent swap here */ sMapAddNeighbours (smap, parent, aqlCondition, FALSE) ; bsDestroy(parentObj); } } #endif } bsMarkFree (mark1) ; bsMarkFree (mark2) ; bsDestroy (obj) ; } } /* key is top level key */ /* aqlCondition does nothing yet */ SMap* sMapCreate (STORE_HANDLE handle, KEY key, int x1, int x2, char *aqlCondition) { SMap *smap = sMapSimpleCreate (handle) ; SMapMap *m ; int end ; /* insert self into smap as root */ smap->root = sMapNewInfo (smap, key, 0) ; smap->root->map = arrayCreate (1, SMapMap) ; m = arrayp(smap->root->map, 0, SMapMap) ; if (x2 > x1) { smap->root->strand = strand_down; m->s1 = x1 ; m->s2 = x2 ; m->r1 = 1 ; m->r2 = end = x2 - x1 + 1 ; } else { smap->root->strand = strand_up; m->s1 = x2 ; m->s2 = x1 ; m->r1 = end = x1 - x2 + 1 ; m->r2 = 1 ; } smap->length = end ; /* now recursively add all neighbours */ sMapAddNeighbours (smap, key, aqlCondition, FALSE) ; return smap ; } /************************************************************************/ /******** routines to be used independently/prior to SMAP creation ******/ /* find parent and return map between key and parent. inobj may be bsCreate(key) or NULL */ static Array mapToParent(KEY key, OBJ keyObj, KEY *kp) { OBJ obj = 0; KEY parent ; int z1, z2; Array map ; /* find self in parent */ /* slightly convoluted to allow use of Source #ifdef ACEDB4 */ parent = 0; if (bsGetKeyTags (keyObj, str2tag("S_Parent"), 0)) bsGetKey (keyObj, _bsRight, &parent) ; if (!(parent && (obj = bsCreate (parent)) && bsFindKey2 (obj, str2tag("S_Child"), key, 0) && bsGetData (obj, _bsRight, _Int, &z1) && bsGetData (obj, _bsRight, _Int, &z2))) { if (obj) bsDestroy (obj) ; #ifndef ACEDB4 return NULL; #else /* accept Source not under S_Parent temporarily */ obj = bsCreate (key) ; parent = 0; bsGetKey (obj, str2tag("Source"), &parent); bsDestroy (obj) ; if (!(parent && (obj = bsCreate (parent)) && bsFindKey (obj, str2tag("Subsequence"), key) && bsGetData (obj, _bsRight, _Int, &z1) && bsGetData (obj, _bsRight, _Int, &z2))) { if (obj) bsDestroy (obj) ; return NULL; } #endif /* ACEDB4 */ } bsPushObj (obj) ; /* go into #SMap_info */ if (!(map = sMapLocalMap (obj, z1, z2, 0))) { SMapMap *m ; map = arrayCreate (1, SMapMap) ; m = arrayp (map, 0, SMapMap) ; m->r1 = z1 ; m->r2 = z2 ; m->s1 = 1 ; m->s2 = (z2 > z1) ? (z2 - z1 + 1) : (z1 - z2 + 1) ; } bsDestroy (obj) ; #ifdef ACEDB4 /* This routine maps from child coords back into parent coords. If the child has Source_exons tags we need that mapping too. */ { Array temp, exonMap; if ((exonMap = getExonMap(key))) { temp = sMapComposeMaps(exonMap, map, 0); arrayDestroy(map); arrayDestroy(exonMap); map = temp; } } #endif if (kp) *kp = parent; return map; } BOOL sMapEnclosedBy (KEY key, int x1, int x2, KEY *kp, int *y1, int *y2) /* finds next enclosing sequence, with coords in that sequence */ { Array map; KEY parent; SMapStatus status; struct SMapMapReturn ret; OBJ obj; /* Is there an enclosing object (i.e. parent) ?? */ if (hasNoParent(key)) return FALSE; if (!(obj = bsCreate(key))) return FALSE; if (!(map = mapToParent(key, obj, &parent))) { bsDestroy(obj); return FALSE; } /* We only fail if there is no overlap at all, otherwise we return coords */ /* that may be clipped. */ status = sMapMapEx (map, x1, x2, &ret); arrayDestroy (map) ; bsDestroy(obj); if (status & SMAP_STATUS_NO_OVERLAP) return FALSE ; else { if (kp) *kp = parent ; if (y1) *y1 = ret.y1 ; if (y2) *y2 = ret.y2 ; } return TRUE ; } /*****************/ /* I'm coding this so that if will return FALSE if the coords are completely */ /* outside, or there is something else wrong with the object, like no */ /* coord info or negative coords. */ /* NOTE: we rely on sMapLength() not calling us, otherwise there will be */ /* recursion... */ /* */ BOOL sMapTreeRoot(KEY key, int x1, int x2, KEY *kp, int *y1, int *y2) { BOOL result = FALSE ; static Associator hashSet = NULL ; KEY k = KEY_UNDEFINED ; int k1 = 0, k2 = 0 ; int length; if (iskey(key) != 2 || (x1 < 0 || x2 < 0)) return FALSE ; /* need non-zero kp, y1, y2 */ if (!kp) kp = &k ; if (!y1) y1 = &k1 ; if (!y2) y2 = &k2 ; if (sMapEnclosedBy (key, x1, x2, kp, y1, y2)) { /* Enclosing parent so look for coords in that parent. */ /* use hashSet to break loops in object tree. */ hashSet = assReCreate (hashSet) ; assInsert (hashSet, assVoid(key), assVoid(1)) ; while (assInsert (hashSet, assVoid(*kp), assVoid(1)) && sMapEnclosedBy(*kp, *y1, *y2, kp, y1, y2)) ; result = TRUE ; } else if ((length = sMapLength(key)) && (x1 > 0 || x2 > 0) && (x1 <= length || x2 <= length)) { /* No enclosing object/parent so return key + coords of this object. */ /* In some ways we should really do an sMapMap() here for purity but */ /* perhaps this short cut is acceptable. */ *kp = key ; if (x1 < x2 && x1 == 0) { *y1 = length ; *y2 = x2 ; } else if (x2 < x1 && x2 == 0) { *y1 = x1 ; *y2 = length ; } else if (x1 <= x2) { if (x1 < 0) *y1 = 1 ; else *y1 = x1 ; if (x2 > length) *y2 = length ; else *y2 = x2 ; } else /* x2 <= x1 */ { if (x2 < 0) *y2 = 1 ; else *y2 = x2 ; if (x1 > length) *y1 = length ; else *y1 = x1 ; } result = TRUE ; } return result ; } /*****************/ /* try various strategies to find the sequence length of an object * could do all and check consistency? * NOTE: You should not call sMapTreeRoot() from here either directly or indirectly * othewise there will be recursion in a run away sense.....*/ int sMapLength (KEY key) { static Array units = 0 ; int i, max = 0 ; OBJ obj = NULL ; Array map; units = arrayReCreate (units, 48, BSunit) ; if (!bIndexTag (key, str2tag("DNA")) && !bIndexTag (key, str2tag("S_Parent")) && !bIndexTag (key, str2tag("S_Child")) #ifdef ACEDB4 && !bIndexTag (key, str2tag("Source")) && !bIndexTag (key, str2tag("Subsequence")) #endif ) { max = 0 ; } else if (!(obj = bsCreate (key))) { max = 0 ; } else if (bsGetKey (obj, str2tag("DNA"), 0) && bsGetData (obj, _bsRight, _Int, &max)) { /* check first if it has its own DNA */ max = max ; } else if ((map = mapToParent(key, obj, NULL))) { /* next if S_parent */ struct SMapMapReturn ret; if (!(sMapMapEx (map, 1, 0, &ret) & SMAP_STATUS_NO_OVERLAP)) max = (ret.nx2 > ret.nx1) ? (ret.nx2 - ret.nx1 + 1) : (ret.nx1 - ret.nx2 + 1) ; arrayDestroy(map); } else /* Note, there may be a mixture if S_Child and Subsequence tags, it is vital to check both, do not ignore the Subsequences if S_Childs exist. */ { if (bsGetArray (obj, str2tag("S_Child"), units, 4)) { /* next if S_children */ for (i = 2 ; i < arrayMax (units) ; i += 4) { if (arr(units,i,BSunit).i > max) max = arr(units,i,BSunit).i ; if (arr(units,i+1,BSunit).i > max) max = arr(units,i+1,BSunit).i ; } } #ifdef ACEDB4 if (bsGetArray (obj, str2tag("Subsequence"), units, 3)) { /* or Subsequence in ACEDB 4 */ for (i = 1 ; i < arrayMax (units) ; i += 3) { if (arr(units,i,BSunit).i > max) max = arr(units,i,BSunit).i ; if (arr(units,i+1,BSunit).i > max) max = arr(units,i+1,BSunit).i ; } } #endif /* ACEDB4 */ } bsDestroy (obj) ; return max ; } #ifdef ACEDB4 /***********************************************************/ /******* a utility needed for Source_exons sorting *********/ static int dnaExonsOrder (void *a, void *b) { ExonStruct *ea = (ExonStruct *)a ; ExonStruct *eb = (ExonStruct *)b ; return ea->x - eb->x ; } static void exonsSort (Array a) { /* can't just arraySort, because a is in BSunits, not pairs of BSunits */ int i, n ; Array b = 0 ; n = arrayMax(a) / 2 ; b = arrayCreate (n, ExonStruct) ; for (i = 0 ; i < n ; ++i) { arrayp(b,i,ExonStruct)->x = arr(a,2*i,BSunit).i ; arrp(b,i,ExonStruct)->y = arr(a,2*i+1,BSunit).i ; } arraySort (b, dnaExonsOrder) ; for (i = 0 ; i < n ; ++i) { arr(a,2*i,BSunit).i = arrp(b,i,ExonStruct)->x ; arr(a,2*i+1,BSunit).i = arrp(b,i,ExonStruct)->y ; } arrayDestroy (b) ; } static Array getExonMap(KEY key) { Array u, exonMap = 0; OBJ obj = 0 ; int x, i ; SMapMap *m; u = arrayCreate (16, BSunit) ; if (bIndexTag (key, str2tag("Source_exons")) && (obj = bsCreate(key)) && bsGetArray (obj, str2tag("Source_exons"), u, 2)) { exonsSort (u) ; exonMap = arrayCreate (arrayMax(u)/2, SMapMap) ; x = 1 ; for (i = 0 ; i < arrayMax(u) ; i += 2) { m = arrayp (exonMap, i/2, SMapMap) ; m->r1 = arr(u,i,BSunit).i ; m->r2 = arr(u,i+1,BSunit).i ; m->s1 = x ; m->s2 = x + (m->r2 - m->r1) ; x += m->r2 - m->r1 + 1 ; } } if (obj) bsDestroy (obj) ; arrayDestroy(u); return exonMap; } #endif /* Some small internal routines to encapsulate some tests etc. */ /* Is this object at the top of a hierachy (i.e. has no parent tags) ? */ /* */ static BOOL hasNoParent(KEY key) { BOOL result = FALSE ; if (bIndexTag(key, str2tag("S_Parent"))) result = FALSE ; else result = TRUE ; #ifdef ACEDB4 /* Objects either have S_Parent or Source, not both... */ if (result == TRUE) { if (bIndexTag(key, str2tag("Source"))) result = FALSE ; else result = TRUE ; } #endif return result ; } static void sMapDumpInfo(SMapKeyInfo *info, ACEOUT dest, int offset) { int i; aceOutPrint(dest, "\n"); for (i=0; ikey), sMapIsReverse(info, 1, 1) ? "Up" : "Down"); if (info->map) { aceOutPrint(dest, " map:"); for (i=0; imap); i++) { SMapMap *m = arrayp(info->map, i, SMapMap); aceOutPrint(dest, "%d,%d->%d,%d ", m->s1, m->s2, m->r1, m->r2); } } if (info->mismatch) { aceOutPrint(dest," mis:"); for (i=0; imap); i++) { SMapMismatch *m = arrayp(info->mismatch, i, SMapMismatch); aceOutPrint(dest, "%d,%d ", m->s1, m->s2); } } if (info->children) for (i=0; ichildren); i++) { SMapKeyInfo *inf = array(info->children, i, SMapKeyInfo *); sMapDumpInfo(inf, dest, offset+3); } } void sMapDump(SMap *smap, ACEOUT dest) { KEY rootkey; int i, k, k1, start, rx1, rx2 ; char *cp, *cq ; char buffer [8000] ; Array dna; if (!sMapTreeRoot(smap->root->key, 1, 0, &rootkey, &rx1, &rx2)) aceOutPrint(dest, "NO ROOT\n"); else aceOutPrint(dest, "Root: %s [%d,%d]\n", name(rootkey), rx1, rx2); aceOutPrint(dest, "sMapLength = %d\n", sMapLength(smap->root->key)); sMapDumpInfo(smap->root, dest, 0); aceOutPrint(dest, "\n\n"); if (!(dna = sMapDNA(smap, 0, NULL))) aceOutPrint(dest, "NO DNA\n"); else { if ((i = smap->length) > 1000) { aceOutPrint(dest, "Length = %d TRUNCATED\n", i); i = 1000; } cp = arrp(dna,0,char) ; cq = buffer ; while(i > 0) { cq = buffer ; for (k=0 ; k < 4000/(50 + 3) ; k++) if (i > 0) { start = cp -arrp(dna,0,char) +1; cq += sprintf(cq, "%5d ", start); k1 = 50 ; while (k1-- && i--) *cq++ = dnaDecodeChar[*cp++ & 0xff] ; *cq++ = '\n' ; *cq = 0 ; } aceOutPrint(dest, "%s", buffer) ; } arrayDestroy(dna); } aceOutPrint(dest, "\n\n"); } /*********************** end of file ************************/