/* File: smap.h * Author: Richard Durbin (rd@sanger.ac.uk) * Copyright (c) J Thierry-Mieg and R Durbin, 1999 * ------------------------------------------------------------------- * Acedb is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * or see the on-line version at http://www.gnu.org/copyleft/gpl.txt * ------------------------------------------------------------------- * This file is part of the ACEDB genome database package, written by * Richard Durbin (Sanger Centre, UK) rd@sanger.ac.uk, and * Jean Thierry-Mieg (CRBM du CNRS, France) mieg@kaa.crbm.cnrs-mop.fr * * Description: The sMap interface. A new set of database tags and * code to construct virtual sequences. The code handles * gaps, mismatches and much else. * You should note that sMap coords are 1-based, i.e. * sequences run from 1 -> length. This means that for * code such as much of fMap you need to convert to and * fro from fMaps 0-based coords. * HISTORY: * Last edited: Apr 8 13:42 2003 (edgrif) * Created: Wed Jul 28 22:11:39 1999 (rd) * CVS info: $Id: smap.h,v 1.27 2003/04/08 12:44:56 edgrif Exp $ *------------------------------------------------------------------- */ #ifndef DEF_SMAP_H #define DEF_SMAP_H #define USE_SMAP /* define this to use new smap code */ #include #include typedef struct { int s1, s2 ; /* coords in input space */ int r1, r2 ; /* coords in output space */ } SMapMap ; typedef struct SMapStruct SMap ; /* Recursively builds map down from key between start and stop inclusive in key's coordinate system. The idea is that the aqlCondition is applied to test whether to open each object, but this is not implemented yet. NB x2 can not be 0 here: must explicitly get length first. */ SMap* sMapCreate (STORE_HANDLE handle, KEY key, int x1, int x2, char *aqlCondition) ; /* area1 and area2 are limits in maps coordinate system. (ie 1 based ) Only objects which overlap area1 to area2 are included, (but they are not truncated to that area if they partialy overlap). DNA is only returned from area1 to area2 The first base if DNA is at coordinate area1 */ SMap* sMapCreateEx (STORE_HANDLE handle, KEY key, int x1, int x2, int area1, int area2, char *aqlCondition) ; Array sMapLocalMap (OBJ obj, /* in parent located at #SmapInfo */ int pStart, /* start coord in parent */ int pEnd, /* end coord in parent */ STORE_HANDLE h); /* return array on this. */ void sMapDestroy (SMap* smap) ; int sMapMax (SMap* smap) ; /* sMapLength() used below for a key */ /* * basic coordinate conversion routines */ /* Converts interval x1..x2 in key's coordinate system to y1..y2 in smap's. Returns TRUE if succeeds, FALSE if e.g. key not known in smap, or both x1, x2 out of range (on the same side), or both fall in the same gap in the map. If part of the interval is out of range then it clips. */ BOOL sMapConvert (SMap* smap, KEY key, int x1, int x2, int *y1, int *y2) ; /* For a given span find the sequence object that contains that span * without gaps. If sMapSpanPredicate is non-NULL, it gets used to filter the output. */ typedef BOOL (*sMapSpanPredicate)(KEY key); BOOL sMapFindSpan(SMap *smap, KEY *keyp, int *start, int *end, sMapSpanPredicate) ; /* The next section provides more complex support for coordinate conversion. */ /* These are called by sMapConvert(), so are also more efficient. */ typedef struct SMapKeyInfoStruct SMapKeyInfo ; SMapKeyInfo* sMapKeyInfo (SMap* smap, KEY key) ; /* gives handle for use with SMapMap */ /* SMap status can be one of: * * SMAP_STATUS_PERFECT_MAP for clean map. * SMAP_STATUS_ERROR for serious error, e.g. invalid args. * or some combination of other bits indicating mapping result. * * For NO_OVERLAP_EXTERNAL and CLIP the bit settings indicate which end of the * input coords the no overlap or clip happens. */ typedef unsigned int SMapStatus ; #define SMAP_STATUS_PERFECT_MAP 0x0000U /* Perfect map without gaps. */ #define SMAP_STATUS_ERROR 0x0001U /* No map, serious problem e.g. invalid args. */ #define SMAP_STATUS_INTERNAL_GAPS 0x0002U /* map with gap(s) within alignment */ #define SMAP_STATUS_OUTSIDE_AREA 0x0004U /* Mapped, but not in area. */ #define SMAP_STATUS_X1_NO_OVERLAP_EXTERNAL 0x0010U /* failure: [x1,x2] outside map range at x1 end. */ #define SMAP_STATUS_X2_NO_OVERLAP_EXTERNAL 0x0020U /* failure: [x1,x2] outside map range at x2 end. */ #define SMAP_STATUS_NO_OVERLAP_EXTERNAL 0x0030U /* either of the above. */ #define SMAP_STATUS_NO_OVERLAP_INTERNAL 0x0040U /* failure: [x1,x2] within a gap in map */ #define SMAP_STATUS_NO_OVERLAP 0x0070U /* any of the above: y1, y2 unchanged */ #define SMAP_STATUS_X1_EXTERNAL_CLIP 0x0100U /* x1 off end of map */ #define SMAP_STATUS_X1_INTERNAL_CLIP 0x0200U /* x1 in internal gap in map */ #define SMAP_STATUS_X1_CLIP 0x0300U /* either of the above: implies *nx1 != x1 */ #define SMAP_STATUS_X2_EXTERNAL_CLIP 0x0400U /* etc. */ #define SMAP_STATUS_X2_INTERNAL_CLIP 0x0800U #define SMAP_STATUS_X2_CLIP 0x0c00U #define SMAP_STATUS_CLIP 0x0f00U /* Is a flag set with no error signalled ? */ #define SMAP_STATUS_SET(STATUS, STATUS_FLAG) \ ( ((STATUS) & (STATUS_FLAG)) && ((STATUS) != SMAP_STATUS_ERROR) ) /* Most common tests are for overlap or for overlap + in area. */ #define SMAP_STATUS_OVERLAP(STATUS) \ ( (!((STATUS) & SMAP_STATUS_NO_OVERLAP)) && ((STATUS) != SMAP_STATUS_ERROR) ) #define SMAP_STATUS_INAREA(STATUS) \ ( (!((STATUS) & SMAP_STATUS_NO_OVERLAP)) && (!((STATUS) & SMAP_STATUS_OUTSIDE_AREA)) \ && ((STATUS) != SMAP_STATUS_ERROR) ) /* Return human readable error as static string. Note that at most one bit should be set in status, if more than one is set in your result, call this multiple times and cat the return values to taste. May return NULL for unused bits. */ char *sMapErrorString(SMapStatus status); /* x1, x2, y1, y2 as sMapConvert() */ /* return value is status as above */ /* nx1, nx2 are the clipped values that actually map to y1, y2 - for code simplicity these are always set, even if NO_OVERLAP status is returned (in which case they are x1, x2) */ SMapStatus sMapMap (SMapKeyInfo* info, int x1, int x2, int *y1, int *y2, int *nx1, int *nx2) ; /* as for sMapMap(), but maps in the reverse direction */ SMapStatus sMapInverseMap (SMapKeyInfo* info, int x1, int x2, int *y1, int *y2, int *nx1, int *nx2) ; #ifdef ACEDB4 /* As for sMapMap, but for objects with source exons, the conversion is from the unspliced co-ordinate system. Converts interval x1..x2 in key's coordinate system to y1..y2 in smap's. Returns SMapStatus as above, note well that it will clip coords so you may end up with the returned y1,y2 being identical. Beware that this call is _more costly than sMapMap(). */ SMapStatus sMapUnsplicedMap(SMapKeyInfo* info, int x1, int x2, int *y1, int *y2, int *nx1, int *nx2) ; #endif /* Is the mapping from this key to the root on the reverse strand. True means that for mapping from x1,x2 to y1,y2 gives y1>y2 when x1