/* Last edited: Sep 20 15:37 2001 (edgrif) */

/* fex for "Feature EXchange" */

/** this is the interface as seen by the C code in the client program **/
/** in some places the CORBA layer will need to be a little different **/

/*********** basic types ************/

typedef enum {
  NO_STRAND = 0,
  DOWN_STRAND,
  UP_STRAND
} FexStrand ;

typedef enum {
  NO_PHASE = 0,
  PHASE_0,
  PHASE_1,
  PHASE_2
} FexPhase ;

typedef enum {
  BASIC_FEATURE,
  HOMOL_FEATURE,
  EXON_FEATURE,
  TRANSCRIPT_FEATURE,
  VARIATION_FEATURE,
  INTRON_FEATURE,
  BOUNDARY_FEATURE,
  ALL_FEATURES            /* ALL_FEATURES is for fexAvailableFeatureSets() */
} FexFeatureType ;

/* as in BLAST*, i.e. target is DNA, Protein, DNA translated */
typedef enum {
  N_HOMOL,
  X_HOMOL,
  TX_HOMOL
} FexHomolType ;

/* The two splice boundaries were originally spelled 5_SPLICE and 3_SPLICE.
   A C identifier may not begin with a digit, so those spellings could never
   compile; they are renamed SPLICE_5 and SPLICE_3, keeping the same order
   and therefore the same numeric values. */
typedef enum {
  CLONE_END,
  SPLICE_5,
  SPLICE_3
} FexBoundaryType ;

typedef enum {
  OK_STATUS = 0,
  NO_WRITE_ACCESS,
  ILLEGAL_FEATURE,
  ILLEGAL_ATTRIBUTE
} FexStatus ;

/*********** opaque types for server handles ************/

typedef struct DatabaseStruct *FexDB ;
typedef struct ContextStruct *FexContext ;   /* should cache DB handle */
typedef unsigned int FexID ;

/*********** local types corresponding to CORBA structs ***********/

/* the following is used to store alignment gap information */

typedef struct {
  int s1, s2 ;   /* coords in input space */
  int r1, r2 ;   /* coords in output space */
} FexAlignBlock ;   /* this is identical to SMapMap in smap.h */

/* Big decision:
   The primary Feature object is a union, not a set of subclasses
   inheriting from a core class.  This simplifies and helps make
   transfer protocols efficient for C in particular.
   Problems are:
     Efficiency: core feature is 26 bytes assuming 1 byte per enum.
       The biggest union components add 18 extra bytes, and a high
       fraction of the features (e.g. all homols) will need the max
       anyway.  It doesn't seem worth separating the data structures
       and duplicating lots of calls, requiring switches etc.
     Elegance: in the eye of the beholder, but we'll try to keep it clean.
*/ typedef struct { int x1, x2 ; /* start, end */ FexStrand strand ; FexPhase phase ; float score ; char *text ; FexID id ; union { struct { int y1, y2 ; /* targetStart, targetEnd */ FexStrand targetStrand ; FexPhase targetPhase ; /* for TX_HOMOL */ Array align ; /* of AlignBlock */ /* if align is null, then ungapped */ } homol ; struct { int cdsStart, cdsEnd ; FexPhase cdsPhase ; /* = NO_PHASE unless startNotFound */ char endNotFound ; Array exons ; /* of Feature */ } transcript ; struct { FexID transcriptID ; } exon ; } ; /* does C require an identifier here, or can I just refer to feat.homol.y1? */ } Feature ; /****************/ typedef struct { char *category ; /* GFF_feature, BioPerl primary_tag */ char *source ; /* GFF_source = ?? */ BOOL isStrandSensitive ; BOOL isFrameSensitive ; /* phase becomes important */ BOOL isScore ; FexFeatureType type ; FexHomolType homolType ; /* for Homols only */ char *targetDataSource ; /* for Homols only */ FexBoundaryType boundType ; /* for Boundaries only */ BOOL isWritable ; Array legalAttributeTags ; /* of char* */ } FexFeatureSetDataInfo ; /* We separated this out from FeatureSet, so can get information on where to find the sequence from other sources than the database. I guess the fetch thing should also be accessed via URLget, and we should set up interfaces for that. 
*/ typedef struct { int fcol ; /* foreground colour - line and surround colour */ int bcol ; /* background colour - fill colour */ /* currently there is a CDS colour and an upStrand colour RD thinks the upStrand colour is never used and can be the same and that CDS can be shown by non-coding being half-width */ float right_priority ; enum { BY_WIDTH, BY_OFFSET, BY_HISTOGRAM } drawStyle ; enum { OVERLAP, BUMP, CLUSTER } overlapStyle ; float width ; /* column width */ float min_mag, max_mag ; float min_score, max_score ; BOOL showText ; BOOL showUpStrand ; } FexFeatureSetDrawInfo ; typedef struct { char *urlFormat ; /* printf style format with %s for target name */ char *fetchFormat ; /* similar format, but to fetch sequence */ char *removePrefix ; /* prefix to remove from names before substituting in formats */ /* Apollo has something good here that is a bit more powerful for good reasons */ } FexTargetDataSource ; /**********************************************/ FexDB fexOpenDatabase (char *locator, char *user, char *password) ; void fexCloseDatabase (FexDB db) ; BOOL fexCheckWriteAccess (FexDB db) ; BOOL fexSetWriteAccess (FexDB db, BOOL access) ; FexContext fexContext (FexDB db, char* seqName, int start, int end) ; /* end = 0 for end_of_sequence, so (start, end) = (1,0) for whole sequence */ void fexReleaseContext (FexContext context) ; char* fexDNA (FexContext context) ; /* returns IUPAC plus '-' for blank */ Array fexAvailableFeatureSets (FexContext context, FeatureType type) ; /* Array of char* of feature names */ Array fexGetFeatures (FexContext context, char *fsetName) ; /* Array of Feature structs */ FexFeatureSetDataInfo *fexFeatureSetDataInfo (FexDB db, char *fsetName) ; FexFeatureSetDrawInfo *fexFeatureSetDrawInfo (FexDB db, char *fsetName) ; FexTargetDataSource *fexTargetDataSource (FexDB db, char* dataSource) ; /**********************************************/ /***************** write back *****************/ /* In the following, attributes is additional 
textual information in attribute/value per line format (as for .ace files). If you want to communicate gene_name hints/info back and forth with the server you can use this. Also attributes to be assigned to the gene. The client will just neutrally put these up and allow them to be edited, deleted and added to, and passed back to the server. The attributeTagList field of FexFeatureSetDataInfo lists the fields allowed for each feature set. */ typedef struct { char *tag ; char *value ; } Attribute ; FexStatus fexNewFeature (FexDB db, char *fsetName, FexID *dbidPtr, Feature *feature, Array attributes) ; FexStatus fexReplaceFeature (FexDB db, char *fsetName, FexID dbid, Feature *feature, Array attributes) ; /* if feature is null, just replace attributes and vice versa */ FexStatus fexKillFeature (FexDB db, FexID dbid, Array attributes) ; /* may want to change attributes because "kill" will only archive, depending on server behaviour */ /* need also */ Array fexGetAttributes (FexDB db, FexID dbid) ; /* of FexAttribute */ /* Note there is no way to write a gene as such, nor a naked exon. 010811 RD: Current proposal is as follows Within an fset: In the interface, a "curate" action on a transcript does the followring: - if identical to a curated transcript then killTranscript or editAttributes - else options are storeTranscript, or replaceTranscript for each that it overlaps. If you store a transcript not overlapping any genes it creates a new gene. On the server side, an idea to explore would be that if this new gene overlaps one or more history genes then it is marked as having split from the most recent that it overlaps. If you store a transcript overlapping only transcripts from one gene, then you can either replace one of those transcripts or create a new one. If you store a transcript that overlaps multiple current genes then it must replace one of the existing transcripts. 
This is because the new transcript will merge all the genes covered,
   and the requirement to replace an existing transcript allows us to
   determine which gene the resulting merge becomes.
   Similarly if you were allowed to kill a transcript that acts as a
   bridge, hence creating new genes by splitting, it would not be clear
   which child should keep the parent's name.  So I plan to ban such
   kills, requiring a replace action to remove the bridging transcript,
   so it is clear which child inherits the name (the one containing the
   replaced transcript).

   Of course all these restrictions would go if the gene identifier
   model were that the products of merges and splits all had new
   identifiers.  So I guess the restrictions listed above should be a
   client option, maybe settable via a server.  Or perhaps they should
   be rules the server uses to reject a curate event.
*/

/******************* end of file ***********************/