/* @Source edamclean application ** ** Validate and fix EDAM OBO ontology ** ** @author: Copyright (C) Jon Ison (jison@ebi.ac.uk) ** @@ ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the GNU General Public License ** as published by the Free Software Foundation; either version 2 ** of the License, or (at your option) any later version. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** GNU General Public License for more details. ** ** You should have received a copy of the GNU General Public License ** along with this program; if not, write to the Free Software ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ** ******************************************************************************* ** ** EDAMCLEAN documentation ** See http://www.emboss.org ** ** Please cite the authors and EMBOSS. ** ** Email jison@ebi.ac.uk. ** ** ** edamclean reads EDAM (OBO format file), validates the file syntax, writes a ** report from parsing and (optionally) fixes the term numbering and ** relations. ** It has optional modes of operation: ** 1. Report only ** 2. Renumber terms ** 3. Fix relations ** 4. Output PURL XML (single file) ** 5. Output PURL XML (one file / term) ** ** 1. Report only ** Write an informative report from parsing but do not change the file. ** The following checks are performed: ** i. All ids in the file are unique ** ii. All term names within a namespace are unique ** iii. All values after namespace: are valid (see below). ** iv. All field names are valid; either a standard OBO field, a relation ** or a token that must be ignored (see below). ** v. All terms have the following fields in the order specified ** (optional fields are in parentheses): ** id, name, namespace, def, (comment), (synonym), is_a ** vi. 
Terms in specific namespaces have all mandatory relations defined ** and do not have disallowed relations. See "Rules" below. ** vii. End-points (term names) of all relations exist. See "Rules" below. ** This includes checking for mismatches between term id and name ** (in comment) in relations lines ** viii. All id: lines have the format: id: EDAM:0000000 ** ix. All def: lines have the format: def: "Some text." ** [EDAM:EBI "EMBRACE definition"] ** x. All relation lines have the format: RelationName: EDAM:0000000 ** ! Term name ** xi. All comment: values are *not* in quotes ("") ** xii. All synonym: values are *not* in quotes ("") ** ** 2. Renumber terms ** Write a report as above. ** Renumber all terms so that they have unique ids, starting with ** EDAM:0000000 ** for the first term in the file and increasing by 1 thereon. ** ** 3. Fix relations ** Write a report as above. If no errors reported, correct term ids used ** in all relations fields. ** ** 4. Output PURL XML (single file) ** Write a report as above. Then write XML output for term submission to PURL.org ** ** ** - ** ** jon ** ** ** ** ** ** ** 5. Output PURL XML (one file / term) ** As option 4. above, but write a single XML file per term to the specified directory. 
** ** ** edamclean parameters: ** Name of OBO format file (input) ** Name of OBO format file (output) ** Name of report file (output) ** Boolean (whether to fix the output file) ** ** ** Standard OBO fields ** id: ** name: ** namespace: ** def: ** comment: ** synonym: ** xref: ** is_obsolete: ** consider: ** ** Relations ** is_a ** has_part ** is_part_of ** concerns ** is_concern_of ** has_input ** is_input_of ** has_output ** is_output_of ** has_source ** is_source_of ** has_identifier ** is_identifier_of ** has_attribute ** is_attribute_of ** has_format ** is_format_of ** ** Namespace ** entity ** topic ** operation ** resource ** data ** format ** ** ** Tokens to ignore ** Lines beginning with the following tokens are not parsed and are ** preserved as-is in the output: ** ! ** format-version ** date ** data-version ** xref ** ** ** Rules ** Rules for which term types (rules in a namespace) may or must be related ** to which other term types are described under "Rules by term type" in ** the EDAM on-line documentation. ** See http://www.ebi.ac.uk/~jison/edam.html#6.1. ** ** ** Notes ** Typedef definitions are *not* validated and are preserved as-is in the ** output. ** ** Important! ** 1. The program should not be run in modes 2 or 3 (i.e. generate an EDAM ** output file) until all reported problems (from mode 1) have been fixed ** by hand - *except* "Non-unique id" errors! Results are undefined otherwise. ** 2. All [Term] definitions in the input file *must* appear before the ** first [Typedef] definition - terms appearing after are *not* validated ** fully. ** ** Known Issues ** edamclean does not detect the fact that the root term of each branch does ** not need to have an is_a relation. Disregard the messages in the log file ** to that effect (this could fairly easily be fixed). ** ** edamclean will identify (and warn about) identical term names in cases ** where one of the terms has been made obsolete. Arguably this is ** the correct behaviour. 
** ** The code that checks for "field in wrong order" does not make all the checks ** it might, e.g. does not check whether other relations appear before is_a. ** ** It does not check for multiple (erroneous) comment: lines ** ** It does not suppress (irrelevant) error messages for obsolete terms. ** ** It does not check for duplicate relationships (where these are not allowed) ** e.g. an exact duplication of a relationship line. ******************************************************************************/ #include "emboss.h" /****************************************************************************** ** ** GLOBAL VARIABLES ** ******************************************************************************/ #define NFIELDS 10 static const char *FIELDS[NFIELDS] = { "id:", "name:", "namespace:", "def:", "comment:", "synonym:", "xref:", "is_obsolete:", "consider:", "relationship:" }; #define NRELATIONS 18 /* 'consider' field is treated as a relation */ static const char *RELATIONS[NRELATIONS] = { "is_a:", "has_part:", "is_part_of:", "concerns:", "is_concern_of:", "has_input:", "is_input_of:", "has_output:", "is_output_of:", "has_source:", "is_source_of:", "has_identifier:", "is_identifier_of:", "has_attribute:", "is_attribute_of:", "has_format:", "is_format_of:", "consider:" }; #define NNAMESPACES 6 static const char *NAMESPACES[NNAMESPACES] = { "entity", "topic", "operation", "resource", "data", "format" }; enum _namespace { entity, topic, operation, resource, data, format }; #define NOTHER 12 static const char *OTHER[NOTHER] = { "!", "format-version:", "date:", "data-version:", "subsetdef:", "[Term]", "[Typedef]", "inverse_of:", "is_anti_symmetric:", "is_cyclic:", "is_transitive:", "\0" /* NULL string to allow empty lines */ }; /****************************************************************************** ** ** DATA STRUCTURES ** ******************************************************************************/ /* @datastatic PTerm 
******************************************************* ** ** Term object ** Holds name and identifier of a single EDAM term ** ** @alias STerm ** @alias OTerm ** ** @attr name [AjPStr] Name of term ** @attr id [AjPStr] Id of term ** @attr line [ajint] Line number of name: field for the term ** @attr Padding [ajint] Padding to alignment boundary ******************************************************************************/ typedef struct STerm { AjPStr name; AjPStr id; ajint line; ajint Padding; } OTerm; #define PTerm OTerm* /* @datastatic PNamespace ***************************************************** ** ** Namespace object ** Holds name and array of terms for an EDAM namespace. ** Only one copy of the terms is kept in memory (list holds pointers only) ** ** @alias SNamespace ** @alias ONamespace ** ** @attr name [AjPStr] Name of namespace ** @attr terms [PTerm*] Array of terms ** @attr list [AjPList] List of terms* ** @attr n [ajint] Size of array / list ** @attr Padding [ajint] Padding to alignment boundary *****************************************************************************/ typedef struct SNamespace { AjPStr name; PTerm *terms; AjPList list; ajint n; ajint Padding; } ONamespace; #define PNamespace ONamespace* /****************************************************************************** ** ** PROTOTYPES ** ******************************************************************************/ static PTerm ajTermNew(void); static PNamespace ajNamespaceNew(void); static void ajTermDel(PTerm *P); static void ajNamespaceDel(PNamespace *P); static const AjPStr FindTerm(ajint namespace, const AjPStr termname, PNamespace *namespaces); /****************************************************************************** ** ** FUNCTIONS ** ******************************************************************************/ /* @funcstatic ajTermNew *************************************************** ** ** Term constructor ** ** @return [PTerm] New object ** @@ 
******************************************************************************/ static PTerm ajTermNew(void) { PTerm ret; AJNEW0(ret); ret->name = ajStrNew(); ret->id = ajStrNew(); ret->line = 0; return ret; } /* @funcstatic ajNamespaceNew ************************************************ ** ** Namespace constructor ** The array is *not* allocated. ** ** @return [PNamespace] New object ** @@ ******************************************************************************/ static PNamespace ajNamespaceNew(void) { PNamespace ret; AJNEW0(ret); ret->name = ajStrNew(); ret->terms = NULL; ret->list = ajListstrNew(); ret->n = 0; return ret; } /* @funcstatic ajTermDel *************************************************** ** ** Term destructor ** ** @param [d] P [PTerm*] Term object to delete ** @return [void] ** @@ ******************************************************************************/ static void ajTermDel(PTerm *P) { if(!P) ajFatal("Null arg error 1 in ajTermDel"); else if(!(*P)) ajFatal("Null arg error 2 in ajTermDel"); ajStrDel(&(*P)->name); ajStrDel(&(*P)->id); AJFREE(*P); *P=NULL; return; } /* @funcstatic ajNamespaceDel ************************************************ ** ** Namespace destructor ** ** @param [d] P [PNamespace*] Namespace object to delete ** @return [void] ** @@ ******************************************************************************/ static void ajNamespaceDel(PNamespace *P) { int i; if(!P) ajFatal("Null arg error 1 in ajNamespaceDel"); else if(!(*P)) ajFatal("Null arg error 2 in ajNamespaceDel"); ajStrDel(&(*P)->name); if((*P)->n) { for(i=0;i<(*P)->n;i++) ajTermDel(&(*P)->terms[i]); AJFREE((*P)->terms); } ajListstrFree(&(*P)->list); AJFREE(*P); *P=NULL; return; } /* @funcstatic FindTerm *************************************************** ** ** Finds a term within a namespace index and returns its identifier in the ** namespace array. 
** ** Returns NULL if term is not found ** ** @param [r] namespace [ajint] Namespace index as integer ** @param [r] termname [const AjPStr] Name of term ** @param [u] namespaces [PNamespace*] Array of namespace objects ** @return [const AjPStr] Term identifier ** @@ ******************************************************************************/ static const AjPStr FindTerm(ajint namespace, const AjPStr termname, PNamespace *namespaces) { ajint x; if(!termname || !namespaces) ajFatal("Bad args to FindTerm"); for(x=0; xn; x++) if(ajStrMatchS(termname, namespaces[namespace]->terms[x]->name)) return namespaces[namespace]->terms[x]->id; return NULL; } /* @prog edamclean ******************************************************** ** ** Validate and fix EDAM OBO ontology ** *****************************************************************************/ int main(ajint argc, char **argv) { /* Variable declarations */ AjPFile inf_edam = NULL; /* Name of EDAM (input) file */ AjPFile ouf_edam = NULL; /* Name of EDAM (output) file */ AjPFile ouf_log = NULL; /* Name of report (output) file */ AjPFile ouf_xml = NULL; /* Name of XML (output) file */ AjPFile tmp_xml = NULL; /* Temp. 
XML (output) file */ AjPDirout xmloutdir = NULL; /* XML (output) file directory */ AjPStr mode = NULL; /* Mode of operation */ AjPList list_tmp = NULL; /* Temporary list */ AjPStr *fields = NULL; /* Array of valid tokens for first word in line */ ajint nfields = 0; /* Size of fields array */ AjPStr *ids = NULL; /* Array of all ids in file */ ajint nids = 0; /* Size of ids */ const AjPStr id = NULL; /* ID of a term */ AjPStr line = NULL; /* A line from the input file */ ajint linecnt = 0; /* Line number of line */ ajint termcnt = 0; /* Count of term definitions */ AjPStr tok = NULL; /* A token from line */ AjBool done = ajFalse; /* Housekeeping */ ajint x = 0; /* Housekeeping */ ajint y = 0; /* Housekeeping */ ajint z = 0; /* Housekeeping */ ajint idx = 0; /* Housekeeping */ AjPStr name = NULL; /* Name of a term */ AjPStr namespace = NULL; /* Namespace of a term */ AjPStr relation = NULL; /* Relationship name, e.g. "is_a" */ AjPStr tmp_name = NULL; /* Temp. name of a term */ AjPStr tmp_id = NULL; /* Temp. id of a term */ AjPStr tmp_str = NULL; /* Temp. string */ PTerm tmp_term = NULL; /* Temp. term pointer */ ajint tmp_line = 0; /* Temp. line number */ PNamespace namespaces[NNAMESPACES]; /* Array of namespace objects */ AjBool done_first = ajFalse; /* Housekeeping ... read first term */ AjBool first = ajFalse; /* Housekeeping ... 
on first term */ AjBool found_id = ajFalse; AjBool in_typedef = ajFalse; /* In a [Typedef] statement */ AjBool found_name = ajFalse; AjBool found_namespace = ajFalse; AjBool found_def = ajFalse; AjBool found_comment = ajFalse; AjBool found_synonym = ajFalse; AjBool found_xref = ajFalse; AjBool found_is_obsolete = ajFalse; AjBool found_consider = ajFalse; AjBool found_isa = ajFalse; AjBool found_concerns = ajFalse; AjBool found_is_concern_of = ajFalse; AjBool found_has_input = ajFalse; AjBool found_is_input_of = ajFalse; AjBool found_has_output = ajFalse; AjBool found_is_output_of = ajFalse; AjBool found_has_source = ajFalse; AjBool found_is_source_of = ajFalse; AjBool found_has_identifier = ajFalse; AjBool found_is_identifier_of = ajFalse; AjBool found_has_attribute = ajFalse; AjBool found_is_attribute_of = ajFalse; AjBool found_has_part = ajFalse; AjBool found_is_part_of = ajFalse; AjBool found_has_format = ajFalse; AjBool found_is_format_of = ajFalse; /* Read data from acd */ embInit("edamclean", argc, argv); /* ACD data handling */ inf_edam = ajAcdGetInfile("edaminfile"); ouf_edam = ajAcdGetOutfile("edamoutfile"); ouf_log = ajAcdGetOutfile("logfile"); ouf_xml = ajAcdGetOutfile("xmlfile"); xmloutdir = ajAcdGetOutdir("xmloutdir"); mode = ajAcdGetSelectSingle("mode"); /* taxdir = ajAcdGetDirectory("taxdirectory"); */ ajFmtPrint("MODE : %S\n", mode); /* ajTaxLoad(taxdir); ajOboParseObofile(inf_edam, "noidorder,nounkid"); ajFileSeek(inf_edam, 0, 0); embExit(); */ /* Memory allocation */ line = ajStrNew(); tok = ajStrNew(); name = ajStrNew(); namespace = ajStrNew(); relation = ajStrNew(); tmp_name = ajStrNew(); tmp_id = ajStrNew(); for(x=0; xname), NAMESPACES[x]); } /* Check for valid first tokens */ /* First, write array of valid tokens for first word in line */ list_tmp = ajListstrNew(); for(x=0; x\n"); } for(in_typedef=ajFalse; ajReadline(inf_edam, &line); ) { if(ajStrPrefixC(line, "[Typedef]")) in_typedef=ajTrue; else if(ajStrPrefixC(line, "[Term]")) 
in_typedef=ajFalse; if(in_typedef) continue; if(ajStrPrefixC(line, "namespace:")) { if(ajStrMatchC(mode, "Output PURL XML (one file / term)")) { if(!(tmp_xml=ajFileNewOutNameDirS(tmp_id, xmloutdir))) ajFatal("Could not create file"); else ajFmtPrintF(tmp_xml, "\n"); } ajStrAssignClear(&tok); ajFmtScanS(line, "%*s %S", &tok); ajStrRemoveWhite(&tok); ajFmtPrintF(tmp_xml, "\n" "\n" "jon\n" "\n" "\n" "\n", tok, tmp_id, tmp_id); if(ajStrMatchC(mode, "Output PURL XML (one file / term)")) { ajFmtPrintF(tmp_xml, "\n"); ajFileClose(&tmp_xml); } } if(ajStrPrefixC(line, "id:")) { ajStrParseC(line, ":"); ajStrParseC(NULL, ":"); ajStrAssignS(&tmp_id, ajStrParseC(NULL, ":")); ajStrRemoveWhite(&tmp_id); } } if(ajStrMatchC(mode, "Output PURL XML (single file)")) ajFmtPrintF(tmp_xml, "\n"); } exit(0); for(linecnt=0; ajReadline(inf_edam, &line); linecnt++) { ajStrAssignClear(&tok); ajFmtScanS(line, "%S", &tok); for(x=0, done=ajFalse;x1) { ajFmtPrintF(ouf_log, "Line %6d : Non-unique id: %S " "%S\n", linecnt+1, tok, line); break; } } } } } ajFmtPrintF(ouf_log, "\n\n"); ajFileSeek(inf_edam, 0, 0); /* Rewind file */ /* Check for mandatory fields / field order */ ajFmtPrintF(ouf_log, "8. STANDARD MANDATORY FIELDS AND FIELD ORDER\n"); for(in_typedef=ajFalse, first = ajTrue, done_first=ajFalse, linecnt=0; ajReadline(inf_edam, &line); linecnt++) { /* id, name, namespace, def, (comment), (synonym), is_a */ /* Stop checking once first [Typedef] line is found. 
*/ /* if(ajStrPrefixC(line, "[Typedef]")) break; */ if(ajStrPrefixC(line, "[Typedef]")) in_typedef=ajTrue; if(ajStrPrefixC(line, "[Term]")) { in_typedef=ajFalse; /* Process previous term */ if(done_first) { if(!found_id) ajFmtPrintF(ouf_log, "Line %6d : No id: field in " "term\n", tmp_line); if(!found_name) ajFmtPrintF(ouf_log, "Line %6d : No name: field in " "term\n", tmp_line); if(!found_namespace) ajFmtPrintF(ouf_log, "Line %6d : No namespace: field in " "term\n", tmp_line); if(!found_def) ajFmtPrintF(ouf_log, "Line %6d : No def: field in " "term\n", tmp_line); /* No is_a needed for first term in file or for obsolete terms*/ if((!found_isa) && (!first) && (!found_is_obsolete)) ajFmtPrintF(ouf_log, "Line %6d : No is_a: field in " "term\n", tmp_line); first = ajFalse; /* entity */ if(ajStrMatchC(namespace, NAMESPACES[0])) { if(found_concerns || found_has_input || found_is_input_of || found_has_output || found_is_output_of || found_has_source || found_is_source_of || found_is_identifier_of || found_is_format_of || found_has_format || found_is_attribute_of) ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed " "for term in this namespace\n", tmp_line); } /* topic */ else if(ajStrMatchC(namespace, NAMESPACES[1])) { if(!found_concerns) ajFmtPrintF(ouf_log, "Line %6d : No concerns: relation in term\n", tmp_line); if(found_is_concern_of || found_has_input || found_is_input_of || found_has_output || found_is_output_of || found_has_source || found_is_source_of || found_has_identifier || found_is_identifier_of || found_has_attribute || found_is_attribute_of || found_has_part || found_is_format_of || found_has_format || found_is_part_of) ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed " "for term in this namespace\n", tmp_line); } /* operation */ else if(ajStrMatchC(namespace, NAMESPACES[2])) { if(!found_is_concern_of) ajFmtPrintF(ouf_log, "Line %6d : No is_concern_of: relation in term\n", tmp_line); if(found_concerns || found_is_input_of || found_is_output_of || 
found_has_source || found_is_source_of || found_has_identifier || found_is_identifier_of || found_has_attribute || found_is_attribute_of || found_has_part || found_is_format_of || found_has_format || found_is_part_of) ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed " "for term in this namespace\n", tmp_line); } /* resource */ else if(ajStrMatchC(namespace, NAMESPACES[3])) { if(!found_is_source_of) ajFmtPrintF(ouf_log, "Line %6d : No is_source_of: " "relation in term\n", tmp_line); if(found_concerns || found_has_input || found_is_input_of || found_has_output || found_is_output_of || found_has_source || found_is_identifier_of || found_has_attribute || found_is_format_of || found_has_format || found_is_attribute_of) ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed " "for term in this namespace\n", tmp_line); } /* data */ else if(ajStrMatchC(namespace, NAMESPACES[4])) { if(found_concerns || found_is_concern_of || found_has_input || found_has_output || found_is_source_of || found_has_attribute || found_is_format_of) ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed " "for term in this namespace\n", tmp_line); } /* format */ else if(ajStrMatchC(namespace, NAMESPACES[5])) { if(found_concerns || found_is_concern_of || found_has_input || found_is_input_of || found_has_output || found_is_output_of || found_has_source || found_is_source_of || found_has_identifier || found_is_identifier_of || found_has_attribute || found_is_attribute_of || found_has_part || found_is_part_of || found_has_format ) ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed " "for term in this namespace\n", tmp_line); } /* edam_identifier */ /* else if(ajStrMatchC(namespace, NAMESPACES[6])) { if(!found_is_identifier_of) ajFmtPrintF(ouf_log, "Line %6d : No is_identifier_of: " "relation in term\n", tmp_line); if(found_concerns || found_is_concern_of || found_has_input || found_has_output || found_is_source_of || found_has_identifier || found_has_attribute || found_is_attribute_of || 
found_is_format_of || found_has_format || found_has_part || found_is_part_of) ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed " "for term in this namespace\n", tmp_line); } */ } tmp_line = linecnt+1; found_id = ajFalse; found_name = ajFalse; found_namespace = ajFalse; found_def = ajFalse; found_comment = ajFalse; found_synonym = ajFalse; found_xref = ajFalse; found_is_obsolete = ajFalse; found_consider = ajFalse; found_isa = ajFalse; found_concerns = ajFalse; found_is_concern_of = ajFalse; found_has_input = ajFalse; found_is_input_of = ajFalse; found_has_output = ajFalse; found_is_output_of = ajFalse; found_has_source = ajFalse; found_is_source_of = ajFalse; found_has_identifier = ajFalse; found_is_identifier_of = ajFalse; found_has_attribute = ajFalse; found_is_attribute_of = ajFalse; found_has_part = ajFalse; found_is_part_of = ajFalse; found_has_format = ajFalse; found_is_format_of = ajFalse; done_first = ajTrue; } else if(in_typedef) continue; else if(ajStrPrefixC(line, "id:")) { found_id = ajTrue; if(found_name || found_namespace || found_def || found_comment || found_synonym || found_isa || found_xref || found_is_obsolete || found_consider) ajFmtPrintF(ouf_log, "Line %6d : id: field in wrong order " "(%S)\n", linecnt+1, line); } else if(ajStrPrefixC(line, "name:")) { found_name = ajTrue; if( (!found_id) || found_namespace || found_def || found_comment || found_synonym || found_isa || found_xref || found_is_obsolete || found_consider) ajFmtPrintF(ouf_log, "Line %6d : name: field in wrong order " "(%S)\n", linecnt+1, line); } else if(ajStrPrefixC(line, "namespace:")) { found_namespace = ajTrue; ajFmtScanS(line, "%*s %S", &namespace); if((!found_id) || (!found_name) || found_def || found_comment || found_synonym || found_isa || found_xref || found_is_obsolete || found_consider) ajFmtPrintF(ouf_log, "Line %6d : namespace: field in wrong " "order (%S)\n", linecnt+1, line); } else if(ajStrPrefixC(line, "def:")) { found_def = ajTrue; if((!found_id) || 
(!found_name) || (!found_namespace) || found_comment || found_synonym || found_isa || found_xref || found_is_obsolete || found_consider) ajFmtPrintF(ouf_log, "Line %6d : def: field in wrong order " "(%S)\n", linecnt+1, line); } else if(ajStrPrefixC(line, "comment:")) { found_comment = ajTrue; if((!found_id) || (!found_name) || (!found_namespace) || (!found_def) || found_synonym || found_isa || found_xref || found_is_obsolete || found_consider) ajFmtPrintF(ouf_log, "Line %6d : comment: field in wrong " "order (%S)\n", linecnt+1, line); } else if(ajStrPrefixC(line, "synonym:")) { found_synonym = ajTrue; if((!found_id) || (!found_name) || (!found_namespace) || (!found_def) || found_isa || found_xref || found_is_obsolete || found_consider) ajFmtPrintF(ouf_log, "Line %6d : synonym: field in wrong " "order (%S)\n", linecnt+1, line); } else if(ajStrPrefixC(line, "is_a:")) { found_isa = ajTrue; if((!found_id) || (!found_name) || (!found_namespace) || (!found_def) || found_is_obsolete || found_consider) ajFmtPrintF(ouf_log, "Line %6d : is_a: field in wrong order " "(%S)\n", linecnt+1, line); } else if(ajStrPrefixC(line, "concerns:")) found_concerns = ajTrue; else if(ajStrPrefixC(line, "is_concern_of:")) found_is_concern_of = ajTrue; else if(ajStrPrefixC(line, "has_input:")) found_has_input = ajTrue; else if(ajStrPrefixC(line, "is_input_of:")) found_is_input_of = ajTrue; else if(ajStrPrefixC(line, "has_output:")) found_has_output = ajTrue; else if(ajStrPrefixC(line, "is_output_of:")) found_is_output_of = ajTrue; else if(ajStrPrefixC(line, "has_source:")) found_has_source = ajTrue; else if(ajStrPrefixC(line, "is_source_of:")) found_is_source_of = ajTrue; else if(ajStrPrefixC(line, "has_identifier:")) found_has_identifier = ajTrue; else if(ajStrPrefixC(line, "is_identifier_of:")) found_is_identifier_of = ajTrue; else if(ajStrPrefixC(line, "has_attribute:")) found_has_attribute = ajTrue; else if(ajStrPrefixC(line, "is_attribute_of:")) found_is_attribute_of = ajTrue; else 
if(ajStrPrefixC(line, "has_part:")) found_has_part = ajTrue; else if(ajStrPrefixC(line, "is_part_of:")) found_is_part_of = ajTrue; else if(ajStrPrefixC(line, "has_format:")) found_has_format = ajTrue; else if(ajStrPrefixC(line, "is_format_of:")) found_is_format_of = ajTrue; else if(ajStrPrefixC(line, "xref:")) found_xref = ajTrue; else if(ajStrPrefixC(line, "is_obsolete:")) found_is_obsolete = ajTrue; else if(ajStrPrefixC(line, "consider:")) found_consider = ajTrue; } ajFmtPrintF(ouf_log, "\n\n"); ajFileSeek(inf_edam, 0, 0); /* Rewind file */ /* Check for unique names within each namespace */ ajFmtPrintF(ouf_log, "9. UNIQUE NAMES WITHIN EACH NAMESPACE\n"); for(in_typedef=ajFalse, linecnt=0; ajReadline(inf_edam, &line); linecnt++) { if(ajStrPrefixC(line, "[Typedef]")) in_typedef=ajTrue; /* First build the arrays of names in each namespace */ if(ajStrPrefixC(line, "[Term]")) { tmp_term = ajTermNew(); in_typedef=ajFalse; } else if(in_typedef) continue; else if(ajStrPrefixC(line, "name:")) { ajStrParseC(line, ":"); ajStrAssignS(&tmp_term->name, ajStrParseC(NULL, ":")); ajStrRemoveWhiteExcess(&tmp_term->name); tmp_term->line = linecnt; } else if(ajStrPrefixC(line, "id:")) { ajStrParseC(line, ":"); ajStrParseC(NULL, ":"); ajStrAssignS(&tmp_term->id, ajStrParseC(NULL, ":")); ajStrRemoveWhiteExcess(&tmp_term->id); } else if(ajStrPrefixC(line, "namespace:")) { ajFmtScanS(line, "%*s %S", &namespace); for(x=0; xlist, tmp_term); } } for(x=0; xn = ajListToarray(namespaces[x]->list, (void***) &(namespaces[x]->terms)); for(x=0; xn; y++) for(z=0; zn; z++) if((y!=z) && (ajStrMatchS(namespaces[x]->terms[y]->name, namespaces[x]->terms[z]->name))) { ajFmtPrintF(ouf_log, "Line %6d : Non-unique name: %S " "(First used on line %d)\n", namespaces[x]->terms[y]->line, namespaces[x]->terms[y]->name, namespaces[x]->terms[z]->line); break; } } ajFmtPrintF(ouf_log, "\n\n"); ajFileSeek(inf_edam, 0, 0); /* Rewind file */ /* for(x=0; xname): %S\n", x, namespaces[x]->name); fflush(stdout); } */ /* 
Check for valid end-points of relations */ ajFmtPrintF(ouf_log, "10. VALID END-POINTS OF RELATIONS\n"); for(linecnt=0; ajReadline(inf_edam, &line); linecnt++) { if(ajStrPrefixC(line, "namespace:")) { /* ajFmtPrint("line === %S\n", line); fflush(stdout); */ /* Identify index of this namespace */ ajFmtScanS(line, "%*s %S", &namespace); /* ajFmtPrint("namespace === %S\n", namespace); fflush(stdout); */ /* for(x=0; xname): %S\n", x, namespaces[x]->name); fflush(stdout); } */ for(idx=0; idxname === %S++\n", idx, namespaces[idx]->name); fflush(stdout); */ if(ajStrMatchS(namespace, namespaces[idx]->name)) break; else ajFmtPrint("Failing to match %S to %S\n", namespace, namespaces[idx]->name); } if(idx==NNAMESPACES) ajFatal("namespace not found - cannot recover\nline: %S\nnamespace: %S\n", line, namespace); /* if(!ajStrMatchC(mode, "Fix relations")) continue; */ } ajStrAssignClear(&tok); if(ajStrPrefixC(line, "relationship:")) ajFmtScanS(line, "%*S %S", &tok); else ajFmtPrintS(&tok, "%S", &tok); for(x=0, done=ajFalse; x