/* @Source edamclean application
**
** Validate and fix EDAM OBO ontology
**
** @author: Copyright (C) Jon Ison (jison@ebi.ac.uk)
** @@
**
** This program is free software; you can redistribute it and/or
** modify it under the terms of the GNU General Public License
** as published by the Free Software Foundation; either version 2
** of the License, or (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
**
*******************************************************************************
**
** EDAMCLEAN documentation
** See http://www.emboss.org
**
** Please cite the authors and EMBOSS.
**
** Email jison@ebi.ac.uk.
**
**
** edamclean reads EDAM (OBO format file), validates the file syntax, writes a
** report from parsing and (optionally) fixes the term numbering and
** relations.
** It has optional modes of operation:
** 1. Report only
** 2. Renumber terms
** 3. Fix relations
** 4. Output PURL XML (single file)
** 5. Output PURL XML (one file / term)
**
** 1. Report only
** Write an informative report from parsing but do not change the file.
** The following checks are performed:
** i. All ids in the file are unique
** ii. All term names within a namespace are unique
** iii. All values after namespace: are valid (see below).
** iv. All field names are valid; either a standard OBO field, a relation
** or a token that must be ignored (see below).
** v. All terms have the following fields in the order specified
** (optional fields are in parenthesis):
** id, name, namespace, def, (comment), (synonym), is_a
** vi. Terms in specific namespaces have all mandatory relations defined
** and do not have disallowed relations. See "Rules" below.
** vii. End-points (term names) of all relations exist. See "Rules" below.
** This includes checking for mismatches between term id and name
** (in comment) in relations lines
** viii. All id: lines have the format: id: EDAM:0000000
** ix. All def: lines have the format: def: "Some text."
** [EDAM:EBI "EMBRACE definition"]
** x. All relation lines have the format: RelationName: EDAM:0000000
** ! Term name
** xi. All comment: values are *not* in quotes ("")
** xii. All synonym: values are *not* in quotes ("")
**
** 2. Renumber terms
** Write a report as above.
** Renumber all terms so that they have unique ids, starting with
** EDAM:0000000
** for the first term in the file and increasing by 1 thereon.
**
** 3. Fix relations
** Write a report as above. If no errors reported, correct term ids used
** in all relations fields.
**
** 4. Output PURL XML (single file)
** Write a report as above. Then write XML output for term submission to PURL.org
**
**
** -
**
** jon
**
**
**
**
**
**
** 5. Output PURL XML (one file / term)
** As option 4. above, but write a single XML file per term to the specified directory.
**
**
** edamclean parameters:
** Name of OBO format file (input)
** Name of OBO format file (output)
** Name of report file (output)
** Boolean (whether to fix the output file)
**
**
** Standard OBO fields
** id:
** name:
** namespace:
** def:
** comment:
** synonym:
** xref:
** is_obsolete:
** consider:
**
** Relations
** is_a
** has_part
** is_part_of
** concerns
** is_concern_of
** has_input
** is_input_of
** has_output
** is_output_of
** has_source
** is_source_of
** has_identifier
** is_identifier_of
** has_attribute
** is_attribute_of
** has_format
** is_format_of
**
** Namespace
** entity
** topic
** operation
** resource
** data
** format
**
**
** Tokens to ignore
** Lines beginning with the following tokens are not parsed and are
** preserved as-is in the output:
** !
** format-version
** date
** data-version
** xref
**
**
** Rules
** Rules for which term types (terms in a namespace) may or must be related
** to which other term types are described under "Rules by term type" in
** the EDAM on-line documentation.
** See http://www.ebi.ac.uk/~jison/edam.html#6.1.
**
**
** Notes
** Typedef definitions are *not* validated and are preserved as-is in the
** output.
**
** Important!
** 1. The program should not be run in modes 2 or 3 (ie. generate an EDAM
** output file) until all reported problems (from mode 1) have been fixed
** by hand - *except* "Non-unique id" errors! Results are undefined otherwise.
** 2. All [Term] definitions in the input file *must* appear before the
** first [Typedef] definition - terms appearing after are *not* validated
** fully.
**
** Known Issues
** edamclean does not detect the fact that the root term of each branch does
** not need to have an is_a relation. Disregard the messages in the log file
** to that effect (this could fairly easily be fixed).
**
** edamclean will identify (and warn about) identical term names in cases
** where one of the terms has been made obsolete. Arguably this is the
** correct behaviour.
**
** The code that checks for "field in wrong order" does not make all the checks
** it might, e.g. does not check whether other relations appear before is_a.
**
** It does not check for multiple (erroneous) comment: lines
**
** It does not suppress (irrelevant) error messages for obsolete terms.
**
** It does not check for duplicate relationships (where these are not allowed)
** e.g. an exact duplication of a relationship line.
******************************************************************************/
#include "emboss.h"
/******************************************************************************
**
** GLOBAL VARIABLES
**
******************************************************************************/
/* Number of entries in FIELDS; must be kept in step with the initialiser */
#define NFIELDS 10
/* Field-name tokens, each written with its trailing ':'.
** The first nine are the "Standard OBO fields" listed in the file header;
** "relationship:" is listed here in addition to those.
** Note that "consider:" appears both here and in RELATIONS. */
static const char *FIELDS[NFIELDS] =
{
"id:",
"name:",
"namespace:",
"def:",
"comment:",
"synonym:",
"xref:",
"is_obsolete:",
"consider:",
"relationship:"
};
/* Number of entries in RELATIONS; must be kept in step with the initialiser */
#define NRELATIONS 18
/* Relation-name tokens, each written with its trailing ':'.
** The first seventeen are the "Relations" listed in the file header.
** The 'consider' field is treated as a relation, so "consider:" is the
** eighteenth entry (it is also present in FIELDS). */
static const char *RELATIONS[NRELATIONS] =
{
"is_a:",
"has_part:",
"is_part_of:",
"concerns:",
"is_concern_of:",
"has_input:",
"is_input_of:",
"has_output:",
"is_output_of:",
"has_source:",
"is_source_of:",
"has_identifier:",
"is_identifier_of:",
"has_attribute:",
"is_attribute_of:",
"has_format:",
"is_format_of:",
"consider:"
};
/* Number of entries in NAMESPACES; also the size of the namespaces[] array
** declared in main */
#define NNAMESPACES 6
/* Valid values for the namespace: field (no trailing ':').
** The order matters: main refers to entries by position
** (NAMESPACES[0] is "entity" ... NAMESPACES[5] is "format") and the
** enumerators of enum _namespace are declared in the same order. */
static const char *NAMESPACES[NNAMESPACES] =
{
"entity",
"topic",
"operation",
"resource",
"data",
"format"
};
/* Symbolic names for positions in NAMESPACES.
** No explicit values are given, so the enumerators take the values 0..5
** in declaration order, matching the order of the strings in NAMESPACES.
** NOTE(review): the tag begins with an underscore, which C reserves for
** identifiers at file scope; the enumerators (e.g. data, format) are also
** very generic names - consider renaming once all uses are known. */
enum _namespace
{
entity,
topic,
operation,
resource,
data,
format
};
/* Number of entries in OTHER; must be kept in step with the initialiser */
#define NOTHER 12
/* Further tokens that may start a line, besides those in FIELDS and
** RELATIONS: the comment marker, file header tags, stanza markers and
** Typedef property tags.
** NOTE(review): the "Tokens to ignore" list in the file header names
** "xref", which is held in FIELDS rather than here, and does not mention
** several of the entries below - the two lists should be reconciled. */
static const char *OTHER[NOTHER] =
{
"!",
"format-version:",
"date:",
"data-version:",
"subsetdef:",
"[Term]",
"[Typedef]",
"inverse_of:",
"is_anti_symmetric:",
"is_cyclic:",
"is_transitive:",
"\0" /* Empty string (not a NULL pointer), to allow empty lines */
};
/******************************************************************************
**
** DATA STRUCTURES
**
******************************************************************************/
/* @datastatic PTerm *******************************************************
**
** Term object
** Holds name and identifier of a single EDAM term, together with a line
** number used when reporting problems with the term.
**
** Objects are created with ajTermNew and released with ajTermDel, which
** own the name and id strings.
**
** PTerm is a preprocessor macro for "OTerm*", not a typedef. A declaration
** such as "PTerm a, b;" therefore makes only a a pointer; declare one
** variable per statement.
**
** @alias STerm
** @alias OTerm
**
** @attr name [AjPStr] Name of term
** @attr id [AjPStr] Id of term
** @attr line [ajint] Line number of name: field for the term
** @attr Padding [ajint] Padding to alignment boundary
******************************************************************************/
typedef struct STerm
{
AjPStr name;
AjPStr id;
ajint line;
ajint Padding;
} OTerm;
#define PTerm OTerm*
/* @datastatic PNamespace *****************************************************
**
** Namespace object
** Holds name and array of terms for an EDAM namespace.
** Only one copy of the terms is kept in memory (list holds pointers only)
**
** Objects are created with ajNamespaceNew, which creates the name string
** and the list but leaves the terms array NULL and n at 0.
** ajNamespaceDel deletes the first n terms through the array and then
** frees the array and the list.
**
** PNamespace is a preprocessor macro for "ONamespace*", not a typedef, so
** "PNamespace a, b;" makes only a a pointer.
**
** @alias SNamespace
** @alias ONamespace
**
** @attr name [AjPStr] Name of namespace
** @attr terms [PTerm*] Array of terms
** @attr list [AjPList] List of terms*
** @attr n [ajint] Size of array / list
** @attr Padding [ajint] Padding to alignment boundary
*****************************************************************************/
typedef struct SNamespace
{
AjPStr name;
PTerm *terms;
AjPList list;
ajint n;
ajint Padding;
} ONamespace;
#define PNamespace ONamespace*
/******************************************************************************
**
** PROTOTYPES
**
******************************************************************************/
/* Constructors for the Term and Namespace objects */
static PTerm ajTermNew(void);
static PNamespace ajNamespaceNew(void);
/* Destructors; each takes the address of the object pointer and sets the
** pointer to NULL after freeing */
static void ajTermDel(PTerm *P);
static void ajNamespaceDel(PNamespace *P);
/* Look up the id of a named term within one namespace; NULL if not found */
static const AjPStr FindTerm(ajint namespace, const AjPStr termname,
PNamespace *namespaces);
/******************************************************************************
**
** FUNCTIONS
**
******************************************************************************/
/* @funcstatic ajTermNew ***************************************************
**
** Term constructor
**
** @return [PTerm] New object
** @@
******************************************************************************/
static PTerm ajTermNew(void)
{
PTerm ret;
AJNEW0(ret);
ret->name = ajStrNew();
ret->id = ajStrNew();
ret->line = 0;
return ret;
}
/* @funcstatic ajNamespaceNew *************************************************
**
** Namespace constructor
**
** Allocates a namespace object with AJNEW0 and gives it a newly created
** name string and an empty list. The terms array is *not* allocated: it is
** left NULL with a count of 0.
** The returned object should be released with ajNamespaceDel.
**
** NOTE(review): the list is created with the string-list constructor,
** although the structure documents it as a list of terms - confirm that
** this is intended.
**
** @return [PNamespace] New object
** @@
******************************************************************************/
static PNamespace ajNamespaceNew(void)
{
    PNamespace ns = NULL;

    AJNEW0(ns);

    ns->n     = 0;
    ns->terms = NULL;
    ns->name  = ajStrNew();
    ns->list  = ajListstrNew();

    return ns;
}
/* @funcstatic ajTermDel ******************************************************
**
** Term destructor
**
** Deletes the name and id strings of the term, frees the term itself and
** sets the caller's pointer to NULL.
** A NULL argument, or an argument pointing to a NULL term, is reported
** through ajFatal.
**
** @param [d] P [PTerm*] Term object to delete
** @return [void]
** @@
******************************************************************************/
static void ajTermDel(PTerm *P)
{
    PTerm term = NULL;

    if(P == NULL)
    {
        ajFatal("Null arg error 1 in ajTermDel");
    }
    else if(*P == NULL)
    {
        ajFatal("Null arg error 2 in ajTermDel");
    }

    term = *P;

    ajStrDel(&term->id);
    ajStrDel(&term->name);

    AJFREE(term);
    *P = NULL;
}
/* @funcstatic ajNamespaceDel *************************************************
**
** Namespace destructor
**
** Deletes the name string, then (when the term count is non-zero) deletes
** each of the n terms through the terms array and frees the array.
** Finally the list and the namespace object are freed and the caller's
** pointer is set to NULL.
** A NULL argument, or an argument pointing to a NULL namespace, is
** reported through ajFatal.
**
** NOTE(review): the list is released with the string-list free function
** after the terms it refers to have been deleted through the array -
** confirm that this call does not also try to delete the list items.
**
** @param [d] P [PNamespace*] Namespace object to delete
** @return [void]
** @@
******************************************************************************/
static void ajNamespaceDel(PNamespace *P)
{
    PNamespace ns = NULL;
    int idx = 0;

    if(P == NULL)
    {
        ajFatal("Null arg error 1 in ajNamespaceDel");
    }
    else if(*P == NULL)
    {
        ajFatal("Null arg error 2 in ajNamespaceDel");
    }

    ns = *P;

    ajStrDel(&ns->name);

    if(ns->n)
    {
        /* Terms are owned by the namespace: delete them in array order */
        for(idx = 0; idx < ns->n; idx++)
        {
            ajTermDel(&ns->terms[idx]);
        }

        AJFREE(ns->terms);
    }

    ajListstrFree(&ns->list);

    AJFREE(ns);
    *P = NULL;
}
/* @funcstatic FindTerm *******************************************************
**
** Searches the terms array of one namespace for a term whose name matches
** termname and returns the id string of the first match.
**
** The returned string belongs to the matching term object; it must not be
** modified or deleted by the caller.
**
** Returns NULL if term is not found
**
** Calls ajFatal if termname or namespaces is NULL, if the namespace index
** is outside 0..NNAMESPACES-1, or if the selected array element is NULL.
**
** @param [r] namespace [ajint] Namespace index as integer
** @param [r] termname [const AjPStr] Name of term
** @param [u] namespaces [PNamespace*] Array of namespace objects
** @return [const AjPStr] Term identifier
** @@
******************************************************************************/
static const AjPStr FindTerm(ajint namespace, const AjPStr termname,
                             PNamespace *namespaces)
{
    PNamespace ns = NULL;
    ajint x;

    if(!termname || !namespaces)
        ajFatal("Bad args to FindTerm");

    /* Check the index before it is used to subscript the array */
    if(namespace < 0 || namespace >= NNAMESPACES || !namespaces[namespace])
        ajFatal("Bad namespace index %d in FindTerm", namespace);

    ns = namespaces[namespace];

    /* Linear scan over the n terms held by the selected namespace */
    for(x=0; x<ns->n; x++)
        if(ajStrMatchS(termname, ns->terms[x]->name))
            return ns->terms[x]->id;

    return NULL;
}
/* @prog edamclean ********************************************************
**
** Validate and fix EDAM OBO ontology
**
*****************************************************************************/
int main(ajint argc, char **argv)
{
/* Variable declarations */
AjPFile inf_edam = NULL; /* Name of EDAM (input) file */
AjPFile ouf_edam = NULL; /* Name of EDAM (output) file */
AjPFile ouf_log = NULL; /* Name of report (output) file */
AjPFile ouf_xml = NULL; /* Name of XML (output) file */
AjPFile tmp_xml = NULL; /* Temp. XML (output) file */
AjPDirout xmloutdir = NULL; /* XML (output) file directory */
AjPStr mode = NULL; /* Mode of operation */
AjPList list_tmp = NULL; /* Temporary list */
AjPStr *fields = NULL; /* Array of valid tokens for first
word in line */
ajint nfields = 0; /* Size of fields array */
AjPStr *ids = NULL; /* Array of all ids in file */
ajint nids = 0; /* Size of ids */
const AjPStr id = NULL; /* ID of a term */
AjPStr line = NULL; /* A line from the input file */
ajint linecnt = 0; /* Line number of line */
ajint termcnt = 0; /* Count of term definitions */
AjPStr tok = NULL; /* A token from line */
AjBool done = ajFalse; /* Housekeeping */
ajint x = 0; /* Housekeeping */
ajint y = 0; /* Housekeeping */
ajint z = 0; /* Housekeeping */
ajint idx = 0; /* Housekeeping */
AjPStr name = NULL; /* Name of a term */
AjPStr namespace = NULL; /* Namespace of a term */
AjPStr relation = NULL; /* Relationship name, e.g. "is_a" */
AjPStr tmp_name = NULL; /* Temp. name of a term */
AjPStr tmp_id = NULL; /* Temp. id of a term */
AjPStr tmp_str = NULL; /* Temp. string */
PTerm tmp_term = NULL; /* Temp. term pointer */
ajint tmp_line = 0; /* Temp. line number */
PNamespace namespaces[NNAMESPACES]; /* Array of namespace objects */
AjBool done_first = ajFalse; /* Housekeeping ... read first term */
AjBool first = ajFalse; /* Housekeeping ... on first term */
AjBool found_id = ajFalse;
AjBool in_typedef = ajFalse; /* In a [Typedef] statement */
AjBool found_name = ajFalse;
AjBool found_namespace = ajFalse;
AjBool found_def = ajFalse;
AjBool found_comment = ajFalse;
AjBool found_synonym = ajFalse;
AjBool found_xref = ajFalse;
AjBool found_is_obsolete = ajFalse;
AjBool found_consider = ajFalse;
AjBool found_isa = ajFalse;
AjBool found_concerns = ajFalse;
AjBool found_is_concern_of = ajFalse;
AjBool found_has_input = ajFalse;
AjBool found_is_input_of = ajFalse;
AjBool found_has_output = ajFalse;
AjBool found_is_output_of = ajFalse;
AjBool found_has_source = ajFalse;
AjBool found_is_source_of = ajFalse;
AjBool found_has_identifier = ajFalse;
AjBool found_is_identifier_of = ajFalse;
AjBool found_has_attribute = ajFalse;
AjBool found_is_attribute_of = ajFalse;
AjBool found_has_part = ajFalse;
AjBool found_is_part_of = ajFalse;
AjBool found_has_format = ajFalse;
AjBool found_is_format_of = ajFalse;
/* Read data from acd */
embInit("edamclean", argc, argv);
/* ACD data handling */
inf_edam = ajAcdGetInfile("edaminfile");
ouf_edam = ajAcdGetOutfile("edamoutfile");
ouf_log = ajAcdGetOutfile("logfile");
ouf_xml = ajAcdGetOutfile("xmlfile");
xmloutdir = ajAcdGetOutdir("xmloutdir");
mode = ajAcdGetSelectSingle("mode");
/* taxdir = ajAcdGetDirectory("taxdirectory"); */
ajFmtPrint("MODE : %S\n", mode);
/*
ajTaxLoad(taxdir);
ajOboParseObofile(inf_edam, "noidorder,nounkid");
ajFileSeek(inf_edam, 0, 0);
embExit(); */
/* Memory allocation */
line = ajStrNew();
tok = ajStrNew();
name = ajStrNew();
namespace = ajStrNew();
relation = ajStrNew();
tmp_name = ajStrNew();
tmp_id = ajStrNew();
for(x=0; xname), NAMESPACES[x]);
}
/* Check for valid first tokens */
/* First, write array of valid tokens for first word in line */
list_tmp = ajListstrNew();
for(x=0; x\n");
}
for(in_typedef=ajFalse; ajReadline(inf_edam, &line); )
{
if(ajStrPrefixC(line, "[Typedef]"))
in_typedef=ajTrue;
else if(ajStrPrefixC(line, "[Term]"))
in_typedef=ajFalse;
if(in_typedef)
continue;
if(ajStrPrefixC(line, "namespace:"))
{
if(ajStrMatchC(mode, "Output PURL XML (one file / term)"))
{
if(!(tmp_xml=ajFileNewOutNameDirS(tmp_id, xmloutdir)))
ajFatal("Could not create file");
else
ajFmtPrintF(tmp_xml, "\n");
}
ajStrAssignClear(&tok);
ajFmtScanS(line, "%*s %S", &tok);
ajStrRemoveWhite(&tok);
ajFmtPrintF(tmp_xml,
"\n"
"\n"
"jon\n"
"\n"
"\n"
"\n", tok, tmp_id, tmp_id);
if(ajStrMatchC(mode, "Output PURL XML (one file / term)"))
{
ajFmtPrintF(tmp_xml, "\n");
ajFileClose(&tmp_xml);
}
}
if(ajStrPrefixC(line, "id:"))
{
ajStrParseC(line, ":");
ajStrParseC(NULL, ":");
ajStrAssignS(&tmp_id, ajStrParseC(NULL, ":"));
ajStrRemoveWhite(&tmp_id);
}
}
if(ajStrMatchC(mode, "Output PURL XML (single file)"))
ajFmtPrintF(tmp_xml, "\n");
}
exit(0);
for(linecnt=0; ajReadline(inf_edam, &line); linecnt++)
{
ajStrAssignClear(&tok);
ajFmtScanS(line, "%S", &tok);
for(x=0, done=ajFalse;x1)
{
ajFmtPrintF(ouf_log, "Line %6d : Non-unique id: %S "
"%S\n", linecnt+1, tok, line);
break;
}
}
}
}
}
ajFmtPrintF(ouf_log, "\n\n");
ajFileSeek(inf_edam, 0, 0); /* Rewind file */
/* Check for mandatory fields / field order */
ajFmtPrintF(ouf_log, "8. STANDARD MANDATORY FIELDS AND FIELD ORDER\n");
for(in_typedef=ajFalse, first = ajTrue, done_first=ajFalse, linecnt=0;
ajReadline(inf_edam, &line); linecnt++)
{
/* id, name, namespace, def, (comment), (synonym), is_a */
/* Stop checking once first [Typedef] line is found. */
/* if(ajStrPrefixC(line, "[Typedef]"))
break; */
if(ajStrPrefixC(line, "[Typedef]"))
in_typedef=ajTrue;
if(ajStrPrefixC(line, "[Term]"))
{
in_typedef=ajFalse;
/* Process previous term */
if(done_first)
{
if(!found_id)
ajFmtPrintF(ouf_log, "Line %6d : No id: field in "
"term\n", tmp_line);
if(!found_name)
ajFmtPrintF(ouf_log, "Line %6d : No name: field in "
"term\n", tmp_line);
if(!found_namespace)
ajFmtPrintF(ouf_log, "Line %6d : No namespace: field in "
"term\n", tmp_line);
if(!found_def)
ajFmtPrintF(ouf_log, "Line %6d : No def: field in "
"term\n", tmp_line);
/* No is_a needed for first term in file or for obsolete terms*/
if((!found_isa) && (!first) && (!found_is_obsolete))
ajFmtPrintF(ouf_log, "Line %6d : No is_a: field in "
"term\n", tmp_line);
first = ajFalse;
/* entity */
if(ajStrMatchC(namespace, NAMESPACES[0]))
{
if(found_concerns || found_has_input ||
found_is_input_of || found_has_output ||
found_is_output_of || found_has_source ||
found_is_source_of || found_is_identifier_of ||
found_is_format_of || found_has_format ||
found_is_attribute_of)
ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
"for term in this namespace\n", tmp_line);
}
/* topic */
else if(ajStrMatchC(namespace, NAMESPACES[1]))
{
if(!found_concerns)
ajFmtPrintF(ouf_log, "Line %6d : No concerns: relation in term\n", tmp_line);
if(found_is_concern_of || found_has_input ||
found_is_input_of || found_has_output ||
found_is_output_of || found_has_source ||
found_is_source_of || found_has_identifier ||
found_is_identifier_of || found_has_attribute ||
found_is_attribute_of || found_has_part ||
found_is_format_of || found_has_format ||
found_is_part_of)
ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
"for term in this namespace\n", tmp_line);
}
/* operation */
else if(ajStrMatchC(namespace, NAMESPACES[2]))
{
if(!found_is_concern_of)
ajFmtPrintF(ouf_log, "Line %6d : No is_concern_of: relation in term\n", tmp_line);
if(found_concerns || found_is_input_of ||
found_is_output_of || found_has_source ||
found_is_source_of || found_has_identifier ||
found_is_identifier_of || found_has_attribute ||
found_is_attribute_of || found_has_part ||
found_is_format_of || found_has_format ||
found_is_part_of)
ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
"for term in this namespace\n", tmp_line);
}
/* resource */
else if(ajStrMatchC(namespace, NAMESPACES[3]))
{
if(!found_is_source_of)
ajFmtPrintF(ouf_log, "Line %6d : No is_source_of: "
"relation in term\n", tmp_line);
if(found_concerns || found_has_input ||
found_is_input_of || found_has_output ||
found_is_output_of || found_has_source ||
found_is_identifier_of || found_has_attribute ||
found_is_format_of || found_has_format ||
found_is_attribute_of)
ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
"for term in this namespace\n", tmp_line);
}
/* data */
else if(ajStrMatchC(namespace, NAMESPACES[4]))
{
if(found_concerns || found_is_concern_of ||
found_has_input || found_has_output ||
found_is_source_of || found_has_attribute ||
found_is_format_of)
ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
"for term in this namespace\n", tmp_line);
}
/* format */
else if(ajStrMatchC(namespace, NAMESPACES[5]))
{
if(found_concerns ||
found_is_concern_of ||
found_has_input ||
found_is_input_of ||
found_has_output ||
found_is_output_of ||
found_has_source ||
found_is_source_of ||
found_has_identifier ||
found_is_identifier_of ||
found_has_attribute ||
found_is_attribute_of ||
found_has_part ||
found_is_part_of ||
found_has_format )
ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
"for term in this namespace\n", tmp_line);
}
/* edam_identifier */
/*
else if(ajStrMatchC(namespace, NAMESPACES[6]))
{
if(!found_is_identifier_of)
ajFmtPrintF(ouf_log, "Line %6d : No is_identifier_of: "
"relation in term\n", tmp_line);
if(found_concerns || found_is_concern_of ||
found_has_input || found_has_output ||
found_is_source_of || found_has_identifier ||
found_has_attribute || found_is_attribute_of ||
found_is_format_of || found_has_format ||
found_has_part || found_is_part_of)
ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
"for term in this namespace\n", tmp_line);
}
*/
}
tmp_line = linecnt+1;
found_id = ajFalse;
found_name = ajFalse;
found_namespace = ajFalse;
found_def = ajFalse;
found_comment = ajFalse;
found_synonym = ajFalse;
found_xref = ajFalse;
found_is_obsolete = ajFalse;
found_consider = ajFalse;
found_isa = ajFalse;
found_concerns = ajFalse;
found_is_concern_of = ajFalse;
found_has_input = ajFalse;
found_is_input_of = ajFalse;
found_has_output = ajFalse;
found_is_output_of = ajFalse;
found_has_source = ajFalse;
found_is_source_of = ajFalse;
found_has_identifier = ajFalse;
found_is_identifier_of = ajFalse;
found_has_attribute = ajFalse;
found_is_attribute_of = ajFalse;
found_has_part = ajFalse;
found_is_part_of = ajFalse;
found_has_format = ajFalse;
found_is_format_of = ajFalse;
done_first = ajTrue;
}
else if(in_typedef)
continue;
else if(ajStrPrefixC(line, "id:"))
{
found_id = ajTrue;
if(found_name || found_namespace || found_def || found_comment ||
found_synonym || found_isa || found_xref || found_is_obsolete
|| found_consider)
ajFmtPrintF(ouf_log, "Line %6d : id: field in wrong order "
"(%S)\n", linecnt+1, line);
}
else if(ajStrPrefixC(line, "name:"))
{
found_name = ajTrue;
if( (!found_id) || found_namespace || found_def || found_comment ||
found_synonym || found_isa || found_xref || found_is_obsolete
|| found_consider)
ajFmtPrintF(ouf_log, "Line %6d : name: field in wrong order "
"(%S)\n", linecnt+1, line);
}
else if(ajStrPrefixC(line, "namespace:"))
{
found_namespace = ajTrue;
ajFmtScanS(line, "%*s %S", &namespace);
if((!found_id) || (!found_name) || found_def || found_comment ||
found_synonym || found_isa || found_xref || found_is_obsolete
|| found_consider)
ajFmtPrintF(ouf_log, "Line %6d : namespace: field in wrong "
"order (%S)\n", linecnt+1, line);
}
else if(ajStrPrefixC(line, "def:"))
{
found_def = ajTrue;
if((!found_id) || (!found_name) || (!found_namespace) ||
found_comment || found_synonym || found_isa || found_xref
|| found_is_obsolete || found_consider)
ajFmtPrintF(ouf_log, "Line %6d : def: field in wrong order "
"(%S)\n", linecnt+1, line);
}
else if(ajStrPrefixC(line, "comment:"))
{
found_comment = ajTrue;
if((!found_id) || (!found_name) || (!found_namespace) ||
(!found_def) || found_synonym || found_isa || found_xref
|| found_is_obsolete || found_consider)
ajFmtPrintF(ouf_log, "Line %6d : comment: field in wrong "
"order (%S)\n", linecnt+1, line);
}
else if(ajStrPrefixC(line, "synonym:"))
{
found_synonym = ajTrue;
if((!found_id) || (!found_name) || (!found_namespace) ||
(!found_def) || found_isa || found_xref || found_is_obsolete
|| found_consider)
ajFmtPrintF(ouf_log, "Line %6d : synonym: field in wrong "
"order (%S)\n", linecnt+1, line);
}
else if(ajStrPrefixC(line, "is_a:"))
{
found_isa = ajTrue;
if((!found_id) || (!found_name) || (!found_namespace) ||
(!found_def) || found_is_obsolete
|| found_consider)
ajFmtPrintF(ouf_log, "Line %6d : is_a: field in wrong order "
"(%S)\n", linecnt+1, line);
}
else if(ajStrPrefixC(line, "concerns:"))
found_concerns = ajTrue;
else if(ajStrPrefixC(line, "is_concern_of:"))
found_is_concern_of = ajTrue;
else if(ajStrPrefixC(line, "has_input:"))
found_has_input = ajTrue;
else if(ajStrPrefixC(line, "is_input_of:"))
found_is_input_of = ajTrue;
else if(ajStrPrefixC(line, "has_output:"))
found_has_output = ajTrue;
else if(ajStrPrefixC(line, "is_output_of:"))
found_is_output_of = ajTrue;
else if(ajStrPrefixC(line, "has_source:"))
found_has_source = ajTrue;
else if(ajStrPrefixC(line, "is_source_of:"))
found_is_source_of = ajTrue;
else if(ajStrPrefixC(line, "has_identifier:"))
found_has_identifier = ajTrue;
else if(ajStrPrefixC(line, "is_identifier_of:"))
found_is_identifier_of = ajTrue;
else if(ajStrPrefixC(line, "has_attribute:"))
found_has_attribute = ajTrue;
else if(ajStrPrefixC(line, "is_attribute_of:"))
found_is_attribute_of = ajTrue;
else if(ajStrPrefixC(line, "has_part:"))
found_has_part = ajTrue;
else if(ajStrPrefixC(line, "is_part_of:"))
found_is_part_of = ajTrue;
else if(ajStrPrefixC(line, "has_format:"))
found_has_format = ajTrue;
else if(ajStrPrefixC(line, "is_format_of:"))
found_is_format_of = ajTrue;
else if(ajStrPrefixC(line, "xref:"))
found_xref = ajTrue;
else if(ajStrPrefixC(line, "is_obsolete:"))
found_is_obsolete = ajTrue;
else if(ajStrPrefixC(line, "consider:"))
found_consider = ajTrue;
}
ajFmtPrintF(ouf_log, "\n\n");
ajFileSeek(inf_edam, 0, 0); /* Rewind file */
/* Check for unique names within each namespace */
ajFmtPrintF(ouf_log, "9. UNIQUE NAMES WITHIN EACH NAMESPACE\n");
for(in_typedef=ajFalse, linecnt=0; ajReadline(inf_edam, &line); linecnt++)
{
if(ajStrPrefixC(line, "[Typedef]"))
in_typedef=ajTrue;
/* First build the arrays of names in each namespace */
if(ajStrPrefixC(line, "[Term]"))
{
tmp_term = ajTermNew();
in_typedef=ajFalse;
}
else if(in_typedef)
continue;
else if(ajStrPrefixC(line, "name:"))
{
ajStrParseC(line, ":");
ajStrAssignS(&tmp_term->name, ajStrParseC(NULL, ":"));
ajStrRemoveWhiteExcess(&tmp_term->name);
tmp_term->line = linecnt;
}
else if(ajStrPrefixC(line, "id:"))
{
ajStrParseC(line, ":");
ajStrParseC(NULL, ":");
ajStrAssignS(&tmp_term->id, ajStrParseC(NULL, ":"));
ajStrRemoveWhiteExcess(&tmp_term->id);
}
else if(ajStrPrefixC(line, "namespace:"))
{
ajFmtScanS(line, "%*s %S", &namespace);
for(x=0; xlist, tmp_term);
}
}
for(x=0; xn = ajListToarray(namespaces[x]->list,
(void***) &(namespaces[x]->terms));
for(x=0; xn; y++)
for(z=0; zn; z++)
if((y!=z) && (ajStrMatchS(namespaces[x]->terms[y]->name,
namespaces[x]->terms[z]->name)))
{
ajFmtPrintF(ouf_log, "Line %6d : Non-unique name: %S "
"(First used on line %d)\n",
namespaces[x]->terms[y]->line,
namespaces[x]->terms[y]->name,
namespaces[x]->terms[z]->line);
break;
}
}
ajFmtPrintF(ouf_log, "\n\n");
ajFileSeek(inf_edam, 0, 0); /* Rewind file */
/*
for(x=0; xname): %S\n", x, namespaces[x]->name);
fflush(stdout);
}
*/
/* Check for valid end-points of relations */
ajFmtPrintF(ouf_log, "10. VALID END-POINTS OF RELATIONS\n");
for(linecnt=0; ajReadline(inf_edam, &line); linecnt++)
{
if(ajStrPrefixC(line, "namespace:"))
{
/* ajFmtPrint("line === %S\n", line); fflush(stdout); */
/* Identify index of this namespace */
ajFmtScanS(line, "%*s %S", &namespace);
/* ajFmtPrint("namespace === %S\n", namespace); fflush(stdout); */
/*
for(x=0; xname): %S\n", x, namespaces[x]->name);
fflush(stdout);
}
*/
for(idx=0; idxname === %S++\n", idx, namespaces[idx]->name); fflush(stdout);
*/
if(ajStrMatchS(namespace, namespaces[idx]->name))
break;
else
ajFmtPrint("Failing to match %S to %S\n", namespace, namespaces[idx]->name);
}
if(idx==NNAMESPACES)
ajFatal("namespace not found - cannot recover\nline: %S\nnamespace: %S\n", line, namespace);
/* if(!ajStrMatchC(mode, "Fix relations"))
continue; */
}
ajStrAssignClear(&tok);
if(ajStrPrefixC(line, "relationship:"))
ajFmtScanS(line, "%*S %S", &tok);
else
ajFmtPrintS(&tok, "%S", &tok);
for(x=0, done=ajFalse; x