##
## Command Syntax: nawk -f ace2line.awk <.ace file>
##
## Purpose: select fields in arbitrary order from .ace file and create
##          line-records
##
## Input: .ace file with data for a single class -- with slight modifications
##        (e.g. ace-dump file or output file generated by line2ace.awk)
##   * first .ace file record specifies the data class and tags wanted as
##     fields in output file
##   * first record contains names of class and tags, but with no values
##     -- the first line in this record should consist of a valid class name
##        and a ":" mark with spaces between the two fields
##     -- lines after the first one should only contain a tag name; order
##        determines field output order
##   * values for tags not specified in first record will simply be ignored
##     (maybe need another filter to check for unspecified tags)
##   * each class and tag name should start with an alpha-numeric character
##   * other .ace format conventions still hold if not contradicting above
##     points
##
## Output: standard output; use redirection to output to a file
##         (e.g. nawk -f ace2line.awk data_file.ace > output_file)
##   * data records delimited by end-of-line; fields by specified delimiter
##     (tab); set variable "field_sep" in the BEGIN section below for new
##     assignment
##   * multiple values for fields in .ace file separated by "$" in output
##     field; set variable "multi_sep" in the BEGIN section for new assignment
##   * first record line starts with class name and then lists tag names that
##     correspond to each field in output; it is followed by one empty line
##   * order of fields is same as order of tags in first record of .ace
##     input file
##
## See also: line2ace.awk, ace_sel.awk
##
## Global Variables:
##   spec_file = ARGV[1];       file containing class and tag specifications
##   multi_sep = "$";           multi-value separator for a single field in
##                              output
##   field_sep = "\t";          field separator in output
##   cla_name_suffix = "$KEY";  appended to class name to form "key"
##
##   key        -- used as tag name for class key:
##                 class name + cla_name_suffix
##   i          -- numeric index
##   nth        -- number count of fields in output
##   spec_error -- set to 1 when the specification record is unusable, so
##                 that END does not flush a half-built header line
##
##   field_order[]  = field order corresponding to tag/key
##   output_field[] = data for the nth field in output
##

BEGIN {
    spec_file       = ARGV[1];
    multi_sep       = "$";
    field_sep       = "\t";
    cla_name_suffix = "$KEY";
    spec_error      = 0;

    # parse spec_file to get the specifications of class and tags
    nth = 0;
    # Do not redirect from an empty file name (no argument given); nth then
    # stays 0 and the error below is reported as usual.
    if (spec_file != "") {
        while ((getline < spec_file) > 0) {
            # loop-exiting condition: (nth > 0 && NF != 1) is stricter
            if (nth > 0 && $2 == ":") {
                break;
            }

            # skip any data lines that are not specifying output fields
            if ($0 ~ /^[^A-Za-z0-9]/ || $0 == "" || NF > 2) {
                continue;
            }
            if (nth == 0 && $2 != ":") {
                continue;
            }

            # set up data structure if valid class or tag specification
            if ($2 == ":") {
                key = $1 cla_name_suffix;
                field_order[key] = ++nth;
                output_field[field_order[key]] = $1;
            }
            if (NF == 1) {
                field_order[$1] = ++nth;
                output_field[field_order[$1]] = $1;
            }
        }
        # The same file is read again as the main input; release the stream
        # that was opened only for this pre-scan.
        close(spec_file);
    }

    if (nth <= 1) {
        print "ERROR: insufficient class and tag specifications for output";
        # "exit" inside BEGIN still runs the END rule; flag the failure so
        # END does not print the partially collected header line.
        spec_error = 1;
        exit 1;
    }

    # get ready for parsing .ace data file: output_field[] currently holds
    # the class/tag names, so this prints the header line
    FlushOutputFields();
    print "";
}

# processing .ace data file

$0 == "" || $0 ~ /^[^A-Za-z0-9]/ {
    # skip any line that does not start with an alpha-numeric character;
    # ignore any data fetched previously and not yet converted;
    # continuation of data portion can start with non-alpha-numeric
    # character, which should be taken care of in function GetTagData.
    next;
}

# "class : key" line -- start of a new object: emit the previous record and
# remember the key of the new one when the class is the requested one.
$2 == ":" {
    FlushOutputFields();
    key = $1 cla_name_suffix;
    if (key in field_order) {
        output_field[1] = GetTagData(3);
    }
    next;
}

# "tag value ..." line -- collect the value when a record is in progress and
# the tag was requested; repeated tags are joined with multi_sep.
$2 != ":" && NF > 1 {
    if (output_field[1] != "" && ($1 in field_order)) {
        i = field_order[$1];
        if (output_field[i] == "") {
            output_field[i] = GetTagData(2);
        } else {
            output_field[i] = output_field[i] multi_sep GetTagData(2);
        }
    }
}

END {
    # emit the last pending record, unless BEGIN rejected the specification
    if (!spec_error) {
        FlushOutputFields();
    }
}

####################### User-defined Functions #########################

# Flush a record in tabular format if ready and initialize output fields.
# A record is "ready" when its first field (the class key) is non-empty;
# fields 1..nth are printed separated by field_sep and then reset to "".
# Local Variables: i
function FlushOutputFields(  LOCAL_VARS_HERE, i)
{
    if (output_field[1] != "") {
        for (i = 1; i <= nth; i++) {
            printf("%s", output_field[i]);
            output_field[i] = "";
            if (i != nth)
                printf("%s", field_sep);
        }
        printf("\n");
    }
}

# Returns data for a tag from its .ace format line(s).
# Assumption: "\" is used as the last token of each data line to indicate
#             continuation on next line.  The last line of the input data
#             file should not end with "\" as the last token; otherwise,
#             the last record is ignored.  The last line of each tag data
#             should not end with "\".
# Local Variables: tag_data, i, key
#
# Parameters:
#   starting_field_num -- index of the first field of the current line that
#                         belongs to the data (the callers pass 3 for a
#                         "class : key" line and 2 for a "tag value" line)
#   LOCAL_VARS_HERE    -- unused marker separating the real argument from
#                         the local variables that follow it
# Returns: the fields from starting_field_num onward joined by single
#          blanks, with every double quote removed; the fields of each
#          continuation line are appended in the same way.
# Side effects: strips the quotes from $0, may consume following input lines
#          with getline, and may flush/restart the current record when a
#          continuation unexpectedly runs into a new "class :" line.
function GetTagData(starting_field_num,  LOCAL_VARS_HERE, tag_data, i, key)
{
    gsub(/"/, "");
    tag_data = "";
    for (i = starting_field_num; i <= NF; i++) {
        if (i == NF && $i == "\\") {
            # trailing "\" token: the data continues on the next input line
            if (getline <= 0) {
                print "";
                print "WARNING: unexpected EOF at line number " NR;
                print " -- exit abnormally; the last record is ignored";
                print "";
                # "exit" still runs the END rule, which flushes any record
                # whose first field is set; clear it so the incomplete
                # record really is ignored, as announced above.
                output_field[1] = "";
                exit 1;
            }
            if ($2 == ":") {
                # the continuation line is actually the start of a new
                # object: emit what was collected so far and start over
                print "";
                print "WARNING: unexpected input at line number " NR;
                print " -- the current record may be corrupted";
                print "";
                FlushOutputFields();
                key = $1 cla_name_suffix;
                if (key in field_order) {
                    output_field[1] = GetTagData(3);
                }
                # NOTE(review): the text gathered before the interruption is
                # still returned below, so the caller stores it into the
                # record that was just started -- confirm whether intended.
                break;
            }
            gsub(/"/, "");
            # restart at the first field of the continuation line (the
            # loop increment turns 0 into 1)
            i = 0;
            continue;
        } else {
            tag_data = tag_data $i;
            if (i != NF)
                tag_data = tag_data " ";
        }
    }
    return tag_data;
}