# Application declaration for "diffseq".
# Carries the one-line description, the menu group, the gui/batch/cpu
# hints and three EDAM relations (two topics and one operation).
application: diffseq [
  documentation: "Compare and report features of two similar sequences"
  groups: "Alignment:Differences"
  gui: "yes"
  batch: "yes"
  cpu: "medium"
  relations: "EDAM:0000182 topic Sequence alignment"
  relations: "EDAM:0000160 topic Sequence features"
  relations: "EDAM:0000256 operation Feature table comparison"
]

# Input section: the two sequences to be compared.
# Both are declared with parameter "Y" and features "Y".
section: input [
  information: "Input section"
  type: "page"
]

  # First sequence; its type is "any", so no sequence type is imposed here.
  sequence: asequence [
    parameter: "Y"
    type: "any"
    features: "Y"
    relations: "EDAM:0000849 data Sequence record"
  ]

  # Second sequence; its type is computed from $(acdprotein):
  # "stopprotein" when that variable is true, otherwise "nucleotide".
  # NOTE(review): acdprotein is presumably set from the first sequence
  # read (asequence) - confirm against the ACD documentation.
  sequence: bsequence [
    parameter: "Y"
    type: "@($(acdprotein) ? stopprotein : nucleotide)"
    features: "Y"
    relations: "EDAM:0000849 data Sequence record"
  ]

endsection: input

# Required section: holds the single qualifier marked standard "Y".
section: required [
  information: "Required section"
  type: "page"
]

  # Word size used when matching the two sequences.
  # Default is 10 and the minimum accepted value is 2; the help text
  # describes the speed versus resolution trade-off of other values.
  integer: wordsize [
    standard: "Y"
    default: "10"
    minimum: "2"
    information: "Word size"
    help: "The similar regions between the two sequences are found by
           creating a hash table of 'wordsize'd subsequences. 10 is a
           reasonable default. Making this value larger (20?) may speed up
           the program slightly, but will mean that any two differences
           within 'wordsize' of each other will be grouped as a single region
           of difference. This value may be made smaller (4?) to improve the
           resolution of nearby differences, but the program will go much
           slower."
    relations: "EDAM:0001250 data Word size"
  ]

endsection: required

# Additional section: holds the single qualifier marked additional "Y".
section: additional [
  information: "Additional section"
  type: "page"
]

  # Boolean switch, off by default ("N"). Per its help text, turning it on
  # makes the program also report differences at the very ends of the
  # sequences, not only those between matching regions.
  boolean: globaldifferences [
    additional: "Y"
    default: "N"
    information: "Force reporting of differences at the start
                  and end"
    help: "Normally this program will find regions of identity that
           are the length of the specified word-size or greater and will then
           report the regions of difference between these matching regions.
           This works well and is what most people want if they are working
           with long overlapping nucleic acid sequences. You are usually not
           interested in the non-overlapping ends of these sequences. If you
           have protein sequences or short RNA sequences however, you will be
           interested in differences at the very ends. If this option is
           set to be true then the differences at the ends will also be
           reported."
    relations: "EDAM:0002527 data Parameter or primitive"
  ]

endsection: additional

# Output section: one report file plus one feature-table output per
# input sequence. All three are declared with parameter "Y".
section: output [
  information: "Output section"
  type: "page"
]

  # Report of the differences, written in the "diffseq" report format.
  # The taglist names the typed (int:/str:) tags attached to the report.
  report: outfile [
    parameter: "Y"
    rformat: "diffseq"
    taglist: "int:start int:end int:length str:name str:sequence
              str:first_feature str:second_feature"
    relations: "EDAM:0001256 data Sequence features (comparative)"
  ]

  # Feature output for the first sequence.
  # The default file name is built from $(asequence.name) plus ".diffgff";
  # the type is "protein" when $(acdprotein) is true, else "nucleotide".
  featout: aoutfeat [
    parameter: "Y"
    default: "$(asequence.name).diffgff"
    help: "File for output of first sequence's features"
    type: "@($(acdprotein) ? protein : nucleotide)"
    relations: "EDAM:0001270 data Feature table"
  ]

  # Feature output for the second sequence.
  # Same as aoutfeat, except the default name uses $(bsequence.name).
  featout: boutfeat [
    parameter: "Y"
    default: "$(bsequence.name).diffgff"
    help: "File for output of second sequence's features"
    type: "@($(acdprotein) ? protein : nucleotide)"
    relations: "EDAM:0001270 data Feature table"
  ]

endsection: output