#! /bin/sh # Sort MAF-format alignments by sequence name, then strand, then start # position, then end position, of the top sequence. Also, merge # identical alignments. Comment lines starting with "#" are written # at the top, in unchanged order. If option "-d" is specified, then # alignments that appear only once are omitted (like uniq -d). # Minor flaws, that do not matter for typical MAF input: # 1) It might not work if the input includes TABs. # 2) Preceding whitespace is considered part of the sequence name. I # want to use sort -b, but it seems to be broken in different ways for # different versions of sort! # 3) Alignments with differences in whitespace are considered # non-identical. # This script uses perl instead of specialized commands like uniq. # The reason is that, on some systems (e.g. Mac OS X), uniq doesn't # work with long lines. # Make "sort" use a standard ordering: LC_ALL=C export LC_ALL uniqOpt=1 whichSequence=1 while getopts hdn: opt do case $opt in h) cat <