#!/bin/sh - # this script will run a search program on a sequence input file or on each # file in a file of filenames # to customise this script see the function called run_one_prog below RCS_HEADER="$Header: //tmp/pathsoft/artemis/etc/run_hth,v 1.1 2004-06-09 10:03:06 tjc Exp $" PROG=`echo $RCS_HEADER | sed 's/.*run_\(.*\),v.*/\1/'` if [ $# = 4 -a x$1 = x-onefile ] then shift ONEFILE=t PARAMETERS=$3 export PARAMETERS else if [ $# = 2 ] then PARAMETERS=$2 export PARAMETERS else echo usage: $0 -onefile input_file output_file parameters 1>&2 echo or: $0 file_of_filenames parameters 1>&2 exit 1 fi fi # expand any ~ or environment variables EXPANDED_PARAMETERS=`echo "echo $PARAMETERS" | /bin/csh -f` ### change this function to suit your site: run_one_prog () { INPUT_FILE=$1 OUTPUT_FILE=$2 PARAMETERS=$3 ### change these lines: EXEC=helixturnhelix echo "about to start $EXEC with input from $INPUT_FILE and output to" 1>&2 echo "$OUTPUT_FILE using database $PARAMETERS" 1>&2 echo "command line: $EXEC $PARAMETERS $INPUT_FILE" 1>&2 $EXEC $INPUT_FILE $OUTPUT_FILE $PARAMETERS 2> hth_errors.new # add/change the flags to suit your site: $EXEC $INPUT_FILE -outfile $OUTPUT_FILE $PARAMETERS 2>&1 | tee ${PROG}_errors.new 1>&2 #### end of changes if [ -s ${PROG}_errors.new ] then ( echo ERROR running $PROG: ; echo; echo =================================================== cat ${PROG}_errors.new ) >> $OUTPUT_FILE cat ${PROG}_errors.new >> ${PROG}_errors fi } (echo "#!/bin/sh -"; echo "kill $$") > $PROG.kill chmod a+x $PROG.kill if [ x$ONEFILE = x ] then for i in `cat $1` do run_one_prog $i $i.out $EXPANDED_PARAMETERS done else run_one_prog $1 $2 $EXPANDED_PARAMETERS fi if [ x$ONEFILE = x ] then PEPFILES=`cat $1` else PEPFILES=$1 fi if [ -e hth/hth.tab ] then mv -f hth/hth.tab hth/hth.tab.old fi for i in $PEPFILES do # write a tab file containing a feature for each hth perl -e ' open PEPFILE, "$ARGV[0]" or die "$0: cannot open $ARGV[0]\n"; my $first_line = ; close PEPFILE; if ($first_line !~ m/^>(.*?), (.*) (\d+):(\d+)\s+(\S+)/) { die "first line of $ARGV[0] is badly formatted\n"; } my ($gene_name, $description, $pep_first_base, $pep_last_base, $forward_or_back) = ($1, $2, $3, $4, $5); open HTHFILE, "$ARGV[1]" or die "$0: cannot open $ARGV[1]\n"; my $score; my $first_match_base; while () { if (m/^Score (\d+) .* in .* at residue (\d+)/) { $score = $1; $first_match_base = $2; } if (m/^\s+Sequence:\s+(\S+)/ && defined ($score) && defined ($pep_first_base)) { if ($forward_or_back eq "forward") { my $start_base = $pep_first_base + ($first_match_base - 1) * 3; my $end_base = $start_base + (length $1) * 3 - 1; print <> hth/hth.tab done exit 0