#!/usr/bin/perl -w # This is a utility to create the usage, input and output # example HTML include files for the EMBOSS application documentation. # # Can now run from any directory, with pre and post processing of the test # environment done by using the standard test script # # Uses the qatest.dat file to find and run tests. No need to create # all the test results first # # Cleans up the qa test directory on success # # Skips bad tests with a log message - these should be fixed! # # The 'scripts/qatest.pl' script will be run with the argument '-kk' # to create the example usage, input and output files. use File::Basename; %rotations = ("density" => "90<", "dotmatcher" => "90<", "pepwheel" => "90>", ); open (LOG,">>makeexample.log") || die "Cannot append to makeexample.log"; ################################################################### # # Some useful definitions # ################################################################### open (VERS, "embossversion -full -auto|") || die "Cannot run embossversion"; while () { # if(/InstallDirectory: +(\S+)/) {$installtop = $1} if(/BaseDirectory: +(\S+)/) {$distribtop = $1} } close VERS; # where the qa directories are $qatop = "$distribtop/test"; $scripts = "$distribtop/scripts"; $embassy = ""; ################################################################### # get the name of the application if ($#ARGV < 0) { print "Name of the program >"; $application = ; } else { $application = $ARGV[0]; if ($#ARGV > 0) {$embassy = $ARGV[1]} } chomp $application; if (!defined $application || $application eq "") { die "No program specified\n"; } # where the web pages and include files live $doctop = "$ENV{HOME}/devemboss"; if ($embassy eq "") { $docdir = "$doctop/doc/programs/master/emboss/apps"; $incdir = "$docdir/inc"; } else { $docdir = "$doctop/embassy/$embassy/emboss_doc/master"; $incdir = "$docdir/inc"; } # some HTML $p = "

"; $bold = ""; $unbold = ""; # maximum number of lines of a file to be displayed $MaxLines = 100; %testok = (); %testkeep = (); # names of test databases @testdbs = ( 'tsw', 'tswnew', 'twp', 'tembl', 'tpir', 'tedam', 'ttax', 'tdrcat' ); # colours for backgrounds of usage examples, input and output files # these colours should work on all PCs/Macs/UNIX web browsers $usagecolour = "#CCFFFF"; # light cyan $inputcolour = "#FFCCFF"; # light violet $outputcolour = "#CCFFCC"; # light green ################################################################### # initialise the outputs $USAGE = ""; $INPUT = ""; $OUTPUT = ""; $hasinput = 0; $haskeystrokes = 1; $hasoutkeystrokes = ""; @testdbsoutput = (); @inputfilesshown = (); @outputfilesshown = (); # debug #open (OUT, "> /people/gwilliam/jjj.html") || die "Can't open debug OUT\n"; print LOG "\n"; print LOG "Doing $application\n"; ################################################################### # check for any qa '*-ex' example directories @dirs=(); open (QATEST, "$qatop/qatest.dat") || die "Cannot open $qatop/qatest.dat"; $expex = qr/^$application[-]ex/; $expcheck = qr/^$application[-]check/; $expkeep = qr/^$application[-]keep$/; while () { if (/^ID +(\S+)/) { $idqa = $1; if ($idqa =~ /$expex/) {push @dirs, $idqa}; if ($idqa =~ /$expcheck/) {push @dirs, $idqa}; if ($idqa =~ /$expkeep/) {push @dirs, $idqa}; } } # are there any results directories? if ($#dirs == -1) { errorexit("No qa tests were found for $application"); } # sort the directory names @dirs = sort @dirs; chdir ("$qatop/qa"); ################################################################### # get next example directory $count = 0; foreach $dotest (@dirs) { $dir = "$dotest"; print "Doing test $dotest\n"; print LOG "Doing test $dotest\n"; # run the test with -kk open (TEST,"$scripts/qatest.pl -kk -logfile=qatest.doclog $dotest 2>&1|"); $testok{$dotest} = 0; $testkeep{$dotest} = 0; while () { if (/^Tests total: 1 pass: 1 fail: 0$/) {$testok{$dotest}=1} } if (!$testok{$dotest}) { print LOG "Test $dotest failed\n"; print "Test $dotest failed\n"; if ($embassy eq "") { print STDERR "Test $dotest failed, ignored\n"; } else { print STDERR "Test ($embassy) $dotest failed, ignored\n"; } next; } $count++; ################################################################### # initialise some output messages for this example $testdbcomment = ""; $commandline = ""; $usagecomment = ""; $inputcomment = ""; $outputcomment = ""; ################################################################### # read in 'testdef' file of qa commands $testfile = "$dir/testdef"; open (TESTDEF, "< $testfile") || errorexit("Couldn't open file $testfile"); @testdef = ; close (TESTDEF); ################################################################### # parse 'CL, UC, IC, OC' lines from 'testdef' foreach $line (@testdef) { if ($line =~ /^CL\s+(.+)/) {$commandline .= "$1 ";} if ($line =~ /^UC\s+(.+)/) {$usagecomment .= "$1 ";} if ($line =~ /^IC\s+(.+)/) {$inputcomment .= "$1 ";} if ($line =~ /^FI\s+(\S+)/) {push(@outfiles, "$dir/$1");} if ($line =~ /^FK\s+(.+)/) {$hasoutkeystrokes = $1;} if ($line =~ /^DI\s+(.+)/) {push(@outfiles, "$dir/$1");$savedir = $1;} if ($line =~ /^DF\s+(.+)/) {$outdirfiles{$savedir} .= "$1 "; push(@outfiles, "$dir/$savedir/$1");} if ($line =~ /^OC\s+(.+)/) {$outputcomment .= "$1 ";} if ($line =~ /^DL\s+keep/) {$testkeep{$dotest} = 1;} if ($line =~ /^AB\s+(\S+)/) { $embassypackage = "$1"; $docdir = "$doctop/embassy/$embassypackage/emboss_doc/master"; $incdir = "$docdir/inc"; } if ($line =~ /^IN\s+/) {$hasinput = 1;} if ($line =~ /^IK\s+/) {$haskeystrokes = 1;} } # remove stderr, stdin, stdout, testdef, testlog from this list @outfiles = grep !/stdin|stderr|stdout|testdef|testlog/, @outfiles; ################################################################### # read in 'stderr' file of prompts # Watch out for acdtrace which has extra lines # in all other cases, @prompts = would be correct and simpler. $promptfile = "$dir/stderr"; open (PROMPTS, "< $promptfile" ) || errorexit("Couldn't open file $promptfile"); $plines=""; while () { s/Trace:.*\n//; s/\n/\n\r/; $plines .= $_; } close (PROMPTS); (@prompts) = split(/\r/, $plines); ################################################################### # read in 'stdin' file of responses to prompts if($hasinput) { $answerfile = "$dir/stdin"; open (ANSWERS, "< $answerfile") || errorexit("Couldn't open file $answerfile"); @answers = ; close (ANSWERS); } else { @answers = (); # if $haskeystrokes could put comment about input keystrokes here } @saveanswers = @answers; ################################################################### # read in 'stdout' file of results written to screen if($hasoutkeystrokes eq "stdout") { @results = (); # could put some comment about the output keystrokes here } else { $resultsfile = "$dir/stdout"; open (RESULTS, "< $resultsfile") || errorexit("Couldn't open file $resultsfile"); @results = ; close (RESULTS); } ################################################################### # change any ampersands or angle brackets to HTML codes $commandline =~ s/&/&/g; $commandline =~ s//>/g; $usagecomment =~ s/&/&/g; $usagecomment =~ s//>/g; $inputcomment =~ s/&/&/g; $inputcomment =~ s//>/g; $outputcomment =~ s/&/&/g; $outputcomment =~ s//>/g; ################################################################### # change newlines to
$commandline =~ s/\n/
\n/g; $usagecomment =~ s/\n/
\n/g; $inputcomment =~ s/\n/
\n/g; $outputcomment =~ s/\n/
\n/g; ################################################################### # see if any of the arguments of CL are real input files @infiles = (); @inusas = (); # split on space, >, <, = - all are possible before a file name foreach $f (split /\s|\>|\<|\=/, $commandline) { chomp $f; # ignore qualifiers - words starting with a '-' if ($f =~ /^-/) {next;} # split on '::' to get files embedded in a format::file USA $f =~ s/^[']//; $f =~ s/[']$//; if ($f =~ /\:\:/) { print "CL line=$f\n"; @fm = split /\:\:/, $f; $f = pop @fm; print "CL f=$f\n"; } # deal with '@' in list files $f =~ s/\@//g; # deal with '*' in quotes $f =~ s/[*]/\\*/g; # check to see if this is an output file existing in the -ex directory if (grep /^$f$/, @outfiles) { next; } # check to see if it is a real file (not a directory) if (-f "$dir/$f" && ! -d "$dir/$f") { # we assume all files in the *-ex directory are output files # check for '.' or '/' at start of path if ($f =~ /^\./ || $f =~ m#^\/#) { #print "displaying infile=$f\n"; push @infiles, $f; } } else { if($f !~ /^pseudocap:/) { push @inusas, $f; } } } ################################################################### # see if any of the answers to prompts are real files # split on '::' to get files embedded in a format::file USA foreach $line (@answers) { chomp $line; if (length $line) { #print "line=$line\n"; @usas = split /\:\:/, $line; $f = pop @usas; #print "f=$f\n"; # deal with '@' in list files $f =~ s/\@//; # check to see if this is an output file existing in the -ex directory if (($f !~ /[*]/) && grep /^$f$/, @outfiles) { next; } # check to see if it is a real file (not a directory) #print "test for file: $dir/$f\n"; if (-f "$dir/$f" && ! -d "$dir/$f") { # we assume all files in the *-ex directory are output files # check for '.' or '/' at start of path if ($f =~ /^\./ || $f =~ m#^\/#) { #print "displaying infile=$f\n"; push @infiles, $f; } } else { push @inusas, $line; } } } ################################################################### # see if any of the answers to prompts are lists of real files # split on ',' to get files embedded in a file list 'file1,file2,file3' foreach $line (@answers) { chomp $line; if (length $line) { @filelist = split /,\s*/, $line; foreach $f (@filelist) { # deal with '@' in list files $f =~ s/\@//; # if ':' in it then ignore - not a file if ($f =~ /\:/) { #print "ignoring $f in list of input files\n"; next; } # check to see if it is a real file (not a directory) if (-f "$dir/$f" && ! -d "$dir/$f") { # we assume all files in the *-ex directory are output files # check for '.' or '/' at start of path if ($f =~ /^\./ || $f =~ m#^\/#) { push @infiles, $f; } } else { push @inusas, $line; } } } } ################################################################### # see if we use the test databases anywhere in the command line or answers foreach $f (@answers, split (/\s|\>|\<|\=/, $commandline)) { chomp $f; @d = split (/\:/, $f); foreach $d (@d) { if ($d =~ /^[\{]/) {next;} if ($d =~ /\*/) {next;} if ($d =~ /[\[]/ && $d !~ /[\[].*[\]]/ ) {next} if (grep /^$d$/, @testdbs) { # check we have not made any other comments about this test database if (! grep /$d/, @testdbsoutput) { if ($d eq "tembl") { $type = "nucleic acid"; } elsif ($d eq "tedam") { $type = "obo"; } elsif ($d eq "edam") { $type = "obo"; } elsif ($d eq "tobo") { $type = "obo"; } elsif ($d eq "tdrcat") { $type = "resource"; } elsif ($d eq "ttax") { $type = "taxonomy"; } else { $type = "protein"; } $testdbcomment .= "\n'$f' is a sequence entry in the example $type database '$d'\n$p\n"; push @testdbsoutput, $d; } } } } ################################################################### # usage documentation ################################################################### # example count if ($count == 1) { $USAGE .= qq|Here is a sample session with $application\n$p\n|; } else { $USAGE .= qq|$p\nExample $count\n$p\n|; } # usage comment if ($usagecomment ne "") { $USAGE .= qq|$usagecomment\n$p\n|; } # blank line $USAGE .= qq|\n$p\n|; # start table (light cyan) $USAGE .= qq|
\n\n|; # application name and command line (we just use the test results) # print "Doing: \% $application $commandline\n"; $usecommandline = $commandline; $usecommandline =~ s/[.][.]\/..\/data\/[^\/]+\///; # ../../data/(embassy)/ $usecommandline =~ s/[.][.]\/..\/data\///; # ../../data/ $usecommandline =~ s/[.][.]\/..\///; # ../../embl (etc.) $USAGE .= qq|% $bold$application $usecommandline$unbold\n|; # intercalate prompts and answers @pr = (); foreach $line (@prompts) { $line =~ s/[.][.]\/..\/data\/[^\/]+\///; # ../../data/(embassy)/ $line =~ s/[.][.]\/..\/data\///; # ../../data/ $line =~ s/[.][.]\/..\///; # ../../embl (etc.) push @pr, split /([^\s]+: )/, $line; } foreach $line (@pr) { $USAGE .= qq|$line|; # change ':'s in warning messages so that they don't look like prompts # although if we get a warning, things are probably going badly wrong anyway #print "prompt=$line\n"; $line =~ s/Warning:/Warning :/; $line =~ s/Error:/Error :/; $line =~ s/Fatal:/Fatal :/; # We also get warnings from blastpgp in some domainatrix apps $line =~ s/WARNING:/WARNING :/; $line =~ s/posFindAlignmentDimensions:/posFindAlignmentDimensions :/; $line =~ s/posProcessAlignment:/posProcessAlignment :/; # print the answer in bold #print "$line\n"; if ($line =~ /[^\s]: $/) { $ans = shift @answers; #print "prompt=$line\n"; #print "ans=$ans\n"; if (!defined($ans)) { $ians = $#saveanswers - $#answers; print STDERR "application '$application' \@answers[$ians] undefined\n"; # $ians = 0; # foreach $sa (@saveanswers) { # $ians++; # print STDERR "\$saveanswers[$ians] = '$sa'\n"; # } $ans = "\n"; } $ans =~ s/[.][.]\/..\/data\/[^\/]+\///; # ../../data/(embassy)/ $ans =~ s/[.][.]\/..\/data\///; # ../../data/ $ans =~ s/[.][.]\/..\///; # ../../embl (etc.) $USAGE .= "$bold$ans$unbold\n"; } } # have we used all of our answers? if ($#answers != -1) { print STDERR "WARNING **** application '$application' example $count test $dotest hasn't used ", $#answers+1, " answers\n"; print LOG "WARNING **** test $dotest hasn't used ", $#answers+1, " answers\n"; } # display any results found in the 'stdout' file $USAGE .= qq|\n|; foreach $r (@results) {$USAGE .= qq|$r|;} # end stdout results with another blank line if ($#results > -1) {$USAGE .= qq|\n|;} # end table $USAGE .= qq|
$p\n|; # blank line $USAGE .= qq|$p\n|; ################################################################### # input documentation ################################################################### # If the command line of answers contain tembl, tsw etc. # add a comment about these being test databases. # only add this comment once $I = $testdbcomment; # input comment $I .= $inputcomment; # foreach input file foreach $file (@infiles) { #print "input file=$file\n"; $shortfile = $file; $shortfile =~ s/[.][.]\/..\/data\/[^\/]+\///; # ../../data/(embassy)/ $shortfile =~ s/[.][.]\/..\/data\///; # ../../data/ $shortfile =~ s/[.][.]\/..\///; # ../../embl (etc.) # if this some sort of binary file? if (checkBinary("$dir/$file")) { $I .= $p . "

File: $shortfile

\n"; $I .= $p . "This file contains non-printing characters and so cannot be displayed here.\n"; # normal file that can be displayed } else { # has the name has been used before if (! grep /^$file$/, @inputfilesshown) { push @inputfilesshown, $file; # no - contruct new display of file # add example number to the list of examples that use this file $I .= displayFile($shortfile, "$dir/$file", $inputcolour); } } } # the @inusas list of inputs are possible USAs, but have not been checked. # see if they have a ':' in them and display using entret. # If it has an '*' then ignore it, foreach $f (@inusas) { if ($f =~ /\:/ && $f !~ /\*/ && $f !~ /http\:/) { # has the name has been used before if (! grep /^$f$/, @inputfilesshown) { push @inputfilesshown, $f; $I .= displayEntry($f, $inputcolour); } } } # If any new files were output for this example, if (length $I) { $INPUT .= qq|\n\n

Input files for usage example |; if ($count > 1) {$INPUT .= "$count";} $INPUT .= "

\n"; $INPUT .= $I; # add a link from the usage $USAGE .= qq|Go to the input files for this example
|; } ################################################################### # output documentation ################################################################### $O = ""; # output comment $O .= $outputcomment; # foreach output file foreach $path (@outfiles) { #print "output file=$path\n"; # $basefile = basename($path); if ($path !~ /^$dir\//) {next} # multiple tests $file = $path; $file =~ s/^$dir\///; $filetype = `file $path`; # if this a .gif, .ps or .png graphics file? if ($filetype =~ /: GIF image data|: PNG image data|: PostScript /) { # convert .ps file to gif $giffile = ""; $origfile = $file; if ($file =~ /([a-z0-9_]+)\.ps$/) { $pname = $1; $giffile = $file; $giffile =~ s/\.[a-z]*ps2?/.gif/; $rotate = "90<"; if(defined($rotations{$pname})){ $rotate = $rotations{$pname}; # print STDERR "rotate '$rotate'\n"; } # add -delay to see the first page of an animated gif for 10 mins # add -delay to see the first page of an animated gif for 10 mins system("2>&1 convert -delay 65535 -rotate '$rotate' $path $giffile >/dev/null"); $file = $giffile; $path = $giffile; } elsif ($file =~ /\.([a-z]+)ps$/) { $giffile = $file; $giffile =~ s/\.([a-z]+)ps/.$1.gif/; system("2>&1 convert -delay 65535 -rotate '-90<' $path $giffile >/dev/null"); $file = $giffile; $path = $giffile; } elsif ($file =~ /\.ps2$/) { $giffile = $file; $giffile =~ s/\.ps(\d)/.$1.gif/; # add -delay to see the first page of an animated gif for 10 mins system("2>&1 convert -delay 65535 -rotate '-90<' $path $giffile >/dev/null"); $file = $giffile; $path = $giffile; } elsif ($filetype =~ /: PostScript /) { $giffile = $file; $giffile .= ".gif"; # add -delay to see the first page of an animated gif for 10 mins system("2>&1 convert -delay 65535 -rotate '-90<' $path $giffile >/dev/null"); $file = $giffile; $path = $giffile; } # rename file to be unique - application name . example count . given file name $newfile = "$application.$count.$file"; system("cp $path $docdir/$newfile"); chmod 0664, "$docdir/$newfile"; # tidy up if ($giffile ne "") { unlink $giffile; } # display the graphics file $O .= $p . "

Graphics File: $origfile

\n"; $O .= $p . qq| [$application results]

\n|; # if this some other binary file? } elsif (-d $path) { $O .= $p . "

Directory: $file

\n"; $O .= $p . "This directory contains output files"; if(defined($outdirfiles{$file})) { $outtmp = $outdirfiles{$file}; $outtmp =~ s/ (\S+) $/ and $1/; $outtmp =~ s/ $//; $O .= ", for example $outtmp\.\n"; } else { $O .= ".\n$p\n"; } } elsif (checkBinary($path)) { $O .= $p . "

File: $file

\n"; $O .= $p . "This file contains non-printing characters and so cannot be displayed here.\n"; # normal file that can be displayed } else { # has the name has been used before (match to end of path in @outputfilesshown) @o = grep /$file$/, @outputfilesshown; if ($#o == -1) { push @outputfilesshown, $path; # no - contruct new display of file #print "displaying $file\n"; $O .= displayFile($file, $path, $outputcolour); } else { # do a diff of the two files $ofile = pop @o; system ("diff $ofile $path> diff.tmp"); if ( -s "diff.tmp" ) { # it is a different file $O .= displayFile($file, $path, $outputcolour); } unlink "diff.tmp"; } } } # If any new files were output for this example, if (length $O) { $OUTPUT .= qq|\n\n

Output files for usage example |; if ($count > 1) {$OUTPUT .= "$count";} $OUTPUT .= "

\n"; $OUTPUT .= $O; # add a link from the usage $USAGE .= qq|Go to the output files for this example$p|; } # force a blank line to be at the end of the usage $USAGE .= "$p\n"; } $testsfailed = ""; $nfailed = 0; foreach $dotest (@dirs) { if (!$testok{$dotest}) { if ($nfailed) {$testsfailed .= ", "} $testsfailed .= "$dotest"; $nfailed++; } } if ($nfailed) { if($nfailed == 1) { errorexit("Test $testsfailed failed"); } else { errorexit("Tests $testsfailed failed"); } } ################################################################### # create the usage, input and output documentation files writeUsage($USAGE); writeInput($INPUT); writeOutput($OUTPUT); # debug #print OUT "USAGE=\n$USAGE\n"; #print OUT "INPUT = \n$INPUT"; #print OUT "OUTPUT = \n$OUTPUT"; #close(OUT); foreach $dotest (@dirs) { if ($testok{$dotest}) { if (!$testkeep{$dotest}) { system ("rm -rf $dotest"); } } } close LOG; exit(0); ################################################################### ################################################################### ################################################################### # Name: errorexit # Function: leaves output files unchanged and exits with an error message # Args: string - error message # Returns: exits - no return ################################################################### sub errorexit { my $msg = $_[0]; my $usage = "Error: $msg"; my $input = "Error: $msg"; my $output = "Error: $msg"; # writeUsage($usage); # writeInput($input); # writeOutput($output); if ($embassy eq "") { print STDERR "Error: $msg\n"; print "Error: $msg\n"; } else { print STDERR "Error: ($embassy) $msg\n"; print "Error: ($embassy) $msg\n"; } exit 1; } ################################################################### # Name: writeUsage # Function: writes usage file # Args: string - example usage # Returns: ################################################################### sub writeUsage { my $usage = $_[0]; my $out = "$incdir/$application.usage"; open (OUT, "> $out") || die "Can't open $out"; $usage =~ s/\/data1\/[Uu]sers\/pmr\/local/\/homes\/pmr\/local/go; $usage =~ s/\/data1\/[Uu]sers\/pmr\/devemboss/\/homes\/user/go; $usage =~ s/\/homes\/pmr\/devemboss/\/homes\/user/go; $usage =~ s/(Guide tree +file created: +)\[[A-Z0-9]+\]/$1\[12345678A]/go; $usage =~ s/(GCG-Alignment file created +)\[[A-Z0-9]+\]/$1\[12345678A]/go; $usage =~ s/domainalign\-[0-9]+[.][0-9]+/domainalign-1234567890.1234/go; $usage =~ s/domainrep\-[0-9]+[.][0-9]+[.]/domainrep-1234567890.1234./go; $usage =~ s/domainrep\-[0-9]+[.][0-9]+/domainrep-1234567890.1234/go; $usage =~ s/pdbplus\-[0-9]+[.][0-9]+/pdbplus-1234567890.1234/go; $usage =~ s/seqalign\-[0-9]+[.][0-9]+[.]/seqalign-1234567890.1234./go; $usage =~ s/seqsearch\-[0-9]+[.][0-9]+[.]/seqsearch-1234567890.1234./go; $usage =~ s/hmmalign\-[0-9]+[.][0-9]+/hmmalign-1234567890.1234/go; $usage =~ s/ehmmalign\-[0-9]+[.][0-9]+/ehmmalign-1234567890.1234/go; $usage =~ s/hmmpfam\-[0-9]+[.][0-9]+/hmmpfam-1234567890.1234/go; $usage =~ s/ehmmpfam\-[0-9]+[.][0-9]+/ehmmpfam-1234567890.1234/go; $usage =~ s/Localtime: ... ... +\d+ [0-9:]+ 2[0-9][0-9][0-9]$/Localtime: Fri Jul 15 12:00:00 2011/gom; $usage =~ s/SUBMITTED iprscan-\d+-\d+/SUBMITTED-iprscan-20110715-12345678/go; print OUT $usage; close(OUT); chmod 0664, $out; # rw-rw-r-- } ################################################################### # Name: writeInput # Function: writes input file # Args: string - example input # Returns: ################################################################### sub writeInput { my $input = $_[0]; my $out = "$incdir/$application.input"; open (OUT, "> $out") || die "Can't open $out"; $input =~ s/DATE [A-Z][a-z][a-z] [A-Z][a-z][a-z] +[0-9]+ [0-9:]+ 20[01][0-9]/DATE Fri Jul 15 12:00:00 2011/go; print OUT $input; close(OUT); chmod 0664, $out; # rw-rw-r-- } ################################################################### # Name: writeOutput # Function: writes output file # Args: string - example output # Returns: ################################################################### sub writeOutput { my $output = $_[0]; my $out = "$incdir/$application.output"; open (OUT, "> $out") || die "Can't open $out"; $output =~ s/caissa/emboss4.ebi.ac.uk/go; $output =~ s/\/data1\/[Uu]sers\/pmr\/local/\/homes\/pmr\/local/go; $output =~ s/\/data1\/[Uu]sers\/pmr\/devemboss/\/homes\/user/go; $output =~ s/\/homes\/pmr\/devemboss/\/homes\/user/go; $output =~ s/DATE [A-Z][a-z][a-z] [A-Z][a-z][a-z] +[0-9]+ [0-9:]+ 20[01][0-9]/DATE Fri Jul 15 12:00:00 2011/go; $output =~ s/CreationDate: ... ... +\d+ [0-9:]+ 2[0-9][0-9][0-9]$/CreationDate: Fri Jul 15 12:00:00 2011/gom; $output =~ s/Rundate: ... ... +\d+ 2[0-9][0-9][0-9] [0-9:]+$/Rundate: Fri Jul 15 2011 12:00:00/gom; $output =~ s/Localtime: ... ... +\d+ [0-9:]+ 2[0-9][0-9][0-9]$/Localtime: Fri Jul 15 12:00:00 2011/gom; $output =~ s/\#\#date 2[0-9][0-9][0-9][-][0-9][0-9][-][0-9][0-9]$/\#\#date 2011-07-15/gom; $output =~ s/domainalign\-[0-9]+[.][0-9]+[.]/domainalign-1234567890.1234./go; $output =~ s/domainrep\-[0-9]+[.][0-9]+[.]/domainrep-1234567890.1234./go; $output =~ s/seqalign\-[0-9]+[.][0-9]+[.]/seqalign-1234567890.1234./go; $output =~ s/seqsearch\-[0-9]+[.][0-9]+[.]/seqsearch-1234567890.1234./go; $output =~ s/hmmalign\-[0-9]+[.][0-9]+/hmmalign-1234567890.1234/go; $output =~ s/hmmpfam\-[0-9]+[.][0-9]+/hmmpfam-1234567890.1234/go; $output =~ s/Time 0\.00[0-5][0-9][0-9][0-9] secs\./Time 0.001999 secs./go; $output =~ s/Time 0\.[0-9][0-9] secs\./Time 0.50 secs./go; $output =~ s/Time [1-5]\.[0-9][0-9] secs\./Time 1.50 secs./go; $output =~ s/Time 1[0-9]\.[0-9][0-9] secs\./Time 11.50 secs./go; $output =~ s/\%\%Creator: (\S+ [\d.]+) 2[0-9][0-9][0-9]\/[0-9[0-9]\/[0-9[0-9]/Creator: $1 2011\/07\/15/go; $output =~ s/mse\.msf MSF: 120 Type: N \d+\/\d+\/\d+ CompCheck: 2784/mse.msf MSF: 120 Type: N 15\/07\/10 CompCheck: 2784/go; $output =~ s/ \d+-[A-Z][a-z][a-z]-2[01][0-9][0-9] / 15-Jul-2011 /go; print OUT $output; close(OUT); chmod 0664, $out; # rw-rw-r-- } ################################################################### # Name: displayEntry # Function: returns a string used to display a database entry # Args: string - USA # string - colour # Returns: string - HTML formatted entry contents ################################################################### sub displayEntry { my $usa = $_[0]; my $colour = $_[1]; my $res = ""; #print "In displayEntry($usa)\n"; # if the 'file name' contains 'http:' then ignore it :-) if ($usa =~ /http\:/) { return $res; } # if the USA has a single ':', use entret, else it is a file and we use seqret if ($usa =~ /T?EDAM\:/i) { system ("ontoget $usa z.z -obsolete -auto"); } elsif ($usa =~ /T?TAX(-nam)?\:/i) { system ("taxget $usa z.z -auto"); } elsif ($usa =~ /T?DRCAT\:/i) { system ("drget $usa z.z -auto"); } elsif ($usa =~ /srs\:unilib\:/i) { system ("textget $usa z.z -auto"); } elsif ($usa !~ /\:\:/ && $usa =~ /\S\:/) { system ("entret $usa z.z -auto"); } elsif ($usa =~ /\:\:/) { system ("seqret $usa z.z -auto"); } $res = displayFile($usa, "z.z", $colour); unlink "z.z"; return $res; } ################################################################### # Name: displayFile # Function: returns a string used to display a file # Args: string - file name as used in the example usage # string - path to file # string - colour # Returns: string - HTML formatted files contents ################################################################### sub displayFile { my $file = $_[0]; my $path = $_[1]; my $colour = $_[2]; my $result = ""; my @lines; my $count; #print STDERR "In displayFile($file)\n"; # if the 'file name' contains 'http:' then ignore it :-) if ($file =~ /http\:/) { return $result; } # if the file has the name 'z.z' (used by displayEntry) then say in the # title that it is a database entry rather than a file if ($path eq "z.z") { $result = $p . "

Database entry: $file

\n"; } else { $result = $p . "

File: $file

\n"; } # start table $result .= qq|

\n|; # if not an .html file, put it in a PRE block if ($file !~ /.html$/) { $result .= qq|

\n|;
    }

    open (F, "< $path") || die "Can't open input file $path";
    @lines = ;
    close (F);

# convert <, >, & to HTML codes if the file is not a .html file
    if ($path !~ /.html$/) {
        foreach my $l (@lines) {
            $l =~ s/[&]/&/g;
            $l =~ s/[<]/</g;
            $l =~ s/[>]/>/g;
        }
    }

# if file is too long, cut out the middle bit;
    if ($#lines > $MaxLines) {
        for ($count = 0; $count < $MaxLines/2; $count++) {
	    if ($path =~ /.html$/) {
		$lines[$count] =~ s/[&]/&/g;
		$lines[$count] =~ s/[<]/</g;
		$lines[$count] =~ s/[>]/>/g;
	    }
            $result .= $lines[$count];
        }
        $result .= "\n\n  [Part of this file has been deleted for brevity]\n\n";
        for ($count = $#lines - $MaxLines/2; $count <= $#lines; $count++) {
	    if ($path =~ /.html$/) {
		$lines[$count] =~ s/[&]/&/g;
		$lines[$count] =~ s/[<]/</g;
		$lines[$count] =~ s/[>]/>/g;
	    }
            $result .= $lines[$count];
        }
    }
    else {
        $result .= join "", @lines;
    }

# if not an .html file, put it in a PRE block
    if ($file !~ /.html$/) {
        $result .= qq|

\n|; } # end table $result .= qq|

$p\n|; return $result; } ################################################################### # Name: checkBinary # Function: checks to see if a file has non-printing characters in # Args: string - path to file # Returns: bool - true if file is binary ################################################################### sub checkBinary { my $file = $_[0]; my $count; my $buf; my $c; open(B, "< $file") || die "Can't open file $file\n"; # get the first 1Kb characters read B, $buf, 1024; close(B); #my @n = unpack('C*', $buf); #for ($count = 0; $count <= $#n; $count++) { # $c = pop @n; #print "c=$c\n"; # if ($c >= 32 && $c <= 126) {next;} # if ($c == ord("\t") || $c == ord("\n") || $c == ord("\r")) {next;} # #print "Binary\n"; # return 1; # } # #print "Not Binary\n"; # return 0; for ($count = 0; $count < length $buf; $count++) { $c = ord(substr($buf, $count, 1)); if ($c >= 32 && $c <= 126) {next;} if ($c == ord("\t") || $c == ord("\n") || $c == ord("\r")) {next;} return 1; } return 0; }