#!/bin/sh -

# This script will send a file to netscape with database IDs linked to SRS.

# To customize this script edit the $PROTEIN_DATABASES, $DNA_DATABASES and
# $SRS_SERVER variables in the perl code below and the NETSCAPE variable at
# the top of the script.

# To enable this in Artemis, sanger_options must be set to true in the options
# file.

# Choose the browser.  On Mac OS X hand the file to open(1); elsewhere take
# the first of the listed browsers that exists.
if [ `uname` = Darwin ]; then
  NETSCAPE=/usr/bin/open
else
  for f in /usr/bin/x-www-browser \
           /usr/bin/X11/real-netscape \
           /usr/bin/firefox \
           /usr/bin/iceweasel \
           /usr/bin/mozilla; do
    if [ -f "$f" ]; then
      NETSCAPE=$f
      break
    fi
  done
fi

# last resort: whatever firefox is on the PATH
if [ ! -f "$NETSCAPE" ]; then
  NETSCAPE=`which firefox`
fi

# optional site-specific environment settings
if [ -f "$DIANA_ENVIRONMENT_FILE" ]
then
    . "$DIANA_ENVIRONMENT_FILE"
fi

if [ $# = 0 ]
then
    echo no argument given 1>&2
    exit 1
fi

file_arg=$1

# process id plus host name keeps the names of concurrent output files apart
unique_bit=$$.`hostname`

# sanger hack:
file_arg=`echo "$file_arg" | sed 's@^/tmp_mnt/nfs/@/nfs/@' | sed 's@^/tmp_mnt/tmp_nfs/@/nfs/@'`

# fix for pcs3:
file_arg=`echo "$file_arg" | sed 's@^/yeastpub4/@/nfs/disk222/yeastpub4/@'`

# fix for pcs4:
file_arg=`echo "$file_arg" | sed 's@^/.automount/pcs3/root/nfs/@/nfs/@'`
file_arg=`echo "$file_arg" | sed 's@^/.automount/evs-users2/root/@/nfs/@'`

if [ -f "./$file_arg" ]
then
    # the file is in the current directory - we need the full path so netscape
    # can find the file
    new_file=$PWD/$file_arg.$unique_bit.html
else
    new_file=$file_arg.$unique_bit.html
fi

###
# sanger fix for /nfs/repository which is not mounted on workstations:
# write the page under $HOME/artemis_tmp instead, keeping only the base name
REPOSITORY=`echo "$new_file" | sed -n -e 's|^\/nfs\/repository\/\(.*\)\(\/\)\(.*\)$|\3|p'`
if [ "$REPOSITORY" != "" ]; then
  if [ ! -d "$HOME/artemis_tmp" ]; then
    mkdir "$HOME/artemis_tmp"
  fi
  new_file="$HOME/artemis_tmp/$REPOSITORY"
fi
#
###

# Start the HTML page.  The perl code below appends the marked-up results.
# NOTE(review): the markup in this here-document was reconstructed (only the
# title text survived in the reviewed copy) - confirm against upstream.
cat <<EOF > "$new_file"
<html>
<head>
<title>
Results for $file_arg
</title>
</head>
<body>
<pre>
EOF

perl -e '
BEGIN {
  # Settings and shared state.  These are package globals (no "my") so that
  # the code following this block can see them.

  # change these variables to list the databases to search for the IDs - the
  # database names should be separated by spaces
  $PROTEIN_DATABASES = "uniprot";
  $DNA_DATABASES = "embl";

  # change this to point to the wgetz script of your SRS server
  #$SRS_SERVER = "www.sanger.ac.uk/srs6bin/cgi-bin/wgetz?-e+";
  #$SRS_SERVER = "srs.ebi.ac.uk/srsbin/cgi-bin/wgetz?-e+";
  $SRS_SERVER = "www.bioinformatics.nl/srsbin/cgi-bin/wgetz?-e+";

  # URL-escape the spaces that separate the database names
  $PROTEIN_DATABASES =~ s/ /%20/g;
  $DNA_DATABASES =~ s/ /%20/g;

  # Regular expressions, keyed by organism, for IDs that are treated as
  # GeneDB IDs.  The dots are escaped so that only a literal "." is accepted
  # between the numeric parts.
  %GENEDB_PATTERNS = (tryp => q!Tb\d+\.\d+\.\d+|TRYP_\S+!,
                      leish => q!LmjF\d+\.\d+!);

  # all of the above combined into one alternation
  $GENEDB_PATTERN = join ("|", values %GENEDB_PATTERNS);

  # the line that ends the header and introduces the hit summary
  $BLAST_START_LINE = "Sequences producing High-scoring Segment Pairs|" .
    "Sequences producing significant alignments:";
  $FASTA_START_LINE = "The best scores are";

  # the list of IDs we have seen so far
  @ids = ();

  # the list of IDs we have made anchors for so far
  @anchored_ids = ();

  # "dna", "protein" or "unknown"; set from the first line of the input
  $db_type = "unknown";
}

# Return an HTML link whose target is the in-page fragment named after the
# given ID, with the ID itself as the link text.
#
# Argument: the ID string.  The argument is kept in a lexical so that calling
# this sub does not overwrite the package variable $id used elsewhere.
sub hyperlink_to_anchor
{
  my ($id) = @_;
  return qq(<a href="#$id">$id</a>);
}

# Called by the main loop with a database ID; returns the string that the
# loop inserts as the link for that ID.
#
# Side effects: the argument is stored in the package variable $id and the
# result in the package variable $r (neither is declared with "my").
#
# NOTE(review): as written, every qq#...# below contains only the ID, so all
# three branches return the same text and $db_type and the GeneDB pattern
# lookup do not change the result.  The strings look as if their <a href=...>
# markup has been lost - confirm against the upstream copy of this script.
sub hyperlink_id
{
  $id = shift;

  if ($db_type eq "dna") {
    # hit from a nucleotide database
    $r = qq#$id#;
  } else {
    # IDs matching one of the per-organism patterns get their own link and
    # return straight away
    for my $org (keys %GENEDB_PATTERNS) {
      my $pattern = $GENEDB_PATTERNS{$org};

      if ($id =~ /$pattern/) {
        $r = qq#$id#;
        return $r
      }
    }

    # Text Entry
    $r = qq#$id#;
    
    # alternative link forms, currently disabled
    #$r = qq#$id#;
    #$r = qq#$id#;
  }
  return $r
}

# The file to mark up is the first argument given to this perl code.
$file_name = $ARGV[0];

# Open the input on the IN_FILE handle.  An explicit mode is used and, for
# compressed input, gzip is started without a shell, so that characters in
# the file name are never treated as an open mode or as shell syntax.  The
# "--" stops gzip from reading a name that starts with "-" as an option.
if ($file_name =~ /\.gz$/) {
  open IN_FILE, "-|", "gzip", "-dc", "--", $file_name
    or die "failed to open $file_name\n";
} else {
  open IN_FILE, "<", $file_name or die "failed to open $file_name\n";
}

# Main pass: copy IN_FILE to standard output one line at a time, adding
# links around the database IDs that start a line.
#
#  - header lines (up to and including the BLAST/FASTA start line) are
#    copied unchanged
#  - in the hit summary each new ID is remembered in @ids, linked with
#    hyperlink_to_anchor() and followed by a "LINK:" entry from hyperlink_id()
#  - after the summary, the first line that starts with a remembered ID is
#    turned into the named anchor for that ID
while (<IN_FILE>) {
  # the first word of the first line names the program that wrote the file,
  # which decides whether the hits are DNA or protein entries
  if ($. == 1) {
    if (/^\s*([^\s]+)/) {
      my $program = $1;
      my %type_of = (blastn => "dna", tblastn => "dna", tblastx => "dna",
                     fasta => "protein", blastp => "protein",
                     blastx => "protein");

      if (exists $type_of{lc $program}) {
        $db_type = $type_of{lc $program};
      } else {
        print "\nWARNING: could not identify file type: $program\n";
      }
    } else {
      print "\nWARNING: could not identify file type\n";
    }
  }


  # ignore header lines
  if (1..m/$BLAST_START_LINE|$FASTA_START_LINE/) {
    print;
    next;
  }

  # the first blank line after at least one ID ends the summary
  if (@ids && /^\s*$/) {
    $summary_finished = 1;
  }

  # NOTE(review): the last alternative contains \\. which matches a literal
  # backslash followed by any character - confirm whether \. was intended.
  if (/^>?\w+\|\w+\|(\w+)|^(?:(?:>?>?(?:[A-Z]+:)?)(\w+)|($GENEDB_PATTERN))|^(?:(?:>?>?(?:[A-Z]+:)?)(\w+)\\.\d+\s+) /) {

    # which capture group holds the ID depends on the alternative that
    # matched, so take the first one that is set
    my ($id) = grep { defined $_ } $1, $2, $3, $4;

    if ($summary_finished) {
      if ((grep {$_ eq $id} @ids) && (!grep {$_ eq $id} @anchored_ids)) {
        # not anchored yet so make it an anchor
        if (s/\b\Q$id\E\b/"<a name=\"$id\">" . (hyperlink_id($id)) . "<\/a>"/e) {
          push @anchored_ids, $id;
        }
      }
    } else {
      if (!grep {$_ eq $id} @ids) {
        push @ids, $id;
      }

      # \Q...\E: the ID is data, not a pattern
      s/\Q$id\E/hyperlink_to_anchor($id)/ei;

      # put a link to the database entry at the end of the line
      s/$/"  LINK:" . hyperlink_id($id)/e;
    }
  }
  print;
}

' $file_arg >> $new_file

# Finish the HTML page.
# NOTE(review): the markup in this here-document was reconstructed (it was
# empty in the reviewed copy) - confirm against upstream.
cat <<EOF >> "$new_file"
</pre>
</body>
</html>
EOF

# delete it at some point
echo "rm -f $new_file > /dev/null 2>&1" | at now + 12 hours

# MAC OS X => Safari browser
# The command then is the following
if [ -f "/usr/bin/open" -a `uname` = Darwin ]
then
    /usr/bin/open "$new_file"
    exit 0
elif [ -f "/Applications/Safari.app/Contents/MacOS/Safari" ]
then
    if $NETSCAPE "$new_file"
    then
        exit 0
    fi
# For Netscape or mozilla
# Use openURL($new_file, new-tab) to get new-tab
elif $NETSCAPE -remote "openURL($new_file)"
then
    # a running browser took the URL; give it time to load before cleaning up
    sleep 1m
    rm -f "$new_file"
    exit 0
else
    echo starting new netscape 2>&1

    # netscape is not running - so start it
    ($NETSCAPE &)&

    # now send the URL.  we do things this way so that the script does not
    # exit until netscape has successfully shown the URL
    sleep 1

    # keep trying until the file is successfully displayed or until
    # 40 seconds is up (20 attempts, 2 seconds apart)
    for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
    do
        if $NETSCAPE -remote "openURL($new_file)" 2> /dev/null
        then
            sleep 1m
            rm -f "$new_file"
            exit 0
        else
            sleep 2
        fi
    done

    exit 1
fi