#!/bin/csh -f ############################################################################ ############################################################################ ## File: blast_search # $Id: blast_search,v 1.4 1997/11/23 16:53:56 mieg Exp $ # This file is part of the ACEDB package # Author: J Thierry-Mieg, (mieg@kaa.crbm.cnrs-mop.fr) # Copyright (C) R.Durbin & J Thierry-Mieg, 1995 #------------------------------------------------------------------- # Usage: Usage blast_search pid export_file_name fasta_file blast_pgm db # The order is crucial, because pid export_file_name are added # by externalAsynchroneCommand() #------------------------------------------------------------------- # Description: # Calls blast on the vega server on the given fasta file # Reformats the result in .ace using Erik Sonhamer MSPcrunch # Exports the result in export_file_name.out # be careful to the .out which is automatically added for # compatibility with the acedb main code # Sends a signal to pid # #------------------------------------------------------------------- # Configuration: # For this system to run on your own server # you must duplicate the vega environment, contact us about that # To run the client, you need perl and the network # ############################################################################ ############################################################################ ## Choose the blast server echo blast_search $1 $2 $3 $4 $5 setenv server vega.crbm.cnrs-mop.fr setenv blastPort 20200 ############################################################################ ## Check for particular args and default paths if ($5 == "") goto usage if (! -e $3) goto usage setenv ici `pwd` if ($?ACEDB) then setenv s $ACEDB/wscripts else # echo ACEDB not defined if (-d wscripts) then setenv s $ici/wscripts else setenv s $ici endif endif cd $s if ($?ACEDB_SRC) then if (-d $?ACEDB_SRC) then cd $?ACEDB_SRC endif endif if ($?ACEDB_MACHINE) then if (-d bin.$ACEDB_MACHINE) then cd bin.$ACEDB_MACHINE else if ($?ACEMBLY) then if ($?ACEDB_MACHINE) then if (-d $ACEMBLY/bin.$ACEDB_MACHINE) then cd $ACEMBLY/bin.$ACEDB_MACHINE endif endif endif endif endif if (-x gnbkclient) then setenv gnbkclient `pwd`/gnbkclient else if (-x /usr/local/bin/gnbkclient) then setenv gnbkclient /usr/local/bin/gnbkclient else echo Sorry Can t find /usr/local/bin/gnbkclient goto usage endif endif cd $ici ############################################################################ ## Check for necessary software /bin/env perl < /dev/null || goto noperl if ( ! -x $gnbkclient) then echo Sorry Can t find $gnbkclient goto usage endif if ( ! -x $s/client.pl) then echo Sorry Can t find client.pl in directory $s, from ici=$ici goto usage endif ############################################################################ ## Start blast search setenv blast_pgm $4 setenv blast_db $5 setenv upper 70 if ($6 != "") setenv upper $6 setenv seq `head -1 $3 | sed s'/>//'` echo 'pr='"$blast_pgm"'&db='"$blast_db"'&co='$seq >! $2.a echo '>'$seq >> $2.a # blast does not support - signs tail +2 $3 | sed s'/-/n/g' >> $2.a # cp $2.a ~/toto12 # debugging # exit # debugging echo END >> $2.a echo MSPupper 40 >> $2.a # Actual execution of the search on the remote server # Segmentation fault is a bug on $server server. ignore it. echo // `date` >! $2.b $s/client.pl $server $blastPort < $2.a | grep -v '^Segmentation fault' >! $2.h echo // `date` >> $2.b # Extract the protein names in various nomenclatures echo // extract the proteins >> $2.b nawk '/^Protein/{print $2;}' $2.h | sort -u >! $2.prot #nawk '/gp\|/ {pp = substr($1,4) ; i = index(pp, "|") ; if (i) pq = substr(pp,1,i-1) ; else pq = substr(pp,1) ; printf("\n%s\n",pq);}' $2.prot > ! $2.gp nawk '/sp\|/ {pp = substr($1,4) ; i = index(pp, "|") ; if (i) pq = substr(pp,1,i-1) ; else pq = substr(pp,1) ; printf("\n%s;\n",pq);}' $2.prot > ! $2.swiss nawk '/pir\|S\|/ {pp = substr($1,7) ; i = index(pp, "|") ; if (i) pq = substr(pp,1,i-1) ; else pq = substr(pp,1) ; printf("\n%s\n",pq);}' $2.prot > ! $2.pir # Extract the sequence names in various nomenclatures echo // extract the sequences >> $2.b nawk '/^Sequence/{print $2;}' $2.h | sort -u >! $2.seq nawk '/gb\|/ {pp = substr($1,4) ; i = index(pp, "|") ; if (i) pq = substr(pp,1,i-1) ; else pq = substr(pp,1) ; printf("\n%s;\n",pq);}' $2.seq > ! $2.gb nawk '/emb\|/ {pp = substr($1,5) ; i = index(pp, "|") ; if (i) pq = substr(pp,1,i-1) ; else pq = substr(pp,1) ; printf("\n%s\n",pq);}' $2.seq > ! $2.emb #Prepare a .ace rename file # BUG: i should really use -R, but dec 18 96, i found a bug in the ace kernel # so i change to -Alias temporarilly echo // nawk to list the various database calls >> $2.b #nawk '/gp\|/ {old = $1 ; pp = substr($1,4) ; i = index(pp, "|") ; if (i) pq = substr(pp,1,i-1) ; else pq = substr(pp,1) ; printf("\n-A Protein %s GP:%s\n",old, pq);}' $2.prot > $2.r.ace nawk '/sp\|/ {old = $1 ; pp = substr($1,4) ; i = index(pp, "|") ; if (i) pq = substr(pp,1,i-1) ; else pq = substr(pp,1) ; printf("\n-A Protein %s SP:%s\n",old, pq);}' $2.prot >! $2.r.ace nawk '/pir\|S\|/ {old = $1 ; pp = substr($1,7) ; i = index(pp, "|") ; if (i) pq = substr(pp,1,i-1) ; else pq = substr(pp,1) ; printf("\n-A Protein %s PIR:%s\n",old, pq);}' $2.prot >> $2.r.ace nawk '/gb\|/ {old = $1 ; pp = substr($1,4) ; i = index(pp, "|") ; if (i) pq = substr(pp,1,i-1) ; else pq = substr(pp,1) ; printf("\n-A Sequence %s GenBank:%s\n",old, pq);}' $2.seq >> $2.r.ace nawk '/emb\|/ {old = $1 ; pp = substr($1,5) ; i = index(pp, "|") ; if (i) pq = substr(pp,1,i-1) ; else pq = substr(pp,1) ; printf("\n-A Sequence %s EMBL:%s\n",old, pq);}' $2.seq >> $2.r.ace # arrange so that aliased objects are not empty when alias command is run echo '// Create a non empty entry for all proteins that should be aliased' >! $2.R.ace echo ' ' >> $2.R.ace nawk '{if ($2 != "") printf("%s %s\nTitle\n\n%s %s\nTitle\n\n",$2,$3,$2,$4);}' $2.r.ace >> $2.R.ace echo ' ' >> $2.R.ace #Collect the protein sequences on the server echo // Import the proteins from $server >> $2.b $gnbkclient -host $server -port 20000111 < $2.swiss | nawk -f $s/swiss.awk >! $2.swiss.out.ace $gnbkclient -host $server -port 20000112 < $2.pir | nawk -f $s/pir.awk >! $2.pir.out.ace $gnbkclient -host $server -port 20000110 < $2.gb | nawk -f $s/gb.awk >! $2.gb.out.ace $gnbkclient -host $server -port 20000110 < $2.emb | nawk -f $s/emb.awk >! $2.emb.out.ace # I don t know how to parse pir.out #accumulate everything in $2.b in the correct order echo // Accumulate >> $2.b cat $2.R.ace $2.r.ace $2.pir.out.ace $2.swiss.out.ace $2.gb.out.ace >> $2.b cat $2.emb.out.ace $2.h >> $2.b # Copy localy the input and the output file cp $2.a /var/tmp/$seq.a # if you want to keep a copy for debugging if (! -d blast_results) mkdir blast_results if (-d blast_results) then cp $2.b ./blast_results/$seq.out.ace echo blast_search done result saved as: ./blast_results/$seq.out.ace else cp $2.b ./$seq.out.ace echo blast_search done result saved as: ./$seq.out.ace endif echo parms $* # Finally, mv $2.b to $2.out and signal acedb mv $2.b $2.out if ($1 != "0") kill -USR1 $1 # Clean up #\rm $2.gp $2.gp.out.ace rm $2.a $2.prot $2.swiss $2.pir $2.r.ace $2.R.ace $2.gb $2.emb $2.h $2.seq rm $2.swiss.out.ace $2.pir.out.ace $2.gb.out.ace $2.emb.out.ace exit 0 ############################################################################ usage: echo '-----------------blast_search error----------------------------' echo Usage blast_search pid export_file_name fasta_file blast_pgm db echo Example: blast_search 3867 /var/tmp/bb.a /var/tmp/bb.fasta blast_x pir echo You called: blast_search "$*" echo '---------------------------------------------------------------' exit 1 noperl: echo Sorry Can t find /usr/local/bin/perl exit 1 ############################################################################ ############################################################################