#!/bin/bash
# GBUPDATE, Version 01/03/2011
# bash script to download GenPept files
#
# Usage: gpupdate filelist
#   filelist - file naming the remote files to download.
#
# History:
#   8/17/2009   Converted from csh script to bash script.
#   31 Oct 2007 Added RM_CMD and FTPCOMMAND variables, to get around
#               differences among Unix and Linux implementations.
#   24 Feb 2007 As of GenBank Release 158.0, the Accession index has been
#               split among several files, named gbacc1.idx, gbacc2.idx etc.
#               gbupdate now handles these files in the same way it handles
#               sequence files. Simply include 'acc' in filelist as if it
#               was another division.
#   29 Apr 2004 GenPept files have a new naming convention. For example,
#               in GenBank Release 141, rel141.fsa_aa
#   21 Aug 2000 Updated to extract gzipped files, rather than tar.Z files.
#
# Example: {uses 'at' command to run a delayed batch job}
#          at 1am
#          at>gpupdate filelist &
#          at>ctrl-D {ends the command }
#
# The script changes to the GenPept directory ($GP) before downloading.
# The existing files will be replaced with the new ones, and so must be
# writeable.
# Remember to include the index and documentation files in filelist.
# When disk space is tight, edit the 'filelist' file to download files in
# order of decreasing size. In general, you need to have at least twice as
# much empty space as is necessary to hold the largest file to be downloaded.
# This can be circumvented by setting $tmpdir to /tmp, rather than ".".

#----------------------- Set environment variables -------------
# MAILID is used as the anonymous FTP password. It is read from the
# BirchProps.adminEmail entry of $BIRCH/local/admin/BIRCH.properties; any
# MAILID already present in the environment is overwritten. Only the first
# matching line is used, so a duplicated entry cannot yield a multi-line value.
MAILID=$(grep 'BirchProps.adminEmail' "$BIRCH/local/admin/BIRCH.properties" \
  | head -n 1 | cut -f2 -d '=')
if [ -z "$MAILID" ]; then
  {
    echo Environment variable MAILID must be set to your full Internet address
    echo in the form userid@hostname
    echo This is best done in your .profile file.
    echo "(the value is read from BirchProps.adminEmail in $BIRCH/local/admin/BIRCH.properties)"
  } >&2
  # Exit non-zero so callers and batch schedulers can see that the run failed.
  exit 1
fi

# RM_CMD - command to be used for removing files and directories.
# An absolute path is preferred when an executable rm exists at one of the
# usual locations; otherwise fall back to whatever 'rm' resolves to.
if [ -x /usr/bin/rm ]; then
  RM_CMD=/usr/bin/rm
elif [ -x /bin/rm ]; then
  RM_CMD=/bin/rm
else
  RM_CMD=rm
fi

# generate FTP command
# We need to run in the passive mode, which is required by
# some firewalls.
# There is a lot of inconsistency from system to system as far
# as how ftp is run in the passive mode. One or more of these
# works on each system, but none works on all systems:
#      ftp -p
#      ftp, input 'passive' from ftp.input
#      pftp
#
# 'command -v' is used instead of counting the words printed by 'which',
# whose output format differs between systems.
# FTPCOMMAND stays a plain string (possibly two words); users of it must
# expand it unquoted so that 'ftp -p' splits into command and option.
if command -v pftp > /dev/null 2>&1; then
  FTPCOMMAND='pftp'
else
  FTPCOMMAND='ftp -p'
fi

# directory to store temporary files
#tmpdir=.
tmpdir=/usr/local/tmp/psgendb
if [ ! -d "$tmpdir" ]; then
  # -p also creates missing parent directories.
  if ! mkdir -p "$tmpdir"; then
    echo "Cannot create temporary directory $tmpdir" >&2
    exit 1
  fi
fi

GBUSERID=anonymous
GBPASSWD=$MAILID

# RLENGTH and LLENGTH tell which field gives the length of a file
# printed by 'ls -l' respectively, on remote and local hosts.
RLENGTH=5
LLENGTH=5

# Work inside the GenPept directory. Refuse to continue when GP is unset or
# the directory cannot be entered: a bare 'cd' would land in $HOME and the
# rest of the script removes and replaces files in the current directory.
if [ -z "$GP" ]; then
  echo 'Environment variable GP must be set to the GenPept directory' >&2
  exit 1
fi
cd "$GP" || exit 1

# GenBank download site, and directory at that site.
# Mirror sites and directories are commented out.
# - - - - - - NCBI
#GBHOST=ftp.ncbi.nih.gov
#GBDIR=genbank
#RLENGTH=5
#- - - - - - -JAPAN
#GBHOST=bio-mirror.jp.apan.net
#GBDIR=pub/biomirror/genbank
#- - - - - - -AUSTRALIA
#GBHOST=bio-mirror.au.apan.net
#GBDIR=biomirror/genbank
#- - - - - - -SINGAPORE
#GBHOST=bio-mirror.sg.apan.net
#GBDIR=biomirrors/genbank
#- - - - - - -CHINA
#GBHOST=bio-mirror.im.ac.cn
#GBDIR=genbank
#- - - - - - -USA - Indiana University
GBHOST=bio-mirror.net
GBDIR=biomirror/genpept
GBUSERID=anonymous
GBPASSWD=$MAILID
#- - - - - - -USA - San Diego Supercomputing Center
#GBHOST=genbank.sdsc.edu
#GBDIR=pub
#GBUSERID=anonymous
#GBPASSWD=$MAILID
# - - - - - - - - - - - - - - -

# The single argument is the list of remote file names to download.
# Without this check a missing argument would leave 'cat' reading stdin.
filelist=$1
if [ -z "$filelist" ] || [ ! -r "$filelist" ]; then
  echo "Usage: ${0##*/} filelist" >&2
  exit 1
fi

# Use 'ls -l' to write a list of GenBank files on remote server,
# Send output to ls.out
# Remove any listing left by an earlier run first, so that a failed
# connection cannot cause sizes to be checked against stale data.
"$RM_CMD" -f ls.out

# generate FTP command file
{
  echo "user $GBUSERID $GBPASSWD"
  echo "cd $GBDIR"
  echo bin
  echo ls -l ls.out
  echo bye
} > ftp.input

# run FTP
# FTPCOMMAND may hold two words ('ftp -p'), so it is expanded unquoted.
# shellcheck disable=SC2086
nice $FTPCOMMAND -i -n "$GBHOST" < ftp.input

if [ ! -s ls.out ]; then
  echo "Could not obtain a file listing from $GBHOST ($GBDIR)" >&2
  exit 1
fi

#----------------------- MAIN LOOP -------------
# Entries in filelist are whitespace-separated names, hence the deliberate
# word splitting of the file's contents.
# shellcheck disable=SC2013
for file in $(cat "$filelist"); do
  # Compressed downloads (.gz or .Z) are stored locally without that suffix.
  EXT=${file##*.}
  case "$EXT" in
    gz | Z)
      compressed=yes
      name=${file%.*}
      ;;
    *)
      compressed=no
      name=$file
      ;;
  esac

  # Make some space by deleting the current file, if
  # it exists.
  if [ -f "$name" ]; then
    echo "Removing $name"
    "$RM_CMD" -- "$name"
  fi

  # Create input file for ftp command. Logs in, moves to correct directory,
  # and downloads the data. Then logs out.
  {
    echo "user $GBUSERID $GBPASSWD"
    echo "cd $GBDIR"
    echo bin
    echo "get $file $tmpdir/$file"
    echo bye
  } > ftp.input

  # Get the file from GenBank
  # shellcheck disable=SC2086
  nice $FTPCOMMAND -i -n "$GBHOST" < ftp.input

  # Make sure that the file received is the same length as the original
  # file. The length is field RLENGTH of the remote listing and field
  # LLENGTH of the local 'ls -l' output. awk splits on runs of blanks; the
  # listing line is chosen by an exact match on its last field, so a name
  # that is merely a substring of another entry is not picked up.
  ORIGINAL=$(awk -v f="$file" -v n="$RLENGTH" '$NF == f { print $n; exit }' ls.out)
  RECEIVED=$(ls -l "$tmpdir/$file" | awk -v n="$LLENGTH" '{ print $n; exit }')
  echo "ORIGINAL= $ORIGINAL"
  echo "RECEIVED= $RECEIVED"

  # An empty ORIGINAL means the file was not in the remote listing; that must
  # not compare equal to the empty RECEIVED of a failed download.
  if [ -n "$ORIGINAL" ] && [ "$ORIGINAL" = "$RECEIVED" ]; then
    ls -l "$tmpdir/$file" >> files_received
    if [ "$compressed" = yes ]; then
      # Uncompress the file into the current directory. The compressed copy
      # is removed only when decompression succeeded.
      if nice -n 10 gunzip -c -f "$tmpdir/$file" > "$name"; then
        "$RM_CMD" "$tmpdir/$file"
        chmod a+r -- "$name"
      else
        echo "Could not uncompress $tmpdir/$file" >&2
        echo "$file" >> files_missed
        continue
      fi
    else
      # Uncompressed files are moved from the temporary directory as-is.
      mv "$tmpdir/$file" "$GP/$name"
    fi

    # Make a symbolic link to fasta file, called genpept.wrp.
    # This is a legacy for existing BIRCH scripts, and can be removed
    # when the scripts are updated.
    if [ "${name##*.}" = fasta ]; then
      # -f: no complaint when the link does not exist yet.
      "$RM_CMD" -f "$GP/genpept.wrp"
      ln -s "$name" "$GP/genpept.wrp"
    fi
  else
    echo "$file" >> files_missed
  fi # $ORIGINAL = RECEIVED
done # for file