#!/bin/csh # UDS - Update a database subset - Version 8/26/95 # The union of olddatabase and newdatabase is written to $NEW.$DBEXT.UPDATED # Entries present in olddatabase are replaced by their equivalents from # newdatabase. if ($2 == "") then echo "Usage: uds olddatabase newdatabase" exit endif set OLD = $1:r set NEW = $2:r set DBEXT = $2:e # Make sure databases are of the same type if ($DBEXT != $1:e) then echo ">>> Databases not of same type" exit endif if ($DBEXT == gen | $DBEXT == pir) then # set database option for use with getloc switch ($DBEXT) case gen set DBFLAG = -g breaksw case pir set DBFLAG = -p breaksw endsw # carry out operations in a temporary working directory mkdir UDS.tmp cd UDS.tmp splitdb $DBFLAG ../$1 $OLD.ano $OLD.wrp $OLD.ind splitdb $DBFLAG ../$2 $NEW.ano $NEW.wrp $NEW.ind # Find accession numbers that are in the old database subset but # not in the new. First, lists of accession numbers from the old and # new indexes are compared, and any accession numbers unique to the old # index are written to notfound.tmp. Next, each accession number from # notfound.tmp is searched for among the ACCESSION or #Accession lines # in the new database subset. If they are found, then it is assumed that # the old entry was merged into another entry. Any accession numbers not # found at this point are written to $OLD.unique tr -s ' ' ' ' < $OLD.ind |cut -f2 -d" " | sort | uniq > $OLD.acc tr -s ' ' ' ' < $NEW.ind |cut -f2 -d" " | sort | uniq > $NEW.acc fgrep -v -f $NEW.acc $OLD.acc > notfound.tmp grep -i 'ACCESSION' $NEW.ano > $NEW.acl foreach ACNO (`cat notfound.tmp`) egrep -e $ACNO $NEW.acl > has_been_merged.tmp if (-z has_been_merged.tmp) then echo $ACNO >> $OLD.unique endif end if (! -z $OLD.unique) then # Extract these unique entries as individual files in UDS.tmp getloc $DBFLAG -c -f $OLD.unique $OLD.ano $OLD.wrp $OLD.ind # Extract all of the entries from $NEW as individual files in UDS.tmp getloc $DBFLAG -f $NEW.ind $NEW.ano $NEW.wrp $NEW.ind #Rename files so that their names are LOCUS names, not ACCESSION # numbers. This will ensure that files get written in alphabetical # order by LOCUS name. foreach file (*.$DBEXT) set LNAME = `grep LOCUS $file |tr -s ' ' ' ' |cut -f2 -d" "` if ($file != $LNAME.$DBEXT) mv $file $LNAME.$DBEXT end # Write all of the entries collected in UDS.tmp into one file in # the original working directory echo Writing updated database to $NEW.$DBEXT.UPDATED if (-e ../$NEW.$DBEXT.UPDATED) /usr/bin/rm ../$NEW.$DBEXT.UPDATED cat *.$DBEXT >> ../$NEW.$DBEXT.UPDATED else echo "All accession numbers in $1 are also found in $2" echo "No action taken." endif # Cleanup - remove temporary directory cd .. /usr/bin/rm -r UDS.tmp else echo ">>> Unknown database file extension ."$DBEXT endif