#!/usr/bin/env python3
"""
optparse is deprecated in favor of argparse as of Python 2.7. However,
since 2.7 is not always present on many systems, at this writing,
it is safer to stick with optparse for now. It should be easy
to change later, since the syntax is very similar between argparse and optparse.
from optparse import OptionParser
"""
from optparse import OptionParser
import ftplib
import hashlib
import time
import datetime
import os
import os.path
import re
import stat
import subprocess
import sys
import tarfile
'''
blastdbkit.py - Install and manage local BLAST databases, and set the BLASTDB
    environment variable for BioLegato helper applications.
Synopsis: blastdbkit.py --showall [--ftpsite url]
          blastdbkit.py --configure [--birchdir directory] [--blastdb directory]
          blastdbkit.py --reportlocal
          blastdbkit.py --reportftp [--ftpsite url]
          blastdbkit.py --add [--ftpsite url] --dblist db[,db]
          blastdbkit.py --delete --dblist db[,db]
          blastdbkit.py --update [--ftpsite url] --dblist db[,db]
@modified: January 10, 2021
@author: Brian Fristensky
@contact: brian.fristensky@umanitoba.ca
'''
blib = os.environ.get("BIRCHPYLIB")
sys.path.append(blib)
from birchlib import Birchmod
PROGRAM = "blastdbkit.py : "
USAGE = "\n\tUSAGE: blastdbkit.py --showall [--ftpsite url]" + \
"\n\t\tblastdbkit.py --configure [--birchdir directory] [--blastdb directory]" + \
"\n\t\tblastdbkit.py --reportlocal" + \
"\n\t\tblastdbkit.py --reportftp [--ftpsite url]" + \
"\n\t\tblastdbkit.py --add [--ftpsite url] --dblist db[,db]" + \
"\n\t\tblastdbkit.py --delete --dblist db[,db]" + \
"\n\t\tblastdbkit.py --update [--ftpsite url] --dblist db[,db]"
DEBUG = True
if DEBUG :
print('Debugging mode on')
BM = Birchmod(PROGRAM, USAGE)
BIRCHvariables = ['BIRCH_PROMPT','BLASTDB']
"""
FTPINFO is a dictionary whose keys are ftp site addresses, and whose values are dictionaries of field names
and their values:
dbdir - directory in which the BLAST-formatted databases are found.
UseMLSD - True or False. In order to use MLSD with FTP, the site must return the filename, size,
and modification time. At this writing, only ftp.ncbi.nih.gov supports MLSD and returns all three
values. ftp.ebi.ac.uk does not support MLSD.
ftp.hgc.jp supports MLSD, but does not return the size of a file.
Future: Possible additional fields might include timezone or geographical location of
an FTP site.
"""
FTPINFO = {"ftp.ncbi.nih.gov" : {"dbdir":"/blast/db","UseMLSD":True} ,
"ftp.hgc.jp" : {"dbdir":"pub/mirror/ncbi/blast/db","UseMLSD":False},
"ftp.ebi.ac.uk" : {"dbdir":"pub/blast/db","UseMLSD":False} }
#FTPINFO = {"ftp.ncbi.nih.gov" : "/blast/db" , "ftp.hgc.jp" : "pub/mirror/ncbi/blast/db", "mirrors.vbi.vt.edu" : "mirrors/ftp.ncbi.nih.gov/blast/db"}
# For convenience, we create two lists of database names from local_dblist.
# These are used by methods that do things like checking for the presence of
# databases. Importantly, DBNAMES_ALL holds the database names in the order in
# which they are read; plain dictionaries only guarantee insertion order from
# Python 3.7 onward, so when database lists are stored in dictionaries we access
# their components in the original order by referencing DBNAMES_ALL.
# DBNAMES_INSTALLED lists those databases that are currently installed.
DBNAMES_ALL = []
DBNAMES_INSTALLED = []
# SPECIAL CASES : mouse_genome and human_genome
# have filenames that differ from their database names. We define sets
# of file prefixes used for each of these databases.
MouseSet = ["GCF_000001635.26_top_level"]
HumanSet = ["GCF_000001405.38_top_level"]
# - - - - - - - - - - - - - Utility functions - - - - - - - - - - - - - - - - -
def chmod_ar(filename):
"""
Make a file world-readable.
"""
if os.path.exists(filename):
st = os.stat(filename)
os.chmod(filename, st.st_mode | stat.S_IREAD \
| stat.S_IRGRP | stat.S_IROTH)
def chmod_arx(filename):
"""
Make a file or directory world-readable and world-executable/searchable.
"""
if os.path.exists(filename):
st = os.stat(filename)
os.chmod(filename, st.st_mode | stat.S_IEXEC | stat.S_IREAD \
| stat.S_IXGRP | stat.S_IRGRP | stat.S_IXOTH \
| stat.S_IROTH)
def LocalHostname():
"""
Return the name of the local machine. Tries a number of methods
to get a name other than 'localhost' or a null result.
"""
import socket
import platform
def CheckName(name) :
        if name is None or name.startswith("localhost") or name == "" :
OKAY = False
else :
OKAY = True
return OKAY
name = os.getenv('HOSTNAME')
if not CheckName(name) :
name = platform.uname()[1]
if not CheckName(name) :
if socket.gethostname().find('.')>=0:
name=socket.gethostname()
else:
name=socket.gethostbyaddr(socket.gethostname())[0]
return name
def GetBIRCHProperties(BIRCHDIR,PropName) :
"""
Retrieve a value from BIRCH.properties. eg. To retrieve the value
of BirchProps.adminEmail:
GetBIRCHProperties(BIRCHDIR,"adminEmail")
"""
PFN = os.path.join(BIRCHDIR , 'local' , 'admin' , 'BIRCH.properties')
pfile = open(PFN,'r')
Value = ""
Target = 'BirchProps.' + PropName
lines = pfile.readlines()
pfile.close()
plen = len(lines)
if plen > 0 :
i = 0
while (i < plen) and (Value == "") :
line = lines[i]
# ignore blank lines and comment lines
if not (line.startswith('#')) :
tokens = line.split("=")
if tokens[0] == Target :
Value = tokens[1].strip()
i += 1
return Value
def DeleteFilesByPrefix(dest,dbname,LOGFILE) :
"""
Delete all files from dest whose names begin with dbname
"""
Indent4 = ' '
os.chdir(dest)
rawlist = os.listdir(dest)
rawlist.sort()
for filename in rawlist :
prefix = filename.split('.')[0]
if prefix == dbname :
os.remove(filename)
LOGFILE.write(Indent4 + filename + ' deleted\n')
def SendEmail(From,To,Subject,Text) :
"""
Very simple email method adapted from:
http://stackoverflow.com/questions/882712/sending-html-email-using-python
There are more elaborate examples on this site for sending
HTML messages and attachments.
"""
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
Host = 'localhost'
msg = MIMEMultipart('alternative')
msg['Subject'] = Subject
Html = """\
%s
""" %(Text)
part1 = MIMEText(Text, 'plain')
part2 = MIMEText(Html, 'html')
msg.attach(part1)
msg.attach(part2)
try:
server = smtplib.SMTP(Host)
server.sendmail(From, To, msg.as_string())
server.quit()
print("Successfully sent email")
except :
print("Error: unable to send email")
def getValue(tokens,kw) :
"""
Return a value for a key/value pair, where
= is the separator.
"""
i = 0
L = len(tokens)
FOUND = False
retval = ""
while i < L and not FOUND :
t = tokens[i].split('=')
if t[0] == kw :
FOUND = True
retval = t[1]
i += 1
return retval
def FTPDirList(FTPSITE) :
"""
Get a list of files found at the remote FTP site.
"""
print('Checking ' + FTPSITE)
FTPDIR = FTPINFO[FTPSITE]["dbdir"]
ftp = ftplib.FTP(FTPSITE)
ftp.login()
ftp.cwd(FTPDIR)
dirlines = []
if FTPINFO[FTPSITE]["UseMLSD"] :
ftp.retrlines('MLSD',dirlines.append)
# Python 3 - use ftp.mlsd
else:
ftp.retrlines('LIST',dirlines.append)
ftp.quit()
return dirlines
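# In Python 3, FTPDirList() could use ftplib.FTP.mlsd() instead of sending a raw
# MLSD command. A minimal sketch (illustrative only; mlsd() yields parsed
# (name, facts) pairs rather than raw directory lines, so the FileMetaData
# parsing would also have to change):
#
#   ftp = ftplib.FTP(FTPSITE)
#   ftp.login()
#   for name, facts in ftp.mlsd(FTPINFO[FTPSITE]["dbdir"]):
#       print(name, facts.get('size'), facts.get('modify'))
#   ftp.quit()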
def FTPDBList(FTPSITE) :
"""
Get a list of BLAST databases found at the remote FTP site.
"""
dirlines = FTPDirList(FTPSITE)
AvailableDBs = []
for line in dirlines:
F = FileMetaData()
F.getFTPMetaData(line,FTPINFO[FTPSITE]["UseMLSD"])
if F.Name.endswith('.tar.gz') :
tokens = F.Name.split('.')
dbname = tokens[0] # eg. nt
if not dbname in AvailableDBs :
AvailableDBs.append(dbname)
AvailableDBs.sort()
return AvailableDBs
def download_file(FTPSITE,FTPDIR,FN):
"""
Download a file from an FTP site.
"""
ftp = ftplib.FTP(FTPSITE)
ftp.login()
ftp.cwd(FTPDIR)
Outfile = open(FN, 'wb')
ftp.retrbinary('RETR ' + FN, Outfile.write)
Outfile.close()
ftp.quit()
def md5Checksum(filePath):
"""
Calculate an md5 checksum for a file.
    Written by Joel Verhagen
http://joelverhagen.com/blog/2011/02/md5-hash-of-file-in-python/
"""
with open(filePath, 'rb') as fh:
m = hashlib.md5()
while True:
data = fh.read(8192)
if not data:
break
m.update(data)
return m.hexdigest()
def extracttgz(fname):
"""
Extract all files from a .tar.gz file
Adapted from:
http://sukhbinder.wordpress.com/2014/03/06/untar-a-tar-file-with-python/
"""
if (fname.endswith("tar.gz")):
tar = tarfile.open(fname)
tar.extractall()
tar.close()
def InstallFile(tgzfile,FTPSITE,FTPDIR,LOGFILE) :
"""
    Download a BLAST .tar.gz file and its corresponding .md5 file from the FTP site,
    and untar the file in the current directory. Delete the .tar.gz
    file when done, but keep the .md5 file.
"""
SUCCESS=False
MAX_DOWNLOAD_ATTEMPTS=3
ATTEMPTS = 0
md5file = tgzfile + '.md5'
Indent4 = ' '
while not SUCCESS and (ATTEMPTS < MAX_DOWNLOAD_ATTEMPTS) :
#Download .tar.gz file and .tar.gz.md5 file
try :
download_file(FTPSITE,FTPDIR,tgzfile)
except :
LOGFILE.write(Indent4 + 'Failed to download ' + tgzfile + '\n')
try :
download_file(FTPSITE,FTPDIR,md5file)
except :
LOGFILE.write(Indent4 + 'Failed to download ' + md5file + '\n')
# Calculate md5 checksum, and compare it with the checksum file
if os.path.exists(tgzfile) and os.path.exists(md5file) :
LocalChecksum = md5Checksum(tgzfile)
#with open(md5file, 'rb') as fh :
with open(md5file, 'r') as fh :
line = fh.readline()
RemoteChecksum = line.split(" ")[0]
if LocalChecksum == RemoteChecksum :
SUCCESS = True
else :
LOGFILE.write(Indent4 + '>>> Checksum for ' + tgzfile + ' does not match ' + md5file + '\n')
os.remove(md5file)
ATTEMPTS += 1
# Uncompress and extract files from the .tar.gz file
if SUCCESS :
try :
extracttgz(tgzfile)
except :
SUCCESS = False
LOGFILE.write(Indent4 + '>>> Error extracting files from ' + tgzfile + '\n')
    if os.path.exists(tgzfile) :
        os.remove(tgzfile)
return SUCCESS
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class FileMetaData:
"""
Holds metadata for a file.
"""
def __init__(self):
"""
Initializes arguments:
Name = ""
Size = 0
MDate = datetime.time()
"""
self.Name = ""
self.Size = 0
now = datetime.datetime.now()
self.CurrentYear = now.year
self.Mtime = now
def getLocalMetaData(self,FN) :
"""
Retrieve metadata for a local file.
"""
finfo = os.stat(FN)
self.Name = FN
self.Size = int(finfo.st_size)
timestamp = finfo.st_mtime
self.Mtime = datetime.datetime.fromtimestamp(timestamp)
#print(self.Name + ' ' + str(self.Size) + ' ' + str(self.Mtime) )
def getFTPMetaData(self,line,UseMLSD) :
"""
Parse metadata for a file on a remote FTP site.
"""
if UseMLSD :
tokens = line.split(';')
numtokens = len(tokens)
self.Name = tokens[numtokens-1].lstrip()
timestruct = getValue(tokens,'modify')
self.Mtime = datetime.datetime.strptime(timestruct,"%Y%m%d%H%M%S")
self.Size = int(getValue(tokens,'size'))
else:
"""
Parsing directory lines is hazardous!
Directory listings from a number of FTP sites seem to
all be consistent with tokenizing into nine non-blank fields:
Field Content
-----------------------
0 permissions
1 ?
2 owner
3 group
4 size
5 Month
6 Day
7 Time or Year
8 Name
            Field 8 is a special case, because a name might include blanks.
            For the purposes of downloading NCBI files, we don't need to worry
            about blanks in names.
            """
            # Parse directory lines into non-blank fields.
            # When None is used as the separator, runs of whitespace are
            # treated as a single separator. Neat!
tokens = line.split(None)
#print(line)
L = len(tokens)
if L == 9 :
# Name and Size are easy
self.Name = tokens[8]
self.Size = int(tokens[4])
# Modification time takes a bit more work.
Month = tokens[5]
Day = tokens[6]
if ':' in tokens[7] :
                    # Directory listings for files from the previous 12 months list only a
                    # modification time, not a year. Assume the current year for now; this is
                    # corrected below if it puts the modification time in the future.
Year = str(self.CurrentYear)
TimeHM = tokens[7]
else :
Year = tokens[7]
TimeHM = "00:00"
TimeStr = Day + ' ' + Month + ' ' + Year + ' ' + TimeHM
#self.Mtime = time.strptime(TimeStr, "%d %b %Y %H:%M")
self.Mtime = datetime.datetime.strptime(TimeStr, "%d %b %Y %H:%M")
                # If assuming the current year put the modification time in the future,
                # the file must actually be from the previous year, so decrement Year by 1
                # and recalculate Mtime.
if self.Mtime > datetime.datetime.now() :
Year = str(self.CurrentYear-1)
TimeStr = Day + ' ' + Month + ' ' + Year + ' ' + TimeHM
self.Mtime = datetime.datetime.strptime(TimeStr, "%d %b %Y %H:%M")
#print(self.Name + ' ' + str(self.Size) + ' ' +str(self.Mtime))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class Parameters:
"""
Wrapper class for command line parameters
"""
def __init__(self):
"""
Initializes arguments:
SHOWALL = False
CONFIGURE = False
BIRCHDIR = ""
BLASTDB = ""
REPORTLOCAL = False
REPORTFTP = False
ADD = False
DELETE = False
UPDATE = False
FTPSITE = ""
DBLIST = []
SFN= ""
Then calls read_args() to fill in their values from command line
"""
self.SHOWALL = False
self.CONFIGURE = False
self.BIRCHDIR = ""
self.BLASTDB = ""
self.REPORTLOCAL = False
self.REPORTFTP = False
self.ADD = False
self.DELETE = False
self.UPDATE = False
self.FTPSITE = ""
self.DBLIST = []
self.read_args()
self.SFN = os.path.join(self.BIRCHDIR , 'local' , 'admin' , 'BIRCH.settings')
if DEBUG :
print('------------ Parameters from command line ------')
print(' SHOWALL: ' + str(self.SHOWALL))
print(' CONFIGURE: ' + str(self.CONFIGURE))
print(' BIRCHDIR: ' + self.BIRCHDIR)
print(' BLASTDB: ' + self.BLASTDB)
print(' REPORTLOCAL: ' + str(self.REPORTLOCAL))
print(' REPORTFTP: ' + str(self.REPORTFTP))
print(' ADD: ' + str(self.ADD))
print(' DELETE: ' + str(self.DELETE))
print(' UPDATE: ' + str(self.UPDATE))
print(' FTPSITE: ' + self.FTPSITE)
print(' DBLIST: ' + str(self.DBLIST))
print(' Settings file: ' + self.SFN)
print()
def read_args(self):
"""
Read command line arguments into a Parameter object
"""
parser = OptionParser()
parser.add_option("--showall", dest="showall", action="store_true", default=False,
help="Show all available databases at FTPSITE")
parser.add_option("--configure", dest="configure", action="store_true", default=False,
help="in a new install or update, set BIRCHDB environment variable")
parser.add_option("--birchdir", dest="birchdir", action="store", default="",
help="path to BIRCH installation directory")
parser.add_option("--blastdb", dest="blastdb", action="store", default="",
help="path to Blast Database directory")
parser.add_option("--reportlocal", dest="reportlocal", action="store_true", default=False,
help="Write local database stats to $BLASTDB/localstats.tsv")
parser.add_option("--reportftp", dest="reportftp", action="store_true", default=False,
help="Write database stats from remote FTP site to $BLASTDB/ftpstats.tsv")
parser.add_option("--add", dest="add", action="store_true", default=False,
help="add files to Blast database")
parser.add_option("--delete", dest="deletefiles", action="store_true", default=False,
help="delete files from Blast database")
parser.add_option("--update", dest="update", action="store_true", default=False,
help="download and install updates for Blast Database")
parser.add_option("--ftpsite", dest="ftpsite", action="store", default="",
help="FTP site from which to download update files")
parser.add_option("--dblist", dest="rawdblist", action="store", default="",
help="list of database files to add, delete or update")
(options, args) = parser.parse_args()
self.SHOWALL = options.showall
self.CONFIGURE = options.configure
self.BIRCHDIR = options.birchdir
if self.BIRCHDIR == "" :
self.BIRCHDIR = str(os.environ['BIRCH'])
self.BLASTDB = options.blastdb
self.REPORTLOCAL = options.reportlocal
self.REPORTFTP = options.reportftp
self.ADD = options.add
self.DELETE = options.deletefiles
self.UPDATE = options.update
self.FTPSITE = options.ftpsite
# We need to deal with the possibility that the user has prefixed the URL
# with a protocol. The protocol will be prepended later if we need it.
        if self.FTPSITE.startswith('ftp://') :
            self.FTPSITE = self.FTPSITE[6:]
        elif self.FTPSITE.startswith('http://') :
            self.FTPSITE = self.FTPSITE[7:]
        elif self.FTPSITE.startswith('https://') :
            self.FTPSITE = self.FTPSITE[8:]
if self.FTPSITE == "" :
self.FTPSITE = "ftp.ncbi.nih.gov"
if options.rawdblist != "" :
tokens = options.rawdblist.split(",")
# When BioLegato calls blastdbkit.py, the comma-separated list in --dblist will
# usually contain empty elements. We need to remove those empty components from dblist
# Evaluating a list comprehension is the most straightforward way.
self.DBLIST = [x for x in tokens if x != ""]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class BIRCHSettings:
"""
Data and methods for the BIRCH Settings file.
"""
def __init__(self,P):
"""
Initializes arguments:
dict = {}
"""
self.dict = {}
for var in BIRCHvariables:
self.dict[var] = ""
if os.path.exists(P.SFN) :
self.ReadBIRCHSettings(P.SFN)
#else:
# DFN = os.path.join(self.BIRCH , 'admin' , 'BIRCH.settings.default')
# self.ReadBIRCHSettings(DFN)
if DEBUG :
print('- - - - - BIRCH Settings - - - - -')
for k in self.dict :
print(' ' + k + ',' + self.dict[k])
def ReadBIRCHSettings(self,FN):
"""
Read current values of BIRCHvariables from BIRCH.settings.
"""
if os.path.exists(FN) :
Sfile = open(FN,'r')
for line in Sfile :
line = line.strip()
# ignore blank lines and comment lines
if (line != "" and line[0] != '#') :
tokens = line.split("=")
if tokens[0] in BIRCHvariables :
self.dict[tokens[0]] = tokens[1]
Sfile.close()
def WriteBIRCHSettings(self,SFN):
"""
        Write current values of BIRCHvariables to the BIRCH.settings file.
"""
Sfile = open(SFN,'w')
Sfile.write('# DO NOT EDIT THIS FILE!\n')
Sfile.write('# This file is automatically generated by blastdbkit.py during installation,\n')
Sfile.write('# update or by birchadmin --> Preferences --> Settings\n')
for k in self.dict :
Sfile.write(k + '=' + self.dict[k] + '\n')
Sfile.close()
def WriteBIRCHenvBourne(self,P):
"""
Write bash code for setting BIRCHvariables to birch_settings_Bourne.source.
Used for Bourne type shells eg. bash, sh
"""
ENVFN = os.path.join(P.BIRCHDIR, 'admin', 'birch_settings_Bourne' + '.source')
Sfile = open(ENVFN,'w')
Sfile.write('# DO NOT EDIT THIS FILE!\n')
Sfile.write('# This file is automatically generated by blastdbkit.py during installation,\n')
Sfile.write('# update or by birchadmin --> BIRCHSettings\n')
        # Enclose the value of each variable in single quotes. This is mainly for cases such as
        # BL_Terminal='gnome-terminal -e'
for k in self.dict :
Sfile.write(k + "='" + self.dict[k] + "'\n")
Sfile.write('export ')
for var in BIRCHvariables :
Sfile.write(' ' + var)
Sfile.write('\n')
Sfile.close()
chmod_ar(ENVFN)
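    # Illustrative content of the generated birch_settings_Bourne.source file
    # (variable values are examples only):
    #   BIRCH_PROMPT='on'
    #   BLASTDB='/home/BIRCH/GenBank'
    #   export  BIRCH_PROMPT BLASTDB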
def WriteBIRCHenvCsh(self,P):
"""
Write csh code for setting BIRCHvariables to birch_settings_csh.source.
Used for C type shells eg. csh, tcsh
"""
ENVFN = os.path.join(P.BIRCHDIR, 'admin', 'birch_settings_csh' + '.source')
Sfile = open(ENVFN,'w')
Sfile.write('# DO NOT EDIT THIS FILE!\n')
Sfile.write('# This file is automatically generated by blastdbkit.py during installation,\n')
Sfile.write('# update or by birchadmin --> BIRCHSettings\n')
for k in self.dict :
Sfile.write('setenv ' +k + ' ' + self.dict[k] + '\n')
Sfile.close()
chmod_ar(ENVFN)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class BLASTDBList:
"""
Data and methods for the BLAST databases.
"""
def __init__(self,P,DBNAMES_ALL):
"""
Initializes local BLASTDB list and sets DBNAMES_ALL
"""
# Read in the default BLASTDB.list.
self.default_listfile = os.path.join(P.BIRCHDIR , 'admin' , 'Settings.default' , 'BLASTDB.list')
self.default_dblist = self.ReadBLASTDBList(self.default_listfile,DBNAMES_ALL)
# Read the local settings file if it exists. Update the dbtag field, the human-readable
# description of the database, with the value from the default dblist. This
# updates the local dblist if there are changes in this field in a new BIRCH release.
# Also, if a new release has db files not in the local list, add those to the
# local list.
self.local_listfile = os.path.join(P.BIRCHDIR , 'local' , 'admin' , 'BLASTDB.list')
if os.path.exists(self.local_listfile) :
self.local_dblist = self.ReadBLASTDBList(self.local_listfile,DBNAMES_ALL)
# First, remove any legacy databases from the local list, if they aren't in the
# DBNAMES_ALL. These would be databases that are no longer distributed by NCBI
# eg. v4 databases
SupportedDBs = self.default_dblist.keys()
            # Deleting keys while iterating directly over self.local_dblist.keys() raised
            # "RuntimeError: dictionary changed size during iteration": in Python 3, keys()
            # returns a live view of the dictionary, so entries cannot be removed during
            # the loop. Instead, we build a filtered copy below.
            #localDBs = self.local_dblist.keys()
            #for dbname in localDBs :
            #    if not dbname in SupportedDBs :
            #        del self.local_dblist[dbname]
templocalDBs = {}
localkeys = self.local_dblist.keys()
for dbname in localkeys :
if dbname in SupportedDBs :
templocalDBs[dbname] = self.local_dblist[dbname]
self.local_dblist = templocalDBs
for dbname in self.default_dblist :
if dbname in self.local_dblist :
self.local_dblist[dbname]["dbtag"] = self.default_dblist[dbname]["dbtag"]
self.local_dblist[dbname]["dbtype"] = self.default_dblist[dbname]["dbtype"]
self.local_dblist[dbname]["decompress_ratio"] = self.default_dblist[dbname]["decompress_ratio"]
else:
self.local_dblist[dbname] = self.default_dblist[dbname]
else:
self.local_dblist = self.default_dblist
# dictionaries of metadata for files in BLASTDB directory and on the
# remote FTP site
self.localDBFILES = {}
# Initialize dictionaries for each database name
self.remoteDBFILES = {}
for dbname in DBNAMES_ALL :
self.remoteDBFILES[dbname] = {}
def ReadBLASTDBList(self,FN,DBNAMES_ALL):
"""
Read list of BLAST databases, descriptions and install status (0,1).
"""
BLASTDICT={}
if DBNAMES_ALL == [] : # We are reading the default file, so add database names to DBNAMES_ALL
DefaultFile = True
else :
DefaultFile = False
Bfile = open(FN,'r')
for line in Bfile :
if (line != "" and line[0] != '#') :
line = line.strip()
tokens = line.split(",")
numfields = len(tokens)
if numfields >= 4 : # ignore blank or incomplete lines
if numfields == 5 :
BLASTDICT[tokens[0]]={"dbtag":tokens[1],"dbtype":tokens[2],"decompress_ratio":tokens[3],"installed":tokens[4]}
elif numfields == 4 : # older version of .list file
BLASTDICT[tokens[0]]={"dbtag":tokens[1],"dbtype":tokens[2],"decompress_ratio":"1","installed":tokens[3]}
if DefaultFile : # We don't do this part if we're reading local database files
if not tokens[0] in DBNAMES_ALL :
DBNAMES_ALL.append(tokens[0])
Bfile.close()
if DEBUG :
print('- - - - - BLASTDB list from ' + FN + '- - - - -')
for k in DBNAMES_ALL :
if k in BLASTDICT :
print(' ' + k + ',' + str(BLASTDICT[k]))
else :
print(' ' + k + ' not in ' + FN )
return BLASTDICT
def WriteBLASTDBList(self,DBNAMES_ALL):
"""
Write list of BLAST databases, descriptions and install status (0,1).
"""
Bfile = open(self.local_listfile,'w')
Bfile.write('# DO NOT EDIT THIS FILE!\n')
Bfile.write('# This file is automatically generated by blastdbkit.py during installation,\n')
Bfile.write('# update or by birchadmin --> BLASTDB Configure\n')
for dbname in DBNAMES_ALL :
Bfile.write(dbname + ',' + self.local_dblist[dbname]["dbtag"] + ',' + str(self.local_dblist[dbname]["dbtype"])
+ ',' + str(self.local_dblist[dbname]["decompress_ratio"]) + ',' + str(self.local_dblist[dbname]["installed"]) + '\n')
Bfile.close()
chmod_ar(self.local_listfile)
def CheckBLASTDB(self,BLASTDB,DBNAMES_ALL) :
"""
Get a list of databases found in the BLASTDB directory.
There are potentially a lot of ways this might be done,
and no obvious best choice. Here, we just look to make sure
that each database name is found, regardless of how many
files share that name eg. nt includes files with names like
nt.01.*
"""
# First, create a dictionary of filenames for all databases
# found in $BLASTDB
os.chdir(BLASTDB)
# We only want BLAST files; ignore all other files
# SPECIAL CASES : mouse_genome and human_genome
# have filenames that differ from their database names.
SearchSet = DBNAMES_ALL + MouseSet + HumanSet
rawlist = os.listdir(os.getcwd())
self.localDBFILES = {}
for filename in rawlist :
tokens = filename.split('.')
            # Test whether the filename is from mouse_genome or human_genome.
            # NCBI used '.' as part of those filenames, rather than only as a file
            # extension separator, eg.
            #   MouseSet = ['GCF_000001635.26_top_level']
            #   HumanSet = ['GCF_000001405.38_top_level']
            # So we have to do ugly things...
SpecialSet = MouseSet + HumanSet
prefix = ""
for p in SpecialSet :
if p in filename :
prefix = tokens[0] + '.' + tokens[1]
if prefix == "" :
prefix = tokens[0] # eg. nt
if prefix in SearchSet :
F = FileMetaData()
F.getLocalMetaData(filename)
                # Map the file prefix to its database name
if prefix in MouseSet :
dbname = "mouse_genome"
elif prefix in HumanSet :
dbname = "human_genome"
else :
dbname = prefix
if dbname in self.localDBFILES :
self.localDBFILES[dbname][filename] = F
# first occurrence of a file in a database set
else :
self.localDBFILES[dbname] = {filename:F}
# Next, iterate through the database names, and set the
# +/- field depending on whether a database name appears in
# the list of files. Here is also where we create DBNAMES_INSTALLED,
# which lists installed databases in the order in which they
# appear in the db .list files.
DBNAMES_INSTALLED = []
for dbname in DBNAMES_ALL :
if dbname in self.localDBFILES :
self.local_dblist[dbname]["installed"] = 1
DBNAMES_INSTALLED.append(dbname)
else :
self.local_dblist[dbname]["installed"] = 0
return DBNAMES_INSTALLED
def CheckFTPsite(self,FTPSITE,DBNAMES_ALL) :
"""
Get a list of database files found at the remote FTP site.
"""
# First, create a dictionary of filenames for all databases
# found in FTPSITE/FTPDIR
dirlines = FTPDirList(FTPSITE)
for line in dirlines:
F = FileMetaData()
F.getFTPMetaData(line,FTPINFO[FTPSITE]["UseMLSD"])
if F.Name.endswith('.md5') :
tgzname = F.Name[:-4]
else : # eg. nt.00.tar.gz
tgzname = F.Name
tokens = F.Name.split('.')
dbname = tokens[0] # eg. nt
# We only want BLAST files; ignore all other files
if dbname in DBNAMES_ALL :
                # Each section of a database has two files, eg. nt.20.tar.gz and
                # nt.20.tar.gz.md5. We store them in a dictionary whose key is the name
                # of the .tar.gz file, eg. nt.20.tar.gz.
                # subsequent occurrences of files in a database set
if tgzname in self.remoteDBFILES[dbname] :
self.remoteDBFILES[dbname][tgzname][tgzname].append(F)
# first occurrence of a file in a database set
else :
self.remoteDBFILES[dbname][tgzname] = {tgzname:[F]}
# Eliminate from the remote list any database that wasn't found at the remote
# FTP site.
for dbname in DBNAMES_ALL :
if self.remoteDBFILES[dbname] == {} :
del self.remoteDBFILES[dbname]
def WriteLocalReport(self,BLASTDB,DBNAMES_INSTALLED):
"""
Write a spreadsheet-ready report with statistics on the local copy of the NCBI databases.
The report is a tab-separated value file written to $BLASTDB/localstats.tsv.
"""
TAB = "\t"
OFN = os.path.join(BLASTDB, 'localstats.tsv')
LRfile = open(OFN,'w')
LRfile.write('blastdbkit.py:' + TAB + 'LOCAL BLAST DATABASE REPORT\n')
LRfile.write('\n')
stats = os.statvfs(BLASTDB)
totalsize = (stats.f_frsize * stats.f_blocks)/1000000
available = (stats.f_frsize * stats.f_bavail)/1000000
used = ((stats.f_blocks - stats.f_bfree) * stats.f_frsize)/1000000
        LRfile.write('Database Directory' + TAB + 'Total size (MB)' + TAB + 'Used' + TAB + 'Available' + '\n')
LRfile.write(BLASTDB + TAB + str(round(totalsize)) + TAB + str(round(used)) + TAB + str(round(available)) + '\n')
LRfile.write('\n')
        LRfile.write('DB name' + TAB + 'size (MB)' + TAB + 'Last Update' + '\n')
row = 7
starting_row = row
last_row = -1
        # earlydate is a ridiculously early date used to initialize the search for the
        # most recent file in each database set, eg. nt, nr etc.
timestruct = '1970-01-01 00:00:00'
earlydate = datetime.datetime.strptime(timestruct,"%Y-%m-%d %H:%M:%S")
for dbname in DBNAMES_INSTALLED :
subtotal = 0
MostRecent = earlydate
for file in self.localDBFILES[dbname] :
subtotal = subtotal + self.localDBFILES[dbname][file].Size
if MostRecent < self.localDBFILES[dbname][file].Mtime :
MostRecent = self.localDBFILES[dbname][file].Mtime
sMostRecent = MostRecent.strftime("%Y-%m-%d %H:%M")
LRfile.write(dbname + TAB + str(round(subtotal/1000000)) + TAB + sMostRecent + "\n")
last_row = row
row += 1
        # We don't calculate a total. We insert a formula that lets the spreadsheet calculate the total.
if last_row >= starting_row : # at least one statistics line has been written
LRfile.write('TOTAL:' + TAB + '=SUM(B' + str(starting_row) + ':B' + str(last_row) + ")\n")
LRfile.close()
chmod_ar(OFN)
def WriteFTPReport(self,BLASTDB,FTPSITE,DBNAMES_ALL):
"""
Write a spreadsheet-ready report with statistics on the remote NCBI databases.
The report is a tab-separated value file written to $BLASTDB/ftpstats.tsv.
"""
TAB = "\t"
OFN = os.path.join(BLASTDB, 'ftpstats.tsv')
LRfile = open(OFN,'w')
LRfile.write('blastdbkit.py:' + TAB + 'REMOTE FTP BLAST DATABASE REPORT\n')
LRfile.write('\n')
LRfile.write('FTP site:' + TAB + FTPSITE + '\n')
LRfile.write('Database Directory:' + TAB + FTPINFO[FTPSITE]["dbdir"] + '\n')
LRfile.write('\n')
LRfile.write('DB name:' + TAB + 'compressed size (Mbytes)' + TAB + 'est. decompressed size (Mbytes)' + TAB + 'Modification Time' + '\n')
        row = 7
        starting_row = row
        last_row = -1
        # earlydate is a ridiculously early date used to initialize the search for the
        # most recent file in each database set, eg. nt, nr etc.
timestruct = '1970-01-01 00:00:00'
earlydate = datetime.datetime.strptime(timestruct,"%Y-%m-%d %H:%M:%S")
remoteDBs = list(self.remoteDBFILES.keys())
for dbname in DBNAMES_ALL :
# Do it this way because .keys() doesn't preserve order
# of names in DBNAMES_ALL
# This way, both local and remote database spreadsheets will have
# databases in the same order for easier comparison.
if dbname in remoteDBs :
subtotal = 0
MostRecent = earlydate
for file in self.remoteDBFILES[dbname] :
for F in self.remoteDBFILES[dbname][file][file] :
subtotal = subtotal + F.Size
if MostRecent < F.Mtime :
MostRecent = F.Mtime
sMostRecent = MostRecent.strftime("%Y-%m-%d %H:%M")
fDecompress_ratio = float(self.local_dblist[dbname]["decompress_ratio"])
sizeMB = subtotal/1000000
LRfile.write(dbname + TAB + str(round(sizeMB)) + TAB + str(int(round(sizeMB*fDecompress_ratio))) + TAB + sMostRecent + '\n')
last_row = row
row += 1
        # We don't calculate a total. We insert a formula that lets the spreadsheet calculate the total.
        if last_row >= starting_row : # at least one statistics line has been written
            LRfile.write('TOTAL:' + TAB + '=SUM(B' + str(starting_row) + ':B' + str(last_row) + ')' + TAB + '=SUM(C' + str(starting_row) + ':C' + str(last_row) + ')' + '\n')
LRfile.close()
chmod_ar(OFN)
def FindNewFiles(self,DBLIST) :
"""
        Return a list of the database .tar.gz files that are newer on the server than
those in the local directory. In practice, this means that
if the local *.tar.gz.md5 file is older than the *.tar.gz.md5
file on the remote site, we download the newer *.tar.gz files from the
remote site.
"""
# initialize NewFiles dictionary
NewFiles = {}
for dbname in DBLIST:
NewFiles[dbname] = []
for tgzname in self.remoteDBFILES[dbname] :
#print(tgzname)
for F in self.remoteDBFILES[dbname][tgzname][tgzname] :
if F.Name.endswith(".md5") :
NewDownload = False
UpdateAvailable = False
if not dbname in self.localDBFILES :
NewDownload = True
elif not F.Name in self.localDBFILES[dbname] :
NewDownload = True
elif self.localDBFILES[dbname][F.Name].Mtime < F.Mtime :
UpdateAvailable = True
if NewDownload or UpdateAvailable :
print('New: ' + F.Name)
NewFiles[dbname].append(F.Name[:-4])
NewFiles[dbname].sort()
return NewFiles
def CreateFastaNamefiles(self,BIRCHDIR,BLASTDB,DBNAMES_INSTALLED) :
"""
Create .nam files so that FASTA can find BLAST
databases.
"""
FastaDirName = os.path.join(BIRCHDIR , 'dat' , 'fasta')
os.chdir(FastaDirName)
FileHeader = '<${GB}' #first line in all .nam files
for dbname in DBNAMES_INSTALLED :
# create a file with names of all .psq files (protein) or .nsq files (nucleotide)
# for a given database subset
NameFile = open(dbname + '.nam','w')
NameFile.write(FileHeader + '\n')
if self.local_dblist[dbname]["dbtype"] in ["n","N"] : #nucleic acids seq file
ext = ".nsq"
elif self.local_dblist[dbname]["dbtype"] in ["p","P"] : #protein seq file
ext = ".psq"
# Write the file names out to the .nam file.
# Because dictionaries are not sorted, we need to first
# get a list of keys and sort them. In python2 we could use
# the keys() function, but in python3 keys() returns a view object,
# rather than a list. So we have to force the result into a list
# and then sort it.
filenames = list(self.localDBFILES[dbname].keys())
filenames.sort()
if dbname in ["human_genome", "mouse_genome"] :
print(filenames)
for fn in filenames :
F = self.localDBFILES[dbname][fn]
if F.Name.endswith(ext) :
prefix = F.Name[:-4]
NameFile.write(prefix + ' 12\n')
NameFile.close()
def CreateBLMenu(self,P,DBNAMES,Directory,TNAME) :
"""
Create a BioLegato .blmenu file from a template file
"""
        # Test for end-of-file condition in input
def EOF(line) :
if line == "" :
result = True
else :
result = False
return result
def ReadChooser(DONE) :
"""
Read a template file to be used for creating chooser variables.
            A variable in the final .blmenu file will be implemented for each
database in the list.
Returns a list of lines.
"""
Chooser = []
line = Templatefile.readline()
DONE = EOF(line)
            # '%END_CHOOSER' is an assumed placeholder; the original end-of-chooser
            # markup tag name is unknown.
            while not line.startswith('%END_CHOOSER') and not EOF(line) :
Chooser.append(line)
line = Templatefile.readline()
return Chooser
def WriteChooser(ChooserTemplate,db,dbfileprefix,OutputFile) :
"""
Write a chooser variable to the output file for a given
database section. We do this by substituting markup tags
from the template with desired values.
Another way to have done this would have been to just have this
method write out the complete PCD for each chooser. However,
that approach has the disadvantage that even minor changes in
the PCD have to be made in the Python code, rather than in the
PCD template.
"""
            # The angle-bracket tag names used in the replace() calls below are assumed
            # placeholders; the original markup tag names are unknown.
            for line in ChooserTemplate :
                #outputline = line.replace('<DB>',db)
                outputline = line.replace('<DBFILEPREFIX>',dbfileprefix)
                outputline = outputline.replace('<DBNAME>',dbfileprefix)
                outputline = outputline.replace('<DBTAG>',self.local_dblist[db]["dbtag"])
                if self.local_dblist[db]["installed"] == 1 :
                    InstSymbol = '+'
                else :
                    InstSymbol = '-'
                outputline = outputline.replace('<INSTSYMBOL>',InstSymbol)
                outputline = outputline.replace('<INSTALLED>',str(self.local_dblist[db]["installed"]))
                OutputFile.write(outputline)
TemplateFN = os.path.join(Directory, TNAME + '.blmenu.template')
OutputFN = os.path.join(Directory, TNAME + '.blmenu')
Templatefile = open(TemplateFN,'r')
OutputFile = open(OutputFN,'w')
OutputFile.write('# DO NOT EDIT THIS FILE!\n')
OutputFile.write('# This file is automatically generated by blastdbkit.py during installation,\n')
OutputFile.write('# update or by birchadmin --> UpdateAddInstall\n')
line = Templatefile.readline()
DONE = EOF(line)
dblist = "" #used for the --dblist option
while not DONE :
if line.startswith('') > -1 :
line = line.replace('',dblist)
OutputFile.write(line)
elif line.find('') > -1 :
line = line.replace('',P.BLASTDB)
OutputFile.write(line)
else :
OutputFile.write(line)
if not DONE :
line = Templatefile.readline()
DONE = EOF(line)
Templatefile.close()
OutputFile.close()
chmod_ar(OutputFN)
def UpdateFiles(self,P,DBNAMES_INSTALLED,DBNAMES_ALL,LOGFILE) :
"""
Cycle through the names of databases in the order given in UpdateList.
If the name is in the list to be updated, first delete the files for that
name to create some space. Next download and install
the new files. When all files have been downloaded, send an email to the
BIRCH administrator.
"""
Indent4 = ' '
if 'all' in P.DBLIST :
UpdateList = DBNAMES_INSTALLED
else :
UpdateList = P.DBLIST
NewFiles = self.FindNewFiles(UpdateList)
for dbname in UpdateList :
#print(dbname)
if len(NewFiles[dbname]) == 0 :
LOGFILE.write(Indent4 + Indent4 + dbname + ' up to date. Nothing to install.' + '\n')
else :
LOGFILE.write(Indent4 + '----- Updating ' + dbname + ' -----\n')
for file in NewFiles[dbname] :
FTPDIR = FTPINFO[P.FTPSITE]["dbdir"]
SUCCESS = InstallFile(file,P.FTPSITE,FTPDIR,LOGFILE)
if SUCCESS :
LOGFILE.write(Indent4 + Indent4 + 'Successfully installed ' + file + '\n')
else :
LOGFILE.write(Indent4 + Indent4 + '>>> INSTALL OF ' + file + ' FAILED\n')
LOGFILE.write('\n')
def AddFiles(self,P,DBNAMES_INSTALLED,DBNAMES_ALL,LOGFILE) :
"""
        Cycle through the names of databases in the order given in AddList and install
        any database that is not already installed locally.
"""
Indent4 = ' '
if 'all' in P.DBLIST :
AddList = DBNAMES_ALL
else :
AddList = P.DBLIST
NewFiles = self.FindNewFiles(AddList)
for dbname in AddList :
if dbname in DBNAMES_INSTALLED :
LOGFILE.write(Indent4 + dbname + ' already installed\n')
else :
LOGFILE.write(Indent4 + '----- Adding ' + dbname + ' -----\n')
FilesToGet = []
for file in self.remoteDBFILES[dbname] :
#print(file)
FilesToGet.append(file)
FilesToGet.sort()
for file in FilesToGet :
FTPDIR = FTPINFO[P.FTPSITE]["dbdir"]
SUCCESS = InstallFile(file,P.FTPSITE,FTPDIR,LOGFILE)
if SUCCESS :
LOGFILE.write(Indent4 + Indent4 + 'Successfully installed ' + file + '\n')
else :
LOGFILE.write(Indent4 + Indent4 + '>>> INSTALL OF ' + file + ' FAILED\n')
LOGFILE.write('\n')
def Revise(self,P,DBNAMES_ALL) :
"""
Revise local database metadata, BioLegato menus and FASTA .nam files
"""
global DBNAMES_INSTALLED
DBNAMES_INSTALLED = self.CheckBLASTDB(P.BLASTDB,DBNAMES_ALL)
if DEBUG :
print('Installed: ' + str(DBNAMES_INSTALLED))
self.WriteBLASTDBList(DBNAMES_ALL)
self.WriteLocalReport(P.BLASTDB,DBNAMES_INSTALLED)
# Write BioLegato menus for birchadmin
Directory = os.path.join(P.BIRCHDIR, 'dat', 'birchadmin', 'PCD', 'UpdateAddInstall')
for menu in ['BlastDBreport','BlastDBUpdate','BlastDBAdd', 'BlastDBDelete'] :
self.CreateBLMenu(P,DBNAMES_ALL,Directory,menu)
# Write BioLegato menus for bldna, blprotein to search local BLAST databases
Directory = os.path.join(P.BIRCHDIR, 'dat', 'bldna', 'PCD', 'Database')
for menu in ['BLASTNlocal', 'BLASTXlocal','TBLASTXlocal'] :
self.CreateBLMenu(P,DBNAMES_INSTALLED,Directory,menu)
for menu in ['BlastDBreport'] :
self.CreateBLMenu(P,DBNAMES_ALL,Directory,menu)
Directory = os.path.join(P.BIRCHDIR, 'dat', 'blprotein', 'PCD', 'Database')
for menu in ['BLASTPlocal','TBLASTNlocal'] :
self.CreateBLMenu(P,DBNAMES_INSTALLED,Directory,menu)
for menu in ['BlastDBreport'] :
self.CreateBLMenu(P,DBNAMES_ALL,Directory,menu)
# Write BioLegato menus for blnfetch, blpfetch to retrieve data from local BLAST databases
Directory = os.path.join(P.BIRCHDIR, 'dat', 'blnfetch', 'PCD', 'Database')
for menu in ['SEQFETCHlocal'] :
self.CreateBLMenu(P,DBNAMES_INSTALLED,Directory,menu)
Directory = os.path.join(P.BIRCHDIR, 'dat', 'blpfetch', 'PCD', 'Database')
for menu in ['SEQFETCHlocal'] :
self.CreateBLMenu(P,DBNAMES_INSTALLED,Directory,menu)
# Create name files so that FASTA can find BLAST databases
self.CreateFastaNamefiles(P.BIRCHDIR,P.BLASTDB,DBNAMES_INSTALLED)
Directory = os.path.join(P.BIRCHDIR, 'dat', 'bldna', 'PCD', 'Database')
for menu in ['FASTADNA','FASTXY'] :
self.CreateBLMenu(P,DBNAMES_INSTALLED,Directory,menu)
Directory = os.path.join(P.BIRCHDIR, 'dat', 'blprotein', 'PCD', 'Database')
for menu in ['FASTAPROTEIN','TFASTA'] :
self.CreateBLMenu(P,DBNAMES_INSTALLED,Directory,menu)
#======================== MAIN PROCEDURE ==========================
def main():
"""
Called when not in documentation mode.
"""
# Read parameters from command line
P = Parameters()
adminEmail = GetBIRCHProperties(P.BIRCHDIR,"adminEmail")
if DEBUG :
print('adminEmail: ' + adminEmail)
Settings = BIRCHSettings(P)
DBList = BLASTDBList(P,DBNAMES_ALL)
print('DBNAMES_ALL: ' + str(DBNAMES_ALL))
# Set BLASTDB variable according to the priority: --blastdb, then the value from BIRCH.Settings file,
# then BLASTDB environment variable, then as a final fallback, use the default path of $BIRCH/GenBank.
if P.BLASTDB == "" :
if Settings.dict['BLASTDB'] == "" :
if "BLASTDB" in os.environ :
P.BLASTDB = str(os.environ['BLASTDB'])
else :
P.BLASTDB = os.path.join(P.BIRCHDIR, 'GenBank')
else :
P.BLASTDB = Settings.dict['BLASTDB']
Settings.dict['BLASTDB'] = P.BLASTDB
LOGFN = os.path.join(P.BLASTDB,'blastdbkit.log')
if P.SHOWALL:
print("--------- Showall ----------")
AvailableDBs = FTPDBList(P.FTPSITE)
for dbname in AvailableDBs :
print(dbname)
elif P.CONFIGURE:
print("--------- Configure ----------")
# * * * * Step 1: Save local settings * * * *
Settings.WriteBIRCHSettings(P.SFN)
Settings.WriteBIRCHenvBourne(P)
Settings.WriteBIRCHenvCsh(P)
# * * * * Step 2: Revise BioLegato menus and FASTA .nam files * * * *
# Write BioLegato menus for birchadmin
#DBNAMES_INSTALLED = DBList.CheckBLASTDB(P.BLASTDB,DBNAMES_ALL)
DBList.Revise(P,DBNAMES_ALL)
elif P.REPORTLOCAL:
print("--------- ReportLocal ----------")
DBNAMES_INSTALLED = DBList.CheckBLASTDB(P.BLASTDB,DBNAMES_ALL)
DBList.WriteLocalReport(P.BLASTDB,DBNAMES_INSTALLED)
elif P.REPORTFTP:
print("--------- ReportFTP ----------")
DBList.CheckFTPsite(P.FTPSITE,DBNAMES_ALL)
DBList.WriteFTPReport(P.BLASTDB,P.FTPSITE,DBNAMES_ALL)
elif P.ADD:
print("---------- Addfiles ----------")
LOGFILE = open(LOGFN,'w')
LOGFILE.write('\n')
LOGFILE.write('Local host: ' + LocalHostname() + '\n')
LOGFILE.write('Local BLAST database directory: ' + P.BLASTDB + '\n')
LOGFILE.write('\n')
LOGFILE.write('FTP Site: ' + P.FTPSITE + '\n')
LOGFILE.write('FTP Directory: ' + FTPINFO[P.FTPSITE]['dbdir'] + '\n')
LOGFILE.write('\n')
StartTime = datetime.datetime.now()
LOGFILE.write('Adding files: \n')
LOGFILE.write('Start time: ' + str(StartTime) + '\n')
LOGFILE.write('\n')
# * * * * Step 1: Add new files to the local database * * * *
DBNAMES_INSTALLED = DBList.CheckBLASTDB(P.BLASTDB,DBNAMES_ALL)
DBList.CheckFTPsite(P.FTPSITE,DBNAMES_ALL)
DBList.AddFiles(P,DBNAMES_INSTALLED,DBNAMES_ALL,LOGFILE)
# * * * * Step 2: Revise BioLegato menus and FASTA .name files * * * *
#DBNAMES_INSTALLED = DBList.CheckBLASTDB(P.BLASTDB,DBNAMES_ALL)
DBList.Revise(P,DBNAMES_ALL)
FinishTime = datetime.datetime.now()
LOGFILE.write('\n')
LOGFILE.write('Finish time: ' + str(FinishTime) + '\n')
ElapsedTime = FinishTime - StartTime
LOGFILE.write('Elapsed time: ' + str(ElapsedTime) + '\n')
LOGFILE.close()
# * * * * Step 3: Notify user when job is completed. * * * *
Subject = 'blastdbkit.py --add completed'
        Message = 'blastdbkit.py: Completed Installing Blast Databases<br>'
LOGFILE = open(os.path.join(P.BLASTDB,'blastdbkit.log'),'r')
for line in LOGFILE.readlines() :
            Message = Message + line + '<br>'
LOGFILE.close()
SendEmail(adminEmail,[adminEmail],Subject,Message)
elif P.DELETE:
print("---------- Delete files ----------")
LOGFILE = open(LOGFN,'w')
LOGFILE.write('\n')
LOGFILE.write('Local host: ' + LocalHostname() + '\n')
LOGFILE.write('Local BLAST database directory: ' + P.BLASTDB + '\n')
LOGFILE.write('\n')
StartTime = datetime.datetime.now()
LOGFILE.write('Deleting files: \n')
LOGFILE.write('Start time: ' + str(StartTime) + '\n')
LOGFILE.write('\n')
# * * * * Step 1: Delete files from the local database * * * *
DBNAMES_INSTALLED = DBList.CheckBLASTDB(P.BLASTDB,DBNAMES_ALL)
if 'all' in P.DBLIST :
DeleteList = DBNAMES_INSTALLED
else :
DeleteList = P.DBLIST
for dbname in DeleteList :
if dbname in DBNAMES_INSTALLED :
LOGFILE.write(' ----- Deleting files from ' + dbname + ' -----\n')
#if dbname in MouseSet : #SPECIAL CASE
if dbname == "mouse_genome" : #SPECIAL CASE
#DeleteFilesByPrefix(P.BLASTDB,'allcontig_and_rna',LOGFILE)
for prefix in MouseSet:
DeleteFilesByPrefix(P.BLASTDB,prefix,LOGFILE)
elif dbname == "human_genome":
for prefix in HumanSet:
DeleteFilesByPrefix(P.BLASTDB,prefix,LOGFILE)
else :
DeleteFilesByPrefix(P.BLASTDB,dbname,LOGFILE)
else:
LOGFILE.write(dbname + ' not installed. Doing nothing. \n')
LOGFILE.write('\n')
# * * * * Step 2: Revise BioLegato menus and FASTA .nam files * * * *
#DBNAMES_INSTALLED = DBList.CheckBLASTDB(P.BLASTDB,DBNAMES_ALL)
DBList.Revise(P,DBNAMES_ALL)
FinishTime = datetime.datetime.now()
LOGFILE.write('\n')
LOGFILE.write('Finish time: ' + str(FinishTime) + '\n')
ElapsedTime = FinishTime - StartTime
LOGFILE.write('Elapsed time: ' + str(ElapsedTime) + '\n')
LOGFILE.close()
# * * * * Step 3: Notify user when job is completed. * * * *
Subject = 'blastdbkit --delete completed'
        Message = 'blastdbkit.py: Completed Deleting Blast Database files<br>'
LOGFILE = open(os.path.join(P.BLASTDB,'blastdbkit.log'),'r')
for line in LOGFILE.readlines() :
            Message = Message + line + '<br>'
LOGFILE.close()
SendEmail(adminEmail,[adminEmail],Subject,Message)
elif P.UPDATE:
print("---------- Update ----------")
LOGFILE = open(LOGFN,'w')
LOGFILE.write('\n')
LOGFILE.write('Local host: ' + LocalHostname() + '\n')
LOGFILE.write('Local BLAST database directory: ' + P.BLASTDB + '\n')
LOGFILE.write('\n')
LOGFILE.write('FTP Site: ' + P.FTPSITE + '\n')
LOGFILE.write('FTP Directory: ' + FTPINFO[P.FTPSITE]['dbdir'] + '\n')
LOGFILE.write('\n')
StartTime = datetime.datetime.now()
LOGFILE.write('Updating files: \n')
LOGFILE.write('Start time: ' + str(StartTime) + '\n')
LOGFILE.write('\n')
# * * * * Step 1: Update files in the local database * * * *
DBNAMES_INSTALLED = DBList.CheckBLASTDB(P.BLASTDB,DBNAMES_ALL)
DBList.CheckFTPsite(P.FTPSITE,DBNAMES_ALL)
DBList.UpdateFiles(P,DBNAMES_INSTALLED,DBNAMES_ALL,LOGFILE)
# * * * * Step 2: Revise BioLegato menus and FASTA .nam files * * * *
#DBNAMES_INSTALLED = DBList.CheckBLASTDB(P.BLASTDB,DBNAMES_ALL)
DBList.Revise(P,DBNAMES_ALL)
FinishTime = datetime.datetime.now()
LOGFILE.write('\n')
LOGFILE.write('Finish time: ' + str(FinishTime) + '\n')
ElapsedTime = FinishTime - StartTime
LOGFILE.write('Elapsed time: ' + str(ElapsedTime) + '\n')
LOGFILE.close()
# * * * * Step 3: Notify user when job is completed. * * * *
Subject = 'blastdbkit --update completed'
        Message = 'blastdbkit.py: Completed Updating Blast Databases<br>'
LOGFILE = open(os.path.join(P.BLASTDB,'blastdbkit.log'),'r')
for line in LOGFILE.readlines() :
            Message = Message + line + '<br>'
LOGFILE.close()
SendEmail(adminEmail,[adminEmail],Subject,Message)
else:
print(USAGE)
# We need to flush the stdout buffer to avoid a sys.excepthook error message.
# See http://stackoverflow.com/questions/12790328/how-to-silence-sys-excepthook-is-missing-error
try:
sys.stdout.flush()
sys.stderr.flush()
except:
pass
BM.exit_success()
if __name__ == "__main__":
main()
else:
#used to generate documentation
import doctest
doctest.testmod()
#if (BM.documentor() or "-test" in sys.argv):
# pass
#else:
# main()