#!/usr/bin/env python3

"""
optparse is deprecated in favor of argparse as of Python 2.7. However,
since 2.7 is not always present on many systems, at this writing, it is
safer to stick with optparse for now. It should be easy to change later,
since the syntax is very similar between argparse and optparse.

from optparse import OptionParser
"""

from optparse import OptionParser

import ftplib
import hashlib
import time
import datetime
import os
import os.path
import re
import stat
import subprocess
import sys
import tarfile

'''
blastdbkit.py - Set environment variables for BioLegato Helper Applications

Synopsis:
    blastdbkit.py --showall [--ftpsite url]
    blastdbkit.py --configure [--birchdir directory] [--blastdb directory]
    blastdbkit.py --reportlocal
    blastdbkit.py --reportftp [--ftpsite url]
    blastdbkit.py --add [--ftpsite url] --dblist db[,db]
    blastdbkit.py --delete --dblist db[,db]
    blastdbkit.py --update [--ftpsite url] --dblist db[,db]

@modified: January 10, 2021
@author: Brian Fristensky
@contact: brian.fristensky@umanitoba.ca
'''

blib = os.environ.get("BIRCHPYLIB")
sys.path.append(blib)
from birchlib import Birchmod

PROGRAM = "blastdbkit.py : "
USAGE = "\n\tUSAGE: blastdbkit.py --showall [--ftpsite url]" + \
    "\n\t\tblastdbkit.py --configure [--birchdir directory] [--blastdb directory]" + \
    "\n\t\tblastdbkit.py --reportlocal" + \
    "\n\t\tblastdbkit.py --reportftp [--ftpsite url]" + \
    "\n\t\tblastdbkit.py --add [--ftpsite url] --dblist db[,db]" + \
    "\n\t\tblastdbkit.py --delete --dblist db[,db]" + \
    "\n\t\tblastdbkit.py --update [--ftpsite url] --dblist db[,db]"

DEBUG = True
if DEBUG :
    print('Debugging mode on')

BM = Birchmod(PROGRAM, USAGE)

BIRCHvariables = ['BIRCH_PROMPT','BLASTDB']

"""
FTPINFO is a dictionary whose keys are ftp site addresses, and whose values
are dictionaries of field names and their values:
    dbdir - directory in which the BLAST-formatted databases are found.
    UseMLSD - True or False. In order to use MLSD with FTP, the site must
        return the filename, size, and modification time. At this writing,
        only ftp.ncbi.nih.gov supports MLSD and returns all three values.
        ftp.ebi.ac.uk does not support MLSD. ftp.hgc.jp supports MLSD, but
        does not return the size of a file.
Future: Possible additional fields might include timezone or geographical
location of an FTP site.
"""
FTPINFO = {"ftp.ncbi.nih.gov" : {"dbdir":"/blast/db","UseMLSD":True} ,
           "ftp.hgc.jp" : {"dbdir":"pub/mirror/ncbi/blast/db","UseMLSD":False},
           "ftp.ebi.ac.uk" : {"dbdir":"pub/blast/db","UseMLSD":False}
           }
#FTPINFO = {"ftp.ncbi.nih.gov" : "/blast/db" , "ftp.hgc.jp" : "pub/mirror/ncbi/blast/db", "mirrors.vbi.vt.edu" : "mirrors/ftp.ncbi.nih.gov/blast/db"}

# For convenience, we create two lists of database names from local_dblist.
# These will be used by other methods that do things like checking for
# the presence of databases etc. Importantly, DBNAMES_ALL has the database names
# in the order in which they are read, which is not true in Python dictionaries.
# When database lists are stored in dictionaries, we can access their components
# in the original order by referencing DBNAMES_ALL. DBNAMES_INSTALLED lists those
# databases that are currently installed.
DBNAMES_ALL = []
DBNAMES_INSTALLED = []

# SPECIAL CASES : mouse_genome and human_genome
# have filenames that differ from their database names. We define sets
# of file prefixes used for each of these databases.
MouseSet = ["GCF_000001635.26_top_level"]
HumanSet = ["GCF_000001405.38_top_level"]

# - - - - - - - - - - - - - Utility classes - - - - - - - - - - - - - - - - -
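# --------------------------------------------------------------------------
# Illustrative sketch only (nothing in this script calls it): the FTPINFO
# "UseMLSD" flag above exists because MLSD (RFC 3659) directory listings are
# machine-readable "fact" lines, while plain LIST output must be parsed
# positionally (see FTPDirList and FileMetaData.getFTPMetaData below).
# The helper name and the sample line are invented for illustration; real
# servers may return additional or different facts.
#
#   _demo_parse_mlsd_facts("modify=20201231235959;size=1048576;type=file; nt.00.tar.gz")
#   -> ('nt.00.tar.gz', 1048576, '20201231235959')
def _demo_parse_mlsd_facts(line) :
    """
    Split one MLSD line into (name, size, modify) using its key=value facts.
    """
    facts, name = line.rsplit(';', 1)
    pairs = dict(f.split('=', 1) for f in facts.split(';') if '=' in f)
    return name.strip(), int(pairs.get('size', 0)), pairs.get('modify', '')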
""" if os.path.exists(filename): st = os.stat(filename) os.chmod(filename, st.st_mode | stat.S_IREAD \ | stat.S_IRGRP | stat.S_IROTH) def chmod_arx(filename): """ Make a file or directory world-readable and world-executable/searchable. """ if os.path.exists(filename): st = os.stat(filename) os.chmod(filename, st.st_mode | stat.S_IEXEC | stat.S_IREAD \ | stat.S_IXGRP | stat.S_IRGRP | stat.S_IXOTH \ | stat.S_IROTH) def LocalHostname(): """ Return the name of the local machine. Tries a number of methods to get a name other than 'localhost' or a null result. """ import socket import platform def CheckName(name) : if name == None or name.startswith("localhost") or name == "" : OKAY = False else : OKAY = True return OKAY name = os.getenv('HOSTNAME') if not CheckName(name) : name = platform.uname()[1] if not CheckName(name) : if socket.gethostname().find('.')>=0: name=socket.gethostname() else: name=socket.gethostbyaddr(socket.gethostname())[0] return name def GetBIRCHProperties(BIRCHDIR,PropName) : """ Retrieve a value from BIRCH.properties. eg. To retrieve the value of BirchProps.adminEmail: GetBIRCHProperties(BIRCHDIR,"adminEmail") """ PFN = os.path.join(BIRCHDIR , 'local' , 'admin' , 'BIRCH.properties') pfile = open(PFN,'r') Value = "" Target = 'BirchProps.' + PropName lines = pfile.readlines() pfile.close() plen = len(lines) if plen > 0 : i = 0 while (i < plen) and (Value == "") : line = lines[i] # ignore blank lines and comment lines if not (line.startswith('#')) : tokens = line.split("=") if tokens[0] == Target : Value = tokens[1].strip() i += 1 return Value def DeleteFilesByPrefix(dest,dbname,LOGFILE) : """ Delete all files from dest whose names begin with dbname """ Indent4 = ' ' os.chdir(dest) rawlist = os.listdir(dest) rawlist.sort() for filename in rawlist : prefix = filename.split('.')[0] if prefix == dbname : os.remove(filename) LOGFILE.write(Indent4 + filename + ' deleted\n') def SendEmail(From,To,Subject,Text) : """ Very simple email method adapted from: http://stackoverflow.com/questions/882712/sending-html-email-using-python There are more elaborate examples on this site for sending HTML messages and attachments. """ import smtplib from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText Host = 'localhost' msg = MIMEMultipart('alternative') msg['Subject'] = Subject Html = """\
<html>
  <head></head>
  <body>
    <p>%s</p>
  </body>
</html>
""" %(Text) part1 = MIMEText(Text, 'plain') part2 = MIMEText(Html, 'html') msg.attach(part1) msg.attach(part2) try: server = smtplib.SMTP(Host) server.sendmail(From, To, msg.as_string()) server.quit() print("Successfully sent email") except : print("Error: unable to send email") def getValue(tokens,kw) : """ Return a value for a key/value pair, where = is the separator. """ i = 0 L = len(tokens) FOUND = False retval = "" while i < L and not FOUND : t = tokens[i].split('=') if t[0] == kw : FOUND = True retval = t[1] i += 1 return retval def FTPDirList(FTPSITE) : """ Get a list of files found at the remote FTP site. """ print('Checking ' + FTPSITE) FTPDIR = FTPINFO[FTPSITE]["dbdir"] ftp = ftplib.FTP(FTPSITE) ftp.login() ftp.cwd(FTPDIR) dirlines = [] if FTPINFO[FTPSITE]["UseMLSD"] : ftp.retrlines('MLSD',dirlines.append) # Python 3 - use ftp.mlsd else: ftp.retrlines('LIST',dirlines.append) ftp.quit() return dirlines def FTPDBList(FTPSITE) : """ Get a list of BLAST databases found at the remote FTP site. """ dirlines = FTPDirList(FTPSITE) AvailableDBs = [] for line in dirlines: F = FileMetaData() F.getFTPMetaData(line,FTPINFO[FTPSITE]["UseMLSD"]) if F.Name.endswith('.tar.gz') : tokens = F.Name.split('.') dbname = tokens[0] # eg. nt if not dbname in AvailableDBs : AvailableDBs.append(dbname) AvailableDBs.sort() return AvailableDBs def download_file(FTPSITE,FTPDIR,FN): """ Download a file from an FTP site. """ ftp = ftplib.FTP(FTPSITE) ftp.login() ftp.cwd(FTPDIR) Outfile = open(FN, 'wb') ftp.retrbinary('RETR ' + FN, Outfile.write) Outfile.close() ftp.quit() def md5Checksum(filePath): """ Calculate an md5 checksum for a file. Writen by Joel Verhagen http://joelverhagen.com/blog/2011/02/md5-hash-of-file-in-python/ """ with open(filePath, 'rb') as fh: m = hashlib.md5() while True: data = fh.read(8192) if not data: break m.update(data) return m.hexdigest() def extracttgz(fname): """ Extract all files from a .tar.gz file Adapted from: http://sukhbinder.wordpress.com/2014/03/06/untar-a-tar-file-with-python/ """ if (fname.endswith("tar.gz")): tar = tarfile.open(fname) tar.extractall() tar.close() def InstallFile(tgzfile,FTPSITE,FTPDIR,LOGFILE) : """ Download a BLAST .tar.gz and its corresponding .md5 file from the FTP site, and untar the file in in the current directory. Delete the .tar.gz file when done, but keep the .md5 file. 
""" SUCCESS=False MAX_DOWNLOAD_ATTEMPTS=3 ATTEMPTS = 0 md5file = tgzfile + '.md5' Indent4 = ' ' while not SUCCESS and (ATTEMPTS < MAX_DOWNLOAD_ATTEMPTS) : #Download .tar.gz file and .tar.gz.md5 file try : download_file(FTPSITE,FTPDIR,tgzfile) except : LOGFILE.write(Indent4 + 'Failed to download ' + tgzfile + '\n') try : download_file(FTPSITE,FTPDIR,md5file) except : LOGFILE.write(Indent4 + 'Failed to download ' + md5file + '\n') # Calculate md5 checksum, and compare it with the checksum file if os.path.exists(tgzfile) and os.path.exists(md5file) : LocalChecksum = md5Checksum(tgzfile) #with open(md5file, 'rb') as fh : with open(md5file, 'r') as fh : line = fh.readline() RemoteChecksum = line.split(" ")[0] if LocalChecksum == RemoteChecksum : SUCCESS = True else : LOGFILE.write(Indent4 + '>>> Checksum for ' + tgzfile + ' does not match ' + md5file + '\n') os.remove(md5file) ATTEMPTS += 1 # Uncompress and extract files from the .tar.gz file if SUCCESS : try : extracttgz(tgzfile) except : SUCCESS = False LOGFILE.write(Indent4 + '>>> Error extracting files from ' + tgzfile + '\n') os.remove(tgzfile) return SUCCESS # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - class FileMetaData: """ Holds metadata for a file. """ def __init__(self): """ Initializes arguments: Name = "" Size = 0 MDate = datetime.time() """ self.Name = "" self.Size = 0 now = datetime.datetime.now() self.CurrentYear = now.year self.Mtime = now def getLocalMetaData(self,FN) : """ Retrieve metadata for a local file. """ finfo = os.stat(FN) self.Name = FN self.Size = int(finfo.st_size) timestamp = finfo.st_mtime self.Mtime = datetime.datetime.fromtimestamp(timestamp) #print(self.Name + ' ' + str(self.Size) + ' ' + str(self.Mtime) ) def getFTPMetaData(self,line,UseMLSD) : """ Parse metadata for a file on a remote FTP site. """ if UseMLSD : tokens = line.split(';') numtokens = len(tokens) self.Name = tokens[numtokens-1].lstrip() timestruct = getValue(tokens,'modify') self.Mtime = datetime.datetime.strptime(timestruct,"%Y%m%d%H%M%S") self.Size = int(getValue(tokens,'size')) else: """ Parsing directory lines is hazardous! Directory listings from a number of FTP sites seem to all be consistent with tokenizing into nine non-blank fields: Field Content ----------------------- 0 permissions 1 ? 2 owner 3 group 4 size 5 Month 6 Day 7 Time or Year 8 Name Field 8, is a special case, because a name might include blanks. For the purposes of downloading NCBI files, we don't need to worry about blanks in names. """ # Parse directory lines into non-blank fields # When None is used as the separator, multiple seperators are # parsed as a single seperator. Neat! tokens = line.split(None) #print(line) L = len(tokens) if L == 9 : # Name and Size are easy self.Name = tokens[8] self.Size = int(tokens[4]) # Modification time takes a bit more work. Month = tokens[5] Day = tokens[6] if ':' in tokens[7] : #directory listings for files from the previous 12 months will not list a # year, only a modification time. The year will either be the current year # or the previous year. Year = str(self.CurrentYear) TimeHM = tokens[7] else : Year = tokens[7] TimeHM = "00:00" TimeStr = Day + ' ' + Month + ' ' + Year + ' ' + TimeHM #self.Mtime = time.strptime(TimeStr, "%d %b %Y %H:%M") self.Mtime = datetime.datetime.strptime(TimeStr, "%d %b %Y %H:%M") #directory listings for files from the previous 12 months will not list a # year, only a modification time. The year will either be the current year # or the previous year. 
The result would be a time in the future. If that # happens, we have to decrement the Year by 1 and recalculate the Mtime. if self.Mtime > datetime.datetime.now() : Year = str(self.CurrentYear-1) TimeStr = Day + ' ' + Month + ' ' + Year + ' ' + TimeHM self.Mtime = datetime.datetime.strptime(TimeStr, "%d %b %Y %H:%M") #print(self.Name + ' ' + str(self.Size) + ' ' +str(self.Mtime)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - class Parameters: """ Wrapper class for command line parameters """ def __init__(self): """ Initializes arguments: SHOWALL = False CONFIGURE = False BIRCHDIR = "" BLASTDB = "" REPORTLOCAL = False REPORTFTP = False ADD = False DELETE = False UPDATE = False FTPSITE = "" DBLIST = [] SFN= "" Then calls read_args() to fill in their values from command line """ self.SHOWALL = False self.CONFIGURE = False self.BIRCHDIR = "" self.BLASTDB = "" self.REPORTLOCAL = False self.REPORTFTP = False self.ADD = False self.DELETE = False self.UPDATE = False self.FTPSITE = "" self.DBLIST = [] self.read_args() self.SFN = os.path.join(self.BIRCHDIR , 'local' , 'admin' , 'BIRCH.settings') if DEBUG : print('------------ Parameters from command line ------') print(' SHOWALL: ' + str(self.SHOWALL)) print(' CONFIGURE: ' + str(self.CONFIGURE)) print(' BIRCHDIR: ' + self.BIRCHDIR) print(' BLASTDB: ' + self.BLASTDB) print(' REPORTLOCAL: ' + str(self.REPORTLOCAL)) print(' REPORTFTP: ' + str(self.REPORTFTP)) print(' ADD: ' + str(self.ADD)) print(' DELETE: ' + str(self.DELETE)) print(' UPDATE: ' + str(self.UPDATE)) print(' FTPSITE: ' + self.FTPSITE) print(' DBLIST: ' + str(self.DBLIST)) print(' Settings file: ' + self.SFN) print() def read_args(self): """ Read command line arguments into a Parameter object """ parser = OptionParser() parser.add_option("--showall", dest="showall", action="store_true", default=False, help="Show all available databases at FTPSITE") parser.add_option("--configure", dest="configure", action="store_true", default=False, help="in a new install or update, set BIRCHDB environment variable") parser.add_option("--birchdir", dest="birchdir", action="store", default="", help="path to BIRCH installation directory") parser.add_option("--blastdb", dest="blastdb", action="store", default="", help="path to Blast Database directory") parser.add_option("--reportlocal", dest="reportlocal", action="store_true", default=False, help="Write local database stats to $BLASTDB/localstats.tsv") parser.add_option("--reportftp", dest="reportftp", action="store_true", default=False, help="Write database stats from remote FTP site to $BLASTDB/ftpstats.tsv") parser.add_option("--add", dest="add", action="store_true", default=False, help="add files to Blast database") parser.add_option("--delete", dest="deletefiles", action="store_true", default=False, help="delete files from Blast database") parser.add_option("--update", dest="update", action="store_true", default=False, help="download and install updates for Blast Database") parser.add_option("--ftpsite", dest="ftpsite", action="store", default="", help="FTP site from which to download update files") parser.add_option("--dblist", dest="rawdblist", action="store", default="", help="list of database files to add, delete or update") (options, args) = parser.parse_args() self.SHOWALL = options.showall self.CONFIGURE = options.configure self.BIRCHDIR = options.birchdir if self.BIRCHDIR == "" : self.BIRCHDIR = str(os.environ['BIRCH']) self.BLASTDB = options.blastdb self.REPORTLOCAL = options.reportlocal self.REPORTFTP = options.reportftp 
self.ADD = options.add self.DELETE = options.deletefiles self.UPDATE = options.update self.FTPSITE = options.ftpsite # We need to deal with the possibility that the user has prefixed the URL # with a protocol. The protocol will be prepended later if we need it. if self.FTPSITE.startswith('http://') : self.FTPSITE = self.FTPSITE[7:] elif self.FTPSITE.startswith('https://') : self.FTPSITE = self.FTPSITE[8:] if self.FTPSITE == "" : self.FTPSITE = "ftp.ncbi.nih.gov" if options.rawdblist != "" : tokens = options.rawdblist.split(",") # When BioLegato calls blastdbkit.py, the comma-separated list in --dblist will # usually contain empty elements. We need to remove those empty components from dblist # Evaluating a list comprehension is the most straightforward way. self.DBLIST = [x for x in tokens if x != ""] # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - class BIRCHSettings: """ Data and methods for the BIRCH Settings file. """ def __init__(self,P): """ Initializes arguments: dict = {} """ self.dict = {} for var in BIRCHvariables: self.dict[var] = "" if os.path.exists(P.SFN) : self.ReadBIRCHSettings(P.SFN) #else: # DFN = os.path.join(self.BIRCH , 'admin' , 'BIRCH.settings.default') # self.ReadBIRCHSettings(DFN) if DEBUG : print('- - - - - BIRCH Settings - - - - -') for k in self.dict : print(' ' + k + ',' + self.dict[k]) def ReadBIRCHSettings(self,FN): """ Read current values of BIRCHvariables from BIRCH.settings. """ if os.path.exists(FN) : Sfile = open(FN,'r') for line in Sfile : line = line.strip() # ignore blank lines and comment lines if (line != "" and line[0] != '#') : tokens = line.split("=") if tokens[0] in BIRCHvariables : self.dict[tokens[0]] = tokens[1] Sfile.close() def WriteBIRCHSettings(self,SFN): """ Write current values of BIRCHvariables to BL.properties. file. """ Sfile = open(SFN,'w') Sfile.write('# DO NOT EDIT THIS FILE!\n') Sfile.write('# This file is automatically generated by blastdbkit.py during installation,\n') Sfile.write('# update or by birchadmin --> Preferences --> Settings\n') for k in self.dict : Sfile.write(k + '=' + self.dict[k] + '\n') Sfile.close() def WriteBIRCHenvBourne(self,P): """ Write bash code for setting BIRCHvariables to birch_settings_Bourne.source. Used for Bourne type shells eg. bash, sh """ ENVFN = os.path.join(P.BIRCHDIR, 'admin', 'birch_settings_Bourne' + '.source') Sfile = open(ENVFN,'w') Sfile.write('# DO NOT EDIT THIS FILE!\n') Sfile.write('# This file is automatically generated by blastdbkit.py during installation,\n') Sfile.write('# update or by birchadmin --> BIRCHSettings\n') #Enclose value of argument in single quotes. This is maninly for cases such as #BL_Terminal='gnome-terminal -e' for k in self.dict : Sfile.write(k + "='" + self.dict[k] + "'\n") Sfile.write('export ') for var in BIRCHvariables : Sfile.write(' ' + var) Sfile.write('\n') Sfile.close() chmod_ar(ENVFN) def WriteBIRCHenvCsh(self,P): """ Write csh code for setting BIRCHvariables to birch_settings_csh.source. Used for C type shells eg. 
csh, tcsh """ ENVFN = os.path.join(P.BIRCHDIR, 'admin', 'birch_settings_csh' + '.source') Sfile = open(ENVFN,'w') Sfile.write('# DO NOT EDIT THIS FILE!\n') Sfile.write('# This file is automatically generated by blastdbkit.py during installation,\n') Sfile.write('# update or by birchadmin --> BIRCHSettings\n') for k in self.dict : Sfile.write('setenv ' +k + ' ' + self.dict[k] + '\n') Sfile.close() chmod_ar(ENVFN) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - class BLASTDBList: """ Data and methods for the BLAST databases. """ def __init__(self,P,DBNAMES_ALL): """ Initializes local BLASTDB list and sets DBNAMES_ALL """ # Read in the default BLASTDB.list. self.default_listfile = os.path.join(P.BIRCHDIR , 'admin' , 'Settings.default' , 'BLASTDB.list') self.default_dblist = self.ReadBLASTDBList(self.default_listfile,DBNAMES_ALL) # Read the local settings file if it exists. Update the dbtag field, the human-readable # description of the database, with the value from the default dblist. This # updates the local dblist if there are changes in this field in a new BIRCH release. # Also, if a new release has db files not in the local list, add those to the # local list. self.local_listfile = os.path.join(P.BIRCHDIR , 'local' , 'admin' , 'BLASTDB.list') if os.path.exists(self.local_listfile) : self.local_dblist = self.ReadBLASTDBList(self.local_listfile,DBNAMES_ALL) # First, remove any legacy databases from the local list, if they aren't in the # DBNAMES_ALL. These would be databases that are no longer distributed by NCBI # eg. v4 databases SupportedDBs = self.default_dblist.keys() # This was gives the following error: Runtime error: dictionary changed size during iteration. # I dont' see why this doesn't work, because self.local_dblist is not part of the for # statement. So we do it differently below. #localDBs = self.local_dblist.keys() #for dbname in localDBs : # if not dbname in SupportedDBs : # del self.local_dblist[dbname] templocalDBs = {} localkeys = self.local_dblist.keys() for dbname in localkeys : if dbname in SupportedDBs : templocalDBs[dbname] = self.local_dblist[dbname] self.local_dblist = templocalDBs for dbname in self.default_dblist : if dbname in self.local_dblist : self.local_dblist[dbname]["dbtag"] = self.default_dblist[dbname]["dbtag"] self.local_dblist[dbname]["dbtype"] = self.default_dblist[dbname]["dbtype"] self.local_dblist[dbname]["decompress_ratio"] = self.default_dblist[dbname]["decompress_ratio"] else: self.local_dblist[dbname] = self.default_dblist[dbname] else: self.local_dblist = self.default_dblist # dictionaries of metadata for files in BLASTDB directory and on the # remote FTP site self.localDBFILES = {} # Initialize dictionaries for each database name self.remoteDBFILES = {} for dbname in DBNAMES_ALL : self.remoteDBFILES[dbname] = {} def ReadBLASTDBList(self,FN,DBNAMES_ALL): """ Read list of BLAST databases, descriptions and install status (0,1). 
""" BLASTDICT={} if DBNAMES_ALL == [] : # We are reading the default file, so add database names to DBNAMES_ALL DefaultFile = True else : DefaultFile = False Bfile = open(FN,'r') for line in Bfile : if (line != "" and line[0] != '#') : line = line.strip() tokens = line.split(",") numfields = len(tokens) if numfields >= 4 : # ignore blank or incomplete lines if numfields == 5 : BLASTDICT[tokens[0]]={"dbtag":tokens[1],"dbtype":tokens[2],"decompress_ratio":tokens[3],"installed":tokens[4]} elif numfields == 4 : # older version of .list file BLASTDICT[tokens[0]]={"dbtag":tokens[1],"dbtype":tokens[2],"decompress_ratio":"1","installed":tokens[3]} if DefaultFile : # We don't do this part if we're reading local database files if not tokens[0] in DBNAMES_ALL : DBNAMES_ALL.append(tokens[0]) Bfile.close() if DEBUG : print('- - - - - BLASTDB list from ' + FN + '- - - - -') for k in DBNAMES_ALL : if k in BLASTDICT : print(' ' + k + ',' + str(BLASTDICT[k])) else : print(' ' + k + ' not in ' + FN ) return BLASTDICT def WriteBLASTDBList(self,DBNAMES_ALL): """ Write list of BLAST databases, descriptions and install status (0,1). """ Bfile = open(self.local_listfile,'w') Bfile.write('# DO NOT EDIT THIS FILE!\n') Bfile.write('# This file is automatically generated by blastdbkit.py during installation,\n') Bfile.write('# update or by birchadmin --> BLASTDB Configure\n') for dbname in DBNAMES_ALL : Bfile.write(dbname + ',' + self.local_dblist[dbname]["dbtag"] + ',' + str(self.local_dblist[dbname]["dbtype"]) + ',' + str(self.local_dblist[dbname]["decompress_ratio"]) + ',' + str(self.local_dblist[dbname]["installed"]) + '\n') Bfile.close() chmod_ar(self.local_listfile) def CheckBLASTDB(self,BLASTDB,DBNAMES_ALL) : """ Get a list of databases found in the BLASTDB directory. There are potentially a lot of ways this might be done, and no obvious best choice. Here, we just look to make sure that each database name is found, regardless of how many files share that name eg. nt includes files with names like nt.01.* """ # First, create a dictionary of filenames for all databases # found in $BLASTDB os.chdir(BLASTDB) # We only want BLAST files; ignore all other files # SPECIAL CASES : mouse_genome and human_genome # have filenames that differ from their database names. SearchSet = DBNAMES_ALL + MouseSet + HumanSet rawlist = os.listdir(os.getcwd()) self.localDBFILES = {} for filename in rawlist : tokens = filename.split('.') # Test whether the filenems is from mouse_genome or human_genome # NCBI used '.' as part of the filename, rather than a file extension # separator. eg. #MouseSet = ['GCF_000001635.26_top_level'] #HumanSet = ['GCF_000001405.38_top_level'] #So we have to do ugly things... SpecialSet = MouseSet + HumanSet prefix = "" for p in SpecialSet : if p in filename : prefix = tokens[0] + '.' + tokens[1] if prefix == "" : prefix = tokens[0] # eg. nt if prefix in SearchSet : F = FileMetaData() F.getLocalMetaData(filename) # subsequent occurrences of files in a database set if prefix in MouseSet : dbname = "mouse_genome" elif prefix in HumanSet : dbname = "human_genome" else : dbname = prefix if dbname in self.localDBFILES : self.localDBFILES[dbname][filename] = F # first occurrence of a file in a database set else : self.localDBFILES[dbname] = {filename:F} # Next, iterate through the database names, and set the # +/- field depending on whether a database name appears in # the list of files. 
Here is also where we create DBNAMES_INSTALLED, # which lists installed databases in the order in which they # appear in the db .list files. DBNAMES_INSTALLED = [] for dbname in DBNAMES_ALL : if dbname in self.localDBFILES : self.local_dblist[dbname]["installed"] = 1 DBNAMES_INSTALLED.append(dbname) else : self.local_dblist[dbname]["installed"] = 0 return DBNAMES_INSTALLED def CheckFTPsite(self,FTPSITE,DBNAMES_ALL) : """ Get a list of database files found at the remote FTP site. """ # First, create a dictionary of filenames for all databases # found in FTPSITE/FTPDIR dirlines = FTPDirList(FTPSITE) for line in dirlines: F = FileMetaData() F.getFTPMetaData(line,FTPINFO[FTPSITE]["UseMLSD"]) if F.Name.endswith('.md5') : tgzname = F.Name[:-4] else : # eg. nt.00.tar.gz tgzname = F.Name tokens = F.Name.split('.') dbname = tokens[0] # eg. nt # We only want BLAST files; ignore all other files if dbname in DBNAMES_ALL : # subsequent occurrences of files in a database set # each section of the database (eg. nt.20.tar.gz has two files: nt.20.tar.gz and nt.20.tar.gz.md5 # We store these fils in a dictionary whose key is the name of the tar.gz file eg. nt.20.tar.gz if tgzname in self.remoteDBFILES[dbname] : self.remoteDBFILES[dbname][tgzname][tgzname].append(F) # first occurrence of a file in a database set else : self.remoteDBFILES[dbname][tgzname] = {tgzname:[F]} # Eliminate from the remote list any database that wasn't found at the remote # FTP site. for dbname in DBNAMES_ALL : if self.remoteDBFILES[dbname] == {} : del self.remoteDBFILES[dbname] def WriteLocalReport(self,BLASTDB,DBNAMES_INSTALLED): """ Write a spreadsheet-ready report with statistics on the local copy of the NCBI databases. The report is a tab-separated value file written to $BLASTDB/localstats.tsv. """ TAB = "\t" OFN = os.path.join(BLASTDB, 'localstats.tsv') LRfile = open(OFN,'w') LRfile.write('blastdbkit.py:' + TAB + 'LOCAL BLAST DATABASE REPORT\n') LRfile.write('\n') stats = os.statvfs(BLASTDB) totalsize = (stats.f_frsize * stats.f_blocks)/1000000 available = (stats.f_frsize * stats.f_bavail)/1000000 used = ((stats.f_blocks - stats.f_bfree) * stats.f_frsize)/1000000 LRfile.write('Database Directory' + TAB + 'Total size (Mb)' + TAB + 'Used' + TAB + 'Available' + '\n') LRfile.write(BLASTDB + TAB + str(round(totalsize)) + TAB + str(round(used)) + TAB + str(round(available)) + '\n') LRfile.write('\n') LRfile.write('DB name' + TAB + 'size (Mb)' + TAB + 'Last Update' + '\n') row = 7 starting_row = row last_row = -1 # earlydate is the time of the most recent file in a database segment eg. nt, nr etc. # we initialize it to a ridiculously early date timestruct = '1970-01-01 00:00:00' earlydate = datetime.datetime.strptime(timestruct,"%Y-%m-%d %H:%M:%S") for dbname in DBNAMES_INSTALLED : subtotal = 0 MostRecent = earlydate for file in self.localDBFILES[dbname] : subtotal = subtotal + self.localDBFILES[dbname][file].Size if MostRecent < self.localDBFILES[dbname][file].Mtime : MostRecent = self.localDBFILES[dbname][file].Mtime sMostRecent = MostRecent.strftime("%Y-%m-%d %H:%M") LRfile.write(dbname + TAB + str(round(subtotal/1000000)) + TAB + sMostRecent + "\n") last_row = row row += 1 # We dont' calculate a total. We insert a formula that lets the spreadsheet calculate the total. 
if last_row >= starting_row : # at least one statistics line has been written LRfile.write('TOTAL:' + TAB + '=SUM(B' + str(starting_row) + ':B' + str(last_row) + ")\n") LRfile.close() chmod_ar(OFN) def WriteFTPReport(self,BLASTDB,FTPSITE,DBNAMES_ALL): """ Write a spreadsheet-ready report with statistics on the remote NCBI databases. The report is a tab-separated value file written to $BLASTDB/ftpstats.tsv. """ TAB = "\t" OFN = os.path.join(BLASTDB, 'ftpstats.tsv') LRfile = open(OFN,'w') LRfile.write('blastdbkit.py:' + TAB + 'REMOTE FTP BLAST DATABASE REPORT\n') LRfile.write('\n') LRfile.write('FTP site:' + TAB + FTPSITE + '\n') LRfile.write('Database Directory:' + TAB + FTPINFO[FTPSITE]["dbdir"] + '\n') LRfile.write('\n') LRfile.write('DB name:' + TAB + 'compressed size (Mbytes)' + TAB + 'est. decompressed size (Mbytes)' + TAB + 'Modification Time' + '\n') row = 7 starting_row = row # earlydate is the time of the most recent file in a database segment eg. nt, nr etc. # we initialize it to a ridiculously early date timestruct = '1970-01-01 00:00:00' earlydate = datetime.datetime.strptime(timestruct,"%Y-%m-%d %H:%M:%S") remoteDBs = list(self.remoteDBFILES.keys()) for dbname in DBNAMES_ALL : # Do it this way because .keys() doesn't preserve order # of names in DBNAMES_ALL # This way, both local and remote database spreadsheets will have # databases in the same order for easier comparison. if dbname in remoteDBs : subtotal = 0 MostRecent = earlydate for file in self.remoteDBFILES[dbname] : for F in self.remoteDBFILES[dbname][file][file] : subtotal = subtotal + F.Size if MostRecent < F.Mtime : MostRecent = F.Mtime sMostRecent = MostRecent.strftime("%Y-%m-%d %H:%M") fDecompress_ratio = float(self.local_dblist[dbname]["decompress_ratio"]) sizeMB = subtotal/1000000 LRfile.write(dbname + TAB + str(round(sizeMB)) + TAB + str(int(round(sizeMB*fDecompress_ratio))) + TAB + sMostRecent + '\n') last_row = row row += 1 # We dont' calculate a total. We insert a formula that calculates the total. LRfile.write('TOTAL:' + TAB + '=SUM(B' + str(starting_row) + ':B' + str(last_row) + ')' + TAB + '=SUM(C' + str(starting_row) + ':C' + str(last_row) + ')' + '\n') LRfile.close() chmod_ar(OFN) def FindNewFiles(self,DBLIST) : """ Return a list of database files .tar.gz files that are newer on the server than those in the local directory. In practice, this means that if the local *.tar.gz.md5 file is older than the *.tar.gz.md5 file on the remote site, we download the newer *.tar.gz files from the remote site. """ # initialize NewFiles dictionary NewFiles = {} for dbname in DBLIST: NewFiles[dbname] = [] for tgzname in self.remoteDBFILES[dbname] : #print(tgzname) for F in self.remoteDBFILES[dbname][tgzname][tgzname] : if F.Name.endswith(".md5") : NewDownload = False UpdateAvailable = False if not dbname in self.localDBFILES : NewDownload = True elif not F.Name in self.localDBFILES[dbname] : NewDownload = True elif self.localDBFILES[dbname][F.Name].Mtime < F.Mtime : UpdateAvailable = True if NewDownload or UpdateAvailable : print('New: ' + F.Name) NewFiles[dbname].append(F.Name[:-4]) NewFiles[dbname].sort() return NewFiles def CreateFastaNamefiles(self,BIRCHDIR,BLASTDB,DBNAMES_INSTALLED) : """ Create .nam files so that FASTA can find BLAST databases. 
""" FastaDirName = os.path.join(BIRCHDIR , 'dat' , 'fasta') os.chdir(FastaDirName) FileHeader = '<${GB}' #first line in all .nam files for dbname in DBNAMES_INSTALLED : # create a file with names of all .psq files (protein) or .nsq files (nucleotide) # for a given database subset NameFile = open(dbname + '.nam','w') NameFile.write(FileHeader + '\n') if self.local_dblist[dbname]["dbtype"] in ["n","N"] : #nucleic acids seq file ext = ".nsq" elif self.local_dblist[dbname]["dbtype"] in ["p","P"] : #protein seq file ext = ".psq" # Write the file names out to the .nam file. # Because dictionaries are not sorted, we need to first # get a list of keys and sort them. In python2 we could use # the keys() function, but in python3 keys() returns a view object, # rather than a list. So we have to force the result into a list # and then sort it. filenames = list(self.localDBFILES[dbname].keys()) filenames.sort() if dbname in ["human_genome", "mouse_genome"] : print(filenames) for fn in filenames : F = self.localDBFILES[dbname][fn] if F.Name.endswith(ext) : prefix = F.Name[:-4] NameFile.write(prefix + ' 12\n') NameFile.close() def CreateBLMenu(self,P,DBNAMES,Directory,TNAME) : """ Create a BioLegato .blmenu file from a template file """ # Test for End of line condition in input def EOF(line) : if line == "" : result = True else : result = False return result def ReadChooser(DONE) : """ Read a template file to be used for creating chooser variables. A varible in the final .blmenu file will be implemented for each database in the list. Returns a list of lines. """ Chooser = [] line = Templatefile.readline() DONE = EOF(line) while not line.startswith('') and not EOF(line) : Chooser.append(line) line = Templatefile.readline() return Chooser def WriteChooser(ChooserTemplate,db,dbfileprefix,OutputFile) : """ Write a chooser variable to the output file for a given database section. We do this by substituting markup tags from the template with desired values. Another way to have done this would have been to just have this method write out the complete PCD for each chooser. However, that approach has the disadvantage that even minor changes in the PCD have to be made in the Python code, rather than in the PCD template. 
""" for line in ChooserTemplate : #outputline = line.replace('',db) outputline = line.replace('',dbfileprefix) outputline = outputline.replace('',dbfileprefix) outputline = outputline.replace('',self.local_dblist[db]["dbtag"]) if self.local_dblist[db]["installed"] == 1 : InstSymbol = '+' else : InstSymbol = '-' outputline = outputline.replace('',InstSymbol) outputline = outputline.replace('',str(self.local_dblist[db]["installed"])) OutputFile.write(outputline) TemplateFN = os.path.join(Directory, TNAME + '.blmenu.template') OutputFN = os.path.join(Directory, TNAME + '.blmenu') Templatefile = open(TemplateFN,'r') OutputFile = open(OutputFN,'w') OutputFile.write('# DO NOT EDIT THIS FILE!\n') OutputFile.write('# This file is automatically generated by blastdbkit.py during installation,\n') OutputFile.write('# update or by birchadmin --> UpdateAddInstall\n') line = Templatefile.readline() DONE = EOF(line) dblist = "" #used for the --dblist option while not DONE : if line.startswith('') > -1 : line = line.replace('',dblist) OutputFile.write(line) elif line.find('') > -1 : line = line.replace('',P.BLASTDB) OutputFile.write(line) else : OutputFile.write(line) if not DONE : line = Templatefile.readline() DONE = EOF(line) Templatefile.close() OutputFile.close() chmod_ar(OutputFN) def UpdateFiles(self,P,DBNAMES_INSTALLED,DBNAMES_ALL,LOGFILE) : """ Cycle through the names of databases in the order given in UpdateList. If the name is in the list to be updated, first delete the files for that name to create some space. Next download and install the new files. When all files have been downloaded, send an email to the BIRCH administrator. """ Indent4 = ' ' if 'all' in P.DBLIST : UpdateList = DBNAMES_INSTALLED else : UpdateList = P.DBLIST NewFiles = self.FindNewFiles(UpdateList) for dbname in UpdateList : #print(dbname) if len(NewFiles[dbname]) == 0 : LOGFILE.write(Indent4 + Indent4 + dbname + ' up to date. Nothing to install.' + '\n') else : LOGFILE.write(Indent4 + '----- Updating ' + dbname + ' -----\n') for file in NewFiles[dbname] : FTPDIR = FTPINFO[P.FTPSITE]["dbdir"] SUCCESS = InstallFile(file,P.FTPSITE,FTPDIR,LOGFILE) if SUCCESS : LOGFILE.write(Indent4 + Indent4 + 'Successfully installed ' + file + '\n') else : LOGFILE.write(Indent4 + Indent4 + '>>> INSTALL OF ' + file + ' FAILED\n') LOGFILE.write('\n') def AddFiles(self,P,DBNAMES_INSTALLED,DBNAMES_ALL,LOGFILE) : """ Cycle through the names of databases in the order given in UpdateList. 
""" Indent4 = ' ' if 'all' in P.DBLIST : AddList = DBNAMES_ALL else : AddList = P.DBLIST NewFiles = self.FindNewFiles(AddList) for dbname in AddList : if dbname in DBNAMES_INSTALLED : LOGFILE.write(Indent4 + dbname + ' already installed\n') else : LOGFILE.write(Indent4 + '----- Adding ' + dbname + ' -----\n') FilesToGet = [] for file in self.remoteDBFILES[dbname] : #print(file) FilesToGet.append(file) FilesToGet.sort() for file in FilesToGet : FTPDIR = FTPINFO[P.FTPSITE]["dbdir"] SUCCESS = InstallFile(file,P.FTPSITE,FTPDIR,LOGFILE) if SUCCESS : LOGFILE.write(Indent4 + Indent4 + 'Successfully installed ' + file + '\n') else : LOGFILE.write(Indent4 + Indent4 + '>>> INSTALL OF ' + file + ' FAILED\n') LOGFILE.write('\n') def Revise(self,P,DBNAMES_ALL) : """ Revise local database metadata, BioLegato menus and FASTA .nam files """ global DBNAMES_INSTALLED DBNAMES_INSTALLED = self.CheckBLASTDB(P.BLASTDB,DBNAMES_ALL) if DEBUG : print('Installed: ' + str(DBNAMES_INSTALLED)) self.WriteBLASTDBList(DBNAMES_ALL) self.WriteLocalReport(P.BLASTDB,DBNAMES_INSTALLED) # Write BioLegato menus for birchadmin Directory = os.path.join(P.BIRCHDIR, 'dat', 'birchadmin', 'PCD', 'UpdateAddInstall') for menu in ['BlastDBreport','BlastDBUpdate','BlastDBAdd', 'BlastDBDelete'] : self.CreateBLMenu(P,DBNAMES_ALL,Directory,menu) # Write BioLegato menus for bldna, blprotein to search local BLAST databases Directory = os.path.join(P.BIRCHDIR, 'dat', 'bldna', 'PCD', 'Database') for menu in ['BLASTNlocal', 'BLASTXlocal','TBLASTXlocal'] : self.CreateBLMenu(P,DBNAMES_INSTALLED,Directory,menu) for menu in ['BlastDBreport'] : self.CreateBLMenu(P,DBNAMES_ALL,Directory,menu) Directory = os.path.join(P.BIRCHDIR, 'dat', 'blprotein', 'PCD', 'Database') for menu in ['BLASTPlocal','TBLASTNlocal'] : self.CreateBLMenu(P,DBNAMES_INSTALLED,Directory,menu) for menu in ['BlastDBreport'] : self.CreateBLMenu(P,DBNAMES_ALL,Directory,menu) # Write BioLegato menus for blnfetch, blpfetch to retrieve data from local BLAST databases Directory = os.path.join(P.BIRCHDIR, 'dat', 'blnfetch', 'PCD', 'Database') for menu in ['SEQFETCHlocal'] : self.CreateBLMenu(P,DBNAMES_INSTALLED,Directory,menu) Directory = os.path.join(P.BIRCHDIR, 'dat', 'blpfetch', 'PCD', 'Database') for menu in ['SEQFETCHlocal'] : self.CreateBLMenu(P,DBNAMES_INSTALLED,Directory,menu) # Create name files so that FASTA can find BLAST databases self.CreateFastaNamefiles(P.BIRCHDIR,P.BLASTDB,DBNAMES_INSTALLED) Directory = os.path.join(P.BIRCHDIR, 'dat', 'bldna', 'PCD', 'Database') for menu in ['FASTADNA','FASTXY'] : self.CreateBLMenu(P,DBNAMES_INSTALLED,Directory,menu) Directory = os.path.join(P.BIRCHDIR, 'dat', 'blprotein', 'PCD', 'Database') for menu in ['FASTAPROTEIN','TFASTA'] : self.CreateBLMenu(P,DBNAMES_INSTALLED,Directory,menu) #======================== MAIN PROCEDURE ========================== def main(): """ Called when not in documentation mode. """ # Read parameters from command line P = Parameters() adminEmail = GetBIRCHProperties(P.BIRCHDIR,"adminEmail") if DEBUG : print('adminEmail: ' + adminEmail) Settings = BIRCHSettings(P) DBList = BLASTDBList(P,DBNAMES_ALL) print('DBNAMES_ALL: ' + str(DBNAMES_ALL)) # Set BLASTDB variable according to the priority: --blastdb, then the value from BIRCH.Settings file, # then BLASTDB environment variable, then as a final fallback, use the default path of $BIRCH/GenBank. 
    if P.BLASTDB == "" :
        if Settings.dict['BLASTDB'] == "" :
            if "BLASTDB" in os.environ :
                P.BLASTDB = str(os.environ['BLASTDB'])
            else :
                P.BLASTDB = os.path.join(P.BIRCHDIR, 'GenBank')
        else :
            P.BLASTDB = Settings.dict['BLASTDB']
    Settings.dict['BLASTDB'] = P.BLASTDB
    LOGFN = os.path.join(P.BLASTDB,'blastdbkit.log')

    if P.SHOWALL:
        print("--------- Showall ----------")
        AvailableDBs = FTPDBList(P.FTPSITE)
        for dbname in AvailableDBs :
            print(dbname)

    elif P.CONFIGURE:
        print("--------- Configure ----------")
        # * * * * Step 1: Save local settings * * * *
        Settings.WriteBIRCHSettings(P.SFN)
        Settings.WriteBIRCHenvBourne(P)
        Settings.WriteBIRCHenvCsh(P)
        # * * * * Step 2: Revise BioLegato menus and FASTA .nam files * * * *
        # Write BioLegato menus for birchadmin
        #DBNAMES_INSTALLED = DBList.CheckBLASTDB(P.BLASTDB,DBNAMES_ALL)
        DBList.Revise(P,DBNAMES_ALL)

    elif P.REPORTLOCAL:
        print("--------- ReportLocal ----------")
        DBNAMES_INSTALLED = DBList.CheckBLASTDB(P.BLASTDB,DBNAMES_ALL)
        DBList.WriteLocalReport(P.BLASTDB,DBNAMES_INSTALLED)

    elif P.REPORTFTP:
        print("--------- ReportFTP ----------")
        DBList.CheckFTPsite(P.FTPSITE,DBNAMES_ALL)
        DBList.WriteFTPReport(P.BLASTDB,P.FTPSITE,DBNAMES_ALL)

    elif P.ADD:
        print("---------- Addfiles ----------")
        LOGFILE = open(LOGFN,'w')
        LOGFILE.write('\n')
        LOGFILE.write('Local host: ' + LocalHostname() + '\n')
        LOGFILE.write('Local BLAST database directory: ' + P.BLASTDB + '\n')
        LOGFILE.write('\n')
        LOGFILE.write('FTP Site: ' + P.FTPSITE + '\n')
        LOGFILE.write('FTP Directory: ' + FTPINFO[P.FTPSITE]['dbdir'] + '\n')
        LOGFILE.write('\n')
        StartTime = datetime.datetime.now()
        LOGFILE.write('Adding files: \n')
        LOGFILE.write('Start time: ' + str(StartTime) + '\n')
        LOGFILE.write('\n')

        # * * * * Step 1: Add new files to the local database * * * *
        DBNAMES_INSTALLED = DBList.CheckBLASTDB(P.BLASTDB,DBNAMES_ALL)
        DBList.CheckFTPsite(P.FTPSITE,DBNAMES_ALL)
        DBList.AddFiles(P,DBNAMES_INSTALLED,DBNAMES_ALL,LOGFILE)

        # * * * * Step 2: Revise BioLegato menus and FASTA .nam files * * * *
        #DBNAMES_INSTALLED = DBList.CheckBLASTDB(P.BLASTDB,DBNAMES_ALL)
        DBList.Revise(P,DBNAMES_ALL)

        FinishTime = datetime.datetime.now()
        LOGFILE.write('\n')
        LOGFILE.write('Finish time: ' + str(FinishTime) + '\n')
        ElapsedTime = FinishTime - StartTime
        LOGFILE.write('Elapsed time: ' + str(ElapsedTime) + '\n')
        LOGFILE.close()

        # * * * * Step 3: Notify user when job is completed. * * * *
        Subject = 'blastdbkit.py --add completed'
        Message = 'blastdbkit.py: Completed Installing Blast Databases<br>'
        LOGFILE = open(os.path.join(P.BLASTDB,'blastdbkit.log'),'r')
        for line in LOGFILE.readlines() :
            Message = Message + line + '<br>'
        LOGFILE.close()
        SendEmail(adminEmail,[adminEmail],Subject,Message)

    elif P.DELETE:
        print("---------- Delete files ----------")
        LOGFILE = open(LOGFN,'w')
        LOGFILE.write('\n')
        LOGFILE.write('Local host: ' + LocalHostname() + '\n')
        LOGFILE.write('Local BLAST database directory: ' + P.BLASTDB + '\n')
        LOGFILE.write('\n')
        StartTime = datetime.datetime.now()
        LOGFILE.write('Deleting files: \n')
        LOGFILE.write('Start time: ' + str(StartTime) + '\n')
        LOGFILE.write('\n')

        # * * * * Step 1: Delete files from the local database * * * *
        DBNAMES_INSTALLED = DBList.CheckBLASTDB(P.BLASTDB,DBNAMES_ALL)
        if 'all' in P.DBLIST :
            DeleteList = DBNAMES_INSTALLED
        else :
            DeleteList = P.DBLIST
        for dbname in DeleteList :
            if dbname in DBNAMES_INSTALLED :
                LOGFILE.write('    ----- Deleting files from ' + dbname + ' -----\n')
                #if dbname in MouseSet : #SPECIAL CASE
                if dbname == "mouse_genome" : #SPECIAL CASE
                    #DeleteFilesByPrefix(P.BLASTDB,'allcontig_and_rna',LOGFILE)
                    for prefix in MouseSet:
                        DeleteFilesByPrefix(P.BLASTDB,prefix,LOGFILE)
                elif dbname == "human_genome":
                    for prefix in HumanSet:
                        DeleteFilesByPrefix(P.BLASTDB,prefix,LOGFILE)
                else :
                    DeleteFilesByPrefix(P.BLASTDB,dbname,LOGFILE)
            else:
                LOGFILE.write(dbname + ' not installed. Doing nothing. \n')
        LOGFILE.write('\n')

        # * * * * Step 2: Revise BioLegato menus and FASTA .nam files * * * *
        #DBNAMES_INSTALLED = DBList.CheckBLASTDB(P.BLASTDB,DBNAMES_ALL)
        DBList.Revise(P,DBNAMES_ALL)

        FinishTime = datetime.datetime.now()
        LOGFILE.write('\n')
        LOGFILE.write('Finish time: ' + str(FinishTime) + '\n')
        ElapsedTime = FinishTime - StartTime
        LOGFILE.write('Elapsed time: ' + str(ElapsedTime) + '\n')
        LOGFILE.close()

        # * * * * Step 3: Notify user when job is completed. * * * *
        Subject = 'blastdbkit --delete completed'
        Message = 'blastdbkit.py: Completed Deleting Blast Database files<br>'
        LOGFILE = open(os.path.join(P.BLASTDB,'blastdbkit.log'),'r')
        for line in LOGFILE.readlines() :
            Message = Message + line + '<br>'
        LOGFILE.close()
        SendEmail(adminEmail,[adminEmail],Subject,Message)

    elif P.UPDATE:
        print("---------- Update ----------")
        LOGFILE = open(LOGFN,'w')
        LOGFILE.write('\n')
        LOGFILE.write('Local host: ' + LocalHostname() + '\n')
        LOGFILE.write('Local BLAST database directory: ' + P.BLASTDB + '\n')
        LOGFILE.write('\n')
        LOGFILE.write('FTP Site: ' + P.FTPSITE + '\n')
        LOGFILE.write('FTP Directory: ' + FTPINFO[P.FTPSITE]['dbdir'] + '\n')
        LOGFILE.write('\n')
        StartTime = datetime.datetime.now()
        LOGFILE.write('Updating files: \n')
        LOGFILE.write('Start time: ' + str(StartTime) + '\n')
        LOGFILE.write('\n')

        # * * * * Step 1: Update files in the local database * * * *
        DBNAMES_INSTALLED = DBList.CheckBLASTDB(P.BLASTDB,DBNAMES_ALL)
        DBList.CheckFTPsite(P.FTPSITE,DBNAMES_ALL)
        DBList.UpdateFiles(P,DBNAMES_INSTALLED,DBNAMES_ALL,LOGFILE)

        # * * * * Step 2: Revise BioLegato menus and FASTA .nam files * * * *
        #DBNAMES_INSTALLED = DBList.CheckBLASTDB(P.BLASTDB,DBNAMES_ALL)
        DBList.Revise(P,DBNAMES_ALL)

        FinishTime = datetime.datetime.now()
        LOGFILE.write('\n')
        LOGFILE.write('Finish time: ' + str(FinishTime) + '\n')
        ElapsedTime = FinishTime - StartTime
        LOGFILE.write('Elapsed time: ' + str(ElapsedTime) + '\n')
        LOGFILE.close()

        # * * * * Step 3: Notify user when job is completed. * * * *
        Subject = 'blastdbkit --update completed'
        Message = 'blastdbkit.py: Completed Updating Blast Databases<br>'
        LOGFILE = open(os.path.join(P.BLASTDB,'blastdbkit.log'),'r')
        for line in LOGFILE.readlines() :
            Message = Message + line + '<br>'
        LOGFILE.close()
        SendEmail(adminEmail,[adminEmail],Subject,Message)

    else:
        print(USAGE)

    # We need to flush the stdout buffer to avoid a sys.excepthook error message.
    # See http://stackoverflow.com/questions/12790328/how-to-silence-sys-excepthook-is-missing-error
    try:
        sys.stdout.flush()
        sys.stderr.flush()
    except:
        pass

    BM.exit_success()

if __name__ == "__main__":
    main()
else:
    #used to generate documentation
    import doctest
    doctest.testmod()

    #if (BM.documentor() or "-test" in sys.argv):
    #    pass
    #else:
    #    main()
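
# --------------------------------------------------------------------------
# Illustrative sketch only (not called anywhere in this script): the core
# verify-then-extract step that InstallFile() performs for each downloaded
# database volume, reusing md5Checksum() and extracttgz() defined above.
# The helper name is hypothetical, and a real volume would be a pair such as
# nt.00.tar.gz alongside nt.00.tar.gz.md5 in the current directory.
def _demo_verify_and_extract(tgzfile) :
    """
    Return True if tgzfile matches its .md5 companion and was extracted.
    """
    md5file = tgzfile + '.md5'
    with open(md5file, 'r') as fh :
        # NCBI .md5 files contain "<checksum>  <filename>" on a single line
        RemoteChecksum = fh.readline().split(" ")[0]
    if md5Checksum(tgzfile) == RemoteChecksum :
        extracttgz(tgzfile)   # unpack into the current directory
        os.remove(tgzfile)    # keep the .md5 file, discard the archive
        return True
    return False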