#!/usr/bin/env python

import hashlib
import os
import requests
import sys

# These servers support MLSD
FTPINFO = {"ftp.ncbi.nih.gov": "/blast/db",
           "ftp.hgc.jp": "/pub/mirror/ncbi/blast/db",
           "ftp.ebi.ac.uk": "/pub/blast/db"}

FTPSITE = 'ftp.ncbi.nih.gov'
#FTPSITE = 'ftp.ebi.ac.uk'
#FTPSITE = 'ftp.hgc.jp'
FTPDIR = FTPINFO[FTPSITE]

MAX_DOWNLOAD_ATTEMPTS = 3


def download_file(FTPSITE, FTPDIR, FN):
    """
    Stream a remote file to disk in 1 KB chunks.
    Adapted from:
    http://stackoverflow.com/questions/16694907/how-to-download-large-file-in-python-with-requests-py
    """
    SEP = '/'
    URL = 'http://' + FTPSITE + FTPDIR + SEP + FN
    # NOTE the stream=True parameter
    r = requests.get(URL, stream=True)
    with open(FN, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
                # f.flush() commented by recommendation from J.F.Sebastian


def md5Checksum(filePath):
    """
    Compute the MD5 digest of a file, reading it in 8 KB blocks.
    Written by Joel Verhagen
    http://joelverhagen.com/blog/2011/02/md5-hash-of-file-in-python/
    """
    with open(filePath, 'rb') as fh:
        m = hashlib.md5()
        while True:
            data = fh.read(8192)
            if not data:
                break
            m.update(data)
        return m.hexdigest()


SUCCESS = False
ATTEMPTS = 0
tgzfile = sys.argv[1]
md5file = tgzfile + '.md5'

while not SUCCESS and (ATTEMPTS < MAX_DOWNLOAD_ATTEMPTS):
    # Download the .tar.gz file and the matching .tar.gz.md5 file
    download_file(FTPSITE, FTPDIR, tgzfile)
    download_file(FTPSITE, FTPDIR, md5file)

    # Calculate the local MD5 checksum and compare it with the checksum file.
    # The .md5 file is opened in text mode and split on whitespace so the
    # comparison works regardless of how many spaces separate hash and name.
    LocalChecksum = md5Checksum(tgzfile)
    with open(md5file, 'r') as fh:
        line = fh.readline()
        RemoteChecksum = line.split()[0]

    if LocalChecksum == RemoteChecksum:
        SUCCESS = True
        print("Success!")

    ATTEMPTS += 1

# Report failure explicitly instead of exiting silently with status 0
if not SUCCESS:
    sys.exit("Failed to download and verify %s after %d attempts"
             % (tgzfile, MAX_DOWNLOAD_ATTEMPTS))
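
# Example invocation (a usage sketch; the script filename and the archive
# name below are illustrative assumptions, not part of the original source --
# any .tar.gz listed under the chosen mirror's blast/db directory works):
#
#   python download_blastdb.py nr.00.tar.gz
#
# This fetches nr.00.tar.gz and nr.00.tar.gz.md5 from FTPSITE over HTTP and
# retries up to MAX_DOWNLOAD_ATTEMPTS times until the local MD5 matches the
# one published in the .md5 file.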