#!/usr/bin/env python
"""Print name, size and modification time for every file in an FTP directory.

The script logs in anonymously to FTPSITE, changes to FTPDIR and asks for a
machine-readable MLSD listing.  If the server rejects MLSD it falls back to
parsing the human-oriented LIST output.
"""
import ftplib
import time
import datetime


def getValue(tokens, kw):
    """
    Return the value for a key/value pair, where = is the separator.

    tokens -- list of strings of the form "key=value"
    kw     -- key to look for (exact, case-sensitive match)

    The value of the first token whose key equals kw is returned; "" is
    returned when no token matches.  Tokens without an '=' are skipped, and
    only the first '=' separates key from value, so a value may itself
    contain '='.
    """
    for token in tokens:
        key, sep, value = token.partition('=')
        if sep and key == kw:
            return value
    return ""


# These servers support MLSD
FTPINFO = {"ftp.ncbi.nih.gov": "/blast/db",
           "ftp.hgc.jp": "pub/mirror/ncbi/blast/db",
           "ftp.ebi.ac.uk": "pub/blast/db"}

# These servers don't support MLSD
OTHERFTPINFO = {"bio-mirror.net": "/blast/db",
                "mirrors.vbi.vt.edu": "mirrors/ftp.ncbi.nih.gov/blast/db"}

FTPSITE = 'ftp.ncbi.nih.gov'
#FTPSITE = 'ftp.ebi.ac.uk'
#FTPSITE = 'ftp.hgc.jp'

# Look in both tables so that a non-MLSD server can be selected as well.
FTPDIR = FTPINFO[FTPSITE] if FTPSITE in FTPINFO else OTHERFTPINFO[FTPSITE]

# Layout of the timestamp string built from the LIST fields.
LIST_TIME_FORMAT = "%d %b %Y %H:%M"

# A LIST timestamp is only treated as "in the future" when it is ahead of the
# local clock by more than this, to tolerate client/server time differences.
CLOCK_SKEW = datetime.timedelta(days=1)


def parseMlsdLine(line):
    """
    Split one MLSD line into (Name, Size, Mtime), all strings.

    An MLSD entry has the form "fact=value;fact=value; pathname": the first
    blank ends the facts and everything after it is the name, so names that
    contain ';' or blanks are returned intact.  Size and Mtime are the raw
    'size' and 'modify' fact values, or "" when the server did not send them.
    """
    facts, _, name = line.partition(' ')
    tokens = [fact for fact in facts.split(';') if fact]
    return name, getValue(tokens, 'size'), getValue(tokens, 'modify')


def parseListLine(line, now=None):
    """
    Split one LIST line into (Name, Size, MtimeText, Mtime), or return None.

    Parsing directory lines is hazardous! Directory listings from a number
    of FTP sites seem to all be consistent with tokenizing into nine
    non-blank fields:

        Field   Content
        -----------------------
        0       permissions
        1       ?
        2       owner
        3       group
        4       size
        5       Month
        6       Day
        7       Time or Year
        8       Name

    Field 8 is a special case, because a name might include blanks. For the
    purposes of downloading NCBI files, we don't need to worry about blanks
    in names, so any line that does not have exactly nine fields (including
    such names) yields None.

    line -- one line of LIST output
    now  -- datetime used as "the present" when the year must be inferred;
            defaults to datetime.datetime.now()

    MtimeText is "Day Month Year HH:MM" and Mtime is the time.struct_time
    parsed from it.  Raises ValueError if the date fields cannot be parsed.
    """
    # With no separator argument, runs of whitespace count as one separator.
    tokens = line.split()
    if len(tokens) != 9:
        return None
    size, month, day, when, name = tokens[4:]

    if ':' in when:
        # The listing shows a clock time instead of a year.  Start from the
        # current year, but a date that would then lie in the future (e.g. a
        # December file listed in January) must belong to the previous year.
        if now is None:
            now = datetime.datetime.now()
        hhmm = when
        year = now.year
        try:
            guess = datetime.datetime.strptime(
                ' '.join((day, month, str(year), hhmm)), LIST_TIME_FORMAT)
        except ValueError:
            # e.g. "Feb 29" when the current year is not a leap year
            guess = None
        if guess is None or guess > now + CLOCK_SKEW:
            year -= 1
        year = str(year)
    else:
        year = when
        hhmm = "00:00"

    text = ' '.join((day, month, year, hhmm))
    return name, size, text, time.strptime(text, LIST_TIME_FORMAT)


def main():
    """Connect to FTPSITE, list FTPDIR and print one entry per file."""
    ftp = ftplib.FTP(FTPSITE)
    try:
        ftp.login()
        ftp.cwd(FTPDIR)

        SupportsMLSD = True
        dirlines = []
        try:
            ftp.retrlines('MLSD', dirlines.append)  # Python 3 - use ftp.mlsd
        except ftplib.all_errors as e:
            SupportsMLSD = False
            print(str(e).split(None, 1))
            print('Site doesn\'t support MLSD')
            # Drop anything a failed MLSD transfer may have delivered so it
            # is not mixed in with the LIST lines.
            dirlines = []
            ftp.retrlines('LIST', dirlines.append)

        if SupportsMLSD:
            for line in dirlines:
                Name, Size, Mtime = parseMlsdLine(line)
                print(Name + ' ' + Size + ' ' + str(Mtime))
        else:
            now = datetime.datetime.now()
            for line in dirlines:
                #print(line)
                parsed = parseListLine(line, now)
                if parsed is None:
                    continue
                Name, Size, MtimeText, Mtime = parsed
                print(Name + ' ' + Size + ' ' + MtimeText)
                print(Name + ' ' + Size + ' ' + str(Mtime))
    finally:
        # Always release the connection; fall back to a plain close when the
        # server can no longer answer QUIT.
        try:
            ftp.quit()
        except ftplib.all_errors:
            ftp.close()


if __name__ == "__main__":
    main()

# For FTP servers that support MLSD, it's easy to get the metadata. For
# those that don't, ftp has commands that can be used to get size and mtime:
#
#     ftp> modtime robots.txt
#     robots.txt 02/17/2015 17:01:36 GMT
#     ftp> size robots.txt
#     213 26
#
# This is a pain to work with, but it's better than nothing.