#!/usr/bin/env python3
"""
Runs the C version of NCBI xtract.

Input file is an XML Document Summary returned from the Eutils Python library.
Output is a TAB-separated value file with Accession numbers, Titles and
Sequence Lengths.

Brian Fristensky, Plant Science, University of Manitoba
brian.fristensky@umanitoba.ca
"""

import subprocess

# Input XML Document Summary, output TSV report, and the comma-separated
# list of XML elements that xtract should emit as columns.
RFN = "FristenskyInput.xml"
OFN = "FristenskyOutput.tsv"
ELEMENTS = "Caption,Title,Slen"

# ----------------- METHOD 1 -------------------
# Passing the input file to xtract as stdin gives the following error in the
# output file:
#   ERROR:  No data supplied to xtract from stdin or file, mode is '-rw-------'
# Kept for reference only:
# h_RES = open(RFN, "r")
# p = subprocess.Popen(["xtract", "-pattern", "DocumentSummary",
#                       "-element", ELEMENTS],
#                      stdin=h_RES, stdout=h_OUT, stderr=h_OUT)

# ----------------- METHOD 2 -------------------
# Correctly generates a report as a TSV file: the input file is named on the
# command line with -input instead of being supplied on stdin.
#
# The `with` block guarantees the output file is closed even if xtract cannot
# be launched (e.g. it is not on PATH and Popen raises FileNotFoundError).
with open(OFN, "w") as h_OUT:
    # stderr is deliberately sent to the same file as stdout, so any message
    # xtract prints ends up in the output file (as in the Method 1 note).
    p = subprocess.Popen(
        [
            "xtract",
            "-input", RFN,
            "-pattern", "DocumentSummary",
            "-element", ELEMENTS,
        ],
        stdout=h_OUT,
        stderr=h_OUT,
    )
    returncode = p.wait()

# Do not let a failed run pass silently: report it and exit non-zero.
if returncode != 0:
    raise SystemExit(
        f"xtract exited with status {returncode}; see {OFN} for its messages"
    )