# Copyright 2004 by Harry Zuzan. All rights reserved. # Copyright 2016 by Adam Kurkiewicz. All rights reserved. # This file is part of the Biopython distribution and governed by your # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". # Please see the LICENSE file that should have been included as part of this # package. """Reading information from Affymetrix CEL files version 3 and 4.""" from __future__ import print_function import struct try: import numpy except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "Install NumPy if you want to use Bio.Affy.CelFile") class ParserError(ValueError): """Affymetrix parser error.""" def __init__(self, *args): """Initialise class.""" super(ParserError, self).__init__(*args) _modeError = ParserError("You're trying to open an Affymetrix v4" " CEL file. You have to use a read binary mode," " like this `open(filename \"rb\")`.") # for debugging # import pprint # pp = pprint.PrettyPrinter(indent=4) class Record(object): """Stores the information in a cel file. Example usage: >>> from Bio.Affy import CelFile >>> with open("Affy/affy_v3_example.CEL", "r") as handle: ... c = CelFile.read(handle) ... >>> print(c.ncols, c.nrows) 5 5 >>> print(c.intensities) [[ 234. 170. 22177. 164. 22104.] [ 188. 188. 21871. 168. 21883.] [ 188. 193. 21455. 198. 21300.] [ 188. 182. 21438. 188. 20945.] [ 193. 20370. 174. 20605. 168.]] >>> print(c.stdevs) [[ 24. 34.5 2669. 19.7 3661.2] [ 29.8 29.8 2795.9 67.9 2792.4] [ 29.8 88.7 2976.5 62. 2914.5] [ 29.8 76.2 2759.5 49.2 2762. ] [ 38.8 2611.8 26.6 2810.7 24.1]] >>> print(c.npix) [[25 25 25 25 25] [25 25 25 25 25] [25 25 25 25 25] [25 25 25 25 25] [25 25 25 25 25]] """ def __init__(self): """Initialize class.""" self.version = None self.GridCornerUL = None self.GridCornerUR = None self.GridCornerLR = None self.GridCornerLL = None self.DatHeader = None self.Algorithm = None self.AlgorithmParameters = None self.NumberCells = None self.intensities = None self.stdevs = None self.npix = None self.nrows = None self.ncols = None self.nmask = None self.mask = None self.noutliers = None self.outliers = None self.modified = None def read(handle): """Read Affymetrix CEL file and return Record object. CEL files version 3 and 4 are supported, and the parser attempts version detection. Example Usage: >>> from Bio.Affy import CelFile >>> with open("Affy/affy_v4_example.CEL", "rb") as handle: ... c = CelFile.read(handle) ... >>> c.version == 4 True """ # If we fail to read the magic number, then it will remain None, and thus # we will invoke read_v3 (if mode is not strict), or raise IOError if mode # is strict. magicNumber = None # We check if the handle is a file-like object. If it isn't, and the mode # is strict, we raise an error. If it isn't and the mode isn't strict, we # continue (perhaps somebody has got a CEL-file-like iterable, which used # to work with previous versions of biopython and we don't want to maintain # backwards compatibility). try: mode = handle.mode # By definition an Affymetrix v4 CEL file has 64 as the first 4 bytes. # Note that we use little-endian irrespective of the platform, again by # definition. position = handle.tell() magicNumber = struct.unpack(">> from Bio.Affy import CelFile >>> with open("Affy/affy_v4_example.CEL", "rb") as handle: ... c = CelFile.read_v4(handle) ... >>> c.version == 4 True >>> print("%i by %i array" % c.intensities.shape) 5 by 5 array """ # We follow the documentation here: # http://www.affymetrix.com/estore/support/developer/powertools/changelog/gcos-agcc/cel.html.affx record = Record() preHeaders = ["magic", "version", "columns", "rows", "cellNo", "headerLen"] preHeadersMap = dict() headersMap = dict() # load pre-headers try: for name in preHeaders: preHeadersMap[name] = struct.unpack(">> from Bio.Affy import CelFile >>> with open("Affy/affy_v3_example.CEL", "r") as handle: ... c = CelFile.read_v3(handle) ... >>> c.version == 3 True """ # Needs error handling. # Needs to know the chip design. record = Record() section = "" for line in handle: if not line.strip(): continue # Set current section if line[:5] == "[CEL]": section = "CEL" elif line[:8] == "[HEADER]": section = "HEADER" elif line[:11] == "[INTENSITY]": section = "INTENSITY" record.intensities = numpy.zeros((record.nrows, record.ncols)) record.stdevs = numpy.zeros((record.nrows, record.ncols)) record.npix = numpy.zeros((record.nrows, record.ncols), int) elif line[:7] == "[MASKS]": section = "MASKS" record.mask = numpy.zeros((record.nrows, record.ncols)) elif line[:10] == "[OUTLIERS]": section = "OUTLIERS" record.outliers = numpy.zeros((record.nrows, record.ncols)) elif line[:10] == "[MODIFIED]": section = "MODIFIED" record.modified = numpy.zeros((record.nrows, record.ncols)) elif line[0] == "[": # This would be an unknown section section = "" elif section == "CEL": keyword, value = line.split("=", 1) if keyword == "Version": record.version = int(value) elif section == "HEADER": # Set record.ncols and record.nrows, remaining data goes into # record.header dict keyword, value = line.split("=", 1) if keyword == "Cols": record.ncols = int(value) elif keyword == "Rows": record.nrows = int(value) elif keyword == "GridCornerUL": x, y = value.split() record.GridCornerUL = (int(x), int(y)) elif keyword == "GridCornerUR": x, y = value.split() record.GridCornerUR = (int(x), int(y)) elif keyword == "GridCornerLR": x, y = value.split() record.GridCornerLR = (int(x), int(y)) elif keyword == "GridCornerLL": x, y = value.split() record.GridCornerLL = (int(x), int(y)) elif keyword == "DatHeader": record.DatHeader = value.strip("\n\r") elif keyword == "Algorithm": record.Algorithm = value.strip("\n\r") elif keyword == "AlgorithmParameters": record.AlgorithmParameters = value.strip("\n\r") elif section == "INTENSITY": if "NumberCells" in line: record.NumberCells = int(line.split("=", 1)[1]) elif "CellHeader" in line: pass else: words = line.split() y = int(words[0]) x = int(words[1]) record.intensities[x, y] = float(words[2]) record.stdevs[x, y] = float(words[3]) record.npix[x, y] = int(words[4]) elif section == "MASKS": if "NumberCells" in line: record.nmask = int(line.split("=", 1)[1]) elif "CellHeader" in line: pass else: words = line.split() y = int(words[0]) x = int(words[1]) record.mask[x, y] = int(1) elif section == "OUTLIERS": if "NumberCells" in line: record.noutliers = int(line.split("=", 1)[1]) elif "CellHeader" in line: pass else: words = line.split() y = int(words[0]) x = int(words[1]) record.outliers[x, y] = int(1) elif section == "MODIFIED": if "NumberCells" in line: record.nmodified = int(line.split("=", 1)[1]) elif "CellHeader" in line: pass else: words = line.split() y = int(words[0]) x = int(words[1]) record.modified[x, y] = float(words[2]) else: continue return record if __name__ == "__main__": from Bio._utils import run_doctest run_doctest()