# Time-stamp: <2011-01-31 17:03:49 Tao Liu> """Module Description Copyright (c) 2008 Tao Liu This code is free software; you can redistribute it and/or modify it under the terms of the BSD License (see the file COPYING included with the distribution). @status: experimental @version: $Revision$ @author: Tao Liu @contact: taoliu@jimmy.harvard.edu """ # ------------------------------------ # python modules # ------------------------------------ import os import sys import re import shutil from FeatIO import WigTrackI from BinKeeper import BinKeeperI #from taolib.CoreLib.DB.BinKeeper import DBBinKeeperI import time # ------------------------------------ # constants # ------------------------------------ # ------------------------------------ # Misc functions # ------------------------------------ # ------------------------------------ # Classes # ------------------------------------ class WiggleIO: """File Parser Class for Wiggle File. Note: Only can be used with the wiggle file generated by pMA2C or MACS. This module can not be univerally used. Note2: The positions in Wiggle File must be sorted for every chromosome. Example: >>> from Cistrome.CoreLib.Parser import WiggleIO >>> w = WiggleIO('sample.wig') >>> bk = w.build_binKeeper() >>> wtrack = w.build_wigtrack() """ def __init__ (self,f): """f must be a filename or a file handler. """ if type(f) == str: self.fhd = open(f,"r") elif type(f) == file: self.fhd = f else: raise Exception("f must be a filename or a file handler.") def build_wigtrack (self, bedO = None): """Use this function to return a WigTrackI. """ data = WigTrackI() add_func = data.add_loc chrom = "Unknown" span = 0 pos_fixed = 0 # pos for fixedStep data 0: variableStep, 1: fixedStep bed_len = 0 bed_region = 0 pchrom = [] for i in self.fhd: if i.startswith("track"): continue elif i.startswith("#"): continue elif i.startswith("browse"): continue elif i.startswith("variableStep"): # define line pos_fixed = 0 chromi = i.rfind("chrom=") # where the 'chrom=' is spani = i.rfind("span=") # where the 'span=' is if chromi != -1: chrom = i[chromi+6:].strip().split()[0] if not bedO is None: pchrom = bedO.peaks[chrom] bed_len = len(pchrom) bed_reg = 0 else: chrom = "Unknown" if spani != -1: span = int(i[spani+5:].strip().split()[0]) else: span = 0 elif i.startswith("fixedStep"): chromi = i.rfind("chrom=") # where the 'chrom=' is starti = i.rfind("start=") # where the 'chrom=' is stepi = i.rfind("step=") # where the 'chrom=' is spani = i.rfind("span=") # where the 'span=' is if chromi != -1: chrom = i[chromi+6:].strip().split()[0] if not bedO is None: pchrom = bedO.peaks[chrom] bed_len = len(pchrom) bed_reg = 0 else: raise Exception("fixedStep line must define chrom=XX") if spani != -1: span = int(i[spani+5:].strip().split()[0]) else: span = 0 if starti != -1: pos_fixed = int(i[starti+6:].strip().split()[0]) if pos_fixed < 1: raise Exception("fixedStep start must be bigger than 0!") else: raise Exception("fixedStep line must define start=XX") if stepi != -1: step = int(i[stepi+5:].strip().split()[0]) else: raise Exception("fixedStep line must define step=XX!") else: # read data value if pos_fixed: # fixedStep value = i.strip() add_val = False if not bed_len == 0: if bed_reg < bed_len and pos_fixed >= pchrom[bed_reg][0]: for cur_b in range(bed_reg, bed_len): if pos_fixed in range(pchrom[cur_b][0],pchrom[cur_b][1]): add_val = True break else: if pos_fixed < pchrom[cur_b][0]: break elif pos_fixed >= pchrom[cur_b][1]: bed_reg = cur_b + 1 else: add_val = True if add_val: add_func(chrom,int(pos_fixed),float(value)) pos_fixed += step else: # variableStep try: (pos,value) = i.split() except ValueError: print i,pos_fixed add_val = False if not bed_len == 0: if bed_reg < bed_len and pos_fixed >= pchrom[bed_reg][0]: for cur_b in range(bed_reg, bed_len): if pos_fixed in range(pchrom[cur_b][0],pchrom[cur_b][1]): add_val = True break else: if pos_fixed < pchrom[cur_b][0]: break elif pos_fixed >= pchrom[cur_b][1]: bed_reg = cur_b + 1 else: add_val = True if add_val: add_func(chrom,int(pos),float(value)) data.span = span self.fhd.seek(0) return data def build_binKeeper (self,chromLenDict={},binsize=200): """Use this function to return a dictionary of BinKeeper objects. chromLenDict is a dictionary for chromosome length like {'chr1':100000,'chr2':200000} bin is in bps. for detail, check BinKeeper. """ data = {} chrom = "Unknown" pos_fixed = 0 for i in self.fhd: if i.startswith("track"): continue elif i.startswith("browse"): continue elif i.startswith("#"): continue elif i.startswith("variableStep"): # define line pos_fixed = 0 chromi = i.rfind("chrom=") # where the 'chrom=' is spani = i.rfind("span=") # where the 'span=' is if chromi != -1: chrom = i[chromi+6:].strip().split()[0] else: chrom = "Unknown" if spani != -1: span = int(i[spani+5:].strip().split()[0]) else: span = 0 chrlength = chromLenDict.setdefault(chrom,250000000) + 10000000 data.setdefault(chrom,BinKeeperI(binsize=binsize,chromosomesize=chrlength)) add = data[chrom].add elif i.startswith("fixedStep"): chromi = i.rfind("chrom=") # where the 'chrom=' is starti = i.rfind("start=") # where the 'chrom=' is stepi = i.rfind("step=") # where the 'chrom=' is spani = i.rfind("span=") # where the 'span=' is if chromi != -1: chrom = i[chromi+6:].strip().split()[0] else: raise Exception("fixedStep line must define chrom=XX") if spani != -1: span = int(i[spani+5:].strip().split()[0]) else: span = 0 if starti != -1: pos_fixed = int(i[starti+6:].strip().split()[0]) if pos_fixed < 1: raise Exception("fixedStep start must be bigger than 0!") else: raise Exception("fixedStep line must define start=XX") if stepi != -1: step = int(i[stepi+5:].strip().split()[0]) else: raise Exception("fixedStep line must define step=XX!") chrlength = chromLenDict.setdefault(chrom,250000000) + 10000000 data.setdefault(chrom,BinKeeperI(binsize=binsize,chromosomesize=chrlength)) add = data[chrom].add else: # read data value if pos_fixed: # fixedStep value = i.strip() add(int(pos_fixed),float(value)) pos_fixed += step else: # variableStep try: (pos,value) = i.split() except ValueError: print i,pos_fixed add(int(pos),float(value)) self.fhd.seek(0) return data # def build_DBBinKeeper (self,dirname="NA",templatedb=None): # """Use this function to build DBBinKeepers for every # chromosome under a given directory. # Parameters: # dirname : where we store the DBBinKeeper files # templatedb : if not None, copy the templatedb file # instead of initialize a db file. # """ # data= {} # chrom = "Unknown" # if not os.path.exists(dirname): # os.mkdir(dirname) # chromdbfile = None # dbbk = None # for i in self.fhd: # if i.startswith("track"): # continue # elif i.startswith("browse"): # continue # elif i.startswith("variableStep"): # ci = i.rfind("chrom=") # where the 'chrom=' is # si = i.rfind("span=") # where the 'span=' is # if ci != -1: # chrom = i[i.rfind("chrom=")+6:].strip().split()[0] # else: # chrom = "Unknown" # if si != -1: # span = int(i[i.rfind("span=")+5:].strip().split()[0]) # else: # span = 0 # if dbbk: # dbbk.conn.commit() # chromdbfile = os.path.join(dirname,chrom+".db") # data[chrom] = chromdbfile # if templatedb: # shutil.copy(templatedb,chromdbfile) # dbbk = DBBinKeeperI(chromdbfile,chromosome=chrom,bin=8,chromosomesize=250000000) # else: # dbbk = DBBinKeeperI(chromdbfile,chromosome=chrom,bin=8,chromosomesize=250000000) # dbbk.init_tables() # add = dbbk.add # else: # (pos,value) = i.split() # add(int(pos),float(value)) # self.fhd.seek(0) # if dbbk: # dbbk.conn.commit() # return data