#!/usr/bin/env python3
# convert old style illumina file to fastq
"""
Assumes input is in Illumina 1.5 format, which uses Phred64 encoding
Converts to Illumina 1.8, which uses Phred33 encoding.

Usage:
    <script> INPUT_FILE

Each input line is one read, written as ":"-separated fields:

* the rightmost field is the quality string,
* the field second from the right is the sequence,
* every field before those two, re-joined with ":", is the read ID.

The output is written next to the input, using the input name with its
extension replaced by ".fastq".
"""
import os.path
import sys

NL = "\n"

# Phred64 stores a quality Q as chr(Q + 64); Phred33 stores it as chr(Q + 33).
PHRED64_OFFSET = 64
PHRED33_OFFSET = 33


class Phred:
    """
    Implement Phred scores as dictionaries.

    Attributes:
        Illumina18: dict mapping each Phred64 quality character to the
            Phred33 character that encodes the same quality score.
        transtable: the same mapping in the form expected by
            str.translate().

    Example: "B" (Phred64, Q2) becomes "#", and "h" (Phred64, Q40)
    becomes "I".
    """

    def __init__(self):
        """
        Build the Phred64 -> Phred33 mapping and its translation table.
        """
        # Re-basing from offset 64 to offset 33 moves every character down
        # by 31 code points. Only characters from "@" (Q0) upwards are
        # valid Phred64, so anything below is deliberately left unmapped
        # and passes through str.translate() unchanged.
        shift = PHRED64_OFFSET - PHRED33_OFFSET
        self.Illumina18 = {
            chr(n): chr(n - shift) for n in range(PHRED64_OFFSET, 127)
        }
        self.transtable = str.maketrans(self.Illumina18)


pdict = Phred()


def convert_line(line, table=None):
    """
    Convert one ":"-separated input line into a four-line FASTQ record.

    Args:
        line: one line of the input file, with or without its trailing
            newline.
        table: str.translate() table applied to the quality string.
            Defaults to the module-level ``pdict.transtable``.

    Returns:
        The FASTQ record as a single string ("@ID", sequence, "+",
        quality; each terminated by NL), or "" when the line is blank.

    Raises:
        ValueError: if the line has no ":" separator, so that a sequence
            and a quality field cannot both be found.
    """
    if table is None:
        table = pdict.transtable
    record = line.rstrip(NL)
    if not record.strip():
        # Blank lines (typically a trailing one) carry no read.
        return ""
    # Split from the right: the ID itself may contain ":" characters.
    fields = record.rsplit(":", 2)
    if len(fields) < 2:
        raise ValueError(
            "expected ID:sequence:quality, got {!r}".format(record)
        )
    read_id = fields[0] if len(fields) == 3 else ""
    sequence = fields[-2]
    quality = fields[-1].translate(table)
    return "".join(
        ("@", read_id, NL, sequence, NL, "+", NL, quality, NL)
    )


def convert(infile, outfile, table=None):
    """
    Convert every line read from *infile* and write FASTQ to *outfile*.

    Args:
        infile: iterable of input lines (e.g. an open text file).
        outfile: object with a ``write(str)`` method.
        table: optional translation table passed to convert_line().

    Raises:
        ValueError: for a malformed line; the message is prefixed with
            the 1-based line number.
    """
    for lineno, line in enumerate(infile, 1):
        try:
            record = convert_line(line, table)
        except ValueError as err:
            raise ValueError("line {}: {}".format(lineno, err)) from err
        if record:
            outfile.write(record)


def main(argv=None):
    """
    Command-line entry point.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``. Only the first argument (the input file
            name) is used.

    Returns:
        Process exit status: 0 on success, 1 if the output would
        overwrite the input, 2 if no input file was given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        sys.stderr.write(
            "Usage: {} INPUT_FILE".format(os.path.basename(sys.argv[0])) + NL
        )
        return 2

    IFN = args[0]
    basename = os.path.splitext(IFN)[0]
    OFN = basename + '.fastq'
    # Opening the output with "w" truncates it; if it is the same file as
    # the input, the data would be destroyed before it is read.
    if os.path.abspath(OFN) == os.path.abspath(IFN):
        sys.stderr.write(
            "Refusing to overwrite input file {}".format(IFN) + NL
        )
        return 1

    # The context manager closes both files even if conversion fails.
    with open(IFN, "r") as infile, open(OFN, 'w') as outfile:
        convert(infile, outfile)
    return 0


if __name__ == "__main__":
    sys.exit(main())