#!/usr/bin/env python
#
# Copyright 2009 Illumina, Inc.
#
# This software is covered by the "Illumina Genome Analyzer Software
# License Agreement" and the "Illumina Source Code License Agreement",
# and certain third party copyright/licenses, and any user of this
# source file is bound by the terms therein (see accompanying files
# Illumina_Genome_Analyzer_Software_License_Agreement.pdf and
# Illumina_Source_Code_License_Agreement.pdf and third party
# copyright/license notices).
#
# \author Chris Saunders
#

"""
usage: lsnp_summary.py < blt_output > extended_lsnp_output

Provides extended information on lsnp calls from the blt snp-caller.
In addition to the original output, this script lists the reference
allele frequency, the highest frequency allele and the second highest
frequency allele.
"""

import sys


# Base <-> index lookup tables; the index selects which frequency field of an
# LSNP record belongs to which base.
refindex = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
refbase = ['A', 'C', 'G', 'T']

# Positions within a whitespace-split LSNP record, as read by this script.
_LABEL_FIELD = 0        # record type tag; only "LSNP" records are extended
_REF_FIELD = 4          # reference base
_FIRST_FREQ_FIELD = 8   # frequency of refbase[0]
_FREQ_FIELD_STRIDE = 2  # frequency fields sit at every second position

_SUMMARY_FORMAT = (
    "%s freq(ref): %s max_allele: %s freq(max_allele): %.8f "
    "max2_allele: %s freq(max2_allele): %.8f\n"
)


def _freq_field(base_index):
    """Return the field position holding the frequency of refbase[base_index]."""
    return _FIRST_FREQ_FIELD + _FREQ_FIELD_STRIDE * base_index


def extend_lsnp_line(line):
    """Return the output text for one input line.

    Lines whose first whitespace-separated field is not "LSNP" -- including
    blank lines, which have no fields at all -- are returned unchanged.
    For LSNP lines the stripped input is returned with the reference allele
    frequency (verbatim, as it appeared in the input) and the two highest
    frequency alleles (re-formatted to 8 decimals) appended.

    Raises:
        KeyError: the reference base is not one of A, C, G, T.
        IndexError: an LSNP line has too few fields.
        ValueError: a frequency field cannot be parsed as a float.
    """
    fields = line.split()
    # Guard against empty lines: indexing fields[0] on a blank line used to
    # abort the whole run with an IndexError.
    if not fields or fields[_LABEL_FIELD] != "LSNP":
        return line

    ref_freq = fields[_freq_field(refindex[fields[_REF_FIELD]])]

    # Sort (frequency, base) pairs in descending order. Frequency ties are
    # broken by base letter, also descending (e.g. 'C' ranks ahead of 'A').
    ranked = sorted(
        ((float(fields[_freq_field(i)]), base)
         for i, base in enumerate(refbase)),
        reverse=True)
    (top_freq, top_base), (second_freq, second_base) = ranked[0], ranked[1]

    return _SUMMARY_FORMAT % (line.strip(), ref_freq,
                              top_base, top_freq,
                              second_base, second_freq)


def summarize(lines):
    """Lazily yield the output text for each line of the iterable *lines*."""
    for line in lines:
        yield extend_lsnp_line(line)


def main(infp=None, outfp=None):
    """Copy *infp* to *outfp*, extending every LSNP record on the way.

    *infp* is any iterable of lines and *outfp* any object with a write()
    method; they default to standard input and standard output.
    """
    if infp is None:
        infp = sys.stdin
    if outfp is None:
        outfp = sys.stdout
    for text in summarize(infp):
        outfp.write(text)


if __name__ == "__main__":
    main()