package driver; import stream.SamLine; import align2.Tools; import fileIO.ReadWrite; import fileIO.TextFile; import fileIO.TextStreamWriter; /** * * Selects only reads with long deletions * * @author Brian Bushnell * @date Jun 21, 2013 * */ public final class SelectReads { public static void main(String[] args){ assert(args.length>=2) : "Need 2 file names: "; assert(!args[0].equalsIgnoreCase(args[1])) : "File names must be different."; ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; ReadWrite.MAX_ZIP_THREADS=8; ReadWrite.ZIP_THREAD_DIVISOR=2; int minlen=1; long reads=Long.MAX_VALUE; char symbol='D'; if(args.length>2){symbol=(char)args[2].charAt(0);} if(args.length>3){minlen=Integer.parseInt(args[3]);} if(args.length>4){reads=Long.parseLong(args[4]);} symbol=Character.toUpperCase(symbol); if(symbol=='='){symbol='M';} if(symbol=='X'){symbol='S';} if(symbol=='N'){symbol='D';} if(symbol=='S' || symbol=='H' || symbol=='P'){symbol='C';} final int index=Tools.indexOf(new char[] {'M','S','D','I','C'}, symbol); assert(index>=0) : "Symbol (3rd argument) must be M, S, D, I, C (for match string symbols) or M, =, X, D, N, I, S, H, P (for cigar symbols)."; TextFile tf=new TextFile(args[0], true, false); TextStreamWriter tsw=new TextStreamWriter(args[1], false, false, true); tsw.start(); for(String line=tf.nextLine(); line!=null; line=tf.nextLine()){ if(line.charAt(0)=='@'){ tsw.println(line); }else{ if((reads=reads-1)<0){break;} SamLine sl=new SamLine(line); if(testLine(sl, minlen, index)){ tsw.println(line); } } } tf.close(); tsw.poison(); tsw.waitForFinish(); } private static boolean testLine(SamLine sl, int minlen, int index){ assert(sl!=null); if(!sl.mapped() || sl.cigar==null){return false;} int[] msdic=sl.cigarToMdsiMax(sl.cigar); return (msdic!=null && msdic[index]>=minlen); } }