//===================================================================== // File: CEQLaneFilter.java // Class: CEQLaneFilter // Package: AFLPcore // // Author: Barry S. Master (based on the ABILaneFilter class developed // by James J. Benham) // Date: November 2, 2000 // Contact: bsmaster@beckman.com // // Genographer v1.0 - Computer assisted scoring of gels. // Copyright (C) 1998 Montana State University // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; version 2 // of the License. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. // // The GNU General Public License is distributed in the file GPL //===================================================================== package AFLPcore; import java.io.File; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.util.NoSuchElementException; import java.util.StringTokenizer; /** * This class reads data from a fragment results text file produced by a * Beckman Coulter, Inc. CEQ 2000 DNA Analysis System. The text file * contains trace data, size standard peak data, and information about the * sample and experimental conditions. * *

The CEQ 2000 is a capillary system. The data in the file represent * the four color traces collected from one capillary during a fragment * separation. Genographer, however, displays these data as though * they were obtained from a lane on a gel. * *

The class expects fragment analysis results, exported as a text file * with the Header and Result Data options selected. Files exported in * this manner will include: *

*

See the readLane method for the file format and data organization. * *

This information will be stored in a Lane object, which is used by * the program. The size standard peaks will be passed to a * SizeFunction. These will be used to calculate the size of the fragments. * Since the CEQ fragment results file contains all of the size standard * information needed for Genographer's sizing functions, the * standard.cfg file is not used by this filter. * *

The filter has three options that must be set before it can run: *

    *
  1. Data Color *
  2. Size Function *
  3. Size Standard (not used by this filter) *
* These can be manipulated using getOptions() and setOptions(). * All three options are a list of choices, one of which must be selected. * The possible values for the color option are D1, D2, D3 and D4. * The size function and the size standard can be the name of any size * function or standard known to the program. * *

This class uses the FeatureList class to retrieve the known functions. * Once the options have been set, the readLane method can be called to * read the actual file. * * @see SizeFunction * @see SizeStandard * @see readLane * * This class is based on the ABILaneFilter class developed * by James J. Benham. * * @author B. Master * @date 11/02/2000 */ public class CEQLaneFilter extends ImportFilter { // Variables from parent class //private protected int filetype; // the type, see constants above //private protected String name; // the name of this filter //private protected String descript; // a brief description //private protected File helpFile; // represents the file that contains // the help info for this filter. // color channel public static final int RED = 3; // Red public static final int YELLOW = 2; // Yellow (black won't show on the lane) public static final int GREEN = 1; // Green public static final int BLUE = 0; // Blue private int colorChannel=0; // Save the value of the users choice of color channel here. private String colorChannelName; // Save the name of the users choice of color channel here. private String standardName; private SizeFunction sizeFn; /** * Creates a new filter to read in CEQ lane files. */ public CEQLaneFilter() { // Initialize the variables for this filter filetype = LANE; name = "CEQ Trace"; descript = "Reads fragment results text files exported from a CEQ 2000."; helpFile = "ceqtrace.html"; // Options must be set. options = null; standardName = "not set"; sizeFn = null; } /** * Access the name of the filter. * * @return name of the import filter */ public String getName() { return name; } /** * Returns the type of input file supported by this filter In this case * ImportFilter.LANE, since the filter reads in lane data. * * @return constant LANE. */ public int getFileType() { return filetype; } /** * Retrieves a short, approximately one sentence, description of the filter. * * @return the description */ public String getDescription() { return descript; } /** * The help file describes which files the filter reads and the options * that this filter accepts. * * @return File that contains the help information, either html or * plaintext. */ public String getHelpFile() { return helpFile; } /** * Returns the options for this filter, which includes the color of the * data, the size function to use, and the size standard. The first * option is the color to read, which can be one of four possilbe * values: Red, Blue, Green, or Yellow. The color choice is given as * a Option of type CHOICE. The second * option is also of type CHOICE. It tells which size * method should be used to compute the size of the fragements. Please * see the help files and the code for the size functions for a * description of how the work. * * @return an array containing the options described above. * * @see Option * @see FeatureList * @see SizeFunction * @see SizeStandard */ public Option[] getOptions() { Option[] returnOpts = new Option[2]; // Pick the color String[] colors = new String[4]; colors[0] = "D1"; // Display in this order on drop down menu. colors[1] = "D2"; colors[2] = "D3"; colors[3] = "D4"; Option param = new Option("Color", Option.CHOICE, true, colors); returnOpts[0] = param; // The size function option, possiblities retrieved from the // feature list. param = new Option("Size Method", Option.CHOICE, true, FeatureList.getSizeMgr().getNames(), FeatureList.getSizeMgr().getDefaultName()); returnOpts[1] = param; // Size standard code removed // -jbenham 2001-09-03 // the size standards defined /* *try * { * param = new Option("Size Standard", Option.CHOICE, true, * FeatureList.getStandardMgr().getNames()); * } *catch(IOException e) * { * throw new MissingParameterError("Error accessing standards file. " + * e.getMessage()); * } * *returnOpts[2] = param; */ return returnOpts; } /** * Sets the parameters for the filter to the specified values, including * color. The color must be set before this filter can run. The option * representing the color should have a string value naming the color. * The size function must also be set for the filter to work. It * must contain the name of a valid SizeFunction. Note that * the name is not the class name of the SizeFunction, but * the name each SizeFunction stores internally. The * third option must also be set. * * @param opts an array of length 3 which contains the options * mentioned above and described in getOptions() * The order must be: color, size function, size standard. * * @exception MissingParameterError occurs when the filter fails to * extract a string from the first option in opts. * @exception IllegalArgumentException occurs when a string is found but * cannot be matched to one of the colors: Red, Blue, Green, or Yellow. * Or if an array with length not equal to 3 is given as * opts, or if the specified size function, the second * option, could not be matched to a defined size function. */ public void setOptions(Option[] opts) { // Check the length. if(opts.length != 2) throw new IllegalArgumentException("Invalid options for CEQ Lane " + "Filter. 2 options expected, but " + opts.length + " were provided."); // extract the option String value = opts[0].getStringValue(); // store the options options = opts; // check to make sure we have a string if (value == null) throw new MissingParameterError("Color not provided as parameter to " + "CEQ Lane Filter."); if(value.equalsIgnoreCase("D1")) colorChannel = RED; else if(value.equalsIgnoreCase("D2")) colorChannel = YELLOW; else if(value.equalsIgnoreCase("D3")) colorChannel = GREEN; else if(value.equalsIgnoreCase("D4")) colorChannel = BLUE; else { // didn't match a color, so something is wrong. // set the options back to null since the ones we got were no good. options = null; // and complain throw new IllegalArgumentException("Invalid color specified for CEQ" + " Lane Filter."); } colorChannelName = value; // Save the name of the selected color choice here. // Next should be the size function String sizeFnName = opts[1].getStringValue(); try { sizeFn = (SizeFunction) FeatureList.getSizeMgr().get(sizeFnName); } catch(NoSuchElementException e) { options = null; throw new IllegalArgumentException("Invalid sizing function specified" + " for CEQ Lane Filter. "); } // Size standard code removed // -jbenham 2001-09-03 // The final option is the size standard definition /* *standardName = opts[2].getStringValue(); */ // this will be checked later } /** * Read the data from the file and use it to create a Lane. * *

The data in the file represent a run from one capillary. Genographer * displays these data as though they were a lane obtained from a gel. * * Since this is a text file, much of the data must be converted * to doubles or integers before they are used. * * Each line in the file is considered a record, ending with a carriage * return, "\r". Records that contain multiple pieces of data are also * tab delimited, "\t". * *

The file is organized into sections. Each section has a name * in the following format: "[Section Name]". The file sections can * contain Key/Value pairs or they can contain records with standard peak * or trace data. * *

The file is first searched for a specific section name. This is done to * ensure that the file is in the appropriate format and that the expected * data are actually in the file. If the section name is not found, a file * import IOException occurs. * *

The sections are not searched in the order that they appear in the file. * Instead, the [FRAGMENT ANALYZED DATA OUTPUT] section is used first. This * is a section that is unique to the proper file format. Searching for it * first ensures that the filter is reading the appropriate file. Also, * data from this section is needed to process the information in the * remaining sections. Other than starting with this section, the file is * accessed top to bottom. * *

Once the file section is found, the data are extracted by searching for * Key/Value pairs or reading records containing standard peak or trace * data. * * The Key/Value pairs have the format: "KeyName=Value" * *

The records containing the trace data have the following tab delimited format: *

   * "53	-3.112358e+001	  0.000000e+000	  1.377488e+001	  0.000000e+000"
   * 
* Where the 1st column indicates the scan point number. The remaining four columns * contain the trace data for each of the four dyes (D1, D2, D3, D4, respectively). * If a dye was not present in the sample, all of the data in the corresponding * column will be 0.000000e+000. Note also that some of the trace data may * be negative. This negative bias is removed from the data by raising the * entire trace in the positive y-axis direction, before the lane is created. * *

The records containing standard peak data have the following tab delimited format: *

   * " *	3 	D1	12.58	60	59.86	1911	6800	2.91"
   * 
* with columns representing peak type, peak number, dye, migration time, standard * size, estimated size, peak height, peak area, peak width, respectively. * A "*" in the peak type column indicates a standard record. * *

The migration times for the standard peaks are in minutes, so these must be * converted to scan point location, based on the sampling rate. Additionally, * the standard peak migration time is offset from the trace data by the * value of CVMigrationOffset, a key value found in the [FRAGMENT ANALYZED DATA OUTPUT] * section. This offset is corrected when the standard peaks are extracted from * the file. * *

The sample information, displayed by Genographer in the Gel View, has the * following format: * string "from Lane" integer "in" string. * * The sample ID information, added to the lane by this filter, will consequently * be displayed as: * Sample Name from Lane capillary number in Plate Name *

* @see getTrace * @see getStandards * @see findFileSection * @see getKeyValue * * @author B. Master * @date 12/12/2000 */ public Lane[] readLane(File inputFile) throws IOException { Lane newLane; double trace[]; DataList stdPoints; SizeFunction sizeFn; String record; StringTokenizer st; int traceSize; // Make sure we have options set, including the color channel if(options == null) throw new MissingParameterError("The color for the filter must be set" + " before the filter can work."); // Open the file. Use buffering to speed up the reads. BufferedReader in = new BufferedReader(new FileReader(inputFile)); //===== Get some sample information from this file section. ============= // If the section does not exist, an exception will be thrown. findFileSection(in, "[FRAGMENT ANALYZED DATA OUTPUT]"); String numberOfPoints = getKeyValue(in, "Number of Points"); traceSize = Double.valueOf(numberOfPoints).intValue(); trace = new double[traceSize]; // Create the trace array. // Determine which dyes are included in this file. String dyesUsed = getKeyValue(in, "Dyes Used"); // Throw an exception if we don't have data for this color in this file. checkForColor(dyesUsed, colorChannelName); // Get some more information from this file section. String CVMigrationOffset = getKeyValue(in, "CVMigrationOffset"); double xTimeOffset = Double.valueOf(CVMigrationOffset).doubleValue(); String samplingRate = getKeyValue(in, "Sampling Rate"); double dataRate = Double.valueOf(samplingRate).doubleValue(); // Now get the trace data. Look for the header string of the trace data. // It should be a couple of records down in the file. // skipRecords(in, 2); String traceHeader = " INDEX\t D1\t D2\t D3\t D4"; findFileSection(in, traceHeader); getTrace(in, trace); // Fill the trace array with trace points. // Build the lane. newLane = new Lane(trace); newLane.setColor(colorChannel); // Set the color channel //===== Get some sample information from this file section. ============= // Start over at the top of the file. See function comments on why it was done // this way. in.close(); in = new BufferedReader(new FileReader(inputFile)); findFileSection(in, "[GENERAL]"); // Move to this section. String plateName = getKeyValue(in, "Sample Plate"); String sampleName = getKeyValue(in, "Sample Name"); String samplePostion = getKeyValue(in, "Sample Position"); // Give the lane the sample ID information newLane.setName(sampleName.trim()); // Remove the trailing \t newLane.setGelName(plateName.trim()); // Converts the capillary ID from char (A thru H) to an integer (1 thru 8). int capillaryNumber = (int)(samplePostion.charAt(0) - 64); newLane.setLaneNumber(capillaryNumber); //====== Finally, get the standard peak information from the file. ============ findFileSection( in, "[FRAGMENT LIST]"); // Move to this section. stdPoints = new DataList(); getStandards(in, stdPoints, dataRate, xTimeOffset); //================= set the size function ============== String sizeName = options[1].getStringValue(); sizeFn = (SizeFunction) FeatureList.getSizeMgr().get(sizeName); sizeFn = (SizeFunction) sizeFn.clone(); sizeFn.init(stdPoints); // Give the standards to the size function. sizeFn.setMaxScan(newLane.getNumPoints() - 1); newLane.setSizeFunction(sizeFn); //========== Clean up and return. ==================== in.close(); Lane[] laneArray = new Lane[1]; laneArray[0] = newLane; return laneArray; } /** * This filter does not read gels. * * @return Always null */ public Gel readGel(File inputFile) throws IOException { return null; } /** * Get the trace points for the selected dye from the file and * store them in a trace array. * * Read through all of the trace data records. Parse each record into * its four dye componets. Every trace record contains a tab delimited field * for each of the four dyes. If the dye was included in the sample, the * corresponding field will have useful data. Otherwise, the field will have * data equal to zero. The colorChannel represents the choice of dye data * to save. * * The data intensities are scaled before being added to the trace. * This scaling ensures that the values will fit in a short data type, when * serialized by the lane class for output to a file. * * Since Genographer expects trace values to be greater than or equal to zero, * any negative values are removed by adjusting the entire trace in the positive * y-axis direction. The adjustment is limited to a fixed magnitude. If the * adjustment does not remove all of the negative bias, the remaining negative * values are set equal to zero. * * On entry, assumes the file pointer is set to the trace data location. * * On exit, the trace array is filled with trace points. * * @author B. Master * @date 03/13/2001 */ private void getTrace(BufferedReader in, double [] trace) throws IOException { String record; StringTokenizer st; final double Y_OFFSET_MAX = -200.00; // The maximum allowed offset correction. double yTracePointMin = 0.0; // Y-axis low point stored here. String tracePointNumber; String D1TracePoint; String D2TracePoint; String D3TracePoint; String D4TracePoint; for (int i = 0; i < trace.length; i++) { record = in.readLine(); // This record has trace data. st = new StringTokenizer(record, "\t", false); tracePointNumber = st.nextToken(); D1TracePoint = st.nextToken(); D2TracePoint = st.nextToken(); D3TracePoint = st.nextToken(); D4TracePoint = st.nextToken(); switch(colorChannel){ case RED: trace[i] = Double.valueOf(D1TracePoint).doubleValue()* 0.4; break; case YELLOW: trace[i] = Double.valueOf(D2TracePoint).doubleValue()* 0.4; break; case GREEN: trace[i] = Double.valueOf(D3TracePoint).doubleValue()* 0.4; break; case BLUE: trace[i] = Double.valueOf(D4TracePoint).doubleValue()* 0.4; break; } if (trace[i] < yTracePointMin) // Find the most negative trace point, yTracePointMin = trace[i]; // if any are below zero. } // Attempt to raise the trace up to the zero baseline, but don't correct too much. if (yTracePointMin < Y_OFFSET_MAX) yTracePointMin = Y_OFFSET_MAX; for (int i = 0; i < trace.length; i++) { trace[i] -= yTracePointMin; // Raise the trace somewhat (yTracePointMin is negative). if (trace[i] < 0.0) trace[i] = 0.0; // Don't let any points be negative. } } // Note: The lane class serialization casts the trace double to short (16 int) // must limit our data to fit in a short. // Ben says max is (2**`6) * 2 * 1.25 // 16 bits times one bit shift times non-linear correction of up to 25% // // Could divide all by 2.5 (or multiply by 0.4). // Could truncate any above the 163,840 max. // Could scale by a calculated factor to bring the data down to desired magnitude. // /** * Get the size standards from the file and store them in a DataList. * * Read through all the records in the peak table. If the * record represents a standard peak; store the peak size, * height and trace location in the standards DataList. * * All of the fields in the table are tab delimited strings. * These are converted to doubles or ints, as appropriate. * Some of the record fields contain peak information that is * not needed for the standards DataList. These fields are * skipped. * * The peak migration times are in minutes, so these * must be converted to scan point locations, based on the * sampling data rate. * scanPtNum = migration time(min.) * 60 (sec/min.) * rate(points/sec.) * * The migration times also include a time offset from the trace * data. This is corrected by adding the xTimeOffset. * * The end of the table is indicated by a blank record. * * Assumes the file pointer is set to the peak table section * of the file. * * On exit, the DataList parameter is filled with standard peaks. * * @author B. Master * @date 12/12/2000 */ private void getStandards(BufferedReader in, DataList stdPoints, double dataRate, double xTimeOffset) throws IOException { String record; StringTokenizer st; Peak pk; String standardMark = " * "; // Indicates a standard record. String peakType; double migrationTime; int standardSize; double peakHeight; double scanPtNum; record = in.readLine(); while (!record.equals("")) { st = new StringTokenizer(record, "\t", false); peakType = st.nextToken(); if (peakType.equals(standardMark)) { st.nextToken(); // Skip this field; st.nextToken(); // Skip this field; // Calc. the scan point number from the migration time. migrationTime = Double.valueOf(st.nextToken()).doubleValue(); scanPtNum = (migrationTime * 60 * dataRate); // 2 Hz dataRate = 120 pts/min scanPtNum += xTimeOffset; // Correct for the relative trace offset. scanPtNum = (int)(scanPtNum + 0.5); // Round to a whole number (a trace location). // Standard size. standardSize = Integer.valueOf(st.nextToken()).intValue(); st.nextToken(); // Skip this field. // Standard height. peakHeight = Double.valueOf(st.nextToken()).doubleValue(); // Add the standard peak to the list. pk = new Peak(standardSize, peakHeight, scanPtNum); stdPoints.addData(pk); } record = in.readLine(); // Next record. } } /** * Confirm that the requested dye data is included in the file. * The requested dye name should be a substring of the dyesInFile * string. If not, an exception is thrown, ending the file import. * * For example, a file with D1 and D4 data has a dyesInFile string: "D1,D4" * * @author B. Master * @date 11/02/2000 */ private void checkForColor(String dyesInFile, String dyeChannelRequested) throws IOException { if (dyesInFile.indexOf(dyeChannelRequested) == -1) { // The requested dye name was not a substring of the dyesInFile string. String errorMsg = dyeChannelRequested; errorMsg = "Could not find the color " + errorMsg + " in the file."; throw new IOException(errorMsg); } } /** * Skip through some records in the file by reading them, but not saving their * contents. * Assumes that the requested number of records are contained in the file. * * @author B. Master * @date 11/02/2000 */ private void skipRecords(BufferedReader in, int records) throws IOException { for (int i = 0; i < records; i++) { in.readLine(); } } /** * Search the file for the desired section. * File section names have the form: "[SectionName]" * * If the search is successful, the file pointer will be set to the desired section * and the routine will return to the caller. * Otherwise, the file pointer will be set to the end of the file and an * exception will be thrown, ending the file import. * * @author B. Master * @date 12/12/2000 */ private void findFileSection(BufferedReader in, String sectionName) throws IOException { String record; record = in.readLine(); while (record != null) { if (record.equals(sectionName)){ return; // Desired section found. } record = in.readLine(); } // If here, file section was not found, so throw the exception. throw new IOException("This does not appear to be a CEQ fragment results text file." + " Missing " + sectionName.trim()); } /** * Search the file for a record containing the specified key. * Keys have the form: "KeyName=Value" * * If the key was found, returns the associated value. * Otherwise, returns a null string. * * Assumes that the key, if it exists, is in the remaining records of the file * (file seek position was not reset from last position). * * @author B. Master * @date 11/02/2000 */ private String getKeyValue(BufferedReader in, String desiredKey) throws IOException { String value; StringTokenizer st; String currentKey; String record; // Search for the key record = in.readLine(); while ((record != null) && (!record.equals("\r"))) { st = new StringTokenizer(record, "=", false); currentKey = st.nextToken(); if (currentKey.equals(desiredKey)) { value = st.nextToken(); return value; // Key found, return its value. } record = in.readLine(); // Read the next record } // If here, key not found. return null; // return null; } }