//===================================================================== // File: CEQLaneFilter.java // Class: CEQLaneFilter // Package: AFLPcore // // Author: Barry S. Master (based on the ABILaneFilter class developed // by James J. Benham) // Date: November 2, 2000 // Contact: bsmaster@beckman.com // // Genographer v1.0 - Computer assisted scoring of gels. // Copyright (C) 1998 Montana State University // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; version 2 // of the License. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. // // The GNU General Public License is distributed in the file GPL //===================================================================== package AFLPcore; import java.io.File; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.util.NoSuchElementException; import java.util.StringTokenizer; /** * This class reads data from a fragment results text file produced by a * Beckman Coulter, Inc. CEQ 2000 DNA Analysis System. The text file * contains trace data, size standard peak data, and information about the * sample and experimental conditions. * *
The CEQ 2000 is a capillary system. The data in the file represent * the four color traces collected from one capillary during a fragment * separation. Genographer, however, displays these data as though * they were obtained from a lane on a gel. * *
The class expects fragment analysis results, exported as a text file * with the Header and Result Data options selected. Files exported in * this manner will include: *
See the readLane method for the file format and data organization. * *
This information will be stored in a Lane
object, which is used by
* the program. The size standard peaks will be passed to a
* SizeFunction
. These will be used to calculate the size of the fragments.
* Since the CEQ fragment results file contains all of the size standard
* information needed for Genographer's sizing functions, the
* standard.cfg file is not used by this filter.
*
*
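The filter has two options that must be set before it can run: the color
* (dye channel) to read and the size method used to size the fragments.
* The options are described by getOptions() and applied with setOptions().
* Both options are a list of choices, one of which must be selected.
* The possible values for the color option are D1, D2, D3 and D4.
* The size method can be the name of any size function known to the
* program. (The size standard is no longer an option of this filter; the
* code that handled it is commented out in getOptions() and setOptions().)
*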
* This class uses the FeatureList
class to retrieve the known functions.
* Once the options have been set, the readLane
method can be called to
* read the actual file.
*
* @see SizeFunction
* @see SizeStandard
* @see readLane
*
* This class is based on the ABILaneFilter class developed
* by James J. Benham.
*
* @author B. Master
* @date 11/02/2000
*/
public class CEQLaneFilter extends ImportFilter
{
// Variables from the parent class. They are assigned in this class's
// constructor but are not declared in this file:
//   filetype - the type of file the filter reads
//   name     - the name of this filter
//   descript - a brief description
//   helpFile - identifies the file that contains the help info for this filter
//
// Color channel codes. setOptions() maps the dye names D1..D4 onto these
// values, getTrace() switches on them to pick the dye column, and readLane()
// hands the selected code to Lane.setColor().
public static final int RED = 3; // Red, selected by dye name D1
public static final int YELLOW = 2; // Yellow, selected by D2 (black won't show on the lane)
public static final int GREEN = 1; // Green, selected by dye name D3
public static final int BLUE = 0; // Blue, selected by dye name D4
private int colorChannel=0; // Color channel code chosen by the user (one of the constants above); set by setOptions().
private String colorChannelName; // Dye name chosen by the user, exactly as supplied to setOptions(); checked against the file by readLane().
// Initialised to "not set" by the constructor. The code that filled it from
// the options is commented out in setOptions().
private String standardName;
// Size function looked up by setOptions(). Note that readLane() declares its
// own local variable of the same name and looks the function up again.
private SizeFunction sizeFn;
/**
 * Builds a CEQ lane import filter in its unconfigured state.
 *
 * The descriptive fields (type, name, description, help file) are filled
 * in here. The options are left unset, so <code>setOptions()</code> has to
 * be called before <code>readLane()</code> will accept a file.
 */
public CEQLaneFilter()
{
  // Descriptive information reported by the accessor methods.
  this.name = "CEQ Trace";
  this.descript = "Reads fragment results text files exported from a CEQ 2000.";
  this.helpFile = "ceqtrace.html";
  this.filetype = LANE;

  // Nothing has been configured yet; readLane() refuses to run while
  // the options are null.
  this.sizeFn = null;
  this.standardName = "not set";
  this.options = null;
}
/**
 * Reports the name of this import filter.
 *
 * @return the filter name assigned by the constructor
 */
public String getName()
{
  return this.name;
}
/**
 * Reports the type of input file this filter supports. The constructor
 * stores <code>LANE</code>, because the filter reads lane data.
 *
 * @return the file type assigned by the constructor (the constant
 *         <code>LANE</code>)
 */
public int getFileType()
{
  return this.filetype;
}
/**
 * Reports a short description of the filter, roughly one sentence long.
 *
 * @return the description assigned by the constructor
 */
public String getDescription()
{
  return this.descript;
}
/**
 * Identifies the help file for this filter. The help file describes which
 * files the filter reads and the options that the filter accepts.
 *
 * @return the help file reference assigned by the constructor, as a string
 */
public String getHelpFile()
{
  return this.helpFile;
}
/**
 * Describes the options this filter needs before it can read a file.
 *
 * Two options are returned, both of type <code>Option.CHOICE</code>:
 * <ol>
 * <li>"Color" - the dye whose trace is read; one of D1, D2, D3 or D4.
 * <li>"Size Method" - the size function used to compute the size of the
 *     fragments. The choices and the default are taken from the size
 *     manager of the <code>FeatureList</code>. Please see the help files
 *     and the code of the size functions for a description of how they
 *     work.
 * </ol>
 *
 * A size standard option used to be offered as well; that code was removed
 * (jbenham, 2001-09-03).
 *
 * @return an array of length 2 containing the options described above, in
 *         the order color, size method.
 *
 * @see Option
 * @see FeatureList
 * @see SizeFunction
 * @see SizeStandard
 */
public Option[] getOptions()
{
  // Dye names, in the order they should be displayed on the drop down menu.
  String[] dyeNames = { "D1", "D2", "D3", "D4" };
  Option colorOption = new Option("Color", Option.CHOICE, true, dyeNames);

  // The available size functions come from the feature list.
  Option sizeMethodOption = new Option("Size Method", Option.CHOICE, true,
                                       FeatureList.getSizeMgr().getNames(),
                                       FeatureList.getSizeMgr().getDefaultName());

  // No size standard option: that code was removed (jbenham 2001-09-03).
  return new Option[] { colorOption, sizeMethodOption };
}
/**
 * Sets the parameters for the filter: the color (dye) to read and the
 * size function to use. Both must be set before the filter can run.
 *
 * The first option must have a string value naming the dye: D1, D2, D3 or
 * D4 (case is ignored). The second option must contain the name of a valid
 * <code>SizeFunction</code>. Note that this is not the class name of the
 * <code>SizeFunction</code>, but the name each <code>SizeFunction</code>
 * stores internally.
 *
 * The filter state is only updated once both options have been validated.
 * If either option is rejected, the stored options are cleared, so that
 * <code>readLane()</code> refuses to run until valid options are supplied.
 * An array of the wrong length is rejected without touching the stored
 * options.
 *
 * @param opts an array of length 2 which contains the options mentioned
 *             above and described in <code>getOptions()</code>. The order
 *             must be: color, size function.
 *
 * @exception MissingParameterError occurs when the filter fails to
 *     extract a string from the first option in <code>opts</code>.
 * @exception IllegalArgumentException occurs when a string is found but
 *     cannot be matched to one of the dyes D1, D2, D3 or D4, or if an
 *     array with length not equal to 2 is given as <code>opts</code>, or
 *     if the specified size function, the second option, could not be
 *     matched to a defined size function.
 */
public void setOptions(Option[] opts)
{
  // Check the length.
  if (opts.length != 2)
    throw new IllegalArgumentException("Invalid options for CEQ Lane " +
                                       "Filter. 2 options expected, but " +
                                       opts.length + " were provided.");

  // ---- the color ----
  String value = opts[0].getStringValue();

  // From here on the filter counts as "not configured" until every option
  // has been accepted. This also covers the missing-color case, which
  // previously left the rejected options installed.
  options = null;

  // check to make sure we have a string
  if (value == null)
    throw new MissingParameterError("Color not provided as parameter to " +
                                    "CEQ Lane Filter.");

  int channel;
  if (value.equalsIgnoreCase("D1"))
    channel = RED;
  else if (value.equalsIgnoreCase("D2"))
    channel = YELLOW;
  else if (value.equalsIgnoreCase("D3"))
    channel = GREEN;
  else if (value.equalsIgnoreCase("D4"))
    channel = BLUE;
  else
    // didn't match a color, so something is wrong.
    throw new IllegalArgumentException("Invalid color specified for CEQ" +
                                       " Lane Filter.");

  // ---- the size function ----
  String sizeFnName = opts[1].getStringValue();
  SizeFunction chosenSizeFn;
  try
  {
    chosenSizeFn = (SizeFunction) FeatureList.getSizeMgr().get(sizeFnName);
  }
  catch (NoSuchElementException e)
  {
    throw new IllegalArgumentException("Invalid sizing function specified"
                                       + " for CEQ Lane Filter. ");
  }

  // Size standard code removed (jbenham 2001-09-03); there is no third
  // option to read.

  // ---- everything checked out: commit the new configuration ----
  colorChannel = channel;
  colorChannelName = value;   // the dye name exactly as the caller supplied it
  sizeFn = chosenSizeFn;
  options = opts;
}
/**
 * Read the data from the file and use it to create a Lane.
 *
 * <p>The data in the file represent a run from one capillary. Genographer
 * displays these data as though they were a lane obtained from a gel.
 * Since this is a text file, much of the data must be converted to doubles
 * or integers before they are used.
 *
 * <p>Each line in the file is considered a record. Records that contain
 * multiple pieces of data are tab delimited. The file is organized into
 * sections, each with a name of the form "[Section Name]". Sections contain
 * either Key/Value pairs ("KeyName=Value") or records with standard peak or
 * trace data.
 *
 * <p>The sections are not read in the order that they appear in the file.
 * The [FRAGMENT ANALYZED DATA OUTPUT] section is used first: it is unique
 * to the proper file format, so finding it confirms the file type, and its
 * data are needed to process the remaining sections. The file is then
 * reopened and read top to bottom for the [GENERAL] and [FRAGMENT LIST]
 * sections.
 *
 * <p>Trace records are tab delimited: the scan point number followed by the
 * trace value for each of the four dyes (D1, D2, D3, D4, respectively),
 * for example "53 -3.112358e+001 0.000000e+000 1.377488e+001 0.000000e+000".
 * If a dye was not present in the sample, its column is all 0.000000e+000.
 * Negative values are removed by <code>getTrace</code> before the lane is
 * created.
 *
 * <p>Standard peak records are tab delimited with columns for peak type,
 * peak number, dye, migration time, standard size, estimated size, peak
 * height, peak area and peak width. A "*" in the peak type column marks a
 * standard record. Migration times are in minutes; <code>getStandards</code>
 * converts them to scan point locations using the sampling rate and the
 * CVMigrationOffset key from the [FRAGMENT ANALYZED DATA OUTPUT] section.
 *
 * <p>The sample information is displayed by Genographer in the Gel View as
 * "Sample Name from Lane capillary number in Plate Name". The capillary
 * number is derived from the first character of the Sample Position key
 * (A becomes 1, B becomes 2, and so on).
 *
 * <p>The reader is closed on every exit path, including failures.
 *
 * @param inputFile the CEQ fragment results text file to read.
 *
 * @return an array holding the single lane built from the file.
 *
 * @exception IOException if the file cannot be read, a required section or
 *     key is missing, or the file has no data for the selected dye.
 * @exception MissingParameterError if the options have not been set.
 *
 * @see getTrace
 * @see getStandards
 * @see findFileSection
 * @see getKeyValue
 *
 * @author B. Master
 * @date 12/12/2000
 */
public Lane[] readLane(File inputFile) throws IOException
{
  // Make sure we have options set, including the color channel
  if (options == null)
    throw new MissingParameterError("The color for the filter must be set" +
                                    " before the filter can work.");

  // Open the file. Use buffering to speed up the reads.
  BufferedReader in = new BufferedReader(new FileReader(inputFile));
  try
  {
    //===== Trace parameters and trace data =================================
    // If the section does not exist, an exception will be thrown.
    findFileSection(in, "[FRAGMENT ANALYZED DATA OUTPUT]");

    String numberOfPoints = requireKeyValue(in, "Number of Points");
    int traceSize = Double.valueOf(numberOfPoints).intValue();
    double[] trace = new double[traceSize];

    // Throw an exception if we don't have data for this color in this file.
    String dyesUsed = requireKeyValue(in, "Dyes Used");
    checkForColor(dyesUsed, colorChannelName);

    String cvMigrationOffset = requireKeyValue(in, "CVMigrationOffset");
    double xTimeOffset = Double.valueOf(cvMigrationOffset).doubleValue();

    String samplingRate = requireKeyValue(in, "Sampling Rate");
    double dataRate = Double.valueOf(samplingRate).doubleValue();

    // The trace records follow the column header record.
    String traceHeader =
      " INDEX\t D1\t D2\t D3\t D4";
    findFileSection(in, traceHeader);
    getTrace(in, trace);            // Fill the trace array with trace points.

    // Build the lane.
    Lane newLane = new Lane(trace);
    newLane.setColor(colorChannel);

    //===== Sample identification ===========================================
    // The [GENERAL] section is searched from the top of the file, so the
    // file is reopened rather than read onward from the trace data.
    in.close();
    in = new BufferedReader(new FileReader(inputFile));

    findFileSection(in, "[GENERAL]");
    String plateName = requireKeyValue(in, "Sample Plate");
    String sampleName = requireKeyValue(in, "Sample Name");
    String samplePosition = requireKeyValue(in, "Sample Position");

    newLane.setName(sampleName.trim());      // Remove the trailing \t
    newLane.setGelName(plateName.trim());

    // Converts the capillary ID from char (A thru H) to an integer (1 thru 8).
    if (samplePosition.length() == 0)
      throw new IOException("This does not appear to be a CEQ fragment results text file."
                            + " The Sample Position value is empty.");
    int capillaryNumber = (int)(samplePosition.charAt(0) - 64);
    newLane.setLaneNumber(capillaryNumber);

    //===== Standard peaks ==================================================
    findFileSection(in, "[FRAGMENT LIST]");
    DataList stdPoints = new DataList();
    getStandards(in, stdPoints, dataRate, xTimeOffset);

    //===== Size function ===================================================
    // Each lane gets its own copy of the selected size function.
    String sizeName = options[1].getStringValue();
    SizeFunction laneSizeFn = (SizeFunction) FeatureList.getSizeMgr().get(sizeName);
    laneSizeFn = (SizeFunction) laneSizeFn.clone();
    laneSizeFn.init(stdPoints);     // Give the standards to the size function.
    laneSizeFn.setMaxScan(newLane.getNumPoints() - 1);
    newLane.setSizeFunction(laneSizeFn);

    return new Lane[] { newLane };
  }
  finally
  {
    // Runs on success and on failure, so the file handle is never leaked.
    // Closing a reader that is already closed has no effect.
    in.close();
  }
}

/**
 * Looks up a key with <code>getKeyValue</code> and insists that it exists.
 *
 * @param in  the reader positioned inside the section to search.
 * @param key the name of the key to look up.
 *
 * @return the value found for the key; never null.
 *
 * @exception IOException if the key is not found, or the read fails.
 */
private String requireKeyValue(BufferedReader in, String key) throws IOException
{
  String value = getKeyValue(in, key);
  if (value == null)
    throw new IOException("This does not appear to be a CEQ fragment results text file."
                          + " Missing key " + key);
  return value;
}
/**
 * Gel files are not supported by this filter; it only reads lanes.
 *
 * @param inputFile ignored.
 *
 * @return Always null
 */
public Gel readGel(File inputFile) throws IOException
{
  // Nothing is read from the file: there is no gel to build.
  return null;
}
/**
 * Get the trace points for the selected dye from the file and
 * store them in a trace array.
 *
 * One record is read for every element of the trace array. Every trace
 * record is tab delimited and holds the scan point number followed by a
 * field for each of the four dyes (D1, D2, D3, D4). If the dye was included
 * in the sample, the corresponding field has useful data; otherwise it is
 * zero. The colorChannel field selects which dye column is stored.
 *
 * The data intensities are multiplied by 0.4 before being added to the
 * trace. According to the original author's notes, this keeps the values
 * small enough to fit in a short when the lane is written to a file.
 *
 * Since Genographer expects trace values to be greater than or equal to
 * zero, negative values are removed by raising the entire trace in the
 * positive y-axis direction. The adjustment is limited to a fixed
 * magnitude. If the adjustment does not remove all of the negative bias,
 * the remaining negative values are set equal to zero.
 *
 * On entry, assumes the file pointer is set to the first trace record.
 * On exit, the trace array is filled with trace points.
 *
 * @param in    the reader positioned at the first trace record.
 * @param trace the array to fill; its length is the number of records read.
 *
 * @exception IOException if the read fails, the file ends before the array
 *     is full, or a record has fewer than five fields or a value for the
 *     selected dye that is not a number.
 *
 * @author B. Master
 * @date 03/13/2001
 */
private void getTrace(BufferedReader in, double [] trace) throws IOException
{
  final double maxBaselineShift = -200.00;  // The maximum allowed offset correction.
  final double intensityScale = 0.4;        // See the method comment.
  final int fieldsPerRecord = 5;            // Scan point number plus four dyes.

  // Work out once which field of each record holds the selected dye.
  // Field 0 is the scan point number; fields 1..4 are D1..D4.
  final int dyeField;
  switch (colorChannel) {
  case RED:
    dyeField = 1;     // D1
    break;
  case YELLOW:
    dyeField = 2;     // D2
    break;
  case GREEN:
    dyeField = 3;     // D3
    break;
  case BLUE:
    dyeField = 4;     // D4
    break;
  default:
    dyeField = -1;    // Unknown channel: the trace is left at zero.
    break;
  }

  double lowestPoint = 0.0;                 // Y-axis low point stored here.
  for (int i = 0; i < trace.length; i++) {
    String record = in.readLine();
    if (record == null)
      throw new IOException("Unexpected end of file in the trace data: expected "
                            + trace.length + " points but found only " + i + ".");

    StringTokenizer st = new StringTokenizer(record, "\t", false);
    if (st.countTokens() < fieldsPerRecord)
      throw new IOException("Malformed trace record at point " + i + ": \""
                            + record + "\"");

    if (dyeField > 0) {
      // Skip the scan point number and any dyes ahead of the selected one.
      for (int skipped = 0; skipped < dyeField; skipped++)
        st.nextToken();

      String field = st.nextToken();
      try {
        trace[i] = Double.valueOf(field).doubleValue() * intensityScale;
      }
      catch (NumberFormatException e) {
        throw new IOException("Malformed trace value \"" + field
                              + "\" at point " + i + ".");
      }
    }

    if (trace[i] < lowestPoint)     // Find the most negative trace point,
      lowestPoint = trace[i];       // if any are below zero.
  }

  // Attempt to raise the trace up to the zero baseline, but don't correct too much.
  if (lowestPoint < maxBaselineShift)
    lowestPoint = maxBaselineShift;

  for (int i = 0; i < trace.length; i++) {
    trace[i] -= lowestPoint;        // Raise the trace (lowestPoint is zero or negative).
    if (trace[i] < 0.0)
      trace[i] = 0.0;               // Don't let any points be negative.
  }
}
// Note: The lane class serialization casts the trace double to a short
// (16-bit int), so the data must be limited to fit in a short.
// Ben says the max is (2**16) * 2 * 1.25 = 163,840:
// 16 bits times one bit shift times non-linear correction of up to 25%
//
// Could divide all by 2.5 (or multiply by 0.4) -- this is what getTrace does.
// Could truncate any above the 163,840 max.
// Could scale by a calculated factor to bring the data down to desired magnitude.
//
/**
 * Get the size standards from the file and store them in a DataList.
 *
 * Read through all the records in the peak table. If the record
 * represents a standard peak, store the peak size, height and trace
 * location in the standards DataList. Records of any other peak type are
 * skipped.
 *
 * All of the fields in the table are tab delimited strings. These are
 * converted to doubles or ints, as appropriate. Fields that are not needed
 * for the standards DataList are skipped.
 *
 * The peak migration times are in minutes, so these must be converted to
 * scan point locations, based on the sampling data rate:
 *   scanPtNum = migration time(min.) * 60 (sec/min.) * rate(points/sec.)
 * The migration times also include an offset from the trace data. This is
 * corrected by adding the xTimeOffset, and the result is rounded to a
 * whole scan point.
 *
 * The end of the table is indicated by a blank record or by the end of
 * the file.
 *
 * Assumes the file pointer is set to the peak table section of the file.
 * On exit, the DataList parameter is filled with standard peaks.
 *
 * @param in          the reader positioned at the first peak record.
 * @param stdPoints   the list that receives one Peak per standard record.
 * @param dataRate    the sampling rate used to convert minutes to points.
 * @param xTimeOffset the offset added to each computed scan point.
 *
 * @exception IOException if the read fails, or a standard record has
 *     missing fields or fields that are not numbers.
 *
 * @author B. Master
 * @date 12/12/2000
 */
private void getStandards(BufferedReader in, DataList stdPoints, double dataRate, double xTimeOffset) throws IOException
{
  final String standardMark = " * ";    // Indicates a standard record.

  // Stop at the blank record that ends the table, or at end of file
  // (readLine returns null there).
  for (String record = in.readLine();
       record != null && !record.equals("");
       record = in.readLine()) {

    StringTokenizer st = new StringTokenizer(record, "\t", false);

    // The first field is the peak type; only standard peaks are wanted.
    if (!st.hasMoreTokens() || !st.nextToken().equals(standardMark))
      continue;

    double scanPtNum;
    int standardSize;
    double peakHeight;
    try {
      st.nextToken();     // Skip the peak number.
      st.nextToken();     // Skip the dye.

      // Calc. the scan point number from the migration time.
      double migrationTime = Double.valueOf(st.nextToken()).doubleValue();
      scanPtNum = (migrationTime * 60 * dataRate);  // 2 Hz dataRate = 120 pts/min
      scanPtNum += xTimeOffset;           // Correct for the relative trace offset.
      scanPtNum = (int)(scanPtNum + 0.5); // Round to a whole number (a trace location).

      // Standard size.
      standardSize = Integer.valueOf(st.nextToken()).intValue();

      st.nextToken();     // Skip the estimated size.

      // Standard height.
      peakHeight = Double.valueOf(st.nextToken()).doubleValue();
    }
    catch (NoSuchElementException e) {
      throw new IOException("Malformed standard peak record (missing fields): \""
                            + record + "\"");
    }
    catch (NumberFormatException e) {
      throw new IOException("Malformed standard peak record (bad number): \""
                            + record + "\"");
    }

    // Add the standard peak to the list.
    stdPoints.addData(new Peak(standardSize, peakHeight, scanPtNum));
  }
}
/**
 * Confirm that the requested dye data is included in the file.
 * The requested dye name should be a substring of the dyesInFile
 * string. If not, an exception is thrown, ending the file import.
 *
 * For example, a file with D1 and D4 data has a dyesInFile string: "D1,D4"
 *
 * @param dyesInFile          the dyes listed by the file; null (no dye list
 *                            was found) is treated as "dye not present".
 * @param dyeChannelRequested the name of the dye the user selected.
 *
 * @exception IOException if the requested dye is not listed in dyesInFile.
 *
 * @author B. Master
 * @date 11/02/2000
 */
private void checkForColor(String dyesInFile, String dyeChannelRequested) throws IOException
{
  // A file without a dye list cannot contain the requested dye either.
  boolean dyePresent = (dyesInFile != null)
                       && (dyesInFile.indexOf(dyeChannelRequested) >= 0);

  if (!dyePresent)
    throw new IOException("Could not find the color " + dyeChannelRequested
                          + " in the file.");
}
/**
 * Advance past a number of records by reading them and discarding their
 * contents.
 * Assumes that the requested number of records are contained in the file.
 *
 * @param in      the reader to advance.
 * @param records how many records to discard; nothing is read when this
 *                is zero or negative.
 *
 * @author B. Master
 * @date 11/02/2000
 */
private void skipRecords(BufferedReader in, int records) throws IOException
{
  int remaining = records;
  while (remaining > 0) {
    in.readLine();    // The contents are deliberately thrown away.
    remaining--;
  }
}
/**
 * Scan forward through the file for the desired section.
 * File section names have the form: "[SectionName]"
 *
 * A record matches only if it is exactly equal to the section name. When
 * a match is found, the file pointer is left just after the matching
 * record and the routine returns to the caller. Otherwise, the file
 * pointer ends up at the end of the file and an exception is thrown,
 * ending the file import.
 *
 * @param in          the reader to search, starting at its current position.
 * @param sectionName the exact text of the record to look for.
 *
 * @exception IOException if the end of the file is reached without a match.
 *
 * @author B. Master
 * @date 12/12/2000
 */
private void findFileSection(BufferedReader in, String sectionName) throws IOException
{
  for (String line = in.readLine(); line != null; line = in.readLine()) {
    if (line.equals(sectionName))
      return;     // Desired section found.
  }

  // Ran off the end of the file without seeing the section.
  throw new IOException("This does not appear to be a CEQ fragment results text file."
                        + " Missing " + sectionName.trim());
}
/**
 * Search the file for a record containing the specified key.
 * Keys have the form: "KeyName=Value"
 *
 * The search starts at the current file position and stops at the first
 * blank record or at the end of the file, whichever comes first.
 * (BufferedReader.readLine() strips the line terminator, so a blank
 * record is seen as an empty string, never as "\r".)
 *
 * If the key was found, returns the associated value: the text between
 * the first "=" and the next "=" or the end of the record. A key that is
 * present but has nothing after the "=" yields an empty string.
 * Otherwise, returns null.
 *
 * Assumes that the key, if it exists, is in the remaining records of the
 * file (file seek position was not reset from last position).
 *
 * @param in         the reader to search, starting at its current position.
 * @param desiredKey the exact name of the key to look for.
 *
 * @return the value of the key, or null if the key was not found.
 *
 * @exception IOException if the read fails.
 *
 * @author B. Master
 * @date 11/02/2000
 */
private String getKeyValue(BufferedReader in, String desiredKey) throws IOException
{
  String record = in.readLine();
  while ((record != null) && (record.length() > 0)) {
    StringTokenizer st = new StringTokenizer(record, "=", false);

    // A record made only of "=" characters has no key; just move on.
    if (st.hasMoreTokens() && st.nextToken().equals(desiredKey)) {
      // Key found. It may legitimately have no value after the "=".
      return st.hasMoreTokens() ? st.nextToken() : "";
    }

    record = in.readLine();     // Read the next record
  }

  // If here, key not found.
  return null;
}
}