/* Copyright @ 1999-2003, The Institute for Genomic Research (TIGR). All rights reserved. This software is provided "AS IS". TIGR makes no warranties, express or implied, including no representation or warranty with respect to the performance of the software and derivatives or their safety, effectiveness, or commercial viability. TIGR does not warrant the merchantability or fitness of the software and derivatives for any particular purpose, or that they may be exploited without infringing the copyrights, patent rights or property rights of others. TIGR shall not be liable for any claim, demand or action for any loss, harm, illness or other damage or injury arising from access to or use of the software or associated information, including without limitation any direct, indirect, incidental, exemplary, special or consequential damages. This software program may not be sold, leased, transferred, exported or otherwise disclaimed to anyone, in whole or in part, without the prior written consent of TIGR. */ /* General notes: I need to add some exception handling code to accommodate the AIOOBE-type exceptions that can pop up in many of the methods. Maybe add methods that allow for multiple matching elements to be returned? 
*/
package org.tigr.microarray;

import java.awt.Component;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.FileReader;
import java.io.File;
import java.util.StringTokenizer;
import java.util.Vector;
import javax.swing.JFileChooser;
import javax.swing.filechooser.FileFilter;

import org.tigr.util.StringSplitter;

/**
 * Parses and stores mev file data.
 *
 * <p>A file is read line by line. Every line is retained verbatim (see
 * {@link #getLineAt(int)}). Lines starting with <code>#</code> are comments,
 * the first remaining non-blank line supplies the tab-delimited column
 * headers, and every later non-blank, non-comment line is a spot (data) row.
 *
 * @author aisaeed
 * @version "$Revision: 1.1 $ - $Date: 2003/09/22 18:38:16 $"
 */
public class MevFileParser {

    public static final int UNSUPPORTED_FILE = 0;
    public static final int MEV_FILE = 1;
    public static final String UNIQUE_ID_STRING = "UID";
    public static final boolean WITH_HEADER = true;

    /** Separator between the fields of a header or spot line. */
    private static final String FIELD_DELIMITER = "\t";

    /** Prefix that marks a line as a comment. */
    private static final String COMMENT_PREFIX = "#";

    // Initialised to empty containers so that the accessors are usable (and
    // simply report "no data") on a parser that has not loaded a file yet.
    private Vector columnHeaders = new Vector();
    private Vector rawLines = new Vector();
    private IntVector dataLinesMap = new IntVector();
    private boolean mevFileLoaded;

    /**
     * Creates an empty parser. Call {@link #loadFile(File)} to read a file.
     */
    public MevFileParser() {
    }

    /**
     * Creates a parser and immediately loads the named file. Use
     * {@link #isMevFileLoaded()} to find out whether loading succeeded.
     *
     * @param mevFileName Path of the mev file to load
     */
    public MevFileParser(String mevFileName) {
        loadFile(new File(mevFileName));
    }

    /**
     * Displays a JFileChooser with an mev file filter. The default directory
     * is user.dir.
     *
     * @param dialogParent Parent component of the JFileChooser
     * @return The selected mev file, or null if the dialog was not approved
     */
    public static File selectFile(Component dialogParent) {
        return selectFile(new File(System.getProperty("user.dir")), dialogParent);
    }

    /**
     * Displays a JFileChooser with an mev file filter that opens to a
     * specified directory.
     *
     * @param defaultDirectory The default directory for the JFileChooser to
     *        open to
     * @param dialogParent Parent component of the JFileChooser
     * @return The selected mev file, or null if the dialog was not approved
     */
    public static File selectFile(File defaultDirectory, Component dialogParent) {
        JFileChooser chooser = new JFileChooser(System.getProperty("user.dir"));
        chooser.setDialogTitle("Select an mev file");
        chooser.setCurrentDirectory(defaultDirectory);
        chooser.setMultiSelectionEnabled(false);
        chooser.addChoosableFileFilter(new FileFilter() {
            public boolean accept(File f) {
                // Directories stay visible so the user can navigate into them.
                return f.isDirectory() || f.getName().endsWith(".mev");
            }

            public String getDescription() {
                return "TIGR MeV Files (*.mev)";
            }
        });

        if (chooser.showOpenDialog(dialogParent) == JFileChooser.APPROVE_OPTION) {
            return chooser.getSelectedFile();
        }
        return null;
    }

    /**
     * Scans the specified file and returns filetype/validity code.
     *
     * <p>Not yet implemented: the file is not inspected and 0 (the value of
     * {@link #UNSUPPORTED_FILE}) is always returned.
     *
     * @param targetFile The file to validate (currently ignored)
     * @return The filetype/validity code
     */
    public static int validate(File targetFile) {
        return 0; // Dummy return
    }

    /**
     * Reads the specified mev file, then instantiates and populates the
     * appropriate data objects. Any previously loaded data is discarded.
     * isMevFileLoaded will return true if this method was successful in
     * loading the mev file.
     *
     * <p>Blank (empty or whitespace-only) lines are kept in the raw line list
     * but are neither used as the header row nor counted as spot rows. An
     * IOException is reported with printStackTrace and leaves the parser
     * marked as not loaded. The reader is always closed.
     *
     * @param targetFile The mev file to load
     */
    public void loadFile(File targetFile) {
        dataLinesMap = new IntVector();
        rawLines = new Vector();
        columnHeaders = new Vector();
        mevFileLoaded = false;

        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(targetFile));
            boolean headersRead = false;
            String currentLine;
            for (int lineCount = 0; (currentLine = reader.readLine()) != null; lineCount++) {
                // Every line is stored so that raw line indices match the file.
                rawLines.add(currentLine);

                if (currentLine.startsWith(COMMENT_PREFIX) || currentLine.trim().length() == 0) {
                    continue; // Comment or blank line: carries no header/spot data
                }

                if (!headersRead) {
                    // First meaningful line: the column headers
                    headersRead = true;
                    StringTokenizer st = new StringTokenizer(currentLine, FIELD_DELIMITER);
                    while (st.hasMoreTokens()) {
                        columnHeaders.add(st.nextToken());
                    }
                } else {
                    dataLinesMap.add(lineCount);
                }
            }
            mevFileLoaded = true;
        } catch (IOException ioe) {
            ioe.printStackTrace();
        } finally {
            closeQuietly(reader);
        }
    }

    /**
     * Returns true if the loadFile method was successful.
     *
     * @return The file load status
     */
    public boolean isMevFileLoaded() {
        return mevFileLoaded;
    }

    /**
     * Returns a Vector containing the required row of column headers. Each
     * element in the Vector is one of the tab-delimited tokens from the first
     * non-comment, non-blank line in the mev file. The returned Vector is the
     * parser's own instance, not a copy.
     *
     * @return The Vector of column headers
     */
    public Vector getColumnHeaders() {
        return columnHeaders;
    }

    /**
     * Returns a Vector containing the fields in the target column. All
     * comment lines will be ignored.
     *
     * @param targetColumn The index of the target column; valid values range
     *        from 0 to n-1, where n is the number of columns in the mev file.
     * @return The Vector of column data
     */
    public Vector getColumnAt(int targetColumn) {
        return getColumnAt(targetColumn, false);
    }

    /**
     * Returns a Vector containing the fields and an optional header in the
     * target column. If requested, the first element of the Vector will be
     * the column header value. All comment lines will be ignored.
     *
     * <p>Spot lines are split on the tab character only, and empty fields
     * (two adjacent tabs) are kept, so a field's position always equals its
     * column index. A spot line that has fewer fields than the requested
     * column contributes an empty String, keeping the result aligned with
     * the element indices used by {@link #getElementAtIndex(int)}.
     *
     * @param targetColumn The index of the target column; valid values range
     *        from 0 to n-1, where n is the number of columns in the mev file.
     * @param withHeaders If true, the first element in the return Vector will
     *        be the column header for the target column.
     * @return The Vector of column data
     * @throws ArrayIndexOutOfBoundsException if targetColumn is negative, or
     *         if withHeaders is true and there is no header at that index
     */
    public Vector getColumnAt(int targetColumn, boolean withHeaders) {
        if (targetColumn < 0) {
            throw new ArrayIndexOutOfBoundsException(targetColumn);
        }

        int rowCount = dataLinesMap.size();
        Vector columnVector = new Vector(rowCount + (withHeaders ? 1 : 0));
        if (withHeaders) {
            columnVector.add(columnHeaders.elementAt(targetColumn));
        }

        for (int i = 0; i < rowCount; i++) {
            String[] fields = splitFields(getElementAtIndex(i));
            columnVector.add(targetColumn < fields.length ? fields[targetColumn] : "");
        }
        return columnVector;
    }

    /**
     * Returns a Vector containing the fields in the column which is
     * identified by the specified column header. All comment lines will be
     * ignored.
     *
     * @param columnName The column header of the target column
     * @return The Vector of column data. If the specified column header is
     *         not found, the return Vector will be null.
     */
    public Vector getColumnNamed(String columnName) {
        return getColumnNamed(columnName, false);
    }

    /**
     * Returns a Vector containing the fields and an optional header in the
     * column which is identified by the specified column header. If
     * requested, the first element of the Vector will be the column header
     * value. All comment lines will be ignored.
     *
     * @param columnName The column header of the target column
     * @param withHeaders If true, the first element in the return Vector will
     *        be the column header for the target column.
     * @return The Vector of column data. If the specified column header is
     *         not found, the return Vector will be null.
     */
    public Vector getColumnNamed(String columnName, boolean withHeaders) {
        int columnIndex = getColumnHeaders().indexOf(columnName);
        if (columnIndex < 0) {
            return null;
        }
        return getColumnAt(columnIndex, withHeaders);
    }

    /**
     * Returns the line from the mev file at the specified index. Comment,
     * header and blank lines all count towards this index.
     *
     * @param rawTargetLine The index of the target line to be retrieved.
     * @return The String containing the target line of text, as it appears in
     *         the mev file. The trailing newline character, \n, if present,
     *         is omitted.
     */
    public String getLineAt(int rawTargetLine) {
        return (String) rawLines.elementAt(rawTargetLine);
    }

    /**
     * Returns the spot element line from the mev file at the specified index.
     * The index should refer to the position of the element in the mev file,
     * such that an index of 0 refers to the first spot in the file, an index
     * of 1 refers to the second spot in the file, and so forth. The header
     * row and all comment lines do not count towards this index.
     *
     * @param index The index of the target element to be retrieved.
     * @return The String encapsulating the target element, as it appears in
     *         the mev file. The trailing newline character, \n, if present,
     *         is omitted.
     */
    public String getElementAtIndex(int index) {
        return getLineAt(dataLinesMap.intElementAt(index));
    }

    /**
     * Returns the spot element line from the mev file at with the specified
     * row and column values.
     *
     * <p>Not yet implemented: an empty String is always returned.
     *
     * @param row The row value of the target element to be retrieved.
     * @param column The column value of the target element to be retrieved.
     * @return The String encapsulating the target element, as it appears in
     *         the mev file. The trailing newline character, \n, if present,
     *         is omitted.
     */
    public String getElementAtRC(int row, int column) {
        return ""; // Dummy return
    }

    /**
     * Returns the spot element line from the mev file that has a UID that
     * matches the specified id value. If there are multiple matches, only the
     * first element will be returned.
     *
     * <p>Note: There should not be multiple elements with the same UID, as
     * defined in the mev file format description.
     *
     * @param id The id of the target element to be retrieved.
     * @return The String encapsulating the target element, as it appears in
     *         the mev file, or null if there is no match. The trailing
     *         newline character, \n, if present, is omitted.
     */
    public String getElementById(String id) {
        return getElementByField(MevFileParser.UNIQUE_ID_STRING, id);
    }

    /**
     * Returns the spot element line from the mev file that contains the
     * specified value for the specified field. If there are multiple matches,
     * only the first element will be returned.
     *
     * @param fieldName The column header that identifies the column in which
     *        to find the specified value of the target element to be
     *        retrieved.
     * @param value The value in the specified column that identifies the
     *        target element to be retrieved.
     * @return The String encapsulating the target element, as it appears in
     *         the mev file, or null if the column does not exist or no spot
     *         holds the value. The trailing newline character, \n, if
     *         present, is omitted.
     */
    public String getElementByField(String fieldName, String value) {
        Vector targetColumn = getColumnNamed(fieldName);
        if (targetColumn == null) {
            return null; // Unknown column header: nothing can match
        }

        for (int i = 0; i < targetColumn.size(); i++) {
            if (targetColumn.elementAt(i).equals(value)) {
                return getElementAtIndex(i);
            }
        }
        return null;
    }

    /**
     * Returns a vector that contains the raw line index of each spot row, as
     * Integer objects. Using this vector, one row of data can be retrieved by
     * calling getLineAt(). The returned Vector is the parser's own instance,
     * not a copy.
     *
     * @return The Vector containing line indices.
     */
    public Vector getDataLinesMap() {
        return dataLinesMap;
    }

    /**
     * Returns a two-dimensional String array containing every value for each
     * column header for every spot in the mev file. The first dimension of
     * the array iterates over the spots, while the second dimension iterates
     * over the columns. All comment lines will be ignored.
     *
     * @return The String[][] containing all spot data
     */
    public String[][] getDataMatrix() {
        return getDataMatrix(false);
    }

    /**
     * Returns a two-dimensional String array containing every value for each
     * column header for every spot in the mev file. The first dimension of
     * the array iterates over the spots, while the second dimension iterates
     * over the columns (one entry per column header). Optionally, the first
     * element in the first dimension of the array can be an array of all
     * column headers. All comment lines will be ignored.
     *
     * <p>Field values are produced by org.tigr.util.StringSplitter, one
     * nextToken() call per column header; how that class treats empty or
     * missing fields is defined there, not here.
     *
     * @param withHeaders If true, headers are included in the returned array
     * @return The String[][] containing all spot data
     */
    public String[][] getDataMatrix(boolean withHeaders) {
        Vector headers = getColumnHeaders();
        int hc = withHeaders ? 1 : 0;
        String[][] matrix = new String[dataLinesMap.size() + hc][headers.size()];

        if (withHeaders) {
            for (int i = 0; i < headers.size(); i++) {
                matrix[0][i] = (String) headers.elementAt(i);
            }
        }

        for (int i = hc; i < matrix.length; i++) {
            // Row i of the matrix is spot (i - hc) because of the optional header row.
            String currentLine = getElementAtIndex(i - hc);
            StringSplitter st = new StringSplitter(currentLine, "\t");
            for (int j = 0; j < matrix[i].length; j++) {
                matrix[i][j] = st.nextToken();
            }
        }
        return matrix;
    }

    /**
     * Returns a vector containing all spots in the mev file. Each spot
     * contains all values for each column. All comment lines will be ignored.
     *
     * @return The Vector containing all spot data
     */
    public Vector getDataVector() {
        return getDataVector(false);
    }

    /**
     * Returns a vector containing all spots in the mev file. Each spot is
     * itself a Vector of Strings holding the tab-delimited fields of that
     * spot line. Empty fields are kept as empty Strings so that a field's
     * position equals its column index. Optionally, the first element in the
     * vector can be a vector of all column headers. All comment lines will be
     * ignored.
     *
     * @param withHeaders If true, headers are included in the returned vector
     * @return The Vector containing all spot data
     */
    public Vector getDataVector(boolean withHeaders) {
        Vector headers = getColumnHeaders();
        int rowCount = dataLinesMap.size();
        Vector vec = new Vector(rowCount + (withHeaders ? 1 : 0));

        if (withHeaders) {
            // Copy, so callers cannot alter the parser's header list through the result.
            vec.add(new Vector(headers));
        }

        for (int i = 0; i < rowCount; i++) {
            String[] fields = splitFields(getElementAtIndex(i));
            Vector aLine = new Vector(fields.length);
            for (int j = 0; j < fields.length; j++) {
                aLine.add(fields[j]);
            }
            vec.add(aLine);
        }
        return vec;
    }

    /**
     * Splits a line into its tab-delimited fields. The negative limit keeps
     * empty fields, including trailing ones, which StringTokenizer would
     * silently drop.
     */
    private static String[] splitFields(String line) {
        return line.split(FIELD_DELIMITER, -1);
    }

    /**
     * Closes the reader if it was opened. A failure to close is ignored
     * because the data has already been read (or the load already failed).
     */
    private static void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException ignored) {
            // Nothing useful can be done about a failed close.
        }
    }

    /**
     * A Vector with convenience methods for storing and reading int values,
     * which are held as Integer objects.
     */
    private static class IntVector extends Vector {

        /** Appends the value, wrapped in an Integer. */
        public void add(int element) {
            super.add(new Integer(element));
        }

        /** Returns the int value stored at the given position. */
        public int intElementAt(int index) {
            return ((Integer) super.elementAt(index)).intValue();
        }
    }
}