/* Generated By:JavaCC: Do not edit this line. TSVFile.java */ package org.biolegato.tables; /* * TSVFile.java * * Created on January 30, 2008, 11:58 AM * * To change this template, choose Tools | Template Manager * and open the template in the editor. */ import java.io.BufferedReader; import javax.swing.filechooser.FileFilter; import java.io.File; import java.io.IOException; import java.io.Reader; import java.util.List; import java.util.ArrayList; import org.biolegato.main.DataCanvas; /** *
TSV (Tab Separated Values) file format parser. A TSV file is a * spreadsheet, where the rows are separated by new line characters (i.e. each * line of the file is a row), and the columns are separated by tab characters. * Thus an example of a TSV file would be:
** Name Phone Num Birthday * John 555-5123 Apr. 30, 1937 * Jane 555-4352 Feb. 13, 1976 * Sam 555-6333 Jan. 7, 1961 * Tom 555-7777 Aug. 4, 1943*
Where name, phone number, and birthday are all separated by tab characters * and are each columns of a spreadsheet.
* *NOTE: fields may be double or single quoted. In such cases, the single * or double quotes character may be escaped by doubling it (e.g. "AB""C" * will be read as AB"C).
** * @author Graham Alvare * @author Brian Fristensky */ public class TSVFile implements TSVFileConstants { /** * An empty 1D string array for calling the List.toArray method. */ public static final String[] EST = new String[0]; /** * An empty 2D string array for calling the List.toArray method. */ public static final String[][] DBL_EST = new String[0][]; /** * TSV file format filter (for file chooser objects). This file filter will * determine which files in a file chooser are TSV (tab separated values) * files, based on their file extension. */ public static final FileFilter TSV_FILTER = new FileFilter () { /** * Determines whether a file is a TSV file (based on extension). * Currently the only extensions supported are ".csv", ".tab" and ".tsv" ** * @param file the file to test for TSV format compatibility. * @return true if the file is a TSV file (otherwise false). * @see javax.swing.filechooser.FileFilter#accept */ public boolean accept (File file) { return (file.isDirectory() || file.getAbsolutePath().toLowerCase().endsWith(".csv") || file.getAbsolutePath().toLowerCase().endsWith(".tab") || file.getAbsolutePath().toLowerCase().endsWith(".tsv")); } /** * Returns a description of the file format that can be displayed to the * user. This allows the user to easily find and identify a file format * in the file type combo box (within file chooser objects). ** * @return the string description of the file format. * @see javax.swing.filechooser.FileFilter#getDescription */ public String getDescription () { return "TSV - Tab Separated Values file (*.csv,*.tab,*.tsv)"; } }; /** * Used to auto-detect BioLegato table canvas formats. This function will * return true if the format is suspected to be a TSV file. Otherwise, the * function will return false. ** * @param test the reader to attempt to detect the format of. * @return whether the format is a TSV file. */ public boolean isFormat (Reader test) { // The current character being parsed (for auto-detection). // If the check variable is a tab character '\t' at the end of parsing, // then the file is suspected to be a TSV file. Note that we only // parse the file until the end of line or tab character is encountered. // Thus, our purpose is to see if there is at least one column (tab) on // the first line (indicating the format could likely be a TSV file). int check = ' '; try { // Iterate through the stream until either a tab or end-line // character is encountered. Store each character in the 'check' // variable. This will later be tested. If the character is tab, // then there is at least one column on the first line of the file. while (check != '\u005cn' && check != '\u005cr' && check != '\u005ct') { // Mark the stream, so we can later reset it to the beginning. test.mark(2); // Get the next character in the stream (to test). check = test.read(); } // Reset the stream to the beginning for parsing. test.reset(); } catch (IOException e) { e.printStackTrace(System.err); } // Return true if there is at least one column on the first line of the // suspected TSV file. return (check == '\u005ct'); } /** * Reads TSV data from a Reader object into a table canvas. ** * @param table the table canvas to read the TSV file into. * @param currentFile the Reader object to parsed the TSV data from. * @param overwrite whether to overwrite the currently selected content * in the table canvas with the data parsed by this * function. * @throws java.io.IOException thrown if there is any exception while * reading from the Reader object. */ public static void readFile(TableCanvas table, Reader currentFile, boolean overwrite, boolean forceall) throws IOException { try { if (overwrite) { // We need to jot down the number of columns before we delete rows int numCols = table.tableModel.getColumnCount(); // If no data are selected, delete all data in the canvas boolean NothingSelected = (table.tablePane.getSelectedColumnCount() == 0 && table.tablePane.getSelectedRowCount() == 0); if (forceall || NothingSelected) { table.setSelectionMode(TableCanvas.SelectionMode.ROW); table.tablePane.selectAll(); table.deleteSelectedRows(); table.tablePane.clearSelection(); } // DOESN'T ELIMINATE THE ERROR, EVEN IF WE RUN addRow(dummy) TWICE // An error would be generated if we had an empty canvas, so add a dummy // first row and clear the selection. /**if (forceall || NothingSelected) { String[] dummy = new String[numCols]; for (int i = 0; i < numCols; i++ ) { dummy[i] = ""; }; table.tableModel.addRow(dummy); } */ } // Create a new parser object to read the TSV file. TSVFile parser = new TSVFile(currentFile); //System.out.println("Checkpoint 1"); //System.out.flush(); // Parse the TSV data from the Reader object. parser.parseTSV(table); //System.out.println("Checkpoint 3"); } catch (ParseException pe) { pe.printStackTrace(System.err); } } /** * Reads TSV data from a Reader object into a 2D string array. Please note * that the 2D string array is specified as follows: the rows are the first * index of the array (e.g. data[0] is the first row of the array), and the * columns are the second index of the array (e.g. data[0][1] is the second * column of the first row of the array. ** * @param currentFile the Reader object to parsed the TSV data from. * @throws java.io.IOException thrown if there is any exception while * reading from the Reader object. */ public static String[][] readTSV(Reader currentFile) throws IOException { // Stores the 2D string array parsed from the file. // By default, this variable contains an empty 2D string array. String[][] result = DBL_EST; try { // Create a new parser object to read the TSV file. TSVFile parser = new TSVFile(currentFile); // Parse the TSV data from the Reader object. result = parser.parseDoubleArray(); } catch (ParseException pe) { pe.printStackTrace(System.err); } return result; } /** * Writes a 2D string array as TSV data into an Appendable object. Please * note that the rows are the first index of the array (e.g. data[0] is the * first row of the array), and the columns are the second index of the * array (e.g. data[0][1] is the second column of the first row of the * array. ** * @param writer the Appendable object to write the TSV data to. * @param data the 2D string array to parse into TSV data. * @throws java.io.IOException thrown if there is any exception while * writing to the Appendable object. */ public static void writeFile(Appendable writer, String[][] data) throws IOException { // The current table cell being written. String cell; // Translate each row in the 2D array and write it to the file. // The original version enclosed each cell in double quotes (") // The motivation was to accommodate MS-Excell, but Excell no longer // requires quotes. Lines to begin and end a cell have been commented out. for (int y = 0; y < data.length; y++) { // Translate each column in the row array and write it to the file. // Start by writing the first column. if (data[y].length > 0) { //writer.append('\"'); // Double all of the double-quotation marks. This is to // distinguish double-quotation marks in each field from // double-quotation marks surrounding the fields. writer.append(data[y][0].replaceAll("\u005c"", "\u005c"\u005c"")); //writer.append('\"'); } // Write each additional column, appending the delimiter character // before it (i.e. the tab character) -- e.g. "\tABC" for (int x = 1; x < data[y].length; x++) { cell = data[y][x]; writer.append('\u005ct'); //writer.append('\"'); // If the cell is not null, double all of the double-quotation // marks. This is to distinguish double-quotation marks in each // field from double-quotation marks surrounding the fields. if (cell != null) { writer.append(data[y][x].replaceAll("\u005c"", "\u005c"\u005c"")); } //writer.append('\"'); } writer.append('\u005cn'); } } /** * Parse TSV data into a table canvas. ** * @param table the table canvas to parse the data into. * @see org.biolegato.tables.TableCanvas#namedHeaders */ final public void parseTSV(TableCanvas table) throws ParseException { /* Stores the spreadsheet row most recently parsed from the data. */ String[] row; /* Read the first row of the TSV data. */ row = parseRow(); //System.out.println("parseTSV:row length: " + row.length ); /* If named column headers is enabled, then read the first row as column * headers for the table. */ if (table.namedHeaders) { //System.out.println("Checkpoint 2.1a"); table.addColumns(row); } else { //System.out.println("Checkpoint 2.1b"); //System.out.flush(); table.addRow(row); } //System.out.println("Checkpoint 2.2"); label_1: while (true) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case RDELIM: ; break; default: jj_la1[0] = jj_gen; break label_1; } jj_consume_token(RDELIM); row = parseRow(); table.addRow(row); } jj_consume_token(0); } /** * Parse TSV data into a table canvas. Please note that the 2D string array is * specified as follows: the rows are the first index of the array (e.g. data[0] * is the first row of the array), and the columns are the second index of the * array (e.g. data[0][1] is the second column of the first row of the array. ** * @param table the table canvas to parse the data into. */ final public String[][] parseDoubleArray() throws ParseException { /* Stores the spreadsheet row most recently parsed from the data. */ String[] row; /* An array list of all of the rows parsed in the file. This will later * be converted into a 2D string array via. the List.toString method */ List