options { /* JAVA_UNICODE_ESCAPE=true; UNICODE_INPUT=true;*/ IGNORE_CASE=true; KEEP_LINE_COLUMN=true; STATIC=false; } PARSER_BEGIN(TSVFile) package org.biolegato.tables; /* * TSVFile.java * * Created on January 30, 2008, 11:58 AM * * To change this template, choose Tools | Template Manager * and open the template in the editor. */ import java.io.BufferedReader; import javax.swing.filechooser.FileFilter; import java.io.File; import java.io.IOException; import java.io.Reader; import java.util.List; import java.util.ArrayList; import org.biolegato.main.DataCanvas; /** *

 * TSV (Tab Separated Values) file format parser. A TSV file is a
 * spreadsheet, where the rows are separated by new line characters (i.e. each
 * line of the file is a row), and the columns are separated by tab characters.
 * Thus an example of a TSV file would be:

 *       Name	Phone Num	Birthday
 *       John	555-5123	Apr. 30, 1937
 *       Jane	555-4352	Feb. 13, 1976
 *       Sam	555-6333	Jan. 7, 1961
 *       Tom	555-7777	Aug. 4, 1943

 * Where name, phone number, and birthday are all separated by tab characters
 * and are each columns of a spreadsheet.

* *

 * NOTE: fields may be double or single quoted. In such cases, the single
 * or double quotes character may be escaped by doubling it (e.g. "AB""C"
 * will be read as AB"C).

**
 * @author Graham Alvare
 * @author Brian Fristensky
 */
public class TSVFile {
    /**
     * An empty 1D string array for calling the List.toArray method.
     */
    public static final String[] EST = new String[0];

    /**
     * An empty 2D string array for calling the List.toArray method.
     */
    public static final String[][] DBL_EST = new String[0][];

    /**
     * The maximum number of characters {@link #isFormat} will look ahead
     * while searching the first line for a tab character.  This is also the
     * read-ahead limit passed to Reader.mark, so the stream can be reset to
     * where it started once detection is finished.
     */
    private static final int DETECT_READ_LIMIT = 8192;

    /**
     * TSV file format filter (for file chooser objects).  This file filter will
     * determine which files in a file chooser are TSV (tab separated values)
     * files, based on their file extension.
     */
    public static final FileFilter TSV_FILTER = new FileFilter () {
        /**
         * Determines whether a file is a TSV file (based on extension).
         * Currently the only extensions supported are ".csv", ".tab" and
         * ".tsv".  Directories are always accepted.
         **
         * @param file the file to test for TSV format compatibility.
         * @return true if the file is a directory or has a TSV extension
         *         (otherwise false).
         * @see javax.swing.filechooser.FileFilter#accept
         */
        @Override
        public boolean accept (File file) {
            if (file.isDirectory()) {
                return true;
            }
            // Lower-case the path once so the extension test ignores case.
            String path = file.getAbsolutePath().toLowerCase();
            return path.endsWith(".csv")
                || path.endsWith(".tab")
                || path.endsWith(".tsv");
        }

        /**
         * Returns a description of the file format that can be displayed to
         * the user.  This allows the user to easily find and identify a file
         * format in the file type combo box (within file chooser objects).
         **
         * @return the string description of the file format.
         * @see javax.swing.filechooser.FileFilter#getDescription
         */
        @Override
        public String getDescription () {
            return "TSV - Tab Separated Values file (*.csv,*.tab,*.tsv)";
        }
    };

    /**
     * Used to auto-detect BioLegato table canvas formats.  This function will
     * return true if the format is suspected to be a TSV file, i.e. if a tab
     * character is found on the first line before any end-of-line character.
     * Otherwise, the function will return false.
     * <p>
     * The reader is marked before detection starts and reset afterwards, so
     * that parsing can begin from the position the reader had on entry.  At
     * most DETECT_READ_LIMIT - 1 characters are examined; a first line longer
     * than that without a tab is reported as not being TSV.
     **
     * @param test the reader to attempt to detect the format of.
     * @return whether the format is a TSV file.
     */
    public boolean isFormat (Reader test) {
        // The current character being parsed (for auto-detection).
        // If this is a tab character '\t' at the end of the scan, there is
        // at least one column separator on the first line of the data.
        int check = ' ';

        try {
            // Mark ONCE, before reading anything, so that reset() returns to
            // the starting position rather than to the last character read.
            test.mark(DETECT_READ_LIMIT);

            // Scan until a tab, an end-of-line character, the end of the
            // stream (-1), or the look-ahead limit is reached.  Stopping at
            // -1 is essential: read() keeps returning -1 at end of stream,
            // which would otherwise loop forever on single-line input that
            // contains no tab.  One character short of the limit is read so
            // that the mark is guaranteed to still be valid.
            for (int count = 0;
                    count < DETECT_READ_LIMIT - 1
                        && check != -1
                        && check != '\n' && check != '\r' && check != '\t';
                    count++) {
                check = test.read();
            }

            // Reset the stream to the marked position for parsing.
            test.reset();
        } catch (IOException e) {
            e.printStackTrace(System.err);
        }

        // Return true if there is at least one column on the first line of
        // the suspected TSV file.
        return (check == '\t');
    }

    /**
     * Reads TSV data from a Reader object into a table canvas.
     * <p>
     * When overwrite is true and either forceall is true or nothing is
     * selected in the table, every row in the canvas is deleted before the
     * data are parsed.  Parse errors are printed to System.err and are not
     * propagated to the caller.
     **
     * @param table       the table canvas to read the TSV file into.
     * @param currentFile the Reader object to parse the TSV data from.
     * @param overwrite   whether existing rows may be deleted before reading.
     * @param forceall    when overwrite is true, whether to delete all rows
     *                    even if some rows or columns are selected.
     * @throws java.io.IOException thrown if there is any exception while
     *                             reading from the Reader object.
     */
    public static void readFile(TableCanvas table, Reader currentFile,
                                boolean overwrite, boolean forceall)
                                                            throws IOException {
        try {
            if (overwrite) {
                // If no data are selected, delete all data in the canvas.
                boolean nothingSelected
                        = (table.tablePane.getSelectedColumnCount() == 0
                            && table.tablePane.getSelectedRowCount() == 0);

                if (forceall || nothingSelected) {
                    table.setSelectionMode(TableCanvas.SelectionMode.ROW);
                    table.tablePane.selectAll();
                    table.deleteSelectedRows();
                    table.tablePane.clearSelection();
                }
                // NOTE: an error is generated when the canvas is empty at
                // this point.  Adding a dummy first row (even twice) was
                // tried here and did NOT eliminate that error.
            }

            // Create a new parser object to read the TSV file.
            TSVFile parser = new TSVFile(currentFile);

            // Parse the TSV data from the Reader object.
            parser.parseTSV(table);
        } catch (ParseException pe) {
            pe.printStackTrace(System.err);
        }
    }

    /**
     * Reads TSV data from a Reader object into a 2D string array.  Please note
     * that the 2D string array is specified as follows: the rows are the first
     * index of the array (e.g. data[0] is the first row of the array), and the
     * columns are the second index of the array (e.g. data[0][1] is the second
     * column of the first row of the array).  If the data cannot be parsed,
     * the error is printed to System.err and an empty array is returned.
     **
     * @param currentFile the Reader object to parse the TSV data from.
     * @return the rows and columns parsed from the reader.
     * @throws java.io.IOException thrown if there is any exception while
     *                             reading from the Reader object.
     */
    public static String[][] readTSV(Reader currentFile) throws IOException {
        // Stores the 2D string array parsed from the file.
        // By default, this variable contains an empty 2D string array.
        String[][] result = DBL_EST;

        try {
            // Create a new parser object to read the TSV file.
            TSVFile parser = new TSVFile(currentFile);

            // Parse the TSV data from the Reader object.
            result = parser.parseDoubleArray();
        } catch (ParseException pe) {
            pe.printStackTrace(System.err);
        }
        return result;
    }

    /**
     * Writes a 2D string array as TSV data into an Appendable object.  Please
     * note that the rows are the first index of the array (e.g. data[0] is the
     * first row of the array), and the columns are the second index of the
     * array (e.g. data[0][1] is the second column of the first row of the
     * array).
     * <p>
     * Each row is written on its own line (terminated by '\n'), with cells
     * separated by tab characters.  Double-quotation marks inside a cell are
     * doubled.  A null cell, in any column, is written as an empty field.
     * Cells are not enclosed in quotes: earlier versions did so to accommodate
     * MS-Excel, which no longer requires it.
     **
     * @param writer the Appendable object to write the TSV data to.
     * @param data   the 2D string array to write as TSV data.
     * @throws java.io.IOException thrown if there is any exception while
     *                             writing to the Appendable object.
     */
    public static void writeFile(Appendable writer, String[][] data)
                                                            throws IOException {
        for (int y = 0; y < data.length; y++) {
            String[] row = data[y];

            for (int x = 0; x < row.length; x++) {
                // Every column after the first is preceded by the delimiter.
                if (x > 0) {
                    writer.append('\t');
                }

                // Null cells are written as empty fields (first column
                // included).  Otherwise double all of the double-quotation
                // marks, to distinguish them from quotation marks that
                // surround a field.
                String cell = row[x];
                if (cell != null) {
                    writer.append(cell.replace("\"", "\"\""));
                }
            }
            writer.append('\n');
        }
    }
}

PARSER_END(TSVFile)

/*
 * NOTE(review): the token references inside the productions below and the
 * lexical-state lists in front of the TOKEN sections are spelled out from the
 * token definitions and their ": STATE" transitions.  In particular the state
 * list for the delimiter tokens (DEFAULT, ACCEPTSQ) is an inference -- please
 * confirm it against the intended lexer behaviour.
 */

/**
 * Parse TSV data into a table canvas.
 **
 * @param table the table canvas to parse the data into.
 * @see org.biolegato.tables.TableCanvas#namedHeaders
 */
void parseTSV(TableCanvas table) :
{
    /* Stores the spreadsheet row most recently parsed from the data. */
    String[] row;
}
{
    /* Read the first row of the TSV data. */
    row = parseRow()
    {
        /* If named column headers is enabled, then read the first row as
         * column headers for the table; otherwise it is an ordinary row. */
        if (table.namedHeaders) {
            table.addColumns(row);
        } else {
            table.addRow(row);
        }
    }

    /* Keep reading additional rows in the file as long as there are
     * instances of the RDELIM token (row delimiter -- new line character) */
    ( <RDELIM> row = parseRow() { table.addRow(row); } )*
}

/**
 * Parse TSV data into a 2D string array.  Please note that the 2D string array
 * is specified as follows: the rows are the first index of the array (e.g.
 * data[0] is the first row of the array), and the columns are the second index
 * of the array (e.g. data[0][1] is the second column of the first row of the
 * array).
 **
 * @return the rows parsed from the data, each as an array of columns.
 */
String[][] parseDoubleArray() :
{
    /* Stores the spreadsheet row most recently parsed from the data. */
    String[] row;
    /* A list of all of the rows parsed in the file.  This will later be
     * converted into a 2D string array via the List.toArray method. */
    List<String[]> rows = new ArrayList<String[]>();
}
{
    /* Read the first row of the TSV data. */
    row = parseRow() { rows.add(row); }

    /* Keep reading additional rows in the file as long as there are
     * instances of the RDELIM token (row delimiter -- new line character) */
    ( <RDELIM> row = parseRow() { rows.add(row); } )*

    { return rows.toArray(DBL_EST); }
}

/**
 * Parses a row of the TSV data.
 **
 * @return the array of columns contained in the row.
 */
String[] parseRow() :
{
    /* Stores the spreadsheet column most recently parsed from the data. */
    String col;
    /* Stores all of the columns parsed from the current row.  This will later
     * be converted into a string array via the List.toArray method. */
    List<String> columns = new ArrayList<String>();
}
{
    /* Read the first column in the row. */
    col = parseColumn() { columns.add(col); }

    /* Keep reading additional columns in the row as long as there are
     * instances of the CDELIM token (column delimiter -- tab) */
    ( <CDELIM> col = parseColumn() { columns.add(col); } )*

    { return columns.toArray(EST); }
}

/**
 * Parses a column of the TSV data.  An absent column (nothing between two
 * delimiters) is returned as the empty string.  For quoted columns the text
 * between the quotes is returned exactly as the tokenizer read it.
 **
 * @return a single column, represented as a string object.
 */
String parseColumn() :
{
    /* The most recently read in column token.  The string image of this token
     * (which is the actual literal string read from the tokenizer) will be
     * parsed into the String object returned by this function/method. */
    Token t;
    /* The string representation of the column read by the tokenizer/parser. */
    String result = "";
}
{
    (
        /* Handles non-quoted columns/cells: one leading character, optionally
         * followed by the remainder of the field. */
        t=<CHAR> { result = t.image; }
        [ t=<CHARS> { result += t.image; } ]

        /* Handles single-quoted columns/cells. */
      | <SQSTART> t=<SCHAR> { result = t.image; } <SQEND>

        /* Handles double-quoted columns/cells. */
      | <DQSTART> t=<DCHAR> { result = t.image; } <DQEND>
    )?
    { return result; }
}

/* THE ROW AND COLUMN DELIMITERS FOR TSV FILES */
<DEFAULT, ACCEPTSQ> TOKEN:
{
    < CDELIM: "\t" > : DEFAULT
  | < RDELIM: "\r\n" | "\n" | "\r" > : DEFAULT
}

/* FIELD STARTING CHARACTERS (FOR STARTING QUOTED AND NON-QUOTED STRINGS) */
<DEFAULT> TOKEN:
{
    < SQSTART: "'" > : SQUOTE
  | < DQSTART: "\"" > : DQUOTE
  | < CHAR: ( ~["'","\"","\t","\n"] ) > : ACCEPTSQ
}

/* HANDLES GROWING UNQUOTED FIELDS */
<ACCEPTSQ> TOKEN:
{
    < CHARS: ( ~["\t","\n"] )+ >
}

/* HANDLES GROWING SINGLE-QUOTED FIELDS */
<SQUOTE> TOKEN:
{
    < SQEND: "'" > : DEFAULT
  | < SCHAR: ( ~["'"] | "''" )+ >
}

/* HANDLES GROWING DOUBLE-QUOTED FIELDS */
<DQUOTE> TOKEN:
{
    < DQEND: "\"" > : DEFAULT
  | < DCHAR: ( ~["\""] | "\"\"" )+ >
}