/* Generated By:JavaCC: Do not edit this line. CSVFile.java */
package org.biolegato.tables;

/*
 * CSVFile.java
 *
 * Created on January 30, 2008, 11:58 AM
 *
 * To change this template, choose Tools | Template Manager
 * and open the template in the editor.
 */
import java.io.BufferedReader;
import javax.swing.filechooser.FileFilter;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.util.List;
import java.util.ArrayList;
import org.biolegato.main.DataCanvas;

/**
 * CSV (Comma Separated Values) file format parser.  A CSV file is a
 * spreadsheet, where the rows are separated by new line characters (i.e. each
 * line of the file is a row), and the columns are separated by comma
 * characters.  Thus an example of a CSV file would be:
 *
 * <pre>
 *       Name,Phone Num,Birthday
 *       John,555-5123,"Apr. 30, 1937"
 *       Jane,555-4352,"Feb. 13, 1976"
 *       Sam,555-6333,"Jan. 7, 1961"
 *       Tom,555-7777,"Aug. 4, 1943"
 * </pre>
 *
 * Where name, phone number, and birthday are all separated by comma
 * characters and are each columns of a spreadsheet.  The birthday fields are
 * quoted because they themselves contain commas.
 *
 * NOTE: fields may be double or single quoted.  In such cases, the single
 * or double quote character may be escaped by doubling it (e.g. "AB""C"
 * will be read as AB"C).
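 *
 * A minimal usage sketch (assuming a hypothetical file "phone.csv" holding
 * the example above; both calls can throw IOException):
 * <pre>
 *       Reader in = new java.io.FileReader("phone.csv");
 *       String[][] rows = CSVFile.readCSV(in);        // rows[1][1] is "555-5123"
 *       CSVFile.writeFile(new StringBuilder(), rows);
 * </pre>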

 *
 * @author Graham Alvare
 * @author Brian Fristensky
 */
public class CSVFile extends FileFilter implements CSVFileConstants {

    /**
     * An empty 1D string array for calling the List.toArray method.
     */
    public static final String[] EST = new String[0];
    /**
     * An empty 2D string array for calling the List.toArray method.
     */
    public static final String[][] DBL_EST = new String[0][];

    /**
     * Creates a new instance of CSVFile
     */
    public CSVFile () {
    }

    /**
     * Determines whether a file is a CSV file (based on extension).
     * Currently the only extension supported is ".csv".
     *
     * @param  file the file to test for CSV format compatibility.
     * @return true if the file is a CSV file (otherwise false).
     * @see    javax.swing.filechooser.FileFilter#accept
     */
    public boolean accept (File file) {
        return (file.isDirectory()
                || file.getAbsolutePath().toLowerCase().endsWith(".csv"));
    }

    /**
     * Returns a description of the file format that can be displayed to the
     * user.  This allows the user to easily find and identify a file format
     * in the file type combo box (within file chooser objects).
     *
     * @return the string description of the file format.
     * @see    javax.swing.filechooser.FileFilter#getDescription
     */
    public String getDescription () {
        return "CSV - Comma Separated Values - file (*.csv)";
    }

    /**
     * Used to auto-detect BioLegato table canvas formats.  This function will
     * return true if the format is suspected to be a CSV file.  Otherwise,
     * the function will return false.
     *
     * @param  test the reader to attempt to detect the format of.
     * @return whether the format is a CSV file.
     */
    public boolean isFormat (Reader test) {
        // The current character being parsed (for auto-detection).
        // If the check variable is a comma character ',' at the end of
        // parsing, then the file is suspected to be a CSV file.  Note that we
        // only parse the file until an end of line or comma character is
        // encountered.  Thus, our purpose is to see if there is at least one
        // column (comma) on the first line (indicating the format could
        // likely be a CSV file).
        int check = ' ';

        try {
            // Mark the stream once (with a generous read-ahead limit), so we
            // can later reset it to the beginning for parsing.
            test.mark(8192);

            // Iterate through the stream until a comma, an end-of-line
            // character, or the end of the stream (-1) is encountered.  Store
            // each character in the 'check' variable.  This will later be
            // tested.  If the character is a comma, then there is at least
            // one column delimiter on the first line of the file.
            while (check != '\n' && check != '\r' && check != ','
                    && check != -1) {
                // Get the next character in the stream (to test).
                check = test.read();
            }

            // Reset the stream to the beginning for parsing.
            test.reset();
        } catch (IOException e) {
            e.printStackTrace(System.err);
        }

        // Return true if there is at least one column on the first line of
        // the suspected CSV file.
        return (check == ',');
    }

    /**
     * Reads CSV data from a Reader object into a table canvas.
     *
     * @param  table       the table canvas to read the CSV file into.
     * @param  currentFile the Reader object to parse the CSV data from.
     * @param  overwrite   whether to overwrite the currently selected content
     *                     in the table canvas with the data parsed by this
     *                     function.  Note that this currently does not work,
     *                     and should be implemented in the future.
     * @param  forceall    whether to clear every row in the canvas before
     *                     reading, even if rows or columns are selected.
     * @throws java.io.IOException thrown if there is any exception while
     *                             reading from the Reader object.
     */
    public static void readFile(TableCanvas table, Reader currentFile,
                                boolean overwrite, boolean forceall)
            throws IOException {
        try {
            if (overwrite) {
                // We need to jot down the number of columns before we delete
                // rows.
                int numCols = table.tableModel.getColumnCount();

                // If no data are selected, delete all data in the canvas.
                boolean NothingSelected =
                        (table.tablePane.getSelectedColumnCount() == 0
                         && table.tablePane.getSelectedRowCount() == 0);
                if (forceall || NothingSelected) {
                    table.setSelectionMode(TableCanvas.SelectionMode.ROW);
                    table.tablePane.selectAll();
                    table.deleteSelectedRows();
                    table.tablePane.clearSelection();
                }

                // DOESN'T ELIMINATE THE ERROR, EVEN IF WE RUN addRow(dummy)
                // TWICE.
                // An error would be generated if we had an empty canvas, so
                // add a dummy first row and clear the selection.
                /**if (forceall || NothingSelected) {
                    String[] dummy = new String[numCols];
                    for (int i = 0; i < numCols; i++ ) {
                        dummy[i] = "";
                    };
                    table.tableModel.addRow(dummy);
                } */
            }

            // Create a new parser object to read the CSV file.
            CSVFile parser = new CSVFile(currentFile);
            // Parse the CSV data from the Reader object.
            parser.parseCSV(table);
        } catch (ParseException pe) {
            pe.printStackTrace(System.err);
        }
    }

    /**
     * Reads CSV data from a Reader object into a 2D string array.  Please
     * note that the 2D string array is specified as follows: the rows are the
     * first index of the array (e.g. data[0] is the first row of the array),
     * and the columns are the second index of the array (e.g. data[0][1] is
     * the second column of the first row of the array).
     *
     * @param  currentFile the Reader object to parse the CSV data from.
     * @return the 2D string array parsed from the CSV data.
     * @throws java.io.IOException thrown if there is any exception while
     *                             reading from the Reader object.
     */
    public static String[][] readCSV(Reader currentFile) throws IOException {
        // Stores the 2D string array parsed from the file.
        // By default, this variable contains an empty 2D string array.
        String[][] result = DBL_EST;

        try {
            // Create a new parser object to read the CSV file.
            CSVFile parser = new CSVFile(currentFile);
            // Parse the CSV data from the Reader object.
            result = parser.parseDoubleArray();
        } catch (ParseException pe) {
            pe.printStackTrace(System.err);
        }
        return result;
    }

    /**
     * Writes a 2D string array as CSV data into an Appendable object.  Please
     * note that the rows are the first index of the array (e.g. data[0] is
     * the first row of the array), and the columns are the second index of
     * the array (e.g. data[0][1] is the second column of the first row of the
     * array).
     *
     * @param  writer the Appendable object to write the CSV data to.
     * @param  data   the 2D string array to parse into CSV data.
     * @throws java.io.IOException thrown if there is any exception while
     *                             writing to the Appendable object.
     */
    public static void writeFile(Appendable writer, String[][] data)
            throws IOException {
        // The current table cell being written.
        String cell;

        // Translate each row in the 2D array and write it to the file.
        // The original version enclosed each cell in double quotes (").
        // The motivation was to accommodate MS-Excel, but Excel no longer
        // requires quotes.  Lines to begin and end a cell have been
        // commented out.
        for (int y = 0; y < data.length; y++) {
            // Translate each column in the row array and write it to the
            // file.  Start by writing the first column.
            if (data[y].length > 0) {
                //writer.append('\"');
                // Double all of the double-quotation marks.  This is to
                // distinguish double-quotation marks in each field from
                // double-quotation marks surrounding the fields.
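                // For example, a cell containing:   say "hi"
                // is written to the file as:        say ""hi""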
                writer.append(data[y][0].replaceAll("\"", "\"\""));
                //writer.append('\"');
            }

            // Write each additional column, appending the delimiter character
            // before it (i.e. the comma character) -- e.g. ",ABC"
            for (int x = 1; x < data[y].length; x++) {
                cell = data[y][x];
                writer.append(',');
                //writer.append('\"');
                // If the cell is not null, double all of the double-quotation
                // marks.  This is to distinguish double-quotation marks in
                // each field from double-quotation marks surrounding the
                // fields.
                if (cell != null) {
                    writer.append(cell.replaceAll("\"", "\"\""));
                }
                //writer.append('\"');
            }
            writer.append('\n');
        }
    }

    /**
     * Parse CSV data into a table canvas.
     *
     * @param table the table canvas to parse the data into.
     * @see   org.biolegato.tables.TableCanvas#namedHeaders
     */
    final public void parseCSV(TableCanvas table) throws ParseException {
        /* Stores the spreadsheet row most recently parsed from the data. */
        String[] row;

        /* Read the first row of the CSV data. */
        row = parseRow();
        /* If the named column headers option is enabled, then read the first
         * row as column headers for the table. */
        if (table.namedHeaders) {
            table.addColumns(row);
        } else {
            table.addRow(row);
        }
        label_1:
        while (true) {
            switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
            case RDELIM:
                ;
                break;
            default:
                jj_la1[0] = jj_gen;
                break label_1;
            }
            jj_consume_token(RDELIM);
            row = parseRow();
            table.addRow(row);
        }
        jj_consume_token(0);
    }

    /**
     * Parse CSV data into a 2D string array.  Please note that the 2D string
     * array is specified as follows: the rows are the first index of the
     * array (e.g. data[0] is the first row of the array), and the columns are
     * the second index of the array (e.g. data[0][1] is the second column of
     * the first row of the array).
     *
     * @return the 2D string array generated from the parse.
     */
    final public String[][] parseDoubleArray() throws ParseException {
        /* Stores the spreadsheet row most recently parsed from the data. */
        String[] row;
        /* An array list of all of the rows parsed in the file.  This will
         * later be converted into a 2D string array via the List.toArray
         * method. */
        List<String[]> rows = new ArrayList<String[]>();

        /* Read the first row of the CSV data. */
        row = parseRow();
        rows.add(row);
        label_2:
        while (true) {
            switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
            case RDELIM:
                ;
                break;
            default:
                jj_la1[1] = jj_gen;
                break label_2;
            }
            jj_consume_token(RDELIM);
            row = parseRow();
            rows.add(row);
        }
        jj_consume_token(0);
        {if (true) return rows.toArray(DBL_EST);}
        throw new Error("Missing return statement in function");
    }

    /**
     * Parses a row of the CSV data.
     *
     * @return the array of columns contained in the row.
     */
    final public String[] parseRow() throws ParseException {
        /* Stores the spreadsheet column most recently parsed from the
         * data. */
        String col;
        /* Stores all of the columns parsed from the current row.  This will
         * later be converted into a string array via the List.toArray
         * method. */
        List<String> columns = new ArrayList<String>();

        /* Read the first column in the row. */
        col = parseColumn();
        columns.add(col);
        label_3:
        while (true) {
            switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
            case CDELIM:
                ;
                break;
            default:
                jj_la1[2] = jj_gen;
                break label_3;
            }
            jj_consume_token(CDELIM);
            col = parseColumn();
            columns.add(col);
        }
        {if (true) return columns.toArray(EST);}
        throw new Error("Missing return statement in function");
    }

    /**
     * Parses a column of the CSV data.
     *
     * @return a single column, represented as a string object.
     */
    final public String parseColumn() throws ParseException {
        /* The most recently read in column token.
         * The string image of this token (which is the actual literal string
         * read from the tokenizer) will be parsed into the String object
         * returned by this function/method. */
        Token t;
        /* The string representation of the column read by the
         * tokenizer/parser. */
        String result = "";

        switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
        case SQSTART:
        case DQSTART:
        case CHAR:
            switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
            case CHAR:
                /* Handles non-quoted columns/cells. */
                t = jj_consume_token(CHAR);
                result = t.image;
                switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
                case CHARS:
                    t = jj_consume_token(CHARS);
                    result += t.image;
                    break;
                default:
                    jj_la1[3] = jj_gen;
                    ;
                }
                break;
            case SQSTART:
                jj_consume_token(SQSTART);
                t = jj_consume_token(SCHAR);
                result = t.image;
                jj_consume_token(SQEND);
                break;
            case DQSTART:
                jj_consume_token(DQSTART);
                t = jj_consume_token(DCHAR);
                result = t.image;
                jj_consume_token(DQEND);
                break;
            default:
                jj_la1[4] = jj_gen;
                jj_consume_token(-1);
                throw new ParseException();
            }
            break;
        default:
            jj_la1[5] = jj_gen;
            ;
        }
        {if (true) return result;}
        throw new Error("Missing return statement in function");
    }

    /** Generated Token Manager. */
    public CSVFileTokenManager token_source;
    SimpleCharStream jj_input_stream;
    /** Current token. */
    public Token token;
    /** Next token. */
    public Token jj_nt;
    private int jj_ntk;
    private int jj_gen;
    final private int[] jj_la1 = new int[6];
    static private int[] jj_la1_0;
    static {
        jj_la1_init_0();
    }
    private static void jj_la1_init_0() {
        jj_la1_0 = new int[] {0x4,0x4,0x2,0x40,0x38,0x38,};
    }

    /** Constructor with InputStream. */
    public CSVFile(java.io.InputStream stream) {
        this(stream, null);
    }
    /** Constructor with InputStream and supplied encoding */
    public CSVFile(java.io.InputStream stream, String encoding) {
        try {
            jj_input_stream = new SimpleCharStream(stream, encoding, 1, 1);
        } catch(java.io.UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
        token_source = new CSVFileTokenManager(jj_input_stream);
        token = new Token();
        jj_ntk = -1;
        jj_gen = 0;
        for (int i = 0; i < 6; i++) jj_la1[i] = -1;
    }
    /** Reinitialise. */
    public void ReInit(java.io.InputStream stream) {
        ReInit(stream, null);
    }
    /** Reinitialise. */
    public void ReInit(java.io.InputStream stream, String encoding) {
        try {
            jj_input_stream.ReInit(stream, encoding, 1, 1);
        } catch(java.io.UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
        token_source.ReInit(jj_input_stream);
        token = new Token();
        jj_ntk = -1;
        jj_gen = 0;
        for (int i = 0; i < 6; i++) jj_la1[i] = -1;
    }
    /** Constructor. */
    public CSVFile(java.io.Reader stream) {
        jj_input_stream = new SimpleCharStream(stream, 1, 1);
        token_source = new CSVFileTokenManager(jj_input_stream);
        token = new Token();
        jj_ntk = -1;
        jj_gen = 0;
        for (int i = 0; i < 6; i++) jj_la1[i] = -1;
    }
    /** Reinitialise. */
    public void ReInit(java.io.Reader stream) {
        jj_input_stream.ReInit(stream, 1, 1);
        token_source.ReInit(jj_input_stream);
        token = new Token();
        jj_ntk = -1;
        jj_gen = 0;
        for (int i = 0; i < 6; i++) jj_la1[i] = -1;
    }
    /** Constructor with generated Token Manager. */
    public CSVFile(CSVFileTokenManager tm) {
        token_source = tm;
        token = new Token();
        jj_ntk = -1;
        jj_gen = 0;
        for (int i = 0; i < 6; i++) jj_la1[i] = -1;
    }
    /** Reinitialise.
     */
    public void ReInit(CSVFileTokenManager tm) {
        token_source = tm;
        token = new Token();
        jj_ntk = -1;
        jj_gen = 0;
        for (int i = 0; i < 6; i++) jj_la1[i] = -1;
    }

    private Token jj_consume_token(int kind) throws ParseException {
        Token oldToken;
        if ((oldToken = token).next != null) token = token.next;
        else token = token.next = token_source.getNextToken();
        jj_ntk = -1;
        if (token.kind == kind) {
            jj_gen++;
            return token;
        }
        token = oldToken;
        jj_kind = kind;
        throw generateParseException();
    }

    /** Get the next Token. */
    final public Token getNextToken() {
        if (token.next != null) token = token.next;
        else token = token.next = token_source.getNextToken();
        jj_ntk = -1;
        jj_gen++;
        return token;
    }

    /** Get the specific Token. */
    final public Token getToken(int index) {
        Token t = token;
        for (int i = 0; i < index; i++) {
            if (t.next != null) t = t.next;
            else t = t.next = token_source.getNextToken();
        }
        return t;
    }

    private int jj_ntk() {
        if ((jj_nt=token.next) == null)
            return (jj_ntk = (token.next=token_source.getNextToken()).kind);
        else
            return (jj_ntk = jj_nt.kind);
    }

    private java.util.List jj_expentries = new java.util.ArrayList();
    private int[] jj_expentry;
    private int jj_kind = -1;

    /** Generate ParseException. */
    public ParseException generateParseException() {
        jj_expentries.clear();
        boolean[] la1tokens = new boolean[11];
        if (jj_kind >= 0) {
            la1tokens[jj_kind] = true;
            jj_kind = -1;
        }
        for (int i = 0; i < 6; i++) {
            if (jj_la1[i] == jj_gen) {
                for (int j = 0; j < 32; j++) {
                    if ((jj_la1_0[i] & (1<