/* LocationLexer.java
 *
 * created: Tue Oct  6 1998
 *
 * This file is part of Artemis
 * 
 * Copyright (C) 1998,1999,2000  Genome Research Limited
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/io/LocationLexer.java,v 1.1 2004-06-09 09:49:50 tjc Exp $
 */

package uk.ac.sanger.artemis.io;

/**
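 *  The LocationLexer class provides methods for breaking an EMBL feature
 *  location string into tokens.  The possible tokens are: Integer,
 *  LowerInteger (written "<n") and UpperInteger (written ">n") positions;
 *  the String ".."; the Characters '(' ')' ',' '^' ':' '.' '<' '>'; String
 *  labels and function names; and a one character String for anything else.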
 *
 *
 *  @author Kim Rutherford
 *  @version $Id: LocationLexer.java,v 1.1 2004-06-09 09:49:50 tjc Exp $
 *
 */

public class LocationLexer {
  /**
   *  Create a new LocationLexer object that can be used to tokenise
   *  location_string.
   *  @param location_string The location text to tokenise.  It is stored
   *    without being checked; nothing is read from it until tokens are
   *    requested from a TokenEnumeration.
   */
  public LocationLexer (String location_string) {
    this.location_string = location_string;
  }

  /**
   *  Return a TokenEnumeration containing all the tokens in this string.
   *  Each call creates a new enumeration that starts at the beginning of
   *  the string.
   */
  public TokenEnumeration getTokens () {
    return new TokenEnumeration (location_string);
  }

  /**
   *  This is a helper class for LocationLexer - see LocationLexer.getTokens ()
   *
   *  The enumeration keeps an index into the location string and at most
   *  one token of look-ahead (see peekElement ()).  Tokens are returned as
   *  Object and are one of:
   *  <ul>
   *  <li>Character - one of ( ) , ^ : . &lt; &gt;
   *  <li>String ".." - the range operator
   *  <li>Integer - a run of digits
   *  <li>LowerInteger / UpperInteger - digits directly preceded by &lt; or
   *      &gt;
   *  <li>String - a label or function name, or a single unrecognised
   *      character
   *  </ul>
   *  Spaces and tabs between tokens are skipped.
   */
  public class TokenEnumeration {
    /**
     *  Create an enumeration that starts at the first character of
     *  location_string.
     */
    public TokenEnumeration (String location_string) {
      this.location_string = location_string;
    }

    /**
     *  Return the next token but don't remove it from the enumeration.
     *  @return The next token or null if there are no more.
     */
    public Object peekElement () {
      if (peeked_object == null) {
        peeked_object = removeNextToken ();
      }

      return peeked_object;
    }

    /**
     *  Return the next token and remove it from the enumeration.
     *  @return The next token or null if there are no more.
     */
    public Object nextElement () {
      return removeNextToken ();
    }

    /**
     *  Try to "eat" the first token in the enumeration.  If the next token is
     *  a String equal to the argument then the next token will be removed
     *  from the enum and it will return true, otherwise the enum will not
     *  change and it will return false.
     */
    public boolean eatToken (String token) {
      final Object next_token = peekElement ();

      if (next_token instanceof String && next_token.equals (token)) {
        nextElement ();
        return true;
      }

      return false;
    }

    /**
     *  Try to "eat" the first token in the enumeration.  If the next token is
     *  a Character with the same value as the argument then the next token
     *  will be removed from the enum and it will return true, otherwise the
     *  enum will not change and it will return false.
     */
    public boolean eatToken (final char token) {
      final Object next_token = peekElement ();

      if (next_token instanceof Character &&
          ((Character) next_token).charValue () == token) {
        nextElement ();
        return true;
      }

      return false;
    }

    /**
     *  Return a String containing all the remaining tokens concatenated
     *  together.  The enumeration itself is left unchanged: the read position
     *  and any peeked token are restored before returning, so calling
     *  toString () (for example while building an error message or from a
     *  debugger) never consumes tokens.
     **/
    public String toString () {
      // remember where we are so that the scan below can be undone
      final int saved_char_index = next_char_index;
      final Object saved_peeked_object = peeked_object;

      try {
        // the number we pick for the initial StringBuffer size is not
        // critical, but should cover most possibilities
        final StringBuffer spare_tokens_string = new StringBuffer (100);

        Object next_token = nextElement ();

        while (next_token != null) {
          spare_tokens_string.append (next_token.toString ());
          next_token = nextElement ();
        }

        return spare_tokens_string.toString ();
      } finally {
        // restore even if reading a token threw (eg. a number that is too
        // big for an Integer)
        next_char_index = saved_char_index;
        peeked_object = saved_peeked_object;
      }
    }

    /**
     *  Return the next token and logically remove it from the start of the
     *  enumeration.  Returns null when there are no more tokens.
     *  removeNextToken () will return peeked_object (and set it to null)
     *  rather than reading a new token iff peeked_object is not null.
     */
    private Object removeNextToken () {
      if (peeked_object != null) {
        // hand back the token that peekElement () has already read
        final Object tmp_object = peeked_object;
        peeked_object = null;
        return tmp_object;
      }

      final int length = location_string.length ();

      // loop until we get to the end of location_string or until we return
      // a token.
      while (next_char_index < length) {
        final char current_char = location_string.charAt (next_char_index);

        switch (current_char) {

        case ' ': case '\t':
          // skip white space and go around the loop again
          next_char_index++;
          continue;

        case '(': case ')': case ',': case '^': case ':':
          // handle single character tokens (except ".")
          next_char_index++;
          return new Character (current_char);

        case '>':
          // ">" directly followed by digits is one UpperInteger token,
          // otherwise it is a plain '>' Character
          next_char_index++;
          if (nextCharIsDigit ()) {
            return new UpperInteger (removeInteger ());
          }
          return new Character ('>');

        case '<':
          // as for '>' but giving a LowerInteger
          next_char_index++;
          if (nextCharIsDigit ()) {
            return new LowerInteger (removeInteger ());
          }
          return new Character ('<');

        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
          return removeInteger ();

        case '.':
          if (next_char_index + 1 < length &&
              location_string.charAt (next_char_index + 1) == '.') {
            next_char_index += 2;
            return "..";
          }

          // a single "." (possibly at the very end of the location string) -
          // the parser will catch this and report the error if necessary
          next_char_index++;
          return new Character ('.');

        default:
          {
            // everything else is a label (eg AF009694), functional name
            // (ie complement, join or order) or garbage
            final String label = removeLabel ();

            if (label.equals ("")) {
              // couldn't read a label so just return the current character
              // as a String and let the parser sort it out
              next_char_index++;
              return String.valueOf (current_char);
            }

            return label;
          }
        }
      }

      // all tokens have been read
      return null;
    }

    /**
     *  Return true if and only if next_char_index is inside location_string
     *  and the character at that position is a digit (as defined by
     *  Character.isDigit ()).
     */
    private boolean nextCharIsDigit () {
      return next_char_index < location_string.length () &&
             Character.isDigit (location_string.charAt (next_char_index));
    }

    /**
     *  Return true if and only if the argument may appear inside a label:
     *  a letter, a digit or one of . _ * ' -
     */
    private boolean isLabelChar (final char c) {
      return Character.isLetterOrDigit (c) ||
             c == '.' || c == '_' || c == '*' || c == '\'' || c == '-';
    }

    /**
     *  Reads an integer from the current position (next_char_index) in
     *  location_string and increments next_char_index to point to the next
     *  non digit in location_string (or to the end of the string).  The
     *  caller must make sure that the current character is a digit.
     *  @throws NumberFormatException if the digits read cannot be parsed as
     *    an int, for example because the number is too large.
     */
    private Integer removeInteger () {
      final int start_index = next_char_index;

      while (nextCharIsDigit ()) {
        next_char_index++;
      }

      // take the digits in one go rather than appending char by char
      return new Integer (location_string.substring (start_index,
                                                     next_char_index));
    }

    /**
     *  Remove a label from location_string and adjust next_char_index
     *  appropriately.  A label starts with a letter and continues for as
     *  long as the characters are letters, digits or one of . _ * ' -
     *  (note that ":" is not part of a label).  Returns an empty String,
     *  without moving next_char_index, if the next character is not a
     *  letter.
     */
    private String removeLabel () {
      if (!Character.isLetter (location_string.charAt (next_char_index))) {
        // first character must be a letter
        return "";
      }

      final int start_index = next_char_index;
      final int length = location_string.length ();

      while (next_char_index < length &&
             isLabelChar (location_string.charAt (next_char_index))) {
        next_char_index++;
      }

      return location_string.substring (start_index, next_char_index);
    }

    /**
     *  Contains string passed to the constructor.
     */
    private final String location_string;


    /**
     *  A pointer into location_string indicating the next character we should
     *  read.
     */
    private int next_char_index = 0;

    /**
     *  If peekElement () has been called then the token it read from
     *  location_string is stored here until it is removed (see
     *  removeNextToken ()).  It is null when no token has been peeked.
     */
    private Object peeked_object = null;
  }


  /**
   *  This contains the String that was passed to the constructor
   */
  private final String location_string;
}