/*
* Parser.java February 2001
*
* Copyright (C) 2001, Niall Gallagher <niallg@users.sf.net>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.simpleframework.util.parse;
/**
 * This <code>Parser</code> object is to be used as a simple template for
 * parsing uncomplicated expressions. It parses a <code>String</code> by
 * copying its characters into an internal <code>char</code> buffer and then
 * handing control to the subclass. A few helper methods are provided to
 * classify characters and to track the read position within the buffer.
 * Subclasses implement the two abstract methods, <code>init</code> and
 * <code>parse</code>, to create a <code>Parser</code> for a given kind of
 * <code>String</code>.
 *
 * @author Niall Gallagher
 */
public abstract class Parser {

    /**
     * This is the buffer that is being parsed. It may be longer than the
     * text it holds; only the first <code>count</code> characters are valid.
     */
    protected char[] buf;

    /**
     * This represents the current read offset within the buffer.
     */
    protected int off;

    /**
     * This represents the number of valid characters in the buffer, that
     * is, the length of the text most recently given to <code>parse</code>.
     */
    protected int count;

    /**
     * This is a no argument constructor for the <code>Parser</code>. This will
     * be invoked by each subclass of this object. It will set the buffer to a
     * zero length buffer so that when the <code>ensureCapacity</code> method is
     * used the buffer's length can be checked.
     */
    protected Parser() {
        this.buf = new char[0];
    }

    /**
     * This is used to parse the <code>String</code> given to it. This will
     * firstly ensure that the <code>char</code> buffer is resized, if
     * necessary, so that it can contain the characters of the
     * <code>String</code>, and then copy those characters to the start of the
     * buffer. The second step is to invoke <code>init</code> so that the
     * object is reset to a prepared state, which allows multiple parse
     * invocations to be made on the same instance. Finally the no argument
     * <code>parse</code> method is called to parse the <code>char</code>
     * buffer. If the text given is null this method does nothing.
     *
     * @param text
     *            the <code>String</code> to be parsed with this
     *            <code>Parser</code>, a null value is ignored
     */
    public void parse(String text) {
        if (text != null) {
            this.ensureCapacity(text.length());
            this.count = text.length();
            text.getChars(0, this.count, this.buf, 0);
            this.init();
            this.parse();
        }
    }

    /**
     * This ensures that there is enough space in the buffer to allow for more
     * <code>char</code>'s to be added. If the buffer is already at least as
     * large as min then the buffer will not be expanded at all. When the
     * buffer is expanded it grows to the larger of min and twice its current
     * length, and the characters it already holds are carried over to the
     * new buffer so that nothing buffered so far is lost.
     *
     * @param min
     *            the minimum size needed to accommodate the characters
     */
    protected void ensureCapacity(int min) {
        if (this.buf.length < min) {
            int size = this.buf.length * 2;
            int max = Math.max(min, size);
            char[] temp = new char[max];
            // Preserve the existing contents; without this copy a subclass
            // growing the buffer mid-parse would lose its data.
            System.arraycopy(this.buf, 0, temp, 0, this.buf.length);
            this.buf = temp;
        }
    }

    /**
     * This is used to determine if a given character is a space character.
     * The characters this sees as whitespace are the space, horizontal tab,
     * line feed and carriage return characters.
     *
     * @param c
     *            the character that is being determined by this
     *
     * @return true if the character given it is a space character
     */
    protected boolean space(char c) {
        switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            return true;
        default:
            return false;
        }
    }

    /**
     * This is used to determine whether or not a given character is a digit
     * character, that is, one of the characters '0' to '9' inclusive.
     *
     * @param c
     *            the character being determined by this method
     *
     * @return true if the character given is a digit character
     */
    protected boolean digit(char c) {
        return (c <= '9') && ('0' <= c);
    }

    /**
     * This converts an uppercase character to its lowercase equivalent. Only
     * the letters 'A' to 'Z' are converted; every other character, including
     * any accented uppercase letter, is returned unchanged.
     *
     * @param c
     *            the <code>char</code> to be converted to lowercase
     *
     * @return the lowercase form of the given character if it is one of 'A'
     *         to 'Z', otherwise the character itself
     */
    protected char toLower(char c) {
        if ((c >= 'A') && (c <= 'Z')) {
            return (char) ((c - 'A') + 'a');
        }
        return c;
    }

    /**
     * This is used to skip an arbitrary <code>String</code> within the
     * <code>char</code> buf. It checks the length of the <code>String</code>
     * first to ensure that it will not go beyond the valid characters in the
     * buffer. A comparison is then made between the buffer's contents at the
     * current offset and the <code>String</code>; the comparison ignores the
     * case of the letters 'A' to 'Z' as both sides are passed through
     * <code>toLower</code>. If the region in the buffer matches the
     * <code>String</code> then the offset within the buffer is increased by
     * the <code>String</code>'s length so that it has effectively skipped it.
     * If it does not match the offset is left where it was.
     *
     * @param text
     *            this is the <code>String</code> value to be skipped, this
     *            must not be null
     *
     * @return true if the <code>String</code> was skipped
     */
    protected boolean skip(String text) {
        int size = text.length();
        int read = 0;

        if ((this.off + size) > this.count) {
            return false;
        }
        while (read < size) {
            char a = text.charAt(read);
            char b = this.buf[this.off + read++];

            if (this.toLower(a) != this.toLower(b)) {
                return false;
            }
        }
        // Only advance once the whole text has matched.
        this.off += size;
        return true;
    }

    /**
     * This will initialize the <code>Parser</code> when it is ready to parse a
     * new <code>String</code>. This will reset the <code>Parser</code> to a
     * ready state. The <code>init</code> method is invoked by the
     * <code>Parser</code> when the <code>parse</code> method is invoked, after
     * the text has been copied into the buffer and before the buffer is
     * parsed.
     */
    protected abstract void init();

    /**
     * This is the method that should be implemented to read the buf. This
     * method should attempt to extract tokens from the buffer so that these
     * tokens may somehow be used to determine the semantics. This method is
     * invoked after the <code>init</code> method is invoked.
     */
    protected abstract void parse();
}