CustomSeparator.java example

Explorer
LEADT-master
// $Id: CustomSeparator.java 41 2010-04-03 20:04:12Z marcusvnac $
// Copyright (c) 1996-2006 The Regents of the University of California. All
// Rights Reserved. Permission to use, copy, modify, and distribute this
// software and its documentation without fee, and without a written
// agreement is hereby granted, provided that the above copyright notice
// and this paragraph appear in all copies.  This software program and
// documentation are copyrighted by The Regents of the University of
// California. The software program and documentation are supplied "AS
// IS", without any accompanying services from The Regents. The Regents
// does not warrant that the operation of the program will be
// uninterrupted or error-free. The end-user understands that the program
// was developed for research purposes and is advised not to rely
// exclusively on the program for any reason.  IN NO EVENT SHALL THE
// UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
// SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
// ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
// THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE. THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
// PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
// CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT,
// UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

package org.argouml.util;

/**
 * Base class for custom separators.
 *
 * <p>It can be instantiated directly, and then works like a separator in the
 * delimiter string. For that purpose you should use the delimiter string
 * in MyTokenizer, unless your token is wider than 32 characters.
 *
 * <p>You can also subclass this class to provide for more intricate recognition
 * of the tokens. It is known that this class has been subclassed to recognize
 * quoted strings, and balanced parentheses.
 *
 * <p>You should have this mental image of the tokenizing process:<ol>
 * <li>Reset is called.
 * <li>For each character, c, in the sequence being tokenized:<ul>
 *     <li>addChar(c) is called for each separator in the tokenizer.
 *     <li>if addChar returns true, break.</ul>
 * <li>hasFreePart is checked to see if something follows. If true:<ul>
 *     <li>endChar(c) is called for each following character.
 *     <li>if/when endChar returns true, break.</ul>
 * <li>tokenLength is checked to see how far back in the sequence the token
 *     began. If there are characters before that but after the last token,
 *     then they are made a token and this token is saved and returned next.
 * </ol>
 *
 * @author Michael Stockman
 * @since 0.11.2
 * @see MyTokenizer
 */
public class CustomSeparator {
    /** The character sequence that this separator recognizes. */
    private final char[] pattern;

    /**
     * Sliding window holding the most recent {@code pattern.length}
     * characters passed to {@link #addChar addChar}. It always has the
     * same length as {@link #pattern}.
     */
    private final char[] match;

    /**
     * This constructor is only available to subclasses of this class.
     * It creates a separator with an empty pattern. If you use it you
     * should also override {@link #addChar addChar} to recognize when your
     * separator should take control. If you don't, the default addChar
     * matches on every character, so you may block all other separators.
     */
    protected CustomSeparator() {
        pattern = new char[0];
        match = new char[0];
    }

    /**
     * This constructor creates a new custom separator that matches the
     * character start. Unless you override {@link #addChar addChar}, the
     * default behaviour is to return false in addChar until start is
     * encountered and then hasFreePart returns false.
     *
     * @param start The start character.
     */
    public CustomSeparator(char start) {
        pattern = new char[] {start};
        match = new char[pattern.length];
    }

    /**
     * This constructor creates a new custom separator that matches the
     * string start. Unless you override {@link #addChar addChar}, the
     * default behaviour is to return false in addChar until start is
     * encountered and then hasFreePart returns false.
     *
     * @param start The start String.
     * @throws NullPointerException if start is null.
     */
    public CustomSeparator(String start) {
        pattern = start.toCharArray();
        match = new char[pattern.length];
    }

    /**
     * Called to reset the separator before starting to look for a new
     * token. Clears the window of recently seen characters.
     */
    public void reset() {
        java.util.Arrays.fill(match, '\0');
    }

    /**
     * Returns the length of the matched token. It is not required to be
     * meaningful unless addChar has returned true and hasFreePart
     * returned false or endChar returned true.
     *
     * <p>The default behaviour is to return the length of the pattern
     * given to the constructor.
     *
     * @return The length of the matched token.
     */
    public int tokenLength() {
        return pattern.length;
    }

    /**
     * Called to allow you to decide if you want to capture control of
     * the matching process. If you return true, then
     * {@link #hasFreePart hasFreePart} will be checked to see if you
     * expect more things to follow.
     *
     * <p>The default behaviour is to return false until the character
     * or String given as parameter to the constructor has been matched.
     * An empty pattern matches trivially, so true is returned for every
     * character in that case.
     *
     * @param c The next character in the sequence being tokenized.
     * @return true to gain control of the matching, false to continue
     * matching.
     */
    public boolean addChar(char c) {
        // With an empty pattern there is no window to shift; indexing
        // match[match.length - 1] would be out of bounds.
        if (match.length == 0) {
            return true;
        }

        // Drop the oldest character and append c at the end of the window.
        System.arraycopy(match, 1, match, 0, match.length - 1);
        match[match.length - 1] = c;

        return java.util.Arrays.equals(match, pattern);
    }

    /**
     * Called to check if more characters are expected to follow after
     * addChar has returned true. If true, then any following characters
     * will be fed to endChar until endChar returns true.
     *
     * <p>The default behaviour is to return false.
     *
     * @return true to continue feeding characters to endChar or false.
     */
    public boolean hasFreePart() {
        return false;
    }

    /**
     * Called to check if more characters are expected in the free part of
     * the token.
     *
     * <p>The default behaviour is to return true.
     *
     * @param c The next character in the sequence being tokenized.
     * @return true to indicate that the token is complete, or false to
     * continue feeding characters through endChar.
     */
    public boolean endChar(char c) {
        return true;
    }

    /**
     * Called to determine how many characters the CustomSeparator read
     * after the end of the separator. This allows them to see beyond the
     * end, but these characters will be fed to the separators again
     * when looking for the next token so be careful.
     *
     * <p>The default behaviour is to return 0.
     *
     * @return the number of characters that were read after the end
     * of the token had been read.
     */
    public int getPeekCount() {
        return 0;
    }
}