// $Id: CustomSeparator.java 41 2010-04-03 20:04:12Z marcusvnac $
// Copyright (c) 1996-2006 The Regents of the University of California. All
// Rights Reserved. Permission to use, copy, modify, and distribute this
// software and its documentation without fee, and without a written
// agreement is hereby granted, provided that the above copyright notice
// and this paragraph appear in all copies. This software program and
// documentation are copyrighted by The Regents of the University of
// California. The software program and documentation are supplied "AS
// IS", without any accompanying services from The Regents. The Regents
// does not warrant that the operation of the program will be
// uninterrupted or error-free. The end-user understands that the program
// was developed for research purposes and is advised not to rely
// exclusively on the program for any reason. IN NO EVENT SHALL THE
// UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
// SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
// ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
// THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE. THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
// PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
// CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT,
// UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
package org.argouml.util;
/**
 * Base class for custom separators.
 *
 * <p>It can be instantiated directly, and then works like a separator in the
 * delimiter string. For that purpose you should use the delimiter string
 * in MyTokenizer, unless your token is wider than 32 characters.
 *
 * <p>You can also subclass this class to provide for more intricate
 * recognition of the tokens. It is known that this class has been subclassed
 * to recognize quoted strings, and balanced parentheses.
 *
 * <p>You should have this mental image of the tokenizing process:
 * <ol>
 *   <li>reset is called.
 *   <li>For each character, c, in the sequence being tokenized:
 *     <ul>
 *       <li>addChar(c) is called for each separator in the tokenizer.
 *       <li>if addChar returns true, break.
 *     </ul>
 *   <li>hasFreePart is checked to see if something follows. If true:
 *     <ul>
 *       <li>endChar(c) is called for each following character.
 *       <li>if/when endChar returns true, break.
 *     </ul>
 *   <li>tokenLength is checked to see how far back in the sequence the token
 *       began. If there are characters before that but after the last token,
 *       then they are made a token and this token is saved and returned next.
 * </ol>
 *
 * @author Michael Stockman
 * @since 0.11.2
 * @see MyTokenizer
 */
public class CustomSeparator {
    /** The character sequence that the default {@link #addChar} looks for. */
    private final char[] pattern;

    /**
     * Sliding window holding the most recently added characters. It has the
     * same length as {@link #pattern} and is zero-filled by {@link #reset},
     * so slots that have not yet been overwritten by real input hold NUL.
     */
    private final char[] match;

    /**
     * This constructor is only available to subclasses of this class.
     * If you use it you should also override {@link #addChar addChar}
     * to recognize when your separator should take control. If you don't,
     * the inherited addChar matches an empty pattern, i.e. it returns true
     * for every character, and then you may block all other separators.
     */
    protected CustomSeparator() {
        pattern = new char[0];
        // Sharing the array is safe: a zero-length array has no elements
        // that could be modified.
        match = pattern;
    }

    /**
     * This constructor creates a new custom separator that matches the
     * character start. Unless you override {@link #addChar addChar}, the
     * default behaviour is to return false in addChar until start is
     * encountered and then hasFreePart returns false.
     *
     * @param start The start character.
     */
    public CustomSeparator(char start) {
        pattern = new char[] {start};
        match = new char[pattern.length];
    }

    /**
     * This constructor creates a new custom separator that matches the
     * string start. Unless you override {@link #addChar addChar}, the
     * default behaviour is to return false in addChar until start is
     * encountered and then hasFreePart returns false.
     *
     * @param start The start String.
     * @throws NullPointerException if start is null.
     */
    public CustomSeparator(String start) {
        pattern = start.toCharArray();
        match = new char[pattern.length];
    }

    /**
     * Called to reset the separator before starting to look for a new
     * token. Clears the window of recently seen characters.
     */
    public void reset() {
        for (int i = 0; i < match.length; i++) {
            match[i] = 0;
        }
    }

    /**
     * Returns the length of the matched token. It is not required to be
     * meaningful unless addChar has returned true and hasFreePart
     * returned false or endChar returned true.
     *
     * <p>The default behaviour is to return the length of the character
     * or String given as parameter to the constructor.
     *
     * @return The length of the matched token.
     */
    public int tokenLength() {
        return pattern.length;
    }

    /**
     * Called to allow you to decide if you want to capture control of
     * the matching process. If you return true, then
     * {@link #hasFreePart hasFreePart} will be checked to see if you
     * expect more things to follow.
     *
     * <p>The default behaviour is to return false until the character
     * or String given as parameter to the constructor has been matched.
     * An empty pattern (the protected constructor, or an empty String)
     * matches at every character.
     *
     * @param c The next character in the sequence being tokenized.
     * @return true to gain control of the matching, false to continue
     *         matching.
     */
    public boolean addChar(char c) {
        final int last = match.length - 1;
        if (last < 0) {
            // Empty pattern: there is no window to update. Previously this
            // case indexed match[-1] and threw
            // ArrayIndexOutOfBoundsException.
            return true;
        }

        // Slide the window one position to the left and append c.
        System.arraycopy(match, 1, match, 0, last);
        match[last] = c;

        for (int i = 0; i <= last; i++) {
            if (match[i] != pattern[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Called to check if more characters are expected to follow after
     * addChar has returned true. If true, then any following characters
     * will be fed to endChar until endChar returns true.
     *
     * <p>The default behaviour is to return false.
     *
     * @return true to continue feeding characters to endChar or false.
     */
    public boolean hasFreePart() {
        return false;
    }

    /**
     * Called to check if more characters are expected in the free part of
     * the token.
     *
     * <p>The default behaviour is to return true.
     *
     * @param c The next character in the sequence being tokenized.
     * @return true to indicate that the token is complete, or false to
     *         continue feeding characters through endChar.
     */
    public boolean endChar(char c) {
        return true;
    }

    /**
     * Called to determine how many characters the CustomSeparator read
     * after the end of the separator. This allows them to see beyond the
     * end, but these characters will be fed to the separators again
     * when looking for the next token so be careful.
     *
     * <p>The default behaviour is to return 0.
     *
     * @return the number of characters that were read after the end
     *         of the token had been read.
     */
    public int getPeekCount() {
        return 0;
    }
}