package er.extensions.foundation;

import java.text.CharacterIterator;
import java.text.ParseException;
import java.text.StringCharacterIterator;
import java.util.Enumeration;
import java.util.NoSuchElementException;

/**
 * Tokenizes a string like a commandline parser, tokenizing on spaces unless the words are
 * in double quotes or single quotes.
 * <p>
 * Rules applied by {@link #nextToken()}:
 * <ul>
 * <li>Tokens are separated by spaces, tabs and newlines.</li>
 * <li>A token that starts with {@code "} or {@code '} runs up to the matching closing quote;
 * the quotes are not part of the token and the token may be empty. The closing quote always
 * ends the token, so {@code "a"b} yields {@code a} and {@code b}. A missing closing quote is
 * tolerated: the token then runs to the end of the line.</li>
 * <li>A quote character that appears inside an unquoted token is kept literally.</li>
 * <li>A backslash makes the following character a literal part of the token, both inside
 * and outside of quotes. A backslash followed by a newline or by the end of the line is a
 * parse error.</li>
 * </ul>
 *
 * @author mschrag
 */
public class ERXCommandLineTokenizer implements Enumeration<String> {
    private static final char ESCAPE = '\\';

    private final StringCharacterIterator _iterator;

    /**
     * Creates a new ERXCommandLineTokenizer.
     *
     * @param line the line to parse
     */
    public ERXCommandLineTokenizer(String line) {
        _iterator = new StringCharacterIterator(line);
        reset();
    }

    /**
     * Rewinds the tokenizer to the first character of the line.
     */
    protected void reset() {
        _iterator.first();
    }

    /**
     * Returns true if there are more tokens on the line.
     *
     * @return true if there are more tokens on the line, false if not
     */
    public boolean hasMoreElements() {
        return hasMoreTokens();
    }

    /**
     * Returns true if there are more tokens on the line.
     *
     * @return true if there are more tokens on the line, false if not
     */
    public boolean hasMoreTokens() {
        // Separators never form a token on their own, so step over them before looking;
        // otherwise a line holding only whitespace would be reported as having a token.
        skipSeparators();
        return _iterator.current() != CharacterIterator.DONE;
    }

    /**
     * Returns the next token, or null if there is a parse error.
     *
     * @return the next token
     * @throws NoSuchElementException if there are no more tokens to parse
     */
    public String nextElement() {
        String token;
        try {
            token = nextToken();
        }
        catch (ParseException e) {
            // Enumeration.nextElement() cannot throw a checked exception, so the failure
            // is reported on stderr and signalled to the caller as null.
            e.printStackTrace();
            token = null;
        }
        return token;
    }

    /**
     * Returns the next token.
     *
     * @return the next token (an empty string for an empty pair of quotes)
     * @throws ParseException if there is a parse failure
     * @throws NoSuchElementException if there are no more tokens to parse
     */
    public String nextToken() throws ParseException {
        skipSeparators();
        char first = _iterator.current();
        if (first == CharacterIterator.DONE) {
            throw new NoSuchElementException("There are no more tokens on this line.");
        }
        if (first == '\"' || first == '\'') {
            return readQuoted(first);
        }
        return readUnquoted();
    }

    /**
     * Reads a quoted token. The iterator must be positioned on the opening quote; on return
     * it is positioned just after the closing quote (or at the end of the line if the quote
     * was never closed).
     */
    private String readQuoted(char quote) throws ParseException {
        StringBuilder token = new StringBuilder();
        char c = _iterator.next(); // step past the opening quote
        while (c != CharacterIterator.DONE && c != quote) {
            if (c == ESCAPE) {
                c = readEscaped();
            }
            token.append(c);
            c = _iterator.next();
        }
        if (c == quote) {
            _iterator.next(); // consume the closing quote
        }
        return token.toString();
    }

    /**
     * Reads an unquoted token. The iterator must be positioned on the token's first
     * character; on return it is positioned on the separator that ended the token (or at
     * the end of the line).
     */
    private String readUnquoted() throws ParseException {
        StringBuilder token = new StringBuilder();
        char c = _iterator.current();
        while (c != CharacterIterator.DONE && !isSeparator(c)) {
            if (c == ESCAPE) {
                c = readEscaped();
            }
            token.append(c);
            c = _iterator.next();
        }
        return token.toString();
    }

    /**
     * Returns the character following the backslash the iterator is positioned on, leaving
     * the iterator on that character.
     */
    private char readEscaped() throws ParseException {
        char escaped = _iterator.next();
        if (escaped == CharacterIterator.DONE || escaped == '\n') {
            throw new ParseException("Unexpected escape '\\' at end of string.", _iterator.getIndex());
        }
        return escaped;
    }

    /** Advances the iterator to the next character that is not a token separator. */
    private void skipSeparators() {
        char c = _iterator.current();
        while (isSeparator(c)) {
            c = _iterator.next();
        }
    }

    /** Returns true for the characters that separate tokens: space, tab and newline. */
    private static boolean isSeparator(char c) {
        return c == ' ' || c == '\t' || c == '\n';
    }
}