/* * Copyright (c) 2008, Harald Kuhr * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name "TwelveMonkeys" nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package com.twelvemonkeys.util; import java.util.NoSuchElementException; /** * StringTokenIterator, a drop-in replacement for {@code StringTokenizer}. * StringTokenIterator has the following features: * <ul> * <li>Iterates over a strings, 20-50% faster than {@code StringTokenizer} * (and magnitudes faster than {@code String.split(..)} or * {@code Pattern.split(..)})</li> * <li>Implements the {@code Iterator} interface</li> * <li>Optionally returns delimiters</li> * <li>Optionally returns empty elements</li> * <li>Optionally iterates in reverse</li> * <li>Resettable</li> * </ul> * * @see java.util.StringTokenizer * @author <a href="mailto:harald.kuhr@gmail.com">Harald Kuhr</a> * @author last modified by $Author: haku $ * @version $Id: //depot/branches/personal/haraldk/twelvemonkeys/release-2/twelvemonkeys-core/src/main/java/com/twelvemonkeys/util/StringTokenIterator.java#1 $ */ public class StringTokenIterator extends AbstractTokenIterator { private final String string; private final char[] delimiters; private int position; private final int maxPosition; private String next; private String nextDelimiter; private final boolean includeDelimiters; private final boolean includeEmpty; private final boolean reverse; public final static int FORWARD = 1; public final static int REVERSE = -1; /** * Stores the value of the delimiter character with the highest value. * It is used to optimize the detection of delimiter characters. */ private final char maxDelimiter; /** * Creates a StringTokenIterator * * @param pString the string to be parsed. */ public StringTokenIterator(String pString) { this(pString, " \t\n\r\f".toCharArray(), FORWARD, false, false); } /** * Creates a StringTokenIterator * * @param pString the string to be parsed. * @param pDelimiters the delimiters. */ public StringTokenIterator(String pString, String pDelimiters) { this(pString, toCharArray(pDelimiters), FORWARD, false, false); } /** * Creates a StringTokenIterator * * @param pString the string to be parsed. * @param pDelimiters the delimiters. * @param pDirection iteration direction. */ public StringTokenIterator(String pString, String pDelimiters, int pDirection) { this(pString, toCharArray(pDelimiters), pDirection, false, false); } /** * Creates a StringTokenIterator * * @param pString the string to be parsed. * @param pDelimiters the delimiters. * @param pIncludeDelimiters flag indicating whether to return delimiters as tokens. */ public StringTokenIterator(String pString, String pDelimiters, boolean pIncludeDelimiters) { this(pString, toCharArray(pDelimiters), FORWARD, pIncludeDelimiters, false); } /** * Creates a StringTokenIterator * * @param pString the string to be parsed. * @param pDelimiters the delimiters. * @param pDirection iteration direction. * @param pIncludeDelimiters flag indicating whether to return delimiters as tokens. * @param pIncludeEmpty flag indicating whether to return empty tokens * */ public StringTokenIterator(String pString, String pDelimiters, int pDirection, boolean pIncludeDelimiters, boolean pIncludeEmpty) { this(pString, toCharArray(pDelimiters), pDirection, pIncludeDelimiters, pIncludeEmpty); } /** * Implementation. * * @param pString the string to be parsed. * @param pDelimiters the delimiters. * @param pDirection iteration direction. * @param pIncludeDelimiters flag indicating whether to return delimiters as tokens. * @param pIncludeEmpty flag indicating whether to return empty tokens */ private StringTokenIterator(String pString, char[] pDelimiters, int pDirection, boolean pIncludeDelimiters, boolean pIncludeEmpty) { if (pString == null) { throw new IllegalArgumentException("string == null"); } string = pString; maxPosition = pString.length(); delimiters = pDelimiters; includeDelimiters = pIncludeDelimiters; reverse = (pDirection == REVERSE); includeEmpty = pIncludeEmpty; maxDelimiter = initMaxDelimiter(pDelimiters); reset(); } private static char[] toCharArray(String pDelimiters) { if (pDelimiters == null) { throw new IllegalArgumentException("delimiters == null"); } return pDelimiters.toCharArray(); } /** * Returns the highest char in the delimiter set. * @param pDelimiters the delimiter set * @return the highest char */ private static char initMaxDelimiter(char[] pDelimiters) { if (pDelimiters == null) { return 0; } char max = 0; for (char c : pDelimiters) { if (max < c) { max = c; } } return max; } /** * Resets this iterator. * */ public void reset() { position = 0; next = null; nextDelimiter = null; } /** * Returns {@code true} if the iteration has more elements. (In other * words, returns {@code true} if {@code next} would return an element * rather than throwing an exception.) * * @return {@code true} if the iterator has more elements. */ public boolean hasNext() { return (next != null || fetchNext() != null); } private String fetchNext() { // If next is delimiter, return fast if (nextDelimiter != null) { next = nextDelimiter; nextDelimiter = null; return next; } // If no more chars, return null if (position >= maxPosition) { return null; } return reverse ? fetchReverse() : fetchForward(); } private String fetchReverse() { // Get previous position int prevPos = scanForPrev(); // Store next string next = string.substring(prevPos + 1, maxPosition - position); if (includeDelimiters && prevPos >= 0 && prevPos < maxPosition) { nextDelimiter = string.substring(prevPos, prevPos + 1); } position = maxPosition - prevPos; // Skip empty if (next.length() == 0 && !includeEmpty) { return fetchNext(); } return next; } private String fetchForward() { // Get next position int nextPos = scanForNext(); // Store next string next = string.substring(position, nextPos); if (includeDelimiters && nextPos >= 0 && nextPos < maxPosition) { nextDelimiter = string.substring(nextPos, nextPos + 1); } position = ++nextPos; // Skip empty if (next.length() == 0 && !includeEmpty) { return fetchNext(); } return next; } private int scanForNext() { int position = this.position; while (position < maxPosition) { // Find next match, using all delimiters char c = string.charAt(position); if (c <= maxDelimiter) { // Find first delimiter match for (char delimiter : delimiters) { if (c == delimiter) { return position;// Return if match } } } // Next... position++; } // Return last position, if no match return position; } private int scanForPrev() { int position = (maxPosition - 1) - this.position; while (position >= 0) { // Find next match, using all delimiters char c = string.charAt(position); if (c <= maxDelimiter) { // Find first delimiter match for (char delimiter : delimiters) { if (c == delimiter) { return position;// Return if match } } } // Next... position--; } // Return first position, if no match return position; } /** * Returns the next element in the iteration. * * @return the next element in the iteration. * @exception java.util.NoSuchElementException iteration has no more elements. */ public String next() { if (!hasNext()) { throw new NoSuchElementException(); } String next = this.next; this.next = fetchNext(); return next; } }