/* Tokenizer.java
Purpose:
Description:
History:
Mar 20, 2012 Created by pao
Copyright (C) 2011 Potix Corporation. All Rights Reserved.
*/
// ported from zk 6.0.0
// original package: org.zkoss.zk.ui.select
package org.zkoss.zats.common.select.impl;
import java.util.ArrayList;
import org.zkoss.zats.common.select.impl.Token.Type;
import org.zkoss.zats.common.select.impl.fsm.StateCtx;
import org.zkoss.zats.common.select.impl.fsm.StateMachine;
/**
 * A tokenizer of selector string.
 * <p>
 * Scanning is delegated to an anonymous {@link StateMachine} subclass that
 * classifies every character into a {@code CharClass} and records
 * {@link Token}s as {@code (type, begin, end)} index ranges. The token list
 * lives in an instance field that is replaced whenever the machine is reset.
 * <p>
 * NOTE(review): because the token list is shared instance state, concurrent
 * calls to {@link #tokenize(String)} on one instance look unsafe - confirm.
 * @since 6.0.0
 * @author simonpai
 */
public class Tokenizer
{
    /** State machine that performs the actual scanning. */
    private final StateMachine<State, CharClass, Character> _machine;

    /** Tokens collected by the machine; {@code null} until the machine's {@code onReset} has run. */
    private ArrayList<Token> _tokens;

    /**
     * Builds the tokenizer and the state machine that backs it.
     */
    public Tokenizer()
    {
        _tokens = null;
        _machine = new StateMachine<State, CharClass, Character>()
        {
            /** Begin index of the token currently being accumulated. */
            private int _anchor;
            /** Last character seen by {@code afterStep} (escape characters excluded). */
            private char _prevChar;
            /** Class of {@code _prevChar}; {@code null} right after a reset. */
            private CharClass _prevClass;
            /** Toggled by a {@code "} of class OTHER returned in the attribute state. */
            protected boolean _inDoubleQuote;
            /** Toggled by a {@code '} of class OTHER returned in the attribute state. */
            protected boolean _inSingleQuote;
            /** Set by '(' and cleared by ')'. */
            protected boolean _inParam;
            /**
             * Escape flag. Within this class it is only raised in
             * {@code getLandingState} and only cleared in {@code onReset}.
             * NOTE(review): verify against StateMachine whether that is intended.
             */
            protected boolean _escaped;
            /** True when the previous character was a pending '^', '$' or '*' inside an attribute. */
            protected boolean _opEscaped;

            /**
             * Configures the MAIN and IN_ATTRIBUTE states: '[' moves from MAIN to
             * IN_ATTRIBUTE and ']' moves back.
             */
            @Override
            protected void init()
            {
                getState(State.MAIN).setReturningAll(true).addMinorTransition('[', State.IN_ATTRIBUTE);

                // context for the attribute state: keeps track of quoted sections
                StateCtx<State, CharClass, Character> attributeCtx = new StateCtx<State, CharClass, Character>()
                {
                    @Override
                    protected void onReturn(Character ch, CharClass chClass)
                    {
                        // only unclassified characters can open/close a quote
                        if(chClass != CharClass.OTHER)
                            return;
                        switch(ch)
                        {
                            case '"':
                                _inDoubleQuote = !_inDoubleQuote;
                                break;
                            case '\'':
                                _inSingleQuote = !_inSingleQuote;
                                break;
                            default:
                                break;
                        }
                    }
                };
                setState(State.IN_ATTRIBUTE, attributeCtx).setReturningAll(true).addMinorTransition(']', State.MAIN);
                // TODO: IN_PARAM
            }

            /**
             * Clears every scanning flag and starts a fresh token list.
             */
            @Override
            protected void onReset()
            {
                _tokens = new ArrayList<Token>();
                _anchor = 0;
                _prevChar = '!';
                _prevClass = null;
                _inDoubleQuote = false;
                _inSingleQuote = false;
                _inParam = false;
                _escaped = false;
                _opEscaped = false;
            }

            /**
             * Emits tokens based on the current and the previous character.
             * Presumably invoked by the state machine once per consumed
             * character - confirm against StateMachine.
             *
             * @param input       the current character
             * @param inputClass  class assigned to {@code input}
             * @param origin      state before the step
             * @param destination state after the step (unused here)
             */
            @Override
            protected void afterStep(Character input, CharClass inputClass, State origin, State destination)
            {
                doDebug("* OP Escaped: " + _opEscaped);

                // escape characters never produce or terminate a token on their own
                if(inputClass == CharClass.ESCAPE)
                    return;

                final boolean inAttribute = origin == State.IN_ATTRIBUTE;
                // '^', '$' and '*' inside an attribute may be the first half of "^=", "$=", "*="
                final boolean isPrefix = inAttribute && inputClass == CharClass.OTHER
                        && (input == '^' || input == '$' || input == '*');

                // flush previous identifier/whitespace once the character class changes
                if(_prevClass != null && _prevClass != inputClass && _prevClass.isMultiple())
                    flush(_prevChar, _prevClass, false);

                // previous char is ^/$/* but input is not '=': emit the pending char by itself
                if(inAttribute && _opEscaped && input != '=')
                    flush(_prevChar, _prevClass, false);

                // single-character classes are emitted right away, except a pending prefix
                if(!(inputClass.isMultiple() || isPrefix))
                    flush(input, inputClass, true);

                // update status
                switch(input)
                {
                    case '(':
                        _inParam = true;
                        break;
                    case ')':
                        _inParam = false;
                        break;
                    default:
                        break;
                }
                _prevChar = input;
                _prevClass = inputClass;
                _opEscaped = isPrefix;
            }

            /**
             * Flushes the trailing token, if any, when the machine stops at the end of input.
             *
             * @param endOfInput whether the stop was caused by reaching the end of input
             */
            @Override
            protected void onStop(boolean endOfInput)
            {
                if(endOfInput && _anchor < _step)
                    flush(_prevChar, _prevClass, false);
            }

            /**
             * Classifies a character, taking quote, parameter and escape flags into account.
             *
             * @param c the character to classify
             * @return the class of {@code c}
             */
            @Override
            protected CharClass getClass(Character c)
            {
                // inside quotes everything but the (unescaped) closing quote is literal
                if((_inDoubleQuote && (_escaped || c != '"'))
                        || (_inSingleQuote && (_escaped || c != '\'')))
                    return CharClass.LITERAL;

                // TODO: check this
                if(_inParam && !(c == ',' || c == ')'))
                {
                    if(Character.isWhitespace(c))
                        return CharClass.OTHER;
                    return CharClass.LITERAL;
                }

                if(_escaped)
                {
                    if(Character.isWhitespace(c))
                        return CharClass.WHITESPACE;
                    return CharClass.LITERAL;
                }

                // identifier characters
                if(Character.isLetterOrDigit(c) || c == '-' || c == '_')
                    return CharClass.LITERAL;

                if(Character.isWhitespace(c))
                    return CharClass.WHITESPACE;

                if(c == '\\')
                    return CharClass.ESCAPE;
                return CharClass.OTHER;
            }

            /**
             * Chooses the landing state for an input: IN_ATTRIBUTE for '[', MAIN otherwise.
             * Also raises the escape flag when the input is an escape character.
             *
             * @param input      the character
             * @param inputClass class assigned to {@code input}
             * @return the landing state
             */
            @Override
            protected State getLandingState(Character input, CharClass inputClass)
            {
                if(input == '[')
                {
                    return State.IN_ATTRIBUTE;
                }
                if(inputClass == CharClass.ESCAPE)
                {
                    _escaped = true;
                }
                return State.MAIN;
            }

            /**
             * Turns a rejected input into a {@code ParseException}.
             *
             * @param input the rejected character
             */
            @Override
            protected void onReject(Character input)
            {
                throw new ParseException(_step, _current, input);
            }

            /**
             * Appends a token that starts at the anchor and moves the anchor to the token's end.
             *
             * @param input        character that decides the token type
             * @param inputClass   class that decides the token type
             * @param withCurrChar whether the character at the current step belongs to the token
             */
            private void flush(char input, CharClass inputClass, boolean withCurrChar)
            {
                final int offset;
                if(withCurrChar)
                    offset = 1;
                else if(_escaped)
                    offset = -1;
                else
                    offset = 0;
                final int endIndex = _step + offset;

                _tokens.add(new Token(getTokenType(input, inputClass), _anchor, endIndex));
                doDebug("! flush: [" + _anchor + ", " + endIndex + "]");
                _anchor = endIndex;
            }

            /**
             * Maps a character and its class to a token type.
             *
             * @param input      the character
             * @param inputClass the class of {@code input}
             * @return the token type
             */
            private Type getTokenType(char input, CharClass inputClass)
            {
                // multi-character classes map directly to a type
                switch(inputClass)
                {
                    case LITERAL:
                        return Type.IDENTIFIER;
                    case WHITESPACE:
                        return Type.WHITESPACE;
                    default:
                        break;
                }

                // context dependent characters
                if(input == ',')
                {
                    if(_inParam)
                        return Type.PARAM_SEPARATOR;
                    return Type.SELECTOR_SEPARATOR;
                }
                if(input == '=')
                    return getOperatorType(_prevChar);

                switch(input)
                {
                    case '*':
                        return Type.UNIVERSAL;
                    case '>':
                        return Type.CBN_CHILD;
                    case '+':
                        return Type.CBN_ADJACENT_SIBLING;
                    case '~':
                        return Type.CBN_GENERAL_SIBLING;
                    case '#':
                        return Type.NTN_ID;
                    case '.':
                        return Type.NTN_CLASS;
                    case ':':
                        return Type.NTN_PSDOCLS;
                    case '\'':
                        return Type.SINGLE_QUOTE;
                    case '"':
                        return Type.DOUBLE_QUOTE;
                    case '[':
                        return Type.OPEN_BRACKET;
                    case ']':
                        return Type.CLOSE_BRACKET;
                    case '(':
                        return Type.OPEN_PAREN;
                    case ')':
                        return Type.CLOSE_PAREN;
                    default:
                        break;
                }
                if(Character.isWhitespace(input))
                    return Type.MINOR_WHITESPACE;
                return Type.UNKNOWN_CHAR;
            }

            /**
             * Resolves the operator type of an '=' from the character that preceded it.
             *
             * @param prefix the character seen right before '='
             * @return the operator token type
             */
            private Type getOperatorType(char prefix)
            {
                switch(prefix)
                {
                    case '^':
                        return Type.OP_BEGIN_WITH;
                    case '$':
                        return Type.OP_END_WITH;
                    case '*':
                        return Type.OP_CONTAIN;
                    default:
                        return Type.OP_EQUAL;
                }
            }
        };
    }

    /**
     * Runs the state machine over the selector and returns the collected tokens.
     *
     * @param selector the selector string to tokenize
     * @return the token list held by this tokenizer after the run
     */
    public ArrayList<Token> tokenize(String selector)
    {
        final CharSequenceIterator chars = new CharSequenceIterator(selector);
        _machine.start(chars);
        return _tokens;
    }

    /**
     * Switches the debug mode of the underlying state machine.
     *
     * @param mode the debug mode flag passed through to the machine
     */
    public void setDebugMode(boolean mode)
    {
        _machine.setDebugMode(mode);
    }

    // state, input class //

    /** States of the tokenizer state machine. */
    private enum State
    {
        MAIN,
        IN_ATTRIBUTE;
    }

    /** Character classes; a "multiple" class groups consecutive characters into one token. */
    private enum CharClass
    {
        LITERAL(true),
        WHITESPACE(true),
        ESCAPE(false),
        OTHER(false);

        private final boolean _multiple;

        CharClass(boolean multiple)
        {
            _multiple = multiple;
        }

        /**
         * @return whether consecutive characters of this class are merged into a single token
         */
        public boolean isMultiple()
        {
            return _multiple;
        }
    }
}