/*******************************************************************************
* Copyright (c) 2008 Scott Stanchfield.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Based on the ANTLR parser generator by Terence Parr, http://antlr.org
* Ric Klaren <klaren@cs.utwente.nl>
* Scott Stanchfield - Modifications for XML Parsing
*******************************************************************************/
package com.javadude.antxr;
import java.util.List;
import com.javadude.antxr.collections.impl.BitSet;
/**A linear-approximate LL(k) grammar analzyer.
*
* All lookahead elements are sets of token types.
*
* @author Terence Parr, John Lilley
* @see com.javadude.antxr.Grammar
* @see com.javadude.antxr.Lookahead
*/
public class LLkAnalyzer implements LLkGrammarAnalyzer {
// Set "analyzerDebug" to true
public boolean DEBUG_ANALYZER = false;
private AlternativeBlock currentBlock;
protected Tool tool = null;
protected Grammar grammar = null;
// True if analyzing a lexical grammar
protected boolean lexicalAnalysis = false;
// Used for formatting bit sets in default (Java) format
CharFormatter charFormatter = new JavaCharFormatter();
/** Create an LLk analyzer */
public LLkAnalyzer(Tool tool_) {
tool = tool_;
}
/** Return true if someone used the '.' wildcard default idiom.
* Either #(. children) or '.' as an alt by itself.
*/
protected boolean altUsesWildcardDefault(Alternative alt) {
AlternativeElement head = alt.head;
// if element is #(. blah) then check to see if el is root
if (head instanceof TreeElement &&
((TreeElement)head).root instanceof WildcardElement) {
return true;
}
if (head instanceof WildcardElement && head.next instanceof BlockEndElement) {
return true;
}
return false;
}
/**Is this block of alternatives LL(k)? Fill in alternative cache for this block.
* @return true if the block is deterministic
*/
public boolean deterministic(AlternativeBlock blk) {
/** The lookahead depth for this decision */
int k = 1; // start at k=1
if (DEBUG_ANALYZER) {
System.out.println("deterministic(" + blk + ")");
}
boolean det = true;
int nalts = blk.alternatives.size();
AlternativeBlock saveCurrentBlock = currentBlock;
currentBlock = blk;
/* don't allow nongreedy (...) blocks */
if (blk.greedy == false && !(blk instanceof OneOrMoreBlock) && !(blk instanceof ZeroOrMoreBlock)) {
tool.warning("Being nongreedy only makes sense for (...)+ and (...)*", grammar.getFilename(), blk.getLine(), blk.getColumn());
}
// SPECIAL CASE: only one alternative. We don't need to check the
// determinism, but other code expects the lookahead cache to be
// set for the single alt.
if (nalts == 1) {
AlternativeElement e = blk.getAlternativeAt(0).head;
currentBlock.alti = 0;
blk.getAlternativeAt(0).cache[1] = e.look(1);
blk.getAlternativeAt(0).lookaheadDepth = 1; // set lookahead to LL(1)
currentBlock = saveCurrentBlock;
return true; // always deterministic for one alt
}
for (int i = 0; i < nalts - 1; i++) {
currentBlock.alti = i;
currentBlock.analysisAlt = i; // which alt are we analyzing?
currentBlock.altj = i + 1; // reset this alt. Haven't computed yet,
// but we need the alt number.
// compare against other alternatives with lookahead depth k
for (int j = i + 1; j < nalts; j++) {
currentBlock.altj = j;
if (DEBUG_ANALYZER) {
System.out.println("comparing " + i + " against alt " + j);
}
currentBlock.analysisAlt = j; // which alt are we analyzing?
k = 1; // always attempt minimum lookahead possible.
// check to see if there is a lookahead depth that distinguishes
// between alternatives i and j.
Lookahead[] r = new Lookahead[grammar.maxk + 1];
boolean haveAmbiguity;
do {
haveAmbiguity = false;
if (DEBUG_ANALYZER) {
System.out.println("checking depth " + k + "<=" + grammar.maxk);
}
Lookahead p,q;
p = getAltLookahead(blk, i, k);
q = getAltLookahead(blk, j, k);
// compare LOOK(alt i) with LOOK(alt j). Is there an intersection?
// Lookahead must be disjoint.
if (DEBUG_ANALYZER) {
System.out.println("p is " + p.toString(",", charFormatter, grammar));
}
if (DEBUG_ANALYZER) {
System.out.println("q is " + q.toString(",", charFormatter, grammar));
}
// r[i] = p.fset.and(q.fset);
r[k] = p.intersection(q);
if (DEBUG_ANALYZER) {
System.out.println("intersection at depth " + k + " is " + r[k].toString());
}
if (!r[k].nil()) {
haveAmbiguity = true;
k++;
}
// go until no more lookahead to use or no intersection
} while (haveAmbiguity && k <= grammar.maxk);
Alternative ai = blk.getAlternativeAt(i);
Alternative aj = blk.getAlternativeAt(j);
if (haveAmbiguity) {
det = false;
ai.lookaheadDepth = GrammarAnalyzer.NONDETERMINISTIC;
aj.lookaheadDepth = GrammarAnalyzer.NONDETERMINISTIC;
/* if ith alt starts with a syntactic predicate, computing the
* lookahead is still done for code generation, but messages
* should not be generated when comparing against alt j.
* Alternatives with syn preds that are unnecessary do
* not result in syn pred try-blocks.
*/
if (ai.synPred != null) {
if (DEBUG_ANALYZER) {
System.out.println("alt " + i + " has a syn pred");
}
// The alt with the (...)=> block is nondeterministic for sure.
// If the (...)=> conflicts with alt j, j is nondeterministic.
// This prevents alt j from being in any switch statements.
// move on to next alternative=>no possible ambiguity!
// continue inner;
}
/* if ith alt starts with a semantic predicate, computing the
* lookahead is still done for code generation, but messages
* should not be generated when comparing against alt j.
*/
else if (ai.semPred != null) {
if (DEBUG_ANALYZER) {
System.out.println("alt " + i + " has a sem pred");
}
}
/* if jth alt is exactly the wildcard or wildcard root of tree,
* then remove elements from alt i lookahead from alt j's lookahead.
* Don't do an ambiguity warning.
*/
else if (altUsesWildcardDefault(aj)) {
// System.out.println("removing pred sets");
// removeCompetingPredictionSetsFromWildcard(aj.cache, aj.head, grammar.maxk);
}
/* If the user specified warnWhenFollowAmbig=false, then we
* can turn off this warning IFF one of the alts is empty;
* that is, it points immediately at the end block.
*/
else if (!blk.warnWhenFollowAmbig &&
(ai.head instanceof BlockEndElement ||
aj.head instanceof BlockEndElement)) {
// System.out.println("ai.head pts to "+ai.head.getClass());
// System.out.println("aj.head pts to "+aj.head.getClass());
}
/* If they have the generateAmbigWarnings option off for the block
* then don't generate a warning.
*/
else if (!blk.generateAmbigWarnings) {
// nothing
}
/* If greedy=true and *one* empty alt shut off warning. */
else if (blk.greedySet && blk.greedy &&
((ai.head instanceof BlockEndElement &&
!(aj.head instanceof BlockEndElement)) ||
(aj.head instanceof BlockEndElement &&
!(ai.head instanceof BlockEndElement)))) {
// System.out.println("greedy set to true; one alt empty");
}
/* We have no choice, but to report a nondetermism */
else {
tool.errorHandler.warnAltAmbiguity(
grammar,
blk, // the block
lexicalAnalysis, // true if lexical
grammar.maxk, // depth of ambiguity
r, // set of linear ambiguities
i, // first ambiguous alternative
j // second ambiguous alternative
);
}
}
else {
// a lookahead depth, k, was found where i and j do not conflict
ai.lookaheadDepth = Math.max(ai.lookaheadDepth, k);
aj.lookaheadDepth = Math.max(aj.lookaheadDepth, k);
}
}
}
// finished with block.
// If had wildcard default clause idiom, remove competing lookahead
/*
if ( wildcardAlt!=null ) {
removeCompetingPredictionSetsFromWildcard(wildcardAlt.cache, wildcardAlt.head, grammar.maxk);
}
*/
currentBlock = saveCurrentBlock;
return det;
}
/**Is (...)+ block LL(1)? Fill in alternative cache for this block.
* @return true if the block is deterministic
*/
public boolean deterministic(OneOrMoreBlock blk) {
if (DEBUG_ANALYZER) {
System.out.println("deterministic(...)+(" + blk + ")");
}
AlternativeBlock saveCurrentBlock = currentBlock;
currentBlock = blk;
boolean blkOk = deterministic((AlternativeBlock)blk);
// block has been checked, now check that what follows does not conflict
// with the lookahead of the (...)+ block.
boolean det = deterministicImpliedPath(blk);
currentBlock = saveCurrentBlock;
return det && blkOk;
}
/**Is (...)* block LL(1)? Fill in alternative cache for this block.
* @return true if the block is deterministic
*/
public boolean deterministic(ZeroOrMoreBlock blk) {
if (DEBUG_ANALYZER) {
System.out.println("deterministic(...)*(" + blk + ")");
}
AlternativeBlock saveCurrentBlock = currentBlock;
currentBlock = blk;
boolean blkOk = deterministic((AlternativeBlock)blk);
// block has been checked, now check that what follows does not conflict
// with the lookahead of the (...)* block.
boolean det = deterministicImpliedPath(blk);
currentBlock = saveCurrentBlock;
return det && blkOk;
}
/**Is this (...)* or (...)+ block LL(k)?
* @return true if the block is deterministic
*/
public boolean deterministicImpliedPath(BlockWithImpliedExitPath blk) {
/** The lookahead depth for this decision considering implied exit path */
int k;
boolean det = true;
List<Alternative> alts = blk.getAlternatives();
int nalts = alts.size();
currentBlock.altj = -1; // comparing against implicit optional/exit alt
if (DEBUG_ANALYZER) {
System.out.println("deterministicImpliedPath");
}
for (int i = 0; i < nalts; i++) { // check follow against all alts
Alternative alt = blk.getAlternativeAt(i);
if (alt.head instanceof BlockEndElement) {
tool.warning("empty alternative makes no sense in (...)* or (...)+", grammar.getFilename(), blk.getLine(), blk.getColumn());
}
k = 1; // assume eac alt is LL(1) with exit branch
// check to see if there is a lookahead depth that distinguishes
// between alternative i and the exit branch.
Lookahead[] r = new Lookahead[grammar.maxk + 1];
boolean haveAmbiguity;
do {
haveAmbiguity = false;
if (DEBUG_ANALYZER) {
System.out.println("checking depth " + k + "<=" + grammar.maxk);
}
Lookahead p;
Lookahead follow = blk.next.look(k);
blk.exitCache[k] = follow;
currentBlock.alti = i;
p = getAltLookahead(blk, i, k);
if (DEBUG_ANALYZER) {
System.out.println("follow is " + follow.toString(",", charFormatter, grammar));
}
if (DEBUG_ANALYZER) {
System.out.println("p is " + p.toString(",", charFormatter, grammar));
}
//r[k] = follow.fset.and(p.fset);
r[k] = follow.intersection(p);
if (DEBUG_ANALYZER) {
System.out.println("intersection at depth " + k + " is " + r[k]);
}
if (!r[k].nil()) {
haveAmbiguity = true;
k++;
}
// go until no more lookahead to use or no intersection
} while (haveAmbiguity && k <= grammar.maxk);
if (haveAmbiguity) {
det = false;
alt.lookaheadDepth = GrammarAnalyzer.NONDETERMINISTIC;
blk.exitLookaheadDepth = GrammarAnalyzer.NONDETERMINISTIC;
Alternative ambigAlt = blk.getAlternativeAt(currentBlock.alti);
/* If the user specified warnWhenFollowAmbig=false, then we
* can turn off this warning.
*/
if (!blk.warnWhenFollowAmbig) {
// nothing
}
/* If they have the generateAmbigWarnings option off for the block
* then don't generate a warning.
*/
else if (!blk.generateAmbigWarnings) {
// nothing
}
/* If greedy=true and alt not empty, shut off warning */
else if (blk.greedy == true && blk.greedySet &&
!(ambigAlt.head instanceof BlockEndElement)) {
if (DEBUG_ANALYZER) {
System.out.println("greedy loop");
}
}
/* If greedy=false then shut off warning...will have
* to add "if FOLLOW break"
* block during code gen to compensate for removal of warning.
*/
else if (blk.greedy == false &&
!(ambigAlt.head instanceof BlockEndElement)) {
if (DEBUG_ANALYZER) {
System.out.println("nongreedy loop");
}
// if FOLLOW not single k-string (|set[k]| can
// be > 1 actually) then must warn them that
// loop may terminate incorrectly.
// For example, ('a'..'d')+ ("ad"|"cb")
if (!LLkAnalyzer.lookaheadEquivForApproxAndFullAnalysis(blk.exitCache, grammar.maxk)) {
tool.warning(new String[]{
"nongreedy block may exit incorrectly due",
"\tto limitations of linear approximate lookahead (first k-1 sets",
"\tin lookahead not singleton)."},
grammar.getFilename(), blk.getLine(), blk.getColumn());
}
}
// no choice but to generate a warning
else {
tool.errorHandler.warnAltExitAmbiguity(
grammar,
blk, // the block
lexicalAnalysis, // true if lexical
grammar.maxk, // depth of ambiguity
r, // set of linear ambiguities
i // ambiguous alternative
);
}
}
else {
alt.lookaheadDepth = Math.max(alt.lookaheadDepth, k);
blk.exitLookaheadDepth = Math.max(blk.exitLookaheadDepth, k);
}
}
return det;
}
/**Compute the lookahead set of whatever follows references to
* the rule associated witht the FOLLOW block.
*/
public Lookahead FOLLOW(int k, RuleEndElement end) {
// what rule are we trying to compute FOLLOW of?
RuleBlock rb = (RuleBlock)end.block;
// rule name is different in lexer
String rule;
if (lexicalAnalysis) {
rule = CodeGenerator.encodeLexerRuleName(rb.getRuleName());
}
else {
rule = rb.getRuleName();
}
if (DEBUG_ANALYZER) {
System.out.println("FOLLOW(" + k + "," + rule + ")");
}
// are we in the midst of computing this FOLLOW already?
if (end.lock[k]) {
if (DEBUG_ANALYZER) {
System.out.println("FOLLOW cycle to " + rule);
}
return new Lookahead(rule);
}
// Check to see if there is cached value
if (end.cache[k] != null) {
if (DEBUG_ANALYZER) {
System.out.println("cache entry FOLLOW(" + k + ") for " + rule + ": " + end.cache[k].toString(",", charFormatter, grammar));
}
// if the cache is a complete computation then simply return entry
if (end.cache[k].cycle == null) {
return (Lookahead)end.cache[k].clone();
}
// A cache entry exists, but it is a reference to a cyclic computation.
RuleSymbol rs = (RuleSymbol)grammar.getSymbol(end.cache[k].cycle);
RuleEndElement re = rs.getBlock().endNode;
// The other entry may not exist because it is still being
// computed when this cycle cache entry was found here.
if (re.cache[k] == null) {
// return the cycle...that's all we can do at the moment.
return (Lookahead)end.cache[k].clone();
}
if (DEBUG_ANALYZER) {
System.out.println("combining FOLLOW(" + k + ") for " + rule + ": from "+end.cache[k].toString(",", charFormatter, grammar) + " with FOLLOW for "+((RuleBlock)re.block).getRuleName()+": "+re.cache[k].toString(",", charFormatter, grammar));
}
// combine results from other rule's FOLLOW
if ( re.cache[k].cycle==null ) {
// current rule depends on another rule's FOLLOW and
// it is complete with no cycle; just kill our cycle and
// combine full result from other rule's FOLLOW
end.cache[k].combineWith(re.cache[k]);
end.cache[k].cycle = null; // kill cycle as we're complete
}
else {
// the FOLLOW cache for other rule has a cycle also.
// Here is where we bubble up a cycle. We better recursively
// wipe out cycles (partial computations). I'm a little nervous
// that we might leave a cycle here, however.
Lookahead refFOLLOW = FOLLOW(k, re);
end.cache[k].combineWith( refFOLLOW );
// all cycles should be gone, but if not, record ref to cycle
end.cache[k].cycle = refFOLLOW.cycle;
}
if (DEBUG_ANALYZER) {
System.out.println("saving FOLLOW(" + k + ") for " + rule + ": from "+end.cache[k].toString(",", charFormatter, grammar));
}
// Return the updated cache entry associated
// with the cycle reference.
return (Lookahead)end.cache[k].clone();
}
end.lock[k] = true; // prevent FOLLOW computation cycles
Lookahead p = new Lookahead();
RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);
// Walk list of references to this rule to compute FOLLOW
for (int i = 0; i < rs.numReferences(); i++) {
RuleRefElement rr = rs.getReference(i);
if (DEBUG_ANALYZER) {
System.out.println("next[" + rule + "] is " + rr.next.toString());
}
Lookahead q = rr.next.look(k);
if (DEBUG_ANALYZER) {
System.out.println("FIRST of next[" + rule + "] ptr is " + q.toString());
}
/* If there is a cycle then if the cycle is to the rule for
* this end block, you have a cycle to yourself. Remove the
* cycle indication--the lookahead is complete.
*/
if (q.cycle != null && q.cycle.equals(rule)) {
q.cycle = null; // don't want cycle to yourself!
}
// add the lookahead into the current FOLLOW computation set
p.combineWith(q);
if (DEBUG_ANALYZER) {
System.out.println("combined FOLLOW[" + rule + "] is " + p.toString());
}
}
end.lock[k] = false; // we're not doing FOLLOW anymore
// if no rules follow this, it can be a start symbol or called by a start sym.
// set the follow to be end of file.
if (p.fset.nil() && p.cycle == null) {
if (grammar instanceof TreeWalkerGrammar) {
// Tree grammars don't see EOF, they see end of sibling list or
// "NULL TREE LOOKAHEAD".
p.fset.add(Token.NULL_TREE_LOOKAHEAD);
}
else if (grammar instanceof LexerGrammar) {
// Lexical grammars use Epsilon to indicate that the end of rule has been hit
// EOF would be misleading; any character can follow a token rule not just EOF
// as in a grammar (where a start symbol is followed by EOF). There is no
// sequence info in a lexer between tokens to indicate what is the last token
// to be seen.
// p.fset.add(EPSILON_TYPE);
p.setEpsilon();
}
else {
p.fset.add(Token.EOF_TYPE);
}
}
// Cache the result of the FOLLOW computation
if (DEBUG_ANALYZER) {
System.out.println("saving FOLLOW(" + k + ") for " + rule + ": " + p.toString(",", charFormatter, grammar));
}
end.cache[k] = (Lookahead)p.clone();
return p;
}
private Lookahead getAltLookahead(AlternativeBlock blk, int alt, int k) {
Lookahead p;
Alternative a = blk.getAlternativeAt(alt);
AlternativeElement e = a.head;
//System.out.println("getAltLookahead("+k+","+e+"), cache size is "+a.cache.length);
if (a.cache[k] == null) {
p = e.look(k);
a.cache[k] = p;
}
else {
p = a.cache[k];
}
return p;
}
/**Actions are ignored */
public Lookahead look(int k, ActionElement action) {
if (DEBUG_ANALYZER) {
System.out.println("lookAction(" + k + "," + action + ")");
}
return action.next.look(k);
}
/**Combine the lookahead computed for each alternative */
public Lookahead look(int k, AlternativeBlock blk) {
if (DEBUG_ANALYZER) {
System.out.println("lookAltBlk(" + k + "," + blk + ")");
}
AlternativeBlock saveCurrentBlock = currentBlock;
currentBlock = blk;
Lookahead p = new Lookahead();
for (int i = 0; i < blk.alternatives.size(); i++) {
if (DEBUG_ANALYZER) {
System.out.println("alt " + i + " of " + blk);
}
// must set analysis alt
currentBlock.analysisAlt = i;
Alternative alt = blk.getAlternativeAt(i);
AlternativeElement elem = alt.head;
if (DEBUG_ANALYZER) {
if (alt.head == alt.tail) {
System.out.println("alt " + i + " is empty");
}
}
Lookahead q = elem.look(k);
p.combineWith(q);
}
if (k == 1 && blk.not && subruleCanBeInverted(blk, lexicalAnalysis)) {
// Invert the lookahead set
if (lexicalAnalysis) {
BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
int[] elems = p.fset.toArray();
for (int j = 0; j < elems.length; j++) {
b.remove(elems[j]);
}
p.fset = b;
}
else {
p.fset.notInPlace(Token.MIN_USER_TYPE, grammar.tokenManager.maxTokenType());
}
}
currentBlock = saveCurrentBlock;
return p;
}
/**Compute what follows this place-holder node and possibly
* what begins the associated loop unless the
* node is locked.
* <p>
* if we hit the end of a loop, we have to include
* what tokens can begin the loop as well. If the start
* node is locked, then we simply found an empty path
* through this subrule while analyzing it. If the
* start node is not locked, then this node was hit
* during a FOLLOW operation and the FIRST of this
* block must be included in that lookahead computation.
*/
public Lookahead look(int k, BlockEndElement end) {
if (DEBUG_ANALYZER) {
System.out.println("lookBlockEnd(" + k + ", " + end.block + "); lock is " + end.lock[k]);
}
if (end.lock[k]) {
// computation in progress => the tokens we would have
// computed (had we not been locked) will be included
// in the set by that computation with the lock on this
// node.
return new Lookahead();
}
Lookahead p;
/* Hitting the end of a loop means you can see what begins the loop */
if (end.block instanceof ZeroOrMoreBlock ||
end.block instanceof OneOrMoreBlock) {
// compute what can start the block,
// but lock end node so we don't do it twice in same
// computation.
end.lock[k] = true;
p = look(k, end.block);
end.lock[k] = false;
}
else {
p = new Lookahead();
}
/* Tree blocks do not have any follow because they are children
* of what surrounds them. For example, A #(B C) D results in
* a look() for the TreeElement end of NULL_TREE_LOOKAHEAD, which
* indicates that nothing can follow the last node of tree #(B C)
*/
if (end.block instanceof TreeElement) {
p.combineWith(Lookahead.of(Token.NULL_TREE_LOOKAHEAD));
}
/* Syntactic predicates such as ( (A)? )=> have no follow per se.
* We cannot accurately say what would be matched following a
* syntactic predicate (you MIGHT be ok if you said it was whatever
* followed the alternative predicted by the predicate). Hence,
* (like end-of-token) we return Epsilon to indicate "unknown
* lookahead."
*/
else if (end.block instanceof SynPredBlock) {
p.setEpsilon();
}
// compute what can follow the block
else {
Lookahead q = end.block.next.look(k);
p.combineWith(q);
}
return p;
}
/**Return this char as the lookahead if k=1.
* <p>### Doesn't work for ( 'a' 'b' | 'a' ~'b' ) yet!!!
* <p>
* If the atom has the <tt>not</tt> flag on, then
* create the set complement of the tokenType
* which is the set of all characters referenced
* in the grammar with this char turned off.
* Also remove characters from the set that
* are currently allocated for predicting
* previous alternatives. This avoids ambiguity
* messages and is more properly what is meant.
* ( 'a' | ~'a' ) implies that the ~'a' is the
* "else" clause.
* <p>
* NOTE: we do <b>NOT</b> include exit path in
* the exclusion set. E.g.,
* ( 'a' | ~'a' )* 'b'
* should exit upon seeing a 'b' during the loop.
*/
public Lookahead look(int k, CharLiteralElement atom) {
if (DEBUG_ANALYZER) {
System.out.println("lookCharLiteral(" + k + "," + atom + ")");
}
// Skip until analysis hits k==1
if (k > 1) {
return atom.next.look(k - 1);
}
if (lexicalAnalysis) {
if (atom.not) {
BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
if (DEBUG_ANALYZER) {
System.out.println("charVocab is " + b.toString());
}
// remove stuff predicted by preceding alts and follow of block
removeCompetingPredictionSets(b, atom);
if (DEBUG_ANALYZER) {
System.out.println("charVocab after removal of prior alt lookahead " + b.toString());
}
// now remove element that is stated not to be in the set
b.clear(atom.getType());
return new Lookahead(b);
}
return Lookahead.of(atom.getType());
}
// Should have been avoided by MakeGrammar
tool.fatalError("panic: Character literal reference found in parser");
// ... so we make the compiler happy
return Lookahead.of(atom.getType());
}
public Lookahead look(int k, CharRangeElement r) {
if (DEBUG_ANALYZER) {
System.out.println("lookCharRange(" + k + "," + r + ")");
}
// Skip until analysis hits k==1
if (k > 1) {
return r.next.look(k - 1);
}
BitSet p = BitSet.of(r.begin);
for (int i = r.begin + 1; i <= r.end; i++) {
p.add(i);
}
return new Lookahead(p);
}
public Lookahead look(int k, GrammarAtom atom) {
if (DEBUG_ANALYZER) {
System.out.println("look(" + k + "," + atom + "[" + atom.getType() + "])");
}
if (lexicalAnalysis) {
// MakeGrammar should have created a rule reference instead
tool.fatalError("panic: token reference found in lexer");
}
// Skip until analysis hits k==1
if (k > 1) {
return atom.next.look(k - 1);
}
Lookahead l = Lookahead.of(atom.getType());
if (atom.not) {
// Invert the lookahead set against the token vocabulary
int maxToken = grammar.tokenManager.maxTokenType();
l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
// remove stuff predicted by preceding alts and follow of block
removeCompetingPredictionSets(l.fset, atom);
}
return l;
}
/**The lookahead of a (...)+ block is the combined lookahead of
* all alternatives and, if an empty path is found, the lookahead
* of what follows the block.
*/
public Lookahead look(int k, OneOrMoreBlock blk) {
if (DEBUG_ANALYZER) {
System.out.println("look+" + k + "," + blk + ")");
}
Lookahead p = look(k, (AlternativeBlock)blk);
return p;
}
/**Combine the lookahead computed for each alternative.
* Lock the node so that no other computation may come back
* on itself--infinite loop. This also implies infinite left-recursion
* in the grammar (or an error in this algorithm ;)).
*/
public Lookahead look(int k, RuleBlock blk) {
if (DEBUG_ANALYZER) {
System.out.println("lookRuleBlk(" + k + "," + blk + ")");
}
Lookahead p = look(k, (AlternativeBlock)blk);
return p;
}
/**If not locked or noFOLLOW set, compute FOLLOW of a rule.
* <p>
* TJP says 8/12/99: not true anymore:
* Lexical rules never compute follow. They set epsilon and
* the code generator gens code to check for any character.
* The code generator must remove the tokens used to predict
* any previous alts in the same block.
* <p>
* When the last node of a rule is reached and noFOLLOW,
* it implies that a "local" FOLLOW will be computed
* after this call. I.e.,
* <pre>
* a : b A;
* b : B | ;
* c : b C;
* </pre>
* Here, when computing the look of rule b from rule a,
* we want only {B,EPSILON_TYPE} so that look(b A) will
* be {B,A} not {B,A,C}.
* <p>
* if the end block is not locked and the FOLLOW is
* wanted, the algorithm must compute the lookahead
* of what follows references to this rule. If
* end block is locked, FOLLOW will return an empty set
* with a cycle to the rule associated with this end block.
*/
public Lookahead look(int k, RuleEndElement end) {
if (DEBUG_ANALYZER) {
System.out.println("lookRuleBlockEnd(" + k + "); noFOLLOW=" +
end.noFOLLOW + "; lock is " + end.lock[k]);
}
if (/*lexicalAnalysis ||*/ end.noFOLLOW) {
Lookahead p = new Lookahead();
p.setEpsilon();
p.epsilonDepth = BitSet.of(k);
return p;
}
Lookahead p = FOLLOW(k, end);
return p;
}
/**Compute the lookahead contributed by a rule reference.
*
* <p>
* When computing ruleref lookahead, we don't want the FOLLOW
* computation done if an empty path exists for the rule.
* The FOLLOW is too loose of a set...we want only to
* include the "local" FOLLOW or what can follow this
* particular ref to the node. In other words, we use
* context information to reduce the complexity of the
* analysis and strengthen the parser.
*
* The noFOLLOW flag is used as a means of restricting
* the FOLLOW to a "local" FOLLOW. This variable is
* orthogonal to the <tt>lock</tt> variable that prevents
* infinite recursion. noFOLLOW does not care about what k is.
*/
public Lookahead look(int k, RuleRefElement rr) {
if (DEBUG_ANALYZER) {
System.out.println("lookRuleRef(" + k + "," + rr + ")");
}
RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
if (rs == null || !rs.defined) {
tool.error("no definition of rule " + rr.targetRule, grammar.getFilename(), rr.getLine(), rr.getColumn());
return new Lookahead();
}
RuleBlock rb = rs.getBlock();
RuleEndElement end = rb.endNode;
boolean saveEnd = end.noFOLLOW;
end.noFOLLOW = true;
// go off to the rule and get the lookahead (w/o FOLLOW)
Lookahead p = look(k, rr.targetRule);
if (DEBUG_ANALYZER) {
System.out.println("back from rule ref to " + rr.targetRule);
}
// restore state of end block
end.noFOLLOW = saveEnd;
// check for infinite recursion. If a cycle is returned: trouble!
if (p.cycle != null) {
tool.error("infinite recursion to rule " + p.cycle + " from rule " +
rr.enclosingRuleName, grammar.getFilename(), rr.getLine(), rr.getColumn());
}
// is the local FOLLOW required?
if (p.containsEpsilon()) {
if (DEBUG_ANALYZER) {
System.out.println("rule ref to " +
rr.targetRule + " has eps, depth: " + p.epsilonDepth);
}
// remove epsilon
p.resetEpsilon();
// fset.clear(EPSILON_TYPE);
// for each lookahead depth that saw epsilon
int[] depths = p.epsilonDepth.toArray();
p.epsilonDepth = null; // clear all epsilon stuff
for (int i = 0; i < depths.length; i++) {
int rk = k - (k - depths[i]);
Lookahead q = rr.next.look(rk); // see comments in Lookahead
p.combineWith(q);
}
// note: any of these look() computations for local follow can
// set EPSILON in the set again if the end of this rule is found.
}
return p;
}
public Lookahead look(int k, StringLiteralElement atom) {
if (DEBUG_ANALYZER) {
System.out.println("lookStringLiteral(" + k + "," + atom + ")");
}
if (lexicalAnalysis) {
// need more lookahead than string can provide?
if (k > atom.processedAtomText.length()) {
return atom.next.look(k - atom.processedAtomText.length());
}
// get char at lookahead depth k, from the processed literal text
return Lookahead.of(atom.processedAtomText.charAt(k - 1));
}
// Skip until analysis hits k==1
if (k > 1) {
return atom.next.look(k - 1);
}
Lookahead l = Lookahead.of(atom.getType());
if (atom.not) {
// Invert the lookahead set against the token vocabulary
int maxToken = grammar.tokenManager.maxTokenType();
l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
}
return l;
}
/**The lookahead of a (...)=> block is the lookahead of
* what follows the block. By definition, the syntactic
* predicate block defies static analysis (you want to try it
* out at run-time). The LOOK of (a)=>A B is A for LL(1)
* ### is this even called?
*/
public Lookahead look(int k, SynPredBlock blk) {
if (DEBUG_ANALYZER) {
System.out.println("look=>(" + k + "," + blk + ")");
}
return blk.next.look(k);
}
public Lookahead look(int k, TokenRangeElement r) {
if (DEBUG_ANALYZER) {
System.out.println("lookTokenRange(" + k + "," + r + ")");
}
// Skip until analysis hits k==1
if (k > 1) {
return r.next.look(k - 1);
}
BitSet p = BitSet.of(r.begin);
for (int i = r.begin + 1; i <= r.end; i++) {
p.add(i);
}
return new Lookahead(p);
}
public Lookahead look(int k, TreeElement t) {
if (DEBUG_ANALYZER) {
System.out.println("look(" + k + "," + t.root + "[" + t.root.getType() + "])");
}
if (k > 1) {
return t.next.look(k - 1);
}
Lookahead l = null;
if (t.root instanceof WildcardElement) {
l = t.root.look(1); // compute FIRST set minus previous rows
}
else {
l = Lookahead.of(t.root.getType());
if (t.root.not) {
// Invert the lookahead set against the token vocabulary
int maxToken = grammar.tokenManager.maxTokenType();
l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
}
}
return l;
}
public Lookahead look(int k, WildcardElement wc) {
if (DEBUG_ANALYZER) {
System.out.println("look(" + k + "," + wc + ")");
}
// Skip until analysis hits k==1
if (k > 1) {
return wc.next.look(k - 1);
}
BitSet b;
if (lexicalAnalysis) {
// Copy the character vocabulary
b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
}
else {
b = new BitSet(1);
// Invert the lookahead set against the token vocabulary
int maxToken = grammar.tokenManager.maxTokenType();
b.notInPlace(Token.MIN_USER_TYPE, maxToken);
if (DEBUG_ANALYZER) {
System.out.println("look(" + k + "," + wc + ") after not: " + b);
}
}
// Remove prediction sets from competing alternatives
// removeCompetingPredictionSets(b, wc);
return new Lookahead(b);
}
/** The (...)* element is the combined lookahead of the alternatives and what can
* follow the loop.
*/
public Lookahead look(int k, ZeroOrMoreBlock blk) {
if (DEBUG_ANALYZER) {
System.out.println("look*(" + k + "," + blk + ")");
}
Lookahead p = look(k, (AlternativeBlock)blk);
Lookahead q = blk.next.look(k);
p.combineWith(q);
return p;
}
/**Compute the combined lookahead for all productions of a rule.
* If the lookahead returns with epsilon, at least one epsilon
* path exists (one that consumes no tokens). The noFOLLOW
* flag being set for this endruleblk, indicates that the
* a rule ref invoked this rule.
*
* Currently only look(RuleRef) calls this. There is no need
* for the code generator to call this.
*/
public Lookahead look(int k, String rule) {
if (DEBUG_ANALYZER) {
System.out.println("lookRuleName(" + k + "," + rule + ")");
}
RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);
RuleBlock rb = rs.getBlock();
if (rb.lock[k]) {
if (DEBUG_ANALYZER) {
System.out.println("infinite recursion to rule " + rb.getRuleName());
}
return new Lookahead(rule);
}
// have we computed it before?
if (rb.cache[k] != null) {
if (DEBUG_ANALYZER) {
System.out.println("found depth " + k + " result in FIRST " + rule + " cache: " +
rb.cache[k].toString(",", charFormatter, grammar));
}
return (Lookahead)rb.cache[k].clone();
}
rb.lock[k] = true;
Lookahead p = look(k, rb);
rb.lock[k] = false;
// cache results
rb.cache[k] = (Lookahead)p.clone();
if (DEBUG_ANALYZER) {
System.out.println("saving depth " + k + " result in FIRST " + rule + " cache: " +
rb.cache[k].toString(",", charFormatter, grammar));
}
return p;
}
/** If the first k-1 sets are singleton sets, the appoximate
* lookahead analysis is equivalent to full lookahead analysis.
*/
public static boolean lookaheadEquivForApproxAndFullAnalysis(Lookahead[] bset, int k) {
// first k-1 sets degree 1?
for (int i = 1; i <= k - 1; i++) {
BitSet look = bset[i].fset;
if (look.degree() > 1) {
return false;
}
}
return true;
}
/** Remove the prediction sets from preceding alternatives
* and follow set, but *only* if this element is the first element
* of the alternative. The class members currenBlock and
* currentBlock.analysisAlt must be set correctly.
* @param b The prediction bitset to be modified
* @el The element of interest
*/
private void removeCompetingPredictionSets(BitSet b, AlternativeElement el) {
// Only do this if the element is the first element of the alt,
// because we are making an implicit assumption that k==1.
GrammarElement head = currentBlock.getAlternativeAt(currentBlock.analysisAlt).head;
// if element is #(. blah) then check to see if el is root
if (head instanceof TreeElement) {
if (((TreeElement)head).root != el) {
return;
}
}
else if (el != head) {
return;
}
for (int i = 0; i < currentBlock.analysisAlt; i++) {
AlternativeElement e = currentBlock.getAlternativeAt(i).head;
b.subtractInPlace(e.look(1).fset);
}
}
/** reset the analyzer so it looks like a new one */
private void reset() {
grammar = null;
DEBUG_ANALYZER = false;
currentBlock = null;
lexicalAnalysis = false;
}
/** Set the grammar for the analyzer */
public void setGrammar(Grammar g) {
if (grammar != null) {
reset();
}
grammar = g;
// Is this lexical?
lexicalAnalysis = (grammar instanceof LexerGrammar);
DEBUG_ANALYZER = grammar.analyzerDebug;
}
public boolean subruleCanBeInverted(AlternativeBlock blk, boolean forLexer) {
if (
blk instanceof ZeroOrMoreBlock ||
blk instanceof OneOrMoreBlock ||
blk instanceof SynPredBlock
) {
return false;
}
// Cannot invert an empty subrule
if (blk.alternatives.size() == 0) {
return false;
}
// The block must only contain alternatives with a single element,
// where each element is a char, token, char range, or token range.
for (int i = 0; i < blk.alternatives.size(); i++) {
Alternative alt = blk.getAlternativeAt(i);
// Cannot have anything interesting in the alternative ...
if (alt.synPred != null || alt.semPred != null || alt.exceptionSpec != null) {
return false;
}
// ... and there must be one simple element
AlternativeElement elt = alt.head;
if (
!(
elt instanceof CharLiteralElement ||
elt instanceof TokenRefElement ||
elt instanceof CharRangeElement ||
elt instanceof TokenRangeElement ||
(elt instanceof StringLiteralElement && !forLexer)
) ||
!(elt.next instanceof BlockEndElement) ||
elt.getAutoGenType() != GrammarElement.AUTO_GEN_NONE
) {
return false;
}
}
return true;
}
}