// // This software is now distributed according to // the Lesser Gnu Public License. Please see // http://www.gnu.org/copyleft/lesser.txt for // the details. // -- Happy Computing! // package com.stevesoft.ewe_pat; import ewe.util.Enumeration; import ewe.util.Hashtable; import ewe.util.Vector; /** This class is just like oneChar, but doesn't worry about case. */ class FastChar extends oneChar { FastChar(char c) { super(c); } public int matchInternal(int p,Pthings pt) { return (p < pt.src.length() && pt.src.charAt(p)==c) ? nextMatch(p+1,pt) : -1; } Pattern clone1(Hashtable h) { return new FastChar(c); } } /** This class is a hashtable keyed by Character * Objects. It is used to match things of the * form (?:a..|b..|c..|..) match with greater efficiency -- * by using a Hashtable that indexes into the group * of patterns. */ class Branch extends Pattern { Hashtable h = new Hashtable(); // We need to keep track of the order // of the keys -- if we don't then // recompiling the output of toString // may produce errors by re-ordering // ()'s and changing the id number of // the backreference associated with // a subpattern. Vector keys = new Vector(); Branch() {} Pattern clone1(Hashtable x) { Branch b = new Branch(); b.keys = (Vector)keys.getCopy(); x.put(this,b); x.put(b,b); for(int i=0;i<keys.size();i++) { Pattern p = (Pattern)h.get(keys.elementAt(i)); b.h.put(keys.elementAt(i),p.clone(x)); } return b; } // this function eliminates Branches with 0 or 1 elements. final Pattern reduce(boolean ignoreCase,boolean dontMinQ) { if(h.size()==1) { Enumeration e = h.keys(); Character c = (Character)e.nextElement(); Pattern oc; if(ignoreCase||dontMinQ) oc=new oneChar(c.charValue()); else oc=new FastChar(c.charValue()); oc.next = (Pattern)h.get(c); oc.add(next); return oc; } else if(h.size()==0) return null; return this; } public patInt maxChars() { Enumeration e = h.keys(); patInt count = new patInt(0); while(e.hasMoreElements()) { Object key = e.nextElement(); Pattern pa = (Pattern)h.get(key); patInt pi = pa.maxChars(); pi.inc(); count.maxeq(pi); } return count; } public patInt minChars() { Enumeration e = h.keys(); patInt count = new patInt(0); while(e.hasMoreElements()) { Object key = e.nextElement(); Pattern pa = (Pattern)h.get(key); patInt pi = pa.minChars(); pi.inc(); count.mineq(pi); } return count; } // adds a oneChar object to this Branch void addc(oneChar o,boolean ignoreCase,boolean dontMinQ) { Pattern n = o.next; if(n == null) n = new NullPattern(); else n = RegOpt.opt(n,ignoreCase,dontMinQ); n.setParent(this); set(new Character(o.c),n,ignoreCase,dontMinQ); if(ignoreCase) { if(o.c != o.altc) set(new Character(o.altc),n,ignoreCase,dontMinQ); if(o.c != o.altc2 && o.altc != o.altc2) set(new Character(o.altc2),n,ignoreCase,dontMinQ); } } void set(Character c,Pattern n,boolean igc,boolean dontMinQ) { Pattern p = (Pattern)h.get(c); next = null; // This letter is not yet used in the Branch object. // We need to add it. if(p==null) { if(n instanceof Or) { // A NullPattern is prepended to an Or // to prevent confusing this object. // For example: (boo|bug) => (b(?:oo|ug)) // during this process. However, we // want (b(?:oo|ell)|bug) NullPattern np = new NullPattern(); np.add(n); h.put(c,np); } else { h.put(c,n); } // Make sure we remember the order things were // added into the Branch object so that we can // properly convert it to a String. keys.addElement(c); } else if(p instanceof Or) { ((Or)p).addOr(n); } else if(p instanceof oneChar && n instanceof oneChar && ((oneChar)p).c != ((oneChar)n).c) { Branch b = new Branch(); b.addc((oneChar)p,igc,dontMinQ); b.addc((oneChar)n,igc,dontMinQ); h.put(c,b); b.setParent(this); } else if(p instanceof Branch && n instanceof oneChar) { ((Branch)p).addc((oneChar)n,igc,dontMinQ); n.setParent(p); } else { // Create an Or object to receive the variety // of branches in the pattern if the current letter // is matched. We do not attempt to make these // sub-branches into a Branch object yet. Or o = new Or(); o.setParent(this); // Remove NullPattern from p -- it's no longer needed. if(p instanceof NullPattern && p.parent == null && p.next != null) { o.addOr(p.next); } else { o.addOr(p); } o.addOr(n); Pattern optpat = RegOpt.opt(o,igc,dontMinQ); h.put(c,optpat); optpat.setParent(this); } } public String toString() { StringBuffer sb = new StringBuffer(); // should protect this... sb.append("(?:(?#branch)");// Hashtable)"); for(int i=0;i<keys.size();i++) { Character c = (Character)keys.elementAt(i); sb.append(c); sb.append(h.get(c)); if(i+1<keys.size()) sb.append("|"); } sb.append(")"); sb.append(nextString()); return sb.toString(); } public int matchInternal(int pos,Pthings pt) { if(pos >= pt.src.length()) return -1; Pattern n = (Pattern)h.get(new Character(pt.src.charAt(pos))); if(n == null) return -1; if(pt.cbits != null && pt.cbits.get(pos)) return -1; return n.matchInternal(pos+1,pt); } } /** This is just a place to put the optimizing function. It is never instantiated as an Object. It just sorts through the RegOpt looking for things it can change and make faster. */ public class RegOpt { static Pattern opt(Pattern p,boolean ignoreCase, boolean dontMinQ) { if(p == null) return p; if(p instanceof Bracket) { Bracket b = (Bracket)p; // FastBracket is the only special // optimized class to have its own // source file. p = FastBracket.process(b,ignoreCase); //if(!(p instanceof FastBracket) //p = Switch.process(b,ignoreCase); p.next = b.next; p.parent = b.parent; } else if(p instanceof oneChar && !ignoreCase && !dontMinQ) { oneChar o = (oneChar)p; p = new FastChar(o.c); p.next = o.next; p.parent = o.parent; } else if(p instanceof Or && ((Or)p).leftForm().equals("(?:") && ((Or)p).v.size()==1) { // Eliminate this Or Object. Or o = (Or)p; p = (Pattern)o.v.elementAt(0); p.setParent(null); p = RegOpt.opt(p,ignoreCase,dontMinQ); p.add(o.next); } else if(p instanceof Or) { Or o = (Or)p; o.pv = null; Vector v = o.v; o.v = new Vector(); Branch b = new Branch(); b.parent = o.parent; for(int i=0;i<v.size();i++) { Pattern pp = (Pattern)v.elementAt(i); // We want to have at least two oneChar's in // the Or Object to consider making a Branch. if(pp instanceof oneChar && (b.h.size()>=1 || (i+1<v.size() && v.elementAt(i+1) instanceof oneChar))) b.addc((oneChar)pp,ignoreCase,dontMinQ); else { if(b.keys.size() > 0) { Pattern p2 = (Pattern)b.reduce(ignoreCase,dontMinQ); if(p2 != null) { o.addOr(p2); b = new Branch(); b.parent = o.parent; } } o.addOr(opt(pp,ignoreCase,dontMinQ)); } } if(b.keys.size()>0) { Pattern p2=(Pattern)b.reduce(ignoreCase,dontMinQ); if(p2 != null) o.addOr(p2); } if(o.v.size()==1 && o.leftForm().equals("(?:")) { // Eliminate Or Object p = (Pattern)o.v.elementAt(0); p.setParent(null); p = RegOpt.opt(p,ignoreCase,dontMinQ); p.add(o.next); } } else if(p instanceof FastMulti) { PatternSub ps = (PatternSub)p; ps.sub = RegOpt.opt(ps.sub,ignoreCase,dontMinQ); } else if(p instanceof Multi && safe4fm( ((PatternSub)p).sub )) { Multi m = (Multi)p; FastMulti fm = null; try { fm = new FastMulti(m.a,m.b, opt(m.sub,ignoreCase,dontMinQ)); } catch(RegSyntax rs) {} fm.parent = m.parent; fm.matchFewest = m.matchFewest; fm.next = m.next; p = fm; } if(p.next != null) p.next = opt(p.next,ignoreCase,dontMinQ); return p; } final static boolean safe4fm(Pattern x) { while(x != null) { if(x instanceof Bracket) ; else if(x instanceof Range) ; else if(x instanceof oneChar) ; else if(x instanceof Any) ; else if(x instanceof Custom && ((Custom)x).v instanceof UniValidator) ; else if(x instanceof Or) { Or o = (Or)x; if(!o.leftForm().equals("(?:")) return false; patInt lo = o.countMinChars(); patInt hi = o.countMaxChars(); if(!lo.equals(hi)) return false; for(int i=0;i<o.v.size();i++) if(!safe4fm((Pattern)o.v.elementAt(i)) ) return false; } else return false; x = x.next; } return true; } /* public static void setParents(Regex r) { setParents(r.thePattern,null); } static void setParents(Pattern p,Pattern x) { if(p instanceof PatternSub && !(p instanceof FastMulti) && !(p instanceof DotMulti)) RegOpt.setParents( ((PatternSub)p).sub, p); else if(p instanceof Or && !(p instanceof Bracket)) { Or o = (Or)p; for(int i=0;i<o.v.size();i++) RegOpt.setParents((Pattern)o.v.elementAt(i),o); } else if(p instanceof Branch) { Branch b = (Branch)p; Enumeration e = b.h.keys(); while(e.hasMoreElements()) { Object o = e.nextElement(); RegOpt.setParents( (Pattern)b.h.get(o), b); } } if(p.next == null) p.parent = x; else { p.parent = null; RegOpt.setParents(p.next,x); } }*/ }