/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package io.github.infolis.util;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Runnable implementation of the matcher.find() method for handling catastrophic
* backtracking. May be passed to a thread to be monitored and cancelled in case
* catastrophic backtracking occurs while searching for a regular expression.
*
* @author kba
* @author katarina.boland@gesis.org
* @author gojomo
*
*/
public class LimitedTimeMatcher implements Runnable {

    /**
     * CharSequence that notices thread interrupts -- as might be necessary to
     * recover from a loose regex on unexpected challenging input.
     * <p>
     * Every call to {@link #charAt(int)} checks (and clears) the interrupt
     * flag of the calling thread and aborts with a {@link RuntimeException}
     * wrapping an {@link InterruptedException} if the flag was set.
     *
     * @see <a href="http://stackoverflow.com/questions/910740/cancelling-a-long-running-regex-match">
     *      Cancelling a long running regex match</a>
     * @author gojomo
     */
    public class InterruptibleCharSequence implements CharSequence {

        final CharSequence inner;

        /**
         * @param inner
         *            the character sequence all calls are delegated to
         */
        public InterruptibleCharSequence(CharSequence inner) {
            super();
            this.inner = inner;
        }

        /**
         * @throws RuntimeException
         *             wrapping an {@link InterruptedException} if the calling
         *             thread has been interrupted
         */
        @Override
        public char charAt(int index) {
            if (Thread.interrupted()) { // clears flag if set
                throw new RuntimeException(new InterruptedException());
            }
            return inner.charAt(index);
        }

        @Override
        public int length() {
            return inner.length();
        }

        @Override
        public CharSequence subSequence(int start, int end) {
            // Keep sub-sequences interruptible as well.
            return new InterruptibleCharSequence(inner.subSequence(start, end));
        }

        @Override
        public String toString() {
            return inner.toString();
        }
    }

    private static final Logger log = LoggerFactory.getLogger(LimitedTimeMatcher.class);
    /** Maximum time the monitoring loop waits between two checks. */
    private static final long SLEEP_TIME_MILLIS = 10;
    private static final double NANOS_PER_MILLI = 1_000_000.0;

    // Mutable state below is only read/written while holding the lock on "this".
    private boolean matched;
    private boolean finished;
    private boolean timedOut;
    private double timePassedMillis;
    private final long maxTime;
    private final Matcher matcher;
    protected String threadName;
    private int lastMatchEnd = 0;

    /**
     * @param pattern
     *            {@link Pattern} to run
     * @param str
     *            {@link String} to match
     * @param maxTimeMillis
     *            maximum number of milliseconds before stopping the matching
     *            thread
     * @param threadName
     *            arbitrary name of this thread for mnemonics
     */
    public LimitedTimeMatcher(final Pattern pattern, final String str, long maxTimeMillis,
            final String threadName) {
        this.maxTime = maxTimeMillis;
        // The whole input is matched; successive run() calls continue after the
        // previous match because Matcher.find() keeps its own position.
        this.matcher = pattern.matcher(new InterruptibleCharSequence(str));
        this.threadName = threadName;
    }

    /**
     * Performs one {@link Matcher#find()} on a separate thread and blocks until
     * the attempt finishes, the time limit is exceeded, or the calling thread
     * is interrupted. The outcome is available afterwards through
     * {@link #finished()}, {@link #matched()}, {@link #end()} and
     * {@link #getTimePassedMillis()}.
     * <p>
     * Once a run has timed out the instance refuses further runs, because the
     * abandoned matching thread may still be using the underlying matcher.
     */
    @Override
    public void run() {
        if (timedOut()) {
            log.error("Calling run() on a LimitedTimeMatcher that timed out before!");
            return;
        }
        // Reset the per-run state BEFORE the worker starts so that the
        // monitoring loop can never observe the result of a previous run.
        finished(false);
        matched(false);
        setTimePassedMillis(0);

        Thread matcherThread = new Thread(new Runnable() {
            // NOTE this is the blocking call
            @Override
            public void run() {
                try {
                    boolean found = getMatcher().find();
                    publishResult(found, found ? getMatcher().end() : 0);
                } catch (RuntimeException e) {
                    if (e.getCause() instanceof InterruptedException) {
                        // Expected: InterruptibleCharSequence aborted the search.
                        log.debug("Thread '{}' stopped matching after being interrupted", threadName);
                    } else {
                        throw e;
                    }
                }
            }
        }, threadName);

        // Measure from the start of this run, not from construction.
        final long runStartNanos = System.nanoTime();
        matcherThread.start();
        while (true) {
            try {
                // Returns early as soon as the worker is done.
                matcherThread.join(SLEEP_TIME_MILLIS);
            } catch (InterruptedException e) {
                log.error("Logging thread was interrupted.");
                matcherThread.interrupt(); // do not leave the worker running unattended
                Thread.currentThread().interrupt(); // preserve the interrupt for the caller
                break;
            }
            double elapsedMillis = (System.nanoTime() - runStartNanos) / NANOS_PER_MILLI;
            setTimePassedMillis(elapsedMillis);

            // Sample liveness first: a dead worker has already published its
            // result, so finished() is authoritative afterwards.
            boolean workerAlive = matcherThread.isAlive();
            if (finished()) {
                log.trace("Thread '{}' took {} ms to finish", threadName, getTimePassedMillis());
                break;
            }
            if (!workerAlive) {
                log.error("Thread '{}' terminated without producing a result", threadName);
                break;
            }
            if (elapsedMillis > maxTime && markTimedOut()) {
                log.warn("Thread '{}' took {} ms, longer than the maximum of {} ms, shutting down to avoid pathological backtacking.",
                        threadName, getTimePassedMillis(), maxTime);
                matcherThread.interrupt();
                break;
            }
            log.trace("Thread '{}' running for {}ms", threadName, getTimePassedMillis());
        }
    }

    private Matcher getMatcher() {
        return matcher;
    }

    /**
     * Atomically stores the outcome of a completed match attempt. The result
     * is discarded if the run has already been declared timed out.
     */
    private synchronized void publishResult(boolean found, int matchEnd) {
        if (timedOut) {
            return;
        }
        matched = found;
        lastMatchEnd = matchEnd;
        finished = true;
    }

    /**
     * Atomically declares the current run timed out unless the worker has
     * finished in the meantime.
     *
     * @return {@code true} if the run is now marked as timed out,
     *         {@code false} if a result had already been published
     */
    private synchronized boolean markTimedOut() {
        if (finished) {
            return false;
        }
        timedOut = true;
        matched = false;
        return true;
    }

    private synchronized void setTimePassedMillis(double timePassedMillis) {
        this.timePassedMillis = timePassedMillis;
    }

    /**
     * @return the number of ms the last invocation ran for.
     */
    public synchronized double getTimePassedMillis() {
        return timePassedMillis;
    }

    /**
     * @return Whether the last run of the matcher timed out
     */
    synchronized boolean timedOut() {
        return timedOut;
    }

    /**
     * @return Whether the last run of the matcher finished successfully
     */
    public synchronized boolean finished() {
        return finished;
    }

    private synchronized void finished(boolean finished) {
        this.finished = finished;
    }

    /**
     * @return whether the last run of the matcher matched
     */
    public synchronized boolean matched() {
        return matched;
    }

    private synchronized void matched(boolean matched) {
        this.matched = matched;
    }

    /**
     * @return the end of the last match
     */
    public synchronized int end() {
        return lastMatchEnd;
    }

    /**
     * Returns the input subsequence captured by the given group during the
     * previous match operation.
     *
     * @throws IllegalStateException
     *             if no match has been attempted yet or the previous match
     *             failed
     * @see Matcher#group(int)
     */
    public String group(int i) {
        return getMatcher().group(i);
    }

    /**
     * Returns the input subsequence matched by the previous match.
     *
     * @throws IllegalStateException
     *             if no match has been attempted yet or the previous match
     *             failed
     * @see Matcher#group()
     */
    public String group() {
        return getMatcher().group();
    }
}