// Copyright (c) 2014, SAS Institute Inc., Cary, NC, USA, All Rights Reserved
package com.sas.unravl.extractors;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.sas.unravl.ApiCall;
import com.sas.unravl.UnRAVL;
import com.sas.unravl.UnRAVLException;
import com.sas.unravl.annotations.UnRAVLExtractorPlugin;
import com.sas.unravl.assertions.UnRAVLAssertionException;
import com.sas.unravl.util.Json;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
/**
* Matches text against grouping regular expressions and binds the substrings
* into constituent variable bindings in the current UnRAVL script environment.
* The extractor form is
*
* <pre>
* { "pattern" : [ string, pattern, var0, ... varn ] }
* </pre>
*
* such as
*
* <pre>
* { "pattern" : [ "{responseType}", "^(.*)\\s*;\\s*charset=(.*)$", "mediaType", "charset" ] }
* </pre>
*
* <p>
* This will match the value of the environment expansion of "{responseType}" to
* the given regular expression pattern <code>^(.*)\s*;\s*charset=(.*)$</code>,
* and bind the media type and the encoding character set substrings to the
* variables <code>mediaType</code> and <code>charset</code>. (Note that, as per
* the JSON grammar, backslash (<code>\</code>) characters in a JSON string must
* be escaped, so the regular expression notation <code>\s</code> is coded in
* the JSON string as <code>\\s</code>.)
* </p>
*
* <p>
* For example, if the <code>responseType</code> binding in the environment was
*
* <pre>
* application/json; charset=UTF-8
* </pre>
*
* this pattern specification will bind the variables:<br>
* mediaType to <code>"application/json"</code>, and <br>
* charset to <code>"UTF-8"</code>.
* <p>
* If the regular expression does not match, this extractor will throw an
* {@link UnRAVLAssertionException}
*
* <p>
* This extractor will unbind all the variables before testing the regular
* expression, so that bindings left from other tests won't persist and leave a
* false positive.
*
* @author David.Biesack@sas.com
*
*/
@UnRAVLExtractorPlugin("pattern")
public class PatternExtractor extends BaseUnRAVLExtractor {

    // NOTE(review): the logger is registered under UnRAVL.class rather than
    // PatternExtractor.class, and is not used in this class -- confirm this
    // category is intentional.
    static Logger logger = Logger.getLogger(UnRAVL.class);

    /**
     * Matches an expanded string against a regular expression and binds each
     * capturing group to the corresponding variable name.
     *
     * <p>
     * The extractor value must be an array of at least three strings:
     * <code>[ string, pattern, var0, ... varn ]</code>. All target variables
     * (index 2 and up) are unbound before matching. Both the input string and
     * the pattern are passed through {@link UnRAVL#expand(String)} before use.
     * The entire input must match the pattern ({@link Matcher#matches()}).
     * Groups are bound in order; binding stops at whichever runs out first,
     * the capturing groups or the variable names.
     * </p>
     *
     * @param current
     *            the script whose environment is used for expansion and
     *            binding
     * @param extractor
     *            the extractor specification; its first field value is the
     *            argument array
     * @param call
     *            the API call being executed
     * @throws UnRAVLException
     *             if the array has fewer than three elements, if any element
     *             is not a string, or if the expanded input string is null
     * @throws UnRAVLAssertionException
     *             if the input does not match the pattern
     */
    @Override
    public void extract(UnRAVL current, ObjectNode extractor, ApiCall call)
            throws UnRAVLException {
        super.extract(current, extractor, call);
        ArrayNode a = Json.array(Json.firstFieldValue(extractor));
        if (a.size() < 3) {
            throw new UnRAVLException(
                    "pattern extractor "
                            + a
                            + " must have at least three strings: [var-name pattern fieldName]");
        }

        // Validate every element, including the input string at index 0,
        // before touching the environment.
        for (int i = 0; i < a.size(); i++) {
            if (!a.get(i).isTextual()) {
                throw new UnRAVLException("pattern extractor " + a
                        + " must be all strings");
            }
        }

        // Clear the target variables first so that stale bindings from
        // earlier tests cannot produce a false positive.
        for (int i = 2; i < a.size(); i++) {
            getCall().unbind(a.get(i).textValue());
        }

        String source = a.get(0).textValue();
        String text = current.expand(source);
        if (text == null) {
            // Report the unexpanded source expression; the expanded value is
            // null here and carries no information.
            throw new UnRAVLException("variable " + source
                    + " in pattern extractor " + extractor + " is not bound");
        }

        String regex = current.expand(a.get(1).textValue());
        Matcher matcher = Pattern.compile(regex).matcher(text);
        if (!matcher.matches()) {
            throw new UnRAVLAssertionException("pattern " + regex
                    + " does not match " + source + " value '" + text + "'");
        }

        // Group 0 is the whole match, so capturing groups start at 1 and are
        // paired with the variable names that start at array index 2.
        int groups = matcher.groupCount();
        for (int group = 1, v = 2; group <= groups && v < a.size(); group++, v++) {
            String varName = a.get(v).textValue();
            current.bind(varName, matcher.group(group));
        }
    }
}