package com.sas.unravl.extractors; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; import com.sas.unravl.ApiCall; import com.sas.unravl.UnRAVL; import com.sas.unravl.UnRAVLException; import com.sas.unravl.annotations.UnRAVLExtractorPlugin; import com.sas.unravl.util.Json; import java.net.MalformedURLException; import java.net.URL; import java.util.Map; import java.util.regex.Pattern; import org.apache.log4j.Logger; /** * Extract links from a JSON body response and bind them to variables. * <p> * There are several forms for extracting links, allowing for various link * representations. * <p> * A JSON response may contain <a * href='http://tools.ietf.org/html/rfc4287#section-4.2.7'>atom:link</a> * representations. This is also compatible with <a * href='http://amundsen.com/media-types/collection/'>Collection+JSON</a> * format. * * <pre> * { ..., * "links" : [ * { "rel" : "self", * "method" : "GET", * "href" : "http://www.example.com/orders", * "type" : "application/json" * }, * { "rel" : "search", * "method" : "GET", * "href" : "/orders?id={order_id}", * "type" : "application/json" * } * ], * ... * } * </pre> * * We will refer to the above as the atom:link response. With Collection+JSON, * the "links" are embedded in the top level "collection" object. * <p> * Common atom:link members are rel, type, href. * <p> * The second format is the the <a * href='http://stateless.co/hal_specification.html'>Hypertext Application * Language</a> (HAL) representation by Mike Kelly which uses a "_links" member: * * <pre> * { ..., * "_links": { * "self": { "href": "/orders" }, * "search": { "href": "/orders?id={order_id}" } * } * } * </pre> * * We will refer to this as the HAL response. * <p> * This UnRAVL "bind" element can extract links from only one set of links in a * JSON object. 
* By default, this extractor will bind variables from links in the
 * environment variable named "responseBody"; this is normally defined using the
 * {@link JsonExtractor "json" extractor}. See the use of "from" below if you
 * want to extract links from another environment variable or expression.
 * <p>
 * The syntax for the links extractor is
 *
 * <pre>
 * { "links" : { "var1" : selector1,
 *               "var2" : selector2,
 *               ...
 *               "varn" : selectorn } }
 * </pre>
 *
 * "var1" through "varn" are environment variable names which will be bound to
 * the links as per their corresponding selectors. Each selector may be:
 * <dl>
 * <dt>a string</dt>
 * <dd>the string is the link relation (for atom:link or HAL responses)</dd>
 * <dt>a JSON object</dt>
 * <dd>the link which matches all the members of this object (as regular
 * expressions) will be selected. The values in the selector must be strings.
 * This allows you to match on the link relation, method, type, and/or uri
 * instead of just the link relation. (For HAL responses, only href may be
 * used.)</dd>
 * </dl>
 * <p>
 * Instead of an object of name/spec pairs, the value of the links extractor may
 * be an array of strings, in which case each string is used as both the
 * variable name and the link relation (selector) name. Thus,
 *
 * <pre>
 * { "links" : [ "self", "update", "delete" ] }
 * </pre>
 *
 * is equivalent to
 *
 * <pre>
 * { "links" : { "self" : "self", "update" : "update", "delete" : "delete" } }
 * </pre>
 *
 * Finally, a single value may be used:
 *
 * <pre>
 * { "link" : "self" }
 * </pre>
 *
 * which is equivalent to
 *
 * <pre>
 * { "link" : { "self" : "self" } }
 * </pre>
 *
 * Note that "link" may be used instead of "links"; this is clearer for
 * extracting a single link.
 * <p>
 * An extra option, <code>"unwrap" : true</code> may be used to unwrap the
 * Jackson <code>ObjectNode</code> values from the links into a
 * <code>java.util.Map</code>.
* For example:
 *
 * <pre>
 * { "link" : { "self" : "self" }, "unwrap" : true }
 * </pre>
 *
 * <h3>Extracting just the href value from links</h3>
 * <p>
 * If the extractor is used with the key "hrefs" or "href" instead of "links",
 * then the extracted value will be just the string value of the "href" member
 * of the corresponding link representation, not the entire link object. For
 * example,
 * </p>
 *
 * <pre>
 * { "hrefs" : [ "self", "update", "delete" ] }
 * </pre>
 * <p>
 * will bind "self", "update", and "delete" to the corresponding <strong>href
 * string values</strong> of the "self", "update", and "delete" links.
 * </p>
 * <h2>Extracting from JSON other than responseBody</h2>
 * <p>
 * The links/href extractors also have an additional option
 *
 * <pre>
 * "from" : "var-or-path"
 * </pre>
 * <p>
 * If "from" is present, its value should be the name of an UnRAVL variable that
 * contains the links collection. By default, this is the JSON response object.
 * <p>
 * An extra option, <code>"prefix" : "<em>prefix</em>"</code> may be used to
 * specify a prefix string to be prepended to the href values. This may be a URL
 * such as "http://www.example.com/myApi". The prefix is applied to the href if
 * and only if the href value is not a full URL.
* </p>
 * <p>
 * If the variable <code>unravl.hrefs.prefix</code> is defined, its value will
 * be used if no "prefix" is defined.
 * </p>
 * <p>
 * Examples:
 * </p>
 *
 * <pre>
 * {
 *   "GET" : "{site}/apiPath",
 *   "bind" : { "href" : "self", "prefix" : "https://www.example.com/myApi" }
 * }
 *
 * { "env" : { "site" : "https://www.example.com/myApi" },
 *   "GET" : "{site}/apiPath",
 *   "bind" : { "href" : "self", "prefix" : "{site}" }
 * }
 *
 * { "env" : { "unravl.hrefs.prefix" : "https://www.example.com/myApi" },
 *   "GET" : "{site}/apiPath",
 *   "bind" : { "href" : "self" }
 * }
 * </pre>
 *
 * <p>
 * All three of these forms will convert a href from the <code>"self"</code>
 * link such as <code>"/myResources/ab54d8bc4f"</code> to
 * <code>"https://www.example.com/myApi/myResources/ab54d8bc4f"</code>.
 *
 * <h2>Example: Extracting multiple links</h2>
 *
 * <p>
 * Consider two different JSON responses, the atom:link response and the HAL
 * response, as described above. The UnRAVL "bind" element
 *
 * <pre>
 * { "links" : { "selfLink" : "self",
 *               "searchLink" : "search" }
 * }
 * </pre>
 *
 * will select links based on their "rel" names. This will bind the variable
 * "selfLink" to the object
 *
 * <pre>
 * { "rel" : "self",
 *   "method" : "GET",
 *   "href" : "http://www.example.com/orders",
 *   "type" : "application/json"
 * }
 * </pre>
 *
 * when used with the atom:link response, or to the object
 *
 * <pre>
 * { "href": "/orders" }
 * </pre>
 * <p>
 * when used with the HAL response. The variable named "searchLink" will be
 * bound to the link with the link relation "search".
 * </p>
 *
 * <pre>
 * { "href" : [ "self", "search" ] }
 * </pre>
 *
 * will bind "self" to the string "http://www.example.com/orders" and bind
 * "search" to the string "/orders?id={order_id}" (or to that path with the
 * href prefix prepended, if a prefix is defined), because
 * we use the "href" form instead of the "links" form.
 *
 * <h2>Example: Extracting from other sources</h2>
 *
 * By default, this extractor works on the variable named "responseBody" which
 * is bound when using the "json" extractor.
* However, you can use the optional
 * "from" member to name another variable that is bound, or you can use a Groovy
 * expression that returns a JsonNode. This is useful if you want to extract the
 * links of nested objects. It is required for Collection+JSON nodes to select
 * from the "collection" element inside the response, for example.
 *
 * <pre>
 * "bind" : [
 *    { "href" : { "coll" : "self" },
 *      "from" : "responseBody.collection" },
 *
 *    { "href" : { "self0" : "self",
 *                 "delete0" : "delete" },
 *      "from" : "responseBody.collection.items[0]" },
 *
 *    { "href" : { "selfLast" : "self",
 *                 "deleteLast" : "delete" },
 *      "from" : "responseBody.collection.items[responseBody.collection.items.size()-1]" }
 *    ]
 * </pre>
 *
 * This will extract the href from the link to the collection as well as the
 * href values from the "self" and "delete" links in the first and last element
 * of the nested items array, respectively. Environment variable substitution is
 * performed on the string before evaluating it as a Groovy expression.
 *
 * <h2>Example: Complex matching</h2>
 *
 * By default, if the selector is a string, this extractor only matches the link
 * relation. This is also the only option for HAL. For atom:link, the "links"
 * array may contain multiple links with the same link relation. Thus, you may
 * specify multiple matching criteria, using regular expression matches for one
 * or more members of the link.
* For example, to match a link that has a "rel"
 * value of "update" and a "method" value of "PUT" and a "href" label that
 * contains "models", use
 *
 * <pre>
 * "bind" : { "link" : { "updateLink" : { "rel" : "update",
 *                                       "method" : "PUT",
 *                                       "href" : ".*models.*"
 *                                     }
 *                    }
 *          }
 * </pre>
 * <p>
 * It is easy to see that
 *
 * <pre>
 * "bind" : { "link" : { "updateLink" : "update" } }
 * </pre>
 *
 * is shorthand for
 *
 * <pre>
 * "bind" : { "link" : { "updateLink" : { "rel" : "update" } } }
 * </pre>
 * <p>
 * (Note that this element may be specified with either "links" or "link",
 * depending on your preference - use "links" when binding more than one link,
 * and "link" when binding only one.)
 *
 * @author David.Biesack@sas.com
 */
@UnRAVLExtractorPlugin({ "link", "links", "href", "hrefs" })
public class LinksExtractor extends BaseUnRAVLExtractor {

    private static final String PREFIX_KEY = "prefix";
    private static final String UNRAVL_HREF_PREFIX = "unravl.hrefs.prefix";
    private static final String REL_KEY = "rel";
    private static final String COLLECTION_KEY = "collection";
    private static final String HREF_KEY = "href";
    private static final String LINKS_KEY = "links";
    private static final String HAL_LINKS_KEY = "_links";
    private static final Logger logger = Logger.getLogger(LinksExtractor.class);

    /**
     * Resolves the JSON source object, normalizes the selector specification
     * and binds one variable per selected link (or per link href).
     *
     * @param current
     *            the script being run; passed through to the superclass
     * @param extractor
     *            the extractor object; its first field holds the link
     *            selectors, optional "from", "prefix" and "unwrap" members
     *            refine the extraction
     * @param call
     *            the API call whose script receives the bindings
     * @throws UnRAVLException
     *             if the source is not a JSON object, the selectors are
     *             invalid, or no matching link is found
     */
    @Override
    public void extract(UnRAVL current, ObjectNode extractor, ApiCall call)
            throws UnRAVLException {
        super.extract(current, extractor, call);
        try {
            boolean href = isHref(extractor);
            JsonNode fromNode = extractor.get("from");
            ObjectNode from = jsonObjectSource(extractor, fromNode, call,
                    call.getScript());
            JsonNode spec = Json.firstFieldValue(extractor);
            ObjectNode effectiveSpec = effectiveLinksSpec(extractor, spec);
            extractLinks(extractor, from, effectiveSpec, href, call);
        } catch (ClassCastException e) {
            UnRAVLException malformed = new UnRAVLException(String.format(
                    "%s extractor invalid or corresponding links are not well formed",
                    key(extractor)));
            // Keep the original failure so the offending cast can be diagnosed.
            malformed.initCause(e);
            throw malformed;
        }
    }

    /**
     * @return true if the extractor key is "href" or "hrefs", i.e. only the
     *         href strings are bound rather than whole link objects
     */
    private boolean isHref(ObjectNode extractor) {
        String key = key(extractor);
        return "href".equals(key) || "hrefs".equals(key);
    }

    /**
     * Locates the link collection inside <code>from</code> (HAL "_links",
     * atom "links", or Collection+JSON "collection"."links", in that order)
     * and binds each variable named in <code>effectiveSpec</code>.
     *
     * @throws UnRAVLException
     *             if no link collection can be found, a selector matches no
     *             link, or an href is requested from a link without one
     */
    private void extractLinks(ObjectNode root, ObjectNode from,
            ObjectNode effectiveSpec, boolean href, ApiCall call)
            throws UnRAVLException {
        boolean unwrap = unwrapOption(root);
        ArrayNode linksArray = null;
        ObjectNode linksObject = null;
        if (from.get(HAL_LINKS_KEY) != null) {
            linksObject = Json.object(from.get(HAL_LINKS_KEY));
            logger.info("Extracting HAL style links");
        } else if (from.get(LINKS_KEY) != null) {
            linksArray = Json.array(from.get(LINKS_KEY));
            logger.info("Extracting Atom style links");
        } else {
            // Collection+JSON
            JsonNode coll = from.get(COLLECTION_KEY);
            JsonNode collLinks = (coll != null && coll.isObject())
                    ? coll.get(LINKS_KEY)
                    : null;
            if (collLinks == null) {
                throw cannotInferLinks(root);
            }
            linksArray = Json.array(collLinks);
            logger.info(
                    "Extracting Collection+JSON style links from \"collection\" member");
        }
        if (linksArray == null && linksObject == null) {
            // Defensive: nothing usable was produced by the conversions above.
            throw cannotInferLinks(root);
        }

        for (Map.Entry<String, JsonNode> e : Json.fields(effectiveSpec)) {
            String name = e.getKey();
            JsonNode spec = e.getValue();
            JsonNode link = matchLink(name, spec, linksArray, linksObject,
                    root);
            Object value = link;
            if (href) {
                value = applyPrefix(root, hrefOf(link, name, root), name);
            } else if (unwrap) {
                value = Json.unwrap(link);
            }
            logger.info(String.format("Bound link name %s to %s", name, value));
            call.getScript().bind(name, value);
        }
    }

    /** Logs and builds the "cannot infer links" failure. */
    private UnRAVLException cannotInferLinks(ObjectNode root) {
        String msg = String.format("Cannot infer links in %s extractor",
                key(root));
        logger.error(msg);
        return new UnRAVLException(msg);
    }

    /**
     * Returns the string value of the link's "href" member.
     *
     * @throws UnRAVLException
     *             if the link has no textual "href" member
     */
    private String hrefOf(JsonNode link, String name, ObjectNode root)
            throws UnRAVLException {
        JsonNode hrefNode = link.get(HREF_KEY);
        if (hrefNode == null || !hrefNode.isTextual()) {
            throw new UnRAVLException(String.format(
                    "Link %s has no string \"href\" member in %s extractor: %s",
                    name, key(root), link));
        }
        return hrefNode.textValue();
    }

    /**
     * Prepends the href prefix to <code>value</code> unless the value is
     * already a full URL. The explicit "prefix" member of the extractor wins;
     * otherwise the binding of <code>unravl.hrefs.prefix</code> is used; if
     * neither exists the value is returned unchanged.
     *
     * @throws UnRAVLException
     *             if the explicit or implicit prefix is not a string
     */
    private String applyPrefix(ObjectNode root, String value, String name)
            throws UnRAVLException {
        if (isUrl(value)) {
            return value;
        }
        JsonNode prefixSpec = root.get(PREFIX_KEY);
        String prefix;
        String why;
        if (prefixSpec == null) {
            Object implicitPrefix = getScript().binding(UNRAVL_HREF_PREFIX);
            if (implicitPrefix == null) {
                return value;
            }
            if (!(implicitPrefix instanceof String)) {
                // Report the offending binding itself (prefixSpec is null here).
                throw new UnRAVLException(
                        "href prefix value must be a string, found "
                                + implicitPrefix.getClass().getName()
                                + ", value = " + implicitPrefix);
            }
            prefix = (String) implicitPrefix;
            why = "(implicit " + UNRAVL_HREF_PREFIX + ")";
        } else {
            if (!prefixSpec.isTextual()) {
                throw new UnRAVLException(
                        "href prefix value must be a string, found "
                                + prefixSpec.getClass().getName()
                                + ", value = " + prefixSpec);
            }
            prefix = getScript().expand(prefixSpec.textValue());
            why = "(explicit \"prefix\")";
        }
        logger.info(String.format(
                "Prepended '%s' to the '%s' link href '%s' %s.", prefix, name,
                value, why));
        return prefix + value;
    }

    /** @return true if <code>value</code> parses as an absolute URL */
    private boolean isUrl(String value) {
        try {
            new URL(value);
            return true;
        } catch (MalformedURLException e) {
            return false;
        }
    }

    /**
     * Finds the link selected by <code>spec</code>, either in an atom /
     * Collection+JSON links array or in a HAL links object.
     *
     * @throws UnRAVLException
     *             if no link matches, in either mode
     */
    private JsonNode matchLink(String name, JsonNode spec,
            ArrayNode linksArray, ObjectNode linksObject, ObjectNode root)
            throws UnRAVLException {
        if (linksArray != null) {
            // atom:link / Collection+JSON mode
            for (JsonNode link : Json.toArray(linksArray)) {
                if (matches(root, name, spec, link)) {
                    return link;
                }
            }
        } else if (linksObject != null) {
            // HAL mode
            JsonNode link = matchHalLink(name, spec, linksObject);
            if (link != null) {
                return link;
            }
        }
        throw new UnRAVLException(String.format(
                "No such link matching %s found in %s %s", spec, key(root),
                linksArray == null ? linksObject : linksArray));
    }

    /**
     * Looks up a HAL link by link relation. The relation is the selector
     * string, or the "rel" member of an object selector. For backward
     * compatibility the variable name is tried when that lookup fails.
     *
     * @return the link, or null if the HAL links object has no such member
     */
    private JsonNode matchHalLink(String name, JsonNode spec,
            ObjectNode linksObject) {
        String rel = null;
        if (spec.isTextual()) {
            rel = spec.textValue();
        } else if (spec.isObject()) {
            JsonNode relNode = spec.get(REL_KEY);
            if (relNode != null && relNode.isTextual()) {
                rel = relNode.textValue();
            }
        }
        JsonNode link = (rel == null) ? null : linksObject.get(rel);
        if (link == null) {
            link = linksObject.get(name);
        }
        return link;
    }

    /**
     * Tests one link against a selector. A string selector must equal the
     * link's "rel"; an object selector must match every listed member,
     * either exactly or as a regular expression.
     *
     * @throws UnRAVLException
     *             if the selector is neither a string nor an object, or
     *             compares non-string values
     */
    private boolean matches(ObjectNode root, String name, JsonNode spec,
            JsonNode link) throws UnRAVLException {
        if (spec.isTextual()) {
            JsonNode rel = link.get(REL_KEY);
            return rel != null && rel.isTextual()
                    && rel.textValue().equals(spec.textValue());
        }
        if (!spec.isObject()) {
            throw new UnRAVLException(String.format(
                    "Invalid value in %s extractor: %s", key(root), spec));
        }
        // ensure all items in spec match, but using regular
        // expression matching
        for (Map.Entry<String, JsonNode> e : Json.fields(spec)) {
            JsonNode expected = e.getValue();
            JsonNode actual = link.get(e.getKey());
            if (expected.equals(actual)) {
                continue; // skip pattern match if exact match
            }
            if (actual == null) {
                return false; // the link lacks this member: no match
            }
            if (!matches(root, actual, expected)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Matches the whole of <code>actual</code> against the regular expression
     * held in <code>expected</code>.
     *
     * @throws UnRAVLException
     *             if either node is not a string, or the expression is not a
     *             valid regular expression
     */
    private boolean matches(ObjectNode root, JsonNode actual,
            JsonNode expected) throws UnRAVLException {
        if (actual.isTextual() && expected.isTextual()) {
            try {
                Pattern p = Pattern.compile(expected.textValue());
                return p.matcher(actual.textValue()).matches();
            } catch (java.util.regex.PatternSyntaxException e) {
                UnRAVLException invalid = new UnRAVLException(String.format(
                        "Invalid regular expression %s in %s extractor selector",
                        expected, key(root)));
                invalid.initCause(e);
                throw invalid;
            }
        }
        throw new UnRAVLException(String.format(
                "%s extractor selector requires string values", key(root)));
    }

    /**
     * Normalizes the selector specification into an object of
     * name/selector pairs: a string or an array of strings maps each string
     * to itself, an object is used as is.
     *
     * @throws UnRAVLException
     *             if the value is none of those forms, or an array element
     *             is not a string
     */
    private ObjectNode effectiveLinksSpec(ObjectNode root, JsonNode spec)
            throws UnRAVLException {
        if (spec.isTextual()) {
            // convert "self" into [ "self" ], handled as an array below
            spec = Json.wrapInArray(spec);
        }
        if (spec.isObject()) {
            return (ObjectNode) spec;
        }
        if (!spec.isArray()) {
            throw new UnRAVLException(String.format(
                    "Invalid value in %s extractor: %s", key(root), spec));
        }
        // convert { "links" : ["self","delete"] } into
        // { "links" : { "self" : "self", "delete" : "delete" } }
        ObjectNode effectiveSpec = new ObjectNode(JsonNodeFactory.instance);
        for (JsonNode e : Json.toArray(spec)) {
            if (!e.isTextual()) {
                throw new UnRAVLException(String.format(
                        "Array elements must be strings in %s extractor: %s",
                        key(root), e));
            }
            effectiveSpec.set(e.textValue(), e);
        }
        return effectiveSpec;
    }

    /**
     * Resolves the JSON object that holds the links. Without "from" this is
     * the "responseBody" binding. With "from", the string names a bound
     * variable or, failing that, is evaluated as an expression.
     *
     * @return the source object, never null
     * @throws UnRAVLException
     *             if "from" is not a string, or the source is unbound or not
     *             a JSON object
     */
    private ObjectNode jsonObjectSource(ObjectNode root, JsonNode fromNode,
            ApiCall call, UnRAVL script) throws UnRAVLException {
        if (fromNode == null) {
            if (!script.bound("responseBody")) {
                throw new UnRAVLException(String.format(
                        "responseBody is not bound in %s extractor",
                        key(root)));
            }
            Object body = script.binding("responseBody");
            if (body == null) {
                throw new UnRAVLException(String.format(
                        "No responseBody binding in %s extractor.",
                        key(root)));
            }
            ObjectNode from = asObjectNode(body);
            if (from == null) {
                throw new UnRAVLException(String.format(
                        "responseBody is not bound to a JSON object in %s extractor: %s",
                        key(root), body));
            }
            return from;
        }

        if (!fromNode.isTextual()) {
            // Previously fell through and returned null, failing later with an NPE.
            throw new UnRAVLException(String.format(
                    "Value of \"from\" in %s extractor must be a string: %s",
                    key(root), fromNode));
        }
        String where = fromNode.textValue();
        if (script.bound(where)) {
            Object val = script.binding(where);
            ObjectNode from = asObjectNode(val);
            if (from == null) {
                throw new UnRAVLException(String.format(
                        "Value of \"from\": \"%s\" in %s extractor is not a JSON object:\n%s",
                        where, key(root), val));
            }
            return from;
        }
        // Accept a Map result here too, consistent with the two branches above.
        ObjectNode from = asObjectNode(script.eval(where));
        if (from == null) {
            throw new UnRAVLException(String.format(
                    "expression %s did not yield a JSON object in %s extractor",
                    where, key(root)));
        }
        return from;
    }

    /**
     * Views a bound value as a JSON object: an ObjectNode is returned as is
     * and a Map is wrapped.
     *
     * @return the object node, or null if the value is neither
     */
    @SuppressWarnings("rawtypes")
    private static ObjectNode asObjectNode(Object value)
            throws UnRAVLException {
        if (value instanceof ObjectNode) {
            return (ObjectNode) value;
        }
        if (value instanceof Map) {
            return Json.wrap((Map) value);
        }
        return null;
    }
}