/**
* Copyright 2015 StreamSets Inc.
*
* Licensed under the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.streamsets.pipeline.lib.util;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Static helpers for working with field paths that contain wildcards or regular expressions.
 *
 * <p>Two flavours of paths are handled here:
 * <ul>
 *   <li>wildcard paths ({@code *}, {@code ?}, {@code [*]}), which {@link #getMatchingFieldPaths}
 *       expands against a set of existing field paths;</li>
 *   <li>regex paths, which {@link #patchUpFieldPathRegex} rewrites so that array index brackets and
 *       wildcards become valid regular expression fragments.</li>
 * </ul>
 */
public class FieldRegexUtil {

  // Handles list fields addressed with a wildcard index, e.g. /list[*] or /list[(*)].
  // The wildcard becomes \d+ and the brackets are escaped, giving /list\[\d+\] and /list\[(\d+)\].
  // An explicit /list[\d+] form does not need support: array indices are just numbers, so * covers it.
  // Selecting only some indices with a regex (e.g. /list[([0-9])] for the first 10 indices) is
  // currently not supported.
  private static final Pattern ARRAY_IDX_WILD_CARD_REGEX_PATTERN = Pattern.compile("\\[(\\(?)(\\*)(\\)?)\\]");
  private static final String ARRAY_IDX_WILD_CARD_REPLACE_STRING = "\\\\[$1\\\\d+$3\\\\]";

  // Handles list elements addressed with a constant index: /list[0], /list[1], /list[(0)], /list[(1)].
  // Only the index brackets are escaped, giving /list\[0\], /list\[1\], /list\[(0)\], /list\[(1)\]
  // respectively.
  private static final Pattern ARRAY_IDX_CONST_NUM_REGEX_PATTERN = Pattern.compile("\\[(\\(?)(\\d+)(\\)?)\\]");
  private static final String ARRAY_IDX_CONST_NUM_REPLACE_STRING = "\\\\[$1$2$3\\\\]";

  // Handles map fields addressed with a wildcard, e.g. /map/*/field or /map/(*)/field.
  // The wildcard becomes a character class matching one path segment, giving
  // /map/[^\/\[]+/field and /map/([^\/\[]+)/field respectively.
  private static final Pattern MAP_WILDCARD_FIELD_PATTERN = Pattern.compile("(\\/\\(?)\\*(\\)?)");
  private static final String MAP_WILD_CARD_REPLACEMENT = "$1[^\\\\/\\\\[]+$2";

  private FieldRegexUtil() {}

  /**
   * Tells whether a field path contains a wildcard character.
   *
   * @param fieldPath the field path to inspect; must not be {@code null}
   * @return {@code true} if the path contains {@code *} or {@code ?}, {@code false} otherwise
   */
  public static boolean hasWildCards(String fieldPath) {
    // "[*]" and "/*" both contain '*', so testing for '*' and '?' covers every wildcard form.
    return fieldPath.contains("*") || fieldPath.contains("?");
  }

  /**
   * Expands a (possibly wildcarded) field path against a set of existing field paths.
   *
   * <p>If {@code fieldPath} has no wildcards it is returned as the only element, whether or not it
   * is present in {@code fieldPaths}. Otherwise the path is converted to a regular expression and
   * every entry of {@code fieldPaths} that matches it entirely is returned, in the iteration order
   * of the set.
   *
   * <p>Wildcard translation:
   * <ul>
   *   <li>{@code [*]} matches any numeric array index;</li>
   *   <li>{@code /*} matches one path segment (one or more characters other than {@code /} and
   *       {@code [});</li>
   *   <li>any other {@code *} matches one or more word characters;</li>
   *   <li>{@code ?} matches exactly one word character.</li>
   * </ul>
   * Only brackets and wildcards are rewritten; any other regular expression metacharacter in
   * {@code fieldPath} is handed to the regex engine unchanged.
   *
   * @param fieldPath the field path, optionally containing wildcards; must not be {@code null}
   * @param fieldPaths the existing field paths to match against; only read when {@code fieldPath}
   *     contains a wildcard
   * @return a new, modifiable list with the matching field paths
   * @throws java.util.regex.PatternSyntaxException if the translated path is not a valid regular
   *     expression
   */
  public static List<String> getMatchingFieldPaths(String fieldPath, Set<String> fieldPaths) {
    if (!hasWildCards(fieldPath)) {
      // Copy into an ArrayList so both branches hand back the same kind of (modifiable) list.
      return new ArrayList<>(Arrays.asList(fieldPath));
    }

    // Order matters: the array wildcard is rewritten first, then every bracket is escaped, and only
    // afterwards is the map wildcard expanded, because its expansion introduces brackets of its own
    // that must stay unescaped.
    String regex = fieldPath
        .replace("[*]", "[\\d+]")
        .replace("[", "\\[")
        .replace("]", "\\]")
        .replace("/*", "/([^\\/\\[]+)")
        .replace("*", "\\w+")
        .replace("?", "\\w");

    Pattern pattern = Pattern.compile(regex);
    // A single matcher is reused for all candidates.
    Matcher matcher = pattern.matcher("");
    List<String> matchingFieldPaths = new ArrayList<>();
    for (String existingFieldPath : fieldPaths) {
      if (matcher.reset(existingFieldPath).matches()) {
        matchingFieldPaths.add(existingFieldPath);
      }
    }
    return matchingFieldPaths;
  }

  /**
   * Replaces every match of {@code pattern} in {@code fieldPath} with {@code replacement}.
   *
   * @param fieldPath the text to rewrite
   * @param pattern the pattern locating the spans to rewrite
   * @param replacement the replacement, in {@link Matcher#replaceAll(String)} syntax
   * @return the rewritten text; equal to {@code fieldPath} when nothing matches
   */
  private static String patchUpSpecialCases(String fieldPath, Pattern pattern, String replacement) {
    // replaceAll returns the input text unchanged when there is no match, so no find() is needed.
    return pattern.matcher(fieldPath).replaceAll(replacement);
  }

  /**
   * Rewrites the field-path specific parts of a regex field path into plain regular expression
   * syntax.
   *
   * <p>The rewrites are applied in this order:
   * <ol>
   *   <li>wildcard array indices: {@code [*]} becomes {@code \[\d+\]} and {@code [(*)]} becomes
   *       {@code \[(\d+)\]};</li>
   *   <li>constant array indices: {@code [0]} becomes {@code \[0\]} and {@code [(0)]} becomes
   *       {@code \[(0)\]};</li>
   *   <li>map wildcards: {@code /*} becomes {@code /[^\/\[]+} and {@code /(*)} becomes
   *       {@code /([^\/\[]+)}.</li>
   * </ol>
   * Everything else in the path is left untouched.
   *
   * @param fieldPath the regex field path to rewrite; must not be {@code null}
   * @return the rewritten path
   */
  public static String patchUpFieldPathRegex(String fieldPath) {
    String returnPath = patchUpSpecialCases(
        fieldPath,
        ARRAY_IDX_WILD_CARD_REGEX_PATTERN,
        ARRAY_IDX_WILD_CARD_REPLACE_STRING
    );
    returnPath = patchUpSpecialCases(
        returnPath,
        ARRAY_IDX_CONST_NUM_REGEX_PATTERN,
        ARRAY_IDX_CONST_NUM_REPLACE_STRING
    );
    return patchUpSpecialCases(
        returnPath,
        MAP_WILDCARD_FIELD_PATTERN,
        MAP_WILD_CARD_REPLACEMENT
    );
  }
}