/**
 * Copyright 2015 StreamSets Inc.
 *
 * Licensed under the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.streamsets.datacollector.util;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers that wrap field path segments in single or double quotes, escaping embedded
 * backslashes and quotes, and that reverse the operation.
 *
 * <p>All public methods accept {@code null} and return it unchanged.
 */
public class EscapeUtil {

  /** Matches a non-word character; its presence decides whether a value is quoted at all. */
  public static final Pattern pattern = Pattern.compile("\\W+?", Pattern.CASE_INSENSITIVE);

  private EscapeUtil() {}

  /**
   * Wraps {@code path} in single quotes and escapes backslashes and quotes inside it.
   *
   * @param path the raw value, may be {@code null}
   * @return the quoted and escaped value, or {@code path} itself when it is {@code null} or
   *     contains no non-word character
   */
  public static String singleQuoteEscape(String path) {
    // Skip escaping if no non-word chars are found
    // This is likely slower than just escaping it anyway
    // but currently left as-is for compatibility
    if (path == null || !pattern.matcher(path).find()) {
      return path;
    }
    return escapeQuotesAndBackSlash(path, true);
  }

  /**
   * Reverses {@link #singleQuoteEscape(String)}.
   *
   * @param path the single quoted value, may be {@code null}
   * @return the unescaped value without its delimiters, or {@code path} itself when it is
   *     {@code null}, has at most two characters, or contains no non-word character
   */
  public static String singleQuoteUnescape(String path) {
    return unescape(path, true);
  }

  /**
   * Wraps {@code path} in double quotes and escapes backslashes and quotes inside it.
   *
   * @param path the raw value, may be {@code null}
   * @return the quoted and escaped value, or {@code path} itself when it is {@code null} or
   *     contains no non-word character
   */
  public static String doubleQuoteEscape(String path) {
    if (path == null || !pattern.matcher(path).find()) {
      return path;
    }
    return escapeQuotesAndBackSlash(path, false);
  }

  /**
   * Reverses {@link #doubleQuoteEscape(String)}.
   *
   * @param path the double quoted value, may be {@code null}
   * @return the unescaped value without its delimiters, or {@code path} itself when it is
   *     {@code null}, has at most two characters, or contains no non-word character
   */
  public static String doubleQuoteUnescape(String path) {
    return unescape(path, false);
  }

  /**
   * This method is used during deserializer and sqpath (Single quote escaped path) is passed to
   * determine the last field name.
   *
   * @param path a slash separated, single quote escaped path, may be {@code null}
   * @return the unescaped last segment of the path, or {@code path} itself when it is
   *     {@code null} or splitting it on "/" yields no segment
   */
  public static String getLastFieldNameFromPath(String path) {
    if (path == null) {
      return null;
    }
    String[] segments = path.split("/");
    if (segments.length == 0) {
      return path;
    }
    String lastFieldName = segments[segments.length - 1];

    // Handle special case of a field name containing a slash, e.g. /'foo/bar': the plain split
    // cut the quoted name in half, so the segment holds a quote but is not wrapped in quotes.
    boolean wrappedInQuotes = lastFieldName.contains("'")
        && lastFieldName.charAt(0) == '\''
        && lastFieldName.charAt(lastFieldName.length() - 1) == '\'';
    if (lastFieldName.contains("'") && !wrappedInQuotes) {
      // Split on "/'" instead and put back the opening quote the split consumed.
      String[] quotedSegments = path.split("/'");
      if (quotedSegments.length > 0) {
        lastFieldName = "'" + quotedSegments[quotedSegments.length - 1];
      }
    }
    return singleQuoteUnescape(lastFieldName);
  }

  /**
   * This method un escapes backslash and un escapes extra escapes before double quotes and single
   * quotes (appended by {@link #escapeQuotesAndBackSlash(String, boolean)}).
   * This method should be used internally and not during any serialization/deserialization.
   *
   * @param path the escaped path, may be {@code null}
   * @param isSingleQuoteEscape {@code true} to reduce the escape before single quotes,
   *     {@code false} to reduce the escape before double quotes
   * @return the path with a doubled backslash before the selected quote character reduced to a
   *     single one and every remaining doubled backslash collapsed, or {@code null} when
   *     {@code path} is {@code null}
   */
  public static String standardizePathForParse(String path, boolean isSingleQuoteEscape) {
    if (path == null) {
      // Consistent with the other public methods of this class, which pass null through.
      return null;
    }
    path = isSingleQuoteEscape ? path.replace("\\\\\'", "\\'") : path.replace("\\\\\"", "\\\"");
    return path.replace("\\\\", "\\");
  }

  /**
   * Shared implementation of {@link #singleQuoteUnescape(String)} and
   * {@link #doubleQuoteUnescape(String)}.
   */
  private static String unescape(String path, boolean isSingleQuoteUnescape) {
    if (path == null || path.length() <= 2 || !pattern.matcher(path).find()) {
      return path;
    }
    char quoteChar = isSingleQuoteUnescape ? '\'' : '"';
    int lastIndex = path.length() - 1;
    if (path.charAt(0) == quoteChar && path.charAt(lastIndex) == quoteChar) {
      // Drop the delimiters BEFORE unescaping. Otherwise an escaped trailing backslash followed
      // by the closing quote is read as an escaped quote and the backslash is lost.
      return unescapeQuotesAndBackSlash(path.substring(1, lastIndex), isSingleQuoteUnescape);
    }
    // Input is not wrapped in the expected quote character: keep the historical behavior of
    // unescaping first and then dropping the first and last characters.
    // NOTE(review): this truncates unquoted input; left untouched for compatibility - confirm
    // whether any caller relies on it.
    String unescaped = unescapeQuotesAndBackSlash(path, isSingleQuoteUnescape);
    return unescaped.substring(1, unescaped.length() - 1);
  }

  /**
   * This method escapes backslash, double quotes and single quotes and wraps the result in the
   * selected quote character (keeping replacement of ' to \\\\\' as is so as to maintain backward
   * compatibility of any serialization/deserialization).
   *
   * <p>A backslash is always doubled. The quote character used as delimiter is prefixed with two
   * backslashes, the other quote character with a single backslash.
   */
  private static String escapeQuotesAndBackSlash(String path, boolean isSingleQuoteEscape) {
    String quoteChar = isSingleQuoteEscape ? "'" : "\"";
    StringBuilder sb = new StringBuilder(path.length() * 2).append(quoteChar);
    for (int i = 0; i < path.length(); i++) {
      char c = path.charAt(i);
      switch (c) {
        case '\\':
          sb.append("\\\\");
          break;
        case '"':
          sb.append(isSingleQuoteEscape ? "\\\"" : "\\\\\"");
          break;
        case '\'':
          sb.append(isSingleQuoteEscape ? "\\\\\'" : "\\\'");
          break;
        default:
          sb.append(c);
          break;
      }
    }
    return sb.append(quoteChar).toString();
  }

  /**
   * This method un escapes backslash, double quotes and single quotes (keeping replacement of
   * \\\\\' to ' as is so as to maintain backward compatibility of any
   * serialization/deserialization).
   *
   * <p>The quote sequences are replaced before the doubled backslashes are collapsed; the order
   * matters because both kinds of sequence start with a backslash.
   */
  private static String unescapeQuotesAndBackSlash(String path, boolean isSingleQuoteUnescape) {
    String withQuotesRestored = isSingleQuoteUnescape
        ? path.replace("\\\"", "\"").replace("\\\\\'", "'")
        : path.replace("\\\\\"", "\"").replace("\\\'", "'");
    return withQuotesRestored.replace("\\\\", "\\");
  }
}