/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.data.management.copy.hive;
import java.io.IOException;
import java.io.Serializable;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import com.google.common.base.Optional;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.SetMultimap;
import com.google.common.collect.Sets;
import com.typesafe.config.Config;
/**
* A whitelist / blacklist implementation for filtering Hive tables. Parses input whitelist and blacklist of the form
* [dbpattern.tablepattern1|tablepattern2|...],... and filters accordingly. The db and table patterns accept "*"
* characters. Each of whitelist and blacklist is a list of patterns. For a table to be accepted, it must fail the
* blacklist filter and pass the whitelist filter. Empty whitelist or blacklist are noops.
*
* <p>
* Example whitelist and blacklist patterns:
* * db1.table1 -> only db1.table1 passes.
* * db1 -> any table under db1 passes.
* * db1.table* -> any table under db1 whose name satisfies the pattern table* passes.
* * db* -> all tables from all databases whose names satisfy the pattern db* pass.
* * db*.table* -> db and table must satisfy the patterns db* and table* respectively
* * db1.table1,db2.table2 -> combine expressions for different databases with comma.
* * db1.table1|table2 -> combine expressions for same database with "|".
* </p>
*/
public class WhitelistBlacklist implements Serializable {

  /** Config key holding the whitelist expression. */
  public static final String WHITELIST = "whitelist";
  /** Config key holding the blacklist expression. */
  public static final String BLACKLIST = "blacklist";

  /** Table pattern used for entries that cover every table of a database. */
  private static final Pattern ALL_TABLES = Pattern.compile(".*");

  // Splitters are immutable, so they are shared instead of being rebuilt on every parse.
  private static final Splitter TOKEN_SPLITTER = Splitter.on(",").omitEmptyStrings().trimResults();
  private static final Splitter PART_SPLITTER = Splitter.on(".").omitEmptyStrings().trimResults();
  private static final Splitter TABLE_SPLITTER = Splitter.on("|").omitEmptyStrings().trimResults();

  /** Database pattern -> table patterns that are explicitly accepted. Empty means "accept everything". */
  private final SetMultimap<Pattern, Pattern> whitelistMultimap;
  /** Database pattern -> table patterns that are rejected. Empty means "reject nothing". */
  private final SetMultimap<Pattern, Pattern> blacklistMultimap;

  /**
   * Builds the filter from the {@link #WHITELIST} and {@link #BLACKLIST} keys of a config. A missing key is
   * treated as an empty list.
   *
   * @param config configuration to read the two lists from.
   * @throws IOException if one of the lists contains an invalid token.
   */
  public WhitelistBlacklist(Config config) throws IOException {
    // Case normalization is done once, by the delegate constructor.
    this(config.hasPath(WHITELIST) ? config.getString(WHITELIST) : "",
        config.hasPath(BLACKLIST) ? config.getString(BLACKLIST) : "");
  }

  /**
   * Builds the filter from whitelist and blacklist expressions. Both expressions are lower-cased before parsing,
   * and the names passed to {@link #acceptDb(String)} / {@link #acceptTable(String, String)} are lower-cased before
   * matching, so matching is case-insensitive.
   *
   * @param whitelist whitelist expression; null or empty means no whitelist filtering.
   * @param blacklist blacklist expression; null or empty means no blacklist filtering.
   * @throws IOException if one of the expressions contains an invalid token.
   */
  public WhitelistBlacklist(String whitelist, String blacklist) throws IOException {
    this.whitelistMultimap = HashMultimap.create();
    this.blacklistMultimap = HashMultimap.create();
    populateMultimap(this.whitelistMultimap, normalizeList(whitelist));
    populateMultimap(this.blacklistMultimap, normalizeList(blacklist));
  }

  /**
   * @param db database name; must not be null.
   * @return Whether database db might contain tables accepted by this {@link WhitelistBlacklist}. A database is
   *         rejected by the blacklist only when a matching blacklist entry covers all of its tables.
   */
  public boolean acceptDb(String db) {
    // Patterns are stored lower-cased, so the name must be lower-cased too (as acceptTable does).
    return accept(db.toLowerCase(Locale.ROOT), Optional.<String> absent());
  }

  /**
   * @param db database name; must not be null.
   * @param table table name; when null only the database is checked, as in {@link #acceptDb(String)}.
   * @return Whether the input table is accepted by this {@link WhitelistBlacklist}.
   */
  public boolean acceptTable(String db, String table) {
    Optional<String> tableName =
        table == null ? Optional.<String> absent() : Optional.of(table.toLowerCase(Locale.ROOT));
    return accept(db.toLowerCase(Locale.ROOT), tableName);
  }

  /**
   * Applies the blacklist first, then the whitelist. Both arguments are expected to be lower-cased already.
   */
  private boolean accept(String db, Optional<String> table) {
    if (!this.blacklistMultimap.isEmpty() && multimapContains(this.blacklistMultimap, db, table, true)) {
      return false;
    }
    return this.whitelistMultimap.isEmpty() || multimapContains(this.whitelistMultimap, db, table, false);
  }

  /** Lower-cases a list expression independently of the default locale; null is treated as an empty list. */
  private static String normalizeList(String list) {
    return Strings.nullToEmpty(list).toLowerCase(Locale.ROOT);
  }

  /**
   * Parses a list expression of the form {@code db.table1|table2,db2,...} into the multimap.
   *
   * <p>
   * Each "*" is translated to the regex ".*"; the remaining text of a pattern is handed to
   * {@link Pattern#compile(String)} unchanged, so it is interpreted as a regular expression.
   * </p>
   *
   * @param multimap destination, keyed by database pattern.
   * @param list comma separated tokens, already lower-cased.
   * @throws IOException if a token has no database part or has more than two dot-separated parts.
   */
  private static void populateMultimap(SetMultimap<Pattern, Pattern> multimap, String list) throws IOException {
    // TOKEN_SPLITTER trims and omits empty strings, so every token here is non-empty.
    for (String token : TOKEN_SPLITTER.split(list)) {
      List<String> parts = PART_SPLITTER.splitToList(token);
      // A token made only of dots (e.g. ".") produces no parts at all; report it like any other bad token
      // instead of failing on parts.get(0).
      if (parts.isEmpty() || parts.size() > 2) {
        throw new IOException("Invalid token " + token);
      }

      Pattern databasePattern = Pattern.compile(parts.get(0).replace("*", ".*"));
      Set<Pattern> tablePatterns = Sets.newHashSet();
      if (parts.size() == 2) {
        for (String table : TABLE_SPLITTER.split(parts.get(1))) {
          if (table.equals("*")) {
            // Reuse the shared instance for the "all tables" wildcard.
            tablePatterns.add(ALL_TABLES);
          } else {
            tablePatterns.add(Pattern.compile(table.replace("*", ".*")));
          }
        }
      } else {
        // No table part: the entry applies to every table of the database.
        tablePatterns.add(ALL_TABLES);
      }
      multimap.putAll(databasePattern, tablePatterns);
    }
  }

  /**
   * Checks whether a database, or a table of that database, is matched by any entry of the multimap.
   *
   * @param multimap parsed whitelist or blacklist.
   * @param database lower-cased database name.
   * @param table lower-cased table name, or absent to match at database level only.
   * @param blacklist whether the multimap is a blacklist. At database level a blacklist entry matches only if it
   *        covers all tables, while any whitelist entry for the database matches.
   * @return true if some entry matches.
   */
  private static boolean multimapContains(SetMultimap<Pattern, Pattern> multimap, String database,
      Optional<String> table, boolean blacklist) {
    for (Pattern dbPattern : multimap.keySet()) {
      if (!dbPattern.matcher(database).matches()) {
        continue;
      }
      Set<Pattern> tablePatterns = multimap.get(dbPattern);
      if (!table.isPresent()) {
        // Database-level match. Keep scanning when this blacklist entry only covers some tables: another
        // matching entry may still cover the whole database, and key iteration order is unspecified.
        if (!blacklist || containsAllTables(tablePatterns)) {
          return true;
        }
        continue;
      }
      for (Pattern tablePattern : tablePatterns) {
        if (tablePattern.matcher(table.get()).matches()) {
          return true;
        }
      }
    }
    return false;
  }

  /**
   * Returns whether the set holds the "all tables" pattern. The comparison is done on the pattern string because
   * {@link Pattern} does not override equals, so an identity based lookup stops working once this object has been
   * serialized and deserialized.
   */
  private static boolean containsAllTables(Set<Pattern> tablePatterns) {
    for (Pattern tablePattern : tablePatterns) {
      if (ALL_TABLES.pattern().equals(tablePattern.pattern())) {
        return true;
      }
    }
    return false;
  }
}