/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.util;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Properties;
import java.util.Set;

import org.apache.commons.configuration.ConfigurationConverter;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import com.typesafe.config.ConfigParseOptions;
import com.typesafe.config.ConfigSyntax;

import gobblin.configuration.ConfigurationKeys;

import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
/**
* Used to load pull files from the file system.
*/
@Slf4j
@Getter
public class PullFileLoader {
public static final String GLOBAL_PROPS_EXTENSION = ".properties";
public static final PathFilter GLOBAL_PROPS_PATH_FILTER = new ExtensionFilter(GLOBAL_PROPS_EXTENSION);
public static final Set<String> DEFAULT_JAVA_PROPS_PULL_FILE_EXTENSIONS = Sets.newHashSet("pull", "job");
public static final Set<String> DEFAULT_HOCON_PULL_FILE_EXTENSIONS = Sets.newHashSet("json", "conf");
private final Path rootDirectory;
private final FileSystem fs;
private final ExtensionFilter javaPropsPullFileFilter;
private final ExtensionFilter hoconPullFileFilter;
/**
* A {@link PathFilter} that accepts {@link Path}s based on a set of valid extensions.
*/
private static class ExtensionFilter implements PathFilter {
private final Collection<String> extensions;
public ExtensionFilter(String extension) {
this(Lists.newArrayList(extension));
}
public ExtensionFilter(Collection<String> extensions) {
this.extensions = Lists.newArrayList();
for (String ext : extensions) {
this.extensions.add(ext.startsWith(".") ? ext : "." + ext);
}
}
@Override
public boolean accept(final Path path) {
Predicate<String> predicate = new Predicate<String>() {
@Override
public boolean apply(String input) {
return path.getName().toLowerCase().endsWith(input);
}
};
return Iterables.any(this.extensions, predicate);
}
}
public PullFileLoader(Path rootDirectory, FileSystem fs, Collection<String> javaPropsPullFileExtensions,
Collection<String> hoconPullFileExtensions) {
Set<String> commonExtensions = Sets.intersection(Sets.newHashSet(javaPropsPullFileExtensions),
Sets.newHashSet(hoconPullFileExtensions));
Preconditions.checkArgument(commonExtensions.isEmpty(),
"Java props and HOCON pull file extensions intersect: " + Arrays.toString(commonExtensions.toArray()));
this.rootDirectory = rootDirectory;
this.fs = fs;
this.javaPropsPullFileFilter = new ExtensionFilter(javaPropsPullFileExtensions);
this.hoconPullFileFilter = new ExtensionFilter(hoconPullFileExtensions);
}
/**
* Load a single pull file.
* @param path The {@link Path} to the pull file to load, full path
* @param sysProps A {@link Config} used as fallback.
* @param loadGlobalProperties if true, will also load at most one *.properties file per directory from the
* {@link #rootDirectory} to the pull file {@link Path}.
* @return The loaded {@link Config}.
* @throws IOException
*/
public Config loadPullFile(Path path, Config sysProps, boolean loadGlobalProperties) throws IOException {
Config fallback = loadGlobalProperties ? loadAncestorGlobalConfigs(path, sysProps) : sysProps;
if (this.javaPropsPullFileFilter.accept(path)) {
return loadJavaPropsWithFallback(path, fallback).resolve();
} else if (this.hoconPullFileFilter.accept(path)) {
return loadHoconConfigAtPath(path).withFallback(fallback).resolve();
} else {
throw new IOException(String.format("Cannot load pull file %s due to unrecognized extension.", path));
}
}
/**
* Find and load all pull files under a base {@link Path} recursively.
* @param path base {@link Path} where pull files should be found recursively.
* @param sysProps A {@link Config} used as fallback.
* @param loadGlobalProperties if true, will also load at most one *.properties file per directory from the
* {@link #rootDirectory} to the pull file {@link Path} for each pull file.
* @return The loaded {@link Config}s.
*/
public Collection<Config> loadPullFilesRecursively(Path path, Config sysProps, boolean loadGlobalProperties) {
try {
Config fallback = sysProps;
if (loadGlobalProperties && PathUtils.isAncestor(this.rootDirectory, path.getParent())) {
fallback = loadAncestorGlobalConfigs(path.getParent(), fallback);
}
return loadPullFilesRecursivelyHelper(path, fallback, loadGlobalProperties);
} catch (IOException ioe) {
return Lists.newArrayList();
}
}
private Collection<Config> loadPullFilesRecursivelyHelper(Path path, Config fallback, boolean loadGlobalProperties) {
List<Config> pullFiles = Lists.newArrayList();
try {
if (loadGlobalProperties) {
fallback = findAndLoadGlobalConfigInDirectory(path, fallback);
}
FileStatus[] statuses = this.fs.listStatus(path);
if (statuses == null) {
log.error("Path does not exist: " + path);
return pullFiles;
}
for (FileStatus status : statuses) {
try {
if (status.isDirectory()) {
pullFiles.addAll(loadPullFilesRecursivelyHelper(status.getPath(), fallback, loadGlobalProperties));
} else if (this.javaPropsPullFileFilter.accept(status.getPath())) {
pullFiles.add(loadJavaPropsWithFallback(status.getPath(), fallback).resolve());
} else if (this.hoconPullFileFilter.accept(status.getPath())) {
pullFiles.add(loadHoconConfigAtPath(status.getPath()).withFallback(fallback).resolve());
}
} catch (IOException ioe) {
// Failed to load specific subpath, try with the other subpaths in this directory
log.error(String.format("Failed to load %s. Skipping.", status.getPath()));
}
}
return pullFiles;
} catch (IOException ioe) {
log.error("Could not load properties at path: " + path, ioe);
return Lists.newArrayList();
}
}
/**
* Load at most one *.properties files from path and each ancestor of path up to and including {@link #rootDirectory}.
* Higher directories will serve as fallback for lower directories, and sysProps will serve as fallback for all of them.
* @throws IOException
*/
private Config loadAncestorGlobalConfigs(Path path, Config sysProps) throws IOException {
Config config = sysProps;
if (!PathUtils.isAncestor(this.rootDirectory, path)) {
log.warn(String.format("Loaded path %s is not a descendant of root path %s. Cannot load global properties.",
path, this.rootDirectory));
} else {
List<Path> ancestorPaths = Lists.newArrayList();
while (PathUtils.isAncestor(this.rootDirectory, path)) {
ancestorPaths.add(path);
path = path.getParent();
}
List<Path> reversedAncestors = Lists.reverse(ancestorPaths);
for (Path ancestor : reversedAncestors) {
config = findAndLoadGlobalConfigInDirectory(ancestor, config);
}
}
return config;
}
/**
* Find at most one *.properties file in the input {@link Path} and load it using fallback as fallback.
* @return The {@link Config} in path with sysProps as fallback.
* @throws IOException
*/
private Config findAndLoadGlobalConfigInDirectory(Path path, Config fallback) throws IOException {
FileStatus[] files = this.fs.listStatus(path, GLOBAL_PROPS_PATH_FILTER);
if (files == null) {
log.warn("Could not list files at path " + path);
return ConfigFactory.empty();
}
if (files.length > 1) {
throw new IOException("Found more than one global properties file at path " + path);
}
return files.length == 1 ? loadJavaPropsWithFallback(files[0].getPath(), fallback) : fallback;
}
/**
* Load a {@link Properties} compatible path using fallback as fallback.
* @return The {@link Config} in path with fallback as fallback.
* @throws IOException
*/
private Config loadJavaPropsWithFallback(Path propertiesPath, Config fallback) throws IOException {
PropertiesConfiguration propertiesConfiguration = new PropertiesConfiguration();
try (InputStreamReader inputStreamReader = new InputStreamReader(this.fs.open(propertiesPath),
Charsets.UTF_8)) {
propertiesConfiguration.load(inputStreamReader);
Config configFromProps =
ConfigUtils.propertiesToConfig(ConfigurationConverter.getProperties(propertiesConfiguration));
return ConfigFactory.parseMap(ImmutableMap.of(ConfigurationKeys.JOB_CONFIG_FILE_PATH_KEY,
PathUtils.getPathWithoutSchemeAndAuthority(propertiesPath).toString()))
.withFallback(configFromProps)
.withFallback(fallback);
} catch (ConfigurationException ce) {
throw new IOException(ce);
}
}
private Config loadHoconConfigAtPath(Path path) throws IOException {
try (InputStream is = fs.open(path);
Reader reader = new InputStreamReader(is, Charsets.UTF_8)) {
return ConfigFactory.parseMap(ImmutableMap.of(ConfigurationKeys.JOB_CONFIG_FILE_PATH_KEY,
PathUtils.getPathWithoutSchemeAndAuthority(path).toString()))
.withFallback(ConfigFactory.parseReader(reader, ConfigParseOptions.defaults().setSyntax(ConfigSyntax.CONF)));
}
}
}