/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.data.management.dataset;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Properties;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import com.google.common.collect.Lists;
import gobblin.data.management.copy.CopyableFile;
import gobblin.data.management.copy.CopyableFileFilter;
import gobblin.dataset.DatasetsFinder;
import gobblin.util.reflection.GobblinConstructorUtils;
/**
* Utilities for datasets.
*/
public class DatasetUtils {
public static final String CONFIGURATION_KEY_PREFIX = "gobblin.dataset.";
public static final String DATASET_PROFILE_CLASS_KEY = CONFIGURATION_KEY_PREFIX + "profile.class";
private static final String PATH_FILTER_KEY = CONFIGURATION_KEY_PREFIX + "path.filter.class";
private static final String COPYABLE_FILE_FILTER_KEY = CONFIGURATION_KEY_PREFIX + "copyable.file.filter.class";
private static final PathFilter ACCEPT_ALL_PATH_FILTER = new PathFilter() {
@Override
public boolean accept(Path path) {
return true;
}
};
private static final CopyableFileFilter ACCEPT_ALL_COPYABLE_FILE_FILTER = new CopyableFileFilter() {
@Override
public Collection<CopyableFile> filter(FileSystem sourceFs, FileSystem targetFs,
Collection<CopyableFile> copyableFiles) {
return copyableFiles;
}
};
/**
* Instantiate a {@link DatasetsFinder}. The class of the {@link DatasetsFinder} is read from property
* {@link #DATASET_PROFILE_CLASS_KEY}.
*
* @param props Properties used for building {@link DatasetsFinder}.
* @param fs {@link FileSystem} where datasets are located.
* @return A new instance of {@link DatasetsFinder}.
* @throws IOException
*/
@SuppressWarnings("unchecked")
public static <T extends gobblin.dataset.Dataset> DatasetsFinder<T> instantiateDatasetFinder(Properties props,
FileSystem fs, String default_class, Object... additionalArgs)
throws IOException {
String className = default_class;
if (props.containsKey(DATASET_PROFILE_CLASS_KEY)) {
className = props.getProperty(DATASET_PROFILE_CLASS_KEY);
}
try {
Class<?> datasetFinderClass = Class.forName(className);
List<Object> args = Lists.newArrayList(fs, props);
if (additionalArgs != null) {
args.addAll(Lists.newArrayList(additionalArgs));
}
return (DatasetsFinder<T>) GobblinConstructorUtils.invokeLongestConstructor(datasetFinderClass, args.toArray());
} catch (ReflectiveOperationException exception) {
throw new IOException(exception);
}
}
/**
* Instantiate a {@link PathFilter} from the class name at key {@link #PATH_FILTER_KEY} in props passed. If key
* {@link #PATH_FILTER_KEY} is not set, a default {@link #ACCEPT_ALL_PATH_FILTER} is returned
*
* @param props that contain path filter classname at {@link #PATH_FILTER_KEY}
* @return a new instance of {@link PathFilter}. If not key is found, returns an {@link #ACCEPT_ALL_PATH_FILTER}
*/
public static PathFilter instantiatePathFilter(Properties props) {
if (!props.containsKey(PATH_FILTER_KEY)) {
return ACCEPT_ALL_PATH_FILTER;
}
try {
Class<?> pathFilterClass = Class.forName(props.getProperty(PATH_FILTER_KEY));
return (PathFilter) pathFilterClass.newInstance();
} catch (ClassNotFoundException exception) {
throw new RuntimeException(exception);
} catch (InstantiationException exception) {
throw new RuntimeException(exception);
} catch (IllegalAccessException exception) {
throw new RuntimeException(exception);
}
}
/**
* Instantiate a {@link CopyableFileFilter} from the class name at key {@link #COPYABLE_FILE_FILTER_KEY} in props
* passed. If key {@link #COPYABLE_FILE_FILTER_KEY} is not set, a default {@link #ACCEPT_ALL_COPYABLE_FILE_FILTER} is
* returned
*
* @param props that contain path filter classname at {@link #COPYABLE_FILE_FILTER_KEY}
* @return a new instance of {@link PathFilter}. If not key is found, returns an
* {@link #ACCEPT_ALL_COPYABLE_FILE_FILTER}
*/
public static CopyableFileFilter instantiateCopyableFileFilter(Properties props, Object... additionalArgs) {
if (!props.containsKey(COPYABLE_FILE_FILTER_KEY)) {
return ACCEPT_ALL_COPYABLE_FILE_FILTER;
}
try {
Class<?> copyableFileFilterClass = Class.forName(props.getProperty(COPYABLE_FILE_FILTER_KEY));
return (CopyableFileFilter) GobblinConstructorUtils
.invokeLongestConstructor(copyableFileFilterClass, additionalArgs);
} catch (ReflectiveOperationException exception) {
throw new RuntimeException(exception);
}
}
}