/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.aws;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.util.Enumeration;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Optional;
import com.google.common.collect.Maps;
import com.google.common.eventbus.EventBus;
import com.typesafe.config.Config;
import gobblin.annotation.Alpha;
import gobblin.cluster.GobblinClusterConfigurationKeys;
import gobblin.cluster.GobblinHelixJobScheduler;
import gobblin.cluster.JobConfigurationManager;
import gobblin.cluster.event.NewJobConfigArrivalEvent;
import gobblin.configuration.ConfigurationKeys;
import gobblin.util.ExecutorsUtils;
import gobblin.util.SchedulerUtils;
import static gobblin.aws.GobblinAWSUtils.appendSlash;
/**
 * Class for managing AWS Gobblin job configurations.
 *
 * <p>
 *   On start-up this class schedules a periodic task that downloads a zip archive of job
 *   configuration files from the URI configured under
 *   {@link GobblinAWSConfigurationKeys#JOB_CONF_S3_URI_KEY}, extracts it below the directory
 *   configured under {@link GobblinClusterConfigurationKeys#JOB_CONF_PATH_KEY}, and loads the
 *   job configurations found there. Every job configuration that is new or whose content changed
 *   since the previous poll is announced through {@code postNewJobConfigArrival} (see
 *   {@link NewJobConfigArrivalEvent}) so that it can be picked up by the
 *   {@link GobblinHelixJobScheduler}.
 * </p>
 *
 * <p>
 *   Within this class, the bookkeeping of already-seen job configurations is only touched by the
 *   single-threaded refresh executor.
 * </p>
 *
 * @author Abhishek Tiwari
 */
@Alpha
public class AWSJobConfigurationManager extends JobConfigurationManager {
  private static final Logger LOGGER = LoggerFactory.getLogger(AWSJobConfigurationManager.class);

  /** Refresh interval, in seconds, used when none is configured. */
  private static final long DEFAULT_JOB_CONF_REFRESH_INTERVAL = 60;

  /** Separator between the path segments of a URI; always '/', independent of the platform. */
  private static final String URI_PATH_SEPARATOR = "/";

  /** URI of the job configuration zip archive; re-read from the config on every refresh. */
  private Optional<String> jobConfS3Uri = Optional.absent();

  /** Last seen job configuration, keyed by the path of the file it was loaded from. */
  private final Map<String, Properties> jobConfFiles;

  private final long refreshIntervalInSeconds;
  private final ScheduledExecutorService fetchJobConfExecutor;

  /**
   * Creates the manager and its single-threaded refresh executor. Nothing is scheduled until
   * {@link #startUp()} runs.
   *
   * @param eventBus event bus handed to the parent {@link JobConfigurationManager}
   * @param config application configuration; may define
   *               {@link GobblinAWSConfigurationKeys#JOB_CONF_REFRESH_INTERVAL}, otherwise a
   *               default of {@value #DEFAULT_JOB_CONF_REFRESH_INTERVAL} seconds is used
   */
  public AWSJobConfigurationManager(EventBus eventBus, Config config) {
    super(eventBus, config);

    this.jobConfFiles = Maps.newHashMap();
    this.refreshIntervalInSeconds = config.hasPath(GobblinAWSConfigurationKeys.JOB_CONF_REFRESH_INTERVAL)
        ? config.getDuration(GobblinAWSConfigurationKeys.JOB_CONF_REFRESH_INTERVAL, TimeUnit.SECONDS)
        : DEFAULT_JOB_CONF_REFRESH_INTERVAL;
    this.fetchJobConfExecutor = Executors.newSingleThreadScheduledExecutor(
        ExecutorsUtils.newThreadFactory(Optional.of(LOGGER), Optional.of("FetchJobConfExecutor")));
  }

  /**
   * Re-reads the local job configuration directory and the remote zip URI from the config.
   * Either value is left absent when its key is not defined.
   */
  private void fetchJobConfSettings() {
    this.jobConfDirPath = readOptionalString(GobblinClusterConfigurationKeys.JOB_CONF_PATH_KEY);
    this.jobConfS3Uri = readOptionalString(GobblinAWSConfigurationKeys.JOB_CONF_S3_URI_KEY);
  }

  /** Returns the string configured under {@code key}, or absent when the key is not defined. */
  private Optional<String> readOptionalString(String key) {
    if (config.hasPath(key)) {
      return Optional.of(config.getString(key));
    }
    return Optional.absent();
  }

  /**
   * Schedules the periodic job configuration refresh, starting immediately.
   */
  @Override
  protected void startUp() throws Exception {
    LOGGER.info("Starting the {}", AWSJobConfigurationManager.class.getSimpleName());

    LOGGER.info("Scheduling the job configuration refresh task with an interval of {} second(s)",
        this.refreshIntervalInSeconds);

    // Schedule the job config fetch task
    this.fetchJobConfExecutor.scheduleAtFixedRate(new Runnable() {
      @Override
      public void run() {
        try {
          fetchJobConf();
        } catch (Exception e) {
          // Deliberately not rethrown: an exception escaping a task scheduled with
          // scheduleAtFixedRate suppresses every subsequent execution, so a single failed
          // download would otherwise stop the refresh for good. Log and retry on the next tick.
          LOGGER.error("Failed to fetch job configurations", e);
        }
      }
    }, 0, this.refreshIntervalInSeconds, TimeUnit.SECONDS);
  }

  /**
   * Performs one refresh: downloads the job configuration zip, extracts it and posts an event for
   * every new or changed job configuration. Does nothing unless both the zip URI and the local job
   * configuration directory are configured.
   *
   * @throws IOException if the zip cannot be downloaded or extracted, or if no file name can be
   *                     derived from the configured URI
   * @throws ConfigurationException declared by the job configuration loader
   */
  private void fetchJobConf()
      throws IOException, ConfigurationException {
    // Refresh job config pull details from config
    fetchJobConfSettings();

    // TODO: Eventually when config store supports job files as well
    // .. we can replace this logic with config store
    if (!this.jobConfS3Uri.isPresent() || !this.jobConfDirPath.isPresent()) {
      return;
    }

    final String zipUri = this.jobConfS3Uri.get();

    // URIs are always '/'-separated; File.separator would be '\' on Windows and yield no name
    final String zipFileName = StringUtils.substringAfterLast(zipUri, URI_PATH_SEPARATOR);
    if (StringUtils.isEmpty(zipFileName)) {
      throw new IOException("Could not determine zip file name from job configuration uri: " + zipUri);
    }

    // Download the zip file
    final String zipFile = appendSlash(this.jobConfDirPath.get()) + zipFileName;
    LOGGER.debug("Downloading to zip: {} from uri: {}", zipFile, zipUri);
    FileUtils.copyURLToFile(new URL(zipUri), new File(zipFile));

    // Extract the zip file
    final String extractedPullFilesPath = appendSlash(this.jobConfDirPath.get()) + "files";
    final File jobConfigDir = new File(extractedPullFilesPath);
    LOGGER.debug("Extracting to directory: {} from zip: {}", extractedPullFilesPath, zipFile);
    unzipArchive(zipFile, jobConfigDir);

    // Load all new job configurations
    // TODO: Currently new and updated jobs are handled, we should un-schedule deleted jobs as well
    if (!jobConfigDir.exists()) {
      LOGGER.warn("Job configuration directory {} not found", jobConfigDir);
      return;
    }

    LOGGER.info("Loading job configurations from {}", jobConfigDir);
    final Properties loaderProperties = new Properties();
    loaderProperties.setProperty(ConfigurationKeys.JOB_CONFIG_FILE_GENERAL_PATH_KEY,
        jobConfigDir.getAbsolutePath());

    final List<Properties> jobConfigs = SchedulerUtils.loadGenericJobConfigs(loaderProperties);
    LOGGER.info("Loaded {} job configuration(s)", jobConfigs.size());

    for (Properties jobConfig : jobConfigs) {
      postIfNewOrUpdated(jobConfig);
    }
  }

  /**
   * Records {@code jobConfig} and posts a new-job-config arrival for it, unless an identical
   * configuration was already recorded for the same job configuration file.
   */
  private void postIfNewOrUpdated(Properties jobConfig) {
    LOGGER.debug("Config value: {}", jobConfig);

    final String jobConfigPathIdentifier = jobConfig.getProperty(ConfigurationKeys.JOB_CONFIG_FILE_PATH_KEY);
    final Properties previousJobConfig = this.jobConfFiles.get(jobConfigPathIdentifier);

    if (jobConfig.equals(previousJobConfig)) {
      LOGGER.info("Config not changed for job: {}", jobConfigPathIdentifier);
      return;
    }

    // If new config or existing config got updated, then post new job config arrival event
    this.jobConfFiles.put(jobConfigPathIdentifier, jobConfig);
    postNewJobConfigArrival(jobConfig.getProperty(ConfigurationKeys.JOB_NAME_KEY), jobConfig);

    if (previousJobConfig == null) {
      LOGGER.info("New config arrived for job: {}", jobConfigPathIdentifier);
    } else {
      LOGGER.info("Config updated for job: {}", jobConfigPathIdentifier);
    }
  }

  /**
   * Unzip a zip archive.
   *
   * <p>
   *   Every entry must resolve to a location inside {@code outputDir}; an entry whose name would
   *   escape it (for example through {@code ..} segments) aborts the extraction.
   * </p>
   *
   * @param file Zip file to unarchive
   * @param outputDir Output directory for the unarchived file
   * @throws IOException If any issue occurs in unzipping the file, including an entry that
   *                     resolves outside of {@code outputDir}
   */
  public void unzipArchive(String file, File outputDir)
      throws IOException {
    final String canonicalOutputDir = outputDir.getCanonicalPath();
    // Trailing separator so that a sibling such as "<outputDir>-evil" does not pass the prefix check
    final String canonicalOutputDirPrefix = canonicalOutputDir.endsWith(File.separator)
        ? canonicalOutputDir
        : canonicalOutputDir + File.separator;

    try (ZipFile zipFile = new ZipFile(file)) {
      final Enumeration<? extends ZipEntry> entries = zipFile.entries();

      while (entries.hasMoreElements()) {
        final ZipEntry entry = entries.nextElement();
        final File entryDestination = new File(outputDir, entry.getName());

        // Guard against path traversal ("Zip Slip") before touching the file system
        final String canonicalDestination = entryDestination.getCanonicalPath();
        if (!canonicalDestination.equals(canonicalOutputDir)
            && !canonicalDestination.startsWith(canonicalOutputDirPrefix)) {
          throw new IOException("Zip entry: " + entry.getName() + " resolves outside of output directory: "
              + outputDir + " while un-archiving zip: " + file);
        }

        if (entry.isDirectory()) {
          // If entry is directory, create directory
          if (!entryDestination.mkdirs() && !entryDestination.exists()) {
            throw new IOException("Could not create directory: " + entryDestination
                + " while un-archiving zip: " + file);
          }
          continue;
        }

        // Create parent dirs if required
        final File parentDir = entryDestination.getParentFile();
        if (!parentDir.mkdirs() && !parentDir.exists()) {
          throw new IOException("Could not create parent directory for: " + entryDestination
              + " while un-archiving zip: " + file);
        }

        // Extract and save the conf file; a failure while closing the output is reported, not swallowed
        try (InputStream in = zipFile.getInputStream(entry);
            OutputStream out = new FileOutputStream(entryDestination)) {
          IOUtils.copy(in, out);
        }
      }
    }
  }

  /**
   * Shuts down the job configuration refresh executor.
   */
  @Override
  protected void shutDown() throws Exception {
    GobblinAWSUtils.shutdownExecutorService(this.getClass(), this.fetchJobConfExecutor, LOGGER);
  }
}