/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.yarn;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.nio.ByteBuffer;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.mail.EmailException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.api.YarnClientApplication;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.util.Records;
import org.apache.helix.Criteria;
import org.apache.helix.HelixManager;
import org.apache.helix.HelixManagerFactory;
import org.apache.helix.InstanceType;
import org.apache.helix.model.Message;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.eventbus.EventBus;
import com.google.common.eventbus.Subscribe;
import com.google.common.io.Closer;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.Service;
import com.google.common.util.concurrent.ServiceManager;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import gobblin.admin.AdminWebServer;
import gobblin.cluster.GobblinClusterConfigurationKeys;
import gobblin.cluster.GobblinClusterUtils;
import gobblin.cluster.GobblinHelixConstants;
import gobblin.cluster.HelixUtils;
import gobblin.configuration.ConfigurationKeys;
import gobblin.rest.JobExecutionInfoServer;
import gobblin.util.ConfigUtils;
import gobblin.util.EmailUtils;
import gobblin.util.ExecutorsUtils;
import gobblin.util.io.StreamUtils;
import gobblin.util.JvmUtils;
import gobblin.util.logs.LogCopier;
import gobblin.yarn.event.ApplicationReportArrivalEvent;
import gobblin.yarn.event.GetApplicationReportFailureEvent;
/**
* A client driver to launch Gobblin as a Yarn application.
*
* <p>
* This class, upon starting, will check if there's a Yarn application that it has previously submitted and
* it is able to reconnect to. More specifically, it checks if an application with the same application name
* exists and can be reconnected to, i.e., if the application has not completed yet. If so, it simply starts
* monitoring that application.
* </p>
*
* <p>
* On the other hand, if there's no such reconnectable Yarn application, this class will launch a new Yarn
* application and start the {@link GobblinApplicationMaster}. It also persists the new application ID so it
* is able to reconnect to the Yarn application if it is restarted for some reason. Once the application is
* launched, this class starts to monitor the application by periodically polling the status of the application
* through a {@link ListeningExecutorService}.
* </p>
*
* <p>
* If a shutdown signal is received, it sends a Helix
* {@link org.apache.helix.model.Message.MessageType#SCHEDULER_MSG} to the {@link GobblinApplicationMaster}
* asking it to shutdown and release all the allocated containers. It also sends an email notification for
* the shutdown if {@link GobblinYarnConfigurationKeys#EMAIL_NOTIFICATION_ON_SHUTDOWN_KEY} is {@code true}.
* </p>
*
* <p>
* This class has a scheduled task to get the {@link ApplicationReport} of the Yarn application periodically.
* Since it may fail to get the {@link ApplicationReport} for reasons such as the Yarn cluster being down for
* maintenance, it keeps track of the count of consecutive failures to get the {@link ApplicationReport}. If
* this count exceeds the maximum number allowed, it will initiate a shutdown.
* </p>
*
* @author Yinan Li
*/
public class GobblinYarnAppLauncher {
private static final Logger LOGGER = LoggerFactory.getLogger(GobblinYarnAppLauncher.class);
// Splits comma-separated path lists from the configuration, trimming entries and dropping empty ones
private static final Splitter SPLITTER = Splitter.on(',').omitEmptyStrings().trimResults();
// Application type set on the submission context and used to filter applications when reconnecting
private static final String GOBBLIN_YARN_APPLICATION_TYPE = "GOBBLIN_YARN";
// The set of Yarn application types this class is interested in. This is used to
// lookup the application this class has launched previously upon restarting.
private static final Set<String> APPLICATION_TYPES = ImmutableSet.of(GOBBLIN_YARN_APPLICATION_TYPE);
// The set of Yarn application states under which the driver can reconnect to the Yarn application after restart
private static final EnumSet<YarnApplicationState> RECONNECTABLE_APPLICATION_STATES = EnumSet.of(
YarnApplicationState.NEW,
YarnApplicationState.NEW_SAVING,
YarnApplicationState.SUBMITTED,
YarnApplicationState.ACCEPTED,
YarnApplicationState.RUNNING
);
// Name of the Yarn application; also used to match a previously submitted application
private final String applicationName;
// Yarn queue the application is submitted to
private final String appQueueName;
private final Config config;
// Helix manager created with InstanceType.SPECTATOR; used to send the shutdown message
private final HelixManager helixManager;
private final Configuration yarnConfiguration;
private final YarnClient yarnClient;
// File system the application work directory lives on; registered with the closer below
private final FileSystem fs;
// Carries ApplicationReportArrivalEvent and GetApplicationReportFailureEvent to the @Subscribe handlers
private final EventBus eventBus = new EventBus(GobblinYarnAppLauncher.class.getSimpleName());
// Single-threaded scheduler that periodically polls the ApplicationReport
private final ScheduledExecutorService applicationStatusMonitor;
// Polling interval of the status monitor, in minutes
private final long appReportIntervalMinutes;
// Optional extra JVM arguments for the ApplicationMaster command line
private final Optional<String> appMasterJvmArgs;
// Root directory under which the log copier writes copied logs
private final Path sinkLogRootDir;
// Collects resources (file systems) that are closed when the launcher stops
private final Closer closer = Closer.create();
// Yarn application ID
private volatile Optional<ApplicationId> applicationId = Optional.absent();
// Manager of the auxiliary services started by launch(); absent until launch() has created it
private volatile Optional<ServiceManager> serviceManager = Optional.absent();
// Maximum number of consecutive failures allowed to get the ApplicationReport
private final int maxGetApplicationReportFailures;
// A count on the number of consecutive failures on getting the ApplicationReport
private final AtomicInteger getApplicationReportFailureCount = new AtomicInteger();
// This flag tells if the Yarn application has already completed. This is used to
// tell if it is necessary to send a shutdown message to the ApplicationMaster.
private volatile boolean applicationCompleted = false;
// Set once stop() has run so that later stop() calls return immediately
private volatile boolean stopped = false;
// Whether an email notification is sent when the launcher shuts down
private final boolean emailNotificationOnShutdown;
/**
 * Creates a launcher from the given application and Yarn configurations.
 *
 * <p>
 * Reads the application settings from {@code config}, creates (but does not connect) the spectator
 * {@link HelixManager}, initializes (but does not start) the {@link YarnClient}, and opens the
 * {@link FileSystem} used for the application work directory.
 * </p>
 *
 * @param config the application configuration
 * @param yarnConfiguration the Yarn configuration; {@code fs.automatic.close} is set to {@code false} on it
 * @throws IOException if the {@link FileSystem} cannot be obtained
 */
public GobblinYarnAppLauncher(Config config, YarnConfiguration yarnConfiguration) throws IOException {
  this.config = config;

  this.applicationName = config.getString(GobblinYarnConfigurationKeys.APPLICATION_NAME_KEY);
  this.appQueueName = config.getString(GobblinYarnConfigurationKeys.APP_QUEUE_KEY);

  String zkConnectionString = config.getString(GobblinClusterConfigurationKeys.ZK_CONNECTION_STRING_KEY);
  LOGGER.info("Using ZooKeeper connection string: " + zkConnectionString);

  String helixClusterName = config.getString(GobblinClusterConfigurationKeys.HELIX_CLUSTER_NAME_KEY);
  this.helixManager = HelixManagerFactory.getZKHelixManager(
      helixClusterName, GobblinClusterUtils.getHostname(), InstanceType.SPECTATOR, zkConnectionString);

  // The launcher closes its file systems itself through the closer when it stops
  this.yarnConfiguration = yarnConfiguration;
  this.yarnConfiguration.set("fs.automatic.close", "false");

  this.yarnClient = YarnClient.createYarnClient();
  this.yarnClient.init(this.yarnConfiguration);

  // Use the explicitly configured file system URI when there is one, the default file system otherwise
  if (config.hasPath(ConfigurationKeys.FS_URI_KEY)) {
    URI fsUri = URI.create(config.getString(ConfigurationKeys.FS_URI_KEY));
    this.fs = FileSystem.get(fsUri, this.yarnConfiguration);
  } else {
    this.fs = FileSystem.get(this.yarnConfiguration);
  }
  this.closer.register(this.fs);

  this.applicationStatusMonitor = Executors.newSingleThreadScheduledExecutor(
      ExecutorsUtils.newThreadFactory(Optional.of(LOGGER), Optional.of("GobblinYarnAppStatusMonitor")));
  this.appReportIntervalMinutes = config.getLong(GobblinYarnConfigurationKeys.APP_REPORT_INTERVAL_MINUTES_KEY);

  if (config.hasPath(GobblinYarnConfigurationKeys.APP_MASTER_JVM_ARGS_KEY)) {
    this.appMasterJvmArgs = Optional.of(config.getString(GobblinYarnConfigurationKeys.APP_MASTER_JVM_ARGS_KEY));
  } else {
    this.appMasterJvmArgs = Optional.<String>absent();
  }

  this.sinkLogRootDir = new Path(config.getString(GobblinYarnConfigurationKeys.LOGS_SINK_ROOT_DIR_KEY));
  this.maxGetApplicationReportFailures =
      config.getInt(GobblinYarnConfigurationKeys.MAX_GET_APP_REPORT_FAILURES_KEY);
  this.emailNotificationOnShutdown =
      config.getBoolean(GobblinYarnConfigurationKeys.EMAIL_NOTIFICATION_ON_SHUTDOWN_KEY);
}
/**
 * Launch Gobblin on Yarn, or reconnect to a previously submitted application with the same name.
 *
 * <p>
 * Creates the Helix cluster, connects the Helix manager, starts the Yarn client, resolves the
 * application ID, schedules the periodic {@link ApplicationReport} polling, and finally starts the
 * auxiliary services of this launcher.
 * </p>
 *
 * @throws IOException if there's something wrong launching the application
 * @throws YarnException if there's something wrong launching the application
 */
public void launch() throws IOException, YarnException {
  // Register first so the @Subscribe handlers receive the events posted by the status monitor
  this.eventBus.register(this);

  String clusterName = this.config.getString(GobblinClusterConfigurationKeys.HELIX_CLUSTER_NAME_KEY);
  String zkConnectionString = this.config.getString(GobblinClusterConfigurationKeys.ZK_CONNECTION_STRING_KEY);
  HelixUtils.createGobblinHelixCluster(zkConnectionString, clusterName);
  LOGGER.info("Created Helix cluster " + clusterName);

  connectHelixManager();
  startYarnClient();

  this.applicationId = getApplicationId();

  // Poll the application report right away and then once per configured interval
  Runnable applicationReportPoller = new Runnable() {
    @Override
    public void run() {
      try {
        ApplicationReport report = yarnClient.getApplicationReport(applicationId.get());
        eventBus.post(new ApplicationReportArrivalEvent(report));
      } catch (YarnException | IOException e) {
        LOGGER.error("Failed to get application report for Gobblin Yarn application " + applicationId.get(), e);
        eventBus.post(new GetApplicationReportFailureEvent(e));
      }
    }
  };
  this.applicationStatusMonitor.scheduleAtFixedRate(
      applicationReportPoller, 0, this.appReportIntervalMinutes, TimeUnit.MINUTES);

  List<Service> services = Lists.newArrayList();

  if (this.config.hasPath(GobblinYarnConfigurationKeys.KEYTAB_FILE_PATH)) {
    LOGGER.info("Adding YarnAppSecurityManager since login is keytab based");
    services.add(buildYarnAppSecurityManager());
  }

  // The driver-side log copier runs unless it has been explicitly disabled
  boolean driverLogCopyDisabled =
      this.config.hasPath(GobblinYarnConfigurationKeys.LOG_COPIER_DISABLE_DRIVER_COPY)
          && this.config.getBoolean(GobblinYarnConfigurationKeys.LOG_COPIER_DISABLE_DRIVER_COPY);
  if (!driverLogCopyDisabled) {
    Path sinkLogDir = new Path(this.sinkLogRootDir,
        this.applicationName + Path.SEPARATOR + this.applicationId.get().toString());
    Path appWorkDir =
        GobblinClusterUtils.getAppWorkDirPath(this.fs, this.applicationName, this.applicationId.get().toString());
    services.add(buildLogCopier(this.config, sinkLogDir, appWorkDir));
  }

  // The admin UI depends on the job execution info server, so it is only started alongside it
  if (config.getBoolean(ConfigurationKeys.JOB_EXECINFO_SERVER_ENABLED_KEY)) {
    LOGGER.info("Starting the job execution info server since it is enabled");
    Properties properties = ConfigUtils.configToProperties(config);
    JobExecutionInfoServer executionInfoServer = new JobExecutionInfoServer(properties);
    services.add(executionInfoServer);
    if (config.getBoolean(ConfigurationKeys.ADMIN_SERVER_ENABLED_KEY)) {
      LOGGER.info("Starting the admin UI server since it is enabled");
      services.add(new AdminWebServer(properties, executionInfoServer.getAdvertisedServerUri()));
    }
  } else if (config.getBoolean(ConfigurationKeys.ADMIN_SERVER_ENABLED_KEY)) {
    LOGGER.warn("NOT starting the admin UI because the job execution info server is NOT enabled");
  }

  // Start all the services collected above; they run in this launcher process
  ServiceManager manager = new ServiceManager(services);
  this.serviceManager = Optional.of(manager);
  manager.startAsync();
}
/**
 * Stop this {@link GobblinYarnAppLauncher} instance.
 *
 * <p>
 * Only the first call does any work; every later call returns immediately, even if the first call
 * failed part-way. The teardown closes the file system and the other resources registered with the
 * closer, so running it a second time would operate on already-closed resources.
 * </p>
 *
 * @throws IOException if this {@link GobblinYarnAppLauncher} instance fails to clean up its working directory.
 * @throws TimeoutException if the service manager does not stop within 5 minutes
 */
public synchronized void stop() throws IOException, TimeoutException {
  if (this.stopped) {
    return;
  }
  // Mark as stopped before doing any work: if a step below throws, a later stop() (for example from
  // the JVM shutdown hook) must not repeat the teardown against the already-closed file system.
  this.stopped = true;

  LOGGER.info("Stopping the " + GobblinYarnAppLauncher.class.getSimpleName());

  try {
    if (this.applicationId.isPresent() && !this.applicationCompleted) {
      // Only send the shutdown message if the application has been successfully submitted and is still running
      sendShutdownRequest();
    }

    if (this.serviceManager.isPresent()) {
      this.serviceManager.get().stopAsync().awaitStopped(5, TimeUnit.MINUTES);
    }

    // NOTE(review): stop() is also invoked from the event handlers, which presumably run on the
    // status-monitor thread; confirm this shutdown does not wait on the calling thread itself.
    ExecutorsUtils.shutdownExecutorService(this.applicationStatusMonitor, Optional.of(LOGGER), 5, TimeUnit.MINUTES);

    stopYarnClient();
    disconnectHelixManager();
  } finally {
    // Always attempt the work-directory cleanup and always close the registered resources
    try {
      if (this.applicationId.isPresent()) {
        cleanUpAppWorkDirectory(this.applicationId.get());
      }
    } finally {
      this.closer.close();
    }
  }
}
/**
 * Handles a successfully fetched {@link ApplicationReport}.
 *
 * <p>
 * Resets the consecutive-failure counter. If the application state is {@code FINISHED}, {@code FAILED} or
 * {@code KILLED}, marks the application as completed, stops this launcher, and sends the shutdown email
 * when email notification is enabled.
 * </p>
 *
 * @param applicationReportArrivalEvent the event carrying the {@link ApplicationReport}
 */
@Subscribe
public void handleApplicationReportArrivalEvent(ApplicationReportArrivalEvent applicationReportArrivalEvent) {
  ApplicationReport report = applicationReportArrivalEvent.getApplicationReport();
  YarnApplicationState state = report.getYarnApplicationState();
  LOGGER.info("Gobblin Yarn application state: " + state.toString());

  // Reset the count on failures to get the ApplicationReport when there's one success
  this.getApplicationReportFailureCount.set(0);

  boolean reachedTerminalState = state == YarnApplicationState.FINISHED
      || state == YarnApplicationState.FAILED
      || state == YarnApplicationState.KILLED;
  if (!reachedTerminalState) {
    return;
  }

  // From here on stop() no longer needs to ask the ApplicationMaster to shut down
  applicationCompleted = true;

  LOGGER.info("Gobblin Yarn application finished with final status: " +
      report.getFinalApplicationStatus().toString());
  if (report.getFinalApplicationStatus() == FinalApplicationStatus.FAILED) {
    LOGGER.error("Gobblin Yarn application failed for the following reason: " + report.getDiagnostics());
  }

  try {
    stop();
  } catch (IOException ioe) {
    LOGGER.error("Failed to close the " + GobblinYarnAppLauncher.class.getSimpleName(), ioe);
  } catch (TimeoutException te) {
    LOGGER.error("Timeout in stopping the service manager", te);
  } finally {
    if (this.emailNotificationOnShutdown) {
      sendEmailOnShutdown(Optional.of(report));
    }
  }
}
/**
 * Handles a failed attempt to fetch the {@link ApplicationReport}.
 *
 * <p>
 * Increments the consecutive-failure counter. Once the counter exceeds the configured maximum, stops this
 * launcher and sends the shutdown email (without a report) when email notification is enabled.
 * </p>
 *
 * @param getApplicationReportFailureEvent the failure event; its content is not inspected
 */
@Subscribe
public void handleGetApplicationReportFailureEvent(
    @SuppressWarnings("unused") GetApplicationReportFailureEvent getApplicationReportFailureEvent) {
  int failuresInARow = this.getApplicationReportFailureCount.incrementAndGet();
  if (failuresInARow <= this.maxGetApplicationReportFailures) {
    // Still within the tolerated number of consecutive failures
    return;
  }

  LOGGER.warn(String
      .format("Number of consecutive failures to get the ApplicationReport %d exceeds the threshold %d",
          failuresInARow, this.maxGetApplicationReportFailures));

  try {
    stop();
  } catch (IOException ioe) {
    LOGGER.error("Failed to close the " + GobblinYarnAppLauncher.class.getSimpleName(), ioe);
  } catch (TimeoutException te) {
    LOGGER.error("Timeout in stopping the service manager", te);
  } finally {
    if (this.emailNotificationOnShutdown) {
      sendEmailOnShutdown(Optional.<ApplicationReport>absent());
    }
  }
}
/**
 * Connects the {@link HelixManager}. Any failure is logged and rethrown through
 * {@link Throwables#propagate(Throwable)}.
 */
@VisibleForTesting
void connectHelixManager() {
  try {
    helixManager.connect();
  } catch (Exception connectFailure) {
    LOGGER.error("HelixManager failed to connect", connectFailure);
    throw Throwables.propagate(connectFailure);
  }
}
/**
 * Disconnects the {@link HelixManager} if it is currently connected; does nothing otherwise.
 */
@VisibleForTesting
void disconnectHelixManager() {
  if (!helixManager.isConnected()) {
    return;
  }
  helixManager.disconnect();
}
/**
 * Starts the {@link YarnClient} that was initialized in the constructor.
 */
@VisibleForTesting
void startYarnClient() {
  yarnClient.start();
}
/**
 * Stops the {@link YarnClient}.
 */
@VisibleForTesting
void stopYarnClient() {
  yarnClient.stop();
}
/**
 * Resolves the ID of the application to monitor: the ID of a reconnectable application when one
 * exists, otherwise the ID of a newly submitted application.
 *
 * @return the application ID; always present
 * @throws YarnException if looking up or submitting the application fails
 * @throws IOException if looking up or submitting the application fails
 */
private Optional<ApplicationId> getApplicationId() throws YarnException, IOException {
  Optional<ApplicationId> existingApplicationId = getReconnectableApplicationId();
  if (!existingApplicationId.isPresent()) {
    LOGGER.info("No reconnectable application found so submitting a new application");
    return Optional.of(setupAndSubmitApplication());
  }

  LOGGER.info("Found reconnectable application with application ID: " + existingApplicationId.get());
  return existingApplicationId;
}
/**
 * Looks for an application of the Gobblin Yarn application type, in one of the reconnectable states,
 * whose name equals this launcher's application name.
 *
 * @return the ID of the first matching application, or absent if there is none
 * @throws YarnException if the application list cannot be retrieved
 * @throws IOException if the application list cannot be retrieved
 */
@VisibleForTesting
Optional<ApplicationId> getReconnectableApplicationId() throws YarnException, IOException {
  List<ApplicationReport> candidates =
      this.yarnClient.getApplications(APPLICATION_TYPES, RECONNECTABLE_APPLICATION_STATES);

  if (candidates != null) {
    // Try to find an application with a matching application name
    for (ApplicationReport candidate : candidates) {
      if (this.applicationName.equals(candidate.getName())) {
        return Optional.of(candidate.getApplicationId());
      }
    }
  }

  return Optional.absent();
}
/**
 * Setup and submit the Gobblin Yarn application.
 *
 * <p>
 * Builds the ApplicationMaster container launch context (local resources, environment, command and,
 * when security is enabled, tokens), fills in the submission context, copies the container resources,
 * and submits the application.
 * </p>
 *
 * @return the ID of the submitted application
 * @throws IOException if there's anything wrong setting up and submitting the Yarn application
 * @throws YarnException if there's anything wrong setting up and submitting the Yarn application
 */
@VisibleForTesting
ApplicationId setupAndSubmitApplication() throws IOException, YarnException {
  YarnClientApplication yarnApp = this.yarnClient.createApplication();
  ApplicationSubmissionContext submissionContext = yarnApp.getApplicationSubmissionContext();
  submissionContext.setApplicationType(GOBBLIN_YARN_APPLICATION_TYPE);
  ApplicationId applicationId = submissionContext.getApplicationId();

  // Set up resource type requirements for ApplicationMaster
  GetNewApplicationResponse newAppResponse = yarnApp.getNewApplicationResponse();
  Resource amResource = prepareContainerResource(newAppResponse);

  // Add lib jars, and jars and files that the ApplicationMaster need as LocalResources
  Map<String, LocalResource> amLocalResources = addAppMasterLocalResources(applicationId);

  ContainerLaunchContext amLaunchContext = Records.newRecord(ContainerLaunchContext.class);
  amLaunchContext.setLocalResources(amLocalResources);
  amLaunchContext.setEnvironment(YarnHelixUtils.getEnvironmentVariables(this.yarnConfiguration));
  String amCommand = buildApplicationMasterCommand(amResource.getMemory());
  amLaunchContext.setCommands(Lists.newArrayList(amCommand));
  if (UserGroupInformation.isSecurityEnabled()) {
    setupSecurityTokens(amLaunchContext);
  }

  // Setup the application submission context
  submissionContext.setApplicationName(this.applicationName);
  submissionContext.setResource(amResource);
  submissionContext.setQueue(this.appQueueName);
  submissionContext.setPriority(Priority.newInstance(0));
  submissionContext.setAMContainerSpec(amLaunchContext);

  // Also setup container local resources by copying local jars and files the container need to HDFS
  addContainerLocalResources(applicationId);

  // Submit the application
  LOGGER.info("Submitting application " + applicationId);
  this.yarnClient.submitApplication(submissionContext);
  LOGGER.info("Application successfully submitted and accepted");

  ApplicationReport submittedReport = this.yarnClient.getApplicationReport(applicationId);
  LOGGER.info("Application Name: " + submittedReport.getName());
  LOGGER.info("Application Tracking URL: " + submittedReport.getTrackingUrl());
  LOGGER.info("Application User: " + submittedReport.getUser() + " Queue: " + submittedReport.getQueue());

  return applicationId;
}
/**
 * Builds the {@link Resource} request for the ApplicationMaster container.
 *
 * <p>
 * The configured memory and vcore values are each capped at the maximum resource capability reported
 * in the given response; a message is logged whenever a value is capped.
 * </p>
 *
 * @param newApplicationResponse the response carrying the maximum resource capability
 * @return the memory (in MBs) and vcores to request for the ApplicationMaster
 */
private Resource prepareContainerResource(GetNewApplicationResponse newApplicationResponse) {
  int memoryMbs = this.config.getInt(GobblinYarnConfigurationKeys.APP_MASTER_MEMORY_MBS_KEY);
  int maximumMemoryCapacity = newApplicationResponse.getMaximumResourceCapability().getMemory();
  if (memoryMbs > maximumMemoryCapacity) {
    LOGGER.info(String.format("Specified AM memory [%d] is above the maximum memory capacity [%d] of the "
        + "cluster, using the maximum memory capacity instead.", memoryMbs, maximumMemoryCapacity));
    memoryMbs = maximumMemoryCapacity;
  }

  int vCores = this.config.getInt(GobblinYarnConfigurationKeys.APP_MASTER_CORES_KEY);
  int maximumVirtualCoreCapacity = newApplicationResponse.getMaximumResourceCapability().getVirtualCores();
  if (vCores > maximumVirtualCoreCapacity) {
    // Report the vcore values here; the memory values belong to the message above
    LOGGER.info(String.format("Specified AM vcores [%d] is above the maximum vcore capacity [%d] of the "
        + "cluster, using the maximum vcore capacity instead.", vCores, maximumVirtualCoreCapacity));
    vCores = maximumVirtualCoreCapacity;
  }

  // Set up resource type requirements for ApplicationMaster
  return Resource.newInstance(memoryMbs, vCores);
}
/**
 * Stages the lib jars, jars, files and job configuration package configured for the ApplicationMaster
 * under the application work directory and collects them as {@link LocalResource}s.
 *
 * <p>Each kind of resource is only handled when its configuration key is present.</p>
 *
 * @param applicationId the ID of the application being submitted
 * @return the local resources of the ApplicationMaster, keyed as registered by the helper methods
 * @throws IOException if copying or registering any resource fails
 */
private Map<String, LocalResource> addAppMasterLocalResources(ApplicationId applicationId) throws IOException {
  Path workDir = GobblinClusterUtils.getAppWorkDirPath(this.fs, this.applicationName, applicationId.toString());
  Path amWorkDir = new Path(workDir, GobblinYarnConfigurationKeys.APP_MASTER_WORK_DIR_NAME);

  Map<String, LocalResource> amResources = Maps.newHashMap();

  if (this.config.hasPath(GobblinYarnConfigurationKeys.LIB_JARS_DIR_KEY)) {
    Path srcLibJarsDir = new Path(this.config.getString(GobblinYarnConfigurationKeys.LIB_JARS_DIR_KEY));
    Path destLibJarsDir = new Path(workDir, GobblinYarnConfigurationKeys.LIB_JARS_DIR_NAME);
    addLibJars(srcLibJarsDir, Optional.of(amResources), destLibJarsDir);
  }

  if (this.config.hasPath(GobblinYarnConfigurationKeys.APP_MASTER_JARS_KEY)) {
    Path destJarsDir = new Path(amWorkDir, GobblinYarnConfigurationKeys.APP_JARS_DIR_NAME);
    String jarList = this.config.getString(GobblinYarnConfigurationKeys.APP_MASTER_JARS_KEY);
    addAppJars(jarList, Optional.of(amResources), destJarsDir);
  }

  if (this.config.hasPath(GobblinYarnConfigurationKeys.APP_MASTER_FILES_LOCAL_KEY)) {
    Path destFilesDir = new Path(amWorkDir, GobblinYarnConfigurationKeys.APP_FILES_DIR_NAME);
    String localFileList = this.config.getString(GobblinYarnConfigurationKeys.APP_MASTER_FILES_LOCAL_KEY);
    addAppLocalFiles(localFileList, Optional.of(amResources), destFilesDir);
  }

  if (this.config.hasPath(GobblinYarnConfigurationKeys.APP_MASTER_FILES_REMOTE_KEY)) {
    String remoteFileList = this.config.getString(GobblinYarnConfigurationKeys.APP_MASTER_FILES_REMOTE_KEY);
    addAppRemoteFiles(remoteFileList, amResources);
  }

  if (this.config.hasPath(GobblinClusterConfigurationKeys.JOB_CONF_PATH_KEY)) {
    Path destFilesDir = new Path(amWorkDir, GobblinYarnConfigurationKeys.APP_FILES_DIR_NAME);
    String jobConfPath = this.config.getString(GobblinClusterConfigurationKeys.JOB_CONF_PATH_KEY);
    addJobConfPackage(jobConfPath, destFilesDir, amResources);
  }

  return amResources;
}
/**
 * Copies the jars and local files configured for the containers into the container work directory of
 * the application. Nothing is registered as a {@link LocalResource} here; the files are only copied.
 *
 * @param applicationId the ID of the application being submitted
 * @throws IOException if copying any file fails
 */
private void addContainerLocalResources(ApplicationId applicationId) throws IOException {
  Path workDir = GobblinClusterUtils.getAppWorkDirPath(this.fs, this.applicationName, applicationId.toString());
  Path containerDir = new Path(workDir, GobblinYarnConfigurationKeys.CONTAINER_WORK_DIR_NAME);

  if (this.config.hasPath(GobblinYarnConfigurationKeys.CONTAINER_JARS_KEY)) {
    String jarList = this.config.getString(GobblinYarnConfigurationKeys.CONTAINER_JARS_KEY);
    Path destJarsDir = new Path(containerDir, GobblinYarnConfigurationKeys.APP_JARS_DIR_NAME);
    addAppJars(jarList, Optional.<Map<String, LocalResource>>absent(), destJarsDir);
  }

  if (this.config.hasPath(GobblinYarnConfigurationKeys.CONTAINER_FILES_LOCAL_KEY)) {
    String localFileList = this.config.getString(GobblinYarnConfigurationKeys.CONTAINER_FILES_LOCAL_KEY);
    Path destFilesDir = new Path(containerDir, GobblinYarnConfigurationKeys.APP_FILES_DIR_NAME);
    addAppLocalFiles(localFileList, Optional.<Map<String, LocalResource>>absent(), destFilesDir);
  }
}
/**
 * Copies every entry of a local lib-jar directory into {@code destDir} and, when a resource map is
 * given, registers each copied file in it as a {@link LocalResourceType#FILE} resource.
 *
 * @param srcLibJarDir the local directory holding the lib jars
 * @param resourceMap the optional map to register the copied files in
 * @param destDir the destination directory on the application file system
 * @throws IOException if listing, copying or registering a file fails
 */
private void addLibJars(Path srcLibJarDir, Optional<Map<String, LocalResource>> resourceMap, Path destDir)
    throws IOException {
  FileSystem localFileSystem = FileSystem.getLocal(this.yarnConfiguration);
  FileStatus[] jarStatuses = localFileSystem.listStatus(srcLibJarDir);
  if (jarStatuses == null) {
    return;
  }

  // An empty listing simply results in no iterations
  for (FileStatus jarStatus : jarStatuses) {
    Path srcJarPath = jarStatus.getPath();
    Path destJarPath = new Path(destDir, srcJarPath.getName());
    this.fs.copyFromLocalFile(srcJarPath, destJarPath);
    if (resourceMap.isPresent()) {
      YarnHelixUtils.addFileAsLocalResource(this.fs, destJarPath, LocalResourceType.FILE, resourceMap.get());
    }
  }
}
/**
 * Copies each jar of a comma-separated list of local paths into {@code destDir} and, when a resource
 * map is given, registers each copied jar in it as a {@link LocalResourceType#FILE} resource.
 *
 * @param jarFilePathList comma-separated list of local jar paths
 * @param resourceMap the optional map to register the copied jars in
 * @param destDir the destination directory on the application file system
 * @throws IOException if copying or registering a jar fails
 */
private void addAppJars(String jarFilePathList, Optional<Map<String, LocalResource>> resourceMap,
    Path destDir) throws IOException {
  for (String jarPathString : SPLITTER.split(jarFilePathList)) {
    Path localJar = new Path(jarPathString);
    Path stagedJar = new Path(destDir, localJar.getName());
    this.fs.copyFromLocalFile(localJar, stagedJar);
    if (!resourceMap.isPresent()) {
      continue;
    }
    YarnHelixUtils.addFileAsLocalResource(this.fs, stagedJar, LocalResourceType.FILE, resourceMap.get());
  }
}
/**
 * Copies each file of a comma-separated list of local paths into {@code destDir} and, when a resource
 * map is given, registers each copied file in it as a {@link LocalResourceType#FILE} resource.
 *
 * @param localFilePathList comma-separated list of local file paths
 * @param resourceMap the optional map to register the copied files in
 * @param destDir the destination directory on the application file system
 * @throws IOException if copying or registering a file fails
 */
private void addAppLocalFiles(String localFilePathList, Optional<Map<String, LocalResource>> resourceMap,
    Path destDir) throws IOException {
  for (String filePathString : SPLITTER.split(localFilePathList)) {
    Path localFile = new Path(filePathString);
    Path stagedFile = new Path(destDir, localFile.getName());
    this.fs.copyFromLocalFile(localFile, stagedFile);
    if (!resourceMap.isPresent()) {
      continue;
    }
    YarnHelixUtils.addFileAsLocalResource(this.fs, stagedFile, LocalResourceType.FILE, resourceMap.get());
  }
}
/**
 * Registers each path of a comma-separated list as a {@link LocalResourceType#FILE} resource without
 * copying it.
 *
 * @param hdfsFileList comma-separated list of file paths
 * @param resourceMap the map to register the files in
 * @throws IOException if registering a file fails
 */
private void addAppRemoteFiles(String hdfsFileList, Map<String, LocalResource> resourceMap)
    throws IOException {
  for (String remotePathString : SPLITTER.split(hdfsFileList)) {
    Path remoteFile = new Path(remotePathString);
    YarnHelixUtils.addFileAsLocalResource(this.fs, remoteFile, LocalResourceType.FILE, resourceMap);
  }
}
/**
 * Tars the local job configuration path into {@code destDir} and registers the resulting file as a
 * {@link LocalResourceType#ARCHIVE} resource.
 *
 * @param jobConfPackagePath the local path of the job configuration package
 * @param destDir the destination directory on the application file system
 * @param resourceMap the map to register the archive in
 * @throws IOException if creating or registering the archive fails
 */
private void addJobConfPackage(String jobConfPackagePath, Path destDir, Map<String, LocalResource> resourceMap)
    throws IOException {
  Path localPackage = new Path(jobConfPackagePath);
  String archiveName = localPackage.getName() + GobblinClusterConfigurationKeys.TAR_GZ_FILE_SUFFIX;
  Path archivePath = new Path(destDir, archiveName);

  FileSystem localFileSystem = FileSystem.getLocal(this.yarnConfiguration);
  StreamUtils.tar(localFileSystem, this.fs, localPackage, archivePath);
  YarnHelixUtils.addFileAsLocalResource(this.fs, archivePath, LocalResourceType.ARCHIVE, resourceMap);
}
/**
 * Builds the shell command that starts the {@link GobblinApplicationMaster} JVM, with standard output
 * and standard error redirected to files named after the ApplicationMaster class in the log directory.
 *
 * @param memoryMbs the value used for the {@code -Xmx} option, in MBs
 * @return the command line for the ApplicationMaster container
 */
private String buildApplicationMasterCommand(int memoryMbs) {
  String amSimpleName = GobblinApplicationMaster.class.getSimpleName();

  StringBuilder command = new StringBuilder();
  command.append(ApplicationConstants.Environment.JAVA_HOME.$()).append("/bin/java");
  command.append(" -Xmx").append(memoryMbs).append("M");
  command.append(" ").append(JvmUtils.formatJvmArguments(this.appMasterJvmArgs));
  command.append(" ").append(GobblinApplicationMaster.class.getName());
  command.append(" --").append(GobblinClusterConfigurationKeys.APPLICATION_NAME_OPTION_NAME);
  command.append(" ").append(this.applicationName);

  // Redirect stdout and stderr into the container log directory
  command.append(" 1>").append(ApplicationConstants.LOG_DIR_EXPANSION_VAR).append(File.separator)
      .append(amSimpleName).append(".").append(ApplicationConstants.STDOUT);
  command.append(" 2>").append(ApplicationConstants.LOG_DIR_EXPANSION_VAR).append(File.separator)
      .append(amSimpleName).append(".").append(ApplicationConstants.STDERR);

  return command.toString();
}
/**
 * Obtains delegation tokens for the application file system, adds them to the current user's
 * credentials, and attaches the serialized credentials to the given container launch context.
 *
 * @param containerLaunchContext the launch context to set the tokens on
 * @throws IOException if the renewer principal is not configured or the tokens cannot be obtained or written
 */
private void setupSecurityTokens(ContainerLaunchContext containerLaunchContext) throws IOException {
  Credentials userCredentials = UserGroupInformation.getCurrentUser().getCredentials();

  String renewer = this.yarnConfiguration.get(YarnConfiguration.RM_PRINCIPAL);
  if (Strings.isNullOrEmpty(renewer)) {
    throw new IOException("Failed to get master Kerberos principal for the RM to use as renewer");
  }

  // For now, only getting tokens for the default file-system.
  Token<?>[] delegationTokens = this.fs.addDelegationTokens(renewer, userCredentials);
  if (delegationTokens != null) {
    for (Token<?> delegationToken : delegationTokens) {
      LOGGER.info("Got delegation token for " + this.fs.getUri() + "; " + delegationToken);
    }
  }

  // A dedicated closer for the buffer; it is independent of the launcher-wide closer field
  Closer bufferCloser = Closer.create();
  try {
    DataOutputBuffer tokenBuffer = bufferCloser.register(new DataOutputBuffer());
    userCredentials.writeTokenStorageToStream(tokenBuffer);
    ByteBuffer serializedTokens = ByteBuffer.wrap(tokenBuffer.getData(), 0, tokenBuffer.getLength());
    containerLaunchContext.setTokens(serializedTokens);
  } catch (Throwable t) {
    throw bufferCloser.rethrow(t);
  } finally {
    bufferCloser.close();
  }
}
/**
 * Builds the {@link LogCopier} that reads stdout/stderr log files from the application log directory
 * on the application file system and writes them to {@code sinkLogDir} on a raw local file system.
 *
 * @param config the configuration to read the optional log copier settings from
 * @param sinkLogDir the directory the logs are written to
 * @param appWorkDir the application work directory that contains the log directory
 * @return the configured {@link LogCopier}
 * @throws IOException if the local file system cannot be initialized or the log directory cannot be created
 */
private LogCopier buildLogCopier(Config config, Path sinkLogDir, Path appWorkDir) throws IOException {
  // Registered with the closer so that it is closed when the launcher stops
  FileSystem localFs = this.closer.register(new RawLocalFileSystem());
  localFs.initialize(URI.create(ConfigurationKeys.LOCAL_FS_URI), new Configuration());

  LogCopier.Builder copierBuilder = LogCopier.newBuilder()
      .useSrcFileSystem(this.fs)
      .useDestFileSystem(localFs)
      .readFrom(getHdfsLogDir(appWorkDir))
      .writeTo(sinkLogDir)
      .acceptsLogFileExtensions(ImmutableSet.of(ApplicationConstants.STDOUT, ApplicationConstants.STDERR));

  boolean hasMaxFileSize = config.hasPath(GobblinYarnConfigurationKeys.LOG_COPIER_MAX_FILE_SIZE);
  if (hasMaxFileSize) {
    copierBuilder.useMaxBytesPerLogFile(config.getBytes(GobblinYarnConfigurationKeys.LOG_COPIER_MAX_FILE_SIZE));
  }

  boolean hasScheduler = config.hasPath(GobblinYarnConfigurationKeys.LOG_COPIER_SCHEDULER);
  if (hasScheduler) {
    copierBuilder.useScheduler(config.getString(GobblinYarnConfigurationKeys.LOG_COPIER_SCHEDULER));
  }

  return copierBuilder.build();
}
/**
 * Returns the log directory under the given application work directory, creating it if it does not
 * exist yet.
 *
 * @param appWorkDir the application work directory
 * @return the path of the log directory
 * @throws IOException if checking for or creating the directory fails
 */
private Path getHdfsLogDir(Path appWorkDir) throws IOException {
  Path logDir = new Path(appWorkDir, GobblinYarnConfigurationKeys.APP_LOGS_DIR_NAME);
  boolean alreadyThere = this.fs.exists(logDir);
  if (!alreadyThere) {
    this.fs.mkdirs(logDir);
  }
  return logDir;
}
/**
 * Builds the {@link YarnAppSecurityManager}, pointing it at the token file located under
 * {@code <home directory>/<application name>/} on the application file system.
 *
 * @return the new {@link YarnAppSecurityManager}
 * @throws IOException if the {@link YarnAppSecurityManager} cannot be created
 */
private YarnAppSecurityManager buildYarnAppSecurityManager() throws IOException {
  String relativeTokenFile = this.applicationName + Path.SEPARATOR +
      GobblinYarnConfigurationKeys.TOKEN_FILE_NAME;
  Path tokenFile = new Path(this.fs.getHomeDirectory(), relativeTokenFile);
  return new YarnAppSecurityManager(this.config, this.helixManager, this.fs, tokenFile);
}
/**
 * Sends a shutdown message through the Helix messaging service to the Helix controller instance.
 * An error is logged if no message was sent.
 */
@VisibleForTesting
void sendShutdownRequest() {
  // Address the controller, whatever its instance name, resource, partition and partition state
  Criteria controllerCriteria = new Criteria();
  controllerCriteria.setInstanceName("%");
  controllerCriteria.setResource("%");
  controllerCriteria.setPartition("%");
  controllerCriteria.setPartitionState("%");
  controllerCriteria.setRecipientInstanceType(InstanceType.CONTROLLER);
  controllerCriteria.setSessionSpecific(true);

  // The message ID is the lower-cased sub type followed by a random UUID
  String messageId =
      HelixMessageSubTypes.APPLICATION_MASTER_SHUTDOWN.toString().toLowerCase() + UUID.randomUUID().toString();
  Message shutdownRequest = new Message(GobblinHelixConstants.SHUTDOWN_MESSAGE_TYPE, messageId);
  shutdownRequest.setMsgSubType(HelixMessageSubTypes.APPLICATION_MASTER_SHUTDOWN.toString());
  shutdownRequest.setMsgState(Message.MessageState.NEW);
  shutdownRequest.setTgtSessionId("*");

  int sentCount = this.helixManager.getMessagingService().send(controllerCriteria, shutdownRequest);
  if (sentCount == 0) {
    LOGGER.error(String.format("Failed to send the %s message to the controller", shutdownRequest.getMsgSubType()));
  }
}
/**
 * Recursively deletes the work directory of the given application if it exists.
 *
 * @param applicationId the ID of the application whose work directory is removed
 * @throws IOException if checking for or deleting the directory fails
 */
@VisibleForTesting
void cleanUpAppWorkDirectory(ApplicationId applicationId) throws IOException {
  Path workDir = GobblinClusterUtils.getAppWorkDirPath(this.fs, this.applicationName, applicationId.toString());
  if (!this.fs.exists(workDir)) {
    return;
  }

  LOGGER.info("Deleting application working directory " + workDir);
  this.fs.delete(workDir, true);
}
/**
 * Sends the shutdown notification email. The body lists the details of the given report, or states
 * that the report is not available when it is absent. A failure to send the email is only logged.
 *
 * @param applicationReport the last {@link ApplicationReport}, if one is available
 */
private void sendEmailOnShutdown(Optional<ApplicationReport> applicationReport) {
  String subject = String.format("Gobblin Yarn application %s completed", this.applicationName);

  StringBuilder body = new StringBuilder("Gobblin Yarn ApplicationReport:");
  if (!applicationReport.isPresent()) {
    body.append(' ').append("Not available");
  } else {
    ApplicationReport report = applicationReport.get();
    body.append("\n");
    body.append("\tApplication ID: ").append(report.getApplicationId()).append("\n");
    body.append("\tApplication attempt ID: ").append(report.getCurrentApplicationAttemptId()).append("\n");
    body.append("\tFinal application status: ").append(report.getFinalApplicationStatus()).append("\n");
    body.append("\tStart time: ").append(report.getStartTime()).append("\n");
    body.append("\tFinish time: ").append(report.getFinishTime()).append("\n");

    if (!Strings.isNullOrEmpty(report.getDiagnostics())) {
      body.append("\tDiagnostics: ").append(report.getDiagnostics()).append("\n");
    }

    // The usage report and its used resources are each included only when they are non-null
    ApplicationResourceUsageReport usage = report.getApplicationResourceUsageReport();
    if (usage != null) {
      body.append("\tUsed containers: ").append(usage.getNumUsedContainers()).append("\n");
      Resource used = usage.getUsedResources();
      if (used != null) {
        body.append("\tUsed memory (MBs): ").append(used.getMemory()).append("\n");
        body.append("\tUsed vcores: ").append(used.getVirtualCores()).append("\n");
      }
    }
  }

  try {
    EmailUtils.sendEmail(ConfigUtils.configToState(this.config), subject, body.toString());
  } catch (EmailException ee) {
    LOGGER.error("Failed to send email notification on shutdown", ee);
  }
}
/**
 * Entry point: creates a launcher from the default-loaded configuration, installs a JVM shutdown hook
 * that stops the launcher (and sends the shutdown email when enabled), and launches the application.
 *
 * @param args command line arguments; not used
 * @throws Exception if creating or launching the launcher fails
 */
public static void main(String[] args) throws Exception {
  final GobblinYarnAppLauncher launcher =
      new GobblinYarnAppLauncher(ConfigFactory.load(), new YarnConfiguration());

  Thread shutdownHook = new Thread() {
    @Override
    public void run() {
      try {
        launcher.stop();
      } catch (IOException ioe) {
        LOGGER.error("Failed to shutdown the " + GobblinYarnAppLauncher.class.getSimpleName(), ioe);
      } catch (TimeoutException te) {
        LOGGER.error("Timeout in stopping the service manager", te);
      } finally {
        if (launcher.emailNotificationOnShutdown) {
          launcher.sendEmailOnShutdown(Optional.<ApplicationReport>absent());
        }
      }
    }
  };
  Runtime.getRuntime().addShutdownHook(shutdownHook);

  launcher.launch();
}
}