/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.aws;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import org.apache.commons.io.FileUtils;
import org.apache.commons.mail.EmailException;
import org.apache.helix.Criteria;
import org.apache.helix.HelixManager;
import org.apache.helix.HelixManagerFactory;
import org.apache.helix.InstanceType;
import org.apache.helix.messaging.AsyncCallback;
import org.apache.helix.model.Message;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.amazonaws.regions.Region;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.autoscaling.model.AutoScalingGroup;
import com.amazonaws.services.autoscaling.model.BlockDeviceMapping;
import com.amazonaws.services.autoscaling.model.InstanceMonitoring;
import com.amazonaws.services.autoscaling.model.Tag;
import com.amazonaws.services.autoscaling.model.TagDescription;
import com.amazonaws.services.ec2.model.AvailabilityZone;
import com.amazonaws.services.ec2.model.Instance;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import com.google.common.eventbus.EventBus;
import com.google.common.io.Closer;
import com.google.common.util.concurrent.Service;
import com.google.common.util.concurrent.ServiceManager;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

import gobblin.annotation.Alpha;
import gobblin.cluster.GobblinClusterConfigurationKeys;
import gobblin.cluster.GobblinClusterUtils;
import gobblin.cluster.GobblinHelixConstants;
import gobblin.cluster.HelixMessageSubTypes;
import gobblin.cluster.HelixUtils;
import gobblin.util.ConfigUtils;
import gobblin.util.EmailUtils;

import static gobblin.aws.GobblinAWSUtils.*;
import static gobblin.aws.GobblinAWSConfigurationKeys.*;
import static gobblin.cluster.GobblinClusterConfigurationKeys.*;


/**
 * A client driver to launch Gobblin as an AWS Cluster.
 *
 * <p>
 *   Upon starting, this class checks whether there is an AWS cluster that is already running and
 *   that it is able to reconnect to. More specifically, it checks whether a cluster with the same
 *   cluster name exists and can be reconnected to, i.e. the cluster has not completed yet. If so,
 *   it simply starts monitoring that cluster.
 * </p>
 *
 * <p>
 *   On the other hand, if there is no such reconnectable AWS cluster, this class launches a new AWS
 *   cluster and starts the {@link GobblinAWSClusterManager}. It also persists the new cluster details
 *   so it is able to reconnect to the AWS cluster if it is restarted for some reason.
 * </p>
 *
 * <p>
 *   If a shutdown signal is received, it sends a Helix
 *   {@link org.apache.helix.model.Message.MessageType#SCHEDULER_MSG} to the {@link GobblinAWSClusterManager}
 *   asking it to shutdown. It also sends an email notification for the shutdown if
 *   {@link GobblinAWSConfigurationKeys#EMAIL_NOTIFICATION_ON_SHUTDOWN_KEY} is {@code true}.
 * </p>
 *
 * @author Abhishek Tiwari
 */
@Alpha
public class GobblinAWSClusterLauncher {
  private static final Logger LOGGER = LoggerFactory.getLogger(GobblinAWSClusterLauncher.class);

  // Keys and values of the tags attached to the auto scaling groups created by this launcher
  public static final String CLUSTER_NAME_ASG_TAG = "ClusterName";
  public static final String CLUSTER_ID_ASG_TAG = "ClusterId";
  public static final String ASG_TYPE_ASG_TAG = "AsgType";
  public static final String ASG_TYPE_MASTER = "master";
  public static final String ASG_TYPE_WORKERS = "workers";

  // Name prefixes; the cluster UUID is appended to each of them
  public static final String MASTER_ASG_NAME_PREFIX = "GobblinMasterASG_";
  public static final String MASTER_LAUNCH_CONFIG_NAME_PREFIX = "GobblinMasterLaunchConfig_";
  public static final String WORKERS_ASG_NAME_PREFIX = "GobblinWorkerASG_";
  public static final String WORKERS_LAUNCH_CONFIG_PREFIX = "GobblinWorkerLaunchConfig_";

  private final Config config;

  private final String zkConnectionString;
  private final String helixClusterName;
  private final HelixManager helixManager;
  private final EventBus eventBus = new EventBus(GobblinAWSClusterLauncher.class.getSimpleName());
  private volatile Optional<ServiceManager> serviceManager = Optional.absent();
  private AWSClusterSecurityManager awsClusterSecurityManager;
  private AWSSdkClient awsSdkClient;

  private final Closer closer = Closer.create();

  // AWS cluster meta
  private final String clusterName;
  private volatile Optional<String> clusterId = Optional.absent();

  private volatile boolean stopped = false;
  private final boolean emailNotificationOnShutdown;

  // AWS Gobblin cluster common config
  private final String awsRegion;
  private final String awsConfDir;

  // AWS Gobblin Master Instance config
  private final String masterAmiId;
  private final String masterInstanceType;
  private final String masterJvmMemory;

  // AWS Gobblin Worker Instance config
  private final String workerAmiId;
  private final String workerInstanceType;
  private final String workerJvmMemory;
  private final Integer minWorkers;
  private final Integer maxWorkers;
  private final Integer desiredWorkers;

  private final Optional<String> masterJvmArgs;
  private final Optional<String> workerJvmArgs;

  private String masterPublicIp;

  private final String nfsParentDir;
  private final String masterJarsDir;
  private final String masterS3ConfUri;
  private final String masterS3ConfFiles;
  private final String masterS3JarsUri;
  private final String masterS3JarsFiles;

  private final String workerJarsDir;
  private final String workerS3ConfUri;
  private final String workerS3ConfFiles;
  private final String workerS3JarsUri;
  private final String workerS3JarsFiles;

  private final String sinkLogRootDir;
  private final String appWorkDir;

  private String masterLaunchConfigName;
  private String masterAutoScalingGroupName;

  private String workerLaunchConfigName;
  private String workerAutoScalingGroupName;

  private final Optional<String> gobblinVersion;

  /**
   * Create a launcher from the given configuration.
   *
   * <p>
   *   Only the ZooKeeper connection string is read without a default; every other setting falls
   *   back to a default value when its key is absent from {@code config}.
   * </p>
   *
   * @param config configuration of the launcher and of the cluster to launch
   * @throws IOException declared by the constructor signature
   */
  public GobblinAWSClusterLauncher(Config config) throws IOException {
    this.config = config;

    // Mandatory configs
    this.zkConnectionString = config.getString(GobblinClusterConfigurationKeys.ZK_CONNECTION_STRING_KEY);
    LOGGER.info("Using ZooKeeper connection string: {}", this.zkConnectionString);

    // Configs with default values
    this.clusterName = ConfigUtils.getString(config, CLUSTER_NAME_KEY, DEFAULT_CLUSTER_NAME);
    this.helixClusterName = ConfigUtils.getString(config, HELIX_CLUSTER_NAME_KEY, this.clusterName);

    this.nfsParentDir = appendSlash(ConfigUtils.getString(config, NFS_PARENT_DIR_KEY, DEFAULT_NFS_PARENT_DIR));

    this.awsRegion = ConfigUtils.getString(config, AWS_REGION_KEY, DEFAULT_AWS_REGION);
    this.awsConfDir = appendSlash(
        ConfigUtils.getString(config, AWS_CONF_DIR, this.nfsParentDir + DEFAULT_AWS_CONF_DIR_POSTFIX));

    this.masterAmiId = ConfigUtils.getString(config, MASTER_AMI_ID_KEY, DEFAULT_MASTER_AMI_ID);
    this.masterInstanceType = ConfigUtils.getString(config, MASTER_INSTANCE_TYPE_KEY, DEFAULT_MASTER_INSTANCE_TYPE);
    this.masterJvmMemory = ConfigUtils.getString(config, MASTER_JVM_MEMORY_KEY, DEFAULT_MASTER_JVM_MEMORY);

    this.workerAmiId = ConfigUtils.getString(config, WORKER_AMI_ID_KEY, DEFAULT_WORKER_AMI_ID);
    this.workerInstanceType = ConfigUtils.getString(config, WORKER_INSTANCE_TYPE_KEY, DEFAULT_WORKER_INSTANCE_TYPE);
    this.workerJvmMemory = ConfigUtils.getString(config, WORKER_JVM_MEMORY_KEY, DEFAULT_WORKER_JVM_MEMORY);
    this.minWorkers = ConfigUtils.getInt(config, MIN_WORKERS_KEY, DEFAULT_MIN_WORKERS);
    this.maxWorkers = ConfigUtils.getInt(config, MAX_WORKERS_KEY, DEFAULT_MAX_WORKERS);
    this.desiredWorkers = ConfigUtils.getInt(config, DESIRED_WORKERS_KEY, DEFAULT_DESIRED_WORKERS);

    this.masterJvmArgs = config.hasPath(GobblinAWSConfigurationKeys.MASTER_JVM_ARGS_KEY)
        ? Optional.of(config.getString(GobblinAWSConfigurationKeys.MASTER_JVM_ARGS_KEY))
        : Optional.<String>absent();
    this.workerJvmArgs = config.hasPath(GobblinAWSConfigurationKeys.WORKER_JVM_ARGS_KEY)
        ? Optional.of(config.getString(GobblinAWSConfigurationKeys.WORKER_JVM_ARGS_KEY))
        : Optional.<String>absent();

    this.masterJarsDir = appendSlash(
        ConfigUtils.getString(config, MASTER_JARS_KEY, this.nfsParentDir + DEFAULT_MASTER_JARS_POSTFIX));
    this.masterS3ConfUri = appendSlash(
        ConfigUtils.getString(config, MASTER_S3_CONF_URI_KEY, DEFAULT_MASTER_S3_CONF_URI));
    this.masterS3ConfFiles = ConfigUtils.getString(config, MASTER_S3_CONF_FILES_KEY, DEFAULT_MASTER_S3_CONF_FILES);
    this.masterS3JarsUri = ConfigUtils.getString(config, MASTER_S3_JARS_URI_KEY, DEFAULT_MASTER_S3_JARS_URI);
    this.masterS3JarsFiles = ConfigUtils.getString(config, MASTER_S3_JARS_FILES_KEY, DEFAULT_MASTER_S3_JARS_FILES);

    this.workerJarsDir = appendSlash(
        ConfigUtils.getString(config, WORKER_JARS_KEY, this.nfsParentDir + DEFAULT_WORKER_JARS_POSTFIX));
    this.workerS3ConfUri = appendSlash(
        ConfigUtils.getString(config, WORKER_S3_CONF_URI_KEY, DEFAULT_WORKER_S3_CONF_URI));
    this.workerS3ConfFiles = ConfigUtils.getString(config, WORKER_S3_CONF_FILES_KEY, DEFAULT_WORKER_S3_CONF_FILES);
    this.workerS3JarsUri = ConfigUtils.getString(config, WORKER_S3_JARS_URI_KEY, DEFAULT_WORKER_S3_JARS_URI);
    this.workerS3JarsFiles = ConfigUtils.getString(config, WORKER_S3_JARS_FILES_KEY, DEFAULT_WORKER_S3_JARS_FILES);

    this.sinkLogRootDir = appendSlash(
        ConfigUtils.getString(config, LOGS_SINK_ROOT_DIR_KEY, this.nfsParentDir + DEFAULT_LOGS_SINK_ROOT_DIR_POSTFIX));
    this.appWorkDir = appendSlash(
        ConfigUtils.getString(config, APP_WORK_DIR, this.nfsParentDir + DEFAULT_APP_WORK_DIR_POSTFIX));

    this.emailNotificationOnShutdown = ConfigUtils
        .getBoolean(config, EMAIL_NOTIFICATION_ON_SHUTDOWN_KEY, DEFAULT_EMAIL_NOTIFICATION_ON_SHUTDOWN);

    this.awsClusterSecurityManager = new AWSClusterSecurityManager(this.config);
    // The SDK client is built from the security manager and the region, so both must be set first
    this.awsSdkClient = createAWSSdkClient();

    if (config.hasPath(GobblinAWSConfigurationKeys.GOBBLIN_VERSION)) {
      this.gobblinVersion = Optional.of(config.getString(GobblinAWSConfigurationKeys.GOBBLIN_VERSION));
    } else {
      this.gobblinVersion = Optional.<String>absent();
    }

    // The manager is only created here; the connection is established in launch()
    this.helixManager = HelixManagerFactory.getZKHelixManager(this.helixClusterName,
        GobblinClusterUtils.getHostname(), InstanceType.SPECTATOR, this.zkConnectionString);
  }

  /**
   * Launch a new Gobblin cluster on AWS, or reconnect to an existing one with the same cluster name.
   *
   * @throws IOException If there's something wrong launching the cluster
   */
  public void launch() throws IOException {
    this.eventBus.register(this);

    // Create Helix cluster and connect to it
    HelixUtils.createGobblinHelixCluster(this.zkConnectionString, this.helixClusterName);
    LOGGER.info("Created Helix cluster {}", this.helixClusterName);

    connectHelixManager();

    // Start all the services
    List<Service> services = Lists.newArrayList();
    services.add(this.awsClusterSecurityManager);
    this.serviceManager = Optional.of(new ServiceManager(services));
    this.serviceManager.get().startAsync();

    // Core logic to launch cluster
    this.clusterId = getClusterId();

    // TODO: Add cluster monitoring
  }

  /**
   * Stop this {@link GobblinAWSClusterLauncher} instance.
   *
   * <p>
   *   Calling this method again after it has completed successfully is a no-op.
   * </p>
   *
   * @throws IOException If this {@link GobblinAWSClusterLauncher} instance fails to clean up its working directory.
   * @throws TimeoutException If the services do not stop within 5 minutes.
   */
  public synchronized void stop() throws IOException, TimeoutException {
    if (this.stopped) {
      return;
    }

    LOGGER.info("Stopping the {}", GobblinAWSClusterLauncher.class.getSimpleName());

    try {
      // A cluster id is only present once launch() has found or created a cluster
      if (this.clusterId.isPresent()) {
        sendShutdownRequest();
      }

      if (this.serviceManager.isPresent()) {
        this.serviceManager.get().stopAsync().awaitStopped(5, TimeUnit.MINUTES);
      }

      disconnectHelixManager();
    } finally {
      // Clean-up is attempted even when the shutdown sequence above failed
      try {
        if (this.clusterId.isPresent()) {
          cleanUpClusterWorkDirectory(this.clusterId.get());
        }
      } finally {
        this.closer.close();
      }
    }

    this.stopped = true;
  }

  /**
   * Connect the {@link HelixManager}; any failure is logged and rethrown unchecked.
   */
  @VisibleForTesting
  void connectHelixManager() {
    try {
      this.helixManager.connect();
    } catch (Exception e) {
      LOGGER.error("HelixManager failed to connect", e);
      throw Throwables.propagate(e);
    }
  }

  /**
   * Disconnect the {@link HelixManager} if it is currently connected.
   */
  @VisibleForTesting
  void disconnectHelixManager() {
    if (this.helixManager.isConnected()) {
      this.helixManager.disconnect();
    }
  }

  /**
   * Create the {@link AWSSdkClient} for the configured AWS region.
   *
   * @return a new {@link AWSSdkClient}
   */
  @VisibleForTesting
  AWSSdkClient createAWSSdkClient() {
    return new AWSSdkClient(this.awsClusterSecurityManager,
        Region.getRegion(Regions.fromName(this.awsRegion)));
  }

  /**
   * Get the id of a reconnectable cluster if there is one, otherwise set up a new cluster.
   *
   * @return the id of the cluster this launcher is attached to
   * @throws IOException If looking up or setting up the cluster fails
   */
  private Optional<String> getClusterId() throws IOException {
    final Optional<String> reconnectableClusterId = getReconnectableClusterId();
    if (reconnectableClusterId.isPresent()) {
      LOGGER.info("Found reconnectable cluster with cluster ID: {}", reconnectableClusterId.get());
      return reconnectableClusterId;
    }

    LOGGER.info("No reconnectable cluster found so creating a cluster");
    return Optional.of(setupGobblinCluster());
  }

  /**
   * Look for auto scaling groups tagged with this launcher's cluster name and, if found, record
   * their names, launch configuration names and the master public ip.
   *
   * @return the cluster id read from the tags, or {@link Optional#absent()} if no tagged auto
   *         scaling group exists
   * @throws IOException If the number of tagged auto scaling groups is neither 0 nor 2, or if the
   *         cluster id or the master / workers role cannot be determined from the tags
   */
  @VisibleForTesting
  Optional<String> getReconnectableClusterId() throws IOException {
    // List ASGs with Tag of cluster name
    final Tag clusterNameTag = new Tag()
        .withKey(CLUSTER_NAME_ASG_TAG)
        .withValue(this.clusterName);
    final List<AutoScalingGroup> autoScalingGroups = this.awsSdkClient.getAutoScalingGroupsWithTag(clusterNameTag);

    // If no auto scaling group is found, we don't have an existing cluster to connect to
    if (autoScalingGroups.isEmpty()) {
      return Optional.absent();
    }

    // If more than 0 auto scaling groups are found, validate the setup
    if (autoScalingGroups.size() != 2) {
      throw new IOException("Expected 2 auto scaling groups (1 each for master and workers) but found: " +
          autoScalingGroups.size());
    }

    // Retrieve cluster information from ASGs
    final AutoScalingGroup firstAsg = autoScalingGroups.get(0);
    final AutoScalingGroup secondAsg = autoScalingGroups.get(1);
    Optional<String> foundClusterId = Optional.absent();
    Optional<AutoScalingGroup> masterAsg = Optional.absent();
    Optional<AutoScalingGroup> workersAsg = Optional.absent();

    // Only the tags of the first group are inspected; the other group takes the remaining role
    for (TagDescription tagDescription : firstAsg.getTags()) {
      LOGGER.info("Found tag: {}", tagDescription);
      if (tagDescription.getKey().equalsIgnoreCase(CLUSTER_ID_ASG_TAG)) {
        foundClusterId = Optional.of(tagDescription.getValue());
      }
      if (tagDescription.getKey().equalsIgnoreCase(ASG_TYPE_ASG_TAG)) {
        if (tagDescription.getValue().equalsIgnoreCase(ASG_TYPE_MASTER)) {
          masterAsg = Optional.of(firstAsg);
          workersAsg = Optional.of(secondAsg);
        } else {
          masterAsg = Optional.of(secondAsg);
          workersAsg = Optional.of(firstAsg);
        }
      }
    }

    if (!foundClusterId.isPresent()) {
      throw new IOException("Found 2 auto scaling group names for: " + this.clusterName +
          " but tags seem to be corrupted, hence could not determine cluster id");
    }

    if (!masterAsg.isPresent() || !workersAsg.isPresent()) {
      throw new IOException("Found 2 auto scaling group names for: " + this.clusterName +
          " but tags seem to be corrupted, hence could not determine master and workers ASG");
    }

    // Get Master and Workers launch config name and auto scaling group name
    this.masterAutoScalingGroupName = masterAsg.get().getAutoScalingGroupName();
    this.masterLaunchConfigName = masterAsg.get().getLaunchConfigurationName();
    this.workerAutoScalingGroupName = workersAsg.get().getAutoScalingGroupName();
    this.workerLaunchConfigName = workersAsg.get().getLaunchConfigurationName();

    LOGGER.info("Trying to find cluster master public ip");
    this.masterPublicIp = getMasterPublicIp();
    LOGGER.info("Master public ip: {}", this.masterPublicIp);

    return foundClusterId;
  }

  /**
   * Setup the Gobblin AWS cluster: security group, key pair, cluster master and work unit runners.
   *
   * @return the id of the newly created cluster
   * @throws IOException If there's anything wrong setting up the AWS cluster
   */
  @VisibleForTesting
  String setupGobblinCluster() throws IOException {
    final String uuid = UUID.randomUUID().toString();

    // Create security group
    // TODO: Make security group restrictive
    final String securityGroupName = "GobblinSecurityGroup_" + uuid;
    this.awsSdkClient.createSecurityGroup(securityGroupName, "Gobblin cluster security group");
    this.awsSdkClient.addPermissionsToSecurityGroup(securityGroupName,
        "0.0.0.0/0",
        "tcp",
        0,
        65535);

    // Create key value pair
    final String keyName = "GobblinKey_" + uuid;
    final String material = this.awsSdkClient.createKeyValuePair(keyName);
    // The key material is a secret, so only the key name and the target file are logged
    final File keyFile = new File(keyName + ".pem");
    LOGGER.debug("Created key pair {}; writing its private key to {}", keyName, keyFile);
    // NOTE(review): the file is written with the process' default permissions - consider restricting them
    FileUtils.writeStringToFile(keyFile, material);

    // Get all availability zones in the region. Currently, we will only use first
    final List<AvailabilityZone> availabilityZones = this.awsSdkClient.getAvailabilityZones();
    if (availabilityZones.isEmpty()) {
      throw new IOException("No availability zone found in region: " + this.awsRegion);
    }
    final AvailabilityZone availabilityZone = availabilityZones.get(0);

    // Launch Cluster Master
    final String clusterId = launchClusterMaster(uuid, keyName, securityGroupName, availabilityZone);

    // Launch WorkUnit runners
    launchWorkUnitRunners(uuid, keyName, securityGroupName, availabilityZone);

    return clusterId;
  }

  /**
   * Create the launch configuration and the auto scaling group of the cluster master, then wait
   * for the master instance to be running.
   *
   * @param uuid unique id of the cluster, used as suffix of the created resource names
   * @param keyName name of the key pair to launch the instance with
   * @param securityGroups security group to launch the instance in
   * @param availabilityZone availability zone to launch the instance in
   * @return the cluster id, which is the given {@code uuid}
   */
  private String launchClusterMaster(String uuid, String keyName, String securityGroups,
      AvailabilityZone availabilityZone) {

    // Get cloud-init script to launch cluster master
    final String userData = CloudInitScriptBuilder.buildClusterMasterCommand(this.clusterName,
        this.nfsParentDir,
        this.sinkLogRootDir,
        this.awsConfDir,
        this.appWorkDir,
        this.masterS3ConfUri,
        this.masterS3ConfFiles,
        this.masterS3JarsUri,
        this.masterS3JarsFiles,
        this.masterJarsDir,
        this.masterJvmMemory,
        this.masterJvmArgs,
        this.gobblinVersion);

    // Create launch config for Cluster master
    this.masterLaunchConfigName = MASTER_LAUNCH_CONFIG_NAME_PREFIX + uuid;
    this.awsSdkClient.createLaunchConfig(this.masterLaunchConfigName,
        this.masterAmiId,
        this.masterInstanceType,
        keyName,
        securityGroups,
        Optional.<String>absent(),
        Optional.<String>absent(),
        Optional.<BlockDeviceMapping>absent(),
        Optional.<String>absent(),
        Optional.<InstanceMonitoring>absent(),
        userData);

    // Create ASG for Cluster master
    // TODO: Make size configurable when we have support multi-master
    this.masterAutoScalingGroupName = MASTER_ASG_NAME_PREFIX + uuid;
    final int minNumMasters = 1;
    final int maxNumMasters = 1;
    final int desiredNumMasters = 1;
    final Tag clusterNameTag = new Tag().withKey(CLUSTER_NAME_ASG_TAG).withValue(this.clusterName);
    final Tag clusterUuidTag = new Tag().withKey(CLUSTER_ID_ASG_TAG).withValue(uuid);
    final Tag asgTypeTag = new Tag().withKey(ASG_TYPE_ASG_TAG).withValue(ASG_TYPE_MASTER);
    this.awsSdkClient.createAutoScalingGroup(this.masterAutoScalingGroupName,
        this.masterLaunchConfigName,
        minNumMasters,
        maxNumMasters,
        desiredNumMasters,
        Optional.of(availabilityZone.getZoneName()),
        Optional.<Integer>absent(),
        Optional.<Integer>absent(),
        Optional.<String>absent(),
        Optional.<String>absent(),
        Optional.<String>absent(),
        Lists.newArrayList(clusterNameTag, clusterUuidTag, asgTypeTag));

    LOGGER.info("Waiting for cluster master to launch");
    this.masterPublicIp = getMasterPublicIp();
    LOGGER.info("Master public ip: {}", this.masterPublicIp);

    return uuid;
  }

  /**
   * Poll the master auto scaling group every 5 seconds, for up to 10 minutes, until it has a
   * running instance.
   *
   * @return the public ip address of the first running instance of the master auto scaling group
   * @throws RuntimeException if the wait is interrupted or no instance is running before the timeout
   */
  private String getMasterPublicIp() {
    final long startTime = System.currentTimeMillis();
    final long launchTimeout = TimeUnit.MINUTES.toMillis(10);
    boolean isMasterLaunched = false;
    List<Instance> runningMasters = Collections.emptyList();
    while (!isMasterLaunched && (System.currentTimeMillis() - startTime) < launchTimeout) {
      try {
        Thread.sleep(5000);
      } catch (InterruptedException e) {
        // Restore the interrupt flag so that callers up the stack can still observe the interruption
        Thread.currentThread().interrupt();
        throw new RuntimeException("Interrupted while waiting for cluster master to boot up", e);
      }
      runningMasters = this.awsSdkClient.getInstancesForGroup(this.masterAutoScalingGroupName, "running");
      isMasterLaunched = runningMasters.size() > 0;
    }

    if (!isMasterLaunched) {
      throw new RuntimeException("Timed out while waiting for cluster master. "
          + "Check for issue manually for ASG: " + this.masterAutoScalingGroupName);
    }

    // This will change if cluster master restarts, but that will be handled by Helix events
    // TODO: Add listener to Helix / Zookeeper for master restart and update master public ip
    // .. although we do not use master public ip for anything
    return runningMasters.get(0).getPublicIpAddress();
  }

  /**
   * Create the launch configuration and the auto scaling group of the cluster workers.
   *
   * @param uuid unique id of the cluster, used as suffix of the created resource names
   * @param keyName name of the key pair to launch the instances with
   * @param securityGroups security group to launch the instances in
   * @param availabilityZone availability zone to launch the instances in
   */
  private void launchWorkUnitRunners(String uuid, String keyName, String securityGroups,
      AvailabilityZone availabilityZone) {

    // Get cloud-init script to launch cluster worker
    final String userData = CloudInitScriptBuilder.buildClusterWorkerCommand(this.clusterName,
        this.nfsParentDir,
        this.sinkLogRootDir,
        this.awsConfDir,
        this.appWorkDir,
        this.masterPublicIp,
        this.workerS3ConfUri,
        this.workerS3ConfFiles,
        this.workerS3JarsUri,
        this.workerS3JarsFiles,
        this.workerJarsDir,
        this.workerJvmMemory,
        this.workerJvmArgs,
        this.gobblinVersion);

    // Create launch config for Cluster worker
    this.workerLaunchConfigName = WORKERS_LAUNCH_CONFIG_PREFIX + uuid;
    this.awsSdkClient.createLaunchConfig(this.workerLaunchConfigName,
        this.workerAmiId,
        this.workerInstanceType,
        keyName,
        securityGroups,
        Optional.<String>absent(),
        Optional.<String>absent(),
        Optional.<BlockDeviceMapping>absent(),
        Optional.<String>absent(),
        Optional.<InstanceMonitoring>absent(),
        userData);

    // Create ASG for Cluster workers
    this.workerAutoScalingGroupName = WORKERS_ASG_NAME_PREFIX + uuid;
    final Tag clusterNameTag = new Tag().withKey(CLUSTER_NAME_ASG_TAG).withValue(this.clusterName);
    final Tag clusterUuidTag = new Tag().withKey(CLUSTER_ID_ASG_TAG).withValue(uuid);
    final Tag asgTypeTag = new Tag().withKey(ASG_TYPE_ASG_TAG).withValue(ASG_TYPE_WORKERS);
    this.awsSdkClient.createAutoScalingGroup(this.workerAutoScalingGroupName,
        this.workerLaunchConfigName,
        this.minWorkers,
        this.maxWorkers,
        this.desiredWorkers,
        Optional.of(availabilityZone.getZoneName()),
        Optional.<Integer>absent(),
        Optional.<Integer>absent(),
        Optional.<String>absent(),
        Optional.<String>absent(),
        Optional.<String>absent(),
        Lists.newArrayList(clusterNameTag, clusterUuidTag, asgTypeTag));
  }

  /**
   * Send a shutdown message to the Helix controller, registering {@link #shutdownASG()} as the
   * callback of the message. An error is logged if no message could be sent.
   */
  @VisibleForTesting
  void sendShutdownRequest() {
    final Criteria criteria = new Criteria();
    criteria.setInstanceName("%");
    criteria.setResource("%");
    criteria.setPartition("%");
    criteria.setPartitionState("%");
    criteria.setRecipientInstanceType(InstanceType.CONTROLLER);
    criteria.setSessionSpecific(true);

    // Locale.ROOT keeps the message id independent of the JVM's default locale
    final String messageId =
        HelixMessageSubTypes.APPLICATION_MASTER_SHUTDOWN.toString().toLowerCase(Locale.ROOT)
            + UUID.randomUUID().toString();
    final Message shutdownRequest = new Message(GobblinHelixConstants.SHUTDOWN_MESSAGE_TYPE, messageId);
    shutdownRequest.setMsgSubType(HelixMessageSubTypes.APPLICATION_MASTER_SHUTDOWN.toString());
    shutdownRequest.setMsgState(Message.MessageState.NEW);
    shutdownRequest.setTgtSessionId("*");

    // Wait for 5 minutes
    final int timeout = 300000;

    // Send shutdown request to Cluster master, which will send shutdown request to workers
    // Upon receiving shutdown response from workers, master will shut itself down and call back shutdownASG()
    final int messagesSent = this.helixManager.getMessagingService().send(criteria, shutdownRequest,
        shutdownASG(), timeout);
    if (messagesSent == 0) {
      LOGGER.error("Failed to send the {} message to the controller", shutdownRequest.getMsgSubType());
    }
  }

  /***
   * Callback method that deletes {@link AutoScalingGroup}s
   *
   * @return an {@link AWSShutdownHandler} built with the master and worker launch configuration
   *         names and auto scaling group names known to this launcher
   */
  private AsyncCallback shutdownASG() {
    final Optional<List<String>> optionalLaunchConfigurationNames = Optional
        .of(Arrays.asList(this.masterLaunchConfigName, this.workerLaunchConfigName));
    final Optional<List<String>> optionalAutoScalingGroupNames = Optional
        .of(Arrays.asList(this.masterAutoScalingGroupName, this.workerAutoScalingGroupName));

    return new AWSShutdownHandler(this.awsSdkClient,
        optionalLaunchConfigurationNames,
        optionalAutoScalingGroupNames);
  }

  /**
   * Delete the application working directory of the given cluster if it exists.
   *
   * @param clusterId id of the cluster whose working directory is deleted
   * @throws IOException If the directory cannot be deleted
   */
  private void cleanUpClusterWorkDirectory(String clusterId) throws IOException {
    final File appWorkDir = new File(GobblinClusterUtils.getAppWorkDirPath(this.clusterName, clusterId));

    if (appWorkDir.exists() && appWorkDir.isDirectory()) {
      LOGGER.info("Deleting application working directory {}", appWorkDir);
      FileUtils.deleteDirectory(appWorkDir);
    }
  }

  /**
   * Send an email notification about the shutdown of the cluster; a failure to send the email is
   * logged and otherwise ignored.
   *
   * @param report optional text appended to the body of the email
   */
  private void sendEmailOnShutdown(Optional<String> report) {
    final String subject = String.format("Gobblin AWS cluster %s completed", this.clusterName);

    final StringBuilder messageBuilder = new StringBuilder("Gobblin AWS cluster was shutdown at: " + new Date());
    if (report.isPresent()) {
      messageBuilder.append(' ').append(report.get());
    }

    try {
      EmailUtils.sendEmail(ConfigUtils.configToState(this.config), subject, messageBuilder.toString());
    } catch (EmailException ee) {
      LOGGER.error("Failed to send email notification on shutdown", ee);
    }
  }

  /**
   * Launch a cluster using the configuration loaded by {@link ConfigFactory#load()} and register a
   * JVM shutdown hook that stops the launcher.
   *
   * @param args command line arguments; not used
   * @throws Exception If creating or launching the {@link GobblinAWSClusterLauncher} fails
   */
  public static void main(String[] args) throws Exception {
    final GobblinAWSClusterLauncher gobblinAWSClusterLauncher =
        new GobblinAWSClusterLauncher(ConfigFactory.load());
    Runtime.getRuntime().addShutdownHook(new Thread() {

      @Override
      public void run() {
        try {
          gobblinAWSClusterLauncher.stop();
        } catch (IOException ioe) {
          LOGGER.error("Failed to shutdown the " + GobblinAWSClusterLauncher.class.getSimpleName(), ioe);
        } catch (TimeoutException te) {
          LOGGER.error("Timeout in stopping the service manager", te);
        } finally {
          // The notification is sent whether or not stop() succeeded
          if (gobblinAWSClusterLauncher.emailNotificationOnShutdown) {
            gobblinAWSClusterLauncher.sendEmailOnShutdown(Optional.<String>absent());
          }
        }
      }
    });

    gobblinAWSClusterLauncher.launch();
  }
}