/**
* Copyright 2015 StreamSets Inc.
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.streamsets.datacollector.cluster;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;
import com.streamsets.datacollector.config.PipelineConfiguration;
import com.streamsets.datacollector.config.RuleDefinitions;
import com.streamsets.datacollector.config.StageConfiguration;
import com.streamsets.datacollector.config.StageDefinition;
import com.streamsets.datacollector.creation.PipelineBean;
import com.streamsets.datacollector.creation.PipelineBeanCreator;
import com.streamsets.datacollector.creation.PipelineConfigBean;
import com.streamsets.datacollector.creation.StageBean;
import com.streamsets.datacollector.execution.runner.common.Constants;
import com.streamsets.datacollector.http.WebServerTask;
import com.streamsets.datacollector.json.ObjectMapperFactory;
import com.streamsets.datacollector.main.RuntimeInfo;
import com.streamsets.datacollector.main.RuntimeModule;
import com.streamsets.datacollector.restapi.bean.BeanHelper;
import com.streamsets.datacollector.security.SecurityConfiguration;
import com.streamsets.datacollector.stagelibrary.StageLibraryTask;
import com.streamsets.datacollector.stagelibrary.StageLibraryUtils;
import com.streamsets.datacollector.store.PipelineInfo;
import com.streamsets.datacollector.store.impl.FileAclStoreTask;
import com.streamsets.datacollector.store.impl.FilePipelineStoreTask;
import com.streamsets.datacollector.util.Configuration;
import com.streamsets.datacollector.util.PipelineConfigurationUtil;
import com.streamsets.datacollector.util.PipelineDirectoryUtil;
import com.streamsets.datacollector.util.SystemProcessFactory;
import com.streamsets.datacollector.validation.Issue;
import com.streamsets.lib.security.acl.AclDtoJsonMapper;
import com.streamsets.lib.security.acl.dto.Acl;
import com.streamsets.lib.security.http.RemoteSSOService;
import com.streamsets.pipeline.api.Config;
import com.streamsets.pipeline.api.ExecutionMode;
import com.streamsets.pipeline.api.impl.PipelineUtils;
import com.streamsets.pipeline.api.impl.Utils;
import com.streamsets.pipeline.lib.util.ThreadUtil;
import com.streamsets.pipeline.util.SystemProcess;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLClassLoader;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static com.streamsets.datacollector.definition.StageLibraryDefinitionExtractor.DATA_COLLECTOR_LIBRARY_PROPERTIES;
import static java.util.Arrays.stream;
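/**
 * Default {@link ClusterProvider} implementation. Submits, monitors and kills cluster pipelines by
 * invoking the cluster manager script with the appropriate environment (MapReduce, Spark on YARN or
 * Spark on Mesos) and by staging the pipeline's libraries, etc and resources directories as tar.gz
 * archives for the cluster job.
 */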
public class ClusterProviderImpl implements ClusterProvider {
static final Pattern YARN_APPLICATION_ID_REGEX = Pattern.compile("\\s(application_[0-9]+_[0-9]+)(\\s|$)");
static final Pattern MESOS_DRIVER_ID_REGEX = Pattern.compile("\\s(driver-[0-9]+-[0-9]+)(\\s|$)");
static final Pattern NO_VALID_CREDENTIALS = Pattern.compile("(No valid credentials provided.*)");
static final String CLUSTER_DPM_APP_TOKEN = "cluster-application-token.txt";
public static final String CLUSTER_TYPE = "CLUSTER_TYPE";
public static final String CLUSTER_TYPE_MESOS = "mesos";
public static final String CLUSTER_TYPE_MAPREDUCE = "mr";
public static final String CLUSTER_TYPE_YARN = "yarn";
private static final String STAGING_DIR = "STAGING_DIR";
private static final String MESOS_UBER_JAR_PATH = "MESOS_UBER_JAR_PATH";
private static final String MESOS_UBER_JAR = "MESOS_UBER_JAR";
private static final String ETC_TAR_ARCHIVE = "ETC_TAR_ARCHIVE";
private static final String LIBS_TAR_ARCHIVE = "LIBS_TAR_ARCHIVE";
private static final String RESOURCES_TAR_ARCHIVE = "RESOURCES_TAR_ARCHIVE";
private static final String MESOS_HOSTING_JAR_DIR = "MESOS_HOSTING_JAR_DIR";
private static final String KERBEROS_AUTH = "KERBEROS_AUTH";
private static final String KERBEROS_KEYTAB = "KERBEROS_KEYTAB";
private static final String KERBEROS_PRINCIPAL = "KERBEROS_PRINCIPAL";
private static final String CLUSTER_MODE_JAR_BLACKLIST = "cluster.jar.blacklist.regex_";
static final String CLUSTER_BOOTSTRAP_JAR_REGEX = "cluster.bootstrap.jar.regex_";
static final Pattern CLUSTER_BOOTSTRAP_API_JAR_PATTERN = Pattern.compile(
"streamsets-datacollector-cluster-bootstrap-api-\\d+.*.jar$");
static final Pattern BOOTSTRAP_MAIN_JAR_PATTERN = Pattern.compile("streamsets-datacollector-bootstrap-\\d+.*.jar$");
static final Pattern CLUSTER_BOOTSTRAP_JAR_PATTERN =
    Pattern.compile("streamsets-datacollector-cluster-bootstrap-\\d+.*.jar$");
static final Pattern CLUSTER_BOOTSTRAP_MESOS_JAR_PATTERN =
    Pattern.compile("streamsets-datacollector-mesos-bootstrap-\\d+.*.jar$");
private static final String ALL_STAGES = "*";
private static final String TOPIC = "topic";
private static final String MESOS_HOSTING_DIR_PARENT = "mesos";
public static final String SPARK_PROCESSOR_STAGE = "com.streamsets.pipeline.stage.processor.spark.SparkDProcessor";
private final RuntimeInfo runtimeInfo;
private final YARNStatusParser yarnStatusParser;
private final MesosStatusParser mesosStatusParser;
/**
* Only null in the case of tests
*/
@Nullable
private final SecurityConfiguration securityConfiguration;
private static final Logger LOG = LoggerFactory.getLogger(ClusterProviderImpl.class);
private static final boolean IS_TRACE_ENABLED = LOG.isTraceEnabled();
@VisibleForTesting
ClusterProviderImpl() {
this(null, null);
}
public ClusterProviderImpl(RuntimeInfo runtimeInfo, @Nullable SecurityConfiguration securityConfiguration) {
this.runtimeInfo = runtimeInfo;
this.securityConfiguration = securityConfiguration;
this.yarnStatusParser = new YARNStatusParser();
this.mesosStatusParser = new MesosStatusParser();
}
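/**
 * Kills the given cluster application by invoking the cluster manager script with the "kill" action,
 * adding the Mesos dispatcher arguments when the pipeline runs in CLUSTER_MESOS_STREAMING mode.
 * Throws a {@link TimeoutException} if the command does not complete within 30 seconds.
 */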
@Override
public void killPipeline(
SystemProcessFactory systemProcessFactory,
File sparkManager,
File tempDir,
String appId,
PipelineConfiguration pipelineConfiguration
) throws TimeoutException, IOException {
Map<String, String> environment = new HashMap<>();
environment.put(CLUSTER_TYPE, CLUSTER_TYPE_YARN);
addKerberosConfiguration(environment);
ImmutableList.Builder<String> args = ImmutableList.builder();
args.add(sparkManager.getAbsolutePath());
args.add("kill");
args.add(appId);
ExecutionMode executionMode = PipelineBeanCreator.get()
.getExecutionMode(pipelineConfiguration, new ArrayList<Issue>());
if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
addMesosArgs(pipelineConfiguration, environment, args);
}
SystemProcess process = systemProcessFactory
.create(ClusterProviderImpl.class.getSimpleName(), tempDir, args.build());
try {
process.start(environment);
if (!process.waitFor(30, TimeUnit.SECONDS)) {
logOutput(appId, process);
throw new TimeoutException(errorString("Kill command for {} timed out.", appId));
}
} finally {
process.cleanup();
}
}
private static String errorString(String template, Object... args) {
return Utils.format("ERROR: " + template, args);
}
private static void logOutput(String appId, SystemProcess process) {
try {
LOG.info("Status command standard error: {} ", Joiner.on("\n").join(process.getAllError()));
LOG.info("Status command standard output: {} ", Joiner.on("\n").join(process.getAllOutput()));
} catch (Exception e) {
String msg = errorString("Could not read output of command '{}' for app {}: {}", process.getCommand(), appId, e);
LOG.error(msg, e);
}
}
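/**
 * Retrieves the current status of the given cluster application by invoking the cluster manager script
 * with the "status" action and parsing its output with the Mesos or YARN status parser, depending on
 * the pipeline's execution mode.
 */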
@Override
public ClusterPipelineStatus getStatus(
SystemProcessFactory systemProcessFactory,
File sparkManager,
File tempDir,
String appId,
PipelineConfiguration pipelineConfiguration
) throws TimeoutException, IOException {
Map<String, String> environment = new HashMap<>();
environment.put(CLUSTER_TYPE, CLUSTER_TYPE_YARN);
addKerberosConfiguration(environment);
ImmutableList.Builder<String> args = ImmutableList.builder();
args.add(sparkManager.getAbsolutePath());
args.add("status");
args.add(appId);
ExecutionMode executionMode = PipelineBeanCreator.get()
.getExecutionMode(pipelineConfiguration, new ArrayList<Issue>());
if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
addMesosArgs(pipelineConfiguration, environment, args);
}
SystemProcess process = systemProcessFactory
.create(ClusterProviderImpl.class.getSimpleName(), tempDir, args.build());
try {
process.start(environment);
if (!process.waitFor(30, TimeUnit.SECONDS)) {
logOutput(appId, process);
throw new TimeoutException(errorString("YARN status command for {} timed out.", appId));
}
if (process.exitValue() != 0) {
logOutput(appId, process);
throw new IllegalStateException(errorString("Status command for {} failed with exit code {}.", appId,
process.exitValue()));
}
logOutput(appId, process);
String status;
if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
status = mesosStatusParser.parseStatus(process.getAllOutput());
} else {
status = yarnStatusParser.parseStatus(process.getAllOutput());
}
return ClusterPipelineStatus.valueOf(status);
} finally {
process.cleanup();
}
}
private void addMesosArgs(
PipelineConfiguration pipelineConfiguration,
Map<String, String> environment,
ImmutableList.Builder<String> args
) {
String mesosDispatcherURL = Utils.checkNotNull(
PipelineBeanCreator.get().getMesosDispatcherURL(pipelineConfiguration), "mesosDispatcherURL"
);
environment.put(CLUSTER_TYPE, CLUSTER_TYPE_MESOS);
args.add("--master");
args.add(mesosDispatcherURL);
}
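/**
 * Rewrites the staged sdc.properties file for slave execution: forces SLAVE execution mode, disables the
 * realm file permission check, clears the base HTTP URL, assigns the web server ports based on whether
 * SSL is configured, and copies the SDC id, cluster token, callback URL, Mesos jar URL (if any), source
 * configs and source info into the properties. Also copies the DPM application token if required.
 */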
private void rewriteProperties(
File sdcPropertiesFile,
File etcStagingDir,
Map<String, String> sourceConfigs,
Map<String, String> sourceInfo,
String clusterToken,
Optional<String> mesosURL
) throws IOException {
InputStream sdcInStream = null;
OutputStream sdcOutStream = null;
Properties sdcProperties = new Properties();
try {
sdcInStream = new FileInputStream(sdcPropertiesFile);
sdcProperties.load(sdcInStream);
copyDpmTokenIfRequired(sdcProperties, etcStagingDir);
sdcProperties.setProperty(RuntimeModule.PIPELINE_EXECUTION_MODE_KEY, ExecutionMode.SLAVE.name());
sdcProperties.setProperty(WebServerTask.REALM_FILE_PERMISSION_CHECK, "false");
sdcProperties.remove(RuntimeModule.DATA_COLLECTOR_BASE_HTTP_URL);
if (runtimeInfo != null) {
if (runtimeInfo.getSSLContext() != null) {
sdcProperties.setProperty(WebServerTask.HTTP_PORT_KEY, "-1");
sdcProperties.setProperty(WebServerTask.HTTPS_PORT_KEY, "0");
} else {
sdcProperties.setProperty(WebServerTask.HTTP_PORT_KEY, "0");
sdcProperties.setProperty(WebServerTask.HTTPS_PORT_KEY, "-1");
}
String id = String.valueOf(runtimeInfo.getId());
sdcProperties.setProperty(Constants.SDC_ID, id);
sdcProperties.setProperty(Constants.PIPELINE_CLUSTER_TOKEN_KEY, clusterToken);
sdcProperties.setProperty(Constants.CALLBACK_SERVER_URL_KEY, runtimeInfo.getClusterCallbackURL());
}
if (mesosURL.isPresent()) {
sdcProperties.setProperty(Constants.MESOS_JAR_URL, mesosURL.get());
}
addClusterConfigs(sourceConfigs, sdcProperties);
addClusterConfigs(sourceInfo, sdcProperties);
sdcOutStream = new FileOutputStream(sdcPropertiesFile);
sdcProperties.store(sdcOutStream, null);
LOG.debug("sourceConfigs = {}", sourceConfigs);
LOG.debug("sourceInfo = {}", sourceInfo);
LOG.debug("sdcProperties = {}", sdcProperties);
sdcOutStream.flush();
sdcOutStream.close();
} finally {
if (sdcInStream != null) {
IOUtils.closeQuietly(sdcInStream);
}
if (sdcOutStream != null) {
IOUtils.closeQuietly(sdcOutStream);
}
}
}
private void addClusterConfigs(Map<String, String> configs, Properties properties) {
for (Map.Entry<String, String> entry : configs.entrySet()) {
properties.setProperty(entry.getKey(), entry.getValue());
}
}
private static File getBootstrapClusterJar(File bootstrapDir, final Pattern pattern) {
File clusterBootstrapDir = new File(bootstrapDir, "cluster");
return getBootstrapJar(clusterBootstrapDir, pattern);
}
private static File getBootstrapMainJar(File bootstrapDir, final Pattern pattern) {
File bootstrapMainDir = new File(bootstrapDir, "main");
return getBootstrapJar(bootstrapMainDir, pattern);
}
private static File getBootstrapJar(File bootstrapDir, final Pattern pattern) {
Utils.checkState(
bootstrapDir.isDirectory(),
Utils.format("SDC bootstrap cluster lib does not exist: {}", bootstrapDir)
);
File[] candidates = bootstrapDir.listFiles(new FileFilter() {
@Override
public boolean accept(File candidate) {
return pattern.matcher(candidate.getName()).matches();
}
});
Utils.checkState(candidates != null, Utils.format("Did not find jar matching {} in {}", pattern, bootstrapDir));
Utils.checkState(candidates.length == 1, Utils.format("Did not find exactly one bootstrap jar: {}",
Arrays.toString(candidates)));
return candidates[0];
}
private void addKerberosConfiguration(Map<String, String> environment) {
if (securityConfiguration != null) {
environment.put(KERBEROS_AUTH, String.valueOf(securityConfiguration.isKerberosEnabled()));
if (securityConfiguration.isKerberosEnabled()) {
environment.put(KERBEROS_PRINCIPAL, securityConfiguration.getKerberosPrincipal());
environment.put(KERBEROS_KEYTAB, securityConfiguration.getKerberosKeytab());
}
}
}
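/**
 * Copies {@code srcDir} into a directory named {@code dirName} under {@code tempDir}, replacing any
 * previous copy. Files that cannot be read are silently skipped (see {@link #doCopyDirectory}).
 */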
static File createDirectoryClone(File srcDir, String dirName, File tempDir) throws IOException {
File tempSrcDir = new File(tempDir, dirName);
FileUtils.deleteQuietly(tempSrcDir);
Utils.checkState(tempSrcDir.mkdir(), Utils.formatL("Could not create {}", tempSrcDir));
doCopyDirectory(srcDir, tempSrcDir);
return tempSrcDir;
}
private static void doCopyDirectory(File srcDir, File destDir)
throws IOException {
// code copied from commons-io FileUtils to work around files which cannot be read
// recurse
final File[] srcFiles = srcDir.listFiles();
if (srcFiles == null) { // null if abstract pathname does not denote a directory, or if an I/O error occurs
throw new IOException("Failed to list contents of " + srcDir);
}
if (destDir.exists()) {
if (!destDir.isDirectory()) {
throw new IOException("Destination '" + destDir + "' exists but is not a directory");
}
} else {
if (!destDir.mkdirs() && !destDir.isDirectory()) {
throw new IOException("Destination '" + destDir + "' directory cannot be created");
}
}
if (!destDir.canWrite()) {
throw new IOException("Destination '" + destDir + "' cannot be written to");
}
for (final File srcFile : srcFiles) {
final File dstFile = new File(destDir, srcFile.getName());
if (srcFile.canRead()) { // ignore files which cannot be read
if (srcFile.isDirectory()) {
doCopyDirectory(srcFile, dstFile);
} else {
try (InputStream in = new FileInputStream((srcFile))) {
try (OutputStream out = new FileOutputStream((dstFile))) {
IOUtils.copy(in, out);
}
}
}
}
}
}
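/**
 * Returns true if {@code name} matches any of the blacklist regular expressions.
 */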
static boolean exclude(List<String> blacklist, String name) {
for (String pattern : blacklist) {
if (Pattern.compile(pattern).matcher(name).find()) {
return true;
} else if (IS_TRACE_ENABLED) {
LOG.trace("Pattern '{}' does not match '{}'", pattern, name);
}
}
return false;
}
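/**
 * Loads and merges all data collector library properties resources visible from the given class loader
 * and its parents.
 */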
@VisibleForTesting
static Properties readDataCollectorProperties(ClassLoader cl) throws IOException {
Properties properties = new Properties();
while (cl != null) {
Enumeration<URL> urls = cl.getResources(DATA_COLLECTOR_LIBRARY_PROPERTIES);
if (urls != null) {
while (urls.hasMoreElements()) {
URL url = urls.nextElement();
LOG.trace("Loading data collector library properties: {}", url);
try (InputStream inputStream = url.openStream()) {
properties.load(inputStream);
}
}
}
cl = cl.getParent();
}
LOG.trace("Final properties: {} ", properties);
return properties;
}
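/**
 * Returns the URLs of the given class loader, excluding any jars that match the cluster mode jar
 * blacklist configured in the library properties for the given stage class or for all stages.
 */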
private static List<URL> findJars(String name, URLClassLoader cl, @Nullable String stageClazzName)
throws IOException {
Properties properties = readDataCollectorProperties(cl);
List<String> blacklist = new ArrayList<>();
for (Map.Entry entry : properties.entrySet()) {
String key = (String) entry.getKey();
if (stageClazzName != null && key.equals(CLUSTER_MODE_JAR_BLACKLIST + stageClazzName)) {
String value = (String) entry.getValue();
blacklist.addAll(Splitter.on(",").trimResults().omitEmptyStrings().splitToList(value));
} else if (key.equals(CLUSTER_MODE_JAR_BLACKLIST + ALL_STAGES)) {
String value = (String) entry.getValue();
blacklist.addAll(Splitter.on(",").trimResults().omitEmptyStrings().splitToList(value));
}
}
if (IS_TRACE_ENABLED) {
LOG.trace("Blacklist for '{}': '{}'", name, blacklist);
}
List<URL> urls = new ArrayList<>();
for (URL url : cl.getURLs()) {
if (blacklist.isEmpty()) {
urls.add(url);
} else {
if (exclude(blacklist, FilenameUtils.getName(url.getPath()))) {
LOG.trace("Skipping '{}' for '{}' due to '{}'", url, name, blacklist);
} else {
urls.add(url);
}
}
}
return urls;
}
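/**
 * Creates a staging directory under {@code outputDir} and delegates to
 * {@link #startPipelineInternal}. The staging directory is deleted afterwards unless running in
 * testing mode ({@code sdc.testing-mode}), where it is still in use by the YARN tasks.
 */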
@Override
public ApplicationState startPipeline(
SystemProcessFactory systemProcessFactory,
File clusterManager,
File outputDir,
Map<String, String> environment,
Map<String, String> sourceInfo,
PipelineConfiguration pipelineConfiguration,
StageLibraryTask stageLibrary,
File etcDir, File resourcesDir,
File staticWebDir, File bootstrapDir,
URLClassLoader apiCL,
URLClassLoader containerCL,
long timeToWaitForFailure,
RuleDefinitions ruleDefinitions,
Acl acl
) throws IOException, TimeoutException {
File stagingDir = new File(outputDir, "staging");
if (!stagingDir.mkdirs() || !stagingDir.isDirectory()) {
String msg = Utils.format("Could not create staging directory: {}", stagingDir);
throw new IllegalStateException(msg);
}
try {
return startPipelineInternal(
systemProcessFactory,
clusterManager,
outputDir,
environment,
sourceInfo,
pipelineConfiguration,
stageLibrary,
etcDir,
resourcesDir,
staticWebDir,
bootstrapDir,
apiCL,
containerCL,
timeToWaitForFailure,
stagingDir,
ruleDefinitions,
acl
);
} finally {
// in testing mode the staging dir is used by yarn
// tasks and thus cannot be deleted
if (!Boolean.getBoolean("sdc.testing-mode") && !FileUtils.deleteQuietly(stagingDir)) {
LOG.warn("Unable to cleanup: {}", stagingDir);
}
}
}
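/**
 * Builds everything needed to submit the pipeline to the cluster: collects the source configs and the
 * jars to ship, packages libs.tar.gz, etc.tar.gz and resources.tar.gz into the staging directory,
 * rewrites sdc.properties, generates the MapReduce, Spark-on-YARN or Spark-on-Mesos submit arguments,
 * starts the submit process and waits for an application or driver id to appear in its output.
 */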
@SuppressWarnings("unchecked")
private ApplicationState startPipelineInternal(
SystemProcessFactory systemProcessFactory,
File clusterManager,
File outputDir,
Map<String, String> environment,
Map<String, String> sourceInfo,
PipelineConfiguration pipelineConfiguration,
StageLibraryTask stageLibrary,
File etcDir,
File resourcesDir,
File staticWebDir,
File bootstrapDir,
URLClassLoader apiCL,
URLClassLoader containerCL,
long timeToWaitForFailure,
File stagingDir,
RuleDefinitions ruleDefinitions,
Acl acl
) throws IOException, TimeoutException {
environment = Maps.newHashMap(environment);
// create libs.tar.gz file for pipeline
Map<String, List<URL>> streamsetsLibsCl = new HashMap<>();
Map<String, List<URL>> userLibsCL = new HashMap<>();
Map<String, String> sourceConfigs = new HashMap<>();
ImmutableList.Builder<StageConfiguration> pipelineConfigurations = ImmutableList.builder();
// order is important here as we don't want error stage
// configs overriding source stage configs
String clusterToken = UUID.randomUUID().toString();
Set<String> jarsToShip = new LinkedHashSet<>();
List<Issue> errors = new ArrayList<>();
PipelineBean pipelineBean = PipelineBeanCreator.get().create(false, stageLibrary, pipelineConfiguration, errors);
if (!errors.isEmpty()) {
String msg = Utils.format("Found '{}' configuration errors: {}", errors.size(), errors);
throw new IllegalStateException(msg);
}
pipelineConfigurations.add(pipelineBean.getErrorStage().getConfiguration());
StageBean statsStage = pipelineBean.getStatsAggregatorStage();
// statsStage is null for pre-1.3 pipelines
if (statsStage != null) {
pipelineConfigurations.add(statsStage.getConfiguration());
}
pipelineConfigurations.add(pipelineBean.getOrigin().getConfiguration());
for (StageBean stageBean : pipelineBean.getPipelineStageBeans().getStages()) {
pipelineConfigurations.add(stageBean.getConfiguration());
}
ExecutionMode executionMode = ExecutionMode.STANDALONE;
for (StageConfiguration stageConf : pipelineConfigurations.build()) {
StageDefinition stageDef = stageLibrary.getStage(stageConf.getLibrary(), stageConf.getStageName(), false);
if (stageConf.getInputLanes().isEmpty()) {
for (Config conf : stageConf.getConfiguration()) {
if (conf.getValue() != null) {
Object value = conf.getValue();
if (value instanceof List) {
List values = (List) value;
if (values.isEmpty()) {
LOG.debug("Conf value for " + conf.getName() + " is empty");
} else {
Object first = values.get(0);
if (canCastToString(first)) {
sourceConfigs.put(conf.getName(), Joiner.on(",").join(values));
} else if (first instanceof Map) {
addToSourceConfigs(sourceConfigs, (List<Map<String, Object>>) values);
} else {
LOG.info(
"List is of type '{}' which cannot be converted to property value.",
first.getClass().getName()
);
}
}
} else if (canCastToString(conf.getValue())) {
LOG.debug("Adding to source configs " + conf.getName() + "=" + value);
sourceConfigs.put(conf.getName(), String.valueOf(value));
} else if (value instanceof Enum) {
value = ((Enum) value).name();
LOG.debug("Adding to source configs " + conf.getName() + "=" + value);
sourceConfigs.put(conf.getName(), String.valueOf(value));
} else {
LOG.warn("Conf value is of unknown type " + conf.getValue());
}
}
}
executionMode = PipelineBeanCreator.get().getExecutionMode(pipelineConfiguration, new ArrayList<Issue>());
List<String> libJarsRegex = stageDef.getLibJarsRegex();
if (!libJarsRegex.isEmpty()) {
for (URL jarUrl : ((URLClassLoader) stageDef.getStageClassLoader()).getURLs()) {
File jarFile = new File(jarUrl.getPath());
for (String libJar : libJarsRegex) {
Pattern pattern = Pattern.compile(libJar);
Matcher matcher = pattern.matcher(jarFile.getName());
if (matcher.matches()) {
jarsToShip.add(jarFile.getAbsolutePath());
}
}
}
}
}
String type = StageLibraryUtils.getLibraryType(stageDef.getStageClassLoader());
String name = StageLibraryUtils.getLibraryName(stageDef.getStageClassLoader());
if (ClusterModeConstants.STREAMSETS_LIBS.equals(type)) {
streamsetsLibsCl.put(
name, findJars(name, (URLClassLoader) stageDef.getStageClassLoader(), stageDef.getClassName())
);
} else if (ClusterModeConstants.USER_LIBS.equals(type)) {
userLibsCL.put(name, findJars(name, (URLClassLoader) stageDef.getStageClassLoader(), stageDef.getClassName()));
} else {
throw new IllegalStateException(Utils.format("Error unknown stage library type: '{}'", type));
}
// TODO: Get extras dir from the env var.
// Then traverse each jar's parent (getParent method) and add only the ones whose parent is the extras dir.
// Add all jars of stagelib to --jars. We only really need stuff from the extras directory.
if (stageDef.getClassName().equals(SPARK_PROCESSOR_STAGE)) {
LOG.info("Spark processor found in pipeline, adding to spark-submit");
File extras = new File(System.getenv("STREAMSETS_LIBRARIES_EXTRA_DIR"));
LOG.info("Found extras dir: " + extras.toString());
File stageLibExtras = new File(extras.toString() + "/" + stageConf.getLibrary() + "/" + "lib");
LOG.info("StageLib Extras dir: " + stageLibExtras.toString());
File[] extraJarsForStageLib = stageLibExtras.listFiles();
if (extraJarsForStageLib != null) {
stream(extraJarsForStageLib).map(File::toString).forEach(jarsToShip::add);
}
addJarsToJarsList((URLClassLoader) stageDef.getStageClassLoader(), jarsToShip, "streamsets-datacollector-spark-api-[0-9]+.*");
}
}
if (executionMode == ExecutionMode.CLUSTER_YARN_STREAMING ||
executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
LOG.info("Execution Mode is CLUSTER_STREAMING. Adding container jar and API jar to spark-submit");
addJarsToJarsList(containerCL, jarsToShip, "streamsets-datacollector-container-[0-9]+.*");
addJarsToJarsList(apiCL, jarsToShip, "streamsets-datacollector-api-[0-9]+.*");
}
LOG.info("stagingDir = '{}'", stagingDir);
LOG.info("bootstrapDir = '{}'", bootstrapDir);
LOG.info("etcDir = '{}'", etcDir);
LOG.info("resourcesDir = '{}'", resourcesDir);
LOG.info("staticWebDir = '{}'", staticWebDir);
Utils.checkState(staticWebDir.isDirectory(), Utils.format("Expected '{}' to be a directory", staticWebDir));
File libsTarGz = new File(stagingDir, "libs.tar.gz");
try {
TarFileCreator.createLibsTarGz(
findJars("api", apiCL, null),
findJars("container", containerCL, null),
streamsetsLibsCl,
userLibsCL,
staticWebDir,
libsTarGz
);
} catch (Exception ex) {
String msg = errorString("Serializing classpath: '{}'", ex);
throw new RuntimeException(msg, ex);
}
File resourcesTarGz = new File(stagingDir, "resources.tar.gz");
try {
resourcesDir = createDirectoryClone(resourcesDir, "resources", stagingDir);
TarFileCreator.createTarGz(resourcesDir, resourcesTarGz);
} catch (Exception ex) {
String msg = errorString("Serializing resources directory: '{}': {}", resourcesDir.getName(), ex);
throw new RuntimeException(msg, ex);
}
File etcTarGz = new File(stagingDir, "etc.tar.gz");
File sdcPropertiesFile;
File bootstrapJar = getBootstrapMainJar(bootstrapDir, BOOTSTRAP_MAIN_JAR_PATTERN);
File clusterBootstrapJar;
String mesosHostingJarDir = null;
String mesosURL = null;
Pattern clusterBootstrapJarFile = findClusterBootstrapJar(executionMode, pipelineConfiguration, stageLibrary);
clusterBootstrapJar = getBootstrapClusterJar(bootstrapDir, clusterBootstrapJarFile);
if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
String topic = sourceConfigs.get(TOPIC);
String pipelineName = sourceInfo.get(ClusterModeConstants.CLUSTER_PIPELINE_NAME);
mesosHostingJarDir = MESOS_HOSTING_DIR_PARENT + File.separatorChar + getSha256(getMesosHostingDir(topic, pipelineName));
mesosURL = runtimeInfo.getBaseHttpUrl() + File.separatorChar + mesosHostingJarDir + File.separatorChar
+ clusterBootstrapJar.getName();
} else if (executionMode == ExecutionMode.CLUSTER_YARN_STREAMING) {
jarsToShip.add(getBootstrapClusterJar(bootstrapDir, CLUSTER_BOOTSTRAP_API_JAR_PATTERN).getAbsolutePath());
}
try {
etcDir = createDirectoryClone(etcDir, "etc", stagingDir);
if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
try (
InputStream clusterLog4jProperties = Utils.checkNotNull(getClass().getResourceAsStream("/cluster-spark-log4j.properties"), "Cluster Log4J Properties")
) {
File log4jProperty = new File(etcDir, runtimeInfo.getLog4jPropertiesFileName());
if (!log4jProperty.isFile()) {
throw new IllegalStateException(
Utils.format("Log4j config file doesn't exist: '{}'", log4jProperty.getAbsolutePath())
);
}
LOG.info("Copying log4j properties for mesos cluster mode");
FileUtils.copyInputStreamToFile(clusterLog4jProperties,
log4jProperty);
}
}
PipelineInfo pipelineInfo = Utils.checkNotNull(pipelineConfiguration.getInfo(), "Pipeline Info");
String pipelineName = pipelineInfo.getPipelineId();
File rootDataDir = new File(etcDir, "data");
File pipelineBaseDir = new File(rootDataDir, PipelineDirectoryUtil.PIPELINE_INFO_BASE_DIR);
File pipelineDir = new File(pipelineBaseDir, PipelineUtils.escapedPipelineName(pipelineName));
if (!pipelineDir.exists()) {
if (!pipelineDir.mkdirs()) {
throw new RuntimeException("Failed to create pipeline directory " + pipelineDir.getPath());
}
}
File pipelineFile = new File(pipelineDir, FilePipelineStoreTask.PIPELINE_FILE);
ObjectMapperFactory.getOneLine().writeValue(pipelineFile,
BeanHelper.wrapPipelineConfiguration(pipelineConfiguration));
File infoFile = new File(pipelineDir, FilePipelineStoreTask.INFO_FILE);
ObjectMapperFactory.getOneLine().writeValue(infoFile, BeanHelper.wrapPipelineInfo(pipelineInfo));
Utils.checkNotNull(ruleDefinitions, "ruleDefinitions");
File rulesFile = new File(pipelineDir, FilePipelineStoreTask.RULES_FILE);
ObjectMapperFactory.getOneLine().writeValue(rulesFile, BeanHelper.wrapRuleDefinitions(ruleDefinitions));
if (null != acl) {
// acl could be null if permissions are not enabled
File aclFile = new File(pipelineDir, FileAclStoreTask.ACL_FILE);
ObjectMapperFactory.getOneLine().writeValue(aclFile, AclDtoJsonMapper.INSTANCE.toAclJson(acl));
}
sdcPropertiesFile = new File(etcDir, "sdc.properties");
if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
String hdfsS3ConfDirValue = PipelineBeanCreator.get().getHdfsS3ConfDirectory(pipelineConfiguration);
if (hdfsS3ConfDirValue != null && !hdfsS3ConfDirValue.isEmpty()) {
File hdfsS3ConfDir = new File(resourcesDir, hdfsS3ConfDirValue).getAbsoluteFile();
if (!hdfsS3ConfDir.exists()) {
String msg = Utils.format("HDFS/S3 Checkpoint Configuration Directory '{}' doesn't exist",
hdfsS3ConfDir.getPath());
throw new IllegalArgumentException(msg);
} else {
File coreSite = new File(hdfsS3ConfDir, "core-site.xml");
if (!coreSite.exists()) {
String msg = Utils.format("HDFS/S3 Checkpoint Configuration file core-site.xml '{}' doesn't exist",
coreSite.getPath());
throw new IllegalStateException(msg);
}
sourceConfigs.put("hdfsS3ConfDir", hdfsS3ConfDirValue);
}
} else {
throw new IllegalStateException("HDFS/S3 Checkpoint configuration directory is required");
}
}
rewriteProperties(sdcPropertiesFile, etcDir, sourceConfigs, sourceInfo, clusterToken, Optional.ofNullable(mesosURL));
TarFileCreator.createTarGz(etcDir, etcTarGz);
} catch (RuntimeException ex) {
String msg = errorString("serializing etc directory: {}", ex);
throw new RuntimeException(msg, ex);
}
File log4jProperties = new File(stagingDir, "log4j.properties");
InputStream clusterLog4jProperties = null;
try {
if (executionMode == ExecutionMode.CLUSTER_BATCH) {
clusterLog4jProperties = Utils.checkNotNull(
getClass().getResourceAsStream("/cluster-mr-log4j.properties"), "Cluster Log4J Properties"
);
} else if (executionMode == ExecutionMode.CLUSTER_YARN_STREAMING) {
clusterLog4jProperties =
Utils.checkNotNull(
getClass().getResourceAsStream("/cluster-spark-log4j.properties"), "Cluster Log4J Properties"
);
}
if (clusterLog4jProperties != null) {
FileUtils.copyInputStreamToFile(clusterLog4jProperties, log4jProperties);
}
} catch (IOException ex) {
String msg = errorString("copying log4j configuration: {}", ex);
throw new RuntimeException(msg, ex);
} finally {
if (clusterLog4jProperties != null) {
IOUtils.closeQuietly(clusterLog4jProperties);
}
}
addKerberosConfiguration(environment);
errors.clear();
PipelineConfigBean config = PipelineBeanCreator.get().create(pipelineConfiguration, errors);
Utils.checkArgument(config != null, Utils.formatL("Invalid pipeline configuration: {}", errors));
String numExecutors = sourceInfo.get(ClusterModeConstants.NUM_EXECUTORS_KEY);
List<String> args;
File hostingDir = null;
if (executionMode == ExecutionMode.CLUSTER_BATCH) {
LOG.info("Submitting MapReduce Job");
environment.put(CLUSTER_TYPE, CLUSTER_TYPE_MAPREDUCE);
args = generateMRArgs(
clusterManager.getAbsolutePath(),
String.valueOf(config.clusterSlaveMemory),
config.clusterSlaveJavaOpts,
libsTarGz.getAbsolutePath(),
etcTarGz.getAbsolutePath(),
resourcesTarGz.getAbsolutePath(),
log4jProperties.getAbsolutePath(),
bootstrapJar.getAbsolutePath(),
sdcPropertiesFile.getAbsolutePath(),
clusterBootstrapJar.getAbsolutePath(),
jarsToShip
);
} else if (executionMode == ExecutionMode.CLUSTER_YARN_STREAMING) {
LOG.info("Submitting Spark Job on Yarn");
environment.put(CLUSTER_TYPE, CLUSTER_TYPE_YARN);
args = generateSparkArgs(
clusterManager.getAbsolutePath(),
String.valueOf(config.clusterSlaveMemory),
config.clusterSlaveJavaOpts,
numExecutors,
libsTarGz.getAbsolutePath(),
etcTarGz.getAbsolutePath(),
resourcesTarGz.getAbsolutePath(),
log4jProperties.getAbsolutePath(),
bootstrapJar.getAbsolutePath(),
jarsToShip,
clusterBootstrapJar.getAbsolutePath()
);
} else if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
LOG.info("Submitting Spark Job on Mesos");
environment.put(CLUSTER_TYPE, CLUSTER_TYPE_MESOS);
environment.put(STAGING_DIR, stagingDir.getAbsolutePath());
environment.put(MESOS_UBER_JAR_PATH, clusterBootstrapJar.getAbsolutePath());
environment.put(MESOS_UBER_JAR, clusterBootstrapJar.getName());
environment.put(ETC_TAR_ARCHIVE, "etc.tar.gz");
environment.put(LIBS_TAR_ARCHIVE, "libs.tar.gz");
environment.put(RESOURCES_TAR_ARCHIVE, "resources.tar.gz");
hostingDir = new File(runtimeInfo.getDataDir(), Utils.checkNotNull(mesosHostingJarDir, "mesos jar dir cannot be null"));
if (!hostingDir.mkdirs()) {
throw new RuntimeException("Couldn't create hosting dir: " + hostingDir.toString());
}
environment.put(MESOS_HOSTING_JAR_DIR, hostingDir.getAbsolutePath());
args = generateMesosArgs(
    clusterManager.getAbsolutePath(),
    config.mesosDispatcherURL,
    Utils.checkNotNull(mesosURL, "mesos jar url cannot be null")
);
} else {
throw new IllegalStateException(Utils.format("Incorrect execution mode: {}", executionMode));
}
SystemProcess process = systemProcessFactory.create(ClusterProviderImpl.class.getSimpleName(), outputDir, args);
LOG.info("Starting: " + process);
try {
process.start(environment);
long start = System.currentTimeMillis();
Set<String> applicationIds = new HashSet<>();
while (true) {
long elapsedSeconds = TimeUnit.SECONDS.convert(System.currentTimeMillis() - start, TimeUnit.MILLISECONDS);
LOG.debug("Waiting for application id, elapsed seconds: " + elapsedSeconds);
if (applicationIds.size() > 1) {
logOutput("unknown", process);
throw new IllegalStateException(errorString("Found more than one application id: {}", applicationIds));
} else if (!applicationIds.isEmpty()) {
String appId = applicationIds.iterator().next();
logOutput(appId, process);
ApplicationState applicationState = new ApplicationState();
applicationState.setId(appId);
applicationState.setSdcToken(clusterToken);
if (mesosHostingJarDir != null) {
applicationState.setDirId(mesosHostingJarDir);
}
return applicationState;
}
if (!ThreadUtil.sleep(1000)) {
if (hostingDir != null) {
FileUtils.deleteQuietly(hostingDir);
}
throw new IllegalStateException("Interrupted while waiting for pipeline to start");
}
List<String> lines = new ArrayList<>();
lines.addAll(process.getOutput());
lines.addAll(process.getError());
Matcher m;
for (String line : lines) {
if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
m = MESOS_DRIVER_ID_REGEX.matcher(line);
} else {
m = YARN_APPLICATION_ID_REGEX.matcher(line);
}
if (m.find()) {
LOG.info("Found application id " + m.group(1));
applicationIds.add(m.group(1));
}
m = NO_VALID_CREDENTIALS.matcher(line);
if (m.find()) {
LOG.info("Kerberos Error found on line: " + line);
String msg = "Kerberos Error: " + m.group(1);
throw new IOException(msg);
}
}
if (elapsedSeconds > timeToWaitForFailure) {
logOutput("unknown", process);
String msg = Utils.format("Timed out after waiting {} seconds for for cluster application to start. " +
"Submit command {} alive.", elapsedSeconds, (process.isAlive() ? "is" : "is not"));
if (hostingDir != null) {
FileUtils.deleteQuietly(hostingDir);
}
throw new IllegalStateException(msg);
}
}
} finally {
process.cleanup();
}
}
private void addJarsToJarsList(URLClassLoader cl, Set<String> jarsToShip, String regex) {
jarsToShip.addAll(getFilesInCL(cl, regex));
}
private List<String> getFilesInCL(URLClassLoader cl, String regex) {
List<String> files = new ArrayList<>();
for (URL url : cl.getURLs()){
File jar = new File(url.getPath());
if (jar.getName().matches(regex)) {
LOG.info(Utils.format("Adding {} to ship.", url.getPath()));
files.add(jar.getAbsolutePath());
}
}
return files;
}
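/**
 * If config.includes is set, scans each included properties file in the staged etc directory and copies
 * the DPM application token for the first one that has DPM enabled; otherwise checks sdc.properties
 * itself (e.g. for parcel installations where all configs live in sdc.properties).
 */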
@VisibleForTesting
void copyDpmTokenIfRequired(Properties sdcProps, File etcStagingDir) throws IOException {
String configFiles = sdcProps.getProperty(Configuration.CONFIG_INCLUDES);
if (configFiles != null) {
for (String include : Splitter.on(",").trimResults().omitEmptyStrings().split(configFiles)) {
File file = new File(etcStagingDir, include);
try (Reader reader = new FileReader(file)) {
Properties includesDpmProps = new Properties();
includesDpmProps.load(reader);
if (copyDpmTokenIfEnabled(includesDpmProps, etcStagingDir, include)) {
break;
}
}
}
} else { // config.includes is not present for a parcel installation; all configs are in sdc.properties
copyDpmTokenIfEnabled(sdcProps, etcStagingDir, null);
}
}
private boolean copyDpmTokenIfEnabled(Properties props, File etcStagingDir, String include) throws IOException {
Object isDPMEnabled = props.get(RemoteSSOService.DPM_ENABLED);
if (isDPMEnabled != null) {
if (Boolean.parseBoolean(((String) isDPMEnabled).trim())) {
copyDpmTokenIfAbsolute(props, etcStagingDir);
if (include != null) {
try (OutputStream outputStream = new FileOutputStream(new File(etcStagingDir, include))) {
props.store(outputStream, null);
}
}
return true;
}
}
return false;
}
private void copyDpmTokenIfAbsolute(Properties includesDpmProps, File etcStagingDir) throws IOException {
String dpmTokenFile = includesDpmProps.getProperty(RemoteSSOService.SECURITY_SERVICE_APP_AUTH_TOKEN_CONFIG);
String tokenFile = Configuration.FileRef.getUnresolvedValueWithoutDelimiter(dpmTokenFile,
Configuration.FileRef.PREFIX,
Configuration.FileRef.SUFFIX,
Configuration.FileRef.DELIMITER
);
if (Paths.get(tokenFile).isAbsolute()) {
LOG.info("Copying application token from absolute location {} to etc's staging dir: {}",
tokenFile,
etcStagingDir
);
try (InputStream inStream = new FileInputStream((tokenFile))) {
try (OutputStream out = new FileOutputStream(new File(etcStagingDir, CLUSTER_DPM_APP_TOKEN))) {
IOUtils.copy(inStream, out);
}
}
// set the property
includesDpmProps.setProperty(RemoteSSOService.SECURITY_SERVICE_APP_AUTH_TOKEN_CONFIG,
Configuration.FileRef.DELIMITER +
CLUSTER_DPM_APP_TOKEN + Configuration.FileRef.DELIMITER
);
}
}
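/**
 * Determines the pattern of the cluster bootstrap jar to use for the pipeline's origin. A stage library
 * can override the default per execution mode and origin class via a cluster.bootstrap.jar.regex_
 * property; otherwise the default Mesos, YARN streaming or batch bootstrap jar pattern is returned.
 */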
@VisibleForTesting
Pattern findClusterBootstrapJar(
ExecutionMode executionMode, PipelineConfiguration pipelineConf, StageLibraryTask stageLibraryTask
) throws IOException {
StageConfiguration stageConf = PipelineConfigurationUtil.getSourceStageConf(pipelineConf);
StageDefinition stageDefinition = stageLibraryTask.getStage(stageConf.getLibrary(),
stageConf.getStageName(),
false
);
ClassLoader stageClassLoader = stageDefinition.getStageClassLoader();
Properties dataCollectorProps = readDataCollectorProperties(stageClassLoader);
for (Map.Entry entry : dataCollectorProps.entrySet()) {
String key = (String) entry.getKey();
String value = (String) entry.getValue();
LOG.debug("Datacollector library properties key : '{}', value: '{}'", key, value);
if (key.equals(CLUSTER_BOOTSTRAP_JAR_REGEX + executionMode + "_" + stageDefinition.getClassName())) {
LOG.info("Using bootstrap jar pattern: '{}'", value);
return Pattern.compile(value + "-\\d+.*");
}
}
Pattern defaultJarPattern;
if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
defaultJarPattern = CLUSTER_BOOTSTRAP_MESOS_JAR_PATTERN;
} else if (executionMode == ExecutionMode.CLUSTER_YARN_STREAMING) {
defaultJarPattern = CLUSTER_BOOTSTRAP_JAR_PATTERN;
} else {
defaultJarPattern = CLUSTER_BOOTSTRAP_API_JAR_PATTERN;
}
return defaultJarPattern;
}
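/**
 * Builds the spark-submit arguments for Spark on Mesos in cluster deploy mode, pointing the driver at
 * the Mesos dispatcher and at the hosted Mesos bootstrap jar.
 */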
private List<String> generateMesosArgs(String clusterManager, String mesosDispatcherURL, String mesosJar) {
List<String> args = new ArrayList<>();
args.add(clusterManager);
args.add("start");
args.add("--deploy-mode");
args.add("cluster");
// total executor cores option currently doesn't work for spark on mesos
args.add("--total-executor-cores");
args.add("1");
args.add("--master");
args.add(mesosDispatcherURL);
args.add("--class");
args.add("com.streamsets.pipeline.mesos.BootstrapMesosDriver");
args.add(mesosJar);
return args;
}
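/**
 * Builds the arguments used to submit the cluster batch pipeline as a MapReduce job: the cluster
 * bootstrap jar with BootstrapClusterBatch as the main class, the tar.gz archives, the log4j
 * properties, the lib jars to ship and the slave JVM options (including the bootstrap javaagent).
 */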
private List<String> generateMRArgs(String clusterManager, String slaveMemory, String javaOpts,
String libsTarGz, String etcTarGz, String resourcesTarGz, String log4jProperties,
String bootstrapJar, String sdcPropertiesFile,
String clusterBootstrapJar, Set<String> jarsToShip) {
List<String> args = new ArrayList<>();
args.add(clusterManager);
args.add("start");
args.add("jar");
args.add(clusterBootstrapJar);
args.add("com.streamsets.pipeline.BootstrapClusterBatch");
args.add("-archives");
args.add(Joiner.on(",").join(libsTarGz, etcTarGz, resourcesTarGz));
args.add("-D");
args.add("mapreduce.job.log4j-properties-file=" + log4jProperties);
args.add("-libjars");
StringBuilder libJarString = new StringBuilder(bootstrapJar);
for (String jarToShip : jarsToShip) {
libJarString.append(",").append(jarToShip);
}
args.add(libJarString.toString());
args.add(sdcPropertiesFile);
args.add(
Joiner.on(" ").join(
String.format("-Xmx%sm", slaveMemory),
javaOpts,
"-javaagent:./" + (new File(bootstrapJar)).getName()
)
);
return args;
}
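/**
 * Builds the spark-submit arguments for Spark on YARN (yarn-cluster mode): executor memory and count,
 * the tar.gz archives, the log4j properties file, the jars to ship and the executor JVM options
 * (including the bootstrap javaagent), with BootstrapClusterStreaming as the main class.
 */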
private List<String> generateSparkArgs(
String clusterManager,
String slaveMemory,
String javaOpts,
String numExecutors,
String libsTarGz,
String etcTarGz,
String resourcesTarGz,
String log4jProperties,
String bootstrapJar,
Set<String> jarsToShip,
String clusterBootstrapJar
) {
List<String> args = new ArrayList<>();
args.add(clusterManager);
args.add("start");
// we only support yarn-cluster mode
args.add("--master");
args.add("yarn-cluster");
args.add("--executor-memory");
args.add(slaveMemory + "m");
// one single sdc per executor
args.add("--executor-cores");
args.add("1");
// Number of Executors based on the origin parallelism
checkNumExecutors(numExecutors);
args.add("--num-executors");
args.add(numExecutors);
// ship our stage libs and etc directory
args.add("--archives");
args.add(Joiner.on(",").join(libsTarGz, etcTarGz, resourcesTarGz));
// required or else we won't be able to log on cluster
args.add("--files");
args.add(log4jProperties);
args.add("--jars");
StringBuilder libJarString = new StringBuilder(bootstrapJar);
for (String jarToShip : jarsToShip) {
libJarString.append(",").append(jarToShip);
}
args.add(libJarString.toString());
// use our javaagent and java opt configs
args.add("--conf");
args.add("spark.executor.extraJavaOptions=" +
Joiner.on(" ").join("-javaagent:./" + (new File(bootstrapJar)).getName(), javaOpts)
);
// main class
args.add("--class");
args.add("com.streamsets.pipeline.BootstrapClusterStreaming");
args.add(clusterBootstrapJar);
return args;
}
private void addToSourceConfigs(Map<String, String> sourceConfigs, List<Map<String, Object>> arrayListValues) {
for (Map<String, Object> map : arrayListValues) {
String confKey = null;
String confValue = null;
for (Map.Entry<String, Object> mapEntry : map.entrySet()) {
String mapKey = mapEntry.getKey();
Object mapValue = mapEntry.getValue();
switch (mapKey) {
case "key":
// Assuming the key is always a string
confKey = String.valueOf(mapValue);
break;
case "value":
confValue = canCastToString(mapValue) ? String.valueOf(mapValue) : null;
break;
default:
confKey = mapKey;
confValue = canCastToString(mapValue) ? String.valueOf(mapValue) : null;
break;
}
if (confKey != null && confValue != null) {
LOG.debug("Adding to source configs " + confKey + "=" + confValue);
sourceConfigs.put(confKey, confValue);
}
}
}
}
private boolean canCastToString(Object value) {
return value instanceof String || value instanceof Number || value.getClass().isPrimitive() ||
value instanceof Boolean;
}
private void checkNumExecutors(String numExecutorsString) {
Utils.checkNotNull(numExecutorsString, "Number of executors not found");
int numExecutors;
try {
numExecutors = Integer.parseInt(numExecutorsString);
} catch (NumberFormatException e) {
throw new IllegalArgumentException("Number of executors is not a valid integer");
}
Utils.checkArgument(numExecutors > 0, "Number of executors cannot be less than 1");
}
private enum ClusterOrigin {
HDFS, KAFKA;
}
private String getMesosHostingDir(String topic, String pipelineName) {
String sdcId = String.valueOf(runtimeInfo.getId());
String mesosHostingDir = sdcId + File.separatorChar + topic + File.separatorChar + pipelineName;
return mesosHostingDir;
}
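/**
 * Returns the URL-safe Base64 encoding of the SHA-256 digest of the given Mesos hosting directory path,
 * used as the externally visible directory name for the hosted Mesos bootstrap jar.
 */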
private String getSha256(String mesosHostingDir) throws UnsupportedEncodingException {
MessageDigest md;
try {
md = MessageDigest.getInstance("SHA-256");
} catch (NoSuchAlgorithmException e) {
throw new IllegalStateException(e);
}
md.update(mesosHostingDir.getBytes("UTF-8"));
return Base64.encodeBase64URLSafeString(md.digest());
}
}