/**
* Copyright 2015 StreamSets Inc.
*
 * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.streamsets.pipeline;
import com.streamsets.pipeline.spark.api.SparkTransformer;
import org.apache.spark.api.java.JavaSparkContext;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.instrument.Instrumentation;
import java.lang.reflect.Method;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
* This class is responsible for all activities which cross classloaders. At present
* there are two use cases for this class:
* <ol>
 * <li>Bootstrapping an executor that is started as part of a Spark job</li>
 * <li>Obtaining a reference to the dummy source that is used to feed a pipeline</li>
* </ol>
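 *
 * <p>A minimal usage sketch (the call sites are illustrative assumptions; only the
 * methods shown are actually defined in this class):</p>
 * <pre>{@code
 *   int partitionId = 0;  // hypothetical partition id supplied by the Spark job
 *   Properties sdcProps = BootstrapCluster.getProperties();
 *   Object clusterFunction = BootstrapCluster.getClusterFunction(partitionId);
 *   Runnable postBatch = () -> { };  // work to run after each batch
 *   Object pipelineStart = BootstrapCluster.startPipeline(postBatch);
 * }</pre>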
*/
public class BootstrapCluster {
public static final String STREAMSETS_LIBS_PREFIX = "streamsets-libs/";
/**
 * A reset method may eventually be needed here to support unit tests.
*/
private static volatile boolean initialized = false;
private static Properties properties;
private static ClassLoader apiCL;
private static ClassLoader containerCL;
private static ClassLoader sparkCL;
private static List<ClassLoader> stageLibrariesCLs;
private static String dataDir;
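  // despite the "REGEX" suffix, this value is used as a plain filename prefix
  // (via String#startsWith), not as a regular expression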
private static final String MESOS_BOOTSTRAP_JAR_REGEX = "streamsets-datacollector-mesos-bootstrap";
public static final String SDC_MESOS_BASE_DIR = "sdc_mesos";
private static File mesosBootstrapFile;
private static List<ClassLoader> transformerCLs;
private static String pipelineJson;
private static List<SparkTransformer> transformers;
private BootstrapCluster() {}
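  /**
   * Returns the properties loaded from {@code sdc.properties}, triggering the one-time
   * cross-classloader initialization on first use.
   */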
public static synchronized Properties getProperties() throws Exception {
initialize();
return properties;
}
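  /**
   * One-time initialization: resolves the library/etc/resources roots (testing, YARN or
   * Mesos layout), loads {@code sdc.properties}, reads the pipeline JSON and builds the
   * API, container and per-stage-library classloaders.
   */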
private static synchronized void initialize() throws Exception {
if (initialized) {
return;
}
boolean isTestingMode = Boolean.getBoolean("sdc.testing-mode");
String libraryRoot;
String etcRoot;
String resourcesRoot;
if (isTestingMode) {
libraryRoot = (new File(System.getProperty("user.dir"), "target")).getAbsolutePath();
etcRoot = (new File(System.getProperty("user.dir"), "target")).getAbsolutePath();
resourcesRoot = (new File(System.getProperty("user.dir"), "target")).getAbsolutePath();
} else if (System.getProperty("SDC_MESOS_BASE_DIR") == null){
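      // when running on YARN, localized archives are exposed as directories named after
      // the archive file (e.g. etc.tar.gz/etc), hence these unusual-looking paths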
libraryRoot = (new File(System.getProperty("user.dir"), "libs.tar.gz")).getAbsolutePath();
etcRoot = (new File(System.getProperty("user.dir") + "/etc.tar.gz/etc/")).getAbsolutePath();
resourcesRoot = (new File(System.getProperty("user.dir") + "/resources.tar.gz/resources/")).getAbsolutePath();
} else {
String sdcMesosBaseDir = System.getProperty("SDC_MESOS_BASE_DIR");
libraryRoot = new File(sdcMesosBaseDir, "libs").getAbsolutePath();
etcRoot = new File(sdcMesosBaseDir, "etc").getAbsolutePath();
resourcesRoot = new File(sdcMesosBaseDir, "resources").getAbsolutePath();
}
System.setProperty("sdc.transient-env", "true");
System.setProperty("sdc.static-web.dir", (new File(libraryRoot, "sdc-static-web")).getAbsolutePath());
System.setProperty("sdc.conf.dir", etcRoot);
System.setProperty("sdc.resources.dir", resourcesRoot);
File sdcProperties = new File(etcRoot, "sdc.properties");
if (!sdcProperties.isFile()) {
String msg = "SDC Properties file does not exist at expected location: " + sdcProperties;
throw new IllegalStateException(msg);
}
properties = new Properties();
try (FileInputStream inStream = new FileInputStream(sdcProperties)) {
properties.load(inStream);
}
File rootDataDir = new File(etcRoot, "data");
dataDir = rootDataDir.getAbsolutePath();
File basePipelineDir = new File(rootDataDir, "pipelines");
String pipelineName = properties.getProperty("cluster.pipeline.name");
if (pipelineName == null) {
throw new IllegalStateException("Pipeline to be run cannot be null");
}
SDCClassLoader.setDebug(Boolean.getBoolean(BootstrapMain.PIPELINE_BOOTSTRAP_CLASSLOADER_SYS_PROP));
List<URL> apiUrls;
List<URL> containerUrls;
Map<String, List<URL>> streamsetsLibsUrls;
Map<String, List<URL>> userLibsUrls;
if (isTestingMode) {
apiUrls = new ArrayList<>();
containerUrls = new ArrayList<>();
streamsetsLibsUrls = new HashMap<>();
userLibsUrls = new HashMap<>();
      // for now we pull the container in for testing mode
streamsetsLibsUrls.put("streamsets-libs/streamsets-datacollector-spark-protolib",
BootstrapMain.getClasspathUrls(System.getProperty("user.dir") + "/target/"));
} else {
apiUrls = BootstrapMain.getClasspathUrls(libraryRoot + "/api-lib/*.jar");
containerUrls = BootstrapMain.getClasspathUrls(libraryRoot + "/container-lib/*.jar");
Set<String> systemStageLibs;
Set<String> userStageLibs;
if (BootstrapMain.isDeprecatedWhiteListConfiguration(etcRoot)) {
System.out.println(String.format(
BootstrapMain.WARN_MSG,
"Using deprecated stage library whitelist configuration file",
BootstrapMain.WHITE_LIST_FILE
));
systemStageLibs = BootstrapMain.getWhiteList(etcRoot, BootstrapMain.SYSTEM_LIBS_WHITE_LIST_KEY);
userStageLibs = BootstrapMain.getWhiteList(etcRoot, BootstrapMain.USER_LIBS_WHITE_LIST_KEY);
} else {
systemStageLibs = BootstrapMain.getSystemStageLibs(etcRoot);
userStageLibs = BootstrapMain.getUserStageLibs(etcRoot);
}
String libsCommonLibDir = libraryRoot + "/libs-common-lib";
      // in cluster mode, the files from the master's library extra dir are collapsed into the library dir
streamsetsLibsUrls = BootstrapMain.getStageLibrariesClasspaths(libraryRoot + "/streamsets-libs", null,
systemStageLibs, libsCommonLibDir);
userLibsUrls = BootstrapMain.getStageLibrariesClasspaths(libraryRoot + "/user-libs", null, userStageLibs,
libsCommonLibDir);
}
    Map<String, List<URL>> libsUrls = new LinkedHashMap<>();
libsUrls.putAll(streamsetsLibsUrls);
libsUrls.putAll(userLibsUrls);
ClassLoader parent = Thread.currentThread().getContextClassLoader();
if (parent == null) {
parent = ClassLoader.getSystemClassLoader();
}
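    // classloader hierarchy: parent -> API CL -> container CL; each stage library
    // classloader created below gets the API classloader as its parent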
apiCL = SDCClassLoader.getAPIClassLoader(apiUrls, parent);
containerCL = SDCClassLoader.getContainerCLassLoader(containerUrls, apiCL);
stageLibrariesCLs = new ArrayList<>();
File pipelineDir = new File(basePipelineDir, escapedPipelineName(apiCL, pipelineName));
File pipelineJsonFile = new File(pipelineDir, "pipeline.json");
if (!pipelineJsonFile.isFile()) {
String msg = "Pipeline JSON file does not exist at expected location: " + pipelineJsonFile;
throw new IllegalStateException(msg);
}
try {
      pipelineJson = new String(Files.readAllBytes(pipelineJsonFile.toPath()), StandardCharsets.UTF_8);
} catch (Exception ex) {
String msg = "Error reading Pipeline JSON File at: " + pipelineJsonFile;
throw new IllegalStateException(msg, ex);
}
String sparkLib = getSourceLibraryName();
List<String> sparkProcessorLibs =
getSparkProcessorLibraryNames()
.stream()
.map(x -> STREAMSETS_LIBS_PREFIX + x).collect(Collectors.toList());
if (sparkLib == null) {
throw new IllegalStateException("Couldn't find the source library in pipeline file");
}
String lookupLib = STREAMSETS_LIBS_PREFIX + sparkLib;
System.err.println("\n Cluster lib is " + lookupLib);
for (Map.Entry<String,List<URL>> entry : libsUrls.entrySet()) {
      // String#split takes a regex, so quote the separator to be safe on platforms
      // where the file separator is '\'
      String[] parts = entry.getKey().split(Pattern.quote(File.separator));
if (parts.length != 2) {
String msg = "Invalid library name: " + entry.getKey();
throw new IllegalStateException(msg);
}
String type = parts[0];
String name = parts[1];
SDCClassLoader sdcClassLoader = SDCClassLoader.getStageClassLoader(type, name, entry.getValue(), apiCL); //NOSONAR
// TODO add spark, scala, etc to blacklist
if (lookupLib.equals(entry.getKey())) {
if (sparkCL != null) {
throw new IllegalStateException("Found two classloaders for " + lookupLib);
}
sparkCL = sdcClassLoader;
}
if (sparkProcessorLibs.contains(entry.getKey())) {
if (transformerCLs == null) {
transformerCLs = new ArrayList<>();
}
transformerCLs.add(sdcClassLoader);
}
stageLibrariesCLs.add(sdcClassLoader);
}
if (sparkCL == null) {
throw new IllegalStateException("Could not find classloader for " + lookupLib);
}
try {
Instrumentation instrumentation = BootstrapMain.getInstrumentation();
if (instrumentation != null) {
Method memoryUsageCollectorInitialize = Class.forName("com.streamsets.datacollector.memory.MemoryUsageCollector",
true, containerCL).getMethod("initialize", Instrumentation.class);
memoryUsageCollectorInitialize.invoke(null, instrumentation);
}
} catch (Exception ex) {
String msg = "Error trying to initialize MemoryUsageCollector: " + ex;
throw new IllegalStateException(msg, ex);
}
try {
Class<?> runtimeModuleClz = Class.forName("com.streamsets.datacollector.main.SlaveRuntimeModule", true,
containerCL);
Method setStageLibraryClassLoadersMethod = runtimeModuleClz.getMethod("setStageLibraryClassLoaders", List.class);
setStageLibraryClassLoadersMethod.invoke(null, stageLibrariesCLs);
} catch (Exception ex) {
String msg = "Error trying to bookstrap Spark while setting stage classloaders: " + ex;
throw new IllegalStateException(msg, ex);
}
initialized = true;
}
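  /**
   * Escapes the pipeline name by reflectively calling {@code PipelineUtils.escapedPipelineName}
   * in the API classloader, so the directory name matches the one the data collector produced.
   */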
private static String escapedPipelineName(ClassLoader apiCL, String name) {
try {
      Class<?> pipelineUtils = apiCL.loadClass("com.streamsets.pipeline.api.impl.PipelineUtils");
Method escapedPipelineName = pipelineUtils.getMethod("escapedPipelineName", String.class);
return (String) escapedPipelineName.invoke(null, name);
} catch (Exception ex) {
throw new RuntimeException("Error escaping pipeline name '" + name + "': " + ex, ex);
}
}
  /**
   * Obtains a reference to the dummy source that is used to feed a pipeline.<br/>
   * Direction: Stage -> Container
   * @param postBatchRunnable runnable to run after each batch is finished
   * @return a source object associated with the newly created pipeline
   * @throws Exception if initialization fails or the pipeline cannot be started
   */
public static /*PipelineStartResult*/ Object startPipeline(Runnable postBatchRunnable) throws Exception {
BootstrapCluster.initialize();
ClassLoader originalClassLoader = Thread.currentThread().getContextClassLoader();
try {
Thread.currentThread().setContextClassLoader(containerCL);
      Class<?> embeddedPipelineFactoryClz = Class.forName("com.streamsets.datacollector.EmbeddedDataCollectorFactory", true,
containerCL);
Method createPipelineMethod = embeddedPipelineFactoryClz.getMethod("startPipeline", Runnable.class);
return createPipelineMethod.invoke(null, postBatchRunnable);
} catch (Exception ex) {
String msg = "Error trying to create pipeline: " + ex;
throw new IllegalStateException(msg, ex);
} finally {
Thread.currentThread().setContextClassLoader(originalClassLoader);
}
}
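  /**
   * Returns the name of the stage library hosting the pipeline's origin (source),
   * extracted from the pipeline JSON.
   */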
private static String getSourceLibraryName() throws Exception {
try {
      return callOnPipelineConfigurationUtil("getSourceLibName");
} catch (Exception ex) {
String msg = "Error trying to retrieve library name from pipeline json: " + ex;
throw new IllegalStateException(msg, ex);
}
}
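  /**
   * Returns the names of the stage libraries hosting the pipeline's Spark processors,
   * extracted from the pipeline JSON.
   */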
public static List<String> getSparkProcessorLibraryNames() throws Exception {
    return callOnPipelineConfigurationUtil("getSparkProcessorConf");
}
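  /**
   * Reflectively invokes a static, single-{@code String}-argument method of the container's
   * {@code PipelineConfigurationUtil} class with the pipeline JSON, switching the context
   * classloader to the container classloader for the duration of the call.
   */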
@SuppressWarnings("unchecked")
  private static <T> T callOnPipelineConfigurationUtil(String method) throws Exception {
ClassLoader originalClassLoader = Thread.currentThread().getContextClassLoader();
try {
Thread.currentThread().setContextClassLoader(containerCL);
      Class<?> pipelineConfigurationUtil =
Class.forName("com.streamsets.datacollector.util.PipelineConfigurationUtil", true, containerCL);
Method m = pipelineConfigurationUtil.getMethod(method, String.class);
return (T) m.invoke(null, pipelineJson);
} finally {
Thread.currentThread().setContextClassLoader(originalClassLoader);
}
}
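  /**
   * Instantiates and initializes the Spark transformers configured in the pipeline;
   * no-op if the pipeline uses no Spark processor libraries.
   */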
@SuppressWarnings("unchecked")
public static void createTransformers(JavaSparkContext context) throws Exception {
if (transformerCLs == null) {
return;
}
    List<Object> configs = callOnPipelineConfigurationUtil("getSparkTransformers");
transformers = new ArrayList<>();
for (Object transformerConfig : configs) {
try {
String transformerClass =
(String) transformerConfig.getClass().getMethod("getTransformerClass").invoke(transformerConfig);
Class<?> clazz = Class.forName(transformerClass);
        SparkTransformer transformer = (SparkTransformer) clazz.getDeclaredConstructor().newInstance();
List<String> params =
(List<String>) transformerConfig.getClass().
getMethod("getTransformerParameters").invoke(transformerConfig);
transformer.init(context, params);
transformers.add(transformer);
} catch (Exception ex) {
throw new RuntimeException(ex);
}
}
}
public static List<SparkTransformer> getTransformers() {
return transformers;
}
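  /**
   * Reflectively creates the cluster function, inside the spark stage library classloader,
   * which the Spark job uses to feed partitions of data into the pipeline.
   */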
public static Object getClusterFunction(Integer id) throws Exception {
BootstrapCluster.initialize();
ClassLoader originalClassLoader = Thread.currentThread().getContextClassLoader();
try {
Thread.currentThread().setContextClassLoader(sparkCL);
return Class.forName("com.streamsets.pipeline.cluster.ClusterFunctionImpl", true,
sparkCL).getMethod("create", Properties.class, Integer.class, String.class).invoke(null, properties, id, dataDir);
} catch (Exception ex) {
String msg = "Error trying to obtain ClusterFunction Class: " + ex;
throw new IllegalStateException(msg, ex);
} finally {
Thread.currentThread().setContextClassLoader(originalClassLoader);
}
}
public static void printSystemPropsEnvVariables() {
Map<String, String> env = System.getenv();
System.out.println("Below are the environment variables: ");
for (Map.Entry<String, String> mapEntry : env.entrySet()) {
System.out.format("%s=%s%n", mapEntry.getKey(), mapEntry.getValue());
}
Properties p = System.getProperties();
System.out.println("\n\n Below are the Java system properties: ");
for (Map.Entry<Object, Object> mapEntry : p.entrySet()) {
System.out.format("%s=%s%n", (String)mapEntry.getKey(), (String)mapEntry.getValue());
}
}
private static class MesosBootstrapJarFileFilter implements FilenameFilter {
@Override
public boolean accept(File dir, String name) {
return name.startsWith(MESOS_BOOTSTRAP_JAR_REGEX);
}
}
public static File getMesosBootstrapFile() {
if (mesosBootstrapFile == null) {
throw new IllegalStateException("Mesos bootstrap file cannot be found");
}
return mesosBootstrapFile;
}
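  /**
   * Locates the mesos bootstrap jar (first in the Mesos home dir, then in the Spark home
   * dir), extracts its contents into {@code sdc_mesos} and un-tars the embedded archives;
   * returns the exit code of the extraction process.
   */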
public static int findAndExtractJar(File mesosHomeDir, File sparkHomeDir) throws IOException, InterruptedException {
FilenameFilter mesosBootstrapJarFilter = new MesosBootstrapJarFileFilter();
    // look for the bootstrap jar in the Mesos home dir first, then fall back to the Spark home dir
    File[] mesosBootstrapJars = mesosHomeDir.listFiles(mesosBootstrapJarFilter);
    checkNotNull(mesosBootstrapJars, mesosHomeDir);
    if (mesosBootstrapJars.length == 0) {
      mesosBootstrapJars = sparkHomeDir.listFiles(mesosBootstrapJarFilter);
    }
    checkNotNull(mesosBootstrapJars, sparkHomeDir);
    if (mesosBootstrapJars.length == 0) {
      throw new IllegalStateException("Cannot find file starting with " + MESOS_BOOTSTRAP_JAR_REGEX + " in "
        + sparkHomeDir + " or in " + mesosHomeDir);
    } else if (mesosBootstrapJars.length > 1) {
      throw new IllegalStateException("Found more than one file matching " + MESOS_BOOTSTRAP_JAR_REGEX
        + "; matching files: " + Arrays.toString(mesosBootstrapJars));
    }
    File mesosBaseDir = new File(mesosHomeDir, SDC_MESOS_BASE_DIR);
    if (!mesosBaseDir.mkdir()) {
      throw new IllegalStateException("Error while creating dir: " + mesosBaseDir.getAbsolutePath());
    }
    BootstrapCluster.mesosBootstrapFile = mesosBootstrapJars[0];
    extractFromJar(mesosBootstrapJars[0], mesosBaseDir);
    return extractArchives(mesosBaseDir);
}
  private static void checkNotNull(File[] mesosBootstrapJars, File sourceDir) {
    if (mesosBootstrapJars == null) {
throw new IllegalStateException("Cannot list files in dir: " + sourceDir.getAbsolutePath());
}
}
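  /**
   * Un-tars the etc/libs/resources archives extracted from the bootstrap jar by shelling
   * out to bash and tar (a POSIX environment is assumed); returns the process exit code,
   * 0 on success.
   */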
private static int extractArchives(File mesosBaseDir) throws IOException, InterruptedException {
// Extract archives from the uber jar
String[] cmd = {"/bin/bash", "-c",
"cd " + mesosBaseDir.getAbsolutePath() + " && "
+ "tar -xf etc.tar.gz && "
+ "mkdir libs && "
+ "tar -xf libs.tar.gz -C libs/ && "
+ "tar -xf resources.tar.gz" };
ProcessBuilder processBuilder = new ProcessBuilder(cmd);
processBuilder.redirectErrorStream(true);
Process process = processBuilder.start();
try (BufferedReader stdOutReader =
new BufferedReader(new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
      String line;
while ((line = stdOutReader.readLine()) != null) {
System.out.println(line);
}
process.waitFor();
}
return process.exitValue();
}
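  /**
   * Extracts every entry of the given jar into {@code destDir}, creating parent
   * directories as needed.
   */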
private static void extractFromJar(File sourceFile, File destDir) throws IOException {
    try (JarFile jar = new JarFile(sourceFile)) {
      Enumeration<JarEntry> enumEntries = jar.entries();
      while (enumEntries.hasMoreElements()) {
        JarEntry jarEntry = enumEntries.nextElement();
        File destFile = new File(destDir, jarEntry.getName());
        // guard against "zip slip": refuse entries that would resolve outside the destination dir
        if (!destFile.getCanonicalPath().startsWith(destDir.getCanonicalPath() + File.separator)) {
          throw new IOException("Jar entry resolves outside of target dir: " + jarEntry.getName());
        }
        File parentDestFile = destFile.getParentFile();
        // if the parent dir does not exist, create the chain of dirs for this file
        if (!parentDestFile.isDirectory() && !parentDestFile.mkdirs()) {
          throw new IllegalStateException("Cannot create parent directories for file: " + destFile.getAbsolutePath());
        }
        if (jarEntry.isDirectory()) {
          continue;
        }
        try (
            InputStream is = jar.getInputStream(jarEntry);
            FileOutputStream fos = new FileOutputStream(destFile)
        ) {
          byte[] buffer = new byte[8192]; // 8 KiB copy buffer
          int bytesRead;
          while ((bytesRead = is.read(buffer)) != -1) { // copy contents of 'is' to 'fos'
            fos.write(buffer, 0, bytesRead);
          }
        }
}
}
}
}