/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.util;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import gobblin.configuration.ConfigurationKeys;

/**
 * A utility class that generates a script for moving heap dump .hprof files to HDFS for Hadoop tasks
 * when a Java heap out-of-memory error is thrown.
*/
public class HeapDumpForTaskUtils {

  private static final Logger LOG = LoggerFactory.getLogger(HeapDumpForTaskUtils.class);

  private static final String DUMP_FOLDER = "dumps";

/**
   * Generates the dump script, which runs when an OOM error is thrown during task execution.
   * The generated script puts the .hprof file into the DUMP_FOLDER directory under the parent directory of the dump script.
*
   * The user needs to add the following options to the task java.opts:
*
* -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=./heapFileName.hprof -XX:OnOutOfMemoryError=./dumpScriptFileName
*
   * @param dumpScript the path to the dump script, which needs to be added to the DistributedCache.
   *          To use it, simply add the path of the dump script to the Gobblin config property job.hdfs.files.
   * @param fs the {@link FileSystem} on which the dump script and the dump directory are created
   * @param heapFileName the name of the .hprof file written by the JVM on OOM
   * @param chmod the command used to make the dump script executable, e.g., "hadoop fs -chmod 755" for a script on HDFS
   * @throws IOException if the dump script cannot be created
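   *
   * <p>A minimal usage sketch (the paths and the chmod command below are hypothetical examples, not
   * values taken from this codebase; {@code Configuration} is org.apache.hadoop.conf.Configuration):
   * <pre>{@code
   * FileSystem fs = FileSystem.get(new Configuration());
   * Path dumpScript = new Path("scripts/dumpScript.sh");
   * HeapDumpForTaskUtils.generateDumpScript(dumpScript, fs, "heap.hprof", "hadoop fs -chmod 755");
   * }</pre>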
*/
public static void generateDumpScript(Path dumpScript, FileSystem fs, String heapFileName, String chmod)
throws IOException {
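    // Nothing to do if a previous run already generated the script.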
if (fs.exists(dumpScript)) {
LOG.info("Heap dump script already exists: " + dumpScript);
return;
}
try (BufferedWriter scriptWriter =
new BufferedWriter(new OutputStreamWriter(fs.create(dumpScript), ConfigurationKeys.DEFAULT_CHARSET_ENCODING))) {
Path dumpDir = new Path(dumpScript.getParent(), DUMP_FOLDER);
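      // Create the shared dump directory next to the script if it does not already exist.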
if (!fs.exists(dumpDir)) {
fs.mkdirs(dumpDir);
}
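      // The generated script uploads the local heap dump to the dump directory on HDFS.
      // ${PWD//\//_} flattens the task working directory path into a unique file name, so dumps
      // from different task attempts do not overwrite each other. HADOOP_PREFIX is tried first,
      // with HADOOP_HOME as a fallback for older Hadoop installations.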
scriptWriter.write("#!/bin/sh\n");
scriptWriter.write("if [ -n \"$HADOOP_PREFIX\" ]; then\n");
scriptWriter
.write(" ${HADOOP_PREFIX}/bin/hadoop dfs -put " + heapFileName + " " + dumpDir + "/${PWD//\\//_}.hprof\n");
scriptWriter.write("else\n");
scriptWriter
.write(" ${HADOOP_HOME}/bin/hadoop dfs -put " + heapFileName + " " + dumpDir + "/${PWD//\\//_}.hprof\n");
scriptWriter.write("fi\n");
} catch (IOException ioe) {
      LOG.error("Failed to generate the heap dump script at " + dumpScript, ioe);
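      // Remove any partially written script so that a subsequent run can regenerate it cleanly.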
if (fs.exists(dumpScript)) {
fs.delete(dumpScript, true);
}
throw ioe;
}
    try {
      // Make the script executable and wait for the chmod command to finish, so that a failure
      // to set permissions is not silently ignored.
      Process chmodProcess = Runtime.getRuntime().exec(chmod + " " + dumpScript);
      if (chmodProcess.waitFor() != 0) {
        LOG.warn("Failed to set permissions on the heap dump script: " + dumpScript);
      }
    } catch (InterruptedException ie) {
      Thread.currentThread().interrupt();
      throw new IOException("Interrupted while setting permissions on " + dumpScript, ie);
    }
}
}