/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.chukwa.datacollection.writer.localfs;
import java.io.File;
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Calendar;
import java.util.List;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import org.apache.hadoop.chukwa.ChukwaArchiveKey;
import org.apache.hadoop.chukwa.Chunk;
import org.apache.hadoop.chukwa.ChunkImpl;
import org.apache.hadoop.chukwa.datacollection.writer.ChukwaWriter;
import org.apache.hadoop.chukwa.datacollection.writer.WriterException;
import org.apache.hadoop.chukwa.util.DaemonWatcher;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.log4j.Logger;
/**
 * <p>This class <b>is</b> thread-safe -- {@link #rotate()} and
 * {@link #add(List)} both synchronize on the same lock object.
 * </p>
 * <p>
 * Data is written to the local filesystem first, then moved to the remote
 * HDFS by a background mover thread.
 * <br>
 * Warning:
 * <br>
 * There is no lock/waiting time for the remote client.
 * The connection is released as soon as the last append is done,
 * so there is no guarantee that this class will not lose any data.
 * <br>
 * The class has been designed this way for performance reasons.
 * </p>
 * <p>
 * In order to use this class, you need to set some parameters in
 * chukwa-collector-conf.xml:
 * </p>
 * <pre>
 * &lt;property&gt;
 *   &lt;name&gt;chukwaCollector.localOutputDir&lt;/name&gt;
 *   &lt;value&gt;/grid/0/gs/chukwa/chukwa-0.1.2/dataSink/&lt;/value&gt;
 *   &lt;description&gt;Chukwa data sink directory&lt;/description&gt;
 * &lt;/property&gt;
 *
 * &lt;property&gt;
 *   &lt;name&gt;chukwaCollector.writerClass&lt;/name&gt;
 *   &lt;value&gt;org.apache.hadoop.chukwa.datacollection.writer.localfs.LocalWriter&lt;/value&gt;
 *   &lt;description&gt;Local chukwa writer&lt;/description&gt;
 * &lt;/property&gt;
 * </pre>
 *
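 * <p>
 * A minimal usage sketch (note: the collector normally instantiates the
 * writer via the chukwaCollector.writerClass property rather than directly,
 * and the directory below is illustrative):
 * </p>
 * <pre>
 * Configuration conf = new Configuration();
 * conf.set("chukwaCollector.localOutputDir", "/tmp/chukwa/dataSink/");
 * ChukwaWriter writer = new LocalWriter();
 * writer.init(conf);
 * List&lt;Chunk&gt; chunks = ...; // chunks to persist
 * writer.add(chunks);       // appended to the current local sink file
 * writer.close();
 * </pre>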
*/
public class LocalWriter implements ChukwaWriter {
static Logger log = Logger.getLogger(LocalWriter.class);
static final int STAT_INTERVAL_SECONDS = 30;
static String localHostAddr = null;
private final Object lock = new Object();
private BlockingQueue<String> fileQueue = null;
  private LocalToRemoteHdfsMover localToRemoteHdfsMover = null;
private FileSystem fs = null;
private Configuration conf = null;
private String localOutputDir = null;
private Calendar calendar = Calendar.getInstance();
private Path currentPath = null;
private String currentFileName = null;
private FSDataOutputStream currentOutputStr = null;
private SequenceFile.Writer seqFileWriter = null;
private int rotateInterval = 1000 * 60;
private volatile long dataSize = 0;
private volatile boolean isRunning = false;
private Timer rotateTimer = null;
private Timer statTimer = null;
private int initWriteChunkRetries = 10;
private int writeChunkRetries = initWriteChunkRetries;
private boolean chunksWrittenThisRotate = false;
private long timePeriod = -1;
private long nextTimePeriodComputation = -1;
private int minPercentFreeDisk = 20;
static {
try {
localHostAddr = "_" + InetAddress.getLocalHost().getHostName() + "_";
} catch (UnknownHostException e) {
localHostAddr = "-NA-";
}
}
public void init(Configuration conf) throws WriterException {
this.conf = conf;
try {
fs = FileSystem.getLocal(conf);
localOutputDir = conf.get("chukwaCollector.localOutputDir",
"/chukwa/datasink/");
if (!localOutputDir.endsWith("/")) {
localOutputDir += "/";
}
Path pLocalOutputDir = new Path(localOutputDir);
if (!fs.exists(pLocalOutputDir)) {
        boolean created = fs.mkdirs(pLocalOutputDir);
        if (!created) {
throw new WriterException("Cannot create local dataSink dir: "
+ localOutputDir);
}
} else {
FileStatus fsLocalOutputDir = fs.getFileStatus(pLocalOutputDir);
if (!fsLocalOutputDir.isDir()) {
throw new WriterException("local dataSink dir is not a directory: "
+ localOutputDir);
}
}
} catch (Throwable e) {
log.fatal("Cannot initialize LocalWriter", e);
DaemonWatcher.bailout(-1);
}
minPercentFreeDisk = conf.getInt("chukwaCollector.minPercentFreeDisk",20);
rotateInterval = conf.getInt("chukwaCollector.rotateInterval",
1000 * 60 * 5);// defaults to 5 minutes
initWriteChunkRetries = conf
.getInt("chukwaCollector.writeChunkRetries", 10);
writeChunkRetries = initWriteChunkRetries;
log.info("rotateInterval is " + rotateInterval);
log.info("outputDir is " + localOutputDir);
log.info("localFileSystem is " + fs.getUri().toString());
log.info("minPercentFreeDisk is " + minPercentFreeDisk);
// Setup everything by rotating
rotate();
rotateTimer = new Timer();
rotateTimer.schedule(new RotateTask(), rotateInterval,
rotateInterval);
statTimer = new Timer();
statTimer.schedule(new StatReportingTask(), 1000,
STAT_INTERVAL_SECONDS * 1000);
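    // rotate() queues each completed ".done" file here; the
    // LocalToRemoteHdfsMover drains the queue and copies the files to HDFS.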
fileQueue = new LinkedBlockingQueue<String>();
localToRemoteHdfsMover = new LocalToRemoteHdfsMover(fileQueue, conf);
}
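  /**
   * Scheduled by init() to rotate the current sink file at every
   * rotateInterval.
   */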
private class RotateTask extends TimerTask {
public void run() {
rotate();
    }
}
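  /**
   * Periodically logs the number of bytes appended since the last report,
   * together with the resulting data rate.
   */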
private class StatReportingTask extends TimerTask {
private long lastTs = System.currentTimeMillis();
public void run() {
long time = System.currentTimeMillis();
long currentDs = dataSize;
dataSize = 0;
long interval = time - lastTs;
lastTs = time;
      long dataRate = 1000 * currentDs / interval; // bytes/sec
log.info("stat:datacollection.writer.local.LocalWriter dataSize="
+ currentDs + " dataRate=" + dataRate);
}
  }
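  /**
   * Rounds the current time down to the hour to get the archive key's time
   * partition, and records when the next recomputation is due.
   */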
protected void computeTimePeriod() {
synchronized (calendar) {
calendar.setTimeInMillis(System.currentTimeMillis());
calendar.set(Calendar.MINUTE, 0);
calendar.set(Calendar.SECOND, 0);
calendar.set(Calendar.MILLISECOND, 0);
timePeriod = calendar.getTimeInMillis();
calendar.add(Calendar.HOUR, 1);
nextTimePeriodComputation = calendar.getTimeInMillis();
}
}
  /**
   * Best effort: there is no guarantee that chunks have actually been
   * flushed to disk when this method returns.
   */
public CommitStatus add(List<Chunk> chunks) throws WriterException {
if (!isRunning) {
throw new WriterException("Writer not yet ready");
}
long now = System.currentTimeMillis();
if (chunks != null) {
      try {
        ChukwaArchiveKey archiveKey = new ChukwaArchiveKey();
        synchronized (lock) {
          if (System.currentTimeMillis() >= nextTimePeriodComputation) {
            computeTimePeriod();
          }
          for (Chunk chunk : chunks) {
            // Guard against null entries before touching the chunk.
            if (chunk == null) {
              continue;
            }
            archiveKey.setTimePartition(timePeriod);
            archiveKey.setDataType(chunk.getDataType());
            archiveKey.setStreamName(chunk.getTags() + "/" + chunk.getSource()
                + "/" + chunk.getStreamName());
            archiveKey.setSeqId(chunk.getSeqID());
            seqFileWriter.append(archiveKey, chunk);
            // Record that this rotation has data and compute size for stats.
            chunksWrittenThisRotate = true;
            dataSize += chunk.getData().length;
          }
        } // end synchronized
long end = System.currentTimeMillis();
if (log.isDebugEnabled()) {
log.debug("duration=" + (end-now) + " size=" + chunks.size());
}
} catch (IOException e) {
writeChunkRetries--;
log.error("Could not save the chunk. ", e);
        if (writeChunkRetries < 0) {
          log.fatal("Too many IOExceptions when trying to write a chunk, "
              + "Collector is going to exit!");
          DaemonWatcher.bailout(-1);
        }
throw new WriterException(e);
}
}
return COMMIT_OK;
}
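  /**
   * Closes the current sink file and renames it to ".done" so the mover can
   * pick it up (or deletes it if no chunk was written), then opens a fresh
   * sequence file. Shuts the collector down if local disk space runs low.
   */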
protected void rotate() {
isRunning = true;
calendar.setTimeInMillis(System.currentTimeMillis());
log.info("start Date [" + calendar.getTime() + "]");
log.info("Rotate from " + Thread.currentThread().getName());
    // File names are <timestamp><host><UID>, so they are unique across hosts
    // and across collector restarts.
    String newName = new java.text.SimpleDateFormat("yyyyMMddHHmmssSSS")
        .format(calendar.getTime());
    newName += localHostAddr + new java.rmi.server.UID().toString();
    newName = newName.replace("-", "");
    newName = newName.replace(":", "");
    newName = newName.replace(".", "");
    // localOutputDir is guaranteed to end with "/" (see init()).
    newName = localOutputDir + newName.trim();
synchronized (lock) {
try {
FSDataOutputStream previousOutputStr = currentOutputStr;
Path previousPath = currentPath;
String previousFileName = currentFileName;
if (previousOutputStr != null) {
previousOutputStr.close();
if (chunksWrittenThisRotate) {
fs.rename(previousPath, new Path(previousFileName + ".done"));
fileQueue.add(previousFileName + ".done");
} else {
log.info("no chunks written to " + previousPath + ", deleting");
fs.delete(previousPath, false);
}
}
Path newOutputPath = new Path(newName + ".chukwa");
FSDataOutputStream newOutputStr = fs.create(newOutputPath);
currentOutputStr = newOutputStr;
currentPath = newOutputPath;
currentFileName = newName;
chunksWrittenThisRotate = false;
// Uncompressed for now
seqFileWriter = SequenceFile.createWriter(conf, newOutputStr,
ChukwaArchiveKey.class, ChunkImpl.class,
SequenceFile.CompressionType.NONE, null);
} catch (IOException e) {
log.fatal("IO Exception in rotate. Exiting!", e);
// Shutting down the collector
// Watchdog will re-start it automatically
DaemonWatcher.bailout(-1);
}
}
// Check for disk space
File directory4Space = new File(localOutputDir);
long totalSpace = directory4Space.getTotalSpace();
long freeSpace = directory4Space.getFreeSpace();
long minFreeAvailable = (totalSpace * minPercentFreeDisk) /100;
if (log.isDebugEnabled()) {
log.debug("Directory: " + localOutputDir + ", totalSpace: " + totalSpace
+ ", freeSpace: " + freeSpace + ", minFreeAvailable: " + minFreeAvailable
+ ", percentFreeDisk: " + minPercentFreeDisk);
}
if (freeSpace < minFreeAvailable) {
log.fatal("No space left on device, Bail out!");
DaemonWatcher.bailout(-1);
}
log.debug("finished rotate()");
}
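  /**
   * Cancels the timers, closes and renames the current sink file, and shuts
   * down the HDFS mover.
   */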
public void close() {
synchronized (lock) {
if (rotateTimer != null) {
rotateTimer.cancel();
}
if (statTimer != null) {
statTimer.cancel();
}
      try {
        // Close the SequenceFile.Writer before the underlying stream so that
        // buffered data is flushed first.
        if (seqFileWriter != null) {
          seqFileWriter.close();
        }
        if (this.currentOutputStr != null) {
          this.currentOutputStr.close();
        }
        if (localToRemoteHdfsMover != null) {
          localToRemoteHdfsMover.shutdown();
        }
        if (currentPath != null) {
          // Mark the last sink file as complete.
          fs.rename(currentPath, new Path(currentFileName + ".done"));
        }
} catch (IOException e) {
log.error("failed to close and rename stream", e);
}
}
}
}