/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.chukwa.tools.backfilling;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import junit.framework.Assert;
import junit.framework.TestCase;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.chukwa.ChukwaArchiveKey;
import org.apache.hadoop.chukwa.ChunkImpl;
import org.apache.hadoop.chukwa.extraction.engine.RecordUtil;
import org.apache.hadoop.chukwa.validationframework.util.MD5;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
public class TestBackfillingLoader extends TestCase{
public void testBackfillingLoaderWithCharFileTailingAdaptorUTF8NewLineEscaped() {
String tmpDir = System.getProperty("test.build.data", "/tmp");
long ts = System.currentTimeMillis();
String dataDir = tmpDir + "/TestBackfillingLoader_" + ts;
Configuration conf = new Configuration();
conf.set("writer.hdfs.filesystem", "file:///");
conf.set("chukwaCollector.outputDir", dataDir + "/log/");
conf.set("chukwaCollector.rotateInterval", "" + (Integer.MAX_VALUE -1));
String cluster = "MyCluster_" + ts;
String machine = "machine_" + ts;
String adaptorName = "org.apache.hadoop.chukwa.datacollection.adaptor.filetailer.CharFileTailingAdaptorUTF8NewLineEscaped";
String recordType = "MyRecordType_" + ts;
try {
FileSystem fs = FileSystem.getLocal(conf);
File in1Dir = new File(dataDir + "/input");
in1Dir.mkdirs();
int lineCount = 107;
File inputFile = makeTestFile(dataDir + "/input/in1.txt",lineCount);
long size = inputFile.length();
String logFile = inputFile.getAbsolutePath();
System.out.println("Output:" + logFile);
System.out.println("File:" + inputFile.length());
BackfillingLoader loader = new BackfillingLoader(conf,cluster,machine,adaptorName,recordType,logFile);
loader.process();
File finalOutputFile = new File(dataDir + "/input/in1.txt.sav");
Assert.assertTrue(inputFile.exists() == false);
Assert.assertTrue(finalOutputFile.exists() == true);
String doneFile = null;
File directory = new File(dataDir + "/log/");
String[] files = directory.list();
for(String file: files) {
if ( file.endsWith(".done") ){
doneFile = dataDir + "/log/" + file;
break;
}
}
long seqId = validateDataSink(fs,conf,doneFile,finalOutputFile,
cluster, recordType, machine, logFile);
Assert.assertTrue(seqId == size);
} catch (Throwable e) {
e.printStackTrace();
Assert.fail();
}
try {
FileUtils.deleteDirectory(new File(dataDir));
} catch (IOException e) {
e.printStackTrace();
}
}
public void testBackfillingLoaderWithFileAdaptor() {
String tmpDir = System.getProperty("test.build.data", "/tmp");
long ts = System.currentTimeMillis();
String dataDir = tmpDir + "/TestBackfillingLoader_" + ts;
Configuration conf = new Configuration();
conf.set("writer.hdfs.filesystem", "file:///");
conf.set("chukwaCollector.outputDir", dataDir + "/log/");
conf.set("chukwaCollector.rotateInterval", "" + (Integer.MAX_VALUE -1));
String cluster = "MyCluster_" + ts;
String machine = "machine_" + ts;
String adaptorName = "org.apache.hadoop.chukwa.datacollection.adaptor.FileAdaptor";
String recordType = "MyRecordType_" + ts;
try {
FileSystem fs = FileSystem.getLocal(conf);
File in1Dir = new File(dataDir + "/input");
in1Dir.mkdirs();
int lineCount = 118;
File inputFile = makeTestFile(dataDir + "/input/in2.txt",lineCount);
long size = inputFile.length();
String logFile = inputFile.getAbsolutePath();
System.out.println("Output:" + logFile);
System.out.println("File:" + inputFile.length());
BackfillingLoader loader = new BackfillingLoader(conf,cluster,machine,adaptorName,recordType,logFile);
loader.process();
File finalOutputFile = new File(dataDir + "/input/in2.txt.sav");
Assert.assertTrue(inputFile.exists() == false);
Assert.assertTrue(finalOutputFile.exists() == true);
String doneFile = null;
File directory = new File(dataDir + "/log/");
String[] files = directory.list();
for(String file: files) {
if ( file.endsWith(".done") ){
doneFile = dataDir + "/log/" + file;
break;
}
}
long seqId = validateDataSink(fs,conf,doneFile,finalOutputFile,
cluster, recordType, machine, logFile);
Assert.assertTrue(seqId == size);
} catch (Throwable e) {
e.printStackTrace();
Assert.fail();
}
try {
FileUtils.deleteDirectory(new File(dataDir));
} catch (IOException e) {
e.printStackTrace();
}
}
public void testBackfillingLoaderWithCharFileTailingAdaptorUTF8NewLineEscapedBigFile() {
String tmpDir = System.getProperty("test.build.data", "/tmp");
long ts = System.currentTimeMillis();
String dataDir = tmpDir + "/TestBackfillingLoader_" + ts;
Configuration conf = new Configuration();
conf.set("writer.hdfs.filesystem", "file:///");
conf.set("chukwaCollector.outputDir", dataDir + "/log/");
conf.set("chukwaCollector.rotateInterval", "" + (Integer.MAX_VALUE -1));
String cluster = "MyCluster_" + ts;
String machine = "machine_" + ts;
String adaptorName = "org.apache.hadoop.chukwa.datacollection.adaptor.filetailer.CharFileTailingAdaptorUTF8NewLineEscaped";
String recordType = "MyRecordType_" + ts;
try {
FileSystem fs = FileSystem.getLocal(conf);
File in1Dir = new File(dataDir + "/input");
in1Dir.mkdirs();
int lineCount = 1024*1024;//34MB
File inputFile = makeTestFile(dataDir + "/input/in1.txt",lineCount);
long size = inputFile.length();
String logFile = inputFile.getAbsolutePath();
System.out.println("Output:" + logFile);
System.out.println("File:" + inputFile.length());
BackfillingLoader loader = new BackfillingLoader(conf,cluster,machine,adaptorName,recordType,logFile);
loader.process();
File finalOutputFile = new File(dataDir + "/input/in1.txt.sav");
Assert.assertTrue(inputFile.exists() == false);
Assert.assertTrue(finalOutputFile.exists() == true);
String doneFile = null;
File directory = new File(dataDir + "/log/");
String[] files = directory.list();
for(String file: files) {
if ( file.endsWith(".done") ){
doneFile = dataDir + "/log/" + file;
break;
}
}
long seqId = validateDataSink(fs,conf,doneFile,finalOutputFile,
cluster, recordType, machine, logFile);
Assert.assertTrue(seqId == size);
} catch (Throwable e) {
e.printStackTrace();
Assert.fail();
}
try {
FileUtils.deleteDirectory(new File(dataDir));
} catch (IOException e) {
e.printStackTrace();
}
}
public void testBackfillingLoaderWithCharFileTailingAdaptorUTF8NewLineEscapedBigFileLocalWriter() {
String tmpDir = System.getProperty("test.build.data", "/tmp");
long ts = System.currentTimeMillis();
String dataDir = tmpDir + "/TestBackfillingLoader_" + ts;
Configuration conf = new Configuration();
conf.set("writer.hdfs.filesystem", "file:///");
conf.set("chukwaCollector.outputDir", dataDir + "/log/");
conf.set("chukwaCollector.rotateInterval", "" + (Integer.MAX_VALUE -1));
conf.set("chukwaCollector.localOutputDir", dataDir + "/log/");
conf.set("chukwaCollector.writerClass", "org.apache.hadoop.chukwa.datacollection.writer.localfs.LocalWriter");
conf.set("chukwaCollector.minPercentFreeDisk", "2");//so unit tests pass on machines with full-ish disks
String cluster = "MyCluster_" + ts;
String machine = "machine_" + ts;
String adaptorName = "org.apache.hadoop.chukwa.datacollection.adaptor.filetailer.CharFileTailingAdaptorUTF8NewLineEscaped";
String recordType = "MyRecordType_" + ts;
try {
FileSystem fs = FileSystem.getLocal(conf);
File in1Dir = new File(dataDir + "/input");
in1Dir.mkdirs();
int lineCount = 1024*1024*2;//64MB
File inputFile = makeTestFile(dataDir + "/input/in1.txt",lineCount);
long size = inputFile.length();
String logFile = inputFile.getAbsolutePath();
System.out.println("Output:" + logFile);
System.out.println("File:" + inputFile.length());
BackfillingLoader loader = new BackfillingLoader(conf,cluster,machine,adaptorName,recordType,logFile);
loader.process();
File finalOutputFile = new File(dataDir + "/input/in1.txt.sav");
Assert.assertTrue(inputFile.exists() == false);
Assert.assertTrue(finalOutputFile.exists() == true);
String doneFile = null;
File directory = new File(dataDir + "/log/");
String[] files = directory.list();
for(String file: files) {
if ( file.endsWith(".done") ){
doneFile = dataDir + "/log/" + file;
break;
}
}
long seqId = validateDataSink(fs,conf,doneFile,finalOutputFile,
cluster, recordType, machine, logFile);
Assert.assertTrue(seqId == size);
} catch (Throwable e) {
e.printStackTrace();
Assert.fail();
}
try {
FileUtils.deleteDirectory(new File(dataDir));
} catch (IOException e) {
e.printStackTrace();
}
}
protected long validateDataSink(FileSystem fs,Configuration conf, String dataSinkFile, File logFile,
String cluster,String dataType, String source, String application) throws Throwable {
SequenceFile.Reader reader = null;
long lastSeqId = -1;
BufferedWriter out = null;
try {
reader = new SequenceFile.Reader(fs, new Path(dataSinkFile), conf);
ChukwaArchiveKey key = new ChukwaArchiveKey();
ChunkImpl chunk = ChunkImpl.getBlankChunk();
String dataSinkDumpName = dataSinkFile + ".dump";
out = new BufferedWriter(new FileWriter(dataSinkDumpName));
while (reader.next(key, chunk)) {
Assert.assertTrue(cluster.equals(RecordUtil.getClusterName(chunk)));
Assert.assertTrue(dataType.equals(chunk.getDataType()));
Assert.assertTrue(source.equals(chunk.getSource()));
out.write(new String(chunk.getData()));
lastSeqId = chunk.getSeqID() ;
}
out.close();
out = null;
reader.close();
reader = null;
String dataSinkMD5 = MD5.checksum(new File(dataSinkDumpName));
String logFileMD5 = MD5.checksum(logFile);
Assert.assertTrue(dataSinkMD5.equals(logFileMD5));
}
finally {
if (out != null) {
out.close();
}
if (reader != null) {
reader.close();
}
}
return lastSeqId;
}
private File makeTestFile(String name, int size) throws IOException {
File tmpOutput = new File(name);
FileOutputStream fos = new FileOutputStream(tmpOutput);
PrintWriter pw = new PrintWriter(fos);
for (int i = 0; i < size; ++i) {
pw.print(i + " ");
pw.println("abcdefghijklmnopqrstuvwxyz");
}
pw.flush();
pw.close();
return tmpOutput;
}
}