/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.GenericOptionsParser;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

@Category(MediumTests.class)
public class TestImportExport {
private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
private static final byte[] ROW1 = Bytes.toBytes("row1");
private static final byte[] ROW2 = Bytes.toBytes("row2");
private static final String FAMILYA_STRING = "a";
private static final String FAMILYB_STRING = "b";
private static final byte[] FAMILYA = Bytes.toBytes(FAMILYA_STRING);
private static final byte[] FAMILYB = Bytes.toBytes(FAMILYB_STRING);
private static final byte[] QUAL = Bytes.toBytes("q");
private static final String OUTPUT_DIR = "outputdir";
private static MiniHBaseCluster cluster;
private static long now = System.currentTimeMillis();

@BeforeClass
public static void beforeClass() throws Exception {
cluster = UTIL.startMiniCluster();
UTIL.startMiniMapReduceCluster();
}

@AfterClass
public static void afterClass() throws Exception {
UTIL.shutdownMiniMapReduceCluster();
UTIL.shutdownMiniCluster();
}

@Before
@After
public void cleanup() throws Exception {
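// Export writes to OUTPUT_DIR; wipe it before and after every test so each run starts clean.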
FileSystem fs = FileSystem.get(UTIL.getConfiguration());
fs.delete(new Path(OUTPUT_DIR), true);
}

/**
 * Test a simple export/import round trip, renaming the column family on import
 * @throws Exception
 */
@Test
public void testSimpleCase() throws Exception {
String EXPORT_TABLE = "exportSimpleCase";
HTable t = UTIL.createTable(Bytes.toBytes(EXPORT_TABLE), FAMILYA);
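// Load two rows, each with three versions of the same cell at consecutive timestamps.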
Put p = new Put(ROW1);
p.add(FAMILYA, QUAL, now, QUAL);
p.add(FAMILYA, QUAL, now+1, QUAL);
p.add(FAMILYA, QUAL, now+2, QUAL);
t.put(p);
p = new Put(ROW2);
p.add(FAMILYA, QUAL, now, QUAL);
p.add(FAMILYA, QUAL, now+1, QUAL);
p.add(FAMILYA, QUAL, now+2, QUAL);
t.put(p);
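// Export args: <tablename> <outputdir> <versions>; 1000 comfortably covers the three versions written above.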
String[] args = new String[] {
EXPORT_TABLE,
OUTPUT_DIR,
"1000"
};
GenericOptionsParser opts = new GenericOptionsParser(new Configuration(cluster.getConfiguration()), args);
Configuration conf = opts.getConfiguration();
args = opts.getRemainingArgs();
Job job = Export.createSubmittableJob(conf, args);
job.getConfiguration().set("mapreduce.framework.name", "yarn");
job.waitForCompletion(false);
assertTrue(job.isSuccessful());
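// Import into a table that only has family 'b', using CF_RENAME_PROP to map family 'a' onto it.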
String IMPORT_TABLE = "importTableSimpleCase";
t.close();
t = UTIL.createTable(Bytes.toBytes(IMPORT_TABLE), FAMILYB);
args = new String[] {
"-D" + Import.CF_RENAME_PROP + "="+FAMILYA_STRING+":"+FAMILYB_STRING,
IMPORT_TABLE,
OUTPUT_DIR
};
opts = new GenericOptionsParser(new Configuration(cluster.getConfiguration()), args);
conf = opts.getConfiguration();
args = opts.getRemainingArgs();
job = Import.createSubmittableJob(conf, args);
job.getConfiguration().set("mapreduce.framework.name", "yarn");
job.waitForCompletion(false);
assertTrue(job.isSuccessful());
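// All three versions of each row should have survived the family rename.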
Get g = new Get(ROW1);
g.setMaxVersions();
Result r = t.get(g);
assertEquals(3, r.size());
g = new Get(ROW2);
g.setMaxVersions();
r = t.get(g);
assertEquals(3, r.size());
t.close();
}

/**
 * Test export of the .META. catalog table
 *
 * @throws Exception
 */
@Test
public void testMetaExport() throws Exception {
String EXPORT_TABLE = ".META.";
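// Export args: <tablename> <outputdir> <versions> <starttime> <endtime>; the [0, 0) time range
// matches nothing, so this only verifies that an export job against .META. completes successfully.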
String[] args = new String[] { EXPORT_TABLE, OUTPUT_DIR, "1", "0", "0" };
GenericOptionsParser opts = new GenericOptionsParser(new Configuration(
cluster.getConfiguration()), args);
Configuration conf = opts.getConfiguration();
args = opts.getRemainingArgs();
Job job = Export.createSubmittableJob(conf, args);
job.getConfiguration().set("mapreduce.framework.name", "yarn");
job.waitForCompletion(false);
assertTrue(job.isSuccessful());
}

@Test
public void testWithDeletes() throws Exception {
String EXPORT_TABLE = "exportWithDeletes";
HTableDescriptor desc = new HTableDescriptor(EXPORT_TABLE);
desc.addFamily(new HColumnDescriptor(FAMILYA)
.setMaxVersions(5)
.setKeepDeletedCells(true)
);
UTIL.getHBaseAdmin().createTable(desc);
HTable t = new HTable(UTIL.getConfiguration(), EXPORT_TABLE);
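// Write five versions of a single cell.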
Put p = new Put(ROW1);
p.add(FAMILYA, QUAL, now, QUAL);
p.add(FAMILYA, QUAL, now+1, QUAL);
p.add(FAMILYA, QUAL, now+2, QUAL);
p.add(FAMILYA, QUAL, now+3, QUAL);
p.add(FAMILYA, QUAL, now+4, QUAL);
t.put(p);
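// Delete the whole row as of now+3 (leaves a family delete marker), then delete all
// versions of the column at or before now+2 (leaves a column delete marker).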
Delete d = new Delete(ROW1, now+3, null);
t.delete(d);
d = new Delete(ROW1);
d.deleteColumns(FAMILYA, QUAL, now+2);
t.delete(d);
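// Export with a raw scan so the delete markers are carried along with every cell version.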
String[] args = new String[] {
"-D" + Export.RAW_SCAN + "=true",
EXPORT_TABLE,
OUTPUT_DIR,
"1000"
};
GenericOptionsParser opts = new GenericOptionsParser(new Configuration(cluster.getConfiguration()), args);
Configuration conf = opts.getConfiguration();
args = opts.getRemainingArgs();
Job job = Export.createSubmittableJob(conf, args);
job.getConfiguration().set("mapreduce.framework.name", "yarn");
job.waitForCompletion(false);
assertTrue(job.isSuccessful());
String IMPORT_TABLE = "importWithDeletes";
desc = new HTableDescriptor(IMPORT_TABLE);
desc.addFamily(new HColumnDescriptor(FAMILYA)
.setMaxVersions(5)
.setKeepDeletedCells(true)
);
UTIL.getHBaseAdmin().createTable(desc);
t.close();
t = new HTable(UTIL.getConfiguration(), IMPORT_TABLE);
args = new String[] {
IMPORT_TABLE,
OUTPUT_DIR
};
opts = new GenericOptionsParser(new Configuration(cluster.getConfiguration()), args);
conf = opts.getConfiguration();
args = opts.getRemainingArgs();
job = Import.createSubmittableJob(conf, args);
job.getConfiguration().set("mapreduce.framework.name", "yarn");
job.waitForCompletion(false);
assertTrue(job.isSuccessful());
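// A raw scan should show the same marker/version layout that was exported: the family
// delete marker first, then puts and the column delete marker in descending timestamp order.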
Scan s = new Scan();
s.setMaxVersions();
s.setRaw(true);
ResultScanner scanner = t.getScanner(s);
Result r = scanner.next();
KeyValue[] res = r.raw();
assertTrue(res[0].isDeleteFamily());
assertEquals(now+4, res[1].getTimestamp());
assertEquals(now+3, res[2].getTimestamp());
assertTrue(res[3].isDelete());
assertEquals(now+2, res[4].getTimestamp());
assertEquals(now+1, res[5].getTimestamp());
assertEquals(now, res[6].getTimestamp());
scanner.close();
t.close();
}

@Test
public void testWithFilter() throws Exception {
String EXPORT_TABLE = "exportSimpleCase_ImportWithFilter";
HTableDescriptor desc = new HTableDescriptor(EXPORT_TABLE);
desc.addFamily(new HColumnDescriptor(FAMILYA).setMaxVersions(5));
UTIL.getHBaseAdmin().createTable(desc);
HTable exportTable = new HTable(UTIL.getConfiguration(), EXPORT_TABLE);
Put p = new Put(ROW1);
p.add(FAMILYA, QUAL, now, QUAL);
p.add(FAMILYA, QUAL, now + 1, QUAL);
p.add(FAMILYA, QUAL, now + 2, QUAL);
p.add(FAMILYA, QUAL, now + 3, QUAL);
p.add(FAMILYA, QUAL, now + 4, QUAL);
exportTable.put(p);
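// Export the whole table; filtering is exercised on the import side below.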
String[] args = new String[] { EXPORT_TABLE, OUTPUT_DIR, "1000" };
GenericOptionsParser opts = new GenericOptionsParser(new Configuration(
cluster.getConfiguration()), args);
Configuration conf = opts.getConfiguration();
args = opts.getRemainingArgs();
Job job = Export.createSubmittableJob(conf, args);
job.getConfiguration().set("mapreduce.framework.name", "yarn");
job.waitForCompletion(false);
assertTrue(job.isSuccessful());
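// Import with a PrefixFilter on ROW1 so only matching rows are written to the new table.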
String IMPORT_TABLE = "importWithFilter";
desc = new HTableDescriptor(IMPORT_TABLE);
desc.addFamily(new HColumnDescriptor(FAMILYA).setMaxVersions(5));
UTIL.getHBaseAdmin().createTable(desc);
HTable importTable = new HTable(UTIL.getConfiguration(), IMPORT_TABLE);
args = new String[] { "-D" + Import.FILTER_CLASS_CONF_KEY + "=" + PrefixFilter.class.getName(),
"-D" + Import.FILTER_ARGS_CONF_KEY + "=" + Bytes.toString(ROW1), IMPORT_TABLE, OUTPUT_DIR,
"1000" };
opts = new GenericOptionsParser(new Configuration(cluster.getConfiguration()), args);
conf = opts.getConfiguration();
args = opts.getRemainingArgs();
job = Import.createSubmittableJob(conf, args);
job.getConfiguration().set("mapreduce.framework.name", "yarn");
job.waitForCompletion(false);
assertTrue(job.isSuccessful());
// count the KeyValues in the source table that match the prefix filter
PrefixFilter filter = new PrefixFilter(ROW1);
int count = getCount(exportTable, filter);
Assert.assertEquals("Unexpected row count between export and import tables", count,
getCount(importTable, null));
// and then test that a broken command doesn't bork everything - easier here because we don't
// need to re-run the export job
args = new String[] { "-D" + Import.FILTER_CLASS_CONF_KEY + "=" + Filter.class.getName(),
"-D" + Import.FILTER_ARGS_CONF_KEY + "=" + Bytes.toString(ROW1) + "", EXPORT_TABLE,
OUTPUT_DIR, "1000" };
opts = new GenericOptionsParser(new Configuration(cluster.getConfiguration()), args);
conf = opts.getConfiguration();
args = opts.getRemainingArgs();
job = Import.createSubmittableJob(conf, args);
job.getConfiguration().set("mapreduce.framework.name", "yarn");
job.waitForCompletion(false);
assertFalse("Job succeeedd, but it had a non-instantiable filter!", job.isSuccessful());
// cleanup
exportTable.close();
importTable.close();
}

/**
 * Count the number of KeyValues in the specified table matching the given filter
 * @param table the table to scan
 * @param filter the filter to apply, or null to count everything
 * @return the total number of KeyValues across all matching rows
 * @throws IOException
 */
private int getCount(HTable table, Filter filter) throws IOException {
Scan scan = new Scan();
scan.setFilter(filter);
ResultScanner results = table.getScanner(scan);
int count = 0;
for (Result res : results) {
count += res.size();
}
results.close();
return count;
}
}