/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.data.management.conversion.hive;

import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.Table;
import org.joda.time.DateTime;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.SourceState;
import gobblin.configuration.WorkUnitState;
import gobblin.data.management.ConversionHiveTestUtils;
import gobblin.data.management.conversion.hive.source.HiveSource;
import gobblin.data.management.conversion.hive.source.HiveWorkUnit;
import gobblin.data.management.conversion.hive.watermarker.PartitionLevelWatermarker;
import gobblin.data.management.conversion.hive.watermarker.TableLevelWatermarker;
import gobblin.source.workunit.WorkUnit;

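/**
 * Unit tests for {@link HiveSource}: workunit creation for unpartitioned and
 * partitioned tables, watermark-based filtering of previously processed tables,
 * and the lookback-window check for partitions.
 */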
@Test(groups = { "gobblin.data.management.conversion" })
public class HiveSourceTest {
private LocalHiveMetastoreTestUtils hiveMetastoreTestUtils;
private HiveSource hiveSource;

@BeforeClass
public void setup() throws Exception {
this.hiveMetastoreTestUtils = LocalHiveMetastoreTestUtils.getInstance();
this.hiveSource = new HiveSource();
}
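
/**
 * An unpartitioned table should produce exactly one workunit and no dummy
 * watermark workunit.
 */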
@Test
public void testGetWorkUnitsForTable() throws Exception {
String dbName = "testdb2";
String tableName = "testtable2";
String tableSdLoc = "/tmp/testtable2";
this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
SourceState testState = getTestState(dbName);
this.hiveMetastoreTestUtils.createTestTable(dbName, tableName, tableSdLoc, Optional.<String> absent());
List<WorkUnit> workUnits = hiveSource.getWorkunits(testState);
// One workunit for the table, no dummy workunits
Assert.assertEquals(workUnits.size(), 1);
WorkUnit wu = workUnits.get(0);
HiveWorkUnit hwu = new HiveWorkUnit(wu);
Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getDb(), dbName);
Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getTable(), tableName);
Assert.assertEquals(hwu.getTableSchemaUrl(), new Path("/tmp/dummy"));
}
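
/**
 * A partitioned table should produce one workunit per partition plus a dummy
 * watermark workunit from the {@link PartitionLevelWatermarker}.
 */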
@Test
public void testGetWorkUnitsForPartitions() throws Exception {
String dbName = "testdb3";
String tableName = "testtable3";
String tableSdLoc = "/tmp/testtable3";
this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
SourceState testState = getTestState(dbName);
Table tbl = this.hiveMetastoreTestUtils.createTestTable(dbName, tableName, tableSdLoc, Optional.of("field"));
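// Hive stores partition create time in seconds; casting raw milliseconds to int would overflow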
this.hiveMetastoreTestUtils.addTestPartition(tbl, ImmutableList.of("f1"), (int) TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()));
List<WorkUnit> workUnits = this.hiveSource.getWorkunits(testState);
// One workunit for the partition + 1 dummy watermark workunit
Assert.assertEquals(workUnits.size(), 2);
WorkUnit wu = workUnits.get(0);
WorkUnit wu2 = workUnits.get(1);
// The watermark workunit may appear in either position; pick the non-watermark one
HiveWorkUnit hwu;
if (!wu.contains(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY)) {
hwu = new HiveWorkUnit(wu);
} else {
hwu = new HiveWorkUnit(wu2);
}
Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getDb(), dbName);
Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getTable(), tableName);
Assert.assertEquals(hwu.getPartitionName().get(), "field=f1");
}
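
/**
 * With a {@link TableLevelWatermarker} and a previous workunit state whose
 * watermark matches table1's create time, only table2 should produce a workunit.
 */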
@Test
public void testGetWorkunitsAfterWatermark() throws Exception {
String dbName = "testdb4";
String tableName1 = "testtable1";
String tableSdLoc1 = "/tmp/testtable1";
String tableName2 = "testtable2";
String tableSdLoc2 = "/tmp/testtable2";
this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
this.hiveMetastoreTestUtils.createTestTable(dbName, tableName1, tableSdLoc1, Optional.<String> absent());
this.hiveMetastoreTestUtils.createTestTable(dbName, tableName2, tableSdLoc2, Optional.<String> absent(), true);
List<WorkUnitState> previousWorkUnitStates = Lists.newArrayList();
Table table1 = this.hiveMetastoreTestUtils.getLocalMetastoreClient().getTable(dbName, tableName1);
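// Table create time is stored in seconds; the watermark is expressed in milliseconds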
previousWorkUnitStates.add(ConversionHiveTestUtils.createWus(dbName, tableName1,
TimeUnit.MILLISECONDS.convert(table1.getCreateTime(), TimeUnit.SECONDS)));
SourceState testState = new SourceState(getTestState(dbName), previousWorkUnitStates);
testState.setProp(HiveSource.HIVE_SOURCE_WATERMARKER_FACTORY_CLASS_KEY, TableLevelWatermarker.Factory.class.getName());
List<WorkUnit> workUnits = this.hiveSource.getWorkunits(testState);
Assert.assertEquals(workUnits.size(), 1);
WorkUnit wu = workUnits.get(0);
HiveWorkUnit hwu = new HiveWorkUnit(wu);
Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getDb(), dbName);
Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getTable(), tableName2);
}
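
/**
 * A partition created 35 days ago falls outside the default 3-day lookback
 * window, so {@link HiveSource#isOlderThanLookback} should return true.
 */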
@Test
public void testShouldCreateWorkunitsOlderThanLookback() throws Exception {
long currentTime = System.currentTimeMillis();
long partitionCreateTime = new DateTime(currentTime).minusDays(35).getMillis();
org.apache.hadoop.hive.ql.metadata.Partition partition =
this.hiveMetastoreTestUtils.createDummyPartition(partitionCreateTime);
SourceState testState = getTestState("testDb6");
HiveSource source = new HiveSource();
source.initialize(testState);
boolean isOlderThanLookback = source.isOlderThanLookback(partition);
Assert.assertTrue(isOlderThanLookback, "Should not create workunits for partitions older than the lookback window");
}
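
/**
 * A partition created 2 days ago is within the default 3-day lookback window,
 * so it should be eligible for workunit creation.
 */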
@Test
public void testShouldCreateWorkunitsNewerThanLookback() throws Exception {
long currentTime = System.currentTimeMillis();
// Default lookback time is 3 days
long partitionCreateTime = new DateTime(currentTime).minusDays(2).getMillis();
org.apache.hadoop.hive.ql.metadata.Partition partition =
this.hiveMetastoreTestUtils.createDummyPartition(partitionCreateTime);
SourceState testState = getTestState("testDb7");
HiveSource source = new HiveSource();
source.initialize(testState);
boolean isOlderThanLookback = source.isOlderThanLookback(partition);
Assert.assertFalse(isOlderThanLookback, "Should create workunits for partitions newer than the lookback window");
}
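
/**
 * When a partition carries a distcp registration generation time, that value
 * should be used for the lookback check instead of the partition create time.
 */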
@Test
public void testIsOlderThanLookbackForDistcpGenerationTime() throws Exception {
long currentTime = System.currentTimeMillis();
// Default lookback time is 3 days
long partitionCreateTime = new DateTime(currentTime).minusDays(2).getMillis();
Map<String, String> parameters = Maps.newHashMap();
parameters.put(HiveSource.DISTCP_REGISTRATION_GENERATION_TIME_KEY, partitionCreateTime + "");
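// A create time of 0 would be far older than the lookback window;
// the distcp registration generation time parameter should take precedence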
org.apache.hadoop.hive.ql.metadata.Partition partition = this.hiveMetastoreTestUtils.createDummyPartition(0);
partition.getTPartition().setParameters(parameters);
SourceState testState = getTestState("testDb6");
HiveSource source = new HiveSource();
source.initialize(testState);
boolean isOlderThanLookback = source.isOlderThanLookback(partition);
Assert.assertFalse(isOlderThanLookback, "Should create workunits when the distcp generation time is newer than the lookback window");
}
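
/** Builds a minimal {@link SourceState} that targets all tables in the given database. */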
private static SourceState getTestState(String dbName) {
SourceState testState = new SourceState();
testState.setProp("hive.dataset.database", dbName);
testState.setProp("hive.dataset.table.pattern", "*");
testState.setProp(ConfigurationKeys.JOB_ID_KEY, "testJobId");
return testState;
}
}