/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.data.management.conversion.hive.converter;
import java.io.IOException;
import java.util.List;
import org.apache.avro.Schema;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
import org.testng.Assert;
import org.testng.annotations.Test;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import gobblin.configuration.WorkUnitState;
import gobblin.data.management.ConversionHiveTestUtils;
import gobblin.data.management.conversion.hive.LocalHiveMetastoreTestUtils;
import gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDataset;
import gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDatasetTest;
import gobblin.data.management.conversion.hive.entities.QueryBasedHiveConversionEntity;
import gobblin.data.management.conversion.hive.entities.SchemaAwareHivePartition;
import gobblin.data.management.conversion.hive.entities.SchemaAwareHiveTable;
import gobblin.data.management.copy.hive.WhitelistBlacklist;
/**
 * Unit tests for the Avro to ORC Hive converters. Covers generation of the conversion queries for
 * flattened and nested ORC destinations, computation of the replaced partitions that should be
 * dropped, and construction of the view registration whitelist / blacklist from a
 * {@link WorkUnitState}.
 */
@Test(groups = { "gobblin.data.management.conversion" })
public class HiveAvroToOrcConverterTest {

  /** Classpath directory holding the Avro schema and the expected DDL / DML query files. */
  private static final String RESOURCE_DIR = "hiveConverterTest";

  /** Token preceding the column list in a DDL query; everything up to and including it is ignored. */
  private static final String DDL_MARKER = "(";

  /** Token identifying the DML query; everything up to and including it is ignored. */
  private static final String DML_MARKER = "SELECT";

  private final LocalHiveMetastoreTestUtils hiveMetastoreTestUtils;

  public HiveAvroToOrcConverterTest() {
    this.hiveMetastoreTestUtils = LocalHiveMetastoreTestUtils.getInstance();
  }

  /**
   * Tests DDL and DML generation for a flattened ORC destination.
   *
   * @throws Exception if the test table cannot be set up or the conversion fails
   */
  @Test
  public void testFlattenSchemaDDLandDML() throws Exception {
    String dbName = "testdb";
    String tableName = "testtable";
    String tableSdLoc = "/tmp/testtable";

    // Drop the test database before (re)creating the test table in it
    this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
    Table table =
        this.hiveMetastoreTestUtils.createTestTable(dbName, tableName, tableSdLoc, Optional.<String> absent());
    Schema schema =
        ConversionHiveTestUtils.readSchemaFromJsonFile(RESOURCE_DIR, "recordWithinRecordWithinRecord_nested.json");
    WorkUnitState wus = ConversionHiveTestUtils.createWus(dbName, tableName, 0);

    try (HiveAvroToFlattenedOrcConverter converter = new HiveAvroToFlattenedOrcConverter()) {
      Config config = ConfigFactory.parseMap(ImmutableMap.<String, String> builder()
          .put("destinationFormats", "flattenedOrc")
          .put("flattenedOrc.destination.dbName", dbName)
          .put("flattenedOrc.destination.tableName", tableName + "_orc")
          .put("flattenedOrc.destination.dataPath", "file:" + tableSdLoc + "_orc")
          .build());
      ConvertibleHiveDataset cd = ConvertibleHiveDatasetTest.createTestConvertibleDataset(config);

      List<QueryBasedHiveConversionEntity> conversionEntities =
          Lists.newArrayList(converter.convertRecord(converter.convertSchema(schema, wus),
              new QueryBasedHiveConversionEntity(cd, new SchemaAwareHiveTable(table, schema)), wus));

      assertConversionQueries(conversionEntities, "recordWithinRecordWithinRecord_flattened.ddl",
          "recordWithinRecordWithinRecord_flattened.dml");
    }
  }

  /**
   * Tests DDL and DML generation for a nested ORC destination.
   *
   * @throws Exception if the test table cannot be set up or the conversion fails
   */
  @Test
  public void testNestedSchemaDDLandDML() throws Exception {
    String dbName = "testdb";
    String tableName = "testtable";
    String tableSdLoc = "/tmp/testtable";

    // Drop the test database before (re)creating the test table in it
    this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
    Table table =
        this.hiveMetastoreTestUtils.createTestTable(dbName, tableName, tableSdLoc, Optional.<String> absent());
    Schema schema =
        ConversionHiveTestUtils.readSchemaFromJsonFile(RESOURCE_DIR, "recordWithinRecordWithinRecord_nested.json");
    WorkUnitState wus = ConversionHiveTestUtils.createWus(dbName, tableName, 0);
    wus.getJobState().setProp("orc.table.flatten.schema", "false");

    try (HiveAvroToNestedOrcConverter converter = new HiveAvroToNestedOrcConverter()) {
      Config config = ConfigFactory.parseMap(ImmutableMap.<String, String> builder()
          .put("destinationFormats", "nestedOrc")
          .put("nestedOrc.destination.tableName", "testtable_orc_nested")
          .put("nestedOrc.destination.dbName", dbName)
          .put("nestedOrc.destination.dataPath", "file:/tmp/testtable_orc_nested")
          .build());
      ConvertibleHiveDataset cd = ConvertibleHiveDatasetTest.createTestConvertibleDataset(config);

      List<QueryBasedHiveConversionEntity> conversionEntities =
          Lists.newArrayList(converter.convertRecord(converter.convertSchema(schema, wus),
              new QueryBasedHiveConversionEntity(cd, new SchemaAwareHiveTable(table, schema)), wus));

      assertConversionQueries(conversionEntities, "recordWithinRecordWithinRecord_nested.ddl",
          "recordWithinRecordWithinRecord_nested.dml");
    }
  }

  /**
   * Tests that the partitions listed in the {@code gobblin.replaced.partitions} parameter are reported
   * for dropping, and that a partition never lists itself.
   *
   * @throws Exception if the drop partition information cannot be computed
   */
  @Test
  public void dropReplacedPartitionsTest() throws Exception {
    Table table = ConvertibleHiveDatasetTest.getTestTable("dbName", "tableName");
    table.setTableType("VIRTUAL_VIEW");
    table.setPartitionKeys(
        ImmutableList.of(new FieldSchema("year", "string", ""), new FieldSchema("month", "string", "")));

    Partition part = new Partition();
    part.setParameters(ImmutableMap.of("gobblin.replaced.partitions", "2015,12|2016,01"));

    SchemaAwareHiveTable hiveTable = new SchemaAwareHiveTable(table, null);
    SchemaAwareHivePartition partition = new SchemaAwareHivePartition(table, part, null);
    QueryBasedHiveConversionEntity conversionEntity =
        new QueryBasedHiveConversionEntity(null, hiveTable, Optional.of(partition));

    // Each replaced partition spec is expected as a (partition key -> value) map, in listed order
    List<ImmutableMap<String, String>> expected = ImmutableList.of(
        ImmutableMap.of("year", "2015", "month", "12"),
        ImmutableMap.of("year", "2016", "month", "01"));
    Assert.assertEquals(AbstractAvroToOrcConverter.getDropPartitionsDDLInfo(conversionEntity), expected);

    // Make sure that a partition itself is not dropped: (2016, 02) is both the partition's own value
    // and one of its replaced partitions, yet the expected result is unchanged
    Partition replacedSelf = new Partition();
    replacedSelf.setParameters(ImmutableMap.of("gobblin.replaced.partitions", "2015,12|2016,01|2016,02"));
    replacedSelf.setValues(ImmutableList.of("2016", "02"));

    conversionEntity = new QueryBasedHiveConversionEntity(null, hiveTable,
        Optional.of(new SchemaAwareHivePartition(table, replacedSelf, null)));
    Assert.assertEquals(AbstractAvroToOrcConverter.getDropPartitionsDDLInfo(conversionEntity), expected);
  }

  /**
   * Tests construction of the view registration whitelist / blacklist from a work unit state.
   * More comprehensive tests for WhiteBlackList are in:
   * {@link gobblin.data.management.copy.hive.WhitelistBlacklistTest}
   *
   * @throws Exception if the whitelist / blacklist cannot be built
   */
  @Test
  public void hiveViewRegistrationWhiteBlackListTest() throws Exception {
    WorkUnitState wus = ConversionHiveTestUtils.createWus("dbName", "tableName", 0);

    // Neither property set: no whitelist / blacklist object at all
    Optional<WhitelistBlacklist> unconfigured = AbstractAvroToOrcConverter.getViewWhiteBackListFromWorkUnit(wus);
    Assert.assertFalse(unconfigured.isPresent(),
        "No whitelist blacklist specified in WUS, WhiteListBlackList object should be absent");

    // Both properties set but empty: everything is accepted
    WhitelistBlacklist acceptAll = configureViewWhitelistBlacklist(wus, "", "");
    Assert.assertTrue(acceptAll.acceptDb("mydb"));
    Assert.assertTrue(acceptAll.acceptTable("mydb", "mytable"));

    // Database level whitelist
    WhitelistBlacklist dbWhitelisted = configureViewWhitelistBlacklist(wus, "yourdb", "");
    Assert.assertFalse(dbWhitelisted.acceptDb("mydb"));
    Assert.assertFalse(dbWhitelisted.acceptTable("mydb", "mytable"));
    Assert.assertTrue(dbWhitelisted.acceptDb("yourdb"));
    Assert.assertTrue(dbWhitelisted.acceptTable("yourdb", "mytable"));

    // Table level whitelist
    WhitelistBlacklist tableWhitelisted = configureViewWhitelistBlacklist(wus, "yourdb.yourtable", "");
    Assert.assertFalse(tableWhitelisted.acceptDb("mydb"));
    Assert.assertFalse(tableWhitelisted.acceptTable("yourdb", "mytable"));
    Assert.assertTrue(tableWhitelisted.acceptDb("yourdb"));
    Assert.assertTrue(tableWhitelisted.acceptTable("yourdb", "yourtable"));

    // Table level blacklist
    WhitelistBlacklist tableBlacklisted = configureViewWhitelistBlacklist(wus, "", "yourdb.yourtable");
    Assert.assertTrue(tableBlacklisted.acceptDb("mydb"));
    Assert.assertTrue(tableBlacklisted.acceptTable("yourdb", "mytable"));
    Assert.assertTrue(tableBlacklisted.acceptDb("yourdb"));
    Assert.assertFalse(tableBlacklisted.acceptTable("yourdb", "yourtable"));
  }

  /**
   * Verifies that exactly one conversion entity carrying four queries was produced, and that its DDL
   * and DML queries match the expected query files.
   *
   * <p>The part of the DDL before the first bracket and the part of the DML up to and including
   * {@code SELECT} are ignored because the staging table has a random name component.
   *
   * <p>NOTE(review): earlier revisions passed the arguments of {@code StringUtils.substringAfter} in
   * swapped order, which reduced both sides of each comparison to an empty string. Now that the
   * comparison is real, confirm that the expected query files match the generated queries.
   *
   * @param conversionEntities entities returned by the converter under test
   * @param expectedDdlFile name of the file under the test resource directory with the expected DDL
   * @param expectedDmlFile name of the file under the test resource directory with the expected DML
   * @throws Exception if an expected query file cannot be read
   */
  private static void assertConversionQueries(List<QueryBasedHiveConversionEntity> conversionEntities,
      String expectedDdlFile, String expectedDmlFile) throws Exception {
    Assert.assertEquals(conversionEntities.size(), 1, "Only one query entity should be returned");

    List<String> queries = conversionEntities.get(0).getQueries();
    Assert.assertEquals(queries.size(), 4, "4 queries (DDL and DML) should be returned");

    String expectedDDLQuery =
        afterMarker(ConversionHiveTestUtils.readQueryFromFile(RESOURCE_DIR, expectedDdlFile), DDL_MARKER);
    String expectedDMLQuery =
        afterMarker(ConversionHiveTestUtils.readQueryFromFile(RESOURCE_DIR, expectedDmlFile), DML_MARKER);

    // An empty expectation means the marker is missing from the fixture and the check would be vacuous
    Assert.assertFalse(expectedDDLQuery.isEmpty(), "Expected DDL should have content after '" + DDL_MARKER + "'");
    Assert.assertFalse(expectedDMLQuery.isEmpty(), "Expected DML should have content after '" + DML_MARKER + "'");

    String actualDDLQuery = afterMarker(queries.get(0), DDL_MARKER);
    String actualDMLQuery = afterMarker(findDmlQuery(queries), DML_MARKER);

    Assert.assertEquals(actualDDLQuery, expectedDDLQuery);
    Assert.assertEquals(actualDMLQuery, expectedDMLQuery);
  }

  /**
   * Returns the first query containing {@code SELECT}.
   *
   * <p>NOTE(review): presumably only the DML query, or the DML query first, contains an upper case
   * {@code SELECT}; verify against the query generator.
   *
   * @param queries generated queries to search
   * @return the first query containing the DML marker
   * @throws AssertionError if no query contains the DML marker
   */
  private static String findDmlQuery(List<String> queries) {
    for (String query : queries) {
      if (query.contains(DML_MARKER)) {
        return query;
      }
    }
    throw new AssertionError("No generated query contains '" + DML_MARKER + "': " + queries);
  }

  /**
   * Returns the text following the first occurrence of {@code marker} in the trimmed query, or an
   * empty string when the marker does not occur.
   *
   * @param query query text to cut
   * @param marker token after which the text is returned
   * @return the remainder of the trimmed query after the marker
   */
  private static String afterMarker(String query, String marker) {
    return StringUtils.substringAfter(query.trim(), marker);
  }

  /**
   * Sets the view registration whitelist and blacklist properties on the work unit state, asserts
   * that a whitelist / blacklist object is built from them, and returns it.
   *
   * @param wus work unit state to configure
   * @param whitelist value for the view registration whitelist property
   * @param blacklist value for the view registration blacklist property
   * @return the whitelist / blacklist built from the work unit state
   * @throws Exception if the whitelist / blacklist cannot be built
   */
  private static WhitelistBlacklist configureViewWhitelistBlacklist(WorkUnitState wus, String whitelist,
      String blacklist) throws Exception {
    wus.setProp(AbstractAvroToOrcConverter.HIVE_CONVERSION_VIEW_REGISTRATION_WHITELIST, whitelist);
    wus.setProp(AbstractAvroToOrcConverter.HIVE_CONVERSION_VIEW_REGISTRATION_BLACKLIST, blacklist);

    Optional<WhitelistBlacklist> optionalWhitelistBlacklist =
        AbstractAvroToOrcConverter.getViewWhiteBackListFromWorkUnit(wus);
    Assert.assertTrue(optionalWhitelistBlacklist.isPresent(),
        "Whitelist blacklist specified in WUS, WhiteListBlackList object should be present");
    return optionalWhitelistBlacklist.get();
  }
}