/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.data.management.copy.hive.avro;
import java.io.IOException;
import java.net.URI;
import com.google.common.base.Optional;
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.Partition;
import gobblin.data.management.copy.hive.HiveCopyEntityHelper;
import gobblin.util.PathUtils;
/**
* Update avro related entries in creating {@link gobblin.data.management.copy.CopyEntity}s for copying a Hive table.
*/
@Slf4j
public class HiveAvroCopyEntityHelper {
private static final String HIVE_TABLE_AVRO_SCHEMA_URL = "avro.schema.url";
/**
* Currently updated the {@link #HIVE_TABLE_AVRO_SCHEMA_URL} location for new hive table
* @param targetTable, new Table to be registered in hive
* @throws IOException
*/
public static void updateTableAttributesIfAvro(Table targetTable, HiveCopyEntityHelper hiveHelper) throws IOException {
if (!isHiveTableAvroType(targetTable)) {
return;
}
// need to update the {@link #HIVE_TABLE_AVRO_SCHEMA_URL} location
String oldAvroSchemaURL = targetTable.getTTable().getSd().getSerdeInfo().getParameters().get(HIVE_TABLE_AVRO_SCHEMA_URL);
if (oldAvroSchemaURL != null) {
Path oldAvroSchemaPath = new Path(oldAvroSchemaURL);
URI sourceFileSystemURI = hiveHelper.getDataset().getFs().getUri();
if (PathUtils.isAbsoluteAndSchemeAuthorityNull(oldAvroSchemaPath)
|| (oldAvroSchemaPath.toUri().getScheme().equals(sourceFileSystemURI.getScheme())
&& oldAvroSchemaPath.toUri().getAuthority().equals(sourceFileSystemURI.getAuthority()))) {
String newAvroSchemaURL = hiveHelper.getTargetPathHelper().getTargetPath(oldAvroSchemaPath, hiveHelper.getTargetFileSystem(),
Optional.<Partition>absent(), true).toString();
targetTable.getTTable().getSd().getSerdeInfo().getParameters().put(HIVE_TABLE_AVRO_SCHEMA_URL, newAvroSchemaURL);
log.info(String.format("For table %s, change %s from %s to %s", targetTable.getCompleteName(),
HIVE_TABLE_AVRO_SCHEMA_URL, oldAvroSchemaURL, newAvroSchemaURL));
}
}
}
/**
* Tell whether a hive table is actually an Avro table
* @param targetTable
* @return
* @throws IOException
*/
public static boolean isHiveTableAvroType(Table targetTable) throws IOException {
String serializationLib = targetTable.getTTable().getSd().getSerdeInfo().getSerializationLib();
String inputFormat = targetTable.getTTable().getSd().getInputFormat();
String outputFormat = targetTable.getTTable().getSd().getOutputFormat();
return inputFormat.endsWith("AvroContainerInputFormat") || outputFormat.endsWith("AvroContainerOutputFormat")
|| serializationLib.endsWith("AvroSerDe");
}
}