/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.compaction.hivebasedconstructs;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.thrift.TException;
import com.google.common.base.Splitter;
import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.WorkUnitState;
import gobblin.data.management.conversion.hive.watermarker.PartitionLevelWatermarker;
import gobblin.source.extractor.Extractor;
import gobblin.util.AutoReturnableObject;
import gobblin.data.management.conversion.hive.extractor.HiveBaseExtractor;
import lombok.extern.slf4j.Slf4j;
/**
* {@link Extractor} that extracts primary key field name, delta field name, and location from hive metastore and
* creates an {@link MRCompactionEntity}
*/
@Slf4j
public class HiveMetadataForCompactionExtractor extends HiveBaseExtractor<Void, MRCompactionEntity> {
public static final String COMPACTION_PRIMARY_KEY = "hive.metastore.primaryKey";
public static final String COMPACTION_DELTA = "hive.metastore.delta";
private MRCompactionEntity compactionEntity;
private boolean extracted = false;
public HiveMetadataForCompactionExtractor(WorkUnitState state, FileSystem fs) throws IOException, TException, HiveException {
super(state);
if (Boolean.valueOf(state.getPropAsBoolean(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY))) {
log.info("Ignoring Watermark workunit for {}", state.getProp(ConfigurationKeys.DATASET_URN_KEY));
return;
}
try (AutoReturnableObject<IMetaStoreClient> client = this.pool.getClient()) {
Table table = client.get().getTable(this.dbName, this.tableName);
String primaryKeyString = table.getParameters().get(state.getProp(COMPACTION_PRIMARY_KEY));
List<String> primaryKeyList = Splitter.on(',').omitEmptyStrings().trimResults().splitToList(primaryKeyString);
String deltaString = table.getParameters().get(state.getProp(COMPACTION_DELTA));
List<String> deltaList = Splitter.on(',').omitEmptyStrings().trimResults().splitToList(deltaString);
Path dataFilesPath = new Path(table.getSd().getLocation());
compactionEntity = new MRCompactionEntity(primaryKeyList, deltaList, dataFilesPath, state.getProperties());
}
}
@Override
public MRCompactionEntity readRecord(MRCompactionEntity reuse) {
if (!extracted) {
extracted = true;
return compactionEntity;
} else {
return null;
}
}
@Override
public Void getSchema() throws IOException {
return null;
}
}