/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.data.management.version.finder;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import com.typesafe.config.Config;
import gobblin.configuration.ConfigurationKeys;
import gobblin.data.management.version.TimestampedHiveDatasetVersion;
import gobblin.util.ConfigUtils;
/**
 * A Hive partition finder where the version is the value of a date partition key.
 * <p>
 * The hive table needs to be date partitioned by prop value {@value #PARTITION_KEY_NAME_KEY}. The value of this key
 * must be a date pattern as per prop value {@value #PARTITION_VALUE_DATE_TIME_PATTERN_KEY}. The time zone used to
 * interpret the value is read from {@value #PARTITION_VALUE_DATE_TIME_TIMEZONE_KEY}.
 * </p>
 * <p>
 * E.g. if the hive partition is datepartition=2016-01-10-22/field1=f1Value, then
 * {@value #PARTITION_KEY_NAME_KEY}=datepartition and {@value #PARTITION_VALUE_DATE_TIME_PATTERN_KEY}=yyyy-MM-dd-HH
 * </p>
 */
public class DatePartitionHiveVersionFinder extends AbstractHiveDatasetVersionFinder {

  public static final String PARTITION_VALUE_DATE_TIME_PATTERN_KEY = "hive.partition.value.datetime.pattern";
  public static final String DEFAULT_PARTITION_VALUE_DATE_TIME_PATTERN = "yyyy-MM-dd-HH";

  public static final String PARTITION_VALUE_DATE_TIME_TIMEZONE_KEY = "hive.partition.value.datetime.timezone";
  public static final String DEFAULT_PARTITION_VALUE_DATE_TIME_TIMEZONE = ConfigurationKeys.PST_TIMEZONE_NAME;

  public static final String PARTITION_KEY_NAME_KEY = "hive.partition.key.name";
  public static final String DEFAULT_PARTITION_KEY_NAME = "datepartition";

  /** Parses partition values using {@link #pattern} in the configured time zone. */
  protected final DateTimeFormatter formatter;
  /** Name of the partition key whose value carries the date; matched case-insensitively. */
  private final String partitionKeyName;
  /** Matches the {@link FieldSchema} whose name equals {@link #partitionKeyName}, ignoring case. */
  private final Predicate<FieldSchema> partitionKeyNamePredicate;
  /** Date-time pattern the partition value is expected to follow. */
  private final String pattern;

  /**
   * Builds a finder from configuration. Every key is optional and falls back to its <code>DEFAULT_*</code> constant.
   *
   * @param fs file system; not used by this constructor
   * @param config configuration that may contain {@value #PARTITION_VALUE_DATE_TIME_PATTERN_KEY},
   *        {@value #PARTITION_VALUE_DATE_TIME_TIMEZONE_KEY} and {@value #PARTITION_KEY_NAME_KEY}
   */
  public DatePartitionHiveVersionFinder(FileSystem fs, Config config) {
    this.pattern =
        ConfigUtils.getString(config, PARTITION_VALUE_DATE_TIME_PATTERN_KEY, DEFAULT_PARTITION_VALUE_DATE_TIME_PATTERN);

    // Resolve the zone id once so the formatter is constructed in a single place.
    String timeZoneId = config.hasPath(PARTITION_VALUE_DATE_TIME_TIMEZONE_KEY)
        ? config.getString(PARTITION_VALUE_DATE_TIME_TIMEZONE_KEY)
        : DEFAULT_PARTITION_VALUE_DATE_TIME_TIMEZONE;
    this.formatter = DateTimeFormat.forPattern(this.pattern).withZone(DateTimeZone.forID(timeZoneId));

    this.partitionKeyName = ConfigUtils.getString(config, PARTITION_KEY_NAME_KEY, DEFAULT_PARTITION_KEY_NAME);
    this.partitionKeyNamePredicate = new Predicate<FieldSchema>() {
      @Override
      public boolean apply(FieldSchema input) {
        return StringUtils.equalsIgnoreCase(input.getName(), DatePartitionHiveVersionFinder.this.partitionKeyName);
      }
    };
  }

  /**
   * Create a {@link TimestampedHiveDatasetVersion} from a {@link Partition}. The hive table is expected
   * to be date partitioned by {@link #partitionKeyName}. The partition value format must be {@link #pattern}.
   * <p>
   * The partition value is trimmed and, when it is longer than {@link #pattern}, cut down to the pattern's length
   * before parsing. A value shorter than the pattern is handed to the parser unchanged, so that a malformed value is
   * reported as an {@link IllegalArgumentException} instead of a {@link StringIndexOutOfBoundsException}.
   * </p>
   *
   * @param partition the hive partition to derive the version from
   * @return a version whose timestamp is the parsed partition value
   * @throws IllegalArgumentException when {@link #partitionKeyName} is not found in the <code>table</code>
   * @throws IllegalArgumentException when a value can not be found for {@link #partitionKeyName} in the <code>partition</code>
   * @throws IllegalArgumentException if the partition value can not be parsed with {@link #pattern}
   * {@inheritDoc}
   */
  @Override
  protected TimestampedHiveDatasetVersion getDatasetVersion(Partition partition) {

    // Position of the date key among the table's partition keys; the value is looked up at the same position.
    int index = Iterables.indexOf(partition.getTable().getPartitionKeys(), this.partitionKeyNamePredicate);

    if (index == -1) {
      throw new IllegalArgumentException(String
          .format("Failed to find partition key %s in the table %s", this.partitionKeyName,
              partition.getTable().getCompleteName()));
    }

    if (index >= partition.getValues().size()) {
      throw new IllegalArgumentException(String
          .format("Failed to find partition value for key %s in the partition %s", this.partitionKeyName,
              partition.getName()));
    }

    String rawValue = partition.getValues().get(index);
    if (rawValue == null) {
      // A null entry means there is no usable value; report it like a missing one rather than failing with an NPE.
      throw new IllegalArgumentException(String
          .format("Failed to find partition value for key %s in the partition %s", this.partitionKeyName,
              partition.getName()));
    }

    String dateValue = rawValue.trim();
    // NOTE(review): truncating to pattern.length() presumably drops a trailing suffix after the date; this only
    // lines up when the pattern text and the formatted date have the same length (no quoted literals) - confirm.
    int maxLength = this.pattern.length();
    if (dateValue.length() > maxLength) {
      dateValue = dateValue.substring(0, maxLength);
    }

    return new TimestampedHiveDatasetVersion(this.formatter.parseDateTime(dateValue), partition);
  }
}