/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tez.runtime.library.conf;
import javax.annotation.Nullable;
import java.util.Map;
import com.google.common.base.Preconditions;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.tez.dag.api.EdgeManagerPluginDescriptor;
import org.apache.tez.dag.api.EdgeProperty;
import org.apache.tez.dag.api.InputDescriptor;
import org.apache.tez.dag.api.OutputDescriptor;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput;
/**
* Configure payloads for the OrderedPartitionedKVOutput and OrderedGroupedKVInput pair </p>
*
* Values will be picked up from tez-site if not specified, otherwise defaults from
* {@link org.apache.tez.runtime.library.api.TezRuntimeConfiguration} will be used.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class OrderedPartitionedKVEdgeConfig
extends HadoopKeyValuesBasedBaseEdgeConfig {
private final OrderedPartitionedKVOutputConfig outputConf;
private final OrderedGroupedKVInputConfig inputConf;
private OrderedPartitionedKVEdgeConfig(
OrderedPartitionedKVOutputConfig outputConfiguration,
OrderedGroupedKVInputConfig inputConfiguration) {
this.outputConf = outputConfiguration;
this.inputConf = inputConfiguration;
}
/**
* Create a builder to configure the relevant Input and Output. </p> This method should only be
* used when using a custom Partitioner which requires specific Configuration. {@link
* #newBuilder(String, String, String)} is the preferred method to crate an instance of the
* Builder
*
* @param keyClassName the key class name
* @param valueClassName the value class name
* @param partitionerClassName the partitioner class name
* @param partitionerConf the partitioner configuration. This can be null, and is a {@link
* java.util.Map} of key-value pairs. The keys should be limited to
* the ones required by the partitioner.
* @return a builder to configure the edge
*/
public static Builder newBuilder(String keyClassName, String valueClassName,
String partitionerClassName,
@Nullable Map<String, String> partitionerConf) {
return new Builder(keyClassName, valueClassName, partitionerClassName, partitionerConf);
}
/**
* Create a builder to configure the relevant Input and Output
*
* @param keyClassName the key class name
* @param valueClassName the value class name
* @param partitionerClassName the partitioner class name
* @return a builder to configure the edge
*/
public static Builder newBuilder(String keyClassName, String valueClassName,
String partitionerClassName) {
return newBuilder(keyClassName, valueClassName, partitionerClassName, null);
}
@Override
public UserPayload getOutputPayload() {
return outputConf.toUserPayload();
}
@Override
public String getOutputClassName() {
return OrderedPartitionedKVOutput.class.getName();
}
@Override
public UserPayload getInputPayload() {
return inputConf.toUserPayload();
}
@Override
public String getOutputHistoryText() {
return outputConf.toHistoryText();
}
@Override
public String getInputHistoryText() {
return inputConf.toHistoryText();
}
@Override
public String getInputClassName() {
return inputConf.getInputClassName();
}
/**
* This is a convenience method for the typical usage of this edge, and creates an instance of
* {@link org.apache.tez.dag.api.EdgeProperty} which is likely to be used. </p>
* * In this case - DataMovementType.SCATTER_GATHER, EdgeProperty.DataSourceType.PERSISTED,
* EdgeProperty.SchedulingType.SEQUENTIAL
*
* @return an {@link org.apache.tez.dag.api.EdgeProperty} instance
*/
public EdgeProperty createDefaultEdgeProperty() {
EdgeProperty edgeProperty = EdgeProperty.create(EdgeProperty.DataMovementType.SCATTER_GATHER,
EdgeProperty.DataSourceType.PERSISTED, EdgeProperty.SchedulingType.SEQUENTIAL,
OutputDescriptor.create(
getOutputClassName()).setUserPayload(getOutputPayload()),
InputDescriptor.create(
getInputClassName()).setUserPayload(getInputPayload()));
Utils.setEdgePropertyHistoryText(this, edgeProperty);
return edgeProperty;
}
/**
* This is a convenience method for creating an Edge descriptor based on the specified
* EdgeManagerDescriptor.
*
* @param edgeManagerDescriptor the custom edge specification
* @return an {@link org.apache.tez.dag.api.EdgeProperty} instance
*/
public EdgeProperty createDefaultCustomEdgeProperty(EdgeManagerPluginDescriptor edgeManagerDescriptor) {
Preconditions.checkNotNull(edgeManagerDescriptor, "EdgeManagerDescriptor cannot be null");
EdgeProperty edgeProperty =
EdgeProperty.create(edgeManagerDescriptor, EdgeProperty.DataSourceType.PERSISTED,
EdgeProperty.SchedulingType.SEQUENTIAL,
OutputDescriptor.create(getOutputClassName()).setUserPayload(getOutputPayload()),
InputDescriptor.create(getInputClassName()).setUserPayload(getInputPayload()));
Utils.setEdgePropertyHistoryText(this, edgeProperty);
return edgeProperty;
}
@InterfaceAudience.Public
@InterfaceStability.Evolving
public static class Builder extends HadoopKeyValuesBasedBaseEdgeConfig.Builder<Builder> {
private final OrderedPartitionedKVOutputConfig.Builder outputBuilder =
new OrderedPartitionedKVOutputConfig.Builder();
private final OrderedPartitionedKVOutputConfig.SpecificBuilder<OrderedPartitionedKVEdgeConfig.Builder>
specificOutputBuilder =
new OrderedPartitionedKVOutputConfig.SpecificBuilder<OrderedPartitionedKVEdgeConfig.Builder>(
this, outputBuilder);
private final OrderedGroupedKVInputConfig.Builder inputBuilder =
new OrderedGroupedKVInputConfig.Builder();
private final OrderedGroupedKVInputConfig.SpecificBuilder<OrderedPartitionedKVEdgeConfig.Builder>
specificInputBuilder =
new OrderedGroupedKVInputConfig.SpecificBuilder<OrderedPartitionedKVEdgeConfig.Builder>(this,
inputBuilder);
@InterfaceAudience.Private
Builder(String keyClassName, String valueClassName, String partitionerClassName,
Map<String, String> partitionerConf) {
outputBuilder.setKeyClassName(keyClassName);
outputBuilder.setValueClassName(valueClassName);
outputBuilder.setPartitioner(partitionerClassName, partitionerConf);
inputBuilder.setKeyClassName(keyClassName);
inputBuilder.setValueClassName(valueClassName);
}
/**
* Set the key comparator class
*
* @param comparatorClassName the key comparator class name
* @return instance of the current builder
*/
public Builder setKeyComparatorClass(String comparatorClassName) {
return setKeyComparatorClass(comparatorClassName, null);
}
/**
* Set the key comparator class and it's associated configuration. This method should only be
* used if the comparator requires some specific configuration, which is typically not the
* case. {@link #setKeyComparatorClass(String)} is the preferred method for setting a
* comparator.
*
* @param comparatorClassName the key comparator class name
* @param comparatorConf the comparator configuration. This can be null, and is a {@link
* java.util.Map} of key-value pairs. The keys should be limited to
* the ones required by the comparator.
* @return instance of the current builder
*/
public Builder setKeyComparatorClass(String comparatorClassName,
@Nullable Map<String, String> comparatorConf) {
outputBuilder.setKeyComparatorClass(comparatorClassName, comparatorConf);
inputBuilder.setKeyComparatorClass(comparatorClassName, comparatorConf);
return this;
}
/**
* Set serialization class and the relevant comparator to be used for sorting.
* Providing custom serialization class could change the way, keys needs to be compared in
* sorting. Providing invalid comparator here could create invalid results.
*
* @param serializationClassName
* @param comparatorClassName
* @param serializerConf the serializer configuration. This can be null, and is a
* {@link java.util.Map} of key-value pairs. The keys should be limited
* to the ones required by the comparator.
* @return this object for further chained method calls
*/
public Builder setKeySerializationClass(String serializationClassName,
String comparatorClassName, @Nullable Map<String, String> serializerConf) {
outputBuilder.setKeySerializationClass(serializationClassName, comparatorClassName, serializerConf);
inputBuilder.setKeySerializationClass(serializationClassName, comparatorClassName, serializerConf);
return this;
}
/**
* Set serialization class responsible for providing serializer/deserializer for values.
*
* @param serializationClassName
* @param serializerConf the serializer configuration. This can be null, and is a
* {@link java.util.Map} of key-value pairs. The keys should be limited
* to the ones required by the comparator.
* @return this object for further chained method calls
*/
public Builder setValueSerializationClass(String serializationClassName,
@Nullable Map<String, String> serializerConf) {
outputBuilder.setValueSerializationClass(serializationClassName, serializerConf);
inputBuilder.setValueSerializationClass(serializationClassName, serializerConf);
return this;
}
@Override
public Builder setCompression(boolean enabled, @Nullable String compressionCodec,
@Nullable Map<String, String> codecConf) {
outputBuilder.setCompression(enabled, compressionCodec, codecConf);
inputBuilder.setCompression(enabled, compressionCodec, codecConf);
return this;
}
@Override
public Builder setAdditionalConfiguration(String key, String value) {
outputBuilder.setAdditionalConfiguration(key, value);
inputBuilder.setAdditionalConfiguration(key, value);
return this;
}
@Override
public Builder setAdditionalConfiguration(Map<String, String> confMap) {
outputBuilder.setAdditionalConfiguration(confMap);
inputBuilder.setAdditionalConfiguration(confMap);
return this;
}
@Override
/**
* Edge config options are derived from client-side tez-site.xml (recommended).
* Optionally invoke setFromConfiguration to override these config options via commandline arguments.
*
* @param conf
* @return this object for further chained method calls
*/
public Builder setFromConfiguration(Configuration conf) {
outputBuilder.setFromConfiguration(conf);
inputBuilder.setFromConfiguration(conf);
return this;
}
@Override
public Builder setFromConfigurationUnfiltered(Configuration conf) {
outputBuilder.setFromConfigurationUnfiltered(conf);
inputBuilder.setFromConfigurationUnfiltered(conf);
return this;
}
/**
* Configure the specific output
* @return a builder to configure the output
*/
public OrderedPartitionedKVOutputConfig.SpecificBuilder<Builder> configureOutput() {
return specificOutputBuilder;
}
/**
* Configure the specific input
* @return a builder to configure the input
*/
public OrderedGroupedKVInputConfig.SpecificBuilder<Builder> configureInput() {
return specificInputBuilder;
}
/**
* Build and return an instance of the configuration
* @return an instance of the acatual configuration
*/
public OrderedPartitionedKVEdgeConfig build() {
return new OrderedPartitionedKVEdgeConfig(outputBuilder.build(), inputBuilder.build());
}
}
}