/* * * * * Licensed to the Apache Software Foundation (ASF) under one * * or more contributor license agreements. See the NOTICE file * * distributed with this work for additional information * * regarding copyright ownership. The ASF licenses this file * * to you under the Apache License, Version 2.0 (the * * "License"); you may not use this file except in compliance * * with the License. You may obtain a copy of the License at * * * * http://www.apache.org/licenses/LICENSE-2.0 * * * * Unless required by applicable law or agreed to in writing, software * * distributed under the License is distributed on an "AS IS" BASIS, * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * * See the License for the specific language governing permissions and * * limitations under the License. * */ package org.apache.tez.runtime.library.conf; import javax.annotation.Nullable; import java.io.IOException; import java.util.Map; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.tez.common.TezUtils; import org.apache.tez.dag.api.UserPayload; import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; import org.apache.tez.runtime.library.common.ConfigUtils; import org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput; @InterfaceAudience.Public @InterfaceStability.Evolving /** * Configure {@link org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput} </p> * * Values will be picked up from tez-site if not specified, otherwise defaults from * {@link org.apache.tez.runtime.library.api.TezRuntimeConfiguration} will be used. */ public class OrderedPartitionedKVOutputConfig { /** * Currently supported sorter implementations */ public enum SorterImpl { /** Legacy sorter implementation based on Hadoop MapReduce shuffle impl. * Restricted to 2 GB memory limits. */ LEGACY, /** Pipeline sorter - a more efficient sorter that supports > 2 GB sort buffers */ PIPELINED } /** * Configure parameters which are specific to the Output. */ @InterfaceAudience.Private public static interface SpecificConfigBuilder<T> extends BaseConfigBuilder<T> { /** * Set the buffer size to use when sort the output * * @param sortBufferSize the size of the buffer in MB * @return instance of the current builder */ public T setSortBufferSize(int sortBufferSize); /** * Configure the combiner class * * @param combinerClassName the combiner class name * @return instance of the current builder */ public T setCombiner(String combinerClassName); /** * Configure the combiner class and it's associated configuration (specified as key-value * pairs). This method should only be used if the combiner requires some specific configuration. * {@link #setCombiner(String)} is the preferred method for setting a combiner. * * @param combinerClassName the combiner class name * @param combinerConf the combiner configuration. This can be null, and otherwise * is a {@link java.util.Map} of key-value pairs. The keys should * be limited to the ones required by the combiner. * @return instance of the current builder */ public T setCombiner(String combinerClassName, @Nullable Map<String, String> combinerConf); /** * Configure the number of threads to be used by the sorter * * @param numThreads the number of threads * @return instance of the current builder */ public T setSorterNumThreads(int numThreads); /** * Configure which sorter implementation to be used * * @param sorterImpl Use an in-built sorter implementations. * @return instance of the current builder */ public T setSorter(SorterImpl sorterImpl); } @SuppressWarnings("rawtypes") @InterfaceAudience.Public @InterfaceStability.Evolving public static class SpecificBuilder<E extends HadoopKeyValuesBasedBaseEdgeConfig.Builder> implements SpecificConfigBuilder<SpecificBuilder> { private final E edgeBuilder; private final Builder builder; SpecificBuilder(E edgeBuilder, Builder builder) { this.edgeBuilder = edgeBuilder; this.builder = builder; } @Override public SpecificBuilder<E> setSortBufferSize(int sortBufferSize) { builder.setSortBufferSize(sortBufferSize); return this; } public SpecificBuilder<E> setCombiner(String combinerClassName) { return this.setCombiner(combinerClassName, null); } @Override public SpecificBuilder<E> setCombiner(String combinerClassName, Map<String, String> combinerConf) { builder.setCombiner(combinerClassName, combinerConf); return this; } @Override public SpecificBuilder<E> setSorterNumThreads(int numThreads) { builder.setSorterNumThreads(numThreads); return this; } @Override public SpecificBuilder setSorter(SorterImpl sorterImpl) { builder.setSorter(sorterImpl); return this; } @Override public SpecificBuilder<E> setAdditionalConfiguration(String key, String value) { builder.setAdditionalConfiguration(key, value); return this; } @Override public SpecificBuilder<E> setAdditionalConfiguration(Map<String, String> confMap) { builder.setAdditionalConfiguration(confMap); return this; } @Override public SpecificBuilder<E> setFromConfiguration(Configuration conf) { builder.setFromConfiguration(conf); return this; } @Override public SpecificBuilder setFromConfigurationUnfiltered( Configuration conf) { builder.setFromConfigurationUnfiltered(conf); return this; } public E done() { return edgeBuilder; } } @InterfaceAudience.Private @VisibleForTesting Configuration conf; @InterfaceAudience.Private @VisibleForTesting OrderedPartitionedKVOutputConfig() { } private OrderedPartitionedKVOutputConfig(Configuration conf) { this.conf = conf; } /** * Get a UserPayload representation of the Configuration * @return a {@link org.apache.tez.dag.api.UserPayload} instance */ public UserPayload toUserPayload() { try { return TezUtils.createUserPayloadFromConf(conf); } catch (IOException e) { throw new RuntimeException(e); } } @InterfaceAudience.Private public void fromUserPayload(UserPayload payload) { try { this.conf = TezUtils.createConfFromUserPayload(payload); } catch (IOException e) { throw new RuntimeException(e); } } @InterfaceAudience.Private String toHistoryText() { if (conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT, TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) { return TezUtils.convertToHistoryText(conf); } return null; } public static Builder newBuilder(String keyClass, String valueClass, String partitionerClassName) { return newBuilder(keyClass, valueClass, partitionerClassName, null); } public static Builder newBuilder(String keyClass, String valueClass, String partitionerClassName, Map<String, String> partitionerConf) { return new Builder(keyClass, valueClass, partitionerClassName, partitionerConf); } @InterfaceAudience.Public @InterfaceStability.Evolving public static class Builder implements SpecificConfigBuilder<Builder> { private final Configuration conf = new Configuration(false); /** * Create a configuration builder for {@link org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput} * * @param keyClassName the key class name * @param valueClassName the value class name * @param partitionerClassName the partitioner class name * @param partitionerConf the partitioner configuration. This can be null, and is a {@link * java.util.Map} of key-value pairs. The keys should be limited to * the ones required by the partitioner. */ @InterfaceAudience.Private Builder(String keyClassName, String valueClassName, String partitionerClassName, @Nullable Map<String, String> partitionerConf) { this(); Preconditions.checkNotNull(keyClassName, "Key class name cannot be null"); Preconditions.checkNotNull(valueClassName, "Value class name cannot be null"); Preconditions.checkNotNull(partitionerClassName, "Partitioner class name cannot be null"); setKeyClassName(keyClassName); setValueClassName(valueClassName); setPartitioner(partitionerClassName, partitionerConf); } @InterfaceAudience.Private Builder() { Map<String, String> tezDefaults = ConfigUtils .extractConfigurationMap(TezRuntimeConfiguration.getTezRuntimeConfigDefaults(), OrderedPartitionedKVOutput.getConfigurationKeySet()); ConfigUtils.addConfigMapToConfiguration(this.conf, tezDefaults); ConfigUtils.addConfigMapToConfiguration(this.conf, TezRuntimeConfiguration.getOtherConfigDefaults()); } @InterfaceAudience.Private Builder setKeyClassName(String keyClassName) { Preconditions.checkNotNull(keyClassName, "Key class name cannot be null"); this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, keyClassName); return this; } @InterfaceAudience.Private Builder setValueClassName(String valueClassName) { Preconditions.checkNotNull(valueClassName, "Value class name cannot be null"); this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, valueClassName); return this; } @InterfaceAudience.Private Builder setPartitioner(String partitionerClassName, @Nullable Map<String, String> partitionerConf) { Preconditions.checkNotNull(partitionerClassName, "Partitioner class name cannot be null"); this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS, partitionerClassName); if (partitionerConf != null) { // Merging the confs for now. Change to be specific in the future. ConfigUtils.mergeConfsWithExclusions(this.conf, partitionerConf, TezRuntimeConfiguration.getRuntimeConfigKeySet()); } return this; } @Override public Builder setSortBufferSize(int sortBufferSize) { this.conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, sortBufferSize); return this; } @Override public Builder setCombiner(String combinerClassName) { return this.setCombiner(combinerClassName, null); } @Override public Builder setCombiner(String combinerClassName, Map<String, String> combinerConf) { this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_COMBINER_CLASS, combinerClassName); if (combinerConf != null) { // Merging the confs for now. Change to be specific in the future. ConfigUtils.mergeConfsWithExclusions(this.conf, combinerConf, TezRuntimeConfiguration.getRuntimeConfigKeySet()); } return this; } @Override public Builder setSorterNumThreads(int numThreads) { this.conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SORTER_SORT_THREADS, numThreads); return this; } @Override public Builder setSorter(SorterImpl sorterImpl) { Preconditions.checkNotNull(sorterImpl, "Sorter cannot be null"); this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_SORTER_CLASS, sorterImpl.name()); return this; } @SuppressWarnings("unchecked") @Override public Builder setAdditionalConfiguration(String key, String value) { Preconditions.checkNotNull(key, "Key cannot be null"); if (ConfigUtils.doesKeyQualify(key, Lists.newArrayList(OrderedPartitionedKVOutput.getConfigurationKeySet(), TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()), TezRuntimeConfiguration.getAllowedPrefixes())) { if (value == null) { this.conf.unset(key); } else { this.conf.set(key, value); } } return this; } @SuppressWarnings("unchecked") @Override public Builder setAdditionalConfiguration(Map<String, String> confMap) { Preconditions.checkNotNull(confMap, "ConfMap cannot be null"); Map<String, String> map = ConfigUtils.extractConfigurationMap(confMap, Lists.newArrayList(OrderedPartitionedKVOutput.getConfigurationKeySet(), TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()), TezRuntimeConfiguration.getAllowedPrefixes()); ConfigUtils.addConfigMapToConfiguration(this.conf, map); return this; } @SuppressWarnings("unchecked") @Override public Builder setFromConfiguration(Configuration conf) { // Maybe ensure this is the first call ? Otherwise this can end up overriding other parameters Preconditions.checkArgument(conf != null, "Configuration cannot be null"); Map<String, String> map = ConfigUtils.extractConfigurationMap(conf, Lists.newArrayList(OrderedPartitionedKVOutput.getConfigurationKeySet(), TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()), TezRuntimeConfiguration.getAllowedPrefixes()); ConfigUtils.addConfigMapToConfiguration(this.conf, map); return this; } @SuppressWarnings("unchecked") @Override public Builder setFromConfigurationUnfiltered(Configuration conf) { // Maybe ensure this is the first call ? Otherwise this can end up overriding other parameters Preconditions.checkArgument(conf != null, "Configuration cannot be null"); ConfigUtils.mergeConfs(this.conf, conf); return this; } /** * Set the key comparator class * * @param comparatorClassName the key comparator class name * @return instance of the current builder */ public Builder setKeyComparatorClass(String comparatorClassName) { return this.setKeyComparatorClass(comparatorClassName, null); } /** * Set the key comparator class and it's associated configuration. This method should only be * used if the comparator requires some specific configuration, which is typically not the * case. {@link #setKeyComparatorClass(String)} is the preferred method for setting a * comparator. * * @param comparatorClassName the key comparator class name * @param comparatorConf the comparator configuration. This can be null, and is a {@link * java.util.Map} of key-value pairs. The keys should be limited to * the ones required by the comparator. * @return instance of the current builder */ public Builder setKeyComparatorClass(String comparatorClassName, @Nullable Map<String, String> comparatorConf) { Preconditions.checkNotNull(comparatorClassName, "Comparator class name cannot be null"); this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_COMPARATOR_CLASS, comparatorClassName); if (comparatorConf != null) { // Merging the confs for now. Change to be specific in the future. ConfigUtils.mergeConfsWithExclusions(this.conf, comparatorConf, TezRuntimeConfiguration.getRuntimeConfigKeySet()); } return this; } public Builder setCompression(boolean enabled, @Nullable String compressionCodec, @Nullable Map<String, String> codecConf) { this.conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, enabled); if (enabled && compressionCodec != null) { this.conf .set(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS_CODEC, compressionCodec); } if (codecConf != null) { // Merging the confs for now. Change to be specific in the future. ConfigUtils.mergeConfsWithExclusions(this.conf, codecConf, TezRuntimeConfiguration.getRuntimeConfigKeySet()); } return this; } /** * Set serialization class and the relevant comparator to be used for sorting. * Providing custom serialization class could change the way, keys needs to be compared in * sorting. Providing invalid comparator here could create invalid results. * * @param serializationClassName * @param comparatorClassName * @param serializerConf the serializer configuration. This can be null, and is a * {@link java.util.Map} of key-value pairs. The keys should be limited * to the ones required by the comparator. * @return this object for further chained method calls */ public Builder setKeySerializationClass(String serializationClassName, String comparatorClassName, @Nullable Map<String, String> serializerConf) { Preconditions.checkArgument(serializationClassName != null, "serializationClassName cannot be null"); Preconditions.checkArgument(comparatorClassName != null, "comparator cannot be null"); this.conf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, serializationClassName + "," + conf.get(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY)); setKeyComparatorClass(comparatorClassName, null); if (serializerConf != null) { // Merging the confs for now. Change to be specific in the future. ConfigUtils.mergeConfsWithExclusions(this.conf, serializerConf, TezRuntimeConfiguration.getRuntimeConfigKeySet()); } return this; } /** * Set serialization class responsible for providing serializer/deserializer for values. * * @param serializationClassName * @param serializerConf the serializer configuration. This can be null, and is a * {@link java.util.Map} of key-value pairs. The keys should be limited * to the ones required by the comparator. * @return this object for further chained method calls */ public Builder setValueSerializationClass(String serializationClassName, @Nullable Map<String, String> serializerConf) { Preconditions.checkArgument(serializationClassName != null, "serializationClassName cannot be null"); this.conf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, serializationClassName + "," + conf.get(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY)); if (serializerConf != null) { // Merging the confs for now. Change to be specific in the future. ConfigUtils.mergeConfsWithExclusions(this.conf, serializerConf, TezRuntimeConfiguration.getRuntimeConfigKeySet()); } return this; } /** * Create the actual configuration instance. * * @return an instance of the Configuration */ public OrderedPartitionedKVOutputConfig build() { return new OrderedPartitionedKVOutputConfig(this.conf); } } }