/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tez.mapreduce.input;

import javax.annotation.Nullable;
import java.io.IOException;
import java.net.URI;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;

import com.google.protobuf.ByteString;
import org.apache.tez.runtime.api.ProgressFailedException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitIndex;
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo;
import org.apache.hadoop.security.Credentials;
import org.apache.tez.common.TezUtils;
import org.apache.tez.common.counters.TaskCounter;
import org.apache.tez.dag.api.DataSourceDescriptor;
import org.apache.tez.dag.api.InputDescriptor;
import org.apache.tez.dag.api.InputInitializerDescriptor;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.dag.api.VertexLocationHint;
import org.apache.tez.mapreduce.common.MRInputAMSplitGenerator;
import org.apache.tez.mapreduce.common.MRInputSplitDistributor;
import org.apache.tez.mapreduce.hadoop.InputSplitInfo;
import org.apache.tez.mapreduce.hadoop.MRHelpers;
import org.apache.tez.mapreduce.hadoop.MRInputHelpers;
import org.apache.tez.mapreduce.hadoop.MRJobConfig;
import org.apache.tez.mapreduce.input.base.MRInputBase;
import org.apache.tez.mapreduce.lib.MRInputUtils;
import org.apache.tez.mapreduce.lib.MRReader;
import org.apache.tez.mapreduce.lib.MRReaderMapReduce;
import org.apache.tez.mapreduce.lib.MRReaderMapred;
import org.apache.tez.mapreduce.protos.MRRuntimeProtos;
import org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto;
import org.apache.tez.runtime.api.Event;
import org.apache.tez.runtime.api.Input;
import org.apache.tez.runtime.api.InputContext;
import org.apache.tez.runtime.api.events.InputDataInformationEvent;
import org.apache.tez.runtime.library.api.KeyValueReader;
import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

/**
 * {@link MRInput} is an {@link Input} which provides key/value pairs
 * for the consumer.
 *
 * It is compatible with all standard Apache Hadoop MapReduce
 * {@link InputFormat} implementations.
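 * <p>
 * A minimal, illustrative way to wire this Input into a DAG (the input path,
 * input format, and the pre-built vertex are assumptions for the sketch, not
 * requirements):
 * <pre>{@code
 * DataSourceDescriptor dataSource = MRInput
 *     .createConfigBuilder(new Configuration(), TextInputFormat.class, "/path/to/input")
 *     .build();
 * vertex.addDataSource("MRInput", dataSource);
 * }</pre>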
 * <p>
 * This class is not meant to be extended by external projects.
 */
@Public
public class MRInput extends MRInputBase {

  @Private
  public static final String TEZ_MAPREDUCE_DAG_INDEX = "tez.mapreduce.dag.index";
  @Private
  public static final String TEZ_MAPREDUCE_DAG_NAME = "tez.mapreduce.dag.name";
  @Private
  public static final String TEZ_MAPREDUCE_VERTEX_INDEX = "tez.mapreduce.vertex.index";
  @Private
  public static final String TEZ_MAPREDUCE_VERTEX_NAME = "tez.mapreduce.vertex.name";
  @Private
  public static final String TEZ_MAPREDUCE_TASK_INDEX = "tez.mapreduce.task.index";
  @Private
  public static final String TEZ_MAPREDUCE_TASK_ATTEMPT_INDEX =
      "tez.mapreduce.task.attempt.index";
  @Private
  public static final String TEZ_MAPREDUCE_INPUT_INDEX = "tez.mapreduce.input.index";
  @Private
  public static final String TEZ_MAPREDUCE_INPUT_NAME = "tez.mapreduce.input.name";
  @Private
  public static final String TEZ_MAPREDUCE_APPLICATION_ID = "tez.mapreduce.application.id";
  @Private
  public static final String TEZ_MAPREDUCE_UNIQUE_IDENTIFIER = "tez.mapreduce.unique.identifier";
  @Private
  public static final String TEZ_MAPREDUCE_DAG_ATTEMPT_NUMBER =
      "tez.mapreduce.dag.attempt.number";
  @Private
  public static final String TEZ_MAPREDUCE_DAG_ID = "tez.mapreduce.dag.id";
  @Private
  public static final String TEZ_MAPREDUCE_VERTEX_ID = "tez.mapreduce.vertex.id";
  @Private
  public static final String TEZ_MAPREDUCE_TASK_ID = "tez.mapreduce.task.id";
  @Private
  public static final String TEZ_MAPREDUCE_TASK_ATTEMPT_ID = "tez.mapreduce.task.attempt.id";

  /**
   * Helper class to configure {@link MRInput}.
   */
  public static class MRInputConfigBuilder {
    final Configuration conf;
    final Class<?> inputFormat;
    final boolean inputFormatProvided;
    boolean useNewApi;
    boolean groupSplitsInAM = true;
    boolean sortSplitsInAM = true;
    boolean generateSplitsInAM = true;
    String inputClassName = MRInput.class.getName();
    boolean getCredentialsForSourceFilesystem = true;
    String inputPaths = null;
    InputInitializerDescriptor customInitializerDescriptor = null;

    MRInputConfigBuilder(Configuration conf, Class<?> inputFormatParam) {
      this.conf = conf;
      if (inputFormatParam != null) {
        inputFormatProvided = true;
        this.inputFormat = inputFormatParam;
        if (org.apache.hadoop.mapred.InputFormat.class.isAssignableFrom(inputFormatParam)) {
          useNewApi = false;
        } else if (org.apache.hadoop.mapreduce.InputFormat.class
            .isAssignableFrom(inputFormatParam)) {
          useNewApi = true;
        } else {
          throw new TezUncheckedException("inputFormat must be assignable from either "
              + "org.apache.hadoop.mapred.InputFormat or "
              + "org.apache.hadoop.mapreduce.InputFormat."
              + " Given: " + inputFormatParam.getName());
        }
      } else {
        inputFormatProvided = false;
        useNewApi = conf.getBoolean(MRJobConfig.NEW_API_MAPPER_CONFIG, true);
        try {
          if (useNewApi) {
            this.inputFormat =
                conf.getClassByName(conf.get(MRJobConfig.INPUT_FORMAT_CLASS_ATTR));
            Preconditions.checkState(org.apache.hadoop.mapreduce.InputFormat.class
                .isAssignableFrom(this.inputFormat));
          } else {
            this.inputFormat = conf.getClassByName(conf.get("mapred.input.format.class"));
            Preconditions.checkState(org.apache.hadoop.mapred.InputFormat.class
                .isAssignableFrom(this.inputFormat));
          }
        } catch (ClassNotFoundException e) {
          throw new TezUncheckedException(e);
        }
        initializeInputPath();
      }
    }

    MRInputConfigBuilder setInputClassName(String className) {
      this.inputClassName = className;
      return this;
    }
    private MRInputConfigBuilder setInputPaths(String inputPaths) {
      if (!(org.apache.hadoop.mapred.FileInputFormat.class.isAssignableFrom(inputFormat) ||
          FileInputFormat.class.isAssignableFrom(inputFormat))) {
        throw new TezUncheckedException("When setting inputPaths the inputFormat must be " +
            "assignable from either org.apache.hadoop.mapred.FileInputFormat or " +
            "org.apache.hadoop.mapreduce.lib.input.FileInputFormat. " +
            "Otherwise use the non-path configBuilder." +
            " Given: " + inputFormat.getName());
      }
      conf.set(FileInputFormat.INPUT_DIR, inputPaths);
      this.inputPaths = inputPaths;
      return this;
    }

    private void initializeInputPath() {
      Preconditions.checkState(!inputFormatProvided,
          "Should only be invoked when no inputFormat is provided");
      if (org.apache.hadoop.mapred.FileInputFormat.class.isAssignableFrom(inputFormat) ||
          FileInputFormat.class.isAssignableFrom(inputFormat)) {
        inputPaths = conf.get(FileInputFormat.INPUT_DIR);
      }
    }

    /**
     * Set whether splits should be grouped in the Tez App Master (default true).
     *
     * @param value whether to group splits in the AM or not
     * @return {@link org.apache.tez.mapreduce.input.MRInput.MRInputConfigBuilder}
     */
    public MRInputConfigBuilder groupSplits(boolean value) {
      groupSplitsInAM = value;
      return this;
    }

    /**
     * Set whether splits should be sorted (default true).
     *
     * @param value whether to sort splits in the AM or not
     * @return {@link org.apache.tez.mapreduce.input.MRInput.MRInputConfigBuilder}
     */
    public MRInputConfigBuilder sortSplits(boolean value) {
      sortSplitsInAM = value;
      return this;
    }

    /**
     * Set whether splits should be generated in the Tez App Master (default true).
     *
     * @param value whether to generate splits in the AM or not
     * @return {@link org.apache.tez.mapreduce.input.MRInput.MRInputConfigBuilder}
     */
    public MRInputConfigBuilder generateSplitsInAM(boolean value) {
      generateSplitsInAM = value;
      return this;
    }

    /**
     * Get the credentials for the inputPaths from their {@link FileSystem}s.
     * Use this method to turn credential fetching off when not using a {@link FileSystem}
     * or when {@link Credentials} are not supported.
     *
     * @param value whether to get credentials or not (true by default)
     * @return {@link org.apache.tez.mapreduce.input.MRInput.MRInputConfigBuilder}
     */
    public MRInputConfigBuilder getCredentialsForSourceFileSystem(boolean value) {
      getCredentialsForSourceFilesystem = value;
      return this;
    }

    /**
     * This method is intended to be used when a custom
     * {@link org.apache.tez.runtime.api.InputInitializer} is being used along with MRInput. If a
     * custom descriptor is used, the config builder will not be able to set up location hints,
     * parallelism, etc., and configuring the {@link org.apache.tez.dag.api.Vertex} on which this
     * Input is used is the responsibility of the user.
     *
     * Credential fetching can be controlled via the
     * {@link #getCredentialsForSourceFileSystem(boolean)} method. Whether grouping is enabled
     * or not can be controlled via the {@link #groupSplits(boolean)} method.
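     * <p>
     * An illustrative sketch, where MyInputInitializer is a hypothetical
     * {@link org.apache.tez.runtime.api.InputInitializer} implementation:
     * <pre>{@code
     * DataSourceDescriptor dataSource = MRInput
     *     .createConfigBuilder(conf, TextInputFormat.class, "/path/to/input")
     *     .setCustomInitializerDescriptor(
     *         InputInitializerDescriptor.create(MyInputInitializer.class.getName()))
     *     .build();
     * }</pre>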
     *
     * @param customInitializerDescriptor the initializer descriptor
     * @return {@link org.apache.tez.mapreduce.input.MRInput.MRInputConfigBuilder}
     */
    public MRInputConfigBuilder setCustomInitializerDescriptor(
        InputInitializerDescriptor customInitializerDescriptor) {
      this.customInitializerDescriptor = customInitializerDescriptor;
      return this;
    }

    /**
     * Create the {@link DataSourceDescriptor}.
     *
     * @return {@link DataSourceDescriptor}
     */
    public DataSourceDescriptor build() {
      if (org.apache.hadoop.mapred.FileInputFormat.class.isAssignableFrom(inputFormat) ||
          FileInputFormat.class.isAssignableFrom(inputFormat)) {
        if (inputPaths == null) {
          throw new TezUncheckedException(
              "InputPaths must be specified for InputFormats based on "
                  + FileInputFormat.class.getName() + " or "
                  + org.apache.hadoop.mapred.FileInputFormat.class.getName());
        }
      }
      try {
        if (this.customInitializerDescriptor != null) {
          return createCustomDataSource();
        } else {
          if (generateSplitsInAM) {
            return createGeneratorDataSource();
          } else {
            return createDistributorDataSource();
          }
        }
      } catch (Exception e) {
        throw new TezUncheckedException(e);
      }
    }

    private DataSourceDescriptor createDistributorDataSource() throws IOException {
      InputSplitInfo inputSplitInfo;
      setupBasicConf(conf);
      try {
        inputSplitInfo = MRInputHelpers.generateInputSplitsToMem(conf, false, true, 0);
      } catch (Exception e) {
        throw new TezUncheckedException(e);
      }
      MRHelpers.translateMRConfToTez(conf);

      UserPayload payload = MRInputHelpersInternal.createMRInputPayload(conf,
          inputSplitInfo.getSplitsProto());
      Credentials credentials = null;
      if (getCredentialsForSourceFilesystem && inputSplitInfo.getCredentials() != null) {
        credentials = inputSplitInfo.getCredentials();
      }
      DataSourceDescriptor ds = DataSourceDescriptor.create(
          InputDescriptor.create(inputClassName).setUserPayload(payload),
          InputInitializerDescriptor.create(MRInputSplitDistributor.class.getName()),
          inputSplitInfo.getNumTasks(), credentials,
          VertexLocationHint.create(inputSplitInfo.getTaskLocationHints()), null);
      if (conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
          TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) {
        ds.getInputDescriptor().setHistoryText(TezUtils.convertToHistoryText(conf));
      }
      return ds;
    }

    private DataSourceDescriptor createCustomDataSource() throws IOException {
      setupBasicConf(conf);
      MRHelpers.translateMRConfToTez(conf);
      Collection<URI> uris = maybeGetURIsForCredentials();
      UserPayload payload = MRInputHelpersInternal.createMRInputPayload(
          conf, groupSplitsInAM, sortSplitsInAM);
      DataSourceDescriptor ds = DataSourceDescriptor
          .create(InputDescriptor.create(inputClassName).setUserPayload(payload),
              customInitializerDescriptor, null);
      if (conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
          TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) {
        ds.getInputDescriptor().setHistoryText(TezUtils.convertToHistoryText(conf));
      }
      if (uris != null) {
        ds.addURIsForCredentials(uris);
      }
      return ds;
    }

    private DataSourceDescriptor createGeneratorDataSource() throws IOException {
      setupBasicConf(conf);
      MRHelpers.translateMRConfToTez(conf);
      Collection<URI> uris = maybeGetURIsForCredentials();

      UserPayload payload = MRInputHelpersInternal.createMRInputPayload(
          conf, groupSplitsInAM, sortSplitsInAM);
      DataSourceDescriptor ds = DataSourceDescriptor.create(
          InputDescriptor.create(inputClassName).setUserPayload(payload),
          InputInitializerDescriptor.create(MRInputAMSplitGenerator.class.getName()),
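          // Credentials are passed as null here; any filesystem credentials required for the
          // input paths are attached separately below via addURIsForCredentials().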
          null);
      if (conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
          TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) {
        ds.getInputDescriptor().setHistoryText(TezUtils.convertToHistoryText(conf));
      }
      if (uris != null) {
        ds.addURIsForCredentials(uris);
      }
      return ds;
    }

    private void setupBasicConf(Configuration inputConf) {
      if (inputFormatProvided) {
        inputConf.setBoolean(MRJobConfig.NEW_API_MAPPER_CONFIG, useNewApi);
        if (useNewApi) {
          inputConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, inputFormat.getName());
        } else {
          inputConf.set("mapred.input.format.class", inputFormat.getName());
        }
      }
    }

    private Collection<URI> maybeGetURIsForCredentials() {
      if (getCredentialsForSourceFilesystem && inputPaths != null) {
        try {
          List<URI> uris = Lists.newLinkedList();
          for (String inputPath : inputPaths.split(",")) {
            Path path = new Path(inputPath);
            FileSystem fs = path.getFileSystem(conf);
            Path qPath = fs.makeQualified(path);
            uris.add(qPath.toUri());
          }
          return uris;
        } catch (IOException e) {
          throw new TezUncheckedException(e);
        }
      }
      return null;
    }
  }

  /**
   * Create an {@link org.apache.tez.mapreduce.input.MRInput.MRInputConfigBuilder}.
   * <p/>
   * The preferred usage model is to provide all of the parameters, and use methods to configure
   * the Input.
   * <p/>
   * For legacy applications, which may already have a fully configured {@link Configuration}
   * instance, the inputFormat can be specified as null.
   *
   * @param conf Configuration for the {@link MRInput}. This configuration instance will be
   *             modified in place
   * @param inputFormat InputFormat derived class. This can be null. If the InputFormat specified
   *                    is null, the provided configuration should be complete.
   * @return {@link org.apache.tez.mapreduce.input.MRInput.MRInputConfigBuilder}
   */
  public static MRInputConfigBuilder createConfigBuilder(Configuration conf,
      @Nullable Class<?> inputFormat) {
    return new MRInputConfigBuilder(conf, inputFormat);
  }

  /**
   * Create an {@link org.apache.tez.mapreduce.input.MRInput.MRInputConfigBuilder}
   * for {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat}
   * or {@link org.apache.hadoop.mapred.FileInputFormat} based InputFormats.
   * <p/>
   * The preferred usage model is to provide all of the parameters, and use methods to configure
   * the Input.
   * <p/>
   * For legacy applications, which may already have a fully configured {@link Configuration}
   * instance, the inputFormat and inputPaths can be specified as null.
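   * <p>
   * An illustrative example; the paths and builder options chosen here are assumptions
   * for the sketch, not requirements:
   * <pre>{@code
   * DataSourceDescriptor dataSource = MRInput
   *     .createConfigBuilder(conf, SequenceFileInputFormat.class, "/data/in1,/data/in2")
   *     .groupSplits(false)
   *     .generateSplitsInAM(false)
   *     .build();
   * }</pre>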
   *
   * @param conf Configuration for the {@link MRInput}. This configuration instance will be
   *             modified in place
   * @param inputFormat InputFormat derived class. This can be null. If the InputFormat specified
   *                    is null, the provided configuration should be complete.
   * @param inputPaths Comma separated input paths
   * @return {@link org.apache.tez.mapreduce.input.MRInput.MRInputConfigBuilder}
   */
  public static MRInputConfigBuilder createConfigBuilder(Configuration conf,
      @Nullable Class<?> inputFormat, @Nullable String inputPaths) {
    MRInputConfigBuilder configurer = new MRInputConfigBuilder(conf, inputFormat);
    if (inputPaths != null) {
      return configurer.setInputPaths(inputPaths);
    }
    return configurer;
  }

  private static final Logger LOG = LoggerFactory.getLogger(MRInput.class);

  private final ReentrantLock rrLock = new ReentrantLock();
  private final Condition rrInited = rrLock.newCondition();

  private volatile boolean eventReceived = false;

  private boolean readerCreated = false;

  protected MRReader mrReader;

  protected TaskSplitIndex splitMetaInfo = new TaskSplitIndex();

  // Potential counters - #splits, #totalSize, #actualBytesRead
  @Private
  volatile boolean splitInfoViaEvents;

  public MRInput(InputContext inputContext, int numPhysicalInputs) {
    super(inputContext, numPhysicalInputs);
  }

  @Override
  public List<Event> initialize() throws IOException {
    super.initialize();
    getContext().inputIsReady();
    this.splitInfoViaEvents = jobConf.getBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS,
        MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS_DEFAULT);
    LOG.info(getContext().getSourceVertexName() + " using new mapreduce API=" + useNewApi
        + ", split via event=" + splitInfoViaEvents + ", numPhysicalInputs="
        + getNumPhysicalInputs());
    initializeInternal();
    return null;
  }

  @Override
  public void start() {
    Preconditions.checkState(getNumPhysicalInputs() == 0 || getNumPhysicalInputs() == 1,
        "Expecting 0 or 1 physical input for MRInput");
  }

  @Private
  void initializeInternal() throws IOException {
    // The lock is taken primarily for memory visibility of mrReader.
    rrLock.lock();
    try {
      if (splitInfoViaEvents) {
        if (useNewApi) {
          mrReader = new MRReaderMapReduce(jobConf, getContext().getCounters(),
              inputRecordCounter, getContext().getApplicationId().getClusterTimestamp(),
              getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(),
              getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), getContext());
        } else {
          mrReader = new MRReaderMapred(jobConf, getContext().getCounters(), inputRecordCounter,
              getContext());
        }
      } else {
        TaskSplitMetaInfo[] allMetaInfo = MRInputUtils.readSplits(jobConf);
        TaskSplitMetaInfo thisTaskMetaInfo = allMetaInfo[getContext().getTaskIndex()];
        TaskSplitIndex splitMetaInfo = new TaskSplitIndex(thisTaskMetaInfo.getSplitLocation(),
            thisTaskMetaInfo.getStartOffset());
        long splitLength = -1;
        if (useNewApi) {
          org.apache.hadoop.mapreduce.InputSplit newInputSplit = MRInputUtils
              .getNewSplitDetailsFromDisk(splitMetaInfo, jobConf, getContext().getCounters()
                  .findCounter(TaskCounter.SPLIT_RAW_BYTES));
          try {
            splitLength = newInputSplit.getLength();
          } catch (InterruptedException e) {
            LOG.warn("Got interrupted while reading split length: ", e);
          }
          mrReader = new MRReaderMapReduce(jobConf, newInputSplit, getContext().getCounters(),
              inputRecordCounter, getContext().getApplicationId().getClusterTimestamp(),
              getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(),
              getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), getContext());
        } else {
          org.apache.hadoop.mapred.InputSplit oldInputSplit = MRInputUtils
              .getOldSplitDetailsFromDisk(splitMetaInfo, jobConf, getContext().getCounters()
                  .findCounter(TaskCounter.SPLIT_RAW_BYTES));
          splitLength = oldInputSplit.getLength();
          mrReader = new MRReaderMapred(jobConf, oldInputSplit, getContext().getCounters(),
              inputRecordCounter, getContext());
        }
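        // Record the split size in the INPUT_SPLIT_LENGTH_BYTES counter when it is known.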
        if (splitLength != -1) {
          getContext().getCounters().findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES)
              .increment(splitLength);
        }
      }
    } finally {
      rrLock.unlock();
    }
    LOG.info("Initialized MRInput: " + getContext().getSourceVertexName());
  }

  /**
   * Returns a {@link KeyValueReader} that can be used to read
   * MapReduce compatible key/value data. An exception will be thrown if next()
   * is invoked after it has returned false, whether end-of-input was signalled by
   * the framework or by the underlying InputFormat.
   */
  @Override
  public KeyValueReader getReader() throws IOException {
    Preconditions.checkState(!readerCreated,
        "Only a single instance of record reader can be created for this input.");
    readerCreated = true;
    if (getNumPhysicalInputs() == 0) {
      return new KeyValueReader() {
        @Override
        public boolean next() throws IOException {
          getContext().notifyProgress();
          return false;
        }

        @Override
        public Object getCurrentKey() throws IOException {
          return null;
        }

        @Override
        public Object getCurrentValue() throws IOException {
          return null;
        }
      };
    }

    rrLock.lock();
    try {
      if (!mrReader.isSetup()) {
        checkAndAwaitRecordReaderInitialization();
      }
    } finally {
      rrLock.unlock();
    }
    return mrReader;
  }

  @Override
  public void handleEvents(List<Event> inputEvents) throws Exception {
    if (getNumPhysicalInputs() == 0) {
      throw new IllegalStateException(
          "Unexpected event. MRInput has been setup to receive 0 events");
    }
    if (eventReceived || inputEvents.size() != 1) {
      throw new IllegalStateException(
          "MRInput expects only a single input. Received: current eventListSize: "
              + inputEvents.size() + ", received previous input: " + eventReceived);
    }
    Event event = inputEvents.iterator().next();
    Preconditions.checkArgument(event instanceof InputDataInformationEvent,
        getClass().getSimpleName()
            + " can only handle a single event of type: "
            + InputDataInformationEvent.class.getSimpleName());

    processSplitEvent((InputDataInformationEvent) event);
  }

  @Override
  public List<Event> close() throws IOException {
    mrReader.close();
    long inputRecords = getContext().getCounters()
        .findCounter(TaskCounter.INPUT_RECORDS_PROCESSED).getValue();
    getContext().getStatisticsReporter().reportItemsProcessed(inputRecords);
    return null;
  }

  /**
   * {@link MRInput} sets some additional parameters, such as the split location, which are
   * not part of the original configuration. This method returns those additional updates,
   * and should be used by Processors using the old MapReduce API with {@link MRInput}.
   *
   * @return the additional fields set by {@link MRInput}
   */
  public Configuration getConfigUpdates() {
    if (!useNewApi) {
      return ((MRReaderMapred) mrReader).getConfigUpdates();
    } else {
      return null;
    }
  }
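  // Illustrative only: a LogicalIOProcessor consuming this Input might read it roughly as
  // follows. The input name "MRInput" and the process() helper are assumptions for the sketch:
  //
  //   MRInput input = (MRInput) inputs.get("MRInput");
  //   input.start();
  //   KeyValueReader reader = input.getReader();
  //   while (reader.next()) {
  //     process(reader.getCurrentKey(), reader.getCurrentValue());
  //   }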
  @Override
  public float getProgress() throws ProgressFailedException, InterruptedException {
    try {
      return (mrReader != null) ? mrReader.getProgress() : 0.0f;
    } catch (IOException e) {
      throw new ProgressFailedException("getProgress encountered IOException ", e);
    }
  }

  void processSplitEvent(InputDataInformationEvent event) throws IOException {
    rrLock.lock();
    try {
      initFromEventInternal(event);
      if (LOG.isDebugEnabled()) {
        LOG.debug(getContext().getSourceVertexName() + " notifying on RecordReader initialized");
      }
      rrInited.signal();
    } finally {
      rrLock.unlock();
    }
  }

  void checkAndAwaitRecordReaderInitialization() throws IOException {
    assert rrLock.getHoldCount() == 1;
    rrLock.lock();
    try {
      if (LOG.isDebugEnabled()) {
        LOG.debug(getContext().getSourceVertexName() + " awaiting RecordReader initialization");
      }
      rrInited.await();
    } catch (Exception e) {
      throw new IOException("Interrupted waiting for RecordReader initialization");
    } finally {
      rrLock.unlock();
    }
  }

  @Private
  void initFromEvent(InputDataInformationEvent initEvent) throws IOException {
    rrLock.lock();
    try {
      initFromEventInternal(initEvent);
    } finally {
      rrLock.unlock();
    }
  }

  private void initFromEventInternal(InputDataInformationEvent initEvent) throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug(getContext().getSourceVertexName() + " initializing RecordReader from event");
    }
    Preconditions.checkState(initEvent != null, "InitEvent must be specified");
    MRSplitProto splitProto =
        MRSplitProto.parseFrom(ByteString.copyFrom(initEvent.getUserPayload()));
    Object splitObj = null;
    long splitLength = -1;
    if (useNewApi) {
      InputSplit split = MRInputUtils.getNewSplitDetailsFromEvent(splitProto, jobConf);
      splitObj = split;
      try {
        splitLength = split.getLength();
      } catch (InterruptedException e) {
        LOG.warn("Thread interrupted while getting split length: ", e);
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug(getContext().getSourceVertexName() + " split Details -> SplitClass: "
            + split.getClass().getName() + ", NewSplit: " + split + ", length: " + splitLength);
      }
    } else {
      org.apache.hadoop.mapred.InputSplit split =
          MRInputUtils.getOldSplitDetailsFromEvent(splitProto, jobConf);
      splitObj = split;
      splitLength = split.getLength();
      if (LOG.isDebugEnabled()) {
        LOG.debug(getContext().getSourceVertexName() + " split Details -> SplitClass: "
            + split.getClass().getName() + ", OldSplit: " + split + ", length: " + splitLength);
      }
    }
    if (splitLength != -1) {
      getContext().getCounters().findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES)
          .increment(splitLength);
    }
    mrReader.setSplit(splitObj);
    LOG.info(getContext().getSourceVertexName() + " initialized RecordReader from event");
  }

  private static class MRInputHelpersInternal extends MRInputHelpers {

    protected static UserPayload createMRInputPayload(Configuration conf, boolean isGrouped,
        boolean isSorted) throws IOException {
      return MRInputHelpers.createMRInputPayload(conf, null, isGrouped, isSorted);
    }

    protected static UserPayload createMRInputPayload(Configuration conf,
        MRRuntimeProtos.MRSplitsProto mrSplitsProto) throws IOException {
      return MRInputHelpers.createMRInputPayload(conf, mrSplitsProto, false, true);
    }
  }
}