CarbonInputSplitTaskInfo.java example

Explorer
carbondata-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.carbondata.hadoop.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;

import org.apache.carbondata.core.datastore.block.Distributable;
import org.apache.carbondata.hadoop.CarbonInputSplit;

/**
 * Groups the {@link CarbonInputSplit}s that belong to one task, identified by a task id,
 * and exposes the node(s) on which most of those splits are located.
 */
public class CarbonInputSplitTaskInfo implements Distributable {

  /** Splits handled by this task; this is the list instance given to the constructor. */
  private final List<CarbonInputSplit> carbonBlockInfoList;

  /** Identifier of the task; also used as the sort key in {@link #compareTo(Distributable)}. */
  private final String taskId;

  /**
   * @return the task id given at construction time
   */
  public String getTaskId() {
    return taskId;
  }

  /**
   * @return the list of splits given at construction time (no defensive copy is made)
   */
  public List<CarbonInputSplit> getCarbonInputSplitList() {
    return carbonBlockInfoList;
  }

  /**
   * Creates a task info for the given task id and splits.
   *
   * @param taskId              identifier of the task
   * @param carbonSplitListInfo splits belonging to the task; the list is stored as is
   */
  public CarbonInputSplitTaskInfo(String taskId, List<CarbonInputSplit> carbonSplitListInfo) {
    this.taskId = taskId;
    this.carbonBlockInfoList = carbonSplitListInfo;
  }

  /**
   * Returns the locations of this task as computed by {@link #maxNoNodes(List)} over the
   * splits of this task.
   *
   * @return the node holding the most splits, or every node when all nodes hold equally many
   * @throws RuntimeException if the locations of a split cannot be read
   */
  @Override public String[] getLocations() {
    List<String> nodes = CarbonInputSplitTaskInfo.maxNoNodes(carbonBlockInfoList);
    return nodes.toArray(new String[nodes.size()]);
  }

  /**
   * Orders task infos by their task id using {@link String#compareTo(String)}.
   *
   * @param o the other task info; it is cast to {@link CarbonInputSplitTaskInfo}
   * @return negative, zero or positive as this task id is less than, equal to or greater
   *         than the other task id
   * @throws ClassCastException if {@code o} is not a {@link CarbonInputSplitTaskInfo}
   */
  @Override public int compareTo(Distributable o) {
    return taskId.compareTo(((CarbonInputSplitTaskInfo) o).getTaskId());
  }

  /**
   * Finds the node that is reported as a location by the largest number of splits.
   *
   * @param splitList splits whose locations are counted
   * @return a new mutable list: all nodes in sorted order when every node occurs equally
   *         often (this includes the empty result for input without any location), otherwise
   *         a single-element list with the most frequent node; among several nodes sharing
   *         the maximum the first one in sorted order is returned
   * @throws RuntimeException if the locations of a split cannot be read
   */
  public static List<String> maxNoNodes(List<CarbonInputSplit> splitList) {
    // TreeMap iterates the nodes in sorted order, which makes the result deterministic.
    Map<String, Integer> nodeAndOccurenceMapping = new TreeMap<>();

    // populate the map of node and number of occurrences of that node.
    for (CarbonInputSplit split : splitList) {
      try {
        for (String node : split.getLocations()) {
          Integer nodeOccurence = nodeAndOccurenceMapping.get(node);
          // Integer is immutable, so the updated count has to be stored back into the map;
          // incrementing only the local variable would leave every count at 1.
          nodeAndOccurenceMapping.put(node, null == nodeOccurence ? 1 : nodeOccurence + 1);
        }
      } catch (IOException e) {
        throw new RuntimeException("Fail to get location of split: " + split, e);
      }
    }

    int maxOccurence = 0;
    int minOccurence = Integer.MAX_VALUE;
    String maxNode = null;

    // find the most frequent node and, at the same time, the smallest count.
    for (Map.Entry<String, Integer> entry : nodeAndOccurenceMapping.entrySet()) {
      int occurence = entry.getValue();
      // strict comparison: on a tie the node that sorts first is kept.
      if (occurence > maxOccurence) {
        maxOccurence = occurence;
        maxNode = entry.getKey();
      }
      if (occurence < minOccurence) {
        minOccurence = occurence;
      }
    }

    // if all the nodes have equal occurrence (or there is no node at all) then return the
    // complete key set instead of a single max node.
    if (nodeAndOccurenceMapping.isEmpty() || minOccurence == maxOccurence) {
      return new ArrayList<>(nodeAndOccurenceMapping.keySet());
    }

    // otherwise one node clearly dominates; return only that node.
    List<String> node = new ArrayList<>(1);
    node.add(maxNode);
    return node;
  }

}