BlockRecognizer.java example

Explorer
sonarqube-master
/*
 * SonarQube
 * Copyright (C) 2009-2017 SonarSource SA
 * mailto:info AT sonarsource DOT com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
package org.sonar.core.issue.tracking;

import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Matches unmatched raw and base issues of a {@link Tracking} by comparing the block hashes and
 * line hashes of the two inputs, in order to follow issues whose surrounding code was moved.
 *
 * @param <RAW> type of the trackables coming from the raw input
 * @param <BASE> type of the trackables coming from the base input
 */
class BlockRecognizer<RAW extends Trackable, BASE extends Trackable> {

  /**
   * The pairwise line comparison of {@code match} is skipped as soon as the number of
   * (base line, raw line) combinations reaches this value.
   */
  private static final long MAX_LINE_COMBINATIONS = 250_000L;

  /**
   * If base source code is available, then detect code moves through block hashes.
   * Only the issues located on a line known to the corresponding block hash sequence take part.
   *
   * <p>The matching runs in two passes:
   * <ol>
   *   <li>block hashes that occur on exactly one base line and exactly one raw line are matched directly;</li>
   *   <li>the remaining lines are compared pairwise, and the pairs sharing at least one identical line
   *   are processed from the longest common block to the shortest.</li>
   * </ol>
   *
   * @param rawInput provides the hash sequences of the raw side
   * @param baseInput provides the hash sequences of the base side
   * @param tracking supplies the unmatched raws/bases and receives the matches
   */
  void match(Input<RAW> rawInput, Input<BASE> baseInput, Tracking<RAW, BASE> tracking) {
    BlockHashSequence rawHashSequence = rawInput.getBlockHashSequence();
    BlockHashSequence baseHashSequence = baseInput.getBlockHashSequence();

    Multimap<Integer, RAW> rawsByLine = groupByLine(tracking.getUnmatchedRaws(), rawHashSequence);
    Multimap<Integer, BASE> basesByLine = groupByLine(tracking.getUnmatchedBases(), baseHashSequence);
    Map<Integer, HashOccurrence> occurrencesByHash = new HashMap<>();

    // Count, per block hash, on how many base lines carrying issues it appears
    for (Integer line : basesByLine.keySet()) {
      int hash = baseHashSequence.getBlockHashForLine(line);
      HashOccurrence hashOccurrence = occurrencesByHash.get(hash);
      if (hashOccurrence == null) {
        // first occurrence in base
        hashOccurrence = new HashOccurrence();
        hashOccurrence.baseLine = line;
        hashOccurrence.baseCount = 1;
        occurrencesByHash.put(hash, hashOccurrence);
      } else {
        hashOccurrence.baseCount++;
      }
    }

    // Count the raw lines carrying issues for the hashes seen in base; other hashes are ignored
    for (Integer line : rawsByLine.keySet()) {
      int hash = rawHashSequence.getBlockHashForLine(line);
      HashOccurrence hashOccurrence = occurrencesByHash.get(hash);
      if (hashOccurrence != null) {
        hashOccurrence.rawLine = line;
        hashOccurrence.rawCount++;
      }
    }

    for (HashOccurrence hashOccurrence : occurrencesByHash.values()) {
      if (hashOccurrence.baseCount == 1 && hashOccurrence.rawCount == 1) {
        // Guaranteed that baseLine has been moved to rawLine, so we can map all issues on baseLine to all issues on rawLine
        map(rawsByLine.get(hashOccurrence.rawLine), basesByLine.get(hashOccurrence.baseLine), tracking);
        // These lines are settled, keep them out of the pairwise comparison below
        basesByLine.removeAll(hashOccurrence.baseLine);
        rawsByLine.removeAll(hashOccurrence.rawLine);
      }
    }

    // Check if remaining number of lines exceeds threshold. It avoids processing too many combinations.
    if (isOverLimit(basesByLine.keySet().size(), rawsByLine.keySet().size())) {
      return;
    }

    List<LinePair> possibleLinePairs = Lists.newArrayList();
    for (Integer baseLine : basesByLine.keySet()) {
      for (Integer rawLine : rawsByLine.keySet()) {
        int weight = lengthOfMaximalBlock(baseInput.getLineHashSequence(), baseLine, rawInput.getLineHashSequence(), rawLine);
        // A weight of 0 means that the two lines differ: such a pair gives no evidence of a move
        if (weight > 0) {
          possibleLinePairs.add(new LinePair(baseLine, rawLine, weight));
        }
      }
    }
    Collections.sort(possibleLinePairs, LinePairComparator.INSTANCE);
    for (LinePair linePair : possibleLinePairs) {
      // High probability that baseLine has been moved to rawLine, so we can map all issues on baseLine to all issues on rawLine
      map(rawsByLine.get(linePair.rawLine), basesByLine.get(linePair.baseLine), tracking);
    }
  }

  /**
   * Tells whether comparing every base line with every raw line would reach the combination limit.
   * The product is computed as a {@code long} so that it cannot wrap around for large line counts.
   */
  private static boolean isOverLimit(int baseLineCount, int rawLineCount) {
    return (long) baseLineCount * rawLineCount >= MAX_LINE_COMBINATIONS;
  }

  /**
   * Computes the number of consecutive lines with equal hashes in both sequences, in the block
   * that contains the two given start lines. The block is extended forward and backward from the
   * start lines for as long as the hashes are equal and both positions stay within bounds.
   *
   * @param startLineA number of line from first version of text (numbering starts from 1)
   * @param startLineB number of line from second version of text (numbering starts from 1)
   * @return the length of the common block, or 0 if the hashes of the two start lines differ
   */
  static int lengthOfMaximalBlock(LineHashSequence hashesA, int startLineA, LineHashSequence hashesB, int startLineB) {
    if (!hashesA.getHashForLine(startLineA).equals(hashesB.getHashForLine(startLineB))) {
      return 0;
    }
    int length = 0;
    // Extend forward, start lines included
    int ai = startLineA;
    int bi = startLineB;
    while (ai <= hashesA.length() && bi <= hashesB.length() && hashesA.getHashForLine(ai).equals(hashesB.getHashForLine(bi))) {
      ai++;
      bi++;
      length++;
    }
    // Extend backward, start lines included again
    ai = startLineA;
    bi = startLineB;
    while (ai > 0 && bi > 0 && hashesA.getHashForLine(ai).equals(hashesB.getHashForLine(bi))) {
      ai--;
      bi--;
      length++;
    }
    // Note that position (startA, startB) was counted twice
    return length - 1;
  }

  /**
   * Matches each raw with the first base, in iteration order, that is still unmatched in
   * {@code result} and has the same rule key. A raw without such a base is left untouched.
   */
  private void map(Collection<RAW> raws, Collection<BASE> bases, Tracking<RAW, BASE> result) {
    // NOTE(review): only the base side is checked for being unmatched; a raw already matched through
    // an earlier line pair is not skipped here - confirm that Tracking#match tolerates this.
    for (RAW raw : raws) {
      for (BASE base : bases) {
        if (result.containsUnmatchedBase(base) && base.getRuleKey().equals(raw.getRuleKey())) {
          result.match(raw, base);
          break;
        }
      }
    }
  }

  /**
   * Groups the trackables by line, keeping insertion order. Trackables whose line is rejected by
   * {@code hashSequence.hasLine} are dropped.
   */
  private static <T extends Trackable> Multimap<Integer, T> groupByLine(Iterable<T> trackables, BlockHashSequence hashSequence) {
    Multimap<Integer, T> result = LinkedHashMultimap.create();
    for (T trackable : trackables) {
      Integer line = trackable.getLine();
      if (hashSequence.hasLine(line)) {
        result.put(line, trackable);
      }
    }
    return result;
  }

  /**
   * Candidate association between a base line and a raw line, weighted by the length of the
   * block of identical lines they share.
   */
  private static class LinePair {
    final int baseLine;
    final int rawLine;
    final int weight;

    LinePair(int baseLine, int rawLine, int weight) {
      this.baseLine = baseLine;
      this.rawLine = rawLine;
      this.weight = weight;
    }
  }

  /**
   * Occurrence counters of a single block hash among the base and raw lines carrying issues.
   * {@code baseLine} holds the first base line seen with the hash, {@code rawLine} the last raw line.
   */
  private static class HashOccurrence {
    int baseLine;
    int rawLine;
    int baseCount;
    int rawCount;
  }

  /**
   * Orders line pairs by decreasing weight. Pairs of equal weight are ordered by increasing
   * distance between the base line number and the raw line number.
   */
  private enum LinePairComparator implements Comparator<LinePair> {
    INSTANCE;

    @Override
    public int compare(LinePair o1, LinePair o2) {
      // Integer.compare instead of subtraction: same ordering, no risk of overflow
      int byWeightDesc = Integer.compare(o2.weight, o1.weight);
      if (byWeightDesc != 0) {
        return byWeightDesc;
      }
      return Integer.compare(Math.abs(o1.baseLine - o1.rawLine), Math.abs(o2.baseLine - o2.rawLine));
    }
  }
}