/* * SonarQube * Copyright (C) 2009-2017 SonarSource SA * mailto:info AT sonarsource DOT com * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 3 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ package org.sonar.core.issue.tracking; import com.google.common.collect.LinkedHashMultimap; import com.google.common.collect.Lists; import com.google.common.collect.Multimap; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; class BlockRecognizer<RAW extends Trackable, BASE extends Trackable> { /** * If base source code is available, then detect code moves through block hashes. * Only the issues associated to a line can be matched here. */ void match(Input<RAW> rawInput, Input<BASE> baseInput, Tracking<RAW, BASE> tracking) { BlockHashSequence rawHashSequence = rawInput.getBlockHashSequence(); BlockHashSequence baseHashSequence = baseInput.getBlockHashSequence(); Multimap<Integer, RAW> rawsByLine = groupByLine(tracking.getUnmatchedRaws(), rawHashSequence); Multimap<Integer, BASE> basesByLine = groupByLine(tracking.getUnmatchedBases(), baseHashSequence); Map<Integer, HashOccurrence> occurrencesByHash = new HashMap<>(); for (Integer line : basesByLine.keySet()) { int hash = baseHashSequence.getBlockHashForLine(line); HashOccurrence hashOccurrence = occurrencesByHash.get(hash); if (hashOccurrence == null) { // first occurrence in base hashOccurrence = new HashOccurrence(); hashOccurrence.baseLine = line; hashOccurrence.baseCount = 1; occurrencesByHash.put(hash, hashOccurrence); } else { hashOccurrence.baseCount++; } } for (Integer line : rawsByLine.keySet()) { int hash = rawHashSequence.getBlockHashForLine(line); HashOccurrence hashOccurrence = occurrencesByHash.get(hash); if (hashOccurrence != null) { hashOccurrence.rawLine = line; hashOccurrence.rawCount++; } } for (HashOccurrence hashOccurrence : occurrencesByHash.values()) { if (hashOccurrence.baseCount == 1 && hashOccurrence.rawCount == 1) { // Guaranteed that baseLine has been moved to rawLine, so we can map all issues on baseLine to all issues on rawLine map(rawsByLine.get(hashOccurrence.rawLine), basesByLine.get(hashOccurrence.baseLine), tracking); basesByLine.removeAll(hashOccurrence.baseLine); rawsByLine.removeAll(hashOccurrence.rawLine); } } // Check if remaining number of lines exceeds threshold. It avoids processing too many combinations. if (basesByLine.keySet().size() * rawsByLine.keySet().size() >= 250_000) { return; } List<LinePair> possibleLinePairs = Lists.newArrayList(); for (Integer baseLine : basesByLine.keySet()) { for (Integer rawLine : rawsByLine.keySet()) { int weight = lengthOfMaximalBlock(baseInput.getLineHashSequence(), baseLine, rawInput.getLineHashSequence(), rawLine); possibleLinePairs.add(new LinePair(baseLine, rawLine, weight)); } } Collections.sort(possibleLinePairs, LinePairComparator.INSTANCE); for (LinePair linePair : possibleLinePairs) { // High probability that baseLine has been moved to rawLine, so we can map all issues on baseLine to all issues on rawLine map(rawsByLine.get(linePair.rawLine), basesByLine.get(linePair.baseLine), tracking); } } /** * @param startLineA number of line from first version of text (numbering starts from 1) * @param startLineB number of line from second version of text (numbering starts from 1) */ static int lengthOfMaximalBlock(LineHashSequence hashesA, int startLineA, LineHashSequence hashesB, int startLineB) { if (!hashesA.getHashForLine(startLineA).equals(hashesB.getHashForLine(startLineB))) { return 0; } int length = 0; int ai = startLineA; int bi = startLineB; while (ai <= hashesA.length() && bi <= hashesB.length() && hashesA.getHashForLine(ai).equals(hashesB.getHashForLine(bi))) { ai++; bi++; length++; } ai = startLineA; bi = startLineB; while (ai > 0 && bi > 0 && hashesA.getHashForLine(ai).equals(hashesB.getHashForLine(bi))) { ai--; bi--; length++; } // Note that position (startA, startB) was counted twice return length - 1; } private void map(Collection<RAW> raws, Collection<BASE> bases, Tracking<RAW, BASE> result) { for (RAW raw : raws) { for (BASE base : bases) { if (result.containsUnmatchedBase(base) && base.getRuleKey().equals(raw.getRuleKey())) { result.match(raw, base); break; } } } } private static <T extends Trackable> Multimap<Integer, T> groupByLine(Iterable<T> trackables, BlockHashSequence hashSequence) { Multimap<Integer, T> result = LinkedHashMultimap.create(); for (T trackable : trackables) { Integer line = trackable.getLine(); if (hashSequence.hasLine(line)) { result.put(line, trackable); } } return result; } private static class LinePair { int baseLine; int rawLine; int weight; public LinePair(int baseLine, int rawLine, int weight) { this.baseLine = baseLine; this.rawLine = rawLine; this.weight = weight; } } private static class HashOccurrence { int baseLine; int rawLine; int baseCount; int rawCount; } private enum LinePairComparator implements Comparator<LinePair> { INSTANCE; @Override public int compare(LinePair o1, LinePair o2) { int weightDiff = o2.weight - o1.weight; if (weightDiff != 0) { return weightDiff; } else { return Math.abs(o1.baseLine - o1.rawLine) - Math.abs(o2.baseLine - o2.rawLine); } } } }