/*
* SonarQube
* Copyright (C) 2009-2017 SonarSource SA
* mailto:info AT sonarsource DOT com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.core.issue.tracking;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Matches still-unmatched raw issues with still-unmatched base issues by looking for code
 * that kept the same content but changed line number between the base and the raw input.
 *
 * <p>Matching is done in two passes (see {@link #match}): first on block hashes that are
 * unique on both sides, then on the longest run of identical line hashes around each
 * remaining candidate pair of lines.
 *
 * @param <RAW> type of the issues of the raw (new) input
 * @param <BASE> type of the issues of the base (previous) input
 */
class BlockRecognizer<RAW extends Trackable, BASE extends Trackable> {

  /**
   * Upper bound (exclusive) on the number of (base line, raw line) combinations that the
   * second pass is allowed to evaluate. Kept as a {@code long} so the comparison is done
   * in 64-bit arithmetic.
   */
  private static final long MAX_LINE_COMBINATIONS = 250_000L;

  /**
   * If base source code is available, then detect code moves through block hashes.
   * Only the issues associated to a line can be matched here.
   *
   * @param rawInput provides the block and line hash sequences of the raw input
   * @param baseInput provides the block and line hash sequences of the base input
   * @param tracking supplies the unmatched raws/bases and records every match found
   */
  void match(Input<RAW> rawInput, Input<BASE> baseInput, Tracking<RAW, BASE> tracking) {
    BlockHashSequence rawHashSequence = rawInput.getBlockHashSequence();
    BlockHashSequence baseHashSequence = baseInput.getBlockHashSequence();

    Multimap<Integer, RAW> rawsByLine = groupByLine(tracking.getUnmatchedRaws(), rawHashSequence);
    Multimap<Integer, BASE> basesByLine = groupByLine(tracking.getUnmatchedBases(), baseHashSequence);

    // Pass 1: count, per block hash, how many base lines and raw lines (carrying issues) have it.
    Map<Integer, HashOccurrence> occurrencesByHash = new HashMap<>();
    for (Integer line : basesByLine.keySet()) {
      int hash = baseHashSequence.getBlockHashForLine(line);
      HashOccurrence hashOccurrence = occurrencesByHash.get(hash);
      if (hashOccurrence == null) {
        // first occurrence in base
        hashOccurrence = new HashOccurrence();
        hashOccurrence.baseLine = line;
        hashOccurrence.baseCount = 1;
        occurrencesByHash.put(hash, hashOccurrence);
      } else {
        hashOccurrence.baseCount++;
      }
    }
    for (Integer line : rawsByLine.keySet()) {
      int hash = rawHashSequence.getBlockHashForLine(line);
      // Hashes that never appear on the base side are of no use, so they are not recorded.
      HashOccurrence hashOccurrence = occurrencesByHash.get(hash);
      if (hashOccurrence != null) {
        hashOccurrence.rawLine = line;
        hashOccurrence.rawCount++;
      }
    }

    for (HashOccurrence hashOccurrence : occurrencesByHash.values()) {
      if (hashOccurrence.baseCount == 1 && hashOccurrence.rawCount == 1) {
        // Guaranteed that baseLine has been moved to rawLine, so we can map all issues on baseLine to all issues on rawLine
        map(rawsByLine.get(hashOccurrence.rawLine), basesByLine.get(hashOccurrence.baseLine), tracking);
        basesByLine.removeAll(hashOccurrence.baseLine);
        rawsByLine.removeAll(hashOccurrence.rawLine);
      }
    }

    // Check if remaining number of lines exceeds threshold. It avoids processing too many combinations.
    if (isOverLimit(basesByLine.keySet().size(), rawsByLine.keySet().size())) {
      return;
    }

    // Pass 2: weigh every remaining (base line, raw line) pair by the size of the identical
    // block of lines surrounding it.
    LineHashSequence baseLineHashes = baseInput.getLineHashSequence();
    LineHashSequence rawLineHashes = rawInput.getLineHashSequence();
    List<LinePair> possibleLinePairs = Lists.newArrayList();
    for (Integer baseLine : basesByLine.keySet()) {
      for (Integer rawLine : rawsByLine.keySet()) {
        int weight = lengthOfMaximalBlock(baseLineHashes, baseLine, rawLineHashes, rawLine);
        // A weight of 0 means the two lines do not even have the same hash: there is no evidence
        // of a move, so such pairs must not be turned into matches.
        if (weight > 0) {
          possibleLinePairs.add(new LinePair(baseLine, rawLine, weight));
        }
      }
    }

    // Best candidates (largest common block, then smallest line shift) are consumed first.
    Collections.sort(possibleLinePairs, LinePairComparator.INSTANCE);
    for (LinePair linePair : possibleLinePairs) {
      // High probability that baseLine has been moved to rawLine, so we can map all issues on baseLine to all issues on rawLine
      map(rawsByLine.get(linePair.rawLine), basesByLine.get(linePair.baseLine), tracking);
    }
  }

  /**
   * Tells whether evaluating every combination of the remaining base and raw lines would
   * reach the processing limit. The product is computed with {@code long} operands because
   * an {@code int} product silently wraps around for large files and would then slip under
   * the limit.
   */
  private static boolean isOverLimit(int baseLineCount, int rawLineCount) {
    return (long) baseLineCount * rawLineCount >= MAX_LINE_COMBINATIONS;
  }

  /**
   * Computes the number of consecutive lines, extending both forward and backward from the
   * given start lines, whose hashes are equal in both sequences.
   *
   * @param hashesA line hashes of the first version of text
   * @param startLineA number of line from first version of text (numbering starts from 1)
   * @param hashesB line hashes of the second version of text
   * @param startLineB number of line from second version of text (numbering starts from 1)
   * @return 0 when the two start lines have different hashes, otherwise the length (at least 1)
   *         of the identical block that contains the start lines
   */
  static int lengthOfMaximalBlock(LineHashSequence hashesA, int startLineA, LineHashSequence hashesB, int startLineB) {
    if (!hashesA.getHashForLine(startLineA).equals(hashesB.getHashForLine(startLineB))) {
      return 0;
    }

    int length = 0;

    // Extend downwards, start lines included.
    int ai = startLineA;
    int bi = startLineB;
    while (ai <= hashesA.length() && bi <= hashesB.length() && hashesA.getHashForLine(ai).equals(hashesB.getHashForLine(bi))) {
      ai++;
      bi++;
      length++;
    }

    // Extend upwards, start lines included again.
    ai = startLineA;
    bi = startLineB;
    while (ai > 0 && bi > 0 && hashesA.getHashForLine(ai).equals(hashesB.getHashForLine(bi))) {
      ai--;
      bi--;
      length++;
    }

    // Note that position (startA, startB) was counted twice
    return length - 1;
  }

  /**
   * For each raw issue, records a match with the first base issue of {@code bases} that is
   * still unmatched and has the same rule key. Raws without such a base are left untouched.
   */
  private void map(Collection<RAW> raws, Collection<BASE> bases, Tracking<RAW, BASE> result) {
    for (RAW raw : raws) {
      for (BASE base : bases) {
        if (result.containsUnmatchedBase(base) && base.getRuleKey().equals(raw.getRuleKey())) {
          result.match(raw, base);
          // at most one base per raw
          break;
        }
      }
    }
  }

  /**
   * Indexes the given trackables by their line, keeping only those whose line is reported as
   * present by {@code hashSequence}. Insertion order is preserved.
   */
  private static <T extends Trackable> Multimap<Integer, T> groupByLine(Iterable<T> trackables, BlockHashSequence hashSequence) {
    Multimap<Integer, T> result = LinkedHashMultimap.create();
    for (T trackable : trackables) {
      Integer line = trackable.getLine();
      if (hashSequence.hasLine(line)) {
        result.put(line, trackable);
      }
    }
    return result;
  }

  /** Candidate move of a base line to a raw line, weighted by the size of their common block. */
  private static class LinePair {
    final int baseLine;
    final int rawLine;
    final int weight;

    public LinePair(int baseLine, int rawLine, int weight) {
      this.baseLine = baseLine;
      this.rawLine = rawLine;
      this.weight = weight;
    }
  }

  /**
   * Occurrence counters of one block hash on each side, together with the first base line
   * and the last raw line seen with that hash.
   */
  private static class HashOccurrence {
    int baseLine;
    int rawLine;
    int baseCount;
    int rawCount;
  }

  /**
   * Orders line pairs by decreasing weight, then by increasing distance between the base
   * line and the raw line.
   */
  private enum LinePairComparator implements Comparator<LinePair> {
    INSTANCE;

    @Override
    public int compare(LinePair o1, LinePair o2) {
      // Integer.compare instead of subtraction: same ordering, no risk of overflow.
      int byWeightDesc = Integer.compare(o2.weight, o1.weight);
      if (byWeightDesc != 0) {
        return byWeightDesc;
      }
      return Integer.compare(Math.abs(o1.baseLine - o1.rawLine), Math.abs(o2.baseLine - o2.rawLine));
    }
  }
}