/*
* SonarQube
* Copyright (C) 2009-2017 SonarSource SA
* mailto:info AT sonarsource DOT com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.duplications.detector.suffixtree;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.sonar.duplications.block.Block;
import org.sonar.duplications.block.ByteArray;
import org.sonar.duplications.index.CloneGroup;
import org.sonar.duplications.index.CloneIndex;
/**
 * Static entry point that detects clone groups for the blocks of a single file by
 * assembling a {@link TextSet} from those blocks plus matching blocks retrieved
 * from a {@link CloneIndex}, and then running {@link Search} over it.
 *
 * <p>This class only holds static methods and cannot be instantiated.
 */
public final class SuffixTreeCloneDetectionAlgorithm {

  /**
   * Orders blocks by ascending index in file. Built with
   * {@link Comparator#comparingInt} rather than integer subtraction so the
   * comparison cannot overflow.
   */
  private static final Comparator<Block> BLOCK_COMPARATOR = Comparator.comparingInt(Block::getIndexInFile);

  private SuffixTreeCloneDetectionAlgorithm() {
    // only statics
  }

  /**
   * Detects clone groups for the given blocks of a file.
   *
   * @param cloneIndex index queried (by block hash) for blocks of other resources
   * @param fileBlocks blocks of the file under analysis; the resource id of the first
   *                   block returned by the collection's iterator is treated as the
   *                   id of the origin file
   * @return the result collected by {@link DuplicationsCollector}, or an empty list when
   *         {@code fileBlocks} is empty or when no duplication is possible
   */
  public static List<CloneGroup> detect(CloneIndex cloneIndex, Collection<Block> fileBlocks) {
    if (fileBlocks.isEmpty()) {
      return Collections.emptyList();
    }
    TextSet text = createTextSet(cloneIndex, fileBlocks);
    if (text == null) {
      // nothing can be duplicated, so the search is skipped entirely
      return Collections.emptyList();
    }
    DuplicationsCollector reporter = new DuplicationsCollector(text);
    Search.perform(text, reporter);
    return reporter.getResult();
  }

  /**
   * Collects the distinct hashes of the file blocks, retrieves the blocks of other
   * resources that share those hashes, and builds the text set from both.
   *
   * @param index      index queried for blocks having the same hashes
   * @param fileBlocks non-empty collection of blocks of the file under analysis
   * @return the built text set, or {@code null} when the index returned no block from
   *         another resource and all hashes of {@code fileBlocks} are distinct
   */
  private static TextSet createTextSet(CloneIndex index, Collection<Block> fileBlocks) {
    Set<ByteArray> hashes = new HashSet<>();
    for (Block fileBlock : fileBlocks) {
      hashes.add(fileBlock.getBlockHash());
    }

    String originResourceId = fileBlocks.iterator().next().getResourceId();
    Map<String, List<Block>> fromIndex = retrieveFromIndex(index, originResourceId, hashes);

    // as many distinct hashes as blocks means that no hash is repeated inside the file
    if (fromIndex.isEmpty() && hashes.size() == fileBlocks.size()) {
      // optimization for the case when there is no duplications
      return null;
    }

    return createTextSet(fileBlocks, fromIndex);
  }

  /**
   * Builds the text set: first the file blocks sorted by index in file, then, for each
   * resource retrieved from the index, its blocks sorted by index in file and split into
   * runs of consecutive indexes, each run being added separately.
   *
   * <p>The lists held by {@code fromIndex} are sorted in place.
   *
   * @param fileBlocks blocks of the file under analysis; copied before sorting
   * @param fromIndex  blocks of other resources grouped by resource id
   * @return the built text set
   */
  private static TextSet createTextSet(Collection<Block> fileBlocks, Map<String, List<Block>> fromIndex) {
    TextSet.Builder textSetBuilder = TextSet.builder();
    // TODO Godin: maybe we can reduce size of tree and so memory consumption by removing non-repeatable blocks
    List<Block> sortedFileBlocks = new ArrayList<>(fileBlocks);
    sortedFileBlocks.sort(BLOCK_COMPARATOR);
    textSetBuilder.add(sortedFileBlocks);

    for (List<Block> list : fromIndex.values()) {
      list.sort(BLOCK_COMPARATOR);

      int i = 0;
      while (i < list.size()) {
        // extend [i, j) while each block directly follows the previous one in its file
        int j = i + 1;
        while ((j < list.size()) && (list.get(j).getIndexInFile() == list.get(j - 1).getIndexInFile() + 1)) {
          j++;
        }
        textSetBuilder.add(list.subList(i, j));
        i = j;
      }
    }

    return textSetBuilder.build();
  }

  /**
   * Queries the index for every hash and groups the returned blocks by resource id,
   * leaving out the blocks that belong to the origin resource.
   *
   * @param index            index queried through {@code getBySequenceHash}
   * @param originResourceId resource id of the file under analysis
   * @param hashes           distinct hashes to look up
   * @return blocks of other resources keyed by resource id; empty when none was found
   */
  private static Map<String, List<Block>> retrieveFromIndex(CloneIndex index, String originResourceId, Set<ByteArray> hashes) {
    Map<String, List<Block>> collection = new HashMap<>();
    for (ByteArray hash : hashes) {
      Collection<Block> blocks = index.getBySequenceHash(hash);
      for (Block blockFromIndex : blocks) {
        // Godin: skip blocks for this file if they come from index
        String resourceId = blockFromIndex.getResourceId();
        if (!originResourceId.equals(resourceId)) {
          collection.computeIfAbsent(resourceId, key -> new ArrayList<>()).add(blockFromIndex);
        }
      }
    }
    return collection;
  }

}