/*
 * SonarQube
 * Copyright (C) 2009-2017 SonarSource SA
 * mailto:info AT sonarsource DOT com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
package org.sonar.duplications.detector.suffixtree;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.sonar.duplications.block.Block;
import org.sonar.duplications.detector.ContainsInComparator;
import org.sonar.duplications.index.CloneGroup;
import org.sonar.duplications.index.ClonePart;
import org.sonar.duplications.utils.SortedListsUtils;

/**
 * A {@link Search.Collector} that turns the groups and parts reported to it into {@link CloneGroup}s.
 * <p>
 * Usage follows the collector callbacks: {@link #startOfGroup(int, int)} opens a group, {@link #part(int, int)}
 * is invoked for each part of it, and {@link #endOfGroup()} builds the group and keeps it unless a previously
 * kept group already includes it. The kept groups are available through {@link #getResult()}.
 * </p>
 */
public class DuplicationsCollector extends Search.Collector {

  private final TextSet text;
  private final String originResourceId;

  /** Groups kept so far, in the order in which they were accepted. */
  private final List<CloneGroup> filtered = new ArrayList<>();

  /** Length of the group currently being collected, as given to {@link #startOfGroup(int, int)}. */
  private int length;
  /** Number of parts recorded so far for the current group. */
  private int count;
  /** One row per part of the current group: {@code [first block number, last block number]}. */
  private int[][] blockNumbers;

  /**
   * Creates a collector working on the given text.
   * The resource id of the block at position 0 of the text is treated as the origin resource.
   *
   * @param text text from which blocks are looked up by number
   */
  public DuplicationsCollector(TextSet text) {
    this.text = text;
    this.originResourceId = text.getBlock(0).getResourceId();
  }

  /**
   * @return the groups kept so far; this is the collector's own list, not a copy
   */
  public List<CloneGroup> getResult() {
    return filtered;
  }

  /**
   * Opens a new group: allocates room for {@code size} parts and remembers the group length.
   *
   * @param size number of parts the group is announced to have
   * @param length length of the group, later passed to the {@link CloneGroup} builder
   */
  @Override
  public void startOfGroup(int size, int length) {
    this.blockNumbers = new int[size][2];
    this.length = length;
  }

  /**
   * Records the block range of one part of the current group. The corresponding {@link ClonePart}
   * is only built later, in {@link #endOfGroup()}.
   *
   * @param start number of the first block from text for this part
   * @param end end position of this part; the last block of the part is taken to be {@code end - 1}
   */
  @Override
  public void part(int start, int end) {
    int[] range = blockNumbers[count];
    range[0] = start;
    range[1] = end - 1;
    count++;
  }

  /**
   * Builds the {@link CloneGroup} for the recorded parts, hands it to the inclusion filter
   * and clears the per-group state.
   */
  @Override
  public void endOfGroup() {
    CloneGroup.Builder builder = CloneGroup.builder().setLength(length);
    List<ClonePart> parts = new ArrayList<>(count);
    ClonePart origin = null;

    for (int i = 0; i < blockNumbers.length; i++) {
      Block firstBlock = text.getBlock(blockNumbers[i][0]);
      Block lastBlock = text.getBlock(blockNumbers[i][1]);
      ClonePart part = toClonePart(firstBlock, lastBlock);
      parts.add(part);

      // TODO Godin: maybe use FastStringComparator here ?
      if (!originResourceId.equals(part.getResourceId())) {
        // not a part from origin
        continue;
      }
      if (origin == null) {
        // To calculate length important to use the origin, because otherwise block may come from DB without required data
        builder.setLengthInUnits(lastBlock.getEndUnit() - firstBlock.getStartUnit() + 1);
        origin = part;
      } else if (part.getUnitStart() < origin.getUnitStart()) {
        // among the parts from origin, the one with the smallest unit start wins
        origin = part;
      }
    }

    Collections.sort(parts, ContainsInComparator.CLONEPART_COMPARATOR);
    builder.setOrigin(origin).setParts(parts);
    CloneGroup group = builder.build();
    filter(group);
    reset();
  }

  /**
   * Creates the part that starts at {@code firstBlock} and ends at {@code lastBlock}.
   */
  private static ClonePart toClonePart(Block firstBlock, Block lastBlock) {
    return new ClonePart(
      firstBlock.getResourceId(),
      firstBlock.getIndexInFile(),
      firstBlock.getStartLine(),
      lastBlock.getEndLine());
  }

  /**
   * Clears the per-group state so that the next duplication can be processed.
   */
  private void reset() {
    blockNumbers = null;
    count = 0;
  }

  /**
   * Keeps the given CloneGroup unless it is included in one that was kept earlier.
   * <p>
   * The current CloneGroup cannot include any CloneGroup that was constructed before it.
   * Proof:
   * According to the order in which nodes of the suffix tree are visited, length of earlier >= length of current.
   * If length of earlier > length of current, then earlier is not contained in current.
   * If length of earlier = length of current, then earlier can be contained in current only
   * when current has exactly the same CloneParts as earlier, and maybe some additional ones,
   * which in turn would mean that two inner-nodes on the same depth satisfy the condition
   * current.startSize <= earlier.startSize <= earlier.endSize <= current.endSize, which is not possible
   * for different inner-nodes on the same depth.
   * </p>
   * Thus this method only checks that no CloneGroup constructed before includes the current CloneGroup.
   */
  private void filter(CloneGroup current) {
    if (!isCoveredByEarlierGroup(current)) {
      filtered.add(current);
    }
  }

  /**
   * Tells whether one of the already kept groups includes the given one.
   */
  private boolean isCoveredByEarlierGroup(CloneGroup current) {
    for (CloneGroup earlier : filtered) {
      if (containsIn(current, earlier)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Checks whether the second CloneGroup includes the first one.
   * <p>
   * CloneGroup A is included in another CloneGroup B, if every part pA from A has a part pB in B
   * which satisfies the conditions:
   * <pre>
   * (pA.resourceId == pB.resourceId) and (pB.unitStart <= pA.unitStart) and (pA.unitEnd <= pB.unitEnd)
   * </pre>
   * In addition, the resource ids of B must be exactly the resource ids of A, which means that every part pB
   * from B also has a part pA in A which satisfies the condition:
   * <pre>
   * pB.resourceId == pA.resourceId
   * </pre>
   * Inclusion is a partial order, thus this relation is:
   * <ul>
   * <li>reflexive - A in A</li>
   * <li>transitive - (A in B) and (B in C) => (A in C)</li>
   * <li>antisymmetric - (A in B) and (B in A) <=> (A = B)</li>
   * </ul>
   * </p>
   * <p>
   * This method relies on the parts being already sorted by resourceId and unitStart
   * (see {@link ContainsInComparator#CLONEPART_COMPARATOR}), so the running time is O(|A|+|B|).
   * </p>
   */
  private static boolean containsIn(CloneGroup first, CloneGroup second) {
    List<ClonePart> firstParts = first.getCloneParts();
    List<ClonePart> secondParts = second.getCloneParts();
    // TODO Godin: according to tests seems that if first part of condition is true, then second part can not be false
    // if this can be proved, then second part can be removed
    ContainsInComparator inclusion = new ContainsInComparator(second.getCloneUnitLength(), first.getCloneUnitLength());
    if (!SortedListsUtils.contains(secondParts, firstParts, inclusion)) {
      return false;
    }
    return SortedListsUtils.contains(firstParts, secondParts, ContainsInComparator.RESOURCE_ID_COMPARATOR);
  }

}