/* * SonarQube * Copyright (C) 2009-2017 SonarSource SA * mailto:info AT sonarsource DOT com * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 3 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ package org.sonar.duplications.detector.suffixtree; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.Deque; import java.util.LinkedList; import java.util.List; import java.util.ListIterator; public final class Search { private final SuffixTree tree; private final TextSet text; private final Collector reporter; private final List<Integer> list = new ArrayList<>(); private final List<Node> innerNodes = new ArrayList<>(); private static final Comparator<Node> DEPTH_COMPARATOR = (o1, o2) -> o2.depth - o1.depth; private Search(SuffixTree tree, TextSet text, Collector reporter) { this.tree = tree; this.text = text; this.reporter = reporter; } public static void perform(TextSet text, Collector reporter) { new Search(SuffixTree.create(text), text, reporter).compute(); } private void compute() { // O(N) dfs(); // O(N * log(N)) Collections.sort(innerNodes, DEPTH_COMPARATOR); // O(N) visitInnerNodes(); } /** * Depth-first search (DFS). */ private void dfs() { Deque<Node> stack = new LinkedList<>(); stack.add(tree.getRootNode()); while (!stack.isEmpty()) { Node node = stack.removeLast(); node.startSize = list.size(); if (node.getEdges().isEmpty()) { // leaf list.add(node.depth); node.endSize = list.size(); } else { if (!node.equals(tree.getRootNode())) { // inner node = not leaf and not root innerNodes.add(node); } for (Edge edge : node.getEdges()) { Node endNode = edge.getEndNode(); endNode.depth = node.depth + edge.getSpan() + 1; stack.addLast(endNode); } } } // At this point all inner nodes are ordered by the time of entering, so we visit them from last to first ListIterator<Node> iterator = innerNodes.listIterator(innerNodes.size()); while (iterator.hasPrevious()) { Node node = iterator.previous(); int max = -1; for (Edge edge : node.getEdges()) { max = Math.max(edge.getEndNode().endSize, max); } node.endSize = max; } } /** * Each inner-node represents prefix of some suffixes, thus substring of text. */ private void visitInnerNodes() { for (Node node : innerNodes) { if (containsOrigin(node)) { report(node); } } } /** * TODO Godin: in fact computations here are the same as in {@link #report(Node)}, * so maybe would be better to remove this duplication, * however it should be noted that this check can't be done in {@link Collector#endOfGroup()}, * because it might lead to creation of unnecessary new objects */ private boolean containsOrigin(Node node) { for (int i = node.startSize; i < node.endSize; i++) { int start = tree.text.length() - list.get(i); int end = start + node.depth; if (text.isInsideOrigin(end)) { return true; } } return false; } private void report(Node node) { reporter.startOfGroup(node.endSize - node.startSize, node.depth); for (int i = node.startSize; i < node.endSize; i++) { int start = tree.text.length() - list.get(i); int end = start + node.depth; reporter.part(start, end); } reporter.endOfGroup(); } public abstract static class Collector { /** * Invoked at the beginning of processing for current node. * <p> * Length - is a depth of node. And nodes are visited in descending order of depth, * thus we guaranty that length will not increase between two sequential calls of this method * (can be equal or less than previous value). * </p> * * @param size number of parts in group * @param length length of each part in group */ abstract void startOfGroup(int size, int length); /** * Invoked as many times as leaves in the subtree, where current node is root. * * @param start start position in generalised text * @param end end position in generalised text */ abstract void part(int start, int end); /** * Invoked at the end of processing for current node. */ abstract void endOfGroup(); } }