DuplicationsCollector.java example

Explorer
sonarqube-master
/*
 * SonarQube
 * Copyright (C) 2009-2017 SonarSource SA
 * mailto:info AT sonarsource DOT com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
package org.sonar.duplications.detector.suffixtree;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.sonar.duplications.block.Block;
import org.sonar.duplications.detector.ContainsInComparator;
import org.sonar.duplications.index.CloneGroup;
import org.sonar.duplications.index.ClonePart;
import org.sonar.duplications.utils.SortedListsUtils;

/**
 * {@link Search.Collector} implementation that turns the groups reported by the search into {@link CloneGroup}s.
 * <p>
 * For one group the expected callback sequence is {@link #startOfGroup(int, int)}, followed by
 * {@link #part(int, int)} for every part of the group, followed by {@link #endOfGroup()}.
 * Groups which pass the filtering are accumulated and can be obtained with {@link #getResult()}.
 * </p>
 */
public class DuplicationsCollector extends Search.Collector {

  private final TextSet text;
  private final String originResourceId;

  /** Clone groups accepted so far, in order of acceptance. */
  private final List<CloneGroup> filtered = new ArrayList<>();

  /** Length passed to the last {@link #startOfGroup(int, int)}. */
  private int length;
  /** Number of parts recorded for the group which is currently being collected. */
  private int count;
  /** One row per part: index 0 holds the first block number, index 1 the last block number. */
  private int[][] blockNumbers;

  /**
   * @param text set of blocks being analysed; the resource id of its block number 0 is used as the origin resource
   */
  public DuplicationsCollector(TextSet text) {
    this.text = text;
    this.originResourceId = text.getBlock(0).getResourceId();
  }

  /**
   * @return clone groups accepted so far (the internal list itself, not a copy)
   */
  public List<CloneGroup> getResult() {
    return filtered;
  }

  /**
   * Starts collecting of a new group.
   *
   * @param size number of parts to allocate room for
   * @param length length which will be set on the constructed CloneGroup
   */
  @Override
  public void startOfGroup(int size, int length) {
    this.length = length;
    this.blockNumbers = new int[size][2];
  }

  /**
   * Records block numbers of one part, actual {@link ClonePart} is constructed later in {@link #endOfGroup()}.
   *
   * @param start number of first block from text for this part
   * @param end bound of this part in text, block with number {@code end - 1} is used as the last block
   */
  @Override
  public void part(int start, int end) {
    final int[] range = blockNumbers[count];
    range[0] = start;
    range[1] = end - 1;
    count++;
  }

  /**
   * Constructs CloneGroup from the recorded parts, passes it through the filter and prepares for the next group.
   * <p>
   * Origin of the group is the part from the origin resource with the smallest unit start,
   * whereas length in units is computed from the first encountered part of the origin resource.
   * </p>
   */
  @Override
  public void endOfGroup() {
    final CloneGroup.Builder builder = CloneGroup.builder().setLength(length);
    final List<ClonePart> parts = new ArrayList<>(count);
    ClonePart origin = null;

    for (int i = 0; i < blockNumbers.length; i++) {
      final int[] range = blockNumbers[i];
      final Block firstBlock = text.getBlock(range[0]);
      final Block lastBlock = text.getBlock(range[1]);
      final ClonePart part = toClonePart(firstBlock, lastBlock);

      // TODO Godin: maybe use FastStringComparator here ?
      final boolean fromOrigin = originResourceId.equals(part.getResourceId());
      if (fromOrigin && origin == null) {
        origin = part;
        // Length must be calculated from blocks of the origin, because other blocks may come from DB without required data
        final int endUnit = lastBlock.getEndUnit();
        final int startUnit = firstBlock.getStartUnit();
        builder.setLengthInUnits(endUnit - startUnit + 1);
      } else if (fromOrigin && part.getUnitStart() < origin.getUnitStart()) {
        // earlier part from origin wins
        origin = part;
      }

      parts.add(part);
    }

    Collections.sort(parts, ContainsInComparator.CLONEPART_COMPARATOR);
    builder.setOrigin(origin).setParts(parts);

    filter(builder.build());

    reset();
  }

  /**
   * Creates ClonePart, which spans from the start line of the first block to the end line of the last block.
   */
  private static ClonePart toClonePart(Block firstBlock, Block lastBlock) {
    return new ClonePart(
      firstBlock.getResourceId(),
      firstBlock.getIndexInFile(),
      firstBlock.getStartLine(),
      lastBlock.getEndLine());
  }

  /**
   * Forgets data of the processed group, so that the next one can be collected.
   */
  private void reset() {
    count = 0;
    blockNumbers = null;
  }

  /**
   * Saves CloneGroup, unless it is included into one of the previously saved.
   * <p>
   * Current CloneGroup can not include any of CloneGroups, which were constructed before.
   * Proof:
   * According to an order of visiting nodes in suffix tree - length of earlier >= length of current.
   * If length of earlier > length of current, then earlier is not contained in current.
   * If length of earlier = length of current, then earlier can be contained in current only
   * when current has exactly the same and maybe some additional CloneParts as earlier,
   * which in its turn would mean that two inner-nodes on same depth satisfy condition
   * current.startSize <= earlier.startSize <= earlier.endSize <= current.endSize , which is not possible for different inner-nodes on same depth.
   * </p>
   * Thus it is enough to check that none of CloneGroups, which were constructed before, includes current CloneGroup.
   */
  private void filter(CloneGroup current) {
    if (!isIncludedInEarlier(current)) {
      filtered.add(current);
    }
  }

  /**
   * @return true, if at least one of already saved CloneGroups includes given one
   */
  private boolean isIncludedInEarlier(CloneGroup current) {
    for (CloneGroup earlier : filtered) {
      if (containsIn(current, earlier)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Checks that first CloneGroup is included into second one.
   * <p>
   * CloneGroup A is included into CloneGroup B, if for every part pA from A there is part pB in B such that:
   * <pre>
   * (pA.resourceId == pB.resourceId) and (pB.unitStart <= pA.unitStart) and (pA.unitEnd <= pB.unitEnd)
   * </pre>
   * and set of resourceId of B is exactly the same as set of resourceId of A, i.e. for every part pB from B
   * there is also part pA in A such that:
   * <pre>
   * pB.resourceId == pA.resourceId
   * </pre>
   * Inclusion is the partial order, thus this relation is:
   * <ul>
   * <li>reflexive - A in A</li>
   * <li>transitive - (A in B) and (B in C) => (A in C)</li>
   * <li>antisymmetric - (A in B) and (B in A) <=> (A = B)</li>
   * </ul>
   * </p>
   * <p>
   * Parts of both groups are already sorted by resourceId and unitStart (see {@link ContainsInComparator#CLONEPART_COMPARATOR}),
   * which allows to perform the check in O(|A|+|B|).
   * </p>
   */
  private static boolean containsIn(CloneGroup first, CloneGroup second) {
    final List<ClonePart> firstParts = first.getCloneParts();
    final List<ClonePart> secondParts = second.getCloneParts();
    final ContainsInComparator inclusion = new ContainsInComparator(second.getCloneUnitLength(), first.getCloneUnitLength());

    if (!SortedListsUtils.contains(secondParts, firstParts, inclusion)) {
      return false;
    }
    // TODO Godin: according to tests seems that if check above is true, then check below can not be false
    // if this can be proved, then check below can be removed
    return SortedListsUtils.contains(firstParts, secondParts, ContainsInComparator.RESOURCE_ID_COMPARATOR);
  }

}