/* * SonarQube * Copyright (C) 2009-2017 SonarSource SA * mailto:info AT sonarsource DOT com * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 3 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ package org.sonar.duplications.block; import java.util.ArrayList; import org.sonar.duplications.statement.Statement; import java.util.Collections; import java.util.List; /** * Creates blocks from statements, each block will contain specified number of statements (<code>blockSize</code>) and 64-bits (8-bytes) hash value. * Hash value computed using * <a href="http://en.wikipedia.org/wiki/Rolling_hash#Rabin-Karp_rolling_hash">Rabin-Karp rolling hash</a> : * <blockquote><pre> * s[0]*31^(blockSize-1) + s[1]*31^(blockSize-2) + ... + s[blockSize-1] * </pre></blockquote> * using <code>long</code> arithmetic, where <code>s[i]</code> * is the hash code of <code>String</code> (which is cached) for statement with number i. * Thus running time - O(N), where N - number of statements. * Implementation fully thread-safe. */ public class BlockChunker { private static final long PRIME_BASE = 31; private final int blockSize; private final long power; public BlockChunker(int blockSize) { this.blockSize = blockSize; long pow = 1; for (int i = 0; i < blockSize - 1; i++) { pow = pow * PRIME_BASE; } this.power = pow; } public List<Block> chunk(String resourceId, List<Statement> statements) { List<Statement> filtered = new ArrayList<>(); int i = 0; while (i < statements.size()) { Statement first = statements.get(i); int j = i + 1; while (j < statements.size() && statements.get(j).getValue().equals(first.getValue())) { j++; } filtered.add(statements.get(i)); if (i < j - 1) { filtered.add(statements.get(j - 1)); } i = j; } statements = filtered; if (statements.size() < blockSize) { return Collections.emptyList(); } Statement[] statementsArr = statements.toArray(new Statement[statements.size()]); List<Block> blocks = new ArrayList<>(statementsArr.length - blockSize + 1); long hash = 0; int first = 0; int last = 0; for (; last < blockSize - 1; last++) { hash = hash * PRIME_BASE + statementsArr[last].getValue().hashCode(); } Block.Builder blockBuilder = Block.builder().setResourceId(resourceId); for (; last < statementsArr.length; last++, first++) { Statement firstStatement = statementsArr[first]; Statement lastStatement = statementsArr[last]; // add last statement to hash hash = hash * PRIME_BASE + lastStatement.getValue().hashCode(); // create block Block block = blockBuilder.setBlockHash(new ByteArray(hash)) .setIndexInFile(first) .setLines(firstStatement.getStartLine(), lastStatement.getEndLine()) .build(); blocks.add(block); // remove first statement from hash hash -= power * firstStatement.getValue().hashCode(); } return blocks; } public int getBlockSize() { return blockSize; } }