/*
 * SonarQube
 * Copyright (C) 2009-2017 SonarSource SA
 * mailto:info AT sonarsource DOT com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
package org.sonar.duplications.index;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import org.sonar.duplications.block.Block;
import org.sonar.duplications.block.ByteArray;
import org.sonar.duplications.utils.FastStringComparator;

import javax.annotation.Nullable;

/**
 * Provides an index optimized by memory.
 * <p>
 * Each object in Java has an overhead - see
 * <a href="http://devblog.streamy.com/2009/07/24/determine-size-of-java-object-class/">"HOWTO: Determine the size of a Java Object or Class"</a>.
 * So to optimize memory consumption, we use flat arrays, however this increases the time of queries.
 * During usual detection of duplicates the most time-consuming method is {@link #getByResourceId(String)}:
 * around 50% of the time spent in this class, while the number of invocations of this method is 1% of total invocations;
 * however the total time spent in this class is less than 1 second for small projects and around 2 seconds for projects like JDK.
 * </p>
 * <p>
 * Note that this implementation currently does not support deletion, however it's possible to implement.
 * </p>
 * <p>
 * Storage layout: block number {@code i} owns {@code resourceIds[i]} and the {@code blockInts} consecutive ints of
 * {@code blockData} starting at {@code i * blockInts} (first the hash, then index in file, first line, last line,
 * start unit and end unit). The slot at position {@code size}, just past the last stored block, is used as scratch
 * space for the search key of a query.
 * </p>
 * <p>
 * Because queries sort the arrays lazily and write the search key into that scratch slot, even the read methods
 * modify internal state; the class performs no synchronization.
 * </p>
 */
public class PackedMemoryCloneIndex extends AbstractCloneIndex {

  private static final int DEFAULT_INITIAL_CAPACITY = 1024;

  /**
   * Number of ints stored per block in addition to the hash:
   * index in file, first line, last line, start unit, end unit.
   */
  private static final int BLOCK_INTS = 5;

  /**
   * Number of ints occupied by one block hash.
   */
  private final int hashInts;

  /**
   * Total number of ints occupied by one block ({@code hashInts + BLOCK_INTS}).
   */
  private final int blockInts;

  /**
   * {@code true} when the arrays are currently sorted and can be queried directly;
   * {@code false} when a sort is required before the next query.
   */
  private boolean sorted;

  /**
   * Current number of blocks in index.
   */
  private int size;

  /**
   * Resource id of each block; kept in the same order as {@link #blockData}.
   */
  private String[] resourceIds;

  /**
   * Packed block data, {@link #blockInts} ints per block.
   */
  private int[] blockData;

  /**
   * Permutation of block positions ordered by resource id; rebuilt by {@link #ensureSorted()}.
   */
  private int[] resourceIdsIndex;

  private final Block.Builder blockBuilder = Block.builder();

  public PackedMemoryCloneIndex() {
    this(8, DEFAULT_INITIAL_CAPACITY);
  }

  /**
   * @param hashBytes size of hash in bytes
   * @param initialCapacity the initial capacity
   */
  public PackedMemoryCloneIndex(int hashBytes, int initialCapacity) {
    this.sorted = false;
    this.hashInts = hashBytes / 4;
    this.blockInts = hashInts + BLOCK_INTS;
    this.size = 0;
    this.resourceIds = new String[initialCapacity];
    this.blockData = new int[initialCapacity * blockInts];
    this.resourceIdsIndex = new int[initialCapacity];
  }

  /**
   * {@inheritDoc}
   * <p>
   * <strong>Note that this implementation does not guarantee that blocks would be sorted by index.</strong>
   * </p>
   */
  @Override
  public Collection<Block> getByResourceId(String resourceId) {
    // ensureSorted() also guarantees a free slot at position "size" (it calls ensureCapacity()).
    ensureSorted();

    // prepare resourceId for binary search: the key lives in the scratch slot at position "size"
    resourceIds[size] = resourceId;
    resourceIdsIndex[size] = size;

    List<Block> result = new ArrayList<>();
    // Collect the run of entries equal to the requested id, starting at the position found by the search.
    for (int index = DataUtils.binarySearch(byResourceId); index < size; index++) {
      int realIndex = resourceIdsIndex[index];
      if (FastStringComparator.INSTANCE.compare(resourceIds[realIndex], resourceId) != 0) {
        break;
      }
      result.add(getBlock(realIndex, resourceId));
    }
    return result;
  }

  /**
   * Rebuilds a {@link Block} from the packed data stored at the given position.
   *
   * @param index position of the block in {@link #blockData} / {@link #resourceIds}
   * @param resourceId resource id to set on the created block
   * @param byteHash hash to reuse for the block, or {@code null} to read the hash from {@link #blockData}
   */
  private Block createBlock(int index, String resourceId, @Nullable ByteArray byteHash) {
    int offset = index * blockInts;

    ByteArray blockHash = byteHash;
    if (blockHash == null) {
      int[] hash = new int[hashInts];
      System.arraycopy(blockData, offset, hash, 0, hashInts);
      blockHash = new ByteArray(hash);
    }
    // Skip the hash part in both cases; the remaining BLOCK_INTS values follow it.
    offset += hashInts;

    int indexInFile = blockData[offset++];
    int firstLineNumber = blockData[offset++];
    int lastLineNumber = blockData[offset++];
    int startUnit = blockData[offset++];
    int endUnit = blockData[offset];

    return blockBuilder
      .setResourceId(resourceId)
      .setBlockHash(blockHash)
      .setIndexInFile(indexInFile)
      .setLines(firstLineNumber, lastLineNumber)
      .setUnit(startUnit, endUnit)
      .build();
  }

  /**
   * Rebuilds the block at the given position, reading its hash from {@link #blockData}.
   */
  private Block getBlock(int index, String resourceId) {
    return createBlock(index, resourceId, null);
  }

  /**
   * Converts a hash to ints and checks that it has the length this index was created for.
   *
   * @throws IllegalArgumentException if the hash does not consist of exactly {@link #hashInts} ints
   */
  private int[] toHashInts(ByteArray hash) {
    int[] ints = hash.toIntArray();
    if (ints.length != hashInts) {
      throw new IllegalArgumentException("Expected " + hashInts + " ints in hash, but got " + ints.length);
    }
    return ints;
  }

  /**
   * Walks the blocks in resource-id order and groups consecutive blocks of the same resource.
   * It reads the index arrays directly and performs no modification check.
   */
  private class ResourceIterator implements Iterator<ResourceBlocks> {
    private int index = 0;

    @Override
    public boolean hasNext() {
      return index < size;
    }

    @Override
    public ResourceBlocks next() {
      if (!hasNext()) {
        throw new NoSuchElementException();
      }

      String resourceId = resourceIds[resourceIdsIndex[index]];
      List<Block> blocks = new ArrayList<>();

      // while we are at the same resource, keep going
      do {
        blocks.add(getBlock(resourceIdsIndex[index], resourceId));
        index++;
      } while (hasNext() && FastStringComparator.INSTANCE.compare(resourceIds[resourceIdsIndex[index]], resourceId) == 0);

      return new ResourceBlocks(resourceId, blocks);
    }

    @Override
    public void remove() {
      throw new UnsupportedOperationException();
    }
  }

  /**
   * All blocks of one resource, as produced by {@link PackedMemoryCloneIndex#iterator()}.
   */
  public static class ResourceBlocks {
    private final Collection<Block> blocks;
    private final String resourceId;

    public ResourceBlocks(String resourceId, Collection<Block> blocks) {
      this.resourceId = resourceId;
      this.blocks = blocks;
    }

    public Collection<Block> blocks() {
      return blocks;
    }

    public String resourceId() {
      return resourceId;
    }
  }

  /**
   * {@inheritDoc}
   * <p>
   * The returned iterator does not detect modifications: blocks must not be inserted while iterating.
   * </p>
   */
  @Override
  public Iterator<ResourceBlocks> iterator() {
    ensureSorted();
    return new ResourceIterator();
  }

  /**
   * {@inheritDoc}
   *
   * @throws IllegalArgumentException if the hash does not have the length this index was created for
   */
  @Override
  public Collection<Block> getBySequenceHash(ByteArray sequenceHash) {
    // ensureSorted() also guarantees a free slot at position "size" (it calls ensureCapacity()).
    ensureSorted();

    // prepare hash for binary search: the key lives in the scratch slot at position "size"
    int[] hash = toHashInts(sequenceHash);
    System.arraycopy(hash, 0, blockData, size * blockInts, hashInts);

    int index = DataUtils.binarySearch(byBlockHash);

    List<Block> result = new ArrayList<>();
    // Stop at the first stored hash that the key is less than.
    while (index < size && !isLessByHash(size, index)) {
      // extract block (note that there is no need to extract hash)
      String resourceId = resourceIds[index];
      result.add(createBlock(index, resourceId, sequenceHash));
      index++;
    }
    return result;
  }

  /**
   * {@inheritDoc}
   * <p>
   * <strong>Note that this implementation allows insertion of two blocks with same index for one resource.</strong>
   * </p>
   *
   * @throws IllegalArgumentException if the block hash does not have the length this index was created for;
   *   in that case the index is left unchanged
   */
  @Override
  public void insert(Block block) {
    // Validate first so that a rejected block does not touch any state.
    int[] hash = toHashInts(block.getBlockHash());

    sorted = false;
    ensureCapacity();

    resourceIds[size] = block.getResourceId();

    int offset = size * blockInts;
    System.arraycopy(hash, 0, blockData, offset, hashInts);
    offset += hashInts;
    blockData[offset++] = block.getIndexInFile();
    blockData[offset++] = block.getStartLine();
    blockData[offset++] = block.getEndLine();
    blockData[offset++] = block.getStartUnit();
    blockData[offset] = block.getEndUnit();

    size++;
  }

  /**
   * Increases the capacity, if necessary, so that position {@code size} is a valid slot in every array.
   */
  private void ensureCapacity() {
    if (size < resourceIds.length) {
      return;
    }
    int newCapacity = (resourceIds.length * 3) / 2 + 1;
    // Increase size of resourceIds
    String[] oldResourceIds = resourceIds;
    resourceIds = new String[newCapacity];
    System.arraycopy(oldResourceIds, 0, resourceIds, 0, oldResourceIds.length);
    // Increase size of blockData
    int[] oldBlockData = blockData;
    blockData = new int[newCapacity * blockInts];
    System.arraycopy(oldBlockData, 0, blockData, 0, oldBlockData.length);
    // Increase size of resourceIdsIndex (no need to copy old, because it is rebuilt in method ensureSorted)
    resourceIdsIndex = new int[newCapacity];
    sorted = false;
  }

  /**
   * Performs sorting, if necessary: blocks are ordered by hash, then {@link #resourceIdsIndex}
   * is rebuilt and ordered by resource id.
   */
  private void ensureSorted() {
    if (sorted) {
      return;
    }

    ensureCapacity();

    DataUtils.sort(byBlockHash);
    // Start from the identity permutation, then order it by resource id.
    for (int i = 0; i < size; i++) {
      resourceIdsIndex[i] = i;
    }
    DataUtils.sort(byResourceId);

    sorted = true;
  }

  /**
   * Compares the hashes stored at positions {@code i} and {@code j}, int by int.
   *
   * @return {@code true} if the hash at {@code i} is strictly less than the hash at {@code j}
   */
  private boolean isLessByHash(int i, int j) {
    int i2 = i * blockInts;
    int j2 = j * blockInts;
    for (int k = 0; k < hashInts; k++, i2++, j2++) {
      if (blockData[i2] != blockData[j2]) {
        return blockData[i2] < blockData[j2];
      }
    }
    // hashes are equal
    return false;
  }

  /**
   * Sortable view ordering whole blocks (resource id and packed data together) by hash.
   */
  private final DataUtils.Sortable byBlockHash = new DataUtils.Sortable() {
    @Override
    public void swap(int i, int j) {
      String tmp = resourceIds[i];
      resourceIds[i] = resourceIds[j];
      resourceIds[j] = tmp;

      i *= blockInts;
      j *= blockInts;
      for (int k = 0; k < blockInts; k++, i++, j++) {
        int x = blockData[i];
        blockData[i] = blockData[j];
        blockData[j] = x;
      }
    }

    @Override
    public boolean isLess(int i, int j) {
      return isLessByHash(i, j);
    }

    @Override
    public int size() {
      return size;
    }
  };

  /**
   * Sortable view ordering {@link #resourceIdsIndex} by resource id; the blocks themselves are not moved.
   */
  private final DataUtils.Sortable byResourceId = new DataUtils.Sortable() {
    @Override
    public void swap(int i, int j) {
      int tmp = resourceIdsIndex[i];
      resourceIdsIndex[i] = resourceIdsIndex[j];
      resourceIdsIndex[j] = tmp;
    }

    @Override
    public boolean isLess(int i, int j) {
      String s1 = resourceIds[resourceIdsIndex[i]];
      String s2 = resourceIds[resourceIdsIndex[j]];
      return FastStringComparator.INSTANCE.compare(s1, s2) < 0;
    }

    @Override
    public int size() {
      return size;
    }
  };

  /**
   * Counts the distinct resource ids in the index.
   * The counting pass itself is O(N); a pending sort is performed first.
   */
  @Override
  public int noResources() {
    ensureSorted();
    int count = 0;
    String lastResource = null;

    // Blocks are visited in resource-id order, so a new resource shows up as a change of id.
    for (int i = 0; i < size; i++) {
      String resource = resourceIds[resourceIdsIndex[i]];
      if (resource != null && !resource.equals(lastResource)) {
        count++;
        lastResource = resource;
      }
    }
    return count;
  }
}