* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
package org.apache.cassandra.db.compaction;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.*;
import com.google.common.collect.AbstractIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.cassandra.concurrent.DebuggableThreadPoolExecutor;
import org.apache.cassandra.concurrent.NamedThreadFactory;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.db.*;
import org.apache.cassandra.db.columniterator.OnDiskAtomIterator;
import org.apache.cassandra.io.sstable.SSTableIdentityIterator;
import org.apache.cassandra.utils.*;
* A class to run compaction taking advantage of multi-core processors:
* One Deserializer thread per input sstable performs read + deserialize (a row at a time).
* The resulting ColumnFamilies are added to a queue, which is fed to the merge Reducer.
* The merge Reducer creates MergeTasks on a thread-per-core Executor, and returns AsyncPrecompactedRow objects.
* The main complication is in handling larger-than-memory rows. When one is encountered, no further deserialization
* is done until that row is merged and written -- creating a pipeline stall, as it were. Thus, this is intended
* to be useful with mostly-in-memory row sizes, but preserves correctness in the face of occasional exceptions.
// Compaction iterable built on AbstractCompactionIterable; iterator() wires one Deserializer per scanner into a merge.
public class ParallelCompactionIterable extends AbstractCompactionIterable
private static final Logger logger = LoggerFactory.getLogger(ParallelCompactionIterable.class);
// Size threshold handed to every Deserializer, which compares it against each row's dataSize
// to pick between the "lazy" and "eager" deserialization paths.
private final int maxInMemorySize;
/**
 * Creates an iterable whose per-scanner in-memory limit is the configured in-memory
 * compaction limit divided evenly between the supplied scanners.
 *
 * @param type       operation type, forwarded to the four-argument constructor
 * @param scanners   the input scanners; an empty list is tolerated
 * @param controller compaction controller, forwarded to the four-argument constructor
 */
public ParallelCompactionIterable(OperationType type, List<ICompactionScanner> scanners, CompactionController controller)
{
    // Guard the divisor: with an empty scanner list the plain division raised
    // ArithmeticException (division by zero) before anything could be constructed.
    this(type, scanners, controller, DatabaseDescriptor.getInMemoryCompactionLimit() / Math.max(1, scanners.size()));
}
/**
 * Creates an iterable with an explicit per-scanner in-memory limit.
 *
 * @param type            operation type, handed to the superclass
 * @param scanners        the input scanners, handed to the superclass
 * @param controller      compaction controller, handed to the superclass
 * @param maxInMemorySize size threshold later passed to each Deserializer
 */
public ParallelCompactionIterable(OperationType type, List<ICompactionScanner> scanners, CompactionController controller, int maxInMemorySize)
{
    super(controller, type, scanners);
    this.maxInMemorySize = maxInMemorySize;
}
public CloseableIterator<AbstractCompactedRow> iterator()
List<CloseableIterator<RowContainer>> sources = new ArrayList<CloseableIterator<RowContainer>>(scanners.size());
for (ICompactionScanner scanner : scanners)
sources.add(new Deserializer(scanner, maxInMemorySize));
return new Unwrapper(MergeIterator.get(sources, RowContainer.comparator, new Reducer()));
// Adapts the merge output: converts each CompactedRowContainer into the AbstractCompactedRow it stands for.
private static class Unwrapper extends AbstractIterator<AbstractCompactedRow> implements CloseableIterator<AbstractCompactedRow>
// upstream iterator supplying the containers to unwrap
private final CloseableIterator<CompactedRowContainer> reducer;
public Unwrapper(CloseableIterator<CompactedRowContainer> reducer)
this.reducer = reducer;
// Returns the next compacted row, or endOfData() once the upstream iterator has nothing left.
protected AbstractCompactedRow computeNext()
if (!reducer.hasNext())
return endOfData();
CompactedRowContainer container = reducer.next();
AbstractCompactedRow compactedRow;
// A null future means the container already carries its row; otherwise wait on the
// future via FBUtilities.waitOnFuture and wrap the resulting ColumnFamily in a PrecompactedRow.
compactedRow = container.future == null
? container.row
: new PrecompactedRow(container.key, FBUtilities.waitOnFuture(container.future));
return compactedRow;
// NOTE(review): no body for close() is visible in this view -- confirm that it closes the wrapped reducer.
public void close() throws IOException
// Reducer plugged into the MergeIterator: turns the RowContainers collected in `rows`
// into one CompactedRowContainer per getReduced() call.
private class Reducer extends MergeIterator.Reducer<RowContainer, CompactedRowContainer>
// containers accumulated for the row currently being reduced
private final List<RowContainer> rows = new ArrayList<RowContainer>();
// Pool that runs MergeTasks; sized from FBUtilities.getAvailableProcessors(), threads named "CompactionReducer".
private final ThreadPoolExecutor executor = new DebuggableThreadPoolExecutor(FBUtilities.getAvailableProcessors(),
new SynchronousQueue<Runnable>(),
new NamedThreadFactory("CompactionReducer"));
// NOTE(review): no body is visible here -- presumably `current` is appended to `rows`; confirm.
public void reduce(RowContainer current)
// Produces the compacted container for the accumulated rows, then refreshes bytesRead
// with the sum of every scanner's current position.
protected CompactedRowContainer getReduced()
assert rows.size() > 0;
CompactedRowContainer compacted = getCompactedRow(rows);
// NOTE(review): `rows` is not visibly reset before the next key -- confirm where it is cleared.
long n = 0;
for (ICompactionScanner scanner : scanners)
n += scanner.getCurrentPosition();
bytesRead = n;
return compacted;
// Chooses the merge strategy: when every container holds a deserialized row, a MergeTask is
// submitted to the executor and a future-backed container is returned; otherwise a
// LazilyCompactedRow is built over one iterator per container.
public CompactedRowContainer getCompactedRow(List<RowContainer> rows)
boolean inMemory = true;
for (RowContainer container : rows)
// a null row marks a container that only carries an iterator wrapper
if (container.row == null)
inMemory = false;
if (inMemory)
// caller will re-use rows List, so make ourselves a copy
List<Row> rawRows = new ArrayList<Row>(rows.size());
// NOTE(review): the loop body is not visible -- presumably each rowContainer.row is added to rawRows; confirm.
for (RowContainer rowContainer : rows)
// the key of the first container is used as the key of the merged row
return new CompactedRowContainer(rows.get(0).getKey(), executor.submit(new MergeTask(rawRows)));
// Mixed case: wrapper-backed containers contribute their wrapper directly, deserialized rows
// are adapted through DeserializedColumnIterator.
List<OnDiskAtomIterator> iterators = new ArrayList<OnDiskAtomIterator>(rows.size());
for (RowContainer container : rows)
iterators.add(container.row == null ? container.wrapper : new DeserializedColumnIterator(container.row));
return new CompactedRowContainer(new LazilyCompactedRow(controller, iterators));
// NOTE(review): no body for close() is visible -- confirm that the executor is shut down here.
public void close()
* Merges a set of in-memory rows
// Callable submitted to the Reducer's executor; merges the given rows into a single ColumnFamily.
private class MergeTask implements Callable<ColumnFamily>
// the rows to merge; call() uses the key of the first row for the whole merge
private final List<Row> rows;
public MergeTask(List<Row> rows)
this.rows = rows;
// Merges into a fresh ArrayBackedSortedColumns column family created from the controller's
// table metadata, then returns the result of PrecompactedRow.removeDeletedAndOldShards.
public ColumnFamily call() throws Exception
final ColumnFamily returnCF = ArrayBackedSortedColumns.factory.create(controller.cfs.metadata);
List<CloseableIterator<Column>> data = new ArrayList<CloseableIterator<Column>>(rows.size());
// NOTE(review): the loop body is not visible -- presumably one column iterator per row is added to `data`; confirm.
for (Row row : rows)
// the index updater is obtained for the first row's key
PrecompactedRow.merge(returnCF, data, controller.cfs.indexManager.updaterFor(rows.get(0).key));
return PrecompactedRow.removeDeletedAndOldShards(rows.get(0).key, controller, returnCF);
private class DeserializedColumnIterator implements OnDiskAtomIterator
private final Row row;
private final Iterator<Column> iter;
public DeserializedColumnIterator(Row row)
this.row = row;
iter = row.cf.iterator();
public ColumnFamily getColumnFamily()
return row.cf;
public DecoratedKey getKey()
return row.key;
public void close() throws IOException {}
public boolean hasNext()
return iter.hasNext();
public OnDiskAtom next()
return iter.next();
public void remove()
throw new UnsupportedOperationException();
// Iterator over one scanner's rows. A background thread started by the constructor reads rows
// from the scanner and hands them over through a single-slot queue; computeNext() takes from that queue.
private static class Deserializer extends AbstractIterator<RowContainer> implements CloseableIterator<RowContainer>
// capacity 1: put() blocks while a previously queued container has not been taken yet
private final LinkedBlockingQueue<RowContainer> queue = new LinkedBlockingQueue<RowContainer>(1);
// sentinel compared by identity in computeNext() to signal the end of the data
private static final RowContainer finished = new RowContainer((Row) null);
private final ICompactionScanner scanner;
// Stores the scanner and starts the reader thread.
// maxInMemorySize: rows whose dataSize exceeds this take the "lazy" path below.
public Deserializer(ICompactionScanner ssts, final int maxInMemorySize)
this.scanner = ssts;
Runnable runnable = new WrappedRunnable()
protected void runMayThrow() throws Exception
// non-null only after a lazily wrapped row has been queued
SimpleCondition condition = null;
while (true)
// NOTE(review): the statement guarded by this check is not visible -- presumably the thread
// waits on the condition before it is reset; confirm.
if (condition != null)
condition = null;
// NOTE(review): the end-of-input handling is not visible -- presumably `finished` is queued
// and the loop exits; confirm.
if (!scanner.hasNext())
SSTableIdentityIterator iter = (SSTableIdentityIterator) scanner.next();
// oversized row: queue a wrapper around the iterator together with a fresh condition
if (iter.dataSize > maxInMemorySize)
logger.debug("parallel lazy deserialize from {}", iter.getPath());
condition = new SimpleCondition();
queue.put(new RowContainer(new NotifyingSSTableIdentityIterator(iter, condition)));
// NOTE(review): presumably the alternative (else) branch -- the row is read fully into an
// ArrayBackedSortedColumns column family and queued as a Row; confirm the branching.
logger.debug("parallel eager deserialize from {}", iter.getPath());
queue.put(new RowContainer(new Row(iter.getKey(), iter.getColumnFamilyWithColumns(ArrayBackedSortedColumns.factory))));
// the reader thread is named after the scanner's backing files
new Thread(runnable, "Deserialize " + scanner.getBackingFiles()).start();
// Takes the next container from the queue (blocking until one is available) and maps the
// `finished` sentinel to endOfData().
protected RowContainer computeNext()
RowContainer container;
container = queue.take();
// an interrupt while waiting is rethrown as an AssertionError carrying the original exception
catch (InterruptedException e)
throw new AssertionError(e);
return container == finished ? endOfData() : container;
// NOTE(review): no body for close() is visible -- confirm that the scanner is closed here.
public void close() throws IOException
* A wrapper around SSTableIdentityIterator that notifies the given condition when it is closed.
// OnDiskAtomIterator that delegates to a wrapped SSTableIdentityIterator and additionally holds a SimpleCondition.
private static class NotifyingSSTableIdentityIterator implements OnDiskAtomIterator
// the iterator every call is forwarded to
private final SSTableIdentityIterator wrapped;
// condition supplied by the creator of this wrapper
private final SimpleCondition condition;
public NotifyingSSTableIdentityIterator(SSTableIdentityIterator wrapped, SimpleCondition condition)
this.wrapped = wrapped;
this.condition = condition;
// Delegates to the wrapped iterator.
public ColumnFamily getColumnFamily()
return wrapped.getColumnFamily();
// Delegates to the wrapped iterator.
public DecoratedKey getKey()
return wrapped.getKey();
// NOTE(review): no body for close() is visible -- presumably it closes `wrapped` and signals
// `condition`; confirm.
public void close() throws IOException
// Delegates to the wrapped iterator.
public boolean hasNext()
return wrapped.hasNext();
// Delegates to the wrapped iterator.
public OnDiskAtom next()
return wrapped.next();
// Removal is not supported.
public void remove()
throw new UnsupportedOperationException();
private static class RowContainer
// either row is not null, or wrapper is not null. But not both.
public final Row row;
public final NotifyingSSTableIdentityIterator wrapper;
public static final Comparator<RowContainer> comparator = new Comparator<RowContainer>()
public int compare(RowContainer o1, RowContainer o2)
return o1.getKey().compareTo(o2.getKey());
private RowContainer(Row row)
this.row = row;
wrapper = null;
public RowContainer(NotifyingSSTableIdentityIterator wrapper)
this.wrapper = wrapper;
row = null;
public DecoratedKey getKey()
return row == null ? wrapper.getKey() : row.key;
private static class CompactedRowContainer
public final DecoratedKey key;
/** either "future" or "row" will be not-null, but not both at once. */
public final Future<ColumnFamily> future;
public final LazilyCompactedRow row;
private CompactedRowContainer(DecoratedKey key, Future<ColumnFamily> future)
this.key = key;
this.future = future;
row = null;
private CompactedRowContainer(LazilyCompactedRow row)
this.row = row;
future = null;
key = null;