/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.db;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.net.InetAddress;
import java.security.MessageDigest;
import java.util.Set;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import org.apache.cassandra.serializers.MarshalException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.db.context.CounterContext;
import org.apache.cassandra.db.context.IContext.ContextRelationship;
import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.exceptions.OverloadedException;
import org.apache.cassandra.exceptions.RequestExecutionException;
import org.apache.cassandra.io.util.DataOutputBuffer;
import org.apache.cassandra.utils.Allocator;
import org.apache.cassandra.service.AbstractWriteResponseHandler;
import org.apache.cassandra.service.StorageProxy;
import org.apache.cassandra.utils.*;
/**
* A column that represents a partitioned counter.
*/
public class CounterColumn extends Column
{
private static final Logger logger = LoggerFactory.getLogger(CounterColumn.class);
protected static final CounterContext contextManager = CounterContext.instance();
private final long timestampOfLastDelete;
public CounterColumn(ByteBuffer name, long value, long timestamp)
{
this(name, contextManager.create(value, HeapAllocator.instance), timestamp);
}
public CounterColumn(ByteBuffer name, long value, long timestamp, long timestampOfLastDelete)
{
this(name, contextManager.create(value, HeapAllocator.instance), timestamp, timestampOfLastDelete);
}
public CounterColumn(ByteBuffer name, ByteBuffer value, long timestamp)
{
this(name, value, timestamp, Long.MIN_VALUE);
}
public CounterColumn(ByteBuffer name, ByteBuffer value, long timestamp, long timestampOfLastDelete)
{
super(name, value, timestamp);
this.timestampOfLastDelete = timestampOfLastDelete;
}
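    /*
     * Illustrative sketch (example names invented): the long-valued
     * constructors above wrap a raw count in a freshly allocated counter
     * context, while the ByteBuffer-valued ones take an already-serialized
     * context verbatim.
     *
     *   ByteBuffer name = ByteBufferUtil.bytes("hits");
     *   // allocates a single-shard context holding the local delta +42
     *   CounterColumn fresh = new CounterColumn(name, 42L, 1L);
     *   // reuses an existing context (e.g. one read back from an sstable)
     *   CounterColumn copy = new CounterColumn(name, fresh.value(), 1L);
     */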
public static CounterColumn create(ByteBuffer name, ByteBuffer value, long timestamp, long timestampOfLastDelete, ColumnSerializer.Flag flag)
{
        // A negative #elt (the context's shard count header) flags a context whose deltas must be cleared
short count = value.getShort(value.position());
if (flag == ColumnSerializer.Flag.FROM_REMOTE || (flag == ColumnSerializer.Flag.LOCAL && count < 0))
value = CounterContext.instance().clearAllDelta(value);
return new CounterColumn(name, value, timestamp, timestampOfLastDelete);
}
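    /*
     * Hypothetical call site for create(), assuming the deserialization flags
     * keep their usual meaning: a context received from another replica must
     * never retain our local deltas, so FROM_REMOTE forces clearAllDelta
     * regardless of the #elt header.
     *
     *   CounterColumn c = CounterColumn.create(name, context, ts, tsOfLastDelete,
     *                                          ColumnSerializer.Flag.FROM_REMOTE);
     */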
@Override
public Column withUpdatedName(ByteBuffer newName)
{
return new CounterColumn(newName, value, timestamp, timestampOfLastDelete);
}
public long timestampOfLastDelete()
{
return timestampOfLastDelete;
}
public long total()
{
return contextManager.total(value);
}
@Override
public int dataSize()
{
        /*
         * A counter column adds to a Column:
         *  + 8 bytes for timestampOfLastDelete
         */
return super.dataSize() + TypeSizes.NATIVE.sizeof(timestampOfLastDelete);
}
@Override
public int serializedSize(TypeSizes typeSizes)
{
return super.serializedSize(typeSizes) + typeSizes.sizeof(timestampOfLastDelete);
}
@Override
public Column diff(Column column)
{
assert (column instanceof CounterColumn) || (column instanceof DeletedColumn) : "Wrong class type: " + column.getClass();
if (timestamp() < column.timestamp())
return column;
        // Note that at this point column can't be a tombstone. Indeed,
        // column is the result of merging us with other nodes' results, and
        // merging a CounterColumn with a tombstone never returns a tombstone
        // unless that tombstone's timestamp is greater than the CounterColumn's.
assert !(column instanceof DeletedColumn) : "Wrong class type: " + column.getClass();
if (timestampOfLastDelete() < ((CounterColumn)column).timestampOfLastDelete())
return column;
ContextRelationship rel = contextManager.diff(column.value(), value());
if (ContextRelationship.GREATER_THAN == rel || ContextRelationship.DISJOINT == rel)
return column;
return null;
}
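    /*
     * Worked example of the diff contract (timestamps invented): with a local
     * counter at timestamp 5 and an incoming version at timestamp 9, diff
     * returns the incoming column; a null return means the local column
     * already covers everything the incoming one knows about.
     *
     *   Column newer = ...;                 // same name, timestamp 9
     *   Column missing = local.diff(newer); // 'newer' here; null if covered
     */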
/*
* We have to special case digest creation for counter column because
* we don't want to include the information about which shard of the
* context is a delta or not, since this information differs from node to
* node.
*/
@Override
public void updateDigest(MessageDigest digest)
{
digest.update(name.duplicate());
// We don't take the deltas into account in a digest
contextManager.updateDigest(digest, value);
DataOutputBuffer buffer = new DataOutputBuffer();
try
{
buffer.writeLong(timestamp);
buffer.writeByte(serializationFlags());
buffer.writeLong(timestampOfLastDelete);
}
catch (IOException e)
{
throw new RuntimeException(e);
}
digest.update(buffer.getData(), 0, buffer.getLength());
}
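    /*
     * Sketch of why deltas are excluded (scenario invented): the replica that
     * accepted a write keeps a delta flag on its shard that other replicas
     * never see, so hashing the raw value() bytes would make identical
     * counters digest-mismatch on every digest read.
     *
     *   MessageDigest d1 = MessageDigest.getInstance("MD5");
     *   MessageDigest d2 = MessageDigest.getInstance("MD5");
     *   withDelta.updateDigest(d1);    // context with its delta flag set
     *   withoutDelta.updateDigest(d2); // same counts, delta cleared
     *   assert MessageDigest.isEqual(d1.digest(), d2.digest());
     */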
@Override
public Column reconcile(Column column, Allocator allocator)
{
assert (column instanceof CounterColumn) || (column instanceof DeletedColumn) : "Wrong class type: " + column.getClass();
// live + tombstone: track last tombstone
if (column.isMarkedForDelete(Long.MIN_VALUE)) // cannot be an expired column, so the current time is irrelevant
{
// live < tombstone
if (timestamp() < column.timestamp())
{
return column;
}
// live last delete >= tombstone
if (timestampOfLastDelete() >= column.timestamp())
{
return this;
}
// live last delete < tombstone
return new CounterColumn(name(), value(), timestamp(), column.timestamp());
}
        // our timestamp < the other column's last delete: it supersedes us
        if (timestamp() < ((CounterColumn)column).timestampOfLastDelete())
            return column;
        // our last delete > the other column's timestamp: we supersede it
        if (timestampOfLastDelete() > column.timestamp())
            return this;
// live + live: merge clocks; update value
return new CounterColumn(
name(),
contextManager.merge(value(), column.value(), allocator),
Math.max(timestamp(), column.timestamp()),
Math.max(timestampOfLastDelete(), ((CounterColumn)column).timestampOfLastDelete()));
}
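    /*
     * Worked example of the tombstone branch above (timestamps invented): a
     * live counter written at t=10 reconciled with a tombstone at t=8 keeps
     * its context but records the delete, so older increments replayed later
     * can be recognized as already deleted.
     *
     *   CounterColumn live = new CounterColumn(name, 5L, 10L);
     *   Column tombstone = ...; // a DeletedColumn with timestamp 8
     *   // 10 >= 8 and Long.MIN_VALUE < 8, so the result is a copy of 'live'
     *   // with timestampOfLastDelete == 8
     *   Column merged = live.reconcile(tombstone, HeapAllocator.instance);
     */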
@Override
public boolean equals(Object o)
{
// super.equals() returns false if o is not a CounterColumn
return super.equals(o) && timestampOfLastDelete == ((CounterColumn)o).timestampOfLastDelete;
}
@Override
public int hashCode()
{
int result = super.hashCode();
result = 31 * result + (int)(timestampOfLastDelete ^ (timestampOfLastDelete >>> 32));
return result;
}
@Override
public Column localCopy(ColumnFamilyStore cfs)
{
return new CounterColumn(cfs.internOrCopy(name, HeapAllocator.instance), ByteBufferUtil.clone(value), timestamp, timestampOfLastDelete);
}
@Override
public Column localCopy(ColumnFamilyStore cfs, Allocator allocator)
{
return new CounterColumn(cfs.internOrCopy(name, allocator), allocator.clone(value), timestamp, timestampOfLastDelete);
}
@Override
public String getString(AbstractType<?> comparator)
{
StringBuilder sb = new StringBuilder();
sb.append(comparator.getString(name));
sb.append(":");
        sb.append(false); // deleted flag: a live counter column is never a tombstone
sb.append(":");
sb.append(contextManager.toString(value));
sb.append("@");
sb.append(timestamp());
sb.append("!");
sb.append(timestampOfLastDelete);
return sb.toString();
}
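    /*
     * For illustration, a counter named "hits" at timestamp 10 whose last
     * delete was at 8 renders roughly as
     *
     *   hits:false:{context rendered by CounterContext.toString}@10!8
     */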
@Override
public int serializationFlags()
{
return ColumnSerializer.COUNTER_MASK;
}
@Override
public void validateFields(CFMetaData metadata) throws MarshalException
{
validateName(metadata);
        // We cannot use the value validator as we do for other columns, because
        // CounterColumnType validates a long, which is not the internal representation of counters
contextManager.validateContext(value());
}
/**
* Check if a given counterId is found in this CounterColumn context.
*/
public boolean hasCounterId(CounterId id)
{
return contextManager.hasCounterId(value(), id);
}
private CounterColumn computeOldShardMerger(int mergeBefore)
{
ByteBuffer bb = contextManager.computeOldShardMerger(value(), CounterId.getOldLocalCounterIds(), mergeBefore);
if (bb == null)
return null;
else
return new CounterColumn(name(), bb, timestamp(), timestampOfLastDelete);
}
private CounterColumn removeOldShards(int gcBefore)
{
ByteBuffer bb = contextManager.removeOldShards(value(), gcBefore);
if (bb == value())
return this;
else
{
return new CounterColumn(name(), bb, timestamp(), timestampOfLastDelete);
}
}
public static void mergeAndRemoveOldShards(DecoratedKey key, ColumnFamily cf, int gcBefore, int mergeBefore)
{
mergeAndRemoveOldShards(key, cf, gcBefore, mergeBefore, true);
}
    /**
     * Removing old shards is a two-phase process.
     * First phase: we merge the old shard's value into the current shard and
     * 'nullify' the old one. We then send the counter context, with the old
     * shard nullified, to all other replicas.
     * Second phase: once an old shard has been nullified for longer than
     * gc_grace (to be sure all other replicas are aware of the merge), we
     * simply remove that old shard from the context (its value is 0).
     * This method performs both phases.
     * (Note that the sendToOtherReplica flag exists only to facilitate
     * testing. It should be true in real code, so prefer the method above.)
     */
public static void mergeAndRemoveOldShards(DecoratedKey key, ColumnFamily cf, int gcBefore, int mergeBefore, boolean sendToOtherReplica)
{
ColumnFamily remoteMerger = null;
for (Column c : cf)
{
if (!(c instanceof CounterColumn))
continue;
CounterColumn cc = (CounterColumn) c;
CounterColumn shardMerger = cc.computeOldShardMerger(mergeBefore);
CounterColumn merged = cc;
if (shardMerger != null)
{
merged = (CounterColumn) cc.reconcile(shardMerger);
if (remoteMerger == null)
remoteMerger = cf.cloneMeShallow();
remoteMerger.addColumn(merged);
}
CounterColumn cleaned = merged.removeOldShards(gcBefore);
if (cleaned != cc)
{
cf.replace(cc, cleaned);
}
}
if (remoteMerger != null && sendToOtherReplica)
{
try
{
sendToOtherReplica(key, remoteMerger);
}
catch (Exception e)
{
logger.error("Error while sending shard merger mutation to remote endpoints", e);
}
}
}
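    /*
     * Hypothetical compaction-time usage of the two phases (the actual
     * mergeBefore/gcBefore policy lives in the caller):
     *
     *   // first compaction: fold old shards into the live one, keep
     *   // zero-valued placeholders, and broadcast them to the other replicas
     *   CounterColumn.mergeAndRemoveOldShards(key, cf, gcBefore, mergeBefore);
     *   // a later compaction, once a placeholder is older than gcBefore,
     *   // drops it from the context entirely via removeOldShards
     */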
public Column markDeltaToBeCleared()
{
return new CounterColumn(name, contextManager.markDeltaToBeCleared(value), timestamp, timestampOfLastDelete);
}
private static void sendToOtherReplica(DecoratedKey key, ColumnFamily cf) throws RequestExecutionException
{
RowMutation rm = new RowMutation(cf.metadata().ksName, key.key, cf);
final InetAddress local = FBUtilities.getBroadcastAddress();
String localDataCenter = DatabaseDescriptor.getEndpointSnitch().getDatacenter(local);
StorageProxy.performWrite(rm, ConsistencyLevel.ANY, localDataCenter, new StorageProxy.WritePerformer()
{
public void apply(IMutation mutation, Iterable<InetAddress> targets, AbstractWriteResponseHandler responseHandler, String localDataCenter, ConsistencyLevel consistency_level)
throws OverloadedException
{
                // We should only send to the remote replicas, not the local node
                Set<InetAddress> remotes = Sets.difference(ImmutableSet.copyOf(targets), ImmutableSet.of(local));
                // Fake the local response to satisfy the handler; we never wait on this responseHandler anyway
responseHandler.response(null);
StorageProxy.sendToHintedEndpoints((RowMutation) mutation, remotes, responseHandler, localDataCenter, consistency_level);
}
}, null, WriteType.SIMPLE);
// we don't wait for answers
}
}