/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.db.commitlog;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.Future;
import javax.management.MBeanServer;
import javax.management.ObjectName;
import org.apache.cassandra.config.Config;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.db.RowMutation;
import org.apache.cassandra.io.FSWriteError;
import org.apache.cassandra.metrics.CommitLogMetrics;
import org.apache.cassandra.net.MessagingService;
import org.apache.cassandra.service.CassandraDaemon;
import org.apache.cassandra.utils.FBUtilities;
import org.apache.commons.lang3.StringUtils;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/*
 * The Commit Log tracks every write operation made to the system. The aim of the
 * commit log is to be able to successfully recover data that has not yet been
 * flushed to disk from a Memtable.
 */
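/*
 * A minimal, hedged usage sketch (singleton access as defined below; mutation
 * construction is elided since it depends on the schema):
 *
 *   RowMutation rm = ...;                        // build a mutation for some table
 *   CommitLog.instance.add(rm);                  // append it to the active segment
 *   int replayed = CommitLog.instance.recover(); // on restart, replay unflushed data
 */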
public class CommitLog implements CommitLogMBean
{
private static final Logger logger = LoggerFactory.getLogger(CommitLog.class);
public static final CommitLog instance = new CommitLog();
private final ICommitLogExecutorService executor;
public final CommitLogAllocator allocator;
public final CommitLogArchiver archiver = new CommitLogArchiver();
public static final int END_OF_SEGMENT_MARKER = 0; // this is written out at the end of a segment
public static final int END_OF_SEGMENT_MARKER_SIZE = 4; // number of bytes of ^^^
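    // On replay, CommitLogReplayer treats a zero read where a record's serialized
    // size is expected as the end of the segment's written data.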
public CommitLogSegment activeSegment;
private final CommitLogMetrics metrics;
protected static final String CUR_VER = System.getProperty("cassandra.version", "2.0");
protected static final Map<String, Integer> VERSION_MAP = new HashMap<String, Integer> ()
{{
put("0.7", 1);
put("1.0", 3);
put("1.2", MessagingService.VERSION_12);
put("2.0", MessagingService.VERSION_20);
}};
    protected final int getVersion()
    {
        Integer version = VERSION_MAP.get(CUR_VER);
        if (version == null)
            throw new IllegalStateException("Unknown commit log version: " + CUR_VER);
        return version;
    }
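    // Example: starting the JVM with -Dcassandra.version=1.2 makes getVersion()
    // return MessagingService.VERSION_12; without the property it defaults to "2.0".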
private CommitLog()
{
DatabaseDescriptor.createAllDirectories();
allocator = new CommitLogAllocator();
activateNextSegment();
executor = DatabaseDescriptor.getCommitLogSync() == Config.CommitLogSync.batch
? new BatchCommitLogExecutorService()
: new PeriodicCommitLogExecutorService(this);
MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
try
{
mbs.registerMBean(this, new ObjectName("org.apache.cassandra.db:type=Commitlog"));
}
catch (Exception e)
{
throw new RuntimeException(e);
}
// register metrics
metrics = new CommitLogMetrics(executor, allocator);
}
/**
* FOR TESTING PURPOSES. See CommitLogAllocator.
*/
public void resetUnsafe()
{
allocator.resetUnsafe();
activateNextSegment();
}
/**
* Perform recovery on commit logs located in the directory specified by the config file.
*
* @return the number of mutations replayed
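     *
     * A hedged usage sketch (a startup-time call, as the daemon would make it):
     *
     * <pre>{@code
     * int replayed = CommitLog.instance.recover();
     * logger.info("Recovered {} mutations from the commit log", replayed);
     * }</pre>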
*/
public int recover() throws IOException
{
archiver.maybeRestoreArchive();
File[] files = new File(DatabaseDescriptor.getCommitLogLocation()).listFiles(new FilenameFilter()
{
public boolean accept(File dir, String name)
{
// we used to try to avoid instantiating commitlog (thus creating an empty segment ready for writes)
// until after recover was finished. this turns out to be fragile; it is less error-prone to go
// ahead and allow writes before recover(), and just skip active segments when we do.
return CommitLogDescriptor.isValid(name) && !instance.allocator.manages(name);
}
});
int replayed = 0;
        if (files == null || files.length == 0)
{
logger.info("No commitlog files found; skipping replay");
}
else
{
Arrays.sort(files, new CommitLogSegment.CommitLogSegmentFileComparator());
logger.info("Replaying {}", StringUtils.join(files, ", "));
replayed = recover(files);
logger.info("Log replay complete, {} replayed mutations", replayed);
for (File f : files)
CommitLog.instance.allocator.recycleSegment(f);
}
allocator.enableReserveSegmentCreation();
return replayed;
}
/**
* Perform recovery on a list of commit log files.
*
* @param clogs the list of commit log files to replay
* @return the number of mutations replayed
*/
public int recover(File... clogs) throws IOException
{
CommitLogReplayer recovery = new CommitLogReplayer();
recovery.recover(clogs);
return recovery.blockForWrites();
}
/**
* Perform recovery on a single commit log.
*/
public void recover(String path) throws IOException
{
recover(new File(path));
}
/**
* @return a Future representing a ReplayPosition such that when it is ready,
* all commitlog tasks enqueued prior to the getContext call will be complete (i.e., appended to the log)
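     *
     * A usage sketch (blocking on the future with the same helper this class uses
     * elsewhere):
     *
     * <pre>{@code
     * ReplayPosition position = FBUtilities.waitOnFuture(CommitLog.instance.getContext());
     * }</pre>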
*/
public Future<ReplayPosition> getContext()
{
Callable<ReplayPosition> task = new Callable<ReplayPosition>()
{
public ReplayPosition call()
{
return activeSegment.getContext();
}
};
return executor.submit(task);
}
/**
* Used by tests.
*
* @return the number of active segments (segments with unflushed data in them)
*/
public int activeSegments()
{
return allocator.getActiveSegments().size();
}
    // pgaref: monotonically increasing counter used to name the znodes that mirror
    // commit log entries when this node runs an embedded ZooKeeper server.
    public static long log_count = 0;
    /**
     * Add a RowMutation to the commit log. When an embedded ZooKeeper server is
     * present and this node is LEADING, the serialized mutation is also replicated
     * as a persistent znode (pgaref).
     *
     * @param rm the RowMutation to add to the log
     */
    public void add(RowMutation rm)
    {
        if (CassandraDaemon.ZooServer != null)
        {
            if (CassandraDaemon.ZooServer.getServerState().equalsIgnoreCase("LEADING"))
            {
                logger.debug("pgaref - LEADER: adding RowMutation to the CommitLog");
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                try
                {
                    DataOutputStream out = new DataOutputStream(baos);
                    RowMutation.serializer.serialize(rm, out, getVersion());
                    out.close();
                }
                catch (IOException e)
                {
                    logger.error("pgaref - LEADER: serialization of the RowMutation failed", e);
                }
                if (baos.size() > 0)
                    CassandraDaemon.ZooServer.insertPersistent("/cassandra" + String.format("%015d", ++log_count), baos.toByteArray());
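                // For the first replicated entry the path above evaluates to
                // "/cassandra" + String.format("%015d", 1) == "/cassandra000000000000001".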
                /* pgaref (leader) - disabled: delete an older znode to bound growth.
                if (log_count > 1L) // skip when this is the first znode
                {
                    long tmp = (log_count - 2L);
                    try
                    {
                        CassandraDaemon.ZooServer.delete("/cassandra" + String.format("%015d", tmp), tmp);
                    }
                    catch (NoNodeException e)
                    {
                        logger.error("pgaref - CaZoo leader cannot delete previous znode: " + tmp + " | " + e.toString());
                    }
                }*/
}
            else
            {
                logger.debug("pgaref - FOLLOWER: CommitLog add invoked; skipping ZooKeeper replication");
            }
}
        /*
         * LEGACY CODE (pgaref): earlier leader-side approach that pushed the serialized
         * mutation straight through the ZooKeeper request processor.
         *
if(QuorumPeerMain.getQuorumPeer().getServerState().equalsIgnoreCase("LEADING")){
logger.info("pgaref - Master: adding rowmutation in the CommitLog");
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try {
DataOutputStream out = new DataOutputStream(baos);
RowMutation.serializer.serialize(rm, out, getVersion());
out.close();
} catch (IOException e) {
logger.info("pgaref - Master: Serializer exception!");
e.printStackTrace();
}
if(baos.size() >0){
// Debug 1, 3, 8 => 1l
Request foo = new Request(null, 1l, 1, OpCode.create, ByteBuffer.wrap(baos.toByteArray()), null);
try {
ZooKeeperServer.getRequestProcessor().processRequest(foo);
} catch (RequestProcessorException e) {
logger.info("pgaref - Master: (send) Process Request exception!");
}
}*/
        /*
         * LEGACY CODE (pgaref): client-mode approach that connected to ZooKeeper as a
         * regular client and created the znode through the public API.
try {
ZooKeeper zk = new ZooKeeper("127.0.0.1:2181", 10000, this);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream out = new DataOutputStream(baos);
RowMutation.serializer.serialize(rm, out, getVersion());
out.close();
logger.info("pgaref- Write Serialized : "+ baos.size() +" : "+ baos.toString());
zk.create("/cazoo", baos.toByteArray(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT_SEQUENTIAL);
zk.close();
} catch (KeeperException ke) {
logger.info("CaZoo KeeperException "+ke);
}catch (InterruptedException ke1) {
logger.info("CaZoo InterruptedException "+ ke1);
}catch (IOException ke2) {
logger.info("CaZoo IOException "+ke2);
}
}*/
executor.add(new LogRecordAdder(rm));
}
/**
* Modifies the per-CF dirty cursors of any commit log segments for the column family according to the position
* given. Discards any commit log segments that are no longer used.
*
* @param cfId the column family ID that was flushed
* @param context the replay position of the flush
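     *
     * A hedged usage sketch (in practice the flush path supplies these values):
     *
     * <pre>{@code
     * ReplayPosition ctx = FBUtilities.waitOnFuture(CommitLog.instance.getContext());
     * // ... flush the memtable for cfId to an SSTable ...
     * CommitLog.instance.discardCompletedSegments(cfId, ctx);
     * }</pre>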
*/
public void discardCompletedSegments(final UUID cfId, final ReplayPosition context)
{
        Callable<Object> task = new Callable<Object>()
{
public Object call()
{
logger.debug("discard completed log segments for {}, column family {}", context, cfId);
                // Go through the active segment files, which are ordered oldest to newest, marking the
// flushed CF as clean, until we reach the segment file containing the ReplayPosition passed
// in the arguments. Any segments that become unused after they are marked clean will be
// recycled or discarded.
for (Iterator<CommitLogSegment> iter = allocator.getActiveSegments().iterator(); iter.hasNext();)
{
CommitLogSegment segment = iter.next();
segment.markClean(cfId, context);
// If the segment is no longer needed, and we have another spare segment in the hopper
// (to keep the last segment from getting discarded), pursue either recycling or deleting
// this segment file.
if (iter.hasNext())
{
if (segment.isUnused())
{
logger.debug("Commit log segment {} is unused", segment);
allocator.recycleSegment(segment);
}
else
{
logger.debug("Not safe to delete commit log segment {}; dirty is {}",
segment, segment.dirtyString());
}
}
else
{
logger.debug("Not deleting active commitlog segment {}", segment);
}
// Don't mark or try to delete any newer segments once we've reached the one containing the
// position of the flush.
if (segment.contains(context))
break;
}
return null;
}
};
FBUtilities.waitOnFuture(executor.submit(task));
}
/**
* Forces a disk flush on the commit log files that need it.
*/
public void sync()
{
for (CommitLogSegment segment : allocator.getActiveSegments())
{
segment.sync();
}
}
/**
* @return the number of tasks completed by the commit log executor
*/
public long getCompletedTasks()
{
return metrics.completedTasks.value();
}
    /**
     * @return the depth of the pending commit log executor queue
     */
public long getPendingTasks()
{
return metrics.pendingTasks.value();
}
    /**
     * @return the total size, in bytes, occupied by commitlog segments (used by the MBean)
     */
public long getTotalCommitlogSize()
{
return metrics.totalCommitLogSize.value();
}
    /**
     * Fetches a new segment file from the allocator and makes it the active segment.
     */
private void activateNextSegment()
{
activeSegment = allocator.fetchSegment();
logger.debug("Active segment is now {}", activeSegment);
}
public List<String> getActiveSegmentNames()
{
List<String> segmentNames = new ArrayList<String>();
for (CommitLogSegment segment : allocator.getActiveSegments())
segmentNames.add(segment.getName());
return segmentNames;
}
public List<String> getArchivingSegmentNames()
{
return new ArrayList<String>(archiver.archivePending.keySet());
}
/**
* Shuts down the threads used by the commit log, blocking until completion.
*/
public void shutdownBlocking() throws InterruptedException
{
executor.shutdown();
executor.awaitTermination();
allocator.shutdown();
allocator.awaitTermination();
}
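    // A hedged shutdown sketch (e.g. from a drain/stop hook):
    //   CommitLog.instance.shutdownBlocking();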
    // TODO this should be a Runnable since it doesn't actually return anything, but it's difficult to do that
    // without breaking the fragile CheaterFutureTask in BatchCommitLogExecutorService.
class LogRecordAdder implements Callable, Runnable
{
final RowMutation rowMutation;
LogRecordAdder(RowMutation rm)
{
this.rowMutation = rm;
}
public void run()
{
long totalSize = RowMutation.serializer.serializedSize(rowMutation, MessagingService.current_version) + CommitLogSegment.ENTRY_OVERHEAD_SIZE;
if (totalSize > DatabaseDescriptor.getCommitLogSegmentSize())
{
logger.warn("Skipping commitlog append of extremely large mutation ({} bytes)", totalSize);
return;
}
if (!activeSegment.hasCapacityFor(totalSize))
{
CommitLogSegment oldSegment = activeSegment;
activateNextSegment();
// Now we can run the user defined command just before switching to the new commit log.
// (Do this here instead of in the recycle call so we can get a head start on the archive.)
archiver.maybeArchive(oldSegment.getPath(), oldSegment.getName());
}
try
{
activeSegment.write(rowMutation);
}
catch (IOException e)
{
throw new FSWriteError(e, activeSegment.getPath());
}
}
public Object call()
{
run();
return null;
}
}
    /*
     * pgaref - Watcher callback retained for the legacy client-mode approach above.
     * @see org.apache.zookeeper.Watcher#process(org.apache.zookeeper.WatchedEvent)
     *
    @Override
    synchronized public void process(WatchedEvent event)
    {
        System.out.println("CaZoo: Got a Write event " + event.toString());
    }*/
}