/*- * See the file LICENSE for redistribution information. * * Copyright (c) 2002-2006 * Sleepycat Software. All rights reserved. * * $Id: FSyncManager.java,v 1.1 2006/05/06 08:59:58 ckaestne Exp $ */ package com.sleepycat.je.log; import com.sleepycat.je.DatabaseException; import com.sleepycat.je.EnvironmentStats; import com.sleepycat.je.RunRecoveryException; import com.sleepycat.je.StatsConfig; import com.sleepycat.je.config.EnvironmentParams; import com.sleepycat.je.dbi.EnvironmentImpl; import com.sleepycat.je.latch.Latch; import com.sleepycat.je.latch.LatchSupport; import com.sleepycat.je.utilint.PropUtil; /* * The FsyncManager ensures that only one file fsync is issued at a time, for * performance optimization. The goal is to reduce the number of fsyncs issued * by the system by issuing 1 fsync on behalf of a number of threads. * * For example, suppose these writes happen which all need to be fsynced to * disk: * * thread 1 writes a commit record * thread 2 writes a checkpoint * thread 3 writes a commit record * thread 4 writes a commit record * thread 5 writes a checkpoint * * Rather than executing 5 fsyncs, which all must happen synchronously, we hope * to issue fewer. How many fewer depend on timing. Note that the writes * themselves are serialized and are guaranteed to run in order. * * For example: * thread 1 wants to fsync first, no other fsync going on, will issue fsync * thread 2 waits * thread 3 waits * thread 4 waits * - before thread 5 comes, thread 1 finishes fsyncing and returns to * the caller. Now another fsync can be issued that will cover threads * 2,3,4. One of those threads (2, 3, 4} issues the fsync, the others * block. * thread 5 wants to fsync, but sees one going on, so will wait. * - the fsync issued for 2,3,4 can't cover thread 5 because we're not sure * if thread 5's write finished before that fsync call. Thread 5 will have * to issue its own fsync. * * Target file * ----------- * Note that when the buffer pool starts a new file, we fsync the previous file * under the log write latch. Therefore, at any time we only have one target * file to fsync, which is the current write buffer. We do this so that we * don't have to coordinate between files. For example, suppose log files have * 1000 bytes and a commit record is 10 bytes. An LSN of value 6/990 is in * file 6 at offset 990. * * thread 1: logWriteLatch.acquire() * write commit record to LSN 6/980 * logWriteLatch.release() * thread 2: logWriteLatch.acquire() * write commit record to LSN 6/990 * logWriteLatch.release * thread 3: logWriteLatch.acquire() * gets 7/000 as the next LSN to use * see that we flipped to a new file, so call fsync on file 6 * write commit record to LSN 7/000 * logWriteLatch.release() * * Thread 3 will fsync file 6 within the log write latch. That way, at any * time, any non-latched fsyncs should only fsync the latest file. If we * didn't do, there's the chance that thread 3 would fsync file 7 and return to * its caller before the thread 1 and 2 got an fsync for file 6. That wouldn't * be correct, because thread 3's commit might depend on file 6. * * Note that the FileManager keeps a file descriptor that corresponds to the * current end of file, and that is what we fsync. */ class FSyncManager { private EnvironmentImpl envImpl; private long timeout; /* Use as the target for a synchronization block. */ private Latch fsyncLatch; private volatile boolean fsyncInProgress; private FSyncGroup nextFSyncWaiters; /* stats */ private long nFSyncRequests = 0; private long nFSyncs = 0; private long nTimeouts = 0; FSyncManager(EnvironmentImpl envImpl) throws DatabaseException { timeout = PropUtil.microsToMillis(envImpl.getConfigManager().getLong( EnvironmentParams.LOG_FSYNC_TIMEOUT)); this.envImpl = envImpl; fsyncLatch = LatchSupport.makeLatch("fsyncLatch", envImpl); fsyncInProgress = false; nextFSyncWaiters = new FSyncGroup(timeout, envImpl); } /** * Request that this file be fsynced to disk. This thread may or may not * actually execute the fsync, but will not return until a fsync has been * issued and executed on behalf of its write. There is a timeout period * specified by EnvironmentParam.LOG_FSYNC_TIMEOUT that ensures that no * thread gets stuck here indefinitely. * * When a thread comes in, it will find one of two things. * 1. There is no fsync going on right now. This thread should go * ahead and fsync. * 2. There is an active fsync, wait until it's over before * starting a new fsync. * * When a fsync is going on, all those threads that come along are grouped * together as the nextFsyncWaiters. When the current fsync is finished, * one of those nextFsyncWaiters will be selected as a leader to issue the * next fsync. The other members of the group will merely wait until the * fsync done on their behalf is finished. * * When a thread finishes a fsync, it has to: * 1. wake up all the threads that were waiting for its fsync call. * 2. wake up one member of the next group of waiting threads (the * nextFsyncWaiters) so that thread can become the new leader * and issue the next fysnc call. * * If a non-leader member of the nextFsyncWaiters times out, it will issue * its own fsync anyway, in case something happened to the leader. */ void fsync() throws DatabaseException { boolean doFsync = false; boolean isLeader = false; boolean needToWait = false; FSyncGroup inProgressGroup = null; FSyncGroup myGroup = null; synchronized (fsyncLatch) { nFSyncRequests++; /* Figure out if we're calling fsync or waiting. */ if (fsyncInProgress) { needToWait = true; myGroup = nextFSyncWaiters; } else { isLeader = true; doFsync = true; fsyncInProgress = true; inProgressGroup = nextFSyncWaiters; nextFSyncWaiters = new FSyncGroup(timeout, envImpl); } } if (needToWait) { /* * Note that there's no problem if we miss the notify on this set * of waiters. We can check state in the FSyncGroup before we begin * to wait. * * All members of the group may return from their waitForFSync() * call with the need to do a fsync, because of timeout. Only one * will return as the leader. */ int waitStatus = myGroup.waitForFsync(); if (waitStatus == FSyncGroup.DO_LEADER_FSYNC) { synchronized (fsyncLatch) { /* * Check if there's a fsync in progress; this might happen * even if you were designated the leader if a new thread * came in between the point when the old leader woke you * up and now. This new thread may have found that there * was no fsync in progress, and may have started a fsync. */ if (!fsyncInProgress) { isLeader = true; doFsync = true; fsyncInProgress = true; inProgressGroup = myGroup; nextFSyncWaiters = new FSyncGroup(timeout, envImpl); } } } else if (waitStatus == FSyncGroup.DO_TIMEOUT_FSYNC) { doFsync = true; synchronized (fsyncLatch) { nTimeouts++; } } } if (doFsync) { /* * There are 3 ways that this fsync gets called: * * 1. A thread calls sync and there is not a sync call already in * progress. That thread executes fsync for itself only. Other * threads requesting sync form a group of waiters. * * 2. A sync finishes and wakes up a group of waiters. The first * waiter in the group to wake up becomes the leader. It executes * sync for it's group of waiters. As above, other threads * requesting sync form a new group of waiters. * * 3. If members of a group of waiters have timed out, they'll all * just go and do their own sync for themselves. */ executeFSync(); synchronized (fsyncLatch) {//TODO synchronized outside nFSyncs++; if (isLeader) { /* * Wake up the group that requested the fsync before you * started. They've piggybacked off your fsync. */ inProgressGroup.wakeupAll(); /* * Wake up a single waiter, who will become the next * leader. */ nextFSyncWaiters.wakeupOne(); fsyncInProgress = false; } } } } /* * Stats. */ long getNFSyncRequests() { return nFSyncRequests; } long getNFSyncs() { return nFSyncs; } long getNTimeouts() { return nTimeouts; } void loadStats(StatsConfig config, EnvironmentStats stats) throws DatabaseException { stats.setNFSyncs(nFSyncs); stats.setNFSyncRequests(nFSyncRequests); stats.setNFSyncTimeouts(nTimeouts); if (config.getClear()) { nFSyncs = 0; nFSyncRequests = 0; nTimeouts = 0; } } /** * Put the fsync execution into this method so it can be overridden for * testing purposes. */ protected void executeFSync() throws DatabaseException { envImpl.getFileManager().syncLogEnd(); } /* * Embodies a group of threads waiting for a common fsync. Note that * there's no collection here; group membership is merely that the threads * are all waiting on the same monitor. */ static class FSyncGroup { static int DO_TIMEOUT_FSYNC = 0; static int DO_LEADER_FSYNC = 1; static int NO_FSYNC_NEEDED = 2; private volatile boolean fsyncDone; private long fsyncTimeout; private boolean leaderExists; private EnvironmentImpl envImpl; FSyncGroup(long fsyncTimeout, EnvironmentImpl envImpl) { this.fsyncTimeout = fsyncTimeout; fsyncDone = false; leaderExists = false; this.envImpl = envImpl; } synchronized boolean getLeader() { if (fsyncDone) { return false; } else { if (leaderExists) { return false; } else { leaderExists = true; return true; } } } /** * Wait for either a turn to execute a fsync, or to find out that a * fsync was done on your behalf. * * @return true if the fsync wasn't done, and this thread needs to * execute a fsync when it wakes up. This may be true because it's the * leader of its group, or because the wait timed out. */ synchronized int waitForFsync() throws RunRecoveryException { int status = 0; if (!fsyncDone) { long startTime = System.currentTimeMillis(); while (true) { try { wait(fsyncTimeout); } catch (InterruptedException e) { throw new RunRecoveryException(envImpl, "Unexpected interrupt while waiting for fsync", e); } /* * This thread was awoken either by a timeout, by a notify, * or by an interrupt. Is the fsync done? */ if (fsyncDone) { /* The fsync we're waiting on is done, leave. */ status = NO_FSYNC_NEEDED; break; } else { /* * The fsync is not done -- were we woken up to become * the leader? */ if (!leaderExists) { leaderExists = true; status = DO_LEADER_FSYNC; break; } else { /* * We're just a waiter. See if we're timed out or * have more to wait. */ long now = System.currentTimeMillis(); if ((now - startTime) > fsyncTimeout) { /* we timed out. */ status = DO_TIMEOUT_FSYNC; break; } } } } } return status; } synchronized void wakeupAll() { fsyncDone = true; notifyAll(); } synchronized void wakeupOne() { /* FindBugs whines here. */ notify(); } } }