/* * Bitronix Transaction Manager * * Copyright (c) 2010, Bitronix Software. * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution; if not, write to: * Free Software Foundation, Inc. * 51 Franklin Street, Fifth Floor * Boston, MA 02110-1301 USA */ package bitronix.tm.journal; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import javax.transaction.Status; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import bitronix.tm.BitronixXid; import bitronix.tm.Configuration; import bitronix.tm.TransactionManagerServices; import bitronix.tm.utils.Decoder; import bitronix.tm.utils.MonotonicClock; import bitronix.tm.utils.Uid; /** * Simple implementation of a journal that writes on a two-files disk log. * <p>Files are pre-allocated in size, never grow and when the first one is full, dangling records are copied to the * second file and logging starts again on the latter.</p> * <p>This implementation is not highly efficient but quite robust and simple. It is based on one of the implementations * proposed by Mike Spille.</p> * <p>Configurable properties are all starting with <code>bitronix.tm.journal.disk</code>.</p> * * @see bitronix.tm.Configuration * @see <a href="http://jroller.com/page/pyrasun?entry=xa_exposed_part_iii_the">XA Exposed, Part III: The Implementor's Notebook</a> * @author lorban */ public class DiskJournal implements Journal, MigratableJournal, ReadableJournal { private final static Logger log = LoggerFactory.getLogger(DiskJournal.class); /** * The active log appender. This is exactly the same reference as tla1 or tla2 depending on which one is * currently active */ private AtomicReference<TransactionLogAppender> activeTla; /** * The transaction log appender writing on the 1st file */ private TransactionLogAppender tla1; /** * The transaction log appender writing on the 2nd file */ private TransactionLogAppender tla2; private long maxFileLength; private boolean conservativeJournaling; private Lock journalLock = new ReentrantLock(); private ReadWriteLock swapForceLock = new ReentrantReadWriteLock(true); private Object positionLock = new Object(); private AtomicBoolean needsForce; private Configuration configuration; /** * Create an uninitialized disk journal. You must call open() prior you can use it. */ public DiskJournal() { configuration = TransactionManagerServices.getConfiguration(); needsForce = new AtomicBoolean(); activeTla = new AtomicReference<TransactionLogAppender>(); } /** * Log a new transaction status to journal. Note that the DiskJournal will not check the flow of the transaction. * If you call this method with erroneous data, it will be added to the journal anyway. * * @param status transaction status to log. See {@link javax.transaction.Status} constants. * @param gtrid raw GTRID of the transaction. * @param uniqueNames unique names of the {@link bitronix.tm.resource.common.ResourceBean}s participating in * this transaction. * @throws java.io.IOException in case of disk IO failure or if the disk journal is not open. */ public void log(int status, Uid gtrid, Set<String> uniqueNames) throws IOException { if (activeTla.get() == null) throw new IOException("cannot write log, disk logger is not open"); if (configuration.isFilterLogStatus()) { if (status != Status.STATUS_COMMITTING && status != Status.STATUS_COMMITTED && status != Status.STATUS_UNKNOWN) { if (log.isDebugEnabled()) log.debug("filtered out write to log for status " + Decoder.decodeStatus(status)); return; } } TransactionLogRecord tlog = new TransactionLogRecord(status, gtrid, uniqueNames); try { if (conservativeJournaling) { journalLock.lock(); } synchronized (positionLock) { boolean rollover = activeTla.get().setPositionAndAdvance(tlog); if (rollover) { // time to swap log files try { swapForceLock.writeLock().lock(); swapJournalFiles(); activeTla.get().setPositionAndAdvance(tlog); } finally { swapForceLock.writeLock().unlock(); } } swapForceLock.readLock().lock(); } try { activeTla.get().writeLog(tlog); needsForce.set(true); } finally { swapForceLock.readLock().unlock(); } } finally { if (conservativeJournaling) { journalLock.unlock(); } } } /** * Force active log file to synchronize with the underlying disk device. * * @throws java.io.IOException in case of disk IO failure or if the disk journal is not open. */ public void force() throws IOException { if (activeTla.get() == null) throw new IOException("cannot force log writing, disk logger is not open"); if (needsForce.get() && configuration.isForcedWriteEnabled()) { swapForceLock.writeLock().lock(); try { activeTla.get().force(); needsForce.set(false); } finally { swapForceLock.writeLock().unlock(); } } } /** * Open the disk journal. Files are checked for integrity and DiskJournal will refuse to open corrupted log files. * If files are not present on disk, this method will create and pre-allocate them. * * @throws java.io.IOException in case of disk IO failure. */ public synchronized void open() throws IOException { if (activeTla.get() != null) { log.warn("disk journal already open"); return; } conservativeJournaling = configuration.isConservativeJournaling(); File file1 = new File(configuration.getLogPart1Filename()); File file2 = new File(configuration.getLogPart2Filename()); if (!file1.exists() && !file2.exists()) { log.debug("creation of log files"); createLogfile(file2, configuration.getMaxLogSizeInMb()); // make the clock run a little before creating the 2nd log file to ensure the timestamp headers are not the same long before = MonotonicClock.currentTimeMillis(); while (MonotonicClock.currentTimeMillis() < before + 100L) { try { Thread.sleep(100); } catch (InterruptedException ex) { /* ignore */ } } createLogfile(file1, configuration.getMaxLogSizeInMb()); } if (file1.length() != file2.length()) { if (!configuration.isSkipCorruptedLogs()) throw new IOException("transaction log files are not of the same length, assuming they're corrupt"); log.error("transaction log files are not of the same length: corrupted files?"); } maxFileLength = Math.max(file1.length(), file2.length()); if (log.isDebugEnabled()) log.debug("disk journal files max length: " + maxFileLength); tla1 = new TransactionLogAppender(file1, maxFileLength); tla2 = new TransactionLogAppender(file2, maxFileLength); byte cleanStatus = pickActiveJournalFile(tla1, tla2); if (cleanStatus != TransactionLogHeader.CLEAN_LOG_STATE) { log.warn("active log file is unclean, did you call BitronixTransactionManager.shutdown() at the end of the last run?"); } if (log.isDebugEnabled()) log.debug("disk journal opened"); } /** * Close the disk journal and the underlying files. * * @throws java.io.IOException in case of disk IO failure. */ public synchronized void close() throws IOException { if (activeTla.get() == null) { return; } try { tla1.close(); } catch (IOException ex) { log.error("cannot close " + tla1, ex); } tla1 = null; try { tla2.close(); } catch (IOException ex) { log.error("cannot close " + tla2, ex); } tla2 = null; activeTla.set(null); if (log.isDebugEnabled()) log.debug("disk journal closed"); } public void shutdown() { try { close(); } catch (IOException ex) { log.error("error shutting down disk journal. Transaction log integrity could be compromised!", ex); } } /** * Collect all dangling records of the active log file. * * @return a Map using Uid objects GTRID as key and {@link TransactionLogRecord} as value * @throws java.io.IOException in case of disk IO failure or if the disk journal is not open. */ public Map<Uid, JournalRecord> collectDanglingRecords() throws IOException { if (activeTla.get() == null) throw new IOException("cannot collect dangling records, disk logger is not open"); return collectDanglingRecords(activeTla.get()); } /** * {@inheritDoc} */ public void migrateTo(Journal other) throws IOException, IllegalArgumentException { if (other == this) throw new IllegalArgumentException("Cannot migrate a journal to itself (this == otherJournal)."); if (other == null) throw new IllegalArgumentException("The migration target journal may not be 'null'."); for (Object record : collectDanglingRecords().values()) { JournalRecord jr = (JournalRecord) record; other.log(jr.getStatus(), jr.getGtrid(), jr.getUniqueNames()); } } /** * {@inheritDoc} */ public synchronized void unsafeReadRecordsInto(Collection<JournalRecord> target, boolean includeInvalid) throws IOException { if (activeTla.get() == null) throw new IOException("cannot read records, disk logger is not open"); for (Iterator<TransactionLogRecord> i = iterateRecords(activeTla.get(), includeInvalid); i.hasNext(); ) target.add(i.next()); } /* * Internal impl. */ /** * Create a fresh log file on disk. If the specified file already exists it will be deleted then recreated. * @param logfile the file to create * @param maxLogSizeInMb the file size in megabytes to preallocate * @throws java.io.IOException in case of disk IO failure. */ private static void createLogfile(File logfile, int maxLogSizeInMb) throws IOException { if (logfile.isDirectory()) throw new IOException("log file is referring to a directory: " + logfile.getAbsolutePath()); if (logfile.exists()) { boolean deleted = logfile.delete(); if (!deleted) throw new IOException("log file exists but cannot be overwritten: " + logfile.getAbsolutePath()); } if (logfile.getParentFile() != null) { logfile.getParentFile().mkdirs(); } RandomAccessFile raf = null; try { raf = new RandomAccessFile(logfile, "rw"); raf.seek(TransactionLogHeader.FORMAT_ID_HEADER); raf.writeInt(BitronixXid.FORMAT_ID); raf.writeLong(MonotonicClock.currentTimeMillis()); raf.writeByte(TransactionLogHeader.CLEAN_LOG_STATE); raf.writeLong((long) TransactionLogHeader.HEADER_LENGTH); byte[] buffer = new byte[4096]; int length = (maxLogSizeInMb *1024 *1024) /4096; for (int i = 0; i < length; i++) { raf.write(buffer); } } finally { if (raf != null) raf.close(); } } /** * Initialize the activeTla member variable with the TransactionLogAppender object having the latest timestamp * header. * * @param tla1 the first of the two candidate active TransactionLogAppenders * @param tla2 the second of the two candidate active TransactionLogAppenders * @return the state of the designated active TransactionLogAppender as returned by TransactionLogHeader.getState() * @throws java.io.IOException in case of disk IO failure. * @see TransactionLogHeader */ private synchronized byte pickActiveJournalFile(TransactionLogAppender tla1, TransactionLogAppender tla2) throws IOException { if (tla1.getTimestamp() > tla2.getTimestamp()) { activeTla.set(tla1); if (log.isDebugEnabled()) log.debug("logging to file 1: " + activeTla); } else { activeTla.set(tla2); if (log.isDebugEnabled()) log.debug("logging to file 2: " + activeTla); } byte cleanState = activeTla.get().getState(); activeTla.get().setState(TransactionLogHeader.UNCLEAN_LOG_STATE); if (log.isDebugEnabled()) log.debug("log file activated, forcing file state to disk"); activeTla.get().force(); return cleanState; } /** * <p>Swap the active and the passive journal files so that the active one becomes passive and the passive one * becomes active.</p> * List of actions taken by this method: * <ul> * <li>ensure the all data has been forced to the active log file.</li> * <li>copy dangling COMMITTING records to the passive log file.</li> * <li>update header timestamp of passive log file (makes it become active).</li> * <li>do a force on passive log file. It is now the active file.</li> * <li>switch references of active/passive files.</li> * </ul> * * @throws java.io.IOException in case of disk IO failure. */ private synchronized void swapJournalFiles() throws IOException { if (log.isDebugEnabled()) log.debug("swapping journal log file to " + getPassiveTransactionLogAppender()); //step 1 activeTla.get().force(); //step 2 TransactionLogAppender passiveTla = getPassiveTransactionLogAppender(); passiveTla.rewind(); List<TransactionLogRecord> danglingLogs = activeTla.get().getDanglingLogs(); for (TransactionLogRecord tlog : danglingLogs) { boolean rolloverError = passiveTla.setPositionAndAdvance(tlog); if (rolloverError) { log.error("Moving in-flight transactions the rollover log file would have resulted in an overflow of that file."); break; } passiveTla.writeLog(tlog); } if (log.isDebugEnabled()) log.debug(danglingLogs.size() + " dangling record(s) copied to passive log file"); activeTla.get().clearDanglingLogs(); //step 3 passiveTla.setTimestamp(MonotonicClock.currentTimeMillis()); //step 4 passiveTla.force(); //step 5 activeTla.set(passiveTla); if (log.isDebugEnabled()) log.debug("journal log files swapped"); } /** * @return the TransactionFileAppender of the passive journal file. */ private TransactionLogAppender getPassiveTransactionLogAppender() { return (tla1 == activeTla.get() ? tla2 : tla1); } /** * Create a Map of TransactionLogRecord with COMMITTING status objects using the GTRID byte[] as key that have * no corresponding COMMITTED record * * @param tla the TransactionLogAppender to scan * @return a Map using Uid objects GTRID as key and {@link TransactionLogRecord} as value * @throws java.io.IOException in case of disk IO failure. */ private static Map<Uid, JournalRecord> collectDanglingRecords(TransactionLogAppender tla) throws IOException { Map<Uid, JournalRecord> danglingRecords = new HashMap<Uid, JournalRecord>(64); TransactionLogCursor tlc = tla.getCursor(); try { int committing = 0; int committed = 0; while (true) { TransactionLogRecord tlog; try { tlog = tlc.readLog(); } catch (CorruptedTransactionLogException ex) { if (TransactionManagerServices.getConfiguration().isSkipCorruptedLogs()) { log.error("skipping corrupted log", ex); continue; } throw ex; } if (tlog == null) break; int status = tlog.getStatus(); if (status == Status.STATUS_COMMITTING) { danglingRecords.put(tlog.getGtrid(), tlog); committing++; } // COMMITTED is when there was no problem in the transaction // UNKNOWN is when a 2PC transaction heuristically terminated // ROLLEDBACK is when a 1PC transaction rolled back during commit if (status == Status.STATUS_COMMITTED || status == Status.STATUS_UNKNOWN || status == Status.STATUS_ROLLEDBACK) { JournalRecord rec = danglingRecords.get(tlog.getGtrid()); if (rec != null) { Set<String> recUniqueNames = new HashSet<String>(rec.getUniqueNames()); recUniqueNames.removeAll(tlog.getUniqueNames()); if (recUniqueNames.isEmpty()) { danglingRecords.remove(tlog.getGtrid()); committed++; } else { danglingRecords.put(tlog.getGtrid(), new TransactionLogRecord(rec.getStatus(), rec.getGtrid(), recUniqueNames)); } } } } if (log.isDebugEnabled()) log.debug("collected dangling records of " + tla + ", committing: " + committing + ", committed: " + committed + ", delta: " + danglingRecords.size()); } finally { tlc.close(); } return danglingRecords; } /** * Implements a low level iterator over all entries contained in the active TX log. * * @param tla the TransactionLogAppender to scan * @param skipCrcCheck sets whether CRC checks are applied or not. * @return an iterator over all contained log records. * @throws java.io.IOException in case of the initial disk IO failed (subsequent errors are unchecked exceptions). */ private static Iterator<TransactionLogRecord> iterateRecords( TransactionLogAppender tla, final boolean skipCrcCheck) throws IOException { final TransactionLogCursor tlc = tla.getCursor(); final Iterator<TransactionLogRecord> it = new Iterator<TransactionLogRecord>() { TransactionLogRecord tlog; public boolean hasNext() { while (tlog == null) { try { try { tlog = tlc.readLog(skipCrcCheck); if (tlog == null) break; } catch (CorruptedTransactionLogException ex) { if (TransactionManagerServices.getConfiguration().isSkipCorruptedLogs()) { log.error("skipping corrupted log", ex); continue; } throw ex; } } catch (IOException e) { throw new RuntimeException(e); } } return tlog != null; } public TransactionLogRecord next() { if (!hasNext()) throw new NoSuchElementException(); try { return tlog; } finally { tlog = null; } } public void remove() { throw new UnsupportedOperationException(); } }; try { it.hasNext(); return it; } catch (RuntimeException e) { if (e.getCause() instanceof IOException) throw (IOException) e.getCause(); throw e; } } }