/* XXL: The eXtensible and fleXible Library for data processing Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger Head of the Database Research Group Department of Mathematics and Computer Science University of Marburg Germany This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; If not, see <http://www.gnu.org/licenses/>. http://code.google.com/p/xxl/ */ package xxl.core.collections.containers.io; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.RandomAccessFile; import java.util.Iterator; import java.util.NoSuchElementException; import xxl.core.collections.containers.AbstractContainer; import xxl.core.functions.Function; import xxl.core.io.Block; import xxl.core.io.FilesystemOperations; import xxl.core.io.JavaFilesystemOperations; import xxl.core.io.RandomAccessFileInputStream; import xxl.core.io.RandomAccessFileOutputStream; import xxl.core.io.converters.ByteConverter; import xxl.core.io.converters.FixedSizeConverter; import xxl.core.io.converters.IntegerConverter; import xxl.core.io.converters.LongConverter; import xxl.core.io.converters.ShortConverter; import xxl.core.util.WrappingRuntimeException; /** * This class provides a container that is able to store blocks in a file. * There are some constructors that create a new container and * there are some other constructors that open an existing container. * All functionality concerning files can be exchanged by * passing different factory methods to the constructors (important * for the usage of own filesystems). * <p> * The container depends on five files: a container file, a meta file, * a reservedBitMap file, a updatedBitMap file and a freeList file.<br> * The container file is used for storing the blocks of this container. In * addition, the offset of an block in the container file determines the * id of the block in this container. Every block stored in the container * file takes <tt>blockSize</tt> bytes, therefore blocks that contains * more than <tt>blockSize</tt> bytes cannot be stored in this container. * The metaData file contains the blockSize and the number of reserved blocks. * The reservedBitMap file is a fat file of this container. * It stores for each block whether it is already in use, i.e. cannot be reserved. * In order to store this table, eight bits are joined to an <tt>int</tt> value. * For this reason the <tt>n</tt>th bit of this file is stored as the <tt>(n%8)</tt> * th bit of the <tt>(n/8)</tt> th <tt>byte</tt> value of this file. The name * of this file is determined by the <tt>String prefix</tt> and the file * extension <tt>.rbm</tt>. Similar to the reservedBitMap file, the * updatedBitMap file stores for each block whether an update has been * performed so that it is possible to get the block. * The freeList file stores the offsets of removed blocks. Whenever a block * should be written to this container, the freeList file is checked, whether * there are empty blocks in the container file. If the freeList file contains * offsets of removed blocks the new block will be stored in an empty space of * the container file, else the container file will be enlarged. * <p> * Earlier versions of xxl worked with only three files. Such Containers * are automatically migrated to the five files version (at the first * use). * <p> * Example usage (1). * <pre> * // create a new block file container with ... * * BlockFileContainer container = new BlockFileContainer( * * // files having the file name "BlockFileContainer" * * "BlockFileContainer", * * // a block size of 4 bytes (size of a serialized integer) * * 4 * ); * * // insert 10 blocks containing the integers between 0 and 9 * * for (int i = 0; i < 10; i++) { * * // create a new block * * Block block = new Block(4); * * // catch IOExceptions * * try { * * // write the value of i to the block * * block.dataOutputStream().write(i); * } * catch (IOException ioe) { * System.out.println("An I/O error occured."); * } * * // insert the block into the block file container * * container.insert(block); * } * * // get the ids of all elements in the container * * Iterator iterator = container.ids(); * * // print all elements of the container * * while (iterator.hasNext()) { * * // get the block from the container * * Block block = (Block)container.get(iterator.next()); * * // catch IOExceptions * * try { * * // print the data of the block * * System.out.println(block.dataInputStream().read()); * } * catch (IOException ioe) { * System.out.println("An I/O error occured."); * } * } * * // close the open container and clear its file after use * * container.close(); * container.clear(); * container.delete(); * </pre> * * @see xxl.core.collections.containers.Container * @see IOException * @see Iterator * @see NoSuchElementException * @see RandomAccessFile * @see WrappingRuntimeException */ public class BlockFileContainer extends AbstractContainer { /** * The name for the files of this container. The file names of the * five files a block file container consists of differ only in their * file extensions. */ protected String prefix; /** * Contains operations for opening, creating, renaming and deleting files. */ protected FilesystemOperations fso; /** * Extensions of the files used. */ public static final String EXTENSIONS[] = new String[] {".mtd", ".rbm", ".ubm", ".flt", ".ctr"}; /** * Constant for the mdf-file (inside the EXTENSIONS-array). */ public static final int MTD_FILE = 0; /** * Constant for the rbm-file (inside the EXTENSIONS-array). */ public static final int RBM_FILE = 1; /** * Constant for the ubm-file (inside the EXTENSIONS-array). */ public static final int UBM_FILE = 2; /** * Constant for the flt-file (inside the EXTENSIONS-array). */ public static final int FLT_FILE = 3; /** * Constant for the ctr-file (inside the EXTENSIONS-array). */ public static final int CTR_FILE = 4; /** * Returns the number of files which are used by the container. * @return number of files */ public static int getNumberOfFiles() { return EXTENSIONS.length; } /** * Returns a string array with the filenames which are used by the container. * @param prefix the beginning of each filename. * @return String array containing the filenames. */ public static String[] getFilenamesUsed(String prefix) { String ar[] = new String[EXTENSIONS.length]; for (int i=0; i<EXTENSIONS.length; i++) ar[i] = new String(prefix+EXTENSIONS[i]); return ar; } /** * The container file of this container. The container file is used * for storing the blocks of this container. In addition, the offset * of an block in the container file determines the id of the block in * this container. The name of the container file is determined by the * <tt>String prefix</tt> and the file extension <tt>.ctr</tt>. */ protected RandomAccessFile container; /** * The metaData file contains the blockSize and the number of reserved * blocks. The name of the container file is determined by the * <tt>String prefix</tt> and the file extension <tt>.mtd</tt>. */ protected RandomAccessFile metaData; /** * A fat file of this container. The reservedBitMap file stores a bit * table that shows for each block whether it can be reserved (false) * or not (true). In order to store this table, eight bits are joined * to an <tt>int</tt> value. For this reason the <tt>n</tt>th bit of * this file is stored as the <tt>(n%8)</tt>th bit of the <tt>(n/8)</tt> * th <tt>byte</tt> value of this file. The name of this file is * determined by the <tt>String prefix</tt> and the file extension * <tt>.rbm</tt>. */ protected RandomAccessFile reservedBitMap; /** * A fat file of this container. The updatedBitMap file stores a bit * table that shows for each block whether it can be get (true) or * not (false). In order to store this table, eight bits are joined * to an <tt>int</tt> value. For this reason the <tt>n</tt>th bit of * this file is stored as the <tt>(n%8)</tt>th bit of the <tt>(n/8)</tt> * th <tt>byte</tt> value of this file. The name of this file is * determined by the <tt>String prefix</tt> and the file extension * <tt>.ubm</tt>. */ protected RandomAccessFile updatedBitMap; /** * The freeList file of this container. The freeList file stores the * offsets of removed blocks. Whenever a block should be written to * this container, the freeList file is checked, whether there are * empty blocks in the container file. If the freeList file contains * offsets of removed blocks the new block will be stored in an empty * space of the container file, else the container file will be * enlarged. The name of the freeList file is determined by the * <tt>String prefix</tt> and the file extension <tt>.flt</tt>. */ protected RandomAccessFile freeList; /** * The size reserved for storing a block in this container. Every * block stored in the container file takes <tt>blockSize</tt> bytes, * therefore blocks that contains more than <tt>blockSize</tt> bytes * cannot be stored in this container. */ protected int blockSize; /** * The number of blocks stored in this container. */ protected int size; /** * Determines the type of the identifyers which are produced: * 1: Byte, 2: Short, 3: Integer, 4: Long (default). */ protected byte idType=4; /** * Constructs an empty BlockFileContainer that is able to store blocks * with a maximum size of <tt>blockSize</tt> bytes. The given * <tt>String prefix</tt> specifies the names of the files the are * used for storing the elements of the container. When using existing * files to store the container their data will be overwritten. * <p> * This constructor is useful if you want to keep your file in * a special self developed filesystem. * * @param prefix specifies the names of the files the container * consists of. * @param blockSize the size reserved for storing a block in the * container file. * @param fso Provides an object which performs the operations on the filesystem. */ public BlockFileContainer (String prefix, int blockSize, FilesystemOperations fso) { this.prefix = prefix; this.blockSize = blockSize; this.fso = fso; openFiles(); reset(); } /** * Constructs an empty BlockFileContainer that is able to store blocks * with a maximum size of <tt>blockSize</tt> bytes. The given * <tt>String prefix</tt> specifies the names of the files the are * used for storing the elements of the container. When using existing * files to store the container their data will be overwritten. * * @param prefix specifies the names of the files the container * consists of. * @param blockSize the size reserved for storing a block in the * container file. */ public BlockFileContainer (String prefix, int blockSize) { this(prefix, blockSize, JavaFilesystemOperations.DEFAULT_INSTANCE); } /** * Constructs a BlockFileContainer that consists of existing files * given by the specified file name. Every information the container * needs will be taken from the meta file. * * @param prefix specifies the names of the files the container * consists of. * @param fso Provides an object which performs the operations on the filesystem. */ public BlockFileContainer (String prefix, FilesystemOperations fso) { this.prefix = prefix; this.fso = fso; open(); } /** * Constructs a BlockFileContainer that consists of existing files * given by the specified file name. Every information the container * needs will be taken from the meta file. * * @param prefix specifies the names of the files the container * consists of. */ public BlockFileContainer (String prefix) { this(prefix, JavaFilesystemOperations.DEFAULT_INSTANCE); } /** * Opens the five container files using the given Factory. */ protected void openFiles() { this.container = fso.openFile(prefix+EXTENSIONS[CTR_FILE],"rw"); this.metaData = fso.openFile(prefix+EXTENSIONS[MTD_FILE], "rw"); this.reservedBitMap = fso.openFile(prefix+EXTENSIONS[RBM_FILE], "rw"); this.updatedBitMap = fso.openFile(prefix+EXTENSIONS[UBM_FILE], "rw"); this.freeList = fso.openFile(prefix+EXTENSIONS[FLT_FILE], "rw"); } /** * Always called when opening a BlockFileContainer. This method checks * whether the file structure is according to an old xxl-release. On * demand, the file structure will be migrated automatically. <b>Afterwards, * it is impossible with an old xxl-release to work on the new file * structure</b>. */ protected void migrateOnDemand () { try { if (fso.fileExists(prefix+".fat") && !fso.fileExists(prefix+EXTENSIONS[UBM_FILE])) { RandomAccessFile ubm, meta, ctr, rbm; InputStream inputStream; OutputStream outputStream; fso.renameFile(prefix+".fat",prefix+EXTENSIONS[UBM_FILE]); ubm = fso.openFile(prefix+EXTENSIONS[UBM_FILE], "rw"); meta = fso.openFile(prefix+EXTENSIONS[MTD_FILE], "rw"); ctr = fso.openFile(prefix+EXTENSIONS[CTR_FILE], "rw"); ubm.seek(ubm.length()-16); meta.writeInt(blockSize = ubm.readInt()); meta.writeInt(size = ubm.readInt()); ctr.setLength(ubm.readLong()+blockSize); // maxOffset ctr.close(); meta.close(); ubm.setLength(ubm.length()-16); ubm.seek(0); rbm = fso.openFile(prefix+EXTENSIONS[UBM_FILE], "rw"); inputStream = new BufferedInputStream(new RandomAccessFileInputStream(ubm)); outputStream = new BufferedOutputStream(new RandomAccessFileOutputStream(rbm)); for (int b; (b = inputStream.read())!=-1;) outputStream.write(b); outputStream.close(); inputStream.close(); rbm.close(); ubm.close(); } } catch (IOException ie) { throw new WrappingRuntimeException(ie); } } /** * Opens the files of this container and restores the state of this * container. This method expects the serialized state of the * container at the end of the fat file. After restoring the state of * the container, the serialized data is removed from the fat file. */ protected void open () { if (this.container==null) try { migrateOnDemand(); openFiles(); this.blockSize = metaData.readInt(); this.size = metaData.readInt(); } catch (IOException ie) { throw new WrappingRuntimeException(ie); } } /** * Returns a converter for the ids generated by this container. A * converter transforms an object to its byte representation and vice * versa - also known as serialization in Java.<br> * Because of using the offset in the container file (<tt>long</tt> * value) as id, this method always returns a <tt>LongConverter</tt>. * * @return a converter for serializing the identifiers of the * container. */ public FixedSizeConverter objectIdConverter () { switch (idType) { case 1: return ByteConverter.DEFAULT_INSTANCE; case 2: return ShortConverter.DEFAULT_INSTANCE; case 3: return IntegerConverter.DEFAULT_INSTANCE; default: return LongConverter.DEFAULT_INSTANCE; } } /** * Returns the size of the ids generated by this container in bytes, * which is 8. * @return 8 */ public int getIdSize() { return LongConverter.SIZE; } /** * Returns the size reserved for storing a block in this container. * Every block stored in the container file takes <tt>blockSize</tt> * bytes, therefore blocks that contains more than <tt>blockSize</tt> * bytes cannot be stored in this container. * * @return the size reserved for storing a block in this container. */ public int blockSize () { return blockSize; } /** * Resets this container and any files associated with it.<br> * This implementation sets the length of the associated files to * <tt>0</tt>. Thereafter the size and maximum offset of this * container are corrected. */ public void reset () { open(); try { container.setLength(0); reservedBitMap.setLength(0); updatedBitMap.setLength(0); freeList.setLength(0); size = 0; } catch (IOException ie) { throw new WrappingRuntimeException(ie); } } /** * Removes all elements from the Container. After a call of this * method, <tt>size()</tt> will return 0.<br> * This implementation only calls the <tt>reset()</tt> method. */ public void clear () { reset(); } /** * Closes the Container and releases its associated files. But before * closing the meta file, the serialized state of this container must * be appended. Therefore, the values of the fields <tt>size</tt> and * <tt>blockSize</tt> are append to the meta file. A closed container * can be implicitly reopened by a consecutive call to one of its * methods. */ public void close () { if (this.container!=null) try { container.close(); container = null; metaData.seek(0); metaData.writeInt(blockSize); metaData.writeInt(size); metaData.close(); reservedBitMap.close(); updatedBitMap.close(); freeList.close(); } catch (IOException ie) { throw new WrappingRuntimeException(ie); } } /** * Returns <tt>true</tt> if the container contains a block for the identifier * <tt>id</tt>.<br> * This implementation checks whether the updatedBitMap files contains * an entry for the offset specified by <tt>id</tt>. * * @param id identifier of the block. * @return true if the container has updated a block for the specified * identifier. */ public boolean contains (Object id) { open(); try { long offset = ((Number)id).longValue(); if (offset+blockSize>container.length()) return false; updatedBitMap.seek(offset/blockSize/8); return (updatedBitMap.read()&(1<<(offset/blockSize%8)))!=0; } catch (IOException ie) { throw new WrappingRuntimeException(ie); } } /** * Returns the block associated to the identifier <tt>id</tt>. An * exception is thrown when the desired block is not found via contains. * In this implementation the parameter unfix has no function because * the container is unbuffered. * * @param id identifier of the block. * @param unfix signals whether the object can be removed from the * underlying buffer. * @return the block associated to the specified identifier. * @throws NoSuchElementException if the desired block is not found. */ public Object get (Object id, boolean unfix) throws NoSuchElementException { open(); try { byte [] array = new byte [blockSize]; if (!contains(id)) throw new NoSuchElementException(); container.seek(((Number)id).longValue()); container.read(array); return new Block(array, 0, blockSize); } catch (IOException ie) { throw new WrappingRuntimeException(ie); } } /** * Returns an iterator that delivers all the identifiers of * the container that are in use. * * @return an iterator of all identifiers used by this container. */ public Iterator ids () { open(); return new Iterator () { Long id = new Long(-blockSize), nextId; boolean removeable = false; public boolean hasNext () { try { for (removeable = false; !isUsed(nextId = new Long(id.longValue()+blockSize)); id = nextId) if (nextId.longValue()+blockSize>container.length()) return false; return true; } catch (IOException ie) { throw new WrappingRuntimeException(ie); } } public Object next () throws NoSuchElementException { if (!hasNext()) throw new NoSuchElementException(); removeable = true; return id = nextId; } public void remove () throws IllegalStateException { if (!removeable) throw new IllegalStateException(); BlockFileContainer.this.remove(id); removeable = false; } }; } /** * Checks whether the <tt>id</tt> has been returned previously by a * call to insert or reserve and hasn't been removed so far. * This implementation checks whether the reservedBitMap files contains * an entry for the offset specified by <tt>id</tt>. * * @param id the id to be checked. * @return <tt>true</tt> exactly if the <tt>id</tt> is still in use. */ public boolean isUsed (Object id) { open(); try { long offset = ((Number)id).longValue(); if (offset+blockSize>container.length()) return false; reservedBitMap.seek(offset/blockSize/8); return (reservedBitMap.read()&(1<<(offset/blockSize%8)))!=0; } catch (IOException ie) { throw new WrappingRuntimeException(ie); } } /** * Removes the block with identifier <tt>id</tt>. An exception is * thrown when a block with an identifier <tt>id</tt> is not in the * container. After a call of <tt>remove()</tt> all the iterators (and * cursors) can be in an invalid state.<br> * This implementation clears the entry for the block in both fat files * and adds <tt>id</tt> to the freeList file. * * @param id an identifier of a block. * @throws NoSuchElementException if a block with an identifier * <tt>id</tt> is not in the container. */ public void remove (Object id) throws NoSuchElementException { open(); try { long offset = ((Number)id).longValue(); int b; if (!isUsed(id)) throw new NoSuchElementException(); if (--size==0) reset(); else { reservedBitMap.seek(offset/blockSize/8); b = reservedBitMap.read(); reservedBitMap.seek(reservedBitMap.getFilePointer()-1); reservedBitMap.write(b&~(1<<(offset/blockSize%8))); updatedBitMap.seek(offset/blockSize/8); b = updatedBitMap.read(); updatedBitMap.seek(updatedBitMap.getFilePointer()-1); updatedBitMap.write(b&~(1<<(offset/blockSize%8))); if (offset+blockSize==container.length()) { while (!isUsed(new Long(offset -= blockSize))); reservedBitMap.setLength(offset/blockSize/8+1); updatedBitMap.setLength(offset/blockSize/8+1); if (container.length()>offset+blockSize) container.setLength(offset+blockSize); } else { freeList.seek(freeList.length()); freeList.writeLong(offset); } } } catch (IOException ie) { throw new WrappingRuntimeException(ie); } } /** * Reserves an id for subsequent use. * This implementation sets in the reservedBitMap file the * appropriate bit for the id returned by this method. * * @param getObject A parameterless function providing the object for * that an id should be reserved. Not used by this * implementation. * @return the reserved id. */ public Object reserve (Function getObject) { open(); try { long offset; int b; for (;;) { if (freeList.length()==0) { offset = container.length(); container.setLength(offset+blockSize); break; } freeList.seek(freeList.length()-8); offset = freeList.readLong(); freeList.setLength(freeList.length()-8); if (offset+blockSize<=container.length()) break; } reservedBitMap.seek(offset/blockSize/8); if (reservedBitMap.getFilePointer()==reservedBitMap.length()) { reservedBitMap.write(1); updatedBitMap.seek(updatedBitMap.length()); updatedBitMap.write(0); } else { b = reservedBitMap.read(); reservedBitMap.seek(reservedBitMap.getFilePointer()-1); reservedBitMap.write(b|(1<<(offset/blockSize%8))); } size++; switch (idType) { case 1: return new Byte((byte) offset); case 2: return new Short((short) offset); case 3: return new Integer((int) offset); default: return new Long(offset); } } catch (IOException ie) { throw new WrappingRuntimeException(ie); } } /** * Returns the number of elements of the container. In other words, * the number of set bits in the updatedBitMap file. * * @return the number of elements. */ public int size () { return size; } /** * Overwrites an existing (id,*)-element by (id, object). This method * throws an exception if a block with an identifier <tt>id</tt> does * not exist in the container (checked via isUsed). The parameter <tt>unfix</tt> * has no function because this container is unbuffered. * * @param id identifier of the element. * @param object the new block that should be associated to * <tt>id</tt>. * @param unfix signals a buffered container whether the block can be * removed from the underlying buffer. * @throws NoSuchElementException if a block with an identifier * <tt>id</tt> does not exist in the container. */ public void update (Object id, Object object, boolean unfix) throws NoSuchElementException { open(); try { long offset = ((Number)id).longValue(); Block block = (Block)object; int b; if (offset+blockSize>container.length()) throw new NoSuchElementException(); updatedBitMap.seek(offset/blockSize/8); b = updatedBitMap.read(); if ((b&(1<<(offset/blockSize%8)))==0) { if (!isUsed(id)) throw new NoSuchElementException(); updatedBitMap.seek(updatedBitMap.getFilePointer()-1); updatedBitMap.write(b|(1<<(offset/blockSize%8))); } if (block.size>blockSize) throw new IllegalArgumentException("Block too large: defined block size is " + blockSize + ", actual block size is "+block.size + "."); if (blockSize>block.array.length-block.offset) { byte [] array = new byte[blockSize]; System.arraycopy(block.array, block.offset, array, 0, block.size); block = new Block(array); } container.seek(offset); container.write(block.array, block.offset, blockSize); } catch (IOException ie) { throw new WrappingRuntimeException(ie); } } /** * Deletes the container. If necessary, the container is closed before. */ public void delete() { close(); // delete the files of the container for (int i=0; i<BlockFileContainer.getNumberOfFiles(); i++) fso.deleteFile(prefix+EXTENSIONS[i]); } }