// Records.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 14.01.2008 on http://yacy.net
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.kelondro.io;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.MemoryControl;
/**
* The Records data structure is a flat file with records of fixed length.
* The file does not contain any meta information and the first record starts
* right at file position 0.
* The access rules are designed so that a minimum of IO operations is necessary.
* Two caches provide a mirror to content in the file: a read cache and a write buffer
* The read cache contains a number of entries from the file; a mirror that moves
* whenever information outside the mirror is requested.
* The write buffer always exists only at the end of the file. It contains only records
* that have never been written to the file before. When the write buffer is flushed,
* the file grows
* The record file may also shrink when the last entry of the file is removed.
* Removal of Entries inside the file is not possible, but such entries can be erased
* by overwriting the data with zero bytes
* All access to the file is made with byte[] that are generated outside of this class
* This class only references byte[] that are handed over to methods of this class.
*/
public final class Records {

    /**
     * upper bound (in bytes) used to size the write buffer;
     * stay below hard disc cache (is that necessary?)
     */
    private static final int maxWriteBuffer = 16 * 1024;

    /** handle to the table file; null if opening failed or after {@link #close()} */
    private RandomAccessFile raf;

    private final File tablefile;

    /**
     * number of bytes in one record
     */
    protected final int recordsize;

    /**
     * number of entries in buffer
     */
    private int buffercount;

    /** write buffer holding records that were appended but not yet written to the file */
    private byte[] buffer;

    /** one record of zero bytes, used to erase records */
    private final byte[] zero;

    /**
     * Opens (and creates, if it does not exist yet) a table file with records of fixed length.
     * Failures to create or open the file are logged, not thrown.
     *
     * @param tablefile the file that holds the records
     * @param recordsize number of bytes in one record, must be positive
     * @throws IllegalArgumentException if recordsize is not positive
     */
    public Records(final File tablefile, final int recordsize) {
        if (recordsize <= 0) throw new IllegalArgumentException("recordsize must be positive: " + recordsize);
        this.tablefile = tablefile;
        this.recordsize = recordsize;

        // a freshly allocated array is already filled with zero bytes
        this.zero = new byte[recordsize];

        // initialize table file
        if (!tablefile.exists()) {
            // make new (empty) file
            FileOutputStream fos = null;
            try {
                fos = new FileOutputStream(tablefile);
            } catch (final FileNotFoundException e) {
                // should not happen
                ConcurrentLog.logException(e);
            } finally {
                if (fos != null) {
                    try {
                        fos.close();
                    } catch (final IOException e) {
                        ConcurrentLog.logException(e);
                    }
                }
            }
        }

        // open an existing table file
        try {
            this.raf = new RandomAccessFile(tablefile, "rw");
        } catch (final FileNotFoundException e) {
            // should never happen
            ConcurrentLog.logException(e);
        }

        // initialize write buffer
        int buffersize = defaultBufferSize(recordsize);
        if (!MemoryControl.request(buffersize + 1024 * 1024 * 20, true)) {
            // not enough memory there, take less
            // NOTE(review): by operator precedence this subtracts (6 MB / 6) = 1 MB from the available
            // memory; possibly "(available - 6 MB) / 6" was intended - confirm before changing
            final long lessmem = Math.min(maxWriteBuffer / 8, MemoryControl.available() - (1024 * 1024 * 6) / 6);
            buffersize = Math.max(1, (int) (lessmem / recordsize)) * recordsize;
        }
        this.buffer = new byte[buffersize];
        this.buffercount = 0;
    }

    /**
     * @param recordsize number of bytes in one record
     * @return the default write buffer size in bytes: a whole number of records, at least one
     */
    private static int defaultBufferSize(final int recordsize) {
        return Math.max(1, maxWriteBuffer / recordsize) * recordsize;
    }

    /**
     * Removes all records: truncates the file to zero length and discards the write buffer.
     * An IOException during truncation is logged; in that case the buffer is left untouched.
     */
    public synchronized void clear() {
        try {
            this.raf.setLength(0);
            this.buffer = new byte[defaultBufferSize(this.recordsize)];
            this.buffercount = 0;
        } catch (final IOException e) {
            ConcurrentLog.logException(e);
        }
    }

    /**
     * @param tablefile
     * @param recordsize number of bytes in one record, must be positive
     * @return number of records in table; 0 if the file does not exist
     * @throws IOException if the file size is not a multiple of the record size
     * @throws IllegalArgumentException if the file exists and recordsize is not positive
     */
    public final static long tableSize(final File tablefile, final long recordsize) throws IOException {
        if (!tablefile.exists()) return 0;
        if (recordsize <= 0) throw new IllegalArgumentException("recordsize must be positive: " + recordsize);
        final long size = tablefile.length();
        if (size % recordsize != 0) throw new IOException("wrong file size: file = " + tablefile + ", size = " + size + ", recordsize = " + recordsize);
        return size / recordsize;
    }

    /**
     * Cuts an incomplete record from the end of the file so that the file size
     * becomes a multiple of the record size. Does nothing if the file does not exist.
     *
     * @param tablefile
     * @param recordsize number of bytes in one record, must be positive
     * @throws IOException if the file cannot be opened or truncated
     * @throws IllegalArgumentException if the file exists and recordsize is not positive
     */
    public final static void fixTableSize(final File tablefile, final long recordsize) throws IOException {
        if (!tablefile.exists()) return;
        if (recordsize <= 0) throw new IllegalArgumentException("recordsize must be positive: " + recordsize);
        final long size = tablefile.length();
        final long cut = size % recordsize;
        if (cut > 0) {
            final RandomAccessFile file = new RandomAccessFile(tablefile, "rw");
            try {
                file.setLength(size - cut);
            } finally {
                // release the file handle even if the truncation failed
                file.close();
            }
        }
    }

    /**
     * @return the number of records in file plus number of records in buffer
     * @throws IOException
     */
    public final synchronized long size() throws IOException {
        return filesize() + this.buffercount;
    }

    /**
     * @return the table file handed over to the constructor
     */
    public final File filename() {
        return this.tablefile;
    }

    /**
     * @return records in file; 0 (and a log entry) if the file is not open
     * @throws IOException
     */
    private final long filesize() throws IOException {
        final RandomAccessFile file = this.raf;
        if (file == null) {
            // This may happen on shutdown while still something is moving on
            ConcurrentLog.logException(new IllegalStateException("table file is not open: " + this.tablefile));
            return 0;
        }
        return file.length() / this.recordsize;
    }

    /**
     * checks if the index is inside the buffer
     *
     * @param index
     * @param filesize number of records in the file
     * @return the index offset inside the buffer or -1 if the index is not in the buffer
     */
    private final int inBuffer(final long index, final long filesize) {
        if (index >= filesize && index < filesize + this.buffercount) {
            return (int) (index - filesize);
        }
        return -1;
    }

    /**
     * write buffer to end of file.
     * An IOException is logged; the buffer is emptied in any case.
     * Does nothing if the file is not open.
     */
    protected final synchronized void flushBuffer() {
        if (this.raf == null) return;
        if (this.buffercount > 0) {
            try {
                this.raf.seek(this.raf.length());
                this.raf.write(this.buffer, 0, this.recordsize * this.buffercount);
            } catch (final IOException e) {
                ConcurrentLog.logException(e);
            }
        }
        this.buffercount = 0;
    }

    /**
     * Flushes the write buffer and closes the file. After this call the file handle
     * and the buffer are released (set to null).
     */
    public final synchronized void close() {
        // close the file
        if (this.raf != null) {
            try {
                flushBuffer();
                this.raf.close();
            } catch (final IOException e) {
                ConcurrentLog.logException(e);
            }
        }
        this.raf = null;
        this.buffer = null;
    }

    /**
     * @param index record which should be read
     * @param b destination array
     * @param start offset in b to store data
     * @throws IOException
     * @throws IndexOutOfBoundsException if index is not smaller than {@link #size()}
     */
    public final synchronized void get(final long index, final byte[] b, final int start) throws IOException {
        assert b.length - start >= this.recordsize;
        final long filesize = filesize();
        final long s = filesize + this.buffercount;
        if (index >= s) throw new IndexOutOfBoundsException("kelondroEcoFS.get(" + index + ") outside bounds (" + s + ")");

        // check if index is inside of the write buffer
        final int q = inBuffer(index, filesize);
        if (q >= 0) {
            // read entry from the buffer
            System.arraycopy(this.buffer, q * this.recordsize, b, start, this.recordsize);
            return;
        }

        // copy record from file to given array
        this.raf.seek(this.recordsize * index);
        this.raf.readFully(b, start, this.recordsize);
    }

    /**
     * Writes a record at the given position. An index equal to {@link #size()} appends the record.
     * A record that consists only of zero bytes is treated as a removal, see the private
     * method clean(long).
     *
     * @param index position of the record, at most {@link #size()}
     * @param b source array
     * @param start offset in b where the record starts
     * @throws IOException
     * @throws IndexOutOfBoundsException if index is greater than {@link #size()}, or if an
     *         all-zero record is given for index equal to {@link #size()}
     */
    public final synchronized void put(final long index, final byte[] b, final int start) throws IOException {
        assert b.length - start >= this.recordsize;
        final long filesize = filesize();
        final long s = filesize + this.buffercount;
        if (index > s) throw new IndexOutOfBoundsException("kelondroEcoFS.put(" + index + ") outside bounds (" + s + ")");

        // check if this is an empty entry
        if (isClean(b, start, this.recordsize)) {
            clean(index);
            return;
        }

        // check if index is inside of the write buffer
        final int q = inBuffer(index, filesize);
        if (q >= 0) {
            // write entry to the buffer
            System.arraycopy(b, start, this.buffer, q * this.recordsize, this.recordsize);
            return;
        }

        if (index == s) {
            // append the record to the end of the file
            append(b, start);
        } else {
            // write the record directly to the file,
            // do not care about the buffer; this case was checked before
            this.raf.seek(index * this.recordsize);
            this.raf.write(b, start, this.recordsize);
        }
    }

    /**
     * Appends a record at the end.
     *
     * @param b source array
     * @param start offset in b where the record starts
     * @throws IOException if the record consists only of zero bytes
     */
    public final synchronized void add(final byte[] b, final int start) throws IOException {
        assert b.length - start >= this.recordsize;
        // check if this is an empty entry
        if (isClean(b, start, this.recordsize)) {
            // it is not possible to add a clean record at the end of a EcoFS, because
            // such records should cause the record to shrink
            throw new IOException("add: record at end is clean");
        }
        append(b, start);
    }

    /**
     * Puts a record behind the last record of the write buffer.
     * If the buffer is full it is flushed to the file first.
     */
    private void append(final byte[] b, final int start) {
        final int capacity = this.buffer.length / this.recordsize;
        if (this.buffercount >= capacity) {
            assert this.buffercount == capacity;
            // the record does not fit in current buffer: write buffer,
            // then the new entry becomes the first one in the buffer
            flushBuffer();
            System.arraycopy(b, start, this.buffer, 0, this.recordsize);
            this.buffercount = 1;
        } else {
            System.arraycopy(b, start, this.buffer, this.buffercount * this.recordsize, this.recordsize);
            this.buffercount++;
        }
        assert this.buffercount <= capacity;
    }

    /**
     * @return true if all length bytes of b starting at offset are zero
     */
    private final static boolean isClean(final byte[] b, final int offset, final int length) {
        for (int i = 0; i < length; i++) {
            if (b[i + offset] != 0) return false;
        }
        return true;
    }

    /**
     * @param index position of a record, must be smaller than the number of records
     * @return true if the record at the given index consists only of zero bytes
     * @throws IOException
     */
    private final boolean isClean(final long index) throws IOException {
        final long filesize = filesize();
        final long size = filesize + this.buffercount;
        assert index < size;

        // check if index is inside of buffer
        final int q = inBuffer(index, filesize);
        if (q >= 0) {
            // check entry from the buffer
            return isClean(this.buffer, q * this.recordsize, this.recordsize);
        }

        final byte[] b = new byte[this.recordsize];
        this.raf.seek(index * this.recordsize);
        this.raf.readFully(b, 0, this.recordsize);
        return isClean(b, 0, this.recordsize);
    }

    /**
     * Erases the record at the given index. The last record is removed using
     * {@link #cleanLast()}; any other record is overwritten with zero bytes.
     *
     * @param index
     * @throws IOException
     * @throws IndexOutOfBoundsException if index is not smaller than the number of records
     */
    private final void clean(final long index) throws IOException {
        final long filesize = filesize();
        final long s = filesize + this.buffercount;
        if (index >= s) throw new IndexOutOfBoundsException("kelondroEcoFS.clean(" + index + ") outside bounds (" + s + ")");
        if (index == s - 1) {
            cleanLast();
            return;
        }

        // check if index is inside of the write buffer
        final int q = inBuffer(index, filesize);
        if (q >= 0) {
            // write zero to the buffer
            System.arraycopy(this.zero, 0, this.buffer, q * this.recordsize, this.recordsize);
            return;
        }

        this.raf.seek(index * this.recordsize);
        this.raf.write(this.zero, 0, this.recordsize);
    }

    /**
     * Removes the last record and copies its content to b. Afterwards all trailing
     * records that consist only of zero bytes are removed as well.
     *
     * @param b destination array for the removed record
     * @param start offset in b to store data
     * @throws IOException
     */
    public final synchronized void cleanLast(final byte[] b, final int start) throws IOException {
        cleanLast0(b, start);
        trimCleanTail();
    }

    /**
     * Removes the last record (from the buffer if it holds records, otherwise from the file)
     * and copies its content to b. If the file does not contain a complete record,
     * b is filled with zero bytes and the file is truncated to zero length.
     *
     * @param b destination array for the removed record
     * @param start offset in b to store data
     * @throws IOException
     */
    private final void cleanLast0(final byte[] b, final int start) throws IOException {
        assert b.length - start >= this.recordsize;
        // check if the last record is inside of buffer
        if (this.buffercount > 0) {
            // read entry from the buffer
            System.arraycopy(this.buffer, (this.buffercount - 1) * this.recordsize, b, start, this.recordsize);
            // shrink buffer
            this.buffercount--;
            return;
        }

        final long length = this.raf.length();
        if (length < this.recordsize) {
            // no complete record in file (prevents seek error for 0 size file)
            System.arraycopy(this.zero, 0, b, start, this.recordsize);
            this.raf.setLength(0);
            return;
        }

        // read entry from the file
        final long endpos = length - this.recordsize;
        this.raf.seek(endpos);
        this.raf.readFully(b, start, this.recordsize);
        // write zero bytes to the file
        this.raf.seek(endpos);
        this.raf.write(this.zero, 0, this.recordsize);
        // shrink file
        this.raf.setLength(endpos);
    }

    /**
     * Removes the last record. Afterwards all trailing records that consist only
     * of zero bytes are removed as well.
     *
     * @throws IOException
     */
    public final synchronized void cleanLast() throws IOException {
        cleanLast0();
        trimCleanTail();
    }

    /**
     * Removes the last record (from the buffer if it holds records, otherwise from the file)
     * without reading it.
     *
     * @throws IOException
     */
    private final void cleanLast0() throws IOException {
        // check if the last record is inside of the write buffer
        if (this.buffercount > 0) {
            // shrink buffer
            this.buffercount--;
            return;
        }
        // shrink file; nothing to shrink if already 0 length.
        // The new length is clamped to 0 in case the file is shorter than one record.
        final long length = this.raf.length();
        if (length > 0) {
            this.raf.setLength(Math.max(0L, length - this.recordsize));
        }
    }

    /**
     * Removes records from the end as long as the last record consists only of zero bytes.
     */
    private void trimCleanTail() throws IOException {
        long count;
        while ((count = size()) > 0 && isClean(count - 1)) {
            cleanLast0();
        }
    }

    /**
     * Requests that the table file is deleted when the virtual machine terminates.
     */
    public final void deleteOnExit() {
        this.tablefile.deleteOnExit();
    }

    /**
     * main - writes some data and checks the tables size (with time measuring)
     * @param args args[0] is the path of the table file; an existing file is deleted first
     */
    public static void main(final String[] args) {
        if (args.length < 1) {
            System.err.println("usage: Records TABLEFILE");
            return;
        }
        // open a file, add some entries and exit
        final File f = new File(args[0]);
        if (f.exists()) FileUtils.deletedelete(f);
        final Records t = new Records(f, 8);
        try {
            final byte[] b = new byte[8];
            t.add("01234567".getBytes(), 0);
            t.add("ABCDEFGH".getBytes(), 0);
            t.add("abcdefgh".getBytes(), 0);
            t.add("--------".getBytes(), 0);
            t.add("********".getBytes(), 0);
            for (int i = 0; i < 1000; i++) t.add("++++++++".getBytes(), 0);
            t.add("=======0".getBytes(), 0);
            t.add("=======1".getBytes(), 0);
            t.add("=======2".getBytes(), 0);
            t.cleanLast(b, 0);
            System.out.println(UTF8.String(b));
            t.cleanLast(b, 0);
            System.out.println(UTF8.String(b));
            t.get(1, b, 0);
            System.out.println(UTF8.String(b));
            t.put(1, "AbCdEfGh".getBytes(), 0);
            t.get(1, b, 0);
            System.out.println(UTF8.String(b));
            t.get(3, b, 0);
            System.out.println(UTF8.String(b));
            t.get(4, b, 0);
            System.out.println(UTF8.String(b));
            System.out.println("size = " + t.size());
            t.cleanLast();

            // measure the average duration of one size() call
            final int iterations = 100000;
            final long start = System.nanoTime();
            long c = 0;
            for (int i = 0; i < iterations; i++) {
                c = t.size();
            }
            System.out.println("size() needs " + ((System.nanoTime() - start) / iterations) + " nanoseconds");
            System.out.println("size = " + c);
        } catch (final IOException e) {
            ConcurrentLog.logException(e);
        } finally {
            // flush and release the file also if something went wrong
            t.close();
        }
    }
}