/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xalan" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, Lotus
* Development Corporation., http://www.lotus.com. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.exist.util;
import java.nio.ByteBuffer;
/**
* Copied this class from Xalan and adopted it for eXist. Bare-bones, unsafe,
* fast string buffer. No thread-safety, no parameter range checking, exposed
* fields. Note that in typical applications, thread-safety of a StringBuffer
* is a somewhat dubious concept in any case. <p>
*
* Note that Stree and DTM used a single FastStringBuffer as a string pool, by
* recording start and length indices within this single buffer. This minimizes
* heap overhead, but of course requires more work when retrieving the data.
* <p>
*
* FastStringBuffer operates as a "chunked buffer". Doing so reduces the need
* to recopy existing information when an append exceeds the space available;
* we just allocate another chunk and flow across to it. (The array of chunks
* may need to grow, admittedly, but that's a much smaller object.) Some excess
* recopying may arise when we extract Strings which cross chunk boundaries;
* larger chunks make that less frequent. <p>
*
* The size values are parameterized, to allow tuning this code. In theory,
* Result Tree Fragments might want to be tuned differently from the main
* document's text. <p>
*
* %REVIEW% An experiment in self-tuning is included in the code (using nested
* FastStringBuffers to achieve variation in chunk sizes), but this
* implementation has proven to be problematic when data may be being copied
* from the FSB into itself. We should either re-architect that to make this
* safe (if possible) or remove that code and clean up for
* performance/maintainability reasons. <p>
*
*/
public class FastByteBuffer implements ByteArray {
// %BUG% %REVIEW% *****PROBLEM SUSPECTED: If data from an FSB is being copied
// back into the same FSB (variable set from previous variable, for example)
// and blocksize changes in mid-copy... there's risk of severe malfunction in
// the read process, due to how the resizing code re-jiggers storage. Arggh.
// If we want to retain the variable-size-block feature, we need to reconsider
// that issue. For now, I have forced us into fixed-size mode.
static boolean DEBUG_FORCE_FIXED_CHUNKSIZE = true;
// If nonzero, forces the inial chunk size.
/*
*
*/
final static int DEBUG_FORCE_INIT_BITS = 0;
// =m_chunkSize-1;
/**
* Field m_array holds the string buffer's text contents, using an
* array-of-arrays. Note that this array, and the arrays it contains, may
* be reallocated when necessary in order to allow the buffer to grow;
* references to them should be considered to be invalidated after any
* append. However, the only time these arrays are directly exposed is in
* the sendSAXcharacters call.
*/
byte[][] m_array;
/**
* Field m_chunkBits sets our chunking strategy, by saying how many bits of
* index can be used within a single chunk before flowing over to the next
* chunk. For example, if m_chunkbits is set to 15, each chunk can contain
* up to 2^15 (32K) characters
*/
int m_chunkBits = 15;
// =1<<(m_chunkBits-1);
/**
* Field m_chunkMask is m_chunkSize-1 -- in other words, m_chunkBits worth
* of low-order '1' bits, useful for shift-and-mask addressing within the
* chunks.
*/
int m_chunkMask;
/**
* Field m_chunkSize establishes the maximum size of one chunk of the array
* as 2**chunkbits characters. (Which may also be the minimum size if we
* aren't tuning for storage)
*/
int m_chunkSize;
/**
* Field m_firstFree is an index into m_array[m_lastChunk][], pointing to
* the first character in the Chunked Array which is not part of the
* FastStringBuffer's current content. Since m_array[][] is zero-based, the
* length of that content can be calculated as (m_lastChunk<<m_chunkBits) +
* m_firstFree
*/
int m_firstFree = 0;
/**
* Field m_innerFSB, when non-null, is a FastStringBuffer whose total
* length equals m_chunkSize, and which replaces m_array[0]. This allows
* building a hierarchy of FastStringBuffers, where early appends use a
* smaller chunkSize (for less wasted memory overhead) but later ones use a
* larger chunkSize (for less heap activity overhead).
*/
FastByteBuffer m_innerFSB = null;
/**
* Field m_lastChunk is an index into m_array[], pointing to the last chunk
* of the Chunked Array currently in use. Note that additional chunks may
* actually be allocated, eg if the FastStringBuffer had previously been
* truncated or if someone issued an ensureSpace request. <p>
*
* The insertion point for append operations is addressed by the
* combination of m_lastChunk and m_firstFree.
*/
int m_lastChunk = 0;
/**
* Field m_maxChunkBits affects our chunk-growth strategy, by saying what
* the largest permissible chunk size is in this particular
* FastStringBuffer hierarchy.
*/
int m_maxChunkBits = 15;
/**
* Field m_rechunkBits affects our chunk-growth strategy, by saying how
* many chunks should be allocated at one size before we encapsulate them
* into the first chunk of the next size up. For example, if m_rechunkBits
* is set to 3, then after 8 chunks at a given size we will rebundle them
* as the first element of a FastStringBuffer using a chunk size 8 times
* larger (chunkBits shifted left three bits).
*/
int m_rebundleBits = 2;
/**
* Construct a FastStringBuffer, with allocation policy as per parameters.
* <p>
*
* For coding convenience, I've expressed both allocation sizes in terms of
* a number of bits. That's needed for the final size of a chunk, to permit
* fast and efficient shift-and-mask addressing. It's less critical for the
* inital size, and may be reconsidered. <p>
*
* An alternative would be to accept integer sizes and round to powers of
* two; that really doesn't seem to buy us much, if anything.
*
*@param initChunkBits Length in characters of the initial allocation of a
* chunk, expressed in log-base-2. (That is, 10 means allocate 1024
* characters.) Later chunks will use larger allocation units, to trade
* off allocation speed of large document against storage efficiency of
* small ones.
*@param maxChunkBits Number of character-offset bits that should be used
* for addressing within a chunk. Maximum length of a chunk is
* 2^chunkBits characters.
*@param rebundleBits Number of character-offset bits that addressing
* should advance before we attempt to take a step from initChunkBits
* to maxChunkBits
*/
public FastByteBuffer( int initChunkBits, int maxChunkBits,
int rebundleBits ) {
if ( DEBUG_FORCE_INIT_BITS != 0 )
initChunkBits = DEBUG_FORCE_INIT_BITS;
// %REVIEW%
// Should this force to larger value, or smaller? Smaller less efficient, but if
// someone requested variable mode it's because they care about storage space.
// On the other hand, given the other changes I'm making, odds are that we should
// adopt the larger size. Dither, dither, dither... This is just stopgap workaround
// anyway; we need a permanant solution.
//
if ( DEBUG_FORCE_FIXED_CHUNKSIZE )
maxChunkBits = initChunkBits;
//if(DEBUG_FORCE_FIXED_CHUNKSIZE) initChunkBits=maxChunkBits;
m_array = new byte[16][];
// Don't bite off more than we're prepared to swallow!
if ( initChunkBits > maxChunkBits )
initChunkBits = maxChunkBits;
m_chunkBits = initChunkBits;
m_maxChunkBits = maxChunkBits;
m_rebundleBits = rebundleBits;
m_chunkSize = 1 << ( initChunkBits );
m_chunkMask = m_chunkSize - 1;
m_array[0] = new byte[m_chunkSize];
//m_array[0] = ByteArrayPool.getByteArray(m_chunkSize);
}
/**
* Construct a FastStringBuffer, using a default rebundleBits value.
* NEEDSDOC
*
*@param initChunkBits NEEDSDOC
*@param maxChunkBits
*/
public FastByteBuffer( int initChunkBits, int maxChunkBits ) {
this( initChunkBits, maxChunkBits, 2 );
}
/**
* Construct a FastStringBuffer, using default maxChunkBits and
* rebundleBits values. <p>
*
* ISSUE: Should this call assert initial size, or fixed size? Now
* configured as initial, with a default for fixed.
*
*@param initChunkBits
*/
public FastByteBuffer( int initChunkBits ) {
this( initChunkBits, 15, 2 );
}
/** Construct a FastStringBuffer, using a default allocation policy. */
public FastByteBuffer() {
// 10 bits is 1K. 15 bits is 32K. Remember that these are character
// counts, so actual memory allocation unit is doubled for UTF-16 chars.
//
// For reference: In the original FastStringBuffer, we simply
// overallocated by blocksize (default 1KB) on each buffer-growth.
//this( 10, 15, 2 );
this( 6, 10, 3 );
}
/**
* Encapsulation c'tor. After this is called, the source FastStringBuffer
* will be reset to use the new object as its m_innerFSB, and will have had
* its chunk size reset appropriately. IT SHOULD NEVER BE CALLED EXCEPT
* WHEN source.length()==1<<(source.m_chunkBits+source.m_rebundleBits)
* NEEDSDOC
*
*@param source
*/
private FastByteBuffer( FastByteBuffer source ) {
// Copy existing information into new encapsulation
m_chunkBits = source.m_chunkBits;
m_maxChunkBits = source.m_maxChunkBits;
m_rebundleBits = source.m_rebundleBits;
m_chunkSize = source.m_chunkSize;
m_chunkMask = source.m_chunkMask;
m_array = source.m_array;
m_innerFSB = source.m_innerFSB;
// These have to be adjusted because we're calling just at the time
// when we would be about to allocate another chunk
m_lastChunk = source.m_lastChunk - 1;
m_firstFree = source.m_chunkSize;
// Establish capsule as the Inner FSB, reset chunk sizes/addressing
source.m_array = new byte[16][];
source.m_innerFSB = this;
// Since we encapsulated just as we were about to append another
// chunk, return ready to create the chunk after the innerFSB
// -- 1, not 0.
source.m_lastChunk = 1;
source.m_firstFree = 0;
source.m_chunkBits += m_rebundleBits;
source.m_chunkSize = 1 << ( source.m_chunkBits );
source.m_chunkMask = source.m_chunkSize - 1;
}
/**
* Append a single character onto the FastStringBuffer, growing the storage
* if necessary. <p>
*
* NOTE THAT after calling append(), previously obtained references to
* m_array[][] may no longer be valid.... though in fact they should be in
* this instance.
*
*@param value character to be appended.
*/
public final void append( byte value ) {
byte[] chunk;
// We may have preallocated chunks. If so, all but last should
// be at full size.
boolean lastchunk = ( m_lastChunk + 1 == m_array.length );
if ( m_firstFree < m_chunkSize )
// Simplified test single-character-fits
chunk = m_array[m_lastChunk];
else {
// Extend array?
int i = m_array.length;
if ( m_lastChunk + 1 == i ) {
byte[][] newarray = new byte[i + 16][];
System.arraycopy( m_array, 0, newarray, 0, i );
m_array = newarray;
}
// Advance one chunk
chunk = m_array[++m_lastChunk];
if ( chunk == null ) {
// Hierarchical encapsulation
if ( m_lastChunk == 1 << m_rebundleBits
&& m_chunkBits < m_maxChunkBits )
// Should do all the work of both encapsulating
// existing data and establishing new sizes/offsets
m_innerFSB = new FastByteBuffer( this );
// Add a chunk.
chunk = m_array[m_lastChunk] = new byte[m_chunkSize];
//chunk = m_array[m_lastChunk] = ByteArrayPool.getByteArray(m_chunkSize);
}
m_firstFree = 0;
}
// Space exists in the chunk. Append the character.
chunk[m_firstFree++] = value;
}
/**
* Append the contents of the array onto the buffer.
*
*@param chars Description of the Parameter
*/
public final void append( byte[] chars ) {
append( chars, 0, chars.length );
}
/**
* Append part of the contents of a Character Array onto the
* FastStringBuffer, growing the storage if necessary. <p>
*
* NOTE THAT after calling append(), previously obtained references to
* m_array[] may no longer be valid.
*
*@param chars character array from which data is to be copied
*@param start offset in chars of first character to be copied,
* zero-based.
*@param length number of characters to be copied
*/
public final void append( byte[] chars, int start, int length ) {
int strlen = length;
if ( 0 == strlen )
return;
int copyfrom = start;
byte[] chunk = m_array[m_lastChunk];
int available = m_chunkSize - m_firstFree;
// Repeat while data remains to be copied
while ( strlen > 0 ) {
// Copy what fits
if ( available > strlen )
available = strlen;
System.arraycopy( chars, copyfrom, m_array[m_lastChunk], m_firstFree,
available );
strlen -= available;
copyfrom += available;
// If there's more left, allocate another chunk and continue
if ( strlen > 0 ) {
// Extend array?
int i = m_array.length;
if ( m_lastChunk + 1 == i ) {
byte[][] newarray = new byte[i + 16][];
System.arraycopy( m_array, 0, newarray, 0, i );
m_array = newarray;
}
// Advance one chunk
chunk = m_array[++m_lastChunk];
if ( chunk == null ) {
// Hierarchical encapsulation
if ( m_lastChunk == 1 << m_rebundleBits
&& m_chunkBits < m_maxChunkBits )
// Should do all the work of both encapsulating
// existing data and establishing new sizes/offsets
m_innerFSB = new FastByteBuffer( this );
// Add a chunk.
chunk = m_array[m_lastChunk] = new byte[m_chunkSize];
//chunk = m_array[m_lastChunk] = ByteArrayPool.getByteArray(m_chunkSize);
}
available = m_chunkSize;
m_firstFree = 0;
}
}
// Adjust the insert point in the last chunk, when we've reached it.
m_firstFree += available;
}
/**
* Append the contents of another FastStringBuffer onto this
* FastStringBuffer, growing the storage if necessary. <p>
*
* NOTE THAT after calling append(), previously obtained references to
* m_array[] may no longer be valid.
*
*@param value FastStringBuffer whose contents are to be appended.
*/
public final void append( FastByteBuffer value ) {
// Complicating factor here is that the two buffers may use
// different chunk sizes, and even if they're the same we're
// probably on a different alignment due to previously appended
// data. We have to work through the source in bite-sized chunks.
if ( value == null )
return;
int strlen = value.length();
if ( 0 == strlen )
return;
int copyfrom = 0;
byte[] chunk = m_array[m_lastChunk];
int available = m_chunkSize - m_firstFree;
// Repeat while data remains to be copied
while ( strlen > 0 ) {
// Copy what fits
if ( available > strlen )
available = strlen;
int sourcechunk = ( copyfrom + value.m_chunkSize - 1 )
>>> value.m_chunkBits;
int sourcecolumn = copyfrom & value.m_chunkMask;
int runlength = value.m_chunkSize - sourcecolumn;
if ( runlength > available )
runlength = available;
System.arraycopy( value.m_array[sourcechunk], sourcecolumn,
m_array[m_lastChunk], m_firstFree, runlength );
if ( runlength != available )
System.arraycopy( value.m_array[sourcechunk + 1], 0,
m_array[m_lastChunk], m_firstFree + runlength,
available - runlength );
strlen -= available;
copyfrom += available;
// If there's more left, allocate another chunk and continue
if ( strlen > 0 ) {
// Extend array?
int i = m_array.length;
if ( m_lastChunk + 1 == i ) {
byte[][] newarray = new byte[i + 16][];
System.arraycopy( m_array, 0, newarray, 0, i );
m_array = newarray;
}
// Advance one chunk
chunk = m_array[++m_lastChunk];
if ( chunk == null ) {
// Hierarchical encapsulation
if ( m_lastChunk == 1 << m_rebundleBits
&& m_chunkBits < m_maxChunkBits )
// Should do all the work of both encapsulating
// existing data and establishing new sizes/offsets
m_innerFSB = new FastByteBuffer( this );
// Add a chunk.
chunk = m_array[m_lastChunk] = new byte[m_chunkSize];
//chunk = m_array[m_lastChunk] = ByteArrayPool.getByteArray(m_chunkSize);
}
available = m_chunkSize;
m_firstFree = 0;
}
}
// Adjust the insert point in the last chunk, when we've reached it.
m_firstFree += available;
}
public void copyTo( byte[] newBuf, int offset ) {
int pos = offset;
for ( int i = 0; i < m_lastChunk; i++ ) {
if ( i == 0 && m_innerFSB != null )
m_innerFSB.copyTo( newBuf, pos );
else
System.arraycopy( m_array[i], 0, newBuf,
pos, m_chunkSize );
pos += m_chunkSize;
}
System.arraycopy( m_array[m_lastChunk], 0, newBuf, pos, m_firstFree );
}
public void copyTo( ByteArray newBuf ) {
for ( int i = 0; i < m_lastChunk; i++ ) {
if ( i == 0 && m_innerFSB != null )
m_innerFSB.copyTo( newBuf );
else
newBuf.append(m_array[i]);
}
newBuf.append(m_array[m_lastChunk], 0, m_firstFree);
}
public void copyTo( ByteBuffer newBuf ) {
for ( int i = 0; i < m_lastChunk; i++ ) {
if ( i == 0 && m_innerFSB != null )
m_innerFSB.copyTo( newBuf );
else
newBuf.put(m_array[i]);
}
newBuf.put(m_array[m_lastChunk], 0, m_firstFree);
}
public void copyTo( int start, byte[] newBuf, int offset, int len ) {
int stop = start + len;
int startChunk = start >>> m_chunkBits;
int startColumn = start & m_chunkMask;
int stopChunk = stop >>> m_chunkBits;
int stopColumn = stop & m_chunkMask;
int pos = offset;
for(int i = startChunk; i < stopChunk; ++i) {
if( i == 0 && m_innerFSB != null)
m_innerFSB.copyTo(startColumn, newBuf, offset, m_chunkSize - startColumn);
else
System.arraycopy(m_array[i], startColumn, newBuf, pos, m_chunkSize - startColumn);
pos += m_chunkSize - startColumn;
startColumn = 0;
}
if(stopChunk == 0 && m_innerFSB != null)
m_innerFSB.copyTo(startColumn, newBuf, pos, stopColumn - startColumn);
else if(stopColumn > startColumn)
System.arraycopy(m_array[stopChunk], startColumn, newBuf, pos, stopColumn - startColumn);
}
public void copyTo(int start, ByteBuffer buf, int len) {
int stop = start + len;
int startChunk = start >>> m_chunkBits;
int startColumn = start & m_chunkMask;
int stopChunk = stop >>> m_chunkBits;
int stopColumn = stop & m_chunkMask;
for(int i = startChunk; i < stopChunk; ++i) {
if( i == 0 && m_innerFSB != null)
m_innerFSB.copyTo(startColumn, buf, m_chunkSize - startColumn);
else
buf.put(m_array[i], startColumn, m_chunkSize - startColumn);
startColumn = 0;
}
if(stopChunk == 0 && m_innerFSB != null)
m_innerFSB.copyTo(startColumn, buf, stopColumn - startColumn);
else if(stopColumn > startColumn)
buf.put(m_array[stopChunk], startColumn, stopColumn - startColumn);
}
public void set(int position, byte b) {
int chunk = position >>> m_chunkBits;
int column = position & m_chunkMask;
m_array[chunk][column] = b;
}
/**
* Get the length of the list. Synonym for size().
*
*@return the number of characters in the FastStringBuffer's content.
*/
public final int length() {
return ( m_lastChunk << m_chunkBits ) + m_firstFree;
}
/**
* Discard the content of the FastStringBuffer, and most of the memory that
* was allocated by it, restoring the initial state. Note that this may
* eventually be different from setLength(0), which see.
*/
public final void reset() {
m_lastChunk = 0;
m_firstFree = 0;
// Recover the original chunk size
FastByteBuffer innermost = this;
while ( innermost.m_innerFSB != null )
innermost = innermost.m_innerFSB;
m_chunkBits = innermost.m_chunkBits;
m_chunkSize = innermost.m_chunkSize;
m_chunkMask = innermost.m_chunkMask;
// Discard the hierarchy
m_innerFSB = null;
m_array = new byte[16][0];
m_array[0] = new byte[m_chunkSize];
}
/**
* Directly set how much of the FastStringBuffer's storage is to be
* considered part of its content. This is a fast but hazardous operation.
* It is not protected against negative values, or values greater than the
* amount of storage currently available... and even if additional storage
* does exist, its contents are unpredictable. The only safe use for our
* setLength() is to truncate the FastStringBuffer to a shorter string.
*
*@param l New length. If l<0 or l>=getLength(), this operation will not
* report an error but future operations will almost certainly fail.
*/
public final void setLength( int l ) {
m_lastChunk = l >>> m_chunkBits;
if ( m_lastChunk == 0 && m_innerFSB != null )
// Replace this FSB with the appropriate inner FSB, truncated
m_innerFSB.setLength( l, this );
else {
m_firstFree = l & m_chunkMask;
// There's an edge case if l is an exact multiple of m_chunkBits, which risks leaving
// us pointing at the start of a chunk which has not yet been allocated. Rather than
// pay the cost of dealing with that in the append loops (more scattered and more
// inner-loop), we correct it here by moving to the safe side of that
// line -- as we would have left the indexes had we appended up to that point.
if ( m_firstFree == 0 && m_lastChunk > 0 ) {
--m_lastChunk;
m_firstFree = m_chunkSize;
}
}
}
/**
* Subroutine for the public setLength() method. Deals with the fact that
* truncation may require restoring one of the innerFSBs NEEDSDOC
*
*@param l NEEDSDOC
*@param rootFSB
*/
private final void setLength( int l, FastByteBuffer rootFSB ) {
m_lastChunk = l >>> m_chunkBits;
if ( m_lastChunk == 0 && m_innerFSB != null )
m_innerFSB.setLength( l, rootFSB );
else {
// Undo encapsulation -- pop the innerFSB data back up to root.
// Inefficient, but attempts to keep the code simple.
rootFSB.m_chunkBits = m_chunkBits;
rootFSB.m_maxChunkBits = m_maxChunkBits;
rootFSB.m_rebundleBits = m_rebundleBits;
rootFSB.m_chunkSize = m_chunkSize;
rootFSB.m_chunkMask = m_chunkMask;
rootFSB.m_array = m_array;
rootFSB.m_innerFSB = m_innerFSB;
rootFSB.m_lastChunk = m_lastChunk;
// Finally, truncate this sucker.
rootFSB.m_firstFree = l & m_chunkMask;
}
}
/**
* Get the length of the list. Synonym for length().
*
*@return the number of characters in the FastStringBuffer's content.
*/
public final int size() {
return ( m_lastChunk << m_chunkBits ) + m_firstFree;
}
}