/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.io.util;
import java.io.*;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.cassandra.io.FSReadError;
import org.apache.cassandra.utils.JVMStabilityInspector;
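/**
 * A SegmentedFile that memory-maps its contents. The file is split into segments of at most
 * MAX_SEGMENT_SIZE bytes so each one can be addressed through a single MappedByteBuffer; any
 * segment too large to map is left unmapped and is read through a RandomAccessReader instead.
 */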
public class MmappedSegmentedFile extends SegmentedFile
{
private static final Logger logger = LoggerFactory.getLogger(MmappedSegmentedFile.class);
// in a perfect world, MAX_SEGMENT_SIZE would be final, but we need to test with a smaller size to stay sane.
public static long MAX_SEGMENT_SIZE = Integer.MAX_VALUE;
/**
* Sorted array of segment offsets and MappedByteBuffers for segments. If mmap is completely disabled, or if the
* segment would be too long to mmap, the value for an offset will be null, indicating that we need to fall back
* to a RandomAccessFile.
*/
private final Segment[] segments;
public MmappedSegmentedFile(String path, long length, Segment[] segments)
{
super(new Cleanup(path, segments), path, length);
this.segments = segments;
}
private MmappedSegmentedFile(MmappedSegmentedFile copy)
{
super(copy);
this.segments = copy.segments;
}
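/**
 * @return a copy of this file sharing the same underlying mmap'd segments and cleanup state.
 */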
public MmappedSegmentedFile sharedCopy()
{
return new MmappedSegmentedFile(this);
}
/**
* @return The segment entry for the given position.
*/
private Segment floor(long position)
{
assert 0 <= position && position < length : String.format("position %d out of bounds [0, %d) in %s", position, length, path);
Segment seg = new Segment(position, null);
int idx = Arrays.binarySearch(segments, seg);
assert idx != -1 : String.format("Bad position %d for segments %s in %s", position, Arrays.toString(segments), path);
if (idx < 0)
// round down to entry at insertion point
idx = -(idx + 2);
return segments[idx];
}
/**
* @return The segment containing the given position: must be closed after use.
*/
public FileDataInput getSegment(long position)
{
Segment segment = floor(position);
if (segment.right != null)
{
// segment is mmap'd
return new MappedFileDataInput(segment.right, path, segment.left, (int) (position - segment.left));
}
// we can have single cells or partitions larger than 2GB, which is our maximum addressable range in a single segment;
// in this case we open as a normal random access reader
// FIXME: buffered random access readers are unbounded, so this segment will cover the rest of the file, rather than just the row
RandomAccessReader file = RandomAccessReader.open(new File(path));
file.seek(position);
return file;
}
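/**
 * Tidy implementation that unmaps the mmap'd segments once the file is no longer referenced.
 */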
private static final class Cleanup extends SegmentedFile.Cleanup
{
final Segment[] segments;
protected Cleanup(String path, Segment[] segments)
{
super(path);
this.segments = segments;
}
public void tidy()
{
if (!FileUtils.isCleanerAvailable())
return;
/*
 * Try forcing the unmapping of segments using undocumented unsafe sun APIs.
 * If this fails (non-Sun JVM), we'll have to wait for the GC to finalize the mapping.
 * If this works but a thread later tries to access an unmapped segment, the JVM will crash.
 */
try
{
for (Segment segment : segments)
{
if (segment.right == null)
continue;
FileUtils.clean(segment.right);
}
logger.debug("All segments have been unmapped successfully");
}
catch (Exception e)
{
JVMStabilityInspector.inspectThrowable(e);
// This is not supposed to happen
logger.error("Error while unmapping segments", e);
}
}
}
/**
 * Overrides the default behaviour to create segments of at most MAX_SEGMENT_SIZE bytes.
 */
static class Builder extends SegmentedFile.Builder
{
// planned segment boundaries
private List<Long> boundaries;
// offset of the open segment (first segment begins at 0).
private long currentStart = 0;
// current length of the open segment.
// used to allow merging multiple too-large-to-mmap segments into a single buffered segment.
private long currentSize = 0;
public Builder()
{
super();
boundaries = new ArrayList<>();
boundaries.add(0L);
}
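/**
 * Registers a position at which a segment may legally end (e.g. a row boundary). Boundaries are
 * coalesced into the open segment while it stays within MAX_SEGMENT_SIZE; once a boundary no longer
 * fits, the open segment is closed, and a region that is itself too large to mmap becomes its own
 * (unmapped) segment.
 */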
public void addPotentialBoundary(long boundary)
{
if (boundary - currentStart <= MAX_SEGMENT_SIZE)
{
// boundary fits into current segment: expand it
currentSize = boundary - currentStart;
return;
}
// close the current segment to try and make room for the boundary
if (currentSize > 0)
{
currentStart += currentSize;
boundaries.add(currentStart);
}
currentSize = boundary - currentStart;
// if we couldn't make room, the boundary needs its own segment
if (currentSize > MAX_SEGMENT_SIZE)
{
currentStart = boundary;
boundaries.add(currentStart);
currentSize = 0;
}
}
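/**
 * Builds the SegmentedFile for the given path. A positive overrideLength (only permitted when
 * isFinal is false) is used in place of the on-disk file length when creating the segments.
 */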
public SegmentedFile complete(String path, long overrideLength, boolean isFinal)
{
assert !isFinal || overrideLength <= 0;
long length = overrideLength > 0 ? overrideLength : new File(path).length();
// create the segments
return new MmappedSegmentedFile(path, length, createSegments(path, length));
}
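/**
 * Maps each planned segment of the file read-only; segments larger than MAX_SEGMENT_SIZE are left
 * unmapped (null buffer) so that reads over them fall back to a RandomAccessReader.
 */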
private Segment[] createSegments(String path, long length)
{
RandomAccessFile raf;
try
{
raf = new RandomAccessFile(path, "r");
}
catch (IOException e)
{
throw new RuntimeException(e);
}
// if we're finishing the file early, the planned boundaries may extend past the length we're completing with;
// remove those from the end (we loop in case the overshoot somehow spans multiple segments, but that would
// be an unusual dataset)
while (length < boundaries.get(boundaries.size() - 1))
boundaries.remove(boundaries.size() - 1);
// copy the planned boundaries and add a sentinel value == length
List<Long> boundaries = new ArrayList<>(this.boundaries);
if (length != boundaries.get(boundaries.size() - 1))
boundaries.add(length);
int segcount = boundaries.size() - 1;
Segment[] segments = new Segment[segcount];
try
{
for (int i = 0; i < segcount; i++)
{
long start = boundaries.get(i);
long size = boundaries.get(i + 1) - start;
MappedByteBuffer segment = size <= MAX_SEGMENT_SIZE
? raf.getChannel().map(FileChannel.MapMode.READ_ONLY, start, size)
: null;
segments[i] = new Segment(start, segment);
}
}
catch (IOException e)
{
throw new FSReadError(e, path);
}
finally
{
FileUtils.closeQuietly(raf);
}
return segments;
}
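// write our planned segment boundaries after the base-class bounds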
@Override
public void serializeBounds(DataOutput out) throws IOException
{
super.serializeBounds(out);
out.writeInt(boundaries.size());
for (long position: boundaries)
out.writeLong(position);
}
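// restore the segment boundaries written by serializeBounds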
@Override
public void deserializeBounds(DataInput in) throws IOException
{
super.deserializeBounds(in);
int size = in.readInt();
List<Long> temp = new ArrayList<>(size);
for (int i = 0; i < size; i++)
temp.add(in.readLong());
boundaries = temp;
}
}
}