/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
import org.apache.hadoop.hbase.util.Bytes;
/**
* Keeps track of the columns for a scan if they are not explicitly specified
*/
@InterfaceAudience.Private
public class ScanWildcardColumnTracker implements ColumnTracker {
private byte [] columnBuffer = null;
private int columnOffset = 0;
private int columnLength = 0;
private int currentCount = 0;
private int maxVersions;
private int minVersions;
/* Keeps track of the latest timestamp and type included for current column.
* Used to eliminate duplicates. */
private long latestTSOfCurrentColumn;
private byte latestTypeOfCurrentColumn;
private long oldestStamp;
/**
* Return maxVersions of every row.
* @param minVersion Minimum number of versions to keep
* @param maxVersion Maximum number of versions to return
* @param oldestUnexpiredTS oldest timestamp that has not expired according
* to the TTL.
*/
public ScanWildcardColumnTracker(int minVersion, int maxVersion,
long oldestUnexpiredTS) {
this.maxVersions = maxVersion;
this.minVersions = minVersion;
this.oldestStamp = oldestUnexpiredTS;
}
/**
* {@inheritDoc}
* This receives puts *and* deletes.
* Deletes do not count as a version, but rather take the version
* of the previous put (so eventually all but the last can be reclaimed).
*/
@Override
public MatchCode checkColumn(byte[] bytes, int offset, int length,
long timestamp, byte type, boolean ignoreCount) throws IOException {
if (columnBuffer == null) {
// first iteration.
resetBuffer(bytes, offset, length);
if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
// do not count a delete marker as another version
return checkVersion(type, timestamp);
}
int cmp = Bytes.compareTo(bytes, offset, length,
columnBuffer, columnOffset, columnLength);
if (cmp == 0) {
if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
//If column matches, check if it is a duplicate timestamp
if (sameAsPreviousTSAndType(timestamp, type)) {
return ScanQueryMatcher.MatchCode.SKIP;
}
return checkVersion(type, timestamp);
}
resetTSAndType();
// new col > old col
if (cmp > 0) {
// switched columns, lets do something.x
resetBuffer(bytes, offset, length);
if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
return checkVersion(type, timestamp);
}
// new col < oldcol
// WARNING: This means that very likely an edit for some other family
// was incorrectly stored into the store for this one. Throw an exception,
// because this might lead to data corruption.
throw new IOException(
"ScanWildcardColumnTracker.checkColumn ran into a column actually " +
"smaller than the previous column: " +
Bytes.toStringBinary(bytes, offset, length));
}
private void resetBuffer(byte[] bytes, int offset, int length) {
columnBuffer = bytes;
columnOffset = offset;
columnLength = length;
currentCount = 0;
}
/**
* Check whether this version should be retained.
* There are 4 variables considered:
* If this version is past max versions -> skip it
* If this kv has expired or was deleted, check min versions
* to decide whther to skip it or not.
*
* Increase the version counter unless this is a delete
*/
private MatchCode checkVersion(byte type, long timestamp) {
if (!KeyValue.isDelete(type)) {
currentCount++;
}
if (currentCount > maxVersions) {
return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL; // skip to next col
}
// keep the KV if required by minversions or it is not expired, yet
if (currentCount <= minVersions || !isExpired(timestamp)) {
setTSAndType(timestamp, type);
return ScanQueryMatcher.MatchCode.INCLUDE;
} else {
return MatchCode.SEEK_NEXT_COL;
}
}
@Override
public void update() {
// no-op, shouldn't even be called
throw new UnsupportedOperationException(
"ScanWildcardColumnTracker.update should never be called!");
}
@Override
public void reset() {
columnBuffer = null;
resetTSAndType();
}
private void resetTSAndType() {
latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
latestTypeOfCurrentColumn = 0;
}
private void setTSAndType(long timestamp, byte type) {
latestTSOfCurrentColumn = timestamp;
latestTypeOfCurrentColumn = type;
}
private boolean sameAsPreviousTSAndType(long timestamp, byte type) {
return timestamp == latestTSOfCurrentColumn && type == latestTypeOfCurrentColumn;
}
private boolean isExpired(long timestamp) {
return timestamp < oldestStamp;
}
/**
* Used by matcher and scan/get to get a hint of the next column
* to seek to after checkColumn() returns SKIP. Returns the next interesting
* column we want, or NULL there is none (wildcard scanner).
*
* @return The column count.
*/
public ColumnCount getColumnHint() {
return null;
}
/**
* We can never know a-priori if we are done, so always return false.
* @return false
*/
@Override
public boolean done() {
return false;
}
public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset,
int qualLength) {
return MatchCode.SEEK_NEXT_COL;
}
public boolean isDone(long timestamp) {
return minVersions <= 0 && isExpired(timestamp);
}
}