/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.filter;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.DeserializationException;
import org.apache.hadoop.hbase.KeyValue;
import java.util.List;
/**
* Interface for row and column filters directly applied within the regionserver.
* A filter can expect the following call sequence:
*<ul>
* <li>{@link #reset()}</li>
* <li>{@link #filterAllRemaining()} -> true indicates scan is over, false, keep going on.</li>
* <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row,
* if false, we will also call</li>
 * <li>{@link #filterKeyValue(KeyValue)} -> true to drop this key/value</li>
 * <li>{@link #filterRow(List)} -> allows direct modification of the final list to be submitted</li>
 * <li>{@link #filterRow()} -> last chance to drop entire row based on the sequence of
 * filterKeyValue(KeyValue) calls. Eg: filter a row if it doesn't contain a specified column.
* </li>
* </ul>
*
* Filter instances are created one per region/scan. This abstract class replaces
* the old RowFilterInterface.
*
* When implementing your own filters, consider inheriting {@link FilterBase} to help
* you reduce boilerplate.
*
* @see FilterBase
*/
@InterfaceAudience.Public
@InterfaceStability.Stable
public abstract class Filter {

  /**
   * Reset the state of the filter between rows.
   */
  public abstract void reset();

  /**
   * Filters a row based on the row key. If this returns true, the entire
   * row will be excluded. If false, each KeyValue in the row will be
   * passed to {@link #filterKeyValue(KeyValue)} below.
   *
   * @param buffer buffer containing row key
   * @param offset offset into buffer where row key starts
   * @param length length of the row key
   * @return true, remove entire row, false, include the row (maybe).
   */
  public abstract boolean filterRowKey(byte[] buffer, int offset, int length);

  /**
   * If this returns true, the scan will terminate.
   *
   * @return true to end scan, false to continue.
   */
  public abstract boolean filterAllRemaining();

  /**
   * A way to filter based on the column family, column qualifier and/or the
   * column value. Return code is described below. This allows filters to
   * filter only certain number of columns, then terminate without matching
   * every column.
   *
   * If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
   * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called
   * just in case the caller calls for the next row.
   *
   * @param v the KeyValue in question
   * @return code as described below
   * @see Filter.ReturnCode
   */
  public abstract ReturnCode filterKeyValue(final KeyValue v);

  /**
   * Give the filter a chance to transform the passed KeyValue.
   * If the KeyValue is changed a new KeyValue object must be returned.
   * @see org.apache.hadoop.hbase.KeyValue#shallowCopy()
   *
   * The transformed KeyValue is what is eventually returned to the
   * client. Most filters will return the passed KeyValue unchanged.
   * @see org.apache.hadoop.hbase.filter.KeyOnlyFilter#transform(KeyValue)
   * for an example of a transformation.
   *
   * @param v the KeyValue in question
   * @return the changed KeyValue
   */
  public abstract KeyValue transform(final KeyValue v);

  /**
   * Return codes for {@link #filterKeyValue(KeyValue)}.
   */
  public enum ReturnCode {
    /**
     * Include the KeyValue
     */
    INCLUDE,
    /**
     * Include the KeyValue and seek to the next column skipping older versions.
     */
    INCLUDE_AND_NEXT_COL,
    /**
     * Skip this KeyValue
     */
    SKIP,
    /**
     * Skip this column. Go to the next column in this row.
     */
    NEXT_COL,
    /**
     * Done with columns, skip to next row. Note that filterRow() will
     * still be called.
     */
    NEXT_ROW,
    /**
     * Seek to next key which is given as hint by the filter.
     */
    SEEK_NEXT_USING_HINT,
  }

  /**
   * Chance to alter the list of keyvalues to be submitted.
   * Modifications to the list will carry on.
   *
   * @param kvs the list of keyvalues to be filtered
   */
  public abstract void filterRow(List<KeyValue> kvs);

  /**
   * @return True if this filter actively uses filterRow(List) or filterRow().
   * Primarily used to check for conflicts with scans(such as scans
   * that do not read a full row at a time)
   */
  public abstract boolean hasFilterRow();

  /**
   * Last chance to veto row based on previous {@link #filterKeyValue(KeyValue)}
   * calls. The filter needs to retain state then return a particular value for
   * this call if they wish to exclude a row if a certain column is missing
   * (for example).
   *
   * @return true to exclude row, false to include row.
   */
  public abstract boolean filterRow();

  /**
   * If the filter returns the match code SEEK_NEXT_USING_HINT, then
   * it should also tell which is the next key it must seek to.
   * After receiving the match code SEEK_NEXT_USING_HINT, the QueryMatcher would
   * call this function to find out which key it must next seek to.
   *
   * @param currentKV the KeyValue at the current scanner position
   * @return KeyValue which must be next seeked. return null if the filter is
   * not sure which key to seek to next.
   */
  public abstract KeyValue getNextKeyHint(final KeyValue currentKV);

  /**
   * Check that given column family is essential for filter to check row. Most
   * filters always return true here. But some could have more sophisticated
   * logic which could significantly reduce scanning process by not even
   * touching columns until we are 100% sure that its data is needed in result.
   *
   * @param name the column family name to check
   * @return true if the given column family may affect this filter's decision
   */
  public abstract boolean isFamilyEssential(byte[] name);

  /**
   * @return The filter serialized using pb
   */
  public abstract byte[] toByteArray();

  /**
   * @param pbBytes A pb serialized {@link Filter} instance
   * @return An instance of {@link Filter} made from <code>bytes</code>
   * @throws DeserializationException if called on this base class rather than a
   *         derived type that implements its own deserialization
   * @see #toByteArray
   */
  public static Filter parseFrom(final byte[] pbBytes) throws DeserializationException {
    throw new DeserializationException(
      "parseFrom called on base Filter, but should be called on derived type");
  }

  /**
   * @param other the filter to compare against
   * @return true if and only if the fields of the filter that are serialized
   * are equal to the corresponding fields in other. Used for testing.
   */
  abstract boolean areSerializedFieldsEqual(Filter other);
}