package org.gbif.occurrence.persistence.keygen; import org.gbif.api.exception.ServiceUnavailableException; import org.gbif.dwc.terms.GbifTerm; import org.gbif.occurrence.common.config.OccHBaseConfiguration; import org.gbif.occurrence.common.identifier.OccurrenceKeyHelper; import org.gbif.occurrence.persistence.IllegalDataStateException; import org.gbif.occurrence.persistence.api.KeyLookupResult; import org.gbif.occurrence.persistence.hbase.Columns; import org.gbif.occurrence.persistence.hbase.HBaseStore; import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Set; import javax.annotation.Nullable; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.filter.CompareFilter; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.FilterList; import org.apache.hadoop.hbase.filter.PrefixFilter; import org.apache.hadoop.hbase.filter.SingleColumnValueFilter; import org.apache.hadoop.hbase.util.Bytes; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static com.google.common.base.Preconditions.checkNotNull; /** * An abstract implementation of KeyPersistenceService that handles the finding and deleting of keys in HBase, but * leaves the generation of keys to sub-classes. */ public abstract class AbstractHBaseKeyPersistenceService implements KeyPersistenceService<Integer> { private static final Logger LOG = LoggerFactory.getLogger(AbstractHBaseKeyPersistenceService.class); private static final int HBASE_CLIENT_CACHING = 200; private final Connection connection; private final TableName lookupTableName; private final HBaseStore<Integer> occurrenceTableStore; protected final HBaseStore<String> lookupTableStore; protected final HBaseStore<Integer> counterTableStore; protected final KeyBuilder keyBuilder; public AbstractHBaseKeyPersistenceService(OccHBaseConfiguration cfg, Connection connection, KeyBuilder keyBuilder) { lookupTableName = TableName.valueOf(checkNotNull(cfg.lookupTable, "lookupTable can't be null")); this.connection = checkNotNull(connection, "tablePool can't be null"); this.keyBuilder = checkNotNull(keyBuilder, "keyBuilder can't be null"); lookupTableStore = new HBaseStore<String>(cfg.lookupTable, Columns.OCCURRENCE_COLUMN_FAMILY, connection); counterTableStore = new HBaseStore<Integer>(cfg.counterTable, Columns.OCCURRENCE_COLUMN_FAMILY, connection); occurrenceTableStore = new HBaseStore<Integer>(cfg.occTable, Columns.OCCURRENCE_COLUMN_FAMILY, connection); } @Override public abstract KeyLookupResult generateKey(Set<String> uniqueStrings, String scope); @Override public KeyLookupResult findKey(Set<String> uniqueStrings, String scope) { checkNotNull(uniqueStrings, "uniqueStrings can't be null"); checkNotNull(scope, "scope can't be null"); if (uniqueStrings.isEmpty()) { return null; } Set<String> lookupKeys = keyBuilder.buildKeys(uniqueStrings, scope); Map<String, Integer> foundOccurrenceKeys = Maps.newTreeMap(); // required: predictable sorting for e.g. testing // get the occurrenceKey for each lookupKey, and set a flag if we find any null boolean gotNulls = false; for (String uniqueString : lookupKeys) { Integer occurrenceKey = lookupTableStore.getInt(uniqueString, Columns.LOOKUP_KEY_COLUMN); if (occurrenceKey == null) { gotNulls = true; } else { foundOccurrenceKeys.put(uniqueString, occurrenceKey); } } // go through all the returned keys and make sure they're all the same - if not, fail loudly (this means // an inconsistency in the db that we can't resolve here) Integer resultKey = null; for (String uniqueString : lookupKeys) { Integer occurrenceKey = foundOccurrenceKeys.get(uniqueString); if (occurrenceKey != null) { if (resultKey == null) { resultKey = occurrenceKey; } else if (resultKey.intValue() != occurrenceKey.intValue()) { failWithConflictingLookup(foundOccurrenceKeys); } } } // if we got an occurrenceKey as well as nulls, then we need to fill in the lookup table with the missing entries if (resultKey != null && gotNulls) { fillMissingKeys(lookupKeys, foundOccurrenceKeys, resultKey); } KeyLookupResult result = null; if (resultKey != null) { result = new KeyLookupResult(resultKey, false); } return result; } @Override public Set<Integer> findKeysByScope(String scope) { Set<Integer> keys = Sets.newHashSet(); // note HTableStore isn't capable of ad hoc scans try (Table table = connection.getTable(lookupTableName)) { Scan scan = new Scan(); scan.setCacheBlocks(false); scan.setCaching(HBASE_CLIENT_CACHING); scan.setFilter(new PrefixFilter(Bytes.toBytes(scope))); ResultScanner results = table.getScanner(scan); for (Result result : results) { byte[] rawKey = result.getValue(Columns.CF, Bytes.toBytes(Columns.LOOKUP_KEY_COLUMN)); if (rawKey != null) { keys.add(Bytes.toInt(rawKey)); } } } catch (IOException e) { throw new ServiceUnavailableException("Could not read from HBase", e); } return keys; } /** * Scans the lookup table for instances of the occurrenceKey and deletes those rows. It attempts to scope the scan * for this occurrenceKey within the dataset of the original occurrence, but note that there is no guarantee that the * original occurrence corresponding to this occurrenceKey still exists, so in the worst case this method will do a * full table scan of the lookup table. * * @param occurrenceKey the key to delete * @param datasetKey the optional "scope" for the lookup (without it this method is very slow) */ @Override public void deleteKey(Integer occurrenceKey, @Nullable String datasetKey) { checkNotNull(occurrenceKey, "occurrenceKey can't be null"); // get the dataset for this occurrence if not handed in as scope String rawDatasetKey = datasetKey; if (rawDatasetKey == null) { rawDatasetKey = occurrenceTableStore.getString(occurrenceKey, Columns.column(GbifTerm.datasetKey)); } // scan the lookup table for all rows where the key matches our dataset prefix and the cell value is our // target occurrenceKey, then delete those rows Scan scan = new Scan(); scan.addColumn(Columns.CF, Bytes.toBytes(Columns.LOOKUP_KEY_COLUMN)); // TODO: this is still too slow even with prefix - lease timeouts in logs List<Filter> filters = Lists.newArrayList(); if (rawDatasetKey == null) { LOG.warn("About to scan lookup table with no datasetKey prefix - target key for deletion is [{}]", occurrenceKey); } else { filters.add(new PrefixFilter(Bytes.toBytes(OccurrenceKeyHelper.buildKeyPrefix(rawDatasetKey)))); } Filter valueFilter = new SingleColumnValueFilter(Columns.CF, Bytes.toBytes(Columns.LOOKUP_KEY_COLUMN), CompareFilter.CompareOp.EQUAL, Bytes.toBytes(occurrenceKey)); filters.add(valueFilter); Filter filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters); scan.setFilter(filterList); try (Table lookupTable = connection.getTable(lookupTableName); ResultScanner resultScanner = lookupTable.getScanner(scan)) { List<Delete> keysToDelete = Lists.newArrayList(); for (Result result : resultScanner) { Delete delete = new Delete(result.getRow()); keysToDelete.add(delete); } if (!keysToDelete.isEmpty()) { lookupTable.delete(keysToDelete); } } catch (IOException e) { throw new ServiceUnavailableException("Failure accessing HBase", e); } } @Override public void deleteKeyByUniques(Set<String> uniqueStrings, String scope) { checkNotNull(uniqueStrings, "uniqueStrings can't be null"); checkNotNull(scope, "scope can't be null"); // craft a delete for every uniqueString Set<String> lookupKeys = keyBuilder.buildKeys(uniqueStrings, scope); List<Delete> keysToDelete = Lists.newArrayListWithCapacity(lookupKeys.size()); for (String lookupKey : lookupKeys) { keysToDelete.add(new Delete(Bytes.toBytes(lookupKey))); } try (Table lookupTable = connection.getTable(lookupTableName)) { if (!keysToDelete.isEmpty()) { lookupTable.delete(keysToDelete); } } catch (IOException e) { throw new ServiceUnavailableException("Failure accessing HBase", e); } } protected static void failWithConflictingLookup(Map<String, Integer> conflictingKeys) { StringBuilder sb = new StringBuilder("Found inconsistent occurrence keys in looking up unique identifiers:"); for (Map.Entry<String, Integer> entry : conflictingKeys.entrySet()) { sb.append('[').append(entry.getKey()).append("]=[").append(entry.getValue()).append(']'); } throw new IllegalDataStateException(sb.toString()); } private void fillMissingKeys(Set<String> lookupKeys, Map<String, Integer> foundOccurrenceKeys, Integer occurrenceKey) { for (String lookupKey : lookupKeys) { if (!foundOccurrenceKeys.containsKey(lookupKey)) { lookupTableStore.putInt(lookupKey, Columns.LOOKUP_KEY_COLUMN, occurrenceKey); } } } }