package org.gbif.occurrence.search.writers;

import org.gbif.api.model.occurrence.Occurrence;
import org.gbif.api.vocabulary.OccurrenceIssue;
import org.gbif.dwc.terms.DcTerm;
import org.gbif.dwc.terms.DwcTerm;
import org.gbif.dwc.terms.GbifTerm;
import org.gbif.dwc.terms.GbifInternalTerm;
import org.gbif.occurrence.persistence.hbase.Columns;

import java.io.IOException;

import com.google.common.base.Predicate;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * {@link Predicate} that persists every {@link Occurrence} it is applied to as a single {@link Put}
 * into an HBase table.
 *
 * <p>{@link #apply(Occurrence)} returns {@code true} when the row was handed to the table and
 * {@code false} when the write raised an {@link IOException}.
 */
public class HBasePredicateWriter implements Predicate<Occurrence> {

  // Column family under which every cell is written.
  private static final byte[] CF = Bytes.toBytes("o");

  // Target HBase table.
  private final Table hTable;

  /**
   * Creates a writer bound to the given table.
   *
   * @param hTable table that receives one {@link Put} per applied occurrence
   */
  public HBasePredicateWriter(Table hTable) {
    this.hTable = hTable;
  }

  /**
   * Writes the occurrence to the table.
   *
   * @param input occurrence to persist
   * @return {@code true} if the write completed, {@code false} if it failed with an
   *         {@link IOException}
   */
  @Override
  public boolean apply(Occurrence input) {
    try {
      write(input);
    } catch (IOException e) {
      // The failure is reported to the caller only through the return value;
      // the exception itself is neither logged nor rethrown.
      return false;
    }
    return true;
  }

  /**
   * Builds one {@link Put} keyed by the occurrence key and sends it to the table.
   *
   * <p>A cell is added only for properties that are non-null. The qualifier of each cell is the
   * column name that {@link Columns} reports for the corresponding term, and the value is encoded
   * with the {@code Bytes.toBytes} overload matching the property's type.
   *
   * @param occ occurrence whose properties are written
   * @throws IOException if the table rejects the put
   */
  private void write(Occurrence occ) throws IOException {
    Put row = new Put(Bytes.toBytes(occ.getKey()));

    if (occ.getElevation() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.elevation)),
        Bytes.toBytes(occ.getElevation()));
    }
    if (occ.getBasisOfRecord() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.basisOfRecord)),
        Bytes.toBytes(occ.getBasisOfRecord().name()));
    }
    addVerbatimField(row, occ, DwcTerm.catalogNumber);
    if (occ.getClassKey() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.classKey)),
        Bytes.toBytes(occ.getClassKey()));
    }
    if (occ.getClazz() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.class_)),
        Bytes.toBytes(occ.getClazz()));
    }
    addVerbatimField(row, occ, DwcTerm.collectionCode);
    if (occ.getDatasetKey() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.datasetKey)),
        Bytes.toBytes(occ.getDatasetKey().toString()));
    }
    if (occ.getDepth() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.depth)),
        Bytes.toBytes(occ.getDepth()));
    }
    if (occ.getFamily() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.family)),
        Bytes.toBytes(occ.getFamily()));
    }
    if (occ.getFamilyKey() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.familyKey)),
        Bytes.toBytes(occ.getFamilyKey()));
    }
    if (occ.getGenus() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.genus)),
        Bytes.toBytes(occ.getGenus()));
    }
    if (occ.getGenusKey() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.genusKey)),
        Bytes.toBytes(occ.getGenusKey()));
    }
    addVerbatimField(row, occ, DwcTerm.institutionCode);
    if (occ.getKingdom() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.kingdom)),
        Bytes.toBytes(occ.getKingdom()));
    }
    if (occ.getKingdomKey() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.kingdomKey)),
        Bytes.toBytes(occ.getKingdomKey()));
    }
    if (occ.getDecimalLatitude() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.decimalLatitude)),
        Bytes.toBytes(occ.getDecimalLatitude()));
    }
    if (occ.getDecimalLongitude() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.decimalLongitude)),
        Bytes.toBytes(occ.getDecimalLongitude()));
    }
    addVerbatimField(row, occ, DwcTerm.locality);
    if (occ.getCountry() != null) {
      // The country is stored as its ISO 2-letter code under the countryCode column.
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.countryCode)),
        Bytes.toBytes(occ.getCountry().getIso2LetterCode()));
    }
    addVerbatimField(row, occ, DwcTerm.county);
    if (occ.getStateProvince() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.stateProvince)),
        Bytes.toBytes(occ.getStateProvince()));
    }
    if (occ.getContinent() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.continent)),
        Bytes.toBytes(occ.getContinent().name()));
    }
    addVerbatimField(row, occ, DwcTerm.recordedBy);
    addVerbatimField(row, occ, DwcTerm.identifiedBy);
    if (occ.getDateIdentified() != null) {
      // Dates are stored as the value returned by getTime().
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.dateIdentified)),
        Bytes.toBytes(occ.getDateIdentified().getTime()));
    }
    if (occ.getModified() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(DcTerm.modified)),
        Bytes.toBytes(occ.getModified().getTime()));
    }
    if (occ.getMonth() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.month)),
        Bytes.toBytes(occ.getMonth()));
    }
    if (occ.getTaxonKey() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.taxonKey)),
        Bytes.toBytes(occ.getTaxonKey()));
    }
    if (occ.getEventDate() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.eventDate)),
        Bytes.toBytes(occ.getEventDate().getTime()));
    }
    if (occ.getOrder() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.order)),
        Bytes.toBytes(occ.getOrder()));
    }
    if (occ.getOrderKey() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.orderKey)),
        Bytes.toBytes(occ.getOrderKey()));
    }
    if (occ.getPublishingOrgKey() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(GbifInternalTerm.publishingOrgKey)),
        Bytes.toBytes(occ.getPublishingOrgKey().toString()));
    }
    if (occ.getPhylum() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.phylum)),
        Bytes.toBytes(occ.getPhylum()));
    }
    if (occ.getPhylumKey() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.phylumKey)),
        Bytes.toBytes(occ.getPhylumKey()));
    }
    if (occ.getScientificName() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.scientificName)),
        Bytes.toBytes(occ.getScientificName()));
    }
    if (occ.getSpecies() != null) {
      // NOTE(review): the species value is written under the specificEpithet column;
      // confirm this mapping is intended.
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.specificEpithet)),
        Bytes.toBytes(occ.getSpecies()));
    }
    if (occ.getSpeciesKey() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.speciesKey)),
        Bytes.toBytes(occ.getSpeciesKey()));
    }
    if (occ.getYear() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.year)),
        Bytes.toBytes(occ.getYear()));
    }
    if (occ.getTypeStatus() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.typeStatus)),
        Bytes.toBytes(occ.getTypeStatus().name()));
    }
    if (occ.getEstablishmentMeans() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.establishmentMeans)),
        Bytes.toBytes(occ.getEstablishmentMeans().name()));
    }

    // One column per issue attached to the occurrence, each holding the int value 1.
    for (OccurrenceIssue issue : occ.getIssues()) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(issue)), Bytes.toBytes(1));
    }

    if (occ.getWaterBody() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.waterBody)),
        Bytes.toBytes(occ.getWaterBody()));
    }
    addVerbatimField(row, occ, DwcTerm.organismID);
    if (occ.getProtocol() != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.protocol)),
        Bytes.toBytes(occ.getProtocol().name()));
    }

    hTable.put(row);
  }

  /**
   * Adds the verbatim value of {@code term} to the put when the occurrence has one.
   *
   * @param row  put being assembled
   * @param occ  occurrence supplying the verbatim field
   * @param term term whose verbatim value is looked up and whose column name is used as qualifier
   */
  private static void addVerbatimField(Put row, Occurrence occ, DwcTerm term) {
    String verbatim = occ.getVerbatimField(term);
    if (verbatim != null) {
      row.addColumn(CF, Bytes.toBytes(Columns.column(term)), Bytes.toBytes(verbatim));
    }
  }
}