/** * */ package org.gbif.occurrence.search.writer; import org.gbif.api.model.common.MediaObject; import org.gbif.api.model.occurrence.Occurrence; import org.gbif.api.vocabulary.OccurrenceIssue; import org.gbif.dwc.terms.DwcTerm; import java.io.IOException; import java.util.HashSet; import java.util.Set; import com.google.common.base.Joiner; import com.google.common.base.Optional; import com.google.common.collect.Range; import com.google.common.collect.Sets; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.common.SolrInputDocument; import static org.gbif.common.search.solr.QueryUtils.toDateQueryFormat; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.BASIS_OF_RECORD; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.CATALOG_NUMBER; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.CLASS_KEY; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.COLLECTION_CODE; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.CONTINENT; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.COORDINATE; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.COUNTRY; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.DATASET_KEY; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.DEPTH; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.ELEVATION; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.ESTABLISHMENT_MEANS; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.EVENT_DATE; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.FAMILY_KEY; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.FULL_TEXT; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.GENUS_KEY; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.HAS_COORDINATE; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.INSTITUTION_CODE; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.ISSUE; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.KEY; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.KINGDOM_KEY; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.LAST_INTERPRETED; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.LATITUDE; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.LONGITUDE; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.MEDIA_TYPE; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.MONTH; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.OCCURRENCE_ID; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.ORDER_KEY; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.PHYLUM_KEY; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.PUBLISHING_COUNTRY; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.RECORDED_BY; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.RECORD_NUMBER; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.SPATIAL_ISSUES; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.SPECIES_KEY; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.SUBGENUS_KEY; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.TAXON_KEY; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.TYPE_STATUS; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.YEAR; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.REPATRIATED; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.ORGANISM_ID; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.STATE_PROVINCE; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.WATER_BODY; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.LOCALITY; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.PROTOCOL; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.LICENSE; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.CRAWL_ID; import static org.gbif.occurrence.search.solr.OccurrenceSolrField.PUBLISHING_ORGANIZATION_KEY; /** * Utility class that stores an Occurrence record into a Solr index. */ public class SolrOccurrenceWriter { // Joins coordinates with ',' private static final Joiner COORD_JOINER = Joiner.on(',').useForNull(""); // Allowed latitude range private static final Range<Double> LAT_RANGE = Range.closed(-90.0, 90.0); // Allowed longitude range private static final Range<Double> LNG_RANGE = Range.closed(-180.0, 180.0); // SolrClient that stores the occurrence records private final SolrClient solrClient; private final int commitWithinMs; /** * Default constructor. */ public SolrOccurrenceWriter(SolrClient solrClient, int commitWithinMs) { this.solrClient = solrClient; this.commitWithinMs = commitWithinMs; } /** * Default constructor. */ public SolrOccurrenceWriter(SolrClient solrClient) { this.solrClient = solrClient; commitWithinMs = -1; } /** * Validate if the latitude and longitude values are not null and have correct values: latitude:[-90.0,90.0] and * longitude[-180.0,180.0]. */ private static boolean isValidCoordinate(Double latitude, Double longitude) { return latitude != null && longitude != null && LAT_RANGE.contains(latitude) && LNG_RANGE.contains(longitude); } public void delete(Occurrence input) throws IOException, SolrServerException { solrClient.deleteById(input.getKey().toString(), commitWithinMs); } /** * Processes the occurrence object. */ public void update(Occurrence input) throws IOException, SolrServerException { solrClient.add(buildOccSolrDocument(input), commitWithinMs); } /** * Populates the Solr document using the occurrence object. */ private static SolrInputDocument buildOccSolrDocument(Occurrence occurrence) { SolrInputDocument doc = new SolrInputDocument(); Double latitude = occurrence.getDecimalLatitude(); Double longitude = occurrence.getDecimalLongitude(); doc.setField(KEY.getFieldName(), occurrence.getKey()); doc.setField(YEAR.getFieldName(), occurrence.getYear()); doc.setField(MONTH.getFieldName(), occurrence.getMonth()); doc.setField(BASIS_OF_RECORD.getFieldName(), occurrence.getBasisOfRecord() == null ? null : occurrence.getBasisOfRecord().name()); doc.setField(CATALOG_NUMBER.getFieldName(), occurrence.getVerbatimField(DwcTerm.catalogNumber)); doc.setField(RECORDED_BY.getFieldName(), occurrence.getVerbatimField(DwcTerm.recordedBy)); doc.setField(TYPE_STATUS.getFieldName(), occurrence.getTypeStatus() == null ? null : occurrence.getTypeStatus().name()); doc.setField(RECORD_NUMBER.getFieldName(), occurrence.getVerbatimField(DwcTerm.recordNumber)); doc.setField(COUNTRY.getFieldName(), occurrence.getCountry() == null ? null : occurrence.getCountry().getIso2LetterCode()); doc.setField(PUBLISHING_COUNTRY.getFieldName(), occurrence.getPublishingCountry() == null ? null : occurrence.getPublishingCountry().getIso2LetterCode()); doc.setField(CONTINENT.getFieldName(), occurrence.getContinent() == null ? null : occurrence.getContinent().name()); doc.setField(DATASET_KEY.getFieldName(), occurrence.getDatasetKey().toString()); Set<Integer> taxonKey = buildTaxonKey(occurrence); if (!taxonKey.isEmpty()) { doc.setField(TAXON_KEY.getFieldName(), taxonKey); } else { doc.setField(TAXON_KEY.getFieldName(), null); } doc.setField(KINGDOM_KEY.getFieldName(), occurrence.getKingdomKey()); doc.setField(PHYLUM_KEY.getFieldName(), occurrence.getPhylumKey()); doc.setField(CLASS_KEY.getFieldName(), occurrence.getClassKey()); doc.setField(ORDER_KEY.getFieldName(), occurrence.getOrderKey()); doc.setField(FAMILY_KEY.getFieldName(), occurrence.getFamilyKey()); doc.setField(GENUS_KEY.getFieldName(), occurrence.getGenusKey()); doc.setField(SUBGENUS_KEY.getFieldName(), occurrence.getSubgenusKey()); doc.setField(SPECIES_KEY.getFieldName(), occurrence.getSpeciesKey()); doc.setField(ELEVATION.getFieldName(), occurrence.getElevation()); doc.setField(DEPTH.getFieldName(), occurrence.getDepth()); doc.setField(INSTITUTION_CODE.getFieldName(), occurrence.getVerbatimField(DwcTerm.institutionCode)); doc.setField(COLLECTION_CODE.getFieldName(), occurrence.getVerbatimField(DwcTerm.collectionCode)); doc.setField(SPATIAL_ISSUES.getFieldName(), occurrence.hasSpatialIssue()); doc.setField(LATITUDE.getFieldName(), latitude); doc.setField(LONGITUDE.getFieldName(), longitude); doc.setField(HAS_COORDINATE.getFieldName(), latitude != null && longitude != null); doc.setField(EVENT_DATE.getFieldName(), occurrence.getEventDate() != null ? toDateQueryFormat(occurrence.getEventDate()) : null); doc.setField(LAST_INTERPRETED.getFieldName(), occurrence.getLastInterpreted() != null ? toDateQueryFormat(occurrence.getLastInterpreted()) : null); if (isValidCoordinate(latitude, longitude)) { doc.setField(COORDINATE.getFieldName(), COORD_JOINER.join(latitude, longitude)); } else { doc.setField(COORDINATE.getFieldName(), null); } doc.setField(MEDIA_TYPE.getFieldName(), buildMediaType(occurrence)); doc.setField(ISSUE.getFieldName(), buildIssue(occurrence.getIssues())); doc.setField(ESTABLISHMENT_MEANS.getFieldName(), occurrence.getEstablishmentMeans() == null ? null : occurrence.getEstablishmentMeans().name()); doc.setField(OCCURRENCE_ID.getFieldName(), occurrence.getVerbatimField(DwcTerm.occurrenceID)); doc.setField(FULL_TEXT.getFieldName(), FullTextFieldBuilder.buildFullTextField(occurrence)); doc.setField(REPATRIATED.getFieldName(),isRepatriated(occurrence).orNull()); doc.setField(ORGANISM_ID.getFieldName(), occurrence.getVerbatimField(DwcTerm.organismID)); doc.setField(STATE_PROVINCE.getFieldName(), occurrence.getStateProvince()); doc.setField(WATER_BODY.getFieldName(), occurrence.getWaterBody()); doc.setField(LOCALITY.getFieldName(), occurrence.getVerbatimField(DwcTerm.locality)); doc.setField(PROTOCOL.getFieldName(), occurrence.getProtocol() == null ? null : occurrence.getProtocol().name()); doc.setField(CRAWL_ID.getFieldName(), occurrence.getCrawlId() == null ? null : occurrence.getCrawlId()); doc.setField(PUBLISHING_ORGANIZATION_KEY.getFieldName(), occurrence.getPublishingOrgKey() == null ? null : occurrence.getPublishingOrgKey().toString()); doc.setField(LICENSE.getFieldName(), occurrence.getLicense() == null ? null : occurrence.getLicense().name()); return doc; } /** * Returns a nullable set of String that contains the result of .name() of each issues. */ private static Set<String> buildIssue(Set<OccurrenceIssue> occurrenceIssues) { Set<String> issuesList = null; if (occurrenceIssues != null && !occurrenceIssues.isEmpty()) { issuesList = Sets.newHashSetWithExpectedSize(occurrenceIssues.size()); for (OccurrenceIssue issue : occurrenceIssues) { issuesList.add(issue.name().toUpperCase()); } } return issuesList; } /** * Returns a nullable set of String that contains the media types present in the occurrence object. */ private static Set<String> buildMediaType(Occurrence occurrence) { Set<String> mediaTypes = null; if (occurrence.getMedia() != null && !occurrence.getMedia().isEmpty()) { mediaTypes = Sets.newHashSetWithExpectedSize(occurrence.getMedia().size()); for (MediaObject mediaObject : occurrence.getMedia()) { if (mediaObject.getType() != null) { mediaTypes.add(mediaObject.getType().name().toUpperCase()); } } } return mediaTypes; } /** * Return a set of integer that contains the taxon key values. */ private static Set<Integer> buildTaxonKey(Occurrence occurrence) { Set<Integer> taxonKey = new HashSet<Integer>(); if (occurrence.getTaxonKey() != null) { taxonKey.add(occurrence.getTaxonKey()); } if (occurrence.getKingdomKey() != null) { taxonKey.add(occurrence.getKingdomKey()); } if (occurrence.getPhylumKey() != null) { taxonKey.add(occurrence.getPhylumKey()); } if (occurrence.getClassKey() != null) { taxonKey.add(occurrence.getClassKey()); } if (occurrence.getOrderKey() != null) { taxonKey.add(occurrence.getOrderKey()); } if (occurrence.getFamilyKey() != null) { taxonKey.add(occurrence.getFamilyKey()); } if (occurrence.getGenusKey() != null) { taxonKey.add(occurrence.getGenusKey()); } if (occurrence.getSubgenusKey() != null) { taxonKey.add(occurrence.getSubgenusKey()); } if (occurrence.getSpeciesKey() != null) { taxonKey.add(occurrence.getSpeciesKey()); } return taxonKey; } /** * Determines if the occurrence record has been repatriated. */ private static Optional<Boolean> isRepatriated(Occurrence occurrence) { if (occurrence.getPublishingCountry() != null && occurrence.getCountry() != null) { return Optional.of(!occurrence.getPublishingCountry().equals(occurrence.getCountry())); } return Optional.absent(); } }