package org.gbif.occurrence.download.hive;

import org.gbif.dwc.terms.GbifTerm;
import org.gbif.dwc.terms.Term;
import org.gbif.occurrence.common.HiveColumnsUtils;
import org.gbif.occurrence.common.TermUtils;

import java.util.List;
import java.util.Locale;
import java.util.Set;

import com.google.common.collect.ImmutableList;

/**
 * Utilities related to the actual queries executed at runtime.
 * The queries relate closely to the data definitions and this class provides the bridge between the
 * definitions and the queries.
 */
class Queries {

  /**
   * Select-expression template for array columns; {@code %1$s} is the column name.
   * A NULL column yields the empty string, otherwise the column is passed to {@code joinArray}.
   * The expression is aliased back to the column name so the output column keeps its name.
   */
  private static final String JOIN_ARRAY_FMT = "if(%1$s IS NULL,'',joinArray(%1$s,'\\\\;')) AS %1$s";

  /**
   * Hidden constructor.
   */
  private Queries() {
    // utility class, never instantiated
  }

  /**
   * Builds the select fields for the verbatim table.
   * The GBIF ID is always the first field; every other verbatim download term is selected from its
   * prefixed verbatim column as a string.
   *
   * @return the select fields for the verbatim table in the simple download
   */
  static List<InitializableField> selectVerbatimFields() {
    ImmutableList.Builder<InitializableField> builder = ImmutableList.builder();
    // always add the GBIF ID
    builder.add(gbifIdField());
    for (Term term : DownloadTerms.DOWNLOAD_VERBATIM_TERMS) {
      if (GbifTerm.gbifID == term) {
        continue; // for safety, we code defensively as it may be added
      }
      // Locale.ROOT keeps the column name stable regardless of the JVM default locale
      // (e.g. a Turkish locale would lower-case 'I' to a dotless 'ı').
      // No escape needed due to prefix.
      String verbatimColumn = HiveColumns.VERBATIM_COL_PREFIX + term.simpleName().toLowerCase(Locale.ROOT);
      builder.add(new InitializableField(term, verbatimColumn, HiveDataTypes.TYPE_STRING));
    }
    return builder.build();
  }

  /**
   * Builds the select fields for the interpreted table.
   *
   * @param useInitializers when true, interpreted date terms and array terms are selected through a
   *                        converting expression instead of the plain column
   * @return the select fields for the interpreted table in the simple download
   */
  static List<InitializableField> selectInterpretedFields(boolean useInitializers) {
    return selectDownloadFields(DownloadTerms.DOWNLOAD_INTERPRETED_TERMS, useInitializers);
  }

  /**
   * Builds the select fields for the simple download table; initializers are always used.
   *
   * @return the select fields for the table in the simple download
   */
  static List<InitializableField> selectSimpleDownloadFields() {
    return selectDownloadFields(DownloadTerms.SIMPLE_DOWNLOAD_TERMS, true);
  }

  /**
   * Builds the select fields for an arbitrary set of terms.
   * The GBIF ID is always the first field and is skipped if it also appears in {@code terms}.
   * All remaining fields are typed as strings.
   *
   * @param terms           the terms to select
   * @param useInitializers when true, interpreted date terms are wrapped in a {@code toISO8601(...)}
   *                        expression and array terms in the {@code joinArray} expression; when
   *                        false every term is selected from its plain column
   * @return the select fields, GBIF ID first, followed by one field per remaining term
   */
  private static List<InitializableField> selectDownloadFields(Set<Term> terms, boolean useInitializers) {
    ImmutableList.Builder<InitializableField> builder = ImmutableList.builder();
    // always add the GBIF ID
    builder.add(gbifIdField());
    for (Term term : terms) {
      if (GbifTerm.gbifID == term) {
        continue; // for safety, we code defensively as it may be added
      }
      if (useInitializers && TermUtils.isInterpretedDate(term)) {
        builder.add(new InitializableField(term, toISO8601Initializer(term), HiveDataTypes.TYPE_STRING));
      } else if (useInitializers && HiveColumnsUtils.isHiveArray(term)) {
        builder.add(new InitializableField(term,
                                           String.format(JOIN_ARRAY_FMT, HiveColumns.columnFor(term)),
                                           HiveDataTypes.TYPE_STRING));
      } else {
        builder.add(new InitializableField(term, HiveColumns.columnFor(term), HiveDataTypes.TYPE_STRING));
      }
    }
    return builder.build();
  }

  /**
   * Creates the field for the GBIF ID, which every select list starts with.
   * Unlike the other fields, its type is looked up through {@code HiveDataTypes.typeForTerm}.
   */
  private static InitializableField gbifIdField() {
    return new InitializableField(GbifTerm.gbifID,
                                  HiveColumns.columnFor(GbifTerm.gbifID),
                                  HiveDataTypes.typeForTerm(GbifTerm.gbifID, true));
  }

  /**
   * Transforms the term into a {@code toISO8601(hiveColumn) AS hiveColumn} expression.
   *
   * @param term the term whose column is to be wrapped
   * @return the select expression, aliased to the original column name
   */
  private static String toISO8601Initializer(Term term) {
    final String column = HiveColumns.columnFor(term);
    return "toISO8601(" + column + ") AS " + column;
  }
}