package org.gbif.occurrence.common;

import org.gbif.api.vocabulary.Extension;
import org.gbif.api.vocabulary.OccurrenceIssue;
import org.gbif.dwc.terms.GbifTerm;
import org.gbif.dwc.terms.Term;

import java.util.Locale;

import com.google.common.collect.ImmutableSet;

/**
 * Utility class to handle column names in Hive for Terms, OccurrenceIssues and Extensions.
 *
 * <p>All names are lower-cased with {@link Locale#ROOT} so that the generated column names do not
 * depend on the default locale of the JVM (e.g. under a Turkish locale an upper-case {@code I}
 * would otherwise become a dotless {@code ı}).
 */
public final class HiveColumnsUtils {

  /** Lower-cased names that are escaped with a trailing underscore when used as column names. */
  public static final ImmutableSet<String> HIVE_RESERVED_WORDS =
    ImmutableSet.of("date", "order", "format", "group");

  // prefix for extension columns
  private static final String EXTENSION_PRE = "ext_";

  // suffix appended to a column name that collides with a reserved word
  private static final char RESERVED_WORD_SUFFIX = '_';

  private HiveColumnsUtils() {
    // utility class, not meant to be instantiated
  }

  /**
   * Gets the Hive column name of the term parameter.
   *
   * @param term the term to get the column name for, must not be null
   * @return the lower-cased simple name of the term, with a trailing underscore appended if it is
   *         one of the {@link #HIVE_RESERVED_WORDS}
   */
  public static String getHiveColumn(Term term) {
    return toColumnName(term.simpleName());
  }

  /**
   * Gets the Hive column name of the extension parameter.
   *
   * @param extension the extension to get the column name for, must not be null
   * @return the lower-cased enum name of the extension prefixed with {@code ext_}
   */
  public static String getHiveColumn(Extension extension) {
    // the prefix is always present, so the result is never checked against the reserved words
    return EXTENSION_PRE + extension.name().toLowerCase(Locale.ROOT);
  }

  /**
   * Returns the Hive data type of term parameter.
   *
   * <p>The checks are evaluated in order and the first match wins: interpreted numerical terms map
   * to {@code INT}, interpreted dates to {@code BIGINT}, interpreted doubles to {@code DOUBLE},
   * interpreted booleans to {@code BOOLEAN}, array terms (see {@link #isHiveArray(Term)}) to
   * {@code ARRAY<STRING>} and everything else to {@code STRING}.
   *
   * @param term the term to get the Hive type for
   * @return the Hive type name to use for the column of the term
   */
  public static String getHiveType(Term term) {
    if (TermUtils.isInterpretedNumerical(term)) {
      return "INT";
    }
    if (TermUtils.isInterpretedDate(term)) {
      return "BIGINT";
    }
    if (TermUtils.isInterpretedDouble(term)) {
      return "DOUBLE";
    }
    if (TermUtils.isInterpretedBoolean(term)) {
      return "BOOLEAN";
    }
    if (isHiveArray(term)) {
      return "ARRAY<STRING>";
    }
    return "STRING";
  }

  /**
   * Checks if the term is stored as an Hive array.
   *
   * @param term the term to check
   * @return true only for {@link GbifTerm#mediaType} and {@link GbifTerm#issue}
   */
  public static boolean isHiveArray(Term term) {
    return GbifTerm.mediaType == term || GbifTerm.issue == term;
  }

  /**
   * Gets the Hive column name of the occurrence issue parameter.
   *
   * @param issue the issue to get the column name for, must not be null
   * @return the lower-cased enum name of the issue, with a trailing underscore appended if it is
   *         one of the {@link #HIVE_RESERVED_WORDS}
   */
  public static String getHiveColumn(OccurrenceIssue issue) {
    return toColumnName(issue.name());
  }

  /**
   * Lower-cases the name independently of the default locale and escapes it with a trailing
   * underscore if the lower-cased form is one of the {@link #HIVE_RESERVED_WORDS}.
   */
  private static String toColumnName(String name) {
    final String columnName = name.toLowerCase(Locale.ROOT);
    if (HIVE_RESERVED_WORDS.contains(columnName)) {
      return columnName + RESERVED_WORD_SUFFIX;
    }
    return columnName;
  }
}