package org.gbif.occurrence.hive.udf; import org.gbif.occurrence.common.download.DownloadUtils; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.io.Text; /** * A simple UDF for Hive that replaces specials characters with blanks. * The characters replaced by this UDF can break a download format and those are: tabs, line breaks and new lines. * If the input value is null or can't be parsed, and empty string is returned. */ @Description( name = "cleanDelimiters", value = "_FUNC_(field)") public class CleanDelimiterCharsUDF extends UDF { private final Text text = new Text(); public Text evaluate(Text field) { if (field == null) { return null; } text.set(DownloadUtils.DELIMETERS_MATCH_PATTERN.matcher(field.toString()).replaceAll(" ")); return text; } }