package eu.dnetlib.iis.wf.collapsers;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.IndexedRecord;
import org.apache.avro.util.Utf8;
/**
*
* @author Dominika Tkaczyk
*/
public final class CollapserUtils {
private static final String FIELD_ORIGIN = "origin";
private static final String FIELD_DATA = "data";
// ---------------------- CONSTRUCTORS -------------------------
private CollapserUtils() {}
// ---------------------- LOGIC --------------------------------
/**
* Checks, if all objects in the list have the same schema.
*/
public static boolean haveEqualSchema(Collection<IndexedRecord> records) {
if (records == null || records.isEmpty()) {
return true;
}
Schema schema = records.iterator().next().getSchema();
for (IndexedRecord record : records) {
if (!schema.equals(record.getSchema())) {
return false;
}
}
return true;
}
/**
* Checks, if the schema is an "envelope" schema.
*/
public static boolean isEnvelopeSchema(Schema schema) {
List<Field> fields = schema.getFields();
if (fields.size() != 2) {
return false;
}
if (schema.getField(FIELD_ORIGIN) == null || schema.getField(FIELD_DATA) == null) {
return false;
}
return true;
}
/**
* Extracts the "origin" value from the envelope record.
*/
public static String getOriginValue(IndexedRecord record) {
Schema schema = record.getSchema();
Object origin = record.get(schema.getField(FIELD_ORIGIN).pos());
if (origin instanceof Utf8) {
return ((Utf8) record.get(schema.getField(FIELD_ORIGIN).pos())).toString();
}
return (String) record.get(schema.getField(FIELD_ORIGIN).pos());
}
/**
* Extracts the "data" part of the envelope record.
*/
public static IndexedRecord getDataRecord(IndexedRecord record) {
Schema schema = record.getSchema();
if (schema.getField(FIELD_DATA) == null) {
return null;
}
return (IndexedRecord) record.get(schema.getField(FIELD_DATA).pos());
}
/**
* Extracts nested field value from a record.
*
* @param record a record
* @param fieldName a path of field names separated by a dot
* @return
*/
@SuppressWarnings("unchecked")
public static <T> T getNestedFieldValue(IndexedRecord record, String fieldName) {
if (record == null || fieldName == null) {
return null;
}
IndexedRecord actRecord = record;
String[] path = fieldName.split("\\.");
for (int i = 0; i < path.length; i++) {
Schema schema = actRecord.getSchema();
if (schema.getField(path[i]) == null) {
return null;
}
Object nextRecord = actRecord.get(schema.getField(path[i]).pos());
if (i == path.length-1) {
return (T) nextRecord;
} else if (nextRecord instanceof IndexedRecord) {
actRecord = (IndexedRecord) nextRecord;
} else {
return null;
}
}
return null;
}
/**
* Returns the number of not null siginificant fields.
*
* @param record input avro object
* @param fields a list of significant field names
* @return
*/
public static int getNumberOfFilledFields(IndexedRecord record, List<String> fields) {
int number = 0;
for (Field field: record.getSchema().getFields()) {
if ((fields == null || fields.contains(field.name()))
&& record.get(field.pos()) != null) {
number++;
}
}
return number;
}
static class NumberOfFilledDataFieldsComparator implements Comparator<IndexedRecord> {
private final List<String> fields;
public NumberOfFilledDataFieldsComparator(List<String> fields) {
this.fields = fields;
}
@Override
public int compare(IndexedRecord t1, IndexedRecord t2) {
return Integer.valueOf(getNumberOfFilledFields(t2, fields))
.compareTo(getNumberOfFilledFields(t1, fields));
}
}
/**
* Sorts records by the number of not null significant fields.
*/
public static <T extends IndexedRecord> void sortByFilledDataFields(List<T> records, final List<String> fields) {
Collections.sort(records, new NumberOfFilledDataFieldsComparator(fields));
}
/**
* Merges two records by setting null fields in the base record.
*/
public static <T extends IndexedRecord> T merge(T base, T update) {
T ret = GenericData.get().deepCopy(base.getSchema(), base);
for (Field field: base.getSchema().getFields()) {
if (base.get(field.pos()) == null && update.get(field.pos()) != null) {
ret.put(field.pos(), update.get(field.pos()));
}
}
return ret;
}
}