package eu.dnetlib.iis.common.pig.udfs;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
import org.junit.Test;
import com.google.common.collect.Lists;
/**
* {@link IdConfidenceTupleDeduplicator} UDF test class.
* @author mhorst
*
*/
public class IdConfidenceTupleDeduplicatorTest {
@Test
public void testUDF() throws IOException {
// given
IdConfidenceTupleDeduplicator udf = new IdConfidenceTupleDeduplicator();
TupleFactory tupleFactory = TupleFactory.getInstance();
BagFactory bagFactory = BagFactory.getInstance();
DataBag emptyBag = bagFactory.newDefaultBag();
// execute & assert
assertNull(udf.exec(null));
assertNull(udf.exec(tupleFactory.newTuple()));
assertNull(udf.exec(tupleFactory.newTuple((DataBag)null)));
assertEquals(emptyBag, udf.exec(tupleFactory.newTuple(emptyBag)));
assertEquals(
// expected
bagFactory.newDefaultBag(Collections.singletonList(
tupleFactory.newTuple(Arrays.asList("tuple1", null)))),
// provided
udf.exec(tupleFactory.newTuple(
bagFactory.newDefaultBag(Collections.singletonList(
tupleFactory.newTuple(Arrays.asList("tuple1", null)))))));
assertEquals(
// expected
bagFactory.newDefaultBag(Collections.singletonList(
tupleFactory.newTuple(Arrays.asList("tuple1",0.9f)))),
// provided
udf.exec(tupleFactory.newTuple(
bagFactory.newDefaultBag(Lists.newArrayList(
tupleFactory.newTuple(Arrays.asList("tuple1",0.9f)),
tupleFactory.newTuple(Arrays.asList("tuple1",0.1f)),
tupleFactory.newTuple(Arrays.asList("tuple1",null)))))));
assertEquals(
// expected
bagFactory.newDefaultBag(Lists.newArrayList(
tupleFactory.newTuple(Arrays.asList("tuple1",null)),
tupleFactory.newTuple(Arrays.asList("tuple2",0.1f)),
tupleFactory.newTuple(Arrays.asList("tuple3",0.6f)))),
// provided
udf.exec(tupleFactory.newTuple(
bagFactory.newDefaultBag(Lists.newArrayList(
tupleFactory.newTuple(Arrays.asList("tuple3",0.3f)),
tupleFactory.newTuple(Arrays.asList("tuple2",0.1f)),
tupleFactory.newTuple(Arrays.asList("tuple3",0.6f)),
tupleFactory.newTuple(Arrays.asList("tuple1",null)))))));
}
@Test
public void testOutputSchema() throws Exception {
// given
IdConfidenceTupleDeduplicator udf = new IdConfidenceTupleDeduplicator();
Schema inputSchema = new Schema();
inputSchema.add(new FieldSchema(null, DataType.CHARARRAY));
// execute
Schema resultSchema = udf.outputSchema(inputSchema);
// assert
assertTrue(inputSchema == resultSchema);
}
}