package eu.dnetlib.iis.common.pig.udfs; import java.io.IOException; import org.apache.pig.data.BagFactory; import org.apache.pig.data.DataBag; import org.apache.pig.data.DataType; import org.apache.pig.data.TupleFactory; import org.apache.pig.impl.logicalLayer.schema.Schema; import org.junit.Test; import com.google.common.collect.Lists; import junit.framework.TestCase; /** * * @author Dominika Tkaczyk */ public class StringBagsDifferenceTest extends TestCase { @Test public void testUDF() throws IOException { // given StringBagsDifference udf = new StringBagsDifference(); TupleFactory tupleFactory = TupleFactory.getInstance(); BagFactory bagFactory = BagFactory.getInstance(); DataBag nullBag = null; DataBag emptyBag = bagFactory.newDefaultBag(); DataBag bag1 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), tupleFactory.newTuple("tup2"))); DataBag bag2 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup3"), tupleFactory.newTuple("tup4"))); DataBag bag3 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), tupleFactory.newTuple("tup4"), tupleFactory.newTuple("tup5"))); DataBag bag4 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), tupleFactory.newTuple("tup5"))); // execute & assert assertNull(udf.exec(null)); assertNull(udf.exec(tupleFactory.newTuple())); assertNull(udf.exec(tupleFactory.newTuple(nullBag))); assertNull(udf.exec(tupleFactory.newTuple(emptyBag))); assertNull(udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag1, emptyBag, bag2, bag3)))); assertNull(udf.exec(tupleFactory.newTuple(Lists.newArrayList(nullBag, bag3, bag2)))); assertEquals(bag1, udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag1, bag2)))); assertEquals(bag4, udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag3, bag2)))); assertNull(udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag4, bag3)))); } @Test public void testOutputSchema() throws Exception { // given StringBagsDifference udf = new StringBagsDifference(); Schema irrelevantSchema = null; // execute Schema resultSchema = udf.outputSchema(irrelevantSchema); // assert assertNotNull(resultSchema); assertEquals(1, resultSchema.getFields().size()); assertEquals(DataType.BAG, resultSchema.getField(0).type); assertEquals(1, resultSchema.getField(0).schema.getFields().size()); assertEquals(DataType.TUPLE, resultSchema.getField(0).schema.getField(0).type); assertEquals(1, resultSchema.getField(0).schema.getField(0).schema.getFields().size()); assertEquals(DataType.CHARARRAY, resultSchema.getField(0).schema.getField(0).schema.getField(0).type); } }