package eu.dnetlib.iis.wf.citationmatching;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.eq;
import static org.mockito.Matchers.isA;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.verify;

import java.util.List;

import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.ArgumentCaptor;
import org.mockito.Captor;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;

import com.google.common.collect.Lists;

import eu.dnetlib.iis.citationmatching.schemas.BasicMetadata;
import eu.dnetlib.iis.citationmatching.schemas.DocumentMetadata;
import eu.dnetlib.iis.citationmatching.schemas.ReferenceMetadata;
import scala.Tuple2;

/**
 * @author madryk
 */
@RunWith(MockitoJUnitRunner.class)
public class ReferenceMetadataInputReaderTest {

    private ReferenceMetadataInputReader referenceMetadataInputReader = new ReferenceMetadataInputReader();

    @Mock
    private JavaSparkContext sparkContext;

    @Mock
    private JavaPairRDD<AvroKey<DocumentMetadata>, NullWritable> inputRecords;

    @Mock
    private JavaRDD<DocumentMetadata> documents;

    @Mock
    private JavaPairRDD<String, ReferenceMetadata> citations;

    @Captor
    private ArgumentCaptor<PairFlatMapFunction<DocumentMetadata, String, ReferenceMetadata>> documentToCitationsFunction;


    //------------------------ TESTS --------------------------

    @SuppressWarnings("unchecked")
    @Test
    public void readCitations() throws Exception {

        // given
        doReturn(inputRecords).when(sparkContext).newAPIHadoopFile(any(), any(), any(), any(), any());
        doReturn(documents).when(inputRecords).map(any());
        doReturn(citations).when(documents).flatMapToPair(any());

        // execute
        JavaPairRDD<String, ReferenceMetadata> retCitations =
                referenceMetadataInputReader.readCitations(sparkContext, "/some/path");

        // assert
        assertTrue(retCitations == citations);
        verify(sparkContext).newAPIHadoopFile(
                eq("/some/path"),
                eq(AvroKeyInputFormat.class),
                eq(DocumentMetadata.class),
                eq(NullWritable.class),
                isA(Configuration.class));
        verify(documents).flatMapToPair(documentToCitationsFunction.capture());
        assertDocToCitationsFunction(documentToCitationsFunction.getValue());
    }


    //------------------------ PRIVATE --------------------------

    private void assertDocToCitationsFunction(PairFlatMapFunction<DocumentMetadata, String, ReferenceMetadata> function) throws Exception {

        ReferenceMetadata refMetadata1 = ReferenceMetadata.newBuilder().setPosition(3).setBasicMetadata(new BasicMetadata()).build();
        ReferenceMetadata refMetadata2 = ReferenceMetadata.newBuilder().setPosition(5).setBasicMetadata(new BasicMetadata()).build();
        ReferenceMetadata refMetadata3 = ReferenceMetadata.newBuilder().setPosition(6).setBasicMetadata(new BasicMetadata()).build();

        DocumentMetadata docMetadata = DocumentMetadata.newBuilder()
                .setId("someId")
                .setBasicMetadata(new BasicMetadata())
                .setReferences(Lists.newArrayList(refMetadata1, refMetadata2, refMetadata3))
                .build();

        Iterable<Tuple2<String, ReferenceMetadata>> retCitations = function.call(docMetadata);
        List<Tuple2<String, ReferenceMetadata>> retCitationsList = Lists.newArrayList(retCitations);

        assertEquals(3, retCitationsList.size());
        assertEquals("cit_someId_3", retCitationsList.get(0)._1);
        assertTrue(retCitationsList.get(0)._2 == refMetadata1);
        assertEquals("cit_someId_5", retCitationsList.get(1)._1);
        assertTrue(retCitationsList.get(1)._2 == refMetadata2);
        assertEquals("cit_someId_6", retCitationsList.get(2)._1);
        assertTrue(retCitationsList.get(2)._2 == refMetadata3);
    }

}