package eu.dnetlib.iis.wf.citationmatching;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertSame;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.eq;
import static org.mockito.Matchers.isA;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.verify;

import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.ArgumentCaptor;
import org.mockito.Captor;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;

import com.google.common.collect.Lists;

import eu.dnetlib.iis.citationmatching.schemas.BasicMetadata;
import eu.dnetlib.iis.citationmatching.schemas.DocumentMetadata;

import scala.Tuple2;

/**
 * Unit tests for {@link DocumentMetadataInputReader}.
 *
 * @author madryk
 */
@RunWith(MockitoJUnitRunner.class)
public class DocumentMetadataInputReaderTest {

    // object under test
    private DocumentMetadataInputReader documentMetadataInputReader = new DocumentMetadataInputReader();

    @Mock
    private JavaSparkContext sparkContext;

    @Mock
    private JavaPairRDD<AvroKey<DocumentMetadata>, NullWritable> inputRecords;

    @Mock
    private JavaRDD<DocumentMetadata> inputDocuments;

    @Mock
    private JavaPairRDD<String, DocumentMetadata> documents;

    // captures the function that readDocuments() passes to mapToPair(..)
    // to key each document by its id
    @Captor
    private ArgumentCaptor<PairFunction<DocumentMetadata, String, DocumentMetadata>> attachIdFunction;

    //------------------------ TESTS --------------------------
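
    /**
     * Checks that {@link DocumentMetadataInputReader#readDocuments(JavaSparkContext, String)}
     * reads avro records through newAPIHadoopFile(..) and keys each document
     * with an id-based key (see the assertion on the captured pair function below).
     */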
    @SuppressWarnings("unchecked")
    @Test
    public void readDocuments() throws Exception {

        // given
        doReturn(inputRecords).when(sparkContext).newAPIHadoopFile(any(), any(), any(), any(), any());
        doReturn(inputDocuments).when(inputRecords).map(any());
        doReturn(documents).when(inputDocuments).mapToPair(any());

        // execute
        JavaPairRDD<String, DocumentMetadata> retDocuments = documentMetadataInputReader.readDocuments(sparkContext, "/some/path");

        // assert
        assertSame(documents, retDocuments);
        verify(sparkContext).newAPIHadoopFile(
                eq("/some/path"), eq(AvroKeyInputFormat.class),
                eq(DocumentMetadata.class), eq(NullWritable.class),
                isA(Configuration.class));
        verify(inputDocuments).mapToPair(attachIdFunction.capture());
        assertAttachIdFunction(attachIdFunction.getValue());
    }

    //------------------------ PRIVATE --------------------------
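
    /**
     * Asserts that the captured pair function keys a document by its id prefixed
     * with "doc_" and keeps the very same metadata object as the value.
     */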
    private void assertAttachIdFunction(PairFunction<DocumentMetadata, String, DocumentMetadata> function) throws Exception {

        DocumentMetadata docMetadata = DocumentMetadata.newBuilder()
                .setId("someId")
                .setBasicMetadata(new BasicMetadata())
                .setReferences(Lists.newArrayList())
                .build();

        Tuple2<String, DocumentMetadata> retDocWithId = function.call(docMetadata);

        assertEquals("doc_someId", retDocWithId._1);
        assertSame(docMetadata, retDocWithId._2);
    }
}
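
/*
 * For reference, a minimal sketch of the behaviour these tests pin down,
 * reconstructed purely from the stubbed and verified interactions above;
 * the actual DocumentMetadataInputReader implementation may differ in
 * details (casts, helper methods, configuration handling):
 *
 *   public JavaPairRDD<String, DocumentMetadata> readDocuments(JavaSparkContext sc, String inputPath) {
 *       // read avro key/value records from the given path
 *       JavaPairRDD<AvroKey<DocumentMetadata>, NullWritable> records = sc.newAPIHadoopFile(
 *               inputPath, AvroKeyInputFormat.class, DocumentMetadata.class,
 *               NullWritable.class, new Configuration());
 *       // unwrap the avro datum and key each document by "doc_" + id
 *       JavaRDD<DocumentMetadata> docs = records.map(record -> record._1().datum());
 *       return docs.mapToPair(doc -> new Tuple2<>("doc_" + doc.getId(), doc));
 *   }
 */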