package eu.dnetlib.iis.wf.citationmatching;
import java.io.Serializable;
import org.apache.spark.api.java.JavaPairRDD;
import eu.dnetlib.iis.citationmatching.schemas.DocumentMetadata;
import eu.dnetlib.iis.wf.citationmatching.converter.DocumentMetadataToMatchableConverter;
import pl.edu.icm.coansys.citations.InputDocumentConverter;
import pl.edu.icm.coansys.citations.data.MatchableEntity;
import scala.Tuple2;
/**
 * {@link InputDocumentConverter} implementation that turns an rdd of
 * {@link DocumentMetadata} records into an rdd of {@link MatchableEntity} objects.
 * <p>
 * The per-record conversion is delegated to a {@link DocumentMetadataToMatchableConverter},
 * which can be replaced through {@link #setConverter(DocumentMetadataToMatchableConverter)}.
 *
 * @author madryk
 */
public class DocumentMetadataInputConverter implements InputDocumentConverter<String, DocumentMetadata>, Serializable {

    private static final long serialVersionUID = 1L;

    // Not final: may be swapped through setConverter(...)
    private DocumentMetadataToMatchableConverter converter = new DocumentMetadataToMatchableConverter();


    //------------------------ LOGIC --------------------------

    /**
     * Maps every (key, {@link DocumentMetadata}) pair of the input rdd
     * to a (key, {@link MatchableEntity}) pair.
     * The key is expected to hold the document id; it is handed to the converter
     * together with the metadata and is copied unchanged into the resulting pair.
     *
     * @param inputDocuments rdd of document metadata keyed by document id
     * @return rdd of matchable entities with the same keys as the input rdd
     */
    @Override
    public JavaPairRDD<String, MatchableEntity> convertDocuments(JavaPairRDD<String, DocumentMetadata> inputDocuments) {
        // The function below reads the converter field, so it holds a reference to this instance.
        return inputDocuments.mapToPair(idWithMetadata -> {
            String documentId = idWithMetadata._1;
            MatchableEntity matchableEntity = converter.convertToMatchableEntity(documentId, idWithMetadata._2);
            return new Tuple2<>(documentId, matchableEntity);
        });
    }


    //------------------------ SETTERS --------------------------

    /**
     * Replaces the converter used for translating single documents.
     *
     * @param converter converter to use in subsequent calls of
     *                  {@link #convertDocuments(JavaPairRDD)}
     */
    public void setConverter(DocumentMetadataToMatchableConverter converter) {
        this.converter = converter;
    }

}