package eu.dnetlib.iis.wf.citationmatching;
import java.io.Serializable;
import java.util.List;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaPairRDD;
import com.google.common.collect.Lists;
import eu.dnetlib.iis.citationmatching.schemas.ReferenceMetadata;
import eu.dnetlib.iis.wf.citationmatching.converter.ReferenceMetadataToMatchableConverter;
import pl.edu.icm.coansys.citations.InputCitationConverter;
import pl.edu.icm.coansys.citations.data.MatchableEntity;
import scala.Tuple2;
/**
 * Converter of {@link ReferenceMetadata} rdd to {@link MatchableEntity} rdd.
 * <p>
 * Citations whose raw text is longer than {@value #MAX_CITATION_LENGTH} characters
 * are logged and omitted from the result.
 * <p>
 * The conversion lambda reads the {@code converter} instance field, so this object
 * is captured by the closure passed to Spark; that is why the class is {@link Serializable}.
 *
 * @author madryk
 */
public class ReferenceMetadataInputConverter implements InputCitationConverter<String, ReferenceMetadata>, Serializable {

    private static final long serialVersionUID = 1L;

    private static final Logger log = Logger.getLogger(ReferenceMetadataInputConverter.class);

    /** Upper bound (in characters) of citation raw text; longer citations are skipped. */
    private static final int MAX_CITATION_LENGTH = 10000;

    private ReferenceMetadataToMatchableConverter converter = new ReferenceMetadataToMatchableConverter();


    //------------------------ LOGIC --------------------------

    /**
     * Converts rdd with citations of type {@link ReferenceMetadata}
     * to rdd with citations of type {@link MatchableEntity}.
     * Method assumes that keys of input rdd will contain citation id.
     * Keys in returned rdd will be unchanged.
     * <p>
     * A citation is dropped (and an error is logged) when its raw text is present and
     * longer than {@value #MAX_CITATION_LENGTH} characters. A citation without raw text
     * cannot exceed the limit and is therefore kept.
     *
     * @param inputCitations rdd of (citation id, citation metadata) pairs
     * @return rdd of (citation id, matchable entity) pairs without the over-long citations
     */
    @Override
    public JavaPairRDD<String, MatchableEntity> convertCitations(JavaPairRDD<String, ReferenceMetadata> inputCitations) {

        return inputCitations.flatMapToPair(inputCitation -> {
            List<Tuple2<String, MatchableEntity>> list = Lists.newArrayList();

            MatchableEntity entity = converter.convertToMatchableEntity(inputCitation._1, inputCitation._2);

            // rawText() is optional: check isDefined() first, otherwise get() on an empty
            // option throws and fails the whole Spark task.
            if (entity.rawText().isDefined() && entity.rawText().get().length() > MAX_CITATION_LENGTH) {
                log.error("RawText of citation " + inputCitation._1 + " exceeds length limit (" + MAX_CITATION_LENGTH + ").");
                return list;
            }

            list.add(new Tuple2<>(inputCitation._1, entity));
            return list;
        });
    }


    //------------------------ SETTERS --------------------------

    /**
     * Replaces the converter used to turn a single {@link ReferenceMetadata}
     * into a {@link MatchableEntity}.
     *
     * @param converter converter to use in {@link #convertCitations(JavaPairRDD)}
     */
    public void setConverter(ReferenceMetadataToMatchableConverter converter) {
        this.converter = converter;
    }
}