package de.dfki.nlp.loader;
import com.google.common.base.Joiner;
import com.google.common.base.MoreObjects;
import de.dfki.nlp.config.AnnotatorConfig;
import de.dfki.nlp.domain.IdList;
import de.dfki.nlp.domain.ParsedInputText;
import de.dfki.nlp.domain.pubmed.Abstract;
import de.dfki.nlp.domain.pubmed.AbstractText;
import de.dfki.nlp.domain.pubmed.PubmedArticle;
import de.dfki.nlp.domain.pubmed.PubmedArticleSet;
import de.dfki.nlp.io.RetryHandler;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;
import javax.management.AttributeList;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
/**
 * Document fetcher that requests several PubMed articles in one call and converts every
 * returned {@link PubmedArticle} into a {@link ParsedInputText} (id, title, abstract).
 *
 * <p>Entries of the response that are not {@link PubmedArticle} instances are skipped.
 */
@Component
public class PubMedDocumentFetcher extends AbstractDocumentFetcher {

    private final AnnotatorConfig annotatorConfig;
    private final RetryHandler retryHandler;

    /**
     * @param annotatorConfig configuration providing the PubMed URL ({@code pubmed.url})
     * @param retryHandler    handler used to perform the retrieval of the article set
     */
    public PubMedDocumentFetcher(AnnotatorConfig annotatorConfig, RetryHandler retryHandler) {
        this.annotatorConfig = annotatorConfig;
        this.retryHandler = retryHandler;
    }

    /**
     * Loads all documents referenced by {@code idList} with a single request.
     *
     * @param idList the ids to fetch; they are joined with {@code ","} and handed to the
     *               retry handler together with the configured PubMed URL
     * @return one {@link ParsedInputText} per {@link PubmedArticle} in the response, in response
     *         order; an empty list when the response or its article list is missing
     */
    @Override
    List<ParsedInputText> load(IdList idList) {
        // load multiple pubmed documents at once
        String listOfIds = Joiner.on(",").join(idList.getIds());
        PubmedArticleSet pubmedArticleSet = retryHandler.retryableGet(
                annotatorConfig.pubmed.url,
                PubmedArticleSet.class, listOfIds);

        List<ParsedInputText> parsedTexts = new ArrayList<>();
        // Defensive: the handler's contract is not visible here, so tolerate a missing response
        // instead of failing with a NullPointerException.
        if (pubmedArticleSet == null) {
            return parsedTexts;
        }
        List<?> entries = pubmedArticleSet.getPubmedArticleOrPubmedBookArticle();
        if (entries == null) {
            return parsedTexts;
        }

        for (Object entry : entries) {
            // the list can hold other entry kinds as well; only PubmedArticle is handled
            if (entry instanceof PubmedArticle) {
                parsedTexts.add(toParsedInputText((PubmedArticle) entry, idList));
            }
        }
        return parsedTexts;
    }

    /** Extracts id, title and abstract of a single article. */
    private static ParsedInputText toParsedInputText(PubmedArticle pubmedArticle, IdList idList) {
        Abstract anAbstract = pubmedArticle.getMedlineCitation().getArticle().getAbstract();
        String abstractText = joinAbstractSections(anAbstract);

        String pmid = pubmedArticle.getMedlineCitation().getPMID().getvalue();
        String titleText = pubmedArticle.getMedlineCitation().getArticle().getArticleTitle().getvalue();

        return new ParsedInputText(resolveRequestedId(idList, pmid), titleText, abstractText, null);
    }

    /**
     * Joins the abstract sections with a newline.
     *
     * @return the joined text, or {@code null} when the article has no abstract
     */
    private static String joinAbstractSections(Abstract anAbstract) {
        if (anAbstract == null) {
            return null;
        }
        List<AbstractText> sections = anAbstract.getAbstractText();
        if (sections == null) {
            return null;
        }
        return sections.stream()
                .map(AbstractText::getvalue)
                // Collectors.joining would otherwise emit the literal text "null"
                .filter(Objects::nonNull)
                .collect(Collectors.joining("\n"));
    }

    /**
     * Maps the id reported by PubMed back to the id that was originally requested.
     *
     * <p>An exact match wins; otherwise the first requested id that contains the PubMed id as a
     * substring is used (presumably to support prefixed ids - verify against callers). Preferring
     * the exact match avoids attributing e.g. PMID {@code 123} to a requested id {@code 1234}.
     *
     * @return the matching requested id, or {@code pmid} itself when nothing matches
     */
    private static String resolveRequestedId(IdList idList, String pmid) {
        if (pmid == null) {
            return null;
        }
        return idList.getIds().stream()
                .filter(pmid::equals)
                .findFirst()
                .orElseGet(() -> idList.getIds().stream()
                        .filter(givenId -> StringUtils.contains(givenId, pmid))
                        .findFirst()
                        .orElse(pmid));
    }
}