package de.dfki.nlp.annotator; import de.dfki.nlp.domain.ParsedInputText; import de.dfki.nlp.domain.PredictionResult; import lombok.extern.slf4j.Slf4j; import org.springframework.context.annotation.Profile; import org.springframework.integration.annotation.Transformer; import org.springframework.stereotype.Component; import seth.SETH; import java.util.Collections; import java.util.HashSet; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; import static de.dfki.nlp.domain.PredictionResult.Section.T; import static de.dfki.nlp.domain.PredictionType.MUTATION; @Slf4j @Component @Profile("backend") public class SethAnnotator implements Annotator { private static final SETH SETH_DETECTOR = new SETH("resources/mutations.txt", true, true, false); @Transformer(inputChannel = "seth", outputChannel = "parsed") public Set<PredictionResult> annotate(ParsedInputText payload) { if (payload.getExternalId() == null) return Collections.emptySet(); log.trace("Parsing {}", payload.getExternalId()); Set<PredictionResult> results = new HashSet<>(); // iterate over the text sections for (PredictionResult.Section section : PredictionResult.Section.values()) { String analyzetext = section == T ? payload.getTitle() : payload.getAbstractText(); if (analyzetext == null) continue; results.addAll(detectSETH(analyzetext, section, payload.getExternalId()).collect(Collectors.toList())); } log.trace("Done parsing {}", payload.getExternalId()); return results; } private Stream<PredictionResult> detectSETH(String text, PredictionResult.Section section, String externalID) { return SETH_DETECTOR.findMutations(text).stream().map(l -> new PredictionResult(externalID, section, l.getStart(), l.getEnd(), 1.0, l.getText(), MUTATION)); } }