package io.github.infolis.infolink.annotations; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.slf4j.LoggerFactory; import io.github.infolis.infolink.annotations.Annotation.Metadata; /** * * @author kata * */ public class WebAnnoTsvHandler extends AnnotationHandler { private static final org.slf4j.Logger log = LoggerFactory.getLogger(WebAnnoTsvHandler.class); protected List<Annotation> parse(String input) { List<Annotation> annotations = new ArrayList<>(); Map<Integer, String> textMap = new HashMap<>(); Map<Integer, String> annotationMap = new HashMap<>(); int wordCount = 0; String idRegex = "(#id=(\\d+)\\s*)"; String textRegex = "(#text=(.*?)\\s*\n)"; String sentencesRegex = "(((.*?)(\n+))+)"; String segmentRegex = "(" + idRegex + textRegex + sentencesRegex + ")"; Pattern p = Pattern.compile(segmentRegex); Matcher m = p.matcher(input); while (m.find()) { String id = m.group(3); String text = m.group(5); String annotation = m.group(6); String numRegex = "(\\d+)-(\\d+)\\s+(.*?)\\s+(.*?)\n"; Pattern numPat = Pattern.compile(numRegex); Matcher numMatcher = numPat.matcher(annotation); while (numMatcher.find()) { wordCount += 1; String word = numMatcher.group(3); String annoString = numMatcher.group(4); textMap.put(wordCount, word); annotationMap.put(wordCount, annoString); log.debug(String.valueOf(wordCount)); log.debug(word); Annotation anno = new Annotation(); anno.setPosition(wordCount); anno.setWord(word); anno.setMetadata(getMetadata(annoString.split("\\s+")[0])); //TODO //anno.addRelation(); annotations.add(anno); } //log.debug(annotation); //annotations.add(annotationItem); } return annotations; } //TODO add all cases protected Metadata getMetadata(String annotatedItem) { switch (annotatedItem) { case ("B-Title"): return Metadata.title; case ("I-Title"): return Metadata.title; case ("B-Creator"): return Metadata.creator; case ("I-Creator"): return Metadata.creator; default: return Metadata.none; } } }