package org.wikibrain.sr.wikify; import gnu.trove.set.TIntSet; import org.wikibrain.core.lang.Language; import org.wikibrain.core.model.LocalLink; import org.wikibrain.core.nlp.Token; import org.wikibrain.phrases.PrunedCounts; import org.wikibrain.utils.Scoreboard; /** * @author Shilad Sen */ public class LinkInfo implements Comparable<LinkInfo> { private String anchortext; private double linkProbability; private PrunedCounts<Integer> prior; private Scoreboard<Integer> scores = new Scoreboard<Integer>(5); private Integer dest; private Double score; private Integer knownDest; private int startChar; private int endChar; public LinkInfo() {} public LinkInfo(Token token) { this.startChar = token.getBegin(); this.endChar = token.getEnd(); this.anchortext = token.getToken(); } public LinkInfo(LocalLink link) { this.startChar = link.getLocation(); this.endChar = startChar + link.getAnchorText().length(); this.anchortext = link.getAnchorText(); this.knownDest = link.getDestId(); } public boolean hasOnePossibility() { return getPrior().size() == 1; } public int getTopPriorDestination() { return getPrior().keySet().iterator().next(); } public void addScore(int wpId, double score) { getScores().add(wpId, score); } @Override public int compareTo(LinkInfo o) { if (getScore() == null && o.getScore() == null) { return 0; } else if (getScore() == null) { return 1; } else if (o.getScore() == null) { return -1; } else if (Double.isNaN(score) && Double.isNaN(o.score)) { return 0; } else if (Double.isNaN(score)) { return 1; } else if (Double.isNaN(o.score)) { return -1; } else { return -1 * score.compareTo(o.score); } } public boolean intersects(TIntSet used) { for (int i = getStartChar(); i < getEndChar(); i++) { if (used.contains(i)) { return true; } } return false; } public void markAsUsed(TIntSet used) { for (int i = getStartChar(); i < getEndChar(); i++) { used.add(i); } } /** * Text of possible link. */ public String getAnchortext() { return anchortext; } public void setAnchortext(String anchortext) { this.anchortext = anchortext; } /** * Probability that specified text represents a link. */ public double getLinkProbability() { return linkProbability; } public void setLinkProbability(double linkProbability) { this.linkProbability = linkProbability; } /** Prior distribution of links associated with text. */ public PrunedCounts<Integer> getPrior() { return prior; } public void setPrior(PrunedCounts<Integer> prior) { this.prior = prior; } /** Scores for outbound pages, ordered by score (track top 5). */ public Scoreboard<Integer> getScores() { return scores; } public void setScores(Scoreboard<Integer> scores) { this.scores = scores; } /** Wikipedia id of destination this is an existing link, otherwise null (used for training) */ public Integer getKnownDest() { return knownDest; } public void setKnownDest(Integer knownDest) { this.knownDest = knownDest; } /** Range of the anchortext token. */ public int getStartChar() { return startChar; } public void setStartChar(int startChar) { this.startChar = startChar; } public int getEndChar() { return endChar; } public void setEndChar(int endChar) { this.endChar = endChar; } public Integer getDest() { return dest; } public void setDest(Integer dest) { this.dest = dest; } public Double getScore() { return score; } public void setScore(Double score) { this.score = score; } public LocalLink toLocalLink(Language language, int wpId) { return new LocalLink(language, anchortext, wpId, dest, true, startChar, true, LocalLink.LocationType.NONE); } }