/******************************************************************************* * Trombone is a flexible text processing and analysis library used * primarily by Voyant Tools (voyant-tools.org). * * Copyright (©) 2007-2012 Stéfan Sinclair & Geoffrey Rockwell * * This file is part of Trombone. * * Trombone is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Trombone is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Trombone. If not, see <http://www.gnu.org/licenses/>. ******************************************************************************/ package org.voyanttools.trombone.model; import java.io.Serializable; import java.text.Normalizer; import java.util.Comparator; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.bouncycastle.util.Arrays; import org.voyanttools.trombone.util.FlexibleParameters; import com.thoughtworks.xstream.annotations.XStreamAlias; import com.thoughtworks.xstream.annotations.XStreamOmitField; /** * @author sgs * */ @XStreamAlias("kwic") public class Kwic implements Serializable { public enum Sort { TERMASC, TERMDESC, DOCINDEXASC, DOCINDEXDESC, POSITIONASC, POSITIONDESC, LEFTASC, LEFTDESC, RIGHTASC, RIGHTDESC; public static Sort getForgivingly(FlexibleParameters parameters) { String sort = parameters.getParameterValue("sort", "").toUpperCase(); String sortPrefix = "TERM"; // default if (sort.startsWith("POSITION")) {sortPrefix = "POSITION";} else if (sort.startsWith("DOCINDEX")) {sortPrefix = "DOCINDEX";} else if (sort.startsWith("LEFT")) {sortPrefix = "LEFT";} else if (sort.startsWith("RIGHT")) {sortPrefix = "RIGHT";} String dir = parameters.getParameterValue("dir", "").toUpperCase(); String dirSuffix = "ASC"; if (dir.endsWith("DESC")) {dirSuffix="DESC";} return valueOf(sortPrefix+dirSuffix); } } public enum OverlapStrategy { none, first, merge; public static OverlapStrategy valueOfForgivingly(String string) { string = string.toLowerCase(); for (OverlapStrategy t : values()) { if (t.name().equals(string)) return t; } return none; } } int docIndex; String query; String term; @XStreamOmitField String normalizedAnalyzedMiddle = null; int position; String left; String middle; String right; @XStreamOmitField String leftNormalizedReverseSort = null; public Kwic(int corpusDocumentIndex, String queryString, String term, int position, String left, String middle, String right) { this.docIndex = corpusDocumentIndex; this.query = queryString; this.term = term; this.position = position; this.left = left; this.middle = middle; this.right = right; } private String getNormalizedTerm() { if (normalizedAnalyzedMiddle==null) { normalizedAnalyzedMiddle = getNormalizedString(term); } return normalizedAnalyzedMiddle; } private String getNormalizedString(String string) { return Normalizer.normalize(string, Normalizer.Form.NFD); } public static Comparator<Kwic> getComparator(Sort sort) { switch(sort) { case TERMDESC: return TermDescendingComparator; case POSITIONASC: return PositionAscendingComparator; case POSITIONDESC: return PositionDescendingComparator; case DOCINDEXASC: return DocIndexAscendingComparator; case DOCINDEXDESC: return DocIndexDescendingComparator; case LEFTASC: return LeftAscendingComparator; case LEFTDESC: return LeftDescendingComparator; case RIGHTASC: return RightAscendingComparator; case RIGHTDESC: return RightDescendingComparator; default: return TermAscendingComparator; } } private static Comparator<Kwic> TermAscendingComparator = new Comparator<Kwic>() { @Override public int compare(Kwic kwic1, Kwic kwic2) { int i = kwic1.getNormalizedTerm().compareTo(kwic2.getNormalizedTerm()); if (i==0) { i = Integer.compare(kwic1.docIndex, kwic2.docIndex); if (i==0) { i = Integer.compare(kwic1.position, kwic2.position); } } return i; } }; private static Comparator<Kwic> TermDescendingComparator = new Comparator<Kwic>() { @Override public int compare(Kwic kwic1, Kwic kwic2) { int i = kwic2.getNormalizedTerm().compareTo(kwic1.getNormalizedTerm()); if (i==0) { i = Integer.compare(kwic2.docIndex, kwic1.docIndex); if (i==0) { i = Integer.compare(kwic2.position, kwic1.position); } } return i; } }; private static Comparator<Kwic> PositionAscendingComparator = new Comparator<Kwic>() { @Override public int compare(Kwic kwic1, Kwic kwic2) { int i = Integer.compare(kwic1.position, kwic2.position); if (i==0) { i = Integer.compare(kwic1.docIndex, kwic2.docIndex); if (i==0) { i = kwic1.getNormalizedTerm().compareTo(kwic2.getNormalizedTerm()); } } return i; } }; private static Comparator<Kwic> PositionDescendingComparator = new Comparator<Kwic>() { @Override public int compare(Kwic kwic1, Kwic kwic2) { int i = Integer.compare(kwic2.position, kwic1.position); if (i==0) { i = Integer.compare(kwic2.docIndex, kwic1.docIndex); if (i==0) { i = kwic2.getNormalizedTerm().compareTo(kwic1.getNormalizedTerm()); } } return i; } }; private static Comparator<Kwic> DocIndexAscendingComparator = new Comparator<Kwic>() { @Override public int compare(Kwic kwic1, Kwic kwic2) { int i = Integer.compare(kwic1.docIndex, kwic2.docIndex); if (i==0) { i = Integer.compare(kwic1.position, kwic2.position); if (i==0) { i = kwic1.getNormalizedTerm().compareTo(kwic2.getNormalizedTerm()); } } return i; } }; private static Comparator<Kwic> DocIndexDescendingComparator = new Comparator<Kwic>() { @Override public int compare(Kwic kwic1, Kwic kwic2) { int i = Integer.compare(kwic2.docIndex, kwic1.docIndex); if (i==0) { i = Integer.compare(kwic2.position, kwic1.position); if (i==0) { i = kwic2.getNormalizedTerm().compareToIgnoreCase(kwic1.getNormalizedTerm()); } } return i; } }; private static Comparator<Kwic> LeftAscendingComparator = new Comparator<Kwic>() { @Override public int compare(Kwic kwic1, Kwic kwic2) { int i = kwic1.getLeftNormalizedReverseSort().compareToIgnoreCase(kwic2.getLeftNormalizedReverseSort()); if (i==0) { i = Integer.compare(kwic1.docIndex, kwic2.docIndex); if (i==0) { i = Integer.compare(kwic1.position, kwic2.position); } } return i; } }; private static Comparator<Kwic> LeftDescendingComparator = new Comparator<Kwic>() { @Override public int compare(Kwic kwic1, Kwic kwic2) { int i = kwic2.getLeftNormalizedReverseSort().compareToIgnoreCase(kwic1.getLeftNormalizedReverseSort()); if (i==0) { i = Integer.compare(kwic2.docIndex, kwic1.docIndex); if (i==0) { i = Integer.compare(kwic2.position, kwic1.position); } } return i; } }; private static Comparator<Kwic> RightAscendingComparator = new Comparator<Kwic>() { @Override public int compare(Kwic kwic1, Kwic kwic2) { int i = kwic1.getRight().compareToIgnoreCase(kwic2.getRight()); if (i==0) { i = Integer.compare(kwic1.docIndex, kwic2.docIndex); if (i==0) { i = Integer.compare(kwic1.position, kwic2.position); } } return i; } }; private static Comparator<Kwic> RightDescendingComparator = new Comparator<Kwic>() { @Override public int compare(Kwic kwic1, Kwic kwic2) { int i = kwic2.getRight().compareToIgnoreCase(kwic1.getRight()); if (i==0) { i = Integer.compare(kwic2.docIndex, kwic1.docIndex); if (i==0) { i = Integer.compare(kwic2.position, kwic1.position); } } return i; } }; public String toString() { return new StringBuilder(String.valueOf(docIndex)).append(".").append(position).append(" (").append(term).append("): ").append(left).append(" ***").append(middle).append("*** ").append(right).toString().replaceAll("\\s+", " ").trim(); } private String getLeftNormalizedReverseSort() { if (this.leftNormalizedReverseSort==null) { String left = getLeft(); String normalizedLeft = getNormalizedString(left.replaceAll("[^\\p{L}]+", " ").trim()); String[] lefts = normalizedLeft.split("\\s+"); ArrayUtils.reverse(lefts); leftNormalizedReverseSort = StringUtils.join(lefts, " "); } return leftNormalizedReverseSort; } public String getLeft() { return left; } public String getMiddle() { return middle; } public String getRight() { return right; } public int getPosition() { return position; } public int getDocIndex() { return position; } }