package no.priv.garshol.duke.comparators;
import no.priv.garshol.duke.Comparator;
import no.priv.garshol.duke.utils.StringUtils;
/**
 * A token-based comparator that computes a Dice-style coefficient
 * between two strings. Both values are tokenized with
 * {@link StringUtils#split(String)}; every token of the value with
 * fewer tokens is then scored against every token of the other value
 * using a sub-comparator, and the best score per token is summed. The
 * result is <tt>2 * sum / (total number of tokens)</tt>.
 *
 * <p>The sub-comparator is {@link ExactComparator} by default, but can
 * be replaced with any other comparator via
 * {@link #setComparator(Comparator)}.
 *
 * <p>Note that a single token of the longer value may be the best
 * match for several tokens of the shorter value; tokens are not
 * consumed once matched.
 */
public class DiceCoefficientComparator implements Comparator {
  /** Comparator used to score one token against another. */
  private Comparator subcomp;

  /**
   * Creates a comparator which scores individual tokens with an
   * {@link ExactComparator}.
   */
  public DiceCoefficientComparator() {
    this.subcomp = new ExactComparator();
  }

  /**
   * Replaces the comparator used to score individual token pairs.
   *
   * @param comp the comparator to apply to each pair of tokens
   */
  public void setComparator(Comparator comp) {
    this.subcomp = comp;
  }

  /**
   * Always returns true: this comparator splits its input into tokens
   * before comparing.
   */
  public boolean isTokenized() {
    return true;
  }

  /**
   * Compares the two strings token by token.
   *
   * @param s1 the first value; must not be null
   * @param s2 the second value
   * @return 1.0 if the strings are equal or if neither contains any
   *         tokens; otherwise twice the sum of the best per-token
   *         scores divided by the total number of tokens in both
   *         strings
   * @throws NullPointerException if <tt>s1</tt> is null
   */
  public double compare(String s1, String s2) {
    if (s1.equals(s2))
      return 1.0;

    // tokenize
    String[] fewer = StringUtils.split(s1);
    String[] more = StringUtils.split(s2);

    // Two different strings can both contain no tokens at all. Without
    // this guard the final division would be 0.0 / 0, which is NaN.
    // Two token-less values are scored like two equal values, in line
    // with the equals() shortcut above (which already covers "" vs "").
    int total = fewer.length + more.length;
    if (total == 0)
      return 1.0;

    // ensure that 'fewer' is shorter than or same length as 'more', so
    // that the outer loop below runs over the smaller token list
    if (fewer.length > more.length) {
      String[] tmp = more;
      more = fewer;
      fewer = tmp;
    }

    // find the best match for each token in the shorter list; scores
    // below zero never win because 'highest' starts out at 0
    double sum = 0;
    for (int ix1 = 0; ix1 < fewer.length; ix1++) {
      double highest = 0;
      for (int ix2 = 0; ix2 < more.length; ix2++)
        highest = Math.max(highest, subcomp.compare(fewer[ix1], more[ix2]));
      sum += highest;
    }

    return (sum * 2) / total;
  }
}