package org.wikibrain.mapper.algorithms.conceptualign3;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Lists;
import org.apache.commons.lang.StringUtils;
import org.wikibrain.core.WikiBrainException;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.dao.LocalPageDao;
import org.wikibrain.core.lang.LocalId;
import org.wikibrain.core.model.LocalPage;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Connected-component handler for the Conceptualign3 mapper.
 *
 * <p>For each connected component of the interlanguage-link (ILL) graph it
 * either emits the component unchanged as one cluster (when the component scan
 * reports a clarity of exactly 1.0) or delegates to {@link ILLSplitter} to
 * break the component into several clusters. Every emitted cluster is tagged
 * with a fresh universal id taken from an internal counter.
 *
 * <p>The id counter is a plain {@code int} with no synchronization.
 *
 * Created by bjhecht on 5/20/14.
 */
public class Conceptualign3ConnectedComponentHandler implements ConnectedComponentHandler {

    // Logger is bound to this class so that log output is attributed correctly.
    private static final Logger LOG =
            LoggerFactory.getLogger(Conceptualign3ConnectedComponentHandler.class);

    private final double minVotesRatio;
    private final int maxVotesPerLang;
    private final boolean print;
    private final LocalPageDao lpDao;

    // Last universal id handed out; incremented before each use.
    private int curUnivId;

    /**
     * @param minVotesRatio   ratio multiplied by the component's language count to
     *                        derive the minimum-votes threshold passed to the splitter
     * @param maxVotesPerLang passed through unchanged to {@link ILLSplitter#split}
     * @param print           when true, ambiguous clusters are logged and the flag is
     *                        forwarded to the splitter
     * @param lpDao           page DAO used to resolve titles for logging and forwarded
     *                        to the splitter
     * @throws WikiBrainException declared for caller compatibility; this constructor
     *                            itself performs only field assignments
     */
    public Conceptualign3ConnectedComponentHandler(double minVotesRatio,
            int maxVotesPerLang, boolean print, LocalPageDao lpDao) throws WikiBrainException {
        this.minVotesRatio = minVotesRatio;
        this.maxVotesPerLang = maxVotesPerLang;
        this.print = print;
        this.curUnivId = 0;
        this.lpDao = lpDao;
    }

    /**
     * Advances the universal-id counter and returns the new value.
     *
     * <p>Despite the getter-style name this method has a side effect: each call
     * returns a different id. The first call returns 1.
     *
     * @return the newly allocated universal id
     */
    public int getCurUnivId() {
        curUnivId++;
        return curUnivId;
    }

    /**
     * Turns one connected component into one or more clusters.
     *
     * @param curVertices the vertices of the component
     * @param graph       the ILL graph; queried for the outgoing edges of each vertex
     * @param componentId id of the component (not used by this implementation)
     * @return a single cluster holding {@code curVertices} when the component is
     *         unambiguous, otherwise one cluster per set returned by the splitter
     * @throws WikiBrainException if logging the ambiguous cluster hits a DAO error,
     *                            or if a callee throws it
     */
    @Override
    public List<ClusterResult> handle(List<LocalId> curVertices, ILLGraph graph, int componentId)
            throws WikiBrainException {

        // if it's unambiguous, revert to Conceptualign1
        ConceptualignHelper.ScanResult origScanResult =
                ConceptualignHelper.scanVerticesOfComponent(curVertices);
        boolean origNotAmbiguous = origScanResult.clarity.equals(1.0);
        if (origNotAmbiguous) {
            List<ClusterResult> rVal = Lists.newArrayList();
            rVal.add(new ClusterResult(getCurUnivId(), curVertices));
            return rVal;
        }

        // if it is ambiguous... TODO: convert to multimap
        if (print) {
            printAmbiguousCluster(curVertices);
        }

        // Adjacency map: each vertex -> destinations of its outgoing ILL edges.
        Map<LocalId, List<LocalId>> ills = new HashMap<LocalId, List<LocalId>>();
        for (LocalId curVertex : curVertices) {
            Set<ILLEdge> edges = graph.outgoingEdgesOf(curVertex);
            List<LocalId> dests = new ArrayList<LocalId>(edges.size());
            for (ILLEdge edge : edges) {
                dests.add(edge.dest);
            }
            ills.put(curVertex, dests);
        }

        // -1 to account for the node itself
        int minLangVotes = (int) Math.floor(minVotesRatio * origScanResult.langCount - 1);
        Set<Set<LocalId>> clusters =
                ILLSplitter.split(ills, minLangVotes, maxVotesPerLang, print, lpDao);

        List<ClusterResult> rVal = new ArrayList<ClusterResult>();
        for (Set<LocalId> curCluster : clusters) {
            // Allocate the id before building the result, one id per cluster.
            int clusterUnivId = getCurUnivId();
            List<LocalId> vertexList = new ArrayList<LocalId>(curCluster);
            rVal.add(new ClusterResult(clusterUnivId, vertexList));
        }
        return rVal;
    }

    /**
     * Logs the titles of all pages in an ambiguous cluster at INFO level.
     *
     * @param vertices the vertices of the ambiguous component
     * @throws WikiBrainException wrapping any {@link DaoException} raised while
     *                            looking up a page
     */
    private void printAmbiguousCluster(List<LocalId> vertices) throws WikiBrainException {
        try {
            List<String> titles = Lists.newArrayList();
            for (LocalId vertex : vertices) {
                LocalPage localPage = lpDao.getById(vertex);
                if (localPage == null) {
                    // Defensive: a missing page must not abort the whole component
                    // just because diagnostic printing is enabled.
                    titles.add("<unknown page " + vertex + ">");
                } else {
                    titles.add(localPage.getTitle().toString());
                }
            }
            LOG.info("Found ambiguous cluster: {}", StringUtils.join(titles, ", "));
        } catch (DaoException e) {
            throw new WikiBrainException(e);
        }
    }
}