package org.wikibrain.loader;
import org.wikibrain.core.WikiBrainException;
import org.wikibrain.core.model.RawPage;
import org.wikibrain.parser.wiki.ParsedIll;
import org.wikibrain.parser.wiki.ParserVisitor;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicInteger;
/**
* @author Shilad Sen
*/
public class InterLanguageLinkExtractor {

    /**
     * Visitor that records every interlanguage link passed to {@link #ill(ParsedIll)}
     * as one tab-separated line on a shared writer and keeps a running total.
     *
     * <p>Writes are serialized on the writer's monitor and the total is kept in an
     * {@link AtomicInteger}, so {@code ill} may be invoked from several threads
     * against the same instance. The writer is never flushed or closed here; that
     * remains the responsibility of whoever supplied it.
     */
    static class IllParserVisitor extends ParserVisitor {
        // Both fields are final: `output` doubles as the lock object, and locking
        // on a reassignable reference would let two threads hold different monitors.
        private final AtomicInteger count = new AtomicInteger();
        private final BufferedWriter output;

        /**
         * @param output destination for the extracted links; must not be null,
         *               since it is also used as the lock guarding each write
         */
        public IllParserVisitor(BufferedWriter output) {
            this.output = output;
        }

        /**
         * Appends one record describing the given interlanguage link.
         *
         * <p>Record layout (fields separated by a tab, terminated by {@code "\n"}):
         * source language code, source canonical title, target language code,
         * target canonical title.
         *
         * @param ill the parsed interlanguage link to record
         * @throws WikiBrainException wrapping any {@link IOException} raised by the writer
         */
        public void ill(ParsedIll ill) throws WikiBrainException {
            RawPage page = ill.location.getXml();

            // TODO: This format may not be easy to parse. Change it.
            // The record is assembled before taking the lock so the monitor is held
            // only for the actual write.
            String record =
                    page.getLanguage().getLangCode() + "\t" + page.getTitle().getCanonicalTitle() + "\t" +
                    ill.title.getLanguage().getLangCode() + "\t" + ill.title.getCanonicalTitle() + "\n";

            try {
                synchronized (output) {
                    output.write(record);
                }
            } catch (IOException e) {
                throw new WikiBrainException(e);
            }

            // Reached only when the write did not throw, so failed writes are not counted.
            count.incrementAndGet();
        }

        /**
         * @return the number of links handed to the writer without an I/O error so far
         */
        public int getCount() {
            return count.get();
        }
    }
}