package org.wikibrain.loader.pipeline;
import com.typesafe.config.Config;
import org.wikibrain.conf.Configuration;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.core.cmd.Env;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.dao.MetaInfoDao;
import org.wikibrain.core.lang.LanguageSet;
import org.wikibrain.core.model.MetaInfo;
import java.io.*;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
*
* Runs stages in the pipeline.
* The stages are specified in the reference.conf and can be turned on or off using command line params.
*
* @author Shilad Sen
*/
public class PipelineLoader {
    /** Group selected by default when exactly one language is configured. */
    public static final String DEFAULT_GROUP = "core";

    /** Group selected by default when more than one language is configured. */
    public static final String MULTILINGUAL_GROUP = "multilingual-core";

    /** Logger shared by all instances of this class. */
    public static Logger LOG = LoggerFactory.getLogger(PipelineLoader.class);

    /** Cumulative meta info fetched from the {@link MetaInfoDao}; handed to every stage on creation. */
    private final Map<String, MetaInfo> state;

    /** Languages taken from the environment. */
    private final LanguageSet langs;

    /** Stages keyed by name, in the order they appear under {@code loader.stages}. */
    private final LinkedHashMap<String, PipelineStage> stages = new LinkedHashMap<String, PipelineStage>();

    /** Group name to the names of the stages it contains, read from {@code loader.groups}. */
    private final Map<String, List<String>> groups = new HashMap<String, List<String>>();

    /** Destination for the per-stage diagnostics recorded after {@link #run(String[])}. */
    private final DiagnosticDao diagnosticDao;

    /** Passed through to every stage when it is run; see {@link #setForceRerun(boolean)}. */
    private boolean forceRerun = false;

    /**
     * Creates a loader that runs the default group of stages for the environment's languages.
     *
     * @param env environment supplying the configuration, languages and daos
     * @throws IllegalArgumentException if the environment has no languages
     */
    public PipelineLoader(Env env) throws ConfigurationException, DaoException, ClassNotFoundException, InterruptedException {
        this(env, null);
    }

    /**
     * Creates a loader and applies the given stage arguments.
     *
     * @param env  environment supplying the configuration, languages and daos
     * @param args stage or group overrides; if {@code null}, {@link #DEFAULT_GROUP} is enabled
     *             for a single language and {@link #MULTILINGUAL_GROUP} for several
     * @throws IllegalArgumentException if {@code args} is {@code null} and no languages are
     *                                  configured, or if an unknown stage is referenced
     */
    public PipelineLoader(Env env, List<StageArgs> args) throws ConfigurationException, DaoException, ClassNotFoundException, InterruptedException {
        MetaInfoDao metaDao = env.getConfigurator().get(MetaInfoDao.class);
        this.langs = env.getLanguages();
        this.state = metaDao.getAllCummulativeInfo();
        this.diagnosticDao = env.getConfigurator().get(DiagnosticDao.class);
        initConfig(env.getConfiguration());

        List<StageArgs> effectiveArgs = args;
        if (effectiveArgs == null) {
            int numLangs = langs.size();
            if (numLangs == 0) {
                throw new IllegalArgumentException("No languages specified to pipeline loader");
            }
            String group = (numLangs == 1) ? DEFAULT_GROUP : MULTILINGUAL_GROUP;
            effectiveArgs = Arrays.asList(new StageArgs(group, true, null));
        }
        setStageArguments(effectiveArgs);
    }

    /**
     * Convenience overload of {@link #runDiagnostics(Env, String[], PrintWriter)} that writes to
     * a {@link PrintStream}. The stream is flushed but deliberately not closed, since it belongs
     * to the caller.
     *
     * @return the value returned by the {@link PrintWriter} overload
     */
    public boolean runDiagnostics(Env env, String[] args, PrintStream writer) throws IOException, InterruptedException {
        PrintWriter pw = new PrintWriter(writer);
        boolean result = runDiagnostics(env, args, pw);
        pw.flush();
        return result;
    }

    /**
     * Performs a dry run of every enabled stage and writes a diagnostic report.
     *
     * <p>All stages are reset when this method exits, whether it returns normally or throws.
     *
     * @param env    environment handed to the {@link DiagnosticReport}
     * @param args   arguments forwarded to each stage
     * @param writer destination of the report
     * @return the result of {@code DiagnosticReport.runDiagnostics(writer)}
     *         (presumably {@code true} when all diagnostics pass; confirm against DiagnosticReport)
     * @throws IllegalStateException if a stage reports failure during the dry run
     */
    public synchronized boolean runDiagnostics(Env env, String[] args, PrintWriter writer) throws IOException, InterruptedException {
        try {
            for (PipelineStage stage : stages.values()) {
                stage.reset();
                stage.setDryRun(true);
            }
            LOG.info("Beginning dry run");
            for (PipelineStage stage : stages.values()) {
                if (isEnabled(stage)) {
                    try {
                        stage.runWithDependenciesIfNeeded(args, forceRerun);
                    } catch (StageFailedException e) {
                        throw new IllegalStateException(e); // shouldn't happen
                    }
                }
            }
            DiagnosticReport report = new DiagnosticReport(env, langs, stages);
            boolean result = report.runDiagnostics(writer);
            LOG.info("Ended dry run");
            return result;
        } finally {
            // Reset even when the dry run or the report throws, so that stages are not left in
            // whatever state the dry run put them in.
            // NOTE(review): nothing here calls setDryRun(false); this relies on reset() clearing
            // the dry-run flag. Confirm against PipelineStage.
            resetStages();
        }
    }

    /**
     * Runs every enabled stage, in configuration order, together with its dependencies.
     *
     * <p>Diagnostics for the stages that were run are saved afterwards on a best-effort basis,
     * whether or not loading succeeded.
     *
     * @param args arguments forwarded to each stage
     * @throws StageFailedException propagated from the first stage that fails
     */
    public synchronized void run(String [] args) throws IOException, InterruptedException, StageFailedException {
        resetStages();
        LOG.info("Beginning loading");
        try {
            for (PipelineStage stage : stages.values()) {
                if (isEnabled(stage)) {
                    LOG.info("Beginning stage {}", stage.getName());
                    stage.runWithDependenciesIfNeeded(args, forceRerun);
                    LOG.info("Successfully completed stage {}", stage.getName());
                }
            }
            LOG.info("Loading successfully finished");
        } finally {
            quietlySaveDiagnostics();
        }
    }

    /**
     * Records one {@link StageDiagnostic} per stage that has been run, all sharing a single
     * random run id. Any failure is logged and swallowed so that it can never mask the outcome
     * of the load itself.
     */
    private void quietlySaveDiagnostics() {
        try {
            // Clear the sign bit instead of using Math.abs(), which returns a negative number
            // for Long.MIN_VALUE.
            long runId = new Random().nextLong() & Long.MAX_VALUE;
            for (PipelineStage stage : stages.values()) {
                if (stage == null || !stage.hasBeenRun()) {
                    continue;
                }
                StageDiagnostic sd = new StageDiagnostic(
                        runId,
                        stage.getName(),
                        langs,
                        stage.getElapsedSeconds(),
                        CpuBenchmarker.getSingleCoreSpeed(),
                        CpuBenchmarker.getMultiCoreSpeed(),
                        -1.0    // NOTE(review): presumably a "not measured" placeholder; confirm against StageDiagnostic
                );
                sd.setSucceeded(stage.getSucceeded());
                diagnosticDao.saveQuietly(sd);
            }
        } catch (Exception e) {
            LOG.warn("Diagnostics save failed (this should be harmless):", e);
        }
    }

    /**
     * Builds the stages from {@code loader.stages} and the groups from {@code loader.groups}.
     *
     * @throws IllegalArgumentException if a group refers to a stage that is not defined
     */
    private void initConfig(Configuration config) throws ClassNotFoundException {
        for (Config stageConfig : config.get().getConfigList("loader.stages")) {
            // Each new stage is given the stages created so far
            // (presumably to resolve its dependencies; confirm against PipelineStage).
            PipelineStage stage = new PipelineStage(stageConfig, stages.values(), state);
            stages.put(stage.getName(), stage);
        }

        // Set up the groups
        Config groupConfig = config.get().getConfig("loader.groups");
        for (String groupName : config.get().getObject("loader.groups").keySet()) {
            List<String> members = new ArrayList<String>();
            for (String stageName : groupConfig.getStringList(groupName)) {
                getStage(stageName); // validation only: throws IllegalArgumentException if unknown stage
                members.add(stageName);
            }
            groups.put(groupName, members);
        }
    }

    /**
     * Applies the requested overrides to the stages. An argument that names a group is expanded
     * into one copy per stage in that group; any other argument must name a stage.
     *
     * @throws IllegalArgumentException if an argument names neither a group nor a known stage
     */
    private void setStageArguments(List<StageArgs> stageArgs) {
        // expand groups in the options to the individual stages
        List<StageArgs> expandedArgs = new ArrayList<StageArgs>();
        for (StageArgs sa : stageArgs) {
            List<String> members = groups.get(sa.getStageName());
            if (members == null) {
                expandedArgs.add(sa);
                continue;
            }
            for (String stageName : members) {
                expandedArgs.add(sa.copyWithName(stageName));
            }
        }

        // Run with the requested options
        for (StageArgs sa : expandedArgs) {
            getStage(sa.getStageName()).setOverrideOptions(sa.isShouldRun(), sa.getArgs());
        }
    }

    /**
     * Looks up a stage by name.
     *
     * @throws IllegalArgumentException if no stage with that name is configured
     */
    private PipelineStage getStage(String name) {
        PipelineStage stage = stages.get(name);
        if (stage == null) {
            throw new IllegalArgumentException("Unknown stage: " + name);
        }
        return stage;
    }

    /** Returns true only if the stage's should-run setting is non-null and true. */
    private static boolean isEnabled(PipelineStage stage) {
        return Boolean.TRUE.equals(stage.getShouldRun());
    }

    /** Calls {@code reset()} on every configured stage. */
    private void resetStages() {
        for (PipelineStage stage : stages.values()) {
            stage.reset();
        }
    }

    /**
     * Sets the flag that is passed to every stage as the second argument of
     * {@code runWithDependenciesIfNeeded}.
     */
    public void setForceRerun(boolean forceRerun) {
        this.forceRerun = forceRerun;
    }
}