package org.wikibrain.loader;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import gnu.trove.map.TIntIntMap;
import org.apache.commons.cli.*;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.conf.Configurator;
import org.wikibrain.conf.DefaultOptionBuilder;
import org.wikibrain.core.WikiBrainException;
import org.wikibrain.core.cmd.Env;
import org.wikibrain.core.cmd.EnvBuilder;
import org.wikibrain.core.dao.*;
import org.wikibrain.core.dao.sql.WpDataSource;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.model.LocalLink;
import org.wikibrain.core.lang.LanguageSet;
import org.wikibrain.core.model.UniversalLink;
import org.wikibrain.mapper.ConceptMapper;

import java.io.IOException;
import java.sql.SQLException;
import java.util.*;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Generates the Universal Link map and loads it into a database.
 *
 * <p>Every {@link LocalLink} in the configured languages is turned into one
 * {@link UniversalLink} and written to two {@link UniversalLinkDao} instances
 * (the regular one and the "skeletal" one).
 *
 * @author Ari Weiland
 */
public class UniversalLinkLoader {

    private static final Logger LOG = LoggerFactory.getLogger(UniversalLinkLoader.class);

    /** Number of processed links between two progress log messages. */
    private static final int PROGRESS_INTERVAL = 100000;

    /** Universal ID used when the corresponding local ID is negative. */
    private static final int UNKNOWN_UNIVERSAL_ID = -1;

    private final LanguageSet languageSet;
    private final LocalLinkDao localLinkDao;
    private final UniversalPageDao universalPageDao;
    private final UniversalLinkDao universalLinkDao;
    private final UniversalLinkDao universalLinkSkeletalDao;
    private final MetaInfoDao metaDao;

    /**
     * Creates a loader that reads local links and writes universal links.
     *
     * @param languageSet              languages whose local links are processed
     * @param localLinkDao             source of the local links
     * @param universalPageDao         provides the local-to-universal page ID maps
     * @param universalLinkDao         first destination for the generated links
     * @param universalLinkSkeletalDao second destination for the generated links
     * @param metaDao                  record counter, incremented once per saved link
     */
    public UniversalLinkLoader(LanguageSet languageSet,
                               LocalLinkDao localLinkDao,
                               UniversalPageDao universalPageDao,
                               UniversalLinkDao universalLinkDao,
                               UniversalLinkDao universalLinkSkeletalDao,
                               MetaInfoDao metaDao) {
        this.languageSet = languageSet;
        this.localLinkDao = localLinkDao;
        this.universalPageDao = universalPageDao;
        this.universalLinkDao = universalLinkDao;
        this.universalLinkSkeletalDao = universalLinkSkeletalDao;
        this.metaDao = metaDao;
    }

    /**
     * Prepares both universal link DAOs for loading.
     *
     * @param shouldClear if {@code true}, both DAOs are cleared before loading begins
     * @throws DaoException if clearing or beginning the load fails
     */
    public void beginLoad(boolean shouldClear) throws DaoException {
        if (shouldClear) {
            LOG.info("Clearing data");
            universalLinkDao.clear();
            universalLinkSkeletalDao.clear();
        }
        LOG.info("Begin Load");
        universalLinkDao.beginLoad();
        universalLinkSkeletalDao.beginLoad();
    }

    /**
     * Loads the database of UniversalLinks. Requires a database of
     * UniversalPages and LocalLinks.
     *
     * <p>For each local link, the source and destination page IDs are
     * translated to universal IDs (negative local IDs become {@code -1}),
     * a {@link UniversalLink} holding that single local link is built, and
     * it is saved to both DAOs. A progress message is logged every
     * {@value #PROGRESS_INTERVAL} links; the count includes the link that is
     * about to be saved.
     *
     * @param algorithmId the algorithm ID stored on every generated link
     * @throws WikiBrainException wrapping any {@link DaoException} raised while loading
     */
    public void loadLinkMap(int algorithmId) throws WikiBrainException {
        try {
            Iterable<LocalLink> links = localLinkDao.get(new DaoFilter().setLanguages(languageSet));

            LOG.info("Fetching ID map");
            Map<Language, TIntIntMap> idMaps = universalPageDao.getAllLocalToUnivIdsMap(languageSet);

            LOG.info("Loading links");
            long startMillis = System.currentTimeMillis();
            int processed = 0;
            for (LocalLink localLink : links) {
                processed++;
                if (processed % PROGRESS_INTERVAL == 0) {
                    LOG.info("UniversalLinks loaded: " + processed);
                }

                // Source is resolved before destination, as in the lookup order of the ID maps.
                int sourceUnivId = toUniversalId(idMaps, localLink, localLink.getSourceId());
                int destUnivId = toUniversalId(idMaps, localLink, localLink.getDestId());

                // Each universal link carries exactly the one local link it was derived from.
                Multimap<Language, LocalLink> localLinksByLanguage = HashMultimap.create();
                localLinksByLanguage.put(localLink.getLanguage(), localLink);

                UniversalLink universalLink =
                        new UniversalLink(sourceUnivId, destUnivId, algorithmId, localLinksByLanguage);
                universalLinkDao.save(universalLink);
                universalLinkSkeletalDao.save(universalLink);
                metaDao.incrementRecords(UniversalLink.class);
            }
            logElapsedSeconds(startMillis);
            LOG.info("All UniversalLinks loaded: " + processed);
        } catch (DaoException e) {
            throw new WikiBrainException(e);
        }
    }

    /**
     * Finishes the load on both universal link DAOs and logs how long that took.
     *
     * @throws DaoException if ending the load fails on either DAO
     */
    public void endLoad() throws DaoException {
        LOG.info("End Load");
        long startMillis = System.currentTimeMillis();
        universalLinkDao.endLoad();
        universalLinkSkeletalDao.endLoad();
        logElapsedSeconds(startMillis);
    }

    /**
     * Translates a local page ID into a universal ID using the ID map of the link's language.
     *
     * @param idMaps    per-language local-to-universal ID maps
     * @param localLink link whose language selects the ID map
     * @param localId   local page ID to translate
     * @return {@code -1} if {@code localId} is negative, otherwise the mapped value
     */
    private static int toUniversalId(Map<Language, TIntIntMap> idMaps, LocalLink localLink, int localId) {
        if (localId < 0) {
            return UNKNOWN_UNIVERSAL_ID;
        }
        return idMaps.get(localLink.getLanguage()).get(localId);
    }

    /** Logs the seconds elapsed since {@code startMillis}. */
    private static void logElapsedSeconds(long startMillis) {
        long endMillis = System.currentTimeMillis();
        double seconds = (endMillis - startMillis) / 1000.0;
        LOG.info("Time (s): " + seconds);
    }

    /**
     * Parses the command line; on invalid usage prints the error and help text.
     *
     * @param args raw command line arguments
     * @return the parsed command line, or {@code null} if parsing failed
     */
    private static CommandLine parseCommandLine(String[] args) {
        Options options = new Options();
        options.addOption(
                new DefaultOptionBuilder()
                        .withLongOpt("drop-tables")
                        .withDescription("drop and recreate all tables")
                        .create("d"));
        EnvBuilder.addStandardOptions(options);

        CommandLineParser parser = new PosixParser();
        try {
            return parser.parse(options, args);
        } catch (ParseException e) {
            System.err.println( "Invalid option usage: " + e.getMessage());
            new HelpFormatter().printHelp("UniversalLinkLoader", options);
            return null;
        }
    }

    /**
     * Command line entry point: builds the environment, wires up the DAOs and
     * runs a complete begin/load/end cycle.
     *
     * <p>Option {@code -d} / {@code --drop-tables} clears existing data first.
     *
     * @param args command line arguments
     */
    public static void main(String[] args) throws ClassNotFoundException, SQLException, IOException,
            ConfigurationException, WikiBrainException, DaoException {
        CommandLine cmd = parseCommandLine(args);
        if (cmd == null) {
            return;
        }

        Env env = new EnvBuilder(cmd).build();
        Configurator conf = env.getConfigurator();

        // NOTE(review): no "n" option is registered in this class; unless
        // EnvBuilder.addStandardOptions adds one, this value is presumably
        // always null — confirm.
        String algorithm = cmd.getOptionValue("n", null);

        LocalLinkDao localLinks = conf.get(LocalLinkDao.class);
        UniversalPageDao universalPages = conf.get(UniversalPageDao.class);
        UniversalLinkDao universalLinks = conf.get(UniversalLinkDao.class);
        UniversalLinkDao skeletalUniversalLinks = conf.get(UniversalLinkDao.class, "skeletal-sql-wikidata");
        ConceptMapper mapper = conf.get(ConceptMapper.class, algorithm);
        MetaInfoDao metaInfo = conf.get(MetaInfoDao.class);

        UniversalLinkLoader loader = new UniversalLinkLoader(
                env.getLanguages(),
                localLinks,
                universalPages,
                universalLinks,
                skeletalUniversalLinks,
                metaInfo);

        System.out.println("loading " + mapper.getId());

        loader.beginLoad(cmd.hasOption("d"));
        loader.loadLinkMap(mapper.getId());
        loader.endLoad();
        LOG.info("DONE");
    }
}