package org.wikibrain.parser; import com.jolbox.bonecp.BoneCPDataSource; import org.junit.Ignore; import org.junit.Test; import org.wikibrain.core.dao.*; import org.wikibrain.core.dao.sql.*; import org.wikibrain.core.lang.LanguageInfo; import org.wikibrain.parser.wiki.*; import org.wikibrain.core.model.RawPage; import java.io.File; import java.util.ArrayList; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; /** */ public class TestWikiTextDumpParser { public static final File SIMPLE_DUMP = new File("../wikibrain/wikibrain-loader/simplewiki-20130608-pages-articles.xml"); public static final LanguageInfo SIMPLE = LanguageInfo.getByLangCode("simple"); @Ignore @Test public void test1() throws DaoException { List<String> allowedIllLangs = new ArrayList<String>(); allowedIllLangs.add("simple"); // Scans for ILLs in all languages WikiTextDumpParser wtdp = new WikiTextDumpParser(null, null); // Scans for ILLs in languages specified above only //WikiTextDumpParser wtdp = new WikiTextDumpParser(EN_DUMP, EN, allowedIllLangs); final AtomicInteger pageCounter = new AtomicInteger(); final ArrayList<ParsedCategory> categories = new ArrayList<ParsedCategory>(); final ArrayList<ParsedIll> ills = new ArrayList<ParsedIll>(); final ArrayList<ParsedLink> links = new ArrayList<ParsedLink>(); final ArrayList<ParsedRedirect> redirects = new ArrayList<ParsedRedirect>(); List<ParserVisitor> visitors = new ArrayList<ParserVisitor>(); ParserVisitor visitor = new ParserVisitor() { @Override public void beginPage(RawPage xml) { pageCounter.incrementAndGet(); } @Override public void category(ParsedCategory category) { categories.add(category); } @Override public void ill(ParsedIll ill) { ills.add(ill); } @Override public void link(ParsedLink link) { links.add(link); } @Override public void redirect(ParsedRedirect redirect) { redirects.add(redirect); } }; BoneCPDataSource ds = new BoneCPDataSource(); ds.setJdbcUrl("jdbc:h2:"+"db/h2"); ds.setUsername("sa"); ds.setPassword(""); WpDataSource wpDs = new WpDataSource(ds); LocalLinkDao linkDao = new LocalLinkSqlDao(wpDs); LocalPageDao pageDao = new LocalPageSqlDao(wpDs); LocalCategoryMemberDao catMemDao = new LocalCategoryMemberSqlDao( wpDs, new LocalPageSqlDao(wpDs)); MetaInfoDao metaDao = new MetaInfoSqlDao(wpDs); linkDao.beginLoad(); catMemDao.beginLoad(); ParserVisitor linkVisitor = new LocalLinkVisitor(linkDao, pageDao, metaDao); ParserVisitor catVisitor = new LocalCategoryVisitor(pageDao, catMemDao, metaDao); visitors.add(visitor); visitors.add(linkVisitor); visitors.add(catVisitor); wtdp.parse(visitors); System.out.println("Categories: " + categories.size()); System.out.println("ILLs: " + ills.size()); System.out.println("Links: " + links.size()); System.out.println("Redirects: " + redirects.size()); linkDao.endLoad(); catMemDao.endLoad(); } }