package org.juxtasoftware.service; import java.io.File; import java.io.FileInputStream; import java.io.InputStream; import junit.framework.Assert; import org.apache.commons.io.IOUtils; import org.junit.Test; import org.juxtasoftware.util.WikiTextUtils; public class WikiTextUtilsTest { @Test public void simpleTextTransform() throws Exception { InputStream is = getClass().getResourceAsStream("/simple.wiki"); File txt = WikiTextUtils.toTxt(is); FileInputStream fis = new FileInputStream(txt); final String content = IOUtils.toString(fis).trim(); IOUtils.closeQuietly(fis); System.out.println(content); txt.delete(); Assert.assertEquals("This is a Hello World example", content); } @Test public void testBrStrip() throws Exception { InputStream is = getClass().getResourceAsStream("/br.wiki"); File txt = WikiTextUtils.toTxt(is); FileInputStream fis = new FileInputStream(txt); final String content = IOUtils.toString(fis).trim(); IOUtils.closeQuietly(fis); System.out.println(content); txt.delete(); Assert.assertFalse(content.contains("<br")); } @Test public void testRQuote() throws Exception { InputStream is = getClass().getResourceAsStream("/rquote.wiki"); File txt = WikiTextUtils.toTxt(is); FileInputStream fis = new FileInputStream(txt); final String content = IOUtils.toString(fis).trim(); IOUtils.closeQuietly(fis); System.out.println(content); txt.delete(); Assert.assertTrue(content.equals("Right quote\n\nLeft quote")); } @Test public void testCQuote() throws Exception { InputStream is = getClass().getResourceAsStream("/cquote.wiki"); File txt = WikiTextUtils.toTxt(is); FileInputStream fis = new FileInputStream(txt); final String content = IOUtils.toString(fis).trim(); IOUtils.closeQuietly(fis); System.out.println(content); txt.delete(); Assert.assertFalse(content.contains("{{cquote")); Assert.assertTrue(content.contains("lush orchestration")); } @Test public void testQuote() throws Exception { InputStream is = getClass().getResourceAsStream("/quote.wiki"); File txt = WikiTextUtils.toTxt(is); FileInputStream fis = new FileInputStream(txt); final String content = IOUtils.toString(fis).trim(); IOUtils.closeQuietly(fis); System.out.println(content); txt.delete(); Assert.assertTrue(content.equals("Cry Havoc and let slip the dogs of war.")); } @Test public void testMultilineFileStrip() throws Exception { InputStream is = getClass().getResourceAsStream("/multiline_file.wiki"); File txt = WikiTextUtils.toTxt(is); FileInputStream fis = new FileInputStream(txt); final String content = IOUtils.toString(fis).trim(); IOUtils.closeQuietly(fis); System.out.println(content); txt.delete(); Assert.assertEquals("Start Text\n\nHeader\nEnd Text", content); } @Test public void testImageStrip() throws Exception { InputStream is = getClass().getResourceAsStream("/image.wiki"); File txt = WikiTextUtils.toTxt(is); FileInputStream fis = new FileInputStream(txt); final String content = IOUtils.toString(fis).trim(); IOUtils.closeQuietly(fis); System.out.println(content); txt.delete(); Assert.assertEquals("Title\ntext", content); } @Test public void testCitationStrip() throws Exception { InputStream is = getClass().getResourceAsStream("/citation.wiki"); File txt = WikiTextUtils.toTxt(is); FileInputStream fis = new FileInputStream(txt); final String content = IOUtils.toString(fis).trim(); IOUtils.closeQuietly(fis); System.out.println(content); txt.delete(); Assert.assertEquals("This worked!", content); } @Test public void testRefStrip() throws Exception { InputStream is = getClass().getResourceAsStream("/ref.wiki"); File txt = WikiTextUtils.toTxt(is); FileInputStream fis = new FileInputStream(txt); final String content = IOUtils.toString(fis).trim(); IOUtils.closeQuietly(fis); System.out.println(content); txt.delete(); Assert.assertFalse(content.contains("Zhang")); Assert.assertFalse(content.contains("first4")); Assert.assertTrue(content.contains("The end.")); Assert.assertTrue(content.contains("cancer.")); } @Test public void testRefStrip2() throws Exception { InputStream is = getClass().getResourceAsStream("/ref2.wiki"); File txt = WikiTextUtils.toTxt(is); FileInputStream fis = new FileInputStream(txt); final String content = IOUtils.toString(fis).trim(); IOUtils.closeQuietly(fis); System.out.println(content); txt.delete(); Assert.assertFalse(content.contains("</ref>")); Assert.assertFalse(content.contains("<ref>")); Assert.assertFalse(content.contains("{{")); Assert.assertFalse(content.contains("}}")); } @Test public void testTroll() throws Exception { InputStream is = getClass().getResourceAsStream("/troll.wiki"); File txt = WikiTextUtils.toTxt(is); FileInputStream fis = new FileInputStream(txt); final String content = IOUtils.toString(fis).trim(); IOUtils.closeQuietly(fis); System.out.println(content); txt.delete(); Assert.assertFalse(content.contains("{{pp-semi")); Assert.assertFalse(content.contains("{{pp-move")); Assert.assertTrue(content.contains("A troll is a supernatural being")); Assert.assertTrue(content.contains("rarely helpful to human beings.")); } @Test public void wikipediaText2Transform() throws Exception { InputStream is = getClass().getResourceAsStream("/wikipedia.wiki"); File txt = WikiTextUtils.toTxt(is); FileInputStream fis = new FileInputStream(txt); final String content = IOUtils.toString(fis); IOUtils.closeQuietly(fis); System.out.println(content); txt.delete(); Assert.assertFalse(content.contains("Bot generated title")); Assert.assertFalse(content.contains("Citation needed")); Assert.assertTrue(content.contains("Tea consumption has its legendary origins in China")); Assert.assertTrue(content.contains("Green Tea's cancer fighting potential")); } @Test public void wikipediaTextTransform() throws Exception { InputStream is = getClass().getResourceAsStream("/complex.wiki"); File txt = WikiTextUtils.toTxt(is); FileInputStream fis = new FileInputStream(txt); final String content = IOUtils.toString(fis); IOUtils.closeQuietly(fis); System.out.println(content); txt.delete(); } }