/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.ingestion.google.webmaster;
import java.util.ArrayList;
import java.util.Arrays;
import org.testng.Assert;
import org.testng.annotations.Test;
/**
 * Unit tests for {@link UrlTrie} and {@link UrlTrieNode}: root construction, per-node size,
 * the "exist" flag, and child lookup by path suffix.
 *
 * <p>All assertions follow TestNG's {@code (actual, expected)} argument order so that failure
 * messages report the expected and observed values correctly.
 *
 * <p>NOTE(review): the assertions on {@code getParent()} and {@code nextSibling()} are kept as
 * disabled comments exactly as found; presumably those accessors are not available on
 * {@link UrlTrieNode} in this version - confirm before re-enabling or deleting them.
 */
@Test(groups = {"gobblin.source.extractor.extract.google.webmaster"})
public class UrlTrieTest {

  private static final String LINKEDIN_ROOT = "https://www.linkedin.com/";

  /** A trie rooted at the empty string has a root node without a character value. */
  @Test
  public void testTrieRoot1() {
    UrlTrie trie = new UrlTrie("", new ArrayList<String>());
    UrlTrieNode root = trie.getRoot();
    Assert.assertNull(root.getValue());
    // Disabled: Assert.assertTrue(root.getParent() == null);
  }

  /** A trie rooted at {@code null} has a root node without a character value. */
  @Test
  public void testTrieRoot2() {
    UrlTrie trie = new UrlTrie(null, new ArrayList<String>());
    UrlTrieNode root = trie.getRoot();
    Assert.assertNull(root.getValue());
    // Disabled: Assert.assertTrue(root.getParent() == null);
  }

  /** With a non-empty root URL and no pages, the root holds the URL's last character and size 0. */
  @Test
  public void testTrieRoot3() {
    UrlTrie trie = new UrlTrie("www.linkedin.com/", new ArrayList<String>());
    UrlTrieNode root = trie.getRoot();
    Assert.assertEquals(root.getValue(), Character.valueOf('/'));
    Assert.assertEquals(root.getSize(), 0);
    // Disabled: Assert.assertTrue(root.getParent() == null);
  }

  /** A single page below the root contributes size 1 to both the root and the page's own node. */
  @Test
  public void testParent() {
    UrlTrie trie = new UrlTrie("www.linkedin.com/", Arrays.asList("www.linkedin.com/in/"));
    UrlTrieNode root = trie.getRoot();
    Assert.assertEquals(root.getSize(), 1);
    UrlTrieNode child = root.getChild("in/");
    Assert.assertNotNull(child);
    Assert.assertEquals(child.getSize(), 1);
    // Disabled: Assert.assertEquals(root, child.getParent().getParent().getParent());
  }

  /** Two single-character pages under the same root are both reachable from the root. */
  @Test
  public void testSiblings() {
    UrlTrie trie = new UrlTrie(LINKEDIN_ROOT, Arrays.asList(LINKEDIN_ROOT + "a", LINKEDIN_ROOT + "b"));
    UrlTrieNode root = trie.getRoot();
    // Disabled: Assert.assertEquals(root.nextSibling(), null);
    UrlTrieNode aNode = root.getChild("a");
    UrlTrieNode bNode = root.getChild("b");
    // Without the sibling accessors, at least verify that both lookups resolve to distinct nodes.
    Assert.assertNotNull(aNode);
    Assert.assertNotNull(bNode);
    Assert.assertNotSame(aNode, bNode);
    // Disabled: Assert.assertEquals(root.getChild("a").nextSibling(), bNode);
    // Disabled: Assert.assertEquals(bNode.nextSibling(), null);
  }

  /** Two sibling paths directly under a root that is not itself one of the pages. */
  @Test
  public void testTrieFlat() {
    UrlTrie trie = new UrlTrie(LINKEDIN_ROOT, Arrays.asList(LINKEDIN_ROOT + "jobs/", LINKEDIN_ROOT + "in/"));
    UrlTrieNode root = trie.getRoot();
    assertNode(root, '/', 2, false);
    Assert.assertEquals(root.children.size(), 2);

    // Path1
    checkEmptyPath(trie, "jobs/", 1);
    assertNode(trie.getChild("jobs/"), '/', 1, true);

    // Path2
    checkEmptyPath(trie, "in/", 1);
    assertNode(trie.getChild("in/"), '/', 1, true);
  }

  /** The root URL listed twice as a page: the root size is 3 for the three entries supplied. */
  @Test
  public void testDuplicate() {
    UrlTrie trie = new UrlTrie(LINKEDIN_ROOT, Arrays.asList(LINKEDIN_ROOT, LINKEDIN_ROOT, LINKEDIN_ROOT + "in/"));
    UrlTrieNode root = trie.getRoot();
    assertNode(root, '/', 3, true);
    Assert.assertEquals(root.children.size(), 1);

    // Path1
    checkEmptyPath(trie, "in/", 1);
    assertNode(trie.getChild("in/"), '/', 1, true);
  }

  /** A chain of nested pages: root, "in/" and "in/chenguo" each exist, with sizes 3, 2 and 1. */
  @Test
  public void testTrieVertical() {
    UrlTrie trie = new UrlTrie(LINKEDIN_ROOT,
        Arrays.asList(LINKEDIN_ROOT, LINKEDIN_ROOT + "in/", LINKEDIN_ROOT + "in/chenguo"));
    UrlTrieNode root = trie.getRoot();
    assertNode(root, '/', 3, true);
    Assert.assertEquals(root.children.size(), 1);

    // Path1
    checkEmptyPath(trie, "in/", 2);
    UrlTrieNode inNode = trie.getChild("in/");
    assertNode(inNode, '/', 2, true);

    UrlTrieNode chenguo = inNode.getChild("chenguo");
    // Lookup relative to an inner node must agree with the lookup of the full suffix from the root.
    Assert.assertEquals(root.getChild("in/chenguo"), chenguo);
    assertNode(chenguo, 'o', 1, true);
  }

  /**
   * Verifies every proper, non-empty prefix of {@code path}: the node reached through
   * {@code trie.getChild(prefix)} must hold the prefix's last character, report the given size,
   * and must not be flagged as existing. The full {@code path} itself is deliberately not
   * checked here; callers assert on it separately.
   *
   * @param trie the trie to look the prefixes up in
   * @param path the path, relative to the trie root, whose intermediate nodes are checked
   * @param pathChildrenCount the size expected on each intermediate node
   */
  private void checkEmptyPath(UrlTrie trie, String path, int pathChildrenCount) {
    for (int i = 1; i < path.length(); ++i) {
      UrlTrieNode node = trie.getChild(path.substring(0, i));
      assertNode(node, path.charAt(i - 1), pathChildrenCount, false);
    }
  }

  /**
   * Asserts a node's character value, size and existence flag in one go. Checks for
   * {@code null} first so that a missing node fails as an assertion rather than as a
   * {@link NullPointerException}.
   *
   * @param node the node under test
   * @param expectedValue the character the node is expected to hold
   * @param expectedSize the value expected from {@code node.getSize()}
   * @param expectedExist the value expected from {@code node.isExist()}
   */
  private static void assertNode(UrlTrieNode node, char expectedValue, long expectedSize, boolean expectedExist) {
    Assert.assertNotNull(node);
    Assert.assertEquals(node.getValue(), Character.valueOf(expectedValue));
    Assert.assertEquals(node.getSize(), expectedSize);
    Assert.assertEquals(node.isExist(), expectedExist);
  }
}