/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.ingestion.google.webmaster;
import java.util.List;
import org.apache.commons.lang3.tuple.Triple;
import org.testng.Assert;
import org.testng.annotations.Test;
/**
 * Exercises {@link TrieBasedProducerJob} against the groups that a
 * {@link UrlTriePrefixGrouper} yields for the trie built by
 * {@code UrlTriePostOrderIteratorTest.getUrlTrie2}.
 */
@Test(groups = {"gobblin.source.extractor.extract.google.webmaster"})
public class TrieBasedProducerJobTest {
  private String _property = "www.linkedin.com/";

  /**
   * Pulls three groups from a grouper created with group size 4, wraps each one in a
   * {@link TrieBasedProducerJob}, and checks the job's page, operator, date range and page count,
   * followed by the jobs returned from {@code partitionJobs()}. Finally verifies that the grouper
   * has no further groups.
   */
  @Test
  public void testPartitionJobs() throws Exception {
    UrlTriePrefixGrouper grouper =
        new UrlTriePrefixGrouper(UrlTriePostOrderIteratorTest.getUrlTrie2(_property), 4);
    final String startDate = "2016-11-29";
    final String endDate = "2016-11-30";
    final GoogleWebmasterFilter.FilterOperator containsOp = GoogleWebmasterFilter.FilterOperator.CONTAINS;
    final GoogleWebmasterFilter.FilterOperator equalsOp = GoogleWebmasterFilter.FilterOperator.EQUALS;

    // ---- first group: prefix "0" ----
    Triple<String, GoogleWebmasterFilter.FilterOperator, UrlTrieNode> firstGroup = grouper.next();
    // This job is built with a literal group size of 100, unlike the later jobs which use
    // grouper.getGroupSize().
    TrieBasedProducerJob prefix0Job = new TrieBasedProducerJob(startDate, endDate, firstGroup, 100);
    assertJob(prefix0Job, _property + "0", containsOp, startDate, endDate, 3);

    List<? extends ProducerJob> prefix0Parts = prefix0Job.partitionJobs();
    Assert.assertEquals(prefix0Parts.size(), 3);
    assertDateSplit(prefix0Parts.get(0), _property + "03", containsOp, 2, startDate, endDate);
    assertDateSplit(prefix0Parts.get(1), _property + "04", containsOp, 2, startDate, endDate);
    assertDateSplit(prefix0Parts.get(2), _property + "0", equalsOp, 2, startDate, endDate);

    // ---- second group: prefix "1" ----
    Triple<String, GoogleWebmasterFilter.FilterOperator, UrlTrieNode> secondGroup = grouper.next();
    TrieBasedProducerJob prefix1Job =
        new TrieBasedProducerJob(startDate, endDate, secondGroup, grouper.getGroupSize());
    assertJob(prefix1Job, _property + "1", containsOp, startDate, endDate, 1);
    Assert.assertEquals(prefix1Job.partitionJobs().size(), 2);

    // ---- third group: prefix "2" ----
    Triple<String, GoogleWebmasterFilter.FilterOperator, UrlTrieNode> thirdGroup = grouper.next();
    TrieBasedProducerJob prefix2Job =
        new TrieBasedProducerJob(startDate, endDate, thirdGroup, grouper.getGroupSize());
    assertJob(prefix2Job, _property + "2", containsOp, startDate, endDate, 4);

    List<? extends ProducerJob> prefix2Parts = prefix2Job.partitionJobs();
    Assert.assertEquals(prefix2Parts.size(), 3);

    ProducerJob prefix25Job = prefix2Parts.get(0);
    assertJob(prefix25Job, _property + "25", containsOp, startDate, endDate, 2);
    Assert.assertEquals(prefix25Job.partitionJobs().size(), 2);

    assertJob(prefix2Parts.get(1), _property + "26", containsOp, startDate, endDate, 1);
    assertJob(prefix2Parts.get(2), _property + "2", equalsOp, startDate, endDate, 1);

    // The grouper must be exhausted after the three groups above.
    Assert.assertFalse(grouper.hasNext());
  }

  /**
   * Asserts, in this order, the page, operator, start date, end date and page count of a job.
   *
   * @param job the job under inspection
   * @param expectedPage expected value of {@code getPage()}
   * @param expectedOperator expected value of {@code getOperator()}
   * @param expectedStart expected value of {@code getStartDate()}
   * @param expectedEnd expected value of {@code getEndDate()}
   * @param expectedPagesSize expected value of {@code getPagesSize()}
   */
  private static void assertJob(ProducerJob job, String expectedPage,
      GoogleWebmasterFilter.FilterOperator expectedOperator, String expectedStart, String expectedEnd,
      int expectedPagesSize) {
    Assert.assertEquals(job.getPage(), expectedPage);
    Assert.assertEquals(job.getOperator(), expectedOperator);
    Assert.assertEquals(job.getStartDate(), expectedStart);
    Assert.assertEquals(job.getEndDate(), expectedEnd);
    Assert.assertEquals(job.getPagesSize(), expectedPagesSize);
  }

  /**
   * Asserts the page, operator and group size of a job, then asserts that {@code partitionJobs()}
   * returns exactly two jobs equal to {@code SimpleProducerJob(page, start, start)} and
   * {@code SimpleProducerJob(page, end, end)}, in that order.
   *
   * @param job the job under inspection; it is cast to {@link TrieBasedProducerJob} to read the group size
   * @param expectedPage expected value of {@code getPage()}, also used to build the expected partitions
   * @param expectedOperator expected value of {@code getOperator()}
   * @param expectedGroupSize expected value of {@code getGroupSize()}
   * @param start date expected for both bounds of the first partition
   * @param end date expected for both bounds of the second partition
   */
  private static void assertDateSplit(ProducerJob job, String expectedPage,
      GoogleWebmasterFilter.FilterOperator expectedOperator, int expectedGroupSize, String start, String end) {
    Assert.assertEquals(job.getPage(), expectedPage);
    Assert.assertEquals(job.getOperator(), expectedOperator);
    Assert.assertEquals(((TrieBasedProducerJob) job).getGroupSize(), expectedGroupSize);

    List<? extends ProducerJob> byDate = job.partitionJobs();
    Assert.assertEquals(byDate.size(), 2);
    Assert.assertEquals(byDate.get(0), new SimpleProducerJob(expectedPage, start, start));
    Assert.assertEquals(byDate.get(1), new SimpleProducerJob(expectedPage, end, end));
  }
}