/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.compaction.mapreduce.conditions;
import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Period;
import org.joda.time.format.PeriodFormatterBuilder;
import org.joda.time.format.PeriodFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import com.google.common.collect.Lists;
import com.google.common.base.Optional;
import gobblin.compaction.conditions.RecompactionCondition;
import gobblin.compaction.conditions.RecompactionCombineCondition;
import gobblin.compaction.conditions.RecompactionConditionBasedOnDuration;
import gobblin.compaction.conditions.RecompactionConditionBasedOnFileCount;
import gobblin.compaction.conditions.RecompactionConditionBasedOnRatio;
import gobblin.compaction.conditions.RecompactionConditionFactory;
import gobblin.compaction.dataset.Dataset;
import gobblin.compaction.dataset.DatasetHelper;
import gobblin.compaction.mapreduce.MRCompactor;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
/**
 * Tests for the {@link gobblin.compaction.conditions.RecompactionCondition} implementations:
 * the file-count, ratio and duration based conditions, and
 * {@link RecompactionCombineCondition} which combines several conditions with OR / AND.
 *
 * <p>All tests share one {@link Dataset} that is configured once in {@link #setUp()}.
 */
@Test(groups = {"gobblin.compaction.mapreduce.conditions"})
public class RecompactionConditionTest {
  private static final Logger LOG = LoggerFactory.getLogger(RecompactionConditionTest.class);

  private final Path inputPath = new Path("/tmp/input");
  private final Path inputLatePath = new Path("/tmp/input_late");
  private final Path outputPath = new Path("/tmp/output");
  private final Path outputLatePath = new Path("/tmp/output_late");
  private final Path tmpPath = new Path("/tmp/output_tmp");

  /** Dataset shared by every test; built and configured in {@link #setUp()}. */
  private Dataset dataset;

  /**
   * Returns the current time in the time zone named by
   * {@link MRCompactor#DEFAULT_COMPACTION_TIMEZONE}.
   *
   * @return a new {@link DateTime} for "now" in that zone
   */
  public DateTime getCurrentTime() {
    return new DateTime(DateTimeZone.forID(MRCompactor.DEFAULT_COMPACTION_TIMEZONE));
  }

  /**
   * Builds the shared dataset and sets the late-data thresholds the tests assert against:
   * a file-count threshold of 3, a per-dataset threshold string, and a duration of "12h".
   *
   * @throws IOException declared for compatibility with the original signature
   */
  @BeforeClass
  public void setUp() throws IOException {
    dataset = new Dataset.Builder()
        .withPriority(1.0)
        .withDatasetName("Identity/MemberAccount")
        .withInputPath(inputPath)
        .withInputLatePath(inputLatePath)
        .withOutputPath(outputPath)
        .withOutputLatePath(outputLatePath)
        .withOutputTmpPath(tmpPath)
        .build();

    // NOTE(review): COMPACTION_LATEDATA_THRESHOLD_DURATION is set twice (here and at the end of
    // this method). The later "12h" value is presumably the effective one -- confirm that
    // setJobProp overwrites, then drop this first call.
    dataset.setJobProp(MRCompactor.COMPACTION_LATEDATA_THRESHOLD_DURATION,
        MRCompactor.DEFAULT_COMPACTION_LATEDATA_THRESHOLD_DURATION);
    dataset.setJobProp(MRCompactor.COMPACTION_LATEDATA_THRESHOLD_FILE_NUM, 3);
    dataset.setJobProp(MRCompactor.COMPACTION_LATEDATA_THRESHOLD_FOR_RECOMPACT_PER_DATASET,
        "Identity.*,B.*:0.2; C.*,D.*:0.3");
    dataset.setJobProp(MRCompactor.COMPACTION_LATEDATA_THRESHOLD_DURATION, "12h");
  }

  /**
   * With the file-count threshold set to 3, two files under the late output directory must not
   * trigger recompaction and a third file must.
   *
   * <p>Failures while preparing the file system are propagated so that the test fails instead
   * of silently passing without having asserted anything.
   *
   * @throws Exception if the file system cannot be obtained or manipulated
   */
  @Test
  public void testRecompactionConditionBasedOnFileCount() throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());

    // Start from an empty late-output directory so leftovers from earlier runs do not count.
    fs.delete(outputLatePath, true);
    fs.mkdirs(outputLatePath);
    try {
      RecompactionConditionFactory factory = new RecompactionConditionBasedOnFileCount.Factory();
      RecompactionCondition conditionBasedOnFileCount = factory.createRecompactionCondition(dataset);
      DatasetHelper helper = new DatasetHelper(dataset, fs, Lists.newArrayList("avro"));

      fs.createNewFile(new Path(outputLatePath, new Path("1.avro")));
      fs.createNewFile(new Path(outputLatePath, new Path("2.avro")));
      Assert.assertFalse(conditionBasedOnFileCount.isRecompactionNeeded(helper));

      fs.createNewFile(new Path(outputLatePath, new Path("3.avro")));
      Assert.assertTrue(conditionBasedOnFileCount.isRecompactionNeeded(helper));
    } finally {
      // Clean up even when an assertion fails, so later runs are not affected.
      fs.delete(outputLatePath, true);
    }
  }

  /**
   * Checks the ratio based condition against mocked record counts: 6 late / 94 regular records
   * must not trigger recompaction, 21 late / 79 regular records must.
   *
   * <p>NOTE(review): presumably the dataset name matches the "Identity.*" pattern and therefore
   * uses the 0.2 threshold configured in {@link #setUp()} -- confirm against the condition.
   */
  @Test
  public void testRecompactionConditionBasedOnRatio() {
    RecompactionConditionFactory factory = new RecompactionConditionBasedOnRatio.Factory();
    RecompactionCondition conditionBasedOnRatio = factory.createRecompactionCondition(dataset);
    DatasetHelper helper = mock(DatasetHelper.class);

    when(helper.getLateOutputRecordCount()).thenReturn(6L);
    when(helper.getOutputRecordCount()).thenReturn(94L);
    Assert.assertFalse(conditionBasedOnRatio.isRecompactionNeeded(helper));

    when(helper.getLateOutputRecordCount()).thenReturn(21L);
    when(helper.getOutputRecordCount()).thenReturn(79L);
    Assert.assertTrue(conditionBasedOnRatio.isRecompactionNeeded(helper));
  }

  /**
   * Checks the duration based condition around the "12h" threshold set in {@link #setUp()}:
   * an earliest late file that is 11h59min old must not trigger recompaction, one that is
   * 12h01min old must.
   */
  @Test
  public void testRecompactionConditionBasedOnDuration() {
    RecompactionConditionFactory factory = new RecompactionConditionBasedOnDuration.Factory();
    RecompactionCondition conditionBasedOnDuration = factory.createRecompactionCondition(dataset);

    // Formatter for the period strings used below, e.g. "11h59min".
    PeriodFormatter periodFormatter = new PeriodFormatterBuilder()
        .appendMonths().appendSuffix("m")
        .appendDays().appendSuffix("d")
        .appendHours().appendSuffix("h")
        .appendMinutes().appendSuffix("min")
        .toFormatter();

    // Both "now" and the file modification time come from the mock, so the measured age is
    // exactly the parsed period and does not drift with the wall clock.
    DateTime currentTime = getCurrentTime();
    DatasetHelper helper = mock(DatasetHelper.class);
    when(helper.getDataset()).thenReturn(dataset);
    when(helper.getCurrentTime()).thenReturn(currentTime);

    DateTime justUnderThreshold = currentTime.minus(periodFormatter.parsePeriod("11h59min"));
    when(helper.getEarliestLateFileModificationTime()).thenReturn(Optional.of(justUnderThreshold));
    Assert.assertFalse(conditionBasedOnDuration.isRecompactionNeeded(helper));

    DateTime justOverThreshold = currentTime.minus(periodFormatter.parsePeriod("12h01min"));
    when(helper.getEarliestLateFileModificationTime()).thenReturn(Optional.of(justOverThreshold));
    Assert.assertTrue(conditionBasedOnDuration.isRecompactionNeeded(helper));
  }

  /**
   * Checks {@link RecompactionCombineCondition} over three mocked conditions: with OR the
   * result is false when all are false and true when one is true; with AND the result is
   * false when one is false and true when all are true.
   */
  @Test
  public void testRecompactionCombineCondition() {
    DatasetHelper helper = mock(DatasetHelper.class);
    RecompactionCondition cond1 = mock(RecompactionConditionBasedOnRatio.class);
    RecompactionCondition cond2 = mock(RecompactionConditionBasedOnFileCount.class);
    RecompactionCondition cond3 = mock(RecompactionConditionBasedOnDuration.class);

    RecompactionCombineCondition combineConditionOr = new RecompactionCombineCondition(
        Arrays.asList(cond1, cond2, cond3), RecompactionCombineCondition.CombineOperation.OR);

    // OR: no condition fires.
    when(cond1.isRecompactionNeeded(helper)).thenReturn(false);
    when(cond2.isRecompactionNeeded(helper)).thenReturn(false);
    when(cond3.isRecompactionNeeded(helper)).thenReturn(false);
    Assert.assertFalse(combineConditionOr.isRecompactionNeeded(helper));

    // OR: exactly one condition fires.
    when(cond1.isRecompactionNeeded(helper)).thenReturn(false);
    when(cond2.isRecompactionNeeded(helper)).thenReturn(true);
    when(cond3.isRecompactionNeeded(helper)).thenReturn(false);
    Assert.assertTrue(combineConditionOr.isRecompactionNeeded(helper));

    RecompactionCombineCondition combineConditionAnd = new RecompactionCombineCondition(
        Arrays.asList(cond1, cond2, cond3), RecompactionCombineCondition.CombineOperation.AND);

    // AND: one condition does not fire.
    when(cond1.isRecompactionNeeded(helper)).thenReturn(true);
    when(cond2.isRecompactionNeeded(helper)).thenReturn(true);
    when(cond3.isRecompactionNeeded(helper)).thenReturn(false);
    Assert.assertFalse(combineConditionAnd.isRecompactionNeeded(helper));

    // AND: every condition fires.
    when(cond1.isRecompactionNeeded(helper)).thenReturn(true);
    when(cond2.isRecompactionNeeded(helper)).thenReturn(true);
    when(cond3.isRecompactionNeeded(helper)).thenReturn(true);
    Assert.assertTrue(combineConditionAnd.isRecompactionNeeded(helper));
  }
}