/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.runtime;
import java.io.IOException;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.testng.Assert;
import com.google.common.io.Closer;
import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.SourceState;
import gobblin.configuration.WorkUnitState;
import gobblin.metastore.StateStore;
import gobblin.runtime.JobState.DatasetState;
import gobblin.source.extractor.Extractor;
import gobblin.source.workunit.WorkUnit;
import gobblin.test.TestExtractor;
import gobblin.test.TestSource;
import gobblin.util.ClusterNameTags;
import gobblin.util.JobLauncherUtils;
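/**
 * Helper for {@link JobLauncher} unit tests: launches jobs through {@link JobLauncherFactory} and
 * asserts on the dataset states persisted in the supplied {@link StateStore}.
 *
 * @author Yinan Li
 */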
public class JobLauncherTestHelper {
/** Property key {@code "source.files"}; presumably names the source file list. Not read by this class. */
public static final String SOURCE_FILE_LIST_KEY = "source.files";

/** Store queried after each launch to read back the persisted dataset states. */
private final StateStore<JobState.DatasetState> datasetStateStore;

/** Launcher-level properties handed to {@link JobLauncherFactory} for every launch. */
private final Properties launcherProps;

/**
 * Creates a helper bound to one launcher configuration and one dataset state store.
 *
 * @param launcherProps properties passed to {@link JobLauncherFactory#newJobLauncher} on each run
 * @param datasetStateStore store from which dataset states are read (and deleted) by the helper
 */
public JobLauncherTestHelper(Properties launcherProps, StateStore<JobState.DatasetState> datasetStateStore) {
  this.datasetStateStore = datasetStateStore;
  this.launcherProps = launcherProps;
}
/**
 * Launches the job described by {@code jobProps} and verifies the job metrics tags and the
 * persisted dataset state.
 *
 * <p>A fresh job ID is generated from the job name and written back into {@code jobProps} under
 * {@link ConfigurationKeys#JOB_ID_KEY}. After the launcher is closed, the method asserts that job
 * metrics are present and tagged with {@link ClusterNameTags#CLUSTER_IDENTIFIER_TAG_NAME}, that the
 * first dataset state stored under {@code <jobId>.jst} is COMMITTED with 4 completed tasks and no
 * job failures, and that every task is COMMITTED with {@link TestExtractor#TOTAL_RECORDS} records
 * written.
 *
 * @param jobProps job configuration; mutated to carry the generated job ID
 * @throws Exception if launching the job, closing the launcher, or reading the state store fails
 */
public void runTest(Properties jobProps) throws Exception {
  String jobName = jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY);
  String jobId = JobLauncherUtils.newJobId(jobName);
  jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, jobId);

  // try-with-resources keeps a launch failure as the primary exception; a failure in close()
  // is attached as suppressed instead of replacing it.
  JobContext jobContext;
  try (JobLauncher jobLauncher = JobLauncherFactory.newJobLauncher(this.launcherProps, jobProps)) {
    jobLauncher.launchJob(null);
    jobContext = ((AbstractJobLauncher) jobLauncher).getJobContext();
  }

  Assert.assertTrue(jobContext.getJobMetricsOptional().isPresent());
  String jobMetricContextTags = jobContext.getJobMetricsOptional().get().getMetricContext().getTags().toString();
  Assert.assertTrue(jobMetricContextTags.contains(ClusterNameTags.CLUSTER_IDENTIFIER_TAG_NAME),
      ClusterNameTags.CLUSTER_IDENTIFIER_TAG_NAME + " tag missing in job metric context tags.");

  List<JobState.DatasetState> datasetStateList = this.datasetStateStore.getAll(jobName, jobId + ".jst");
  // Fail with a readable assertion rather than an IndexOutOfBoundsException from get(0).
  Assert.assertFalse(datasetStateList.isEmpty(), "No dataset state persisted for job " + jobId);
  DatasetState datasetState = datasetStateList.get(0);

  Assert.assertEquals(datasetState.getState(), JobState.RunningState.COMMITTED);
  Assert.assertEquals(datasetState.getCompletedTasks(), 4);
  Assert.assertEquals(datasetState.getJobFailures(), 0);

  for (TaskState taskState : datasetState.getTaskStates()) {
    Assert.assertEquals(taskState.getWorkingState(), WorkUnitState.WorkingState.COMMITTED);
    Assert.assertEquals(taskState.getPropAsLong(ConfigurationKeys.WRITER_RECORDS_WRITTEN),
        TestExtractor.TOTAL_RECORDS);
  }
}
/**
 * Launches the job described by {@code jobProps} and verifies that every task extracted and
 * wrote exactly {@code limit} rows.
 *
 * <p>A fresh job ID is generated and stored in {@code jobProps}. The first dataset state stored
 * under {@code <jobId>.jst} must be COMMITTED with 4 completed tasks and no job failures.
 *
 * @param jobProps job configuration; mutated to carry the generated job ID
 * @param limit expected value of both {@link ConfigurationKeys#EXTRACTOR_ROWS_EXTRACTED} and
 *     {@link ConfigurationKeys#WRITER_ROWS_WRITTEN} on every task state
 * @throws Exception if launching the job, closing the launcher, or reading the state store fails
 */
public void runTestWithPullLimit(Properties jobProps, long limit) throws Exception {
  String jobName = jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY);
  String jobId = JobLauncherUtils.newJobId(jobName);
  jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, jobId);

  // try-with-resources keeps a launch failure as the primary exception; a failure in close()
  // is attached as suppressed instead of replacing it.
  try (JobLauncher jobLauncher = JobLauncherFactory.newJobLauncher(this.launcherProps, jobProps)) {
    jobLauncher.launchJob(null);
  }

  List<JobState.DatasetState> datasetStateList = this.datasetStateStore.getAll(jobName, jobId + ".jst");
  // Fail with a readable assertion rather than an IndexOutOfBoundsException from get(0).
  Assert.assertFalse(datasetStateList.isEmpty(), "No dataset state persisted for job " + jobId);
  DatasetState datasetState = datasetStateList.get(0);

  Assert.assertEquals(datasetState.getState(), JobState.RunningState.COMMITTED);
  Assert.assertEquals(datasetState.getCompletedTasks(), 4);
  Assert.assertEquals(datasetState.getJobFailures(), 0);

  for (TaskState taskState : datasetState.getTaskStates()) {
    Assert.assertEquals(taskState.getWorkingState(), WorkUnitState.WorkingState.COMMITTED);
    Assert.assertEquals(taskState.getPropAsLong(ConfigurationKeys.EXTRACTOR_ROWS_EXTRACTED), limit);
    Assert.assertEquals(taskState.getPropAsLong(ConfigurationKeys.WRITER_ROWS_WRITTEN), limit);
  }
}
/**
 * Launches the job described by {@code jobProps}, cancels it from a second thread after 500 ms,
 * and verifies that the cancellation call completed and that no dataset state was persisted.
 *
 * <p>A fresh job ID is generated and stored in {@code jobProps}.
 *
 * @param jobProps job configuration; mutated to carry the generated job ID
 * @throws Exception if launching the job, closing the launcher, or reading the state store fails,
 *     or if this thread is interrupted while waiting for the cancelling thread
 */
public void runTestWithCancellation(final Properties jobProps) throws Exception {
  String jobName = jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY);
  String jobId = JobLauncherUtils.newJobId(jobName);
  jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, jobId);

  // Upper bound on how long to wait for the cancelling thread once launchJob has returned.
  final long cancellerJoinTimeoutMillis = 10000L;

  // try-with-resources keeps a launch failure as the primary exception; a failure in close()
  // is attached as suppressed instead of replacing it.
  try (final JobLauncher jobLauncher = JobLauncherFactory.newJobLauncher(this.launcherProps, jobProps)) {
    final AtomicBoolean isCancelled = new AtomicBoolean(false);

    // This thread will cancel the job after some time
    Thread canceller = new Thread(new Runnable() {
      @Override
      public void run() {
        try {
          Thread.sleep(500);
          jobLauncher.cancelJob(null);
          isCancelled.set(true);
        } catch (InterruptedException ie) {
          // Preserve the interrupt status; the flag stays false so the assertion below fails.
          Thread.currentThread().interrupt();
        } catch (Exception ignored) {
          // Deliberately ignored: the flag stays false, so the assertion below reports the failure.
        }
      }
    });
    canceller.start();

    jobLauncher.launchJob(null);

    // Nothing visible here orders the canceller's flag write before launchJob's return, so wait
    // (bounded) for the canceller to finish before checking the flag.
    canceller.join(cancellerJoinTimeoutMillis);
    Assert.assertTrue(isCancelled.get());
  }

  List<JobState.DatasetState> datasetStateList = this.datasetStateStore.getAll(jobName, jobId + ".jst");
  Assert.assertTrue(datasetStateList.isEmpty());
}
/**
 * Launches the job described by {@code jobProps} and verifies the published output of both forks.
 *
 * <p>A fresh job ID is generated and stored in {@code jobProps}. The first dataset state stored
 * under {@code <jobId>.jst} must be COMMITTED with 4 completed tasks and no job failures. For
 * every task, the directories {@code fork_0} and {@code fork_1} under the publisher final
 * directory must exist on the local file system and hold 2 entries each, and the per-fork
 * records-written counter must equal {@link TestExtractor#TOTAL_RECORDS}.
 *
 * @param jobProps job configuration; mutated to carry the generated job ID
 * @throws Exception if launching the job, closing the launcher, or accessing the store or file
 *     system fails
 */
public void runTestWithFork(Properties jobProps) throws Exception {
  final String name = jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY);
  final String id = JobLauncherUtils.newJobId(name).toString();
  jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, id);

  try (JobLauncher launcher = JobLauncherFactory.newJobLauncher(this.launcherProps, jobProps)) {
    launcher.launchJob(null);
  }

  List<JobState.DatasetState> persisted = this.datasetStateStore.getAll(name, id + ".jst");
  DatasetState state = persisted.get(0);
  Assert.assertEquals(state.getState(), JobState.RunningState.COMMITTED);
  Assert.assertEquals(state.getCompletedTasks(), 4);
  Assert.assertEquals(state.getJobFailures(), 0);

  FileSystem localFs = FileSystem.getLocal(new Configuration());
  for (TaskState task : state.getTaskStates()) {
    Assert.assertEquals(task.getWorkingState(), WorkUnitState.WorkingState.COMMITTED);

    // Same checks for each of the two forks, in fork order.
    for (int fork = 0; fork < 2; fork++) {
      Path forkDir = new Path(this.launcherProps.getProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR),
          new Path(task.getExtract().getOutputFilePath(), "fork_" + fork));
      Assert.assertTrue(localFs.exists(forkDir));
      Assert.assertEquals(localFs.listStatus(forkDir).length, 2);
      Assert.assertEquals(task.getPropAsLong(ConfigurationKeys.WRITER_RECORDS_WRITTEN + "." + fork),
          TestExtractor.TOTAL_RECORDS);
    }
  }
}
/**
 * Launches the job with {@link MultiDatasetTestSource} as its source and verifies one committed
 * dataset state per dataset.
 *
 * <p>A fresh job ID is generated and stored in {@code jobProps}, and the source class is
 * overridden. For each of {@code Dataset0} to {@code Dataset3}, the first state stored under
 * {@code Dataset<i>-current.jst} must carry the matching URN, be COMMITTED with 1 completed task
 * and no job failures, and each of its tasks must be COMMITTED with
 * {@link TestExtractor#TOTAL_RECORDS} records written.
 *
 * @param jobProps job configuration; mutated to carry the job ID and source class
 * @throws Exception if launching the job, closing the launcher, or reading the state store fails
 */
public void runTestWithMultipleDatasets(Properties jobProps) throws Exception {
  String jobName = jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY);
  String jobId = JobLauncherUtils.newJobId(jobName);
  jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, jobId);
  jobProps.setProperty(ConfigurationKeys.SOURCE_CLASS_KEY, MultiDatasetTestSource.class.getName());

  // try-with-resources keeps a launch failure as the primary exception; a failure in close()
  // is attached as suppressed instead of replacing it.
  try (JobLauncher jobLauncher = JobLauncherFactory.newJobLauncher(this.launcherProps, jobProps)) {
    jobLauncher.launchJob(null);
  }

  for (int i = 0; i < 4; i++) {
    String datasetUrn = "Dataset" + i;
    List<JobState.DatasetState> datasetStateList =
        this.datasetStateStore.getAll(jobName, datasetUrn + "-current.jst");
    // Fail with a readable assertion rather than an IndexOutOfBoundsException from get(0).
    Assert.assertFalse(datasetStateList.isEmpty(), "No dataset state persisted for " + datasetUrn);
    DatasetState datasetState = datasetStateList.get(0);

    Assert.assertEquals(datasetState.getDatasetUrn(), datasetUrn);
    Assert.assertEquals(datasetState.getState(), JobState.RunningState.COMMITTED);
    Assert.assertEquals(datasetState.getCompletedTasks(), 1);
    Assert.assertEquals(datasetState.getJobFailures(), 0);

    for (TaskState taskState : datasetState.getTaskStates()) {
      Assert.assertEquals(taskState.getProp(ConfigurationKeys.DATASET_URN_KEY), datasetUrn);
      Assert.assertEquals(taskState.getWorkingState(), WorkUnitState.WorkingState.COMMITTED);
      Assert.assertEquals(taskState.getPropAsLong(ConfigurationKeys.WRITER_RECORDS_WRITTEN),
          TestExtractor.TOTAL_RECORDS);
    }
  }
}
/**
 * Launches the job with {@link TestSourceWithFaultyExtractor} under the {@code "successful"}
 * commit policy and verifies that only the faulty task failed.
 *
 * <p>A fresh job ID is generated and stored in {@code jobProps}; job-level publishing is turned
 * off, the commit policy is set to {@code "successful"}, the source class is overridden and task
 * retries are set to 0. The first dataset state stored under {@code <jobId>.jst} must be
 * COMMITTED with 4 completed tasks. Tasks whose ID ends in {@code "0"} must be FAILED; all others
 * must be COMMITTED with {@link TestExtractor#TOTAL_RECORDS} records written.
 *
 * @param jobProps job configuration; mutated with the settings described above
 * @throws Exception if launching the job, closing the launcher, or reading the state store fails
 */
public void runTestWithCommitSuccessfulTasksPolicy(Properties jobProps) throws Exception {
  String jobName = jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY);
  String jobId = JobLauncherUtils.newJobId(jobName);
  jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, jobId);
  jobProps.setProperty(ConfigurationKeys.PUBLISH_DATA_AT_JOB_LEVEL, Boolean.FALSE.toString());
  jobProps.setProperty(ConfigurationKeys.JOB_COMMIT_POLICY_KEY, "successful");
  jobProps.setProperty(ConfigurationKeys.SOURCE_CLASS_KEY, TestSourceWithFaultyExtractor.class.getName());
  jobProps.setProperty(ConfigurationKeys.MAX_TASK_RETRIES_KEY, "0");

  // try-with-resources keeps a launch failure as the primary exception; a failure in close()
  // is attached as suppressed instead of replacing it.
  try (JobLauncher jobLauncher = JobLauncherFactory.newJobLauncher(this.launcherProps, jobProps)) {
    jobLauncher.launchJob(null);
  }

  List<JobState.DatasetState> datasetStateList = this.datasetStateStore.getAll(jobName, jobId + ".jst");
  // Fail with a readable assertion rather than an IndexOutOfBoundsException from get(0).
  Assert.assertFalse(datasetStateList.isEmpty(), "No dataset state persisted for job " + jobId);
  DatasetState datasetState = datasetStateList.get(0);

  Assert.assertEquals(datasetState.getState(), JobState.RunningState.COMMITTED);
  Assert.assertEquals(datasetState.getCompletedTasks(), 4);

  for (TaskState taskState : datasetState.getTaskStates()) {
    if (taskState.getTaskId().endsWith("0")) {
      // This is the task that TestSourceWithFaultyExtractor gives a FaultyExtractor.
      Assert.assertEquals(taskState.getWorkingState(), WorkUnitState.WorkingState.FAILED);
    } else {
      Assert.assertEquals(taskState.getWorkingState(), WorkUnitState.WorkingState.COMMITTED);
      Assert.assertEquals(taskState.getPropAsLong(ConfigurationKeys.WRITER_RECORDS_WRITTEN),
          TestExtractor.TOTAL_RECORDS);
    }
  }
}
/**
 * Launches the job with {@link MultiDatasetTestSourceWithFaultyExtractor} and verifies the
 * per-dataset outcome when the extractor for {@code Dataset0} fails.
 *
 * <p>A fresh job ID is generated and stored in {@code jobProps}, the source class is overridden
 * and task retries are set to 0. A {@link JobException} from {@code launchJob} is expected and
 * ignored. With the {@code "partial"} commit policy, {@code Dataset0} must have a COMMITTED
 * state with one COMMITTED task; otherwise no state may exist for {@code Dataset0}. In both
 * cases {@code Dataset1} to {@code Dataset3} must each have a COMMITTED state with 1 completed
 * task.
 *
 * @param jobProps job configuration; mutated with the settings described above
 * @param usePartialCommitPolicy whether to set the job commit policy to {@code "partial"}
 * @throws Exception if closing the launcher or reading the state store fails, or if launching
 *     fails with anything other than a {@link JobException}
 */
public void runTestWithMultipleDatasetsAndFaultyExtractor(Properties jobProps, boolean usePartialCommitPolicy)
    throws Exception {
  String jobName = jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY);
  String jobId = JobLauncherUtils.newJobId(jobName);
  jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, jobId);
  jobProps.setProperty(ConfigurationKeys.SOURCE_CLASS_KEY, MultiDatasetTestSourceWithFaultyExtractor.class.getName());
  jobProps.setProperty(ConfigurationKeys.MAX_TASK_RETRIES_KEY, "0");
  if (usePartialCommitPolicy) {
    jobProps.setProperty(ConfigurationKeys.JOB_COMMIT_POLICY_KEY, "partial");
  }

  // The JobException is handled inside the resource block so the launcher is still closed after
  // the catch, and a close() failure keeps propagating as it did before.
  try (JobLauncher jobLauncher = JobLauncherFactory.newJobLauncher(this.launcherProps, jobProps)) {
    try {
      jobLauncher.launchJob(null);
    } catch (JobException expected) {
      // JobException is expected
    }
  }

  if (usePartialCommitPolicy) {
    List<JobState.DatasetState> datasetStateList = this.datasetStateStore.getAll(jobName, "Dataset0-current.jst");
    // Fail with a readable assertion rather than an IndexOutOfBoundsException from get(0).
    Assert.assertFalse(datasetStateList.isEmpty(), "No dataset state persisted for Dataset0");
    JobState.DatasetState datasetState = datasetStateList.get(0);
    Assert.assertEquals(datasetState.getState(), JobState.RunningState.COMMITTED);
    Assert.assertEquals(datasetState.getTaskCount(), 1);
    TaskState taskState = datasetState.getTaskStates().get(0);
    // BaseDataPublisher will change the state to COMMITTED
    Assert.assertEquals(taskState.getWorkingState(), WorkUnitState.WorkingState.COMMITTED);
  } else {
    // Task 0 should have failed
    Assert.assertTrue(this.datasetStateStore.getAll(jobName, "Dataset0-current.jst").isEmpty());
  }

  for (int i = 1; i < 4; i++) {
    String datasetUrn = "Dataset" + i;
    List<JobState.DatasetState> datasetStateList =
        this.datasetStateStore.getAll(jobName, datasetUrn + "-current.jst");
    Assert.assertFalse(datasetStateList.isEmpty(), "No dataset state persisted for " + datasetUrn);
    JobState.DatasetState datasetState = datasetStateList.get(0);

    Assert.assertEquals(datasetState.getDatasetUrn(), datasetUrn);
    Assert.assertEquals(datasetState.getState(), JobState.RunningState.COMMITTED);
    Assert.assertEquals(datasetState.getCompletedTasks(), 1);
    for (TaskState taskState : datasetState.getTaskStates()) {
      Assert.assertEquals(taskState.getProp(ConfigurationKeys.DATASET_URN_KEY), datasetUrn);
      Assert.assertEquals(taskState.getWorkingState(), WorkUnitState.WorkingState.COMMITTED);
    }
  }
}
/**
 * Deletes the named store from the dataset state store this helper was constructed with.
 *
 * @param storeName name passed through to {@link StateStore#delete}
 * @throws IOException if the underlying store reports a failure
 */
public void deleteStateStore(final String storeName) throws IOException {
  datasetStateStore.delete(storeName);
}
/**
 * A {@link TestSource} that assigns each work unit to its own dataset: the work unit at
 * position {@code i} of the parent's list gets the dataset URN {@code "Dataset" + i}.
 */
public static class MultiDatasetTestSource extends TestSource {

  /**
   * Returns the parent's work units after tagging each with a position-based dataset URN.
   *
   * @param state source state forwarded to {@link TestSource#getWorkunits}
   * @return the same list the parent produced, with {@link ConfigurationKeys#DATASET_URN_KEY} set
   */
  @Override
  public List<WorkUnit> getWorkunits(SourceState state) {
    List<WorkUnit> tagged = super.getWorkunits(state);
    int position = 0;
    for (WorkUnit unit : tagged) {
      unit.setProp(ConfigurationKeys.DATASET_URN_KEY, "Dataset" + position);
      position++;
    }
    return tagged;
  }
}
/**
 * A {@link MultiDatasetTestSource} whose extractor fails for the dataset whose URN ends in
 * {@code "0"}; every other work unit gets the parent's extractor.
 */
public static class MultiDatasetTestSourceWithFaultyExtractor extends MultiDatasetTestSource {

  /**
   * Picks the extractor for a work unit from its dataset URN.
   *
   * @param workUnitState work unit state; its {@link ConfigurationKeys#DATASET_URN_KEY} property
   *     is read and must be set
   * @return a {@link FaultyExtractor} when the URN ends in {@code "0"}, otherwise the extractor
   *     built by the parent source
   */
  @Override
  public Extractor<String, String> getExtractor(WorkUnitState workUnitState) {
    // Decide first, so the parent extractor is only built when it is actually returned and is
    // never created just to be discarded unclosed.
    if (workUnitState.getProp(ConfigurationKeys.DATASET_URN_KEY).endsWith("0")) {
      return new FaultyExtractor(workUnitState);
    }
    return super.getExtractor(workUnitState);
  }
}
/**
 * A {@link TestExtractor} used for failure injection: every call to {@link #readRecord} throws.
 */
public static class FaultyExtractor extends TestExtractor {

  /**
   * @param workUnitState work unit state forwarded unchanged to the {@link TestExtractor} constructor
   */
  public FaultyExtractor(final WorkUnitState workUnitState) {
    super(workUnitState);
  }

  /**
   * Never returns a record.
   *
   * @param reuse ignored
   * @throws IOException always, with the message {@code "Injected failure"}
   */
  @Override
  public String readRecord(@Deprecated String reuse) throws IOException {
    throw new IOException("Injected failure");
  }
}
/**
 * A {@link TestSource} whose extractor fails for the task whose ID ends in {@code "0"}; every
 * other task gets the parent's extractor.
 */
public static class TestSourceWithFaultyExtractor extends TestSource {

  /**
   * Picks the extractor for a task from its task ID.
   *
   * @param workUnitState state of the task; cast to {@link TaskState} to read the task ID, so it
   *     must be a {@link TaskState} instance
   * @return a {@link FaultyExtractor} when the task ID ends in {@code "0"}, otherwise the
   *     extractor built by the parent source
   */
  @Override
  public Extractor<String, String> getExtractor(WorkUnitState workUnitState) {
    // Decide first, so the parent extractor is only built when it is actually returned and is
    // never created just to be discarded unclosed.
    if (((TaskState) workUnitState).getTaskId().endsWith("0")) {
      return new FaultyExtractor(workUnitState);
    }
    return super.getExtractor(workUnitState);
  }
}
}