/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.publisher;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.testng.Assert;
import org.testng.annotations.Test;
import com.google.common.collect.ImmutableList;
import com.google.common.io.Files;
import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.State;
import gobblin.configuration.WorkUnitState;
import gobblin.metadata.MetadataMerger;
import gobblin.metadata.types.GlobalMetadata;
import gobblin.util.ForkOperatorUtils;
/**
* Tests for BaseDataPublisher
*/
public class BaseDataPublisherTest {

  /**
   * Test DATA_PUBLISHER_METADATA_STR: a user should be able to put an arbitrary metadata string in job configuration
   * and have that written out.
   */
  @Test
  public void testMetadataStrOneBranch()
      throws IOException {
    State s = buildDefaultState(1);

    WorkUnitState wuState = new WorkUnitState();
    wuState.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_STR, "foobar");
    addStateToWorkunit(s, wuState);

    BaseDataPublisher publisher = new BaseDataPublisher(s);
    publisher.publishMetadata(wuState);

    Assert.assertEquals(readMetadataFile(s, 1, 0), "foobar", "Expected to read back metadata from string");
  }

  /**
   * Test that DATA_PUBLISHER_METADATA_STR functionality works across multiple branches.
   */
  @Test
  public void testMetadataStrMultipleWorkUnitsAndBranches()
      throws IOException {
    final int numBranches = 3;
    State s = buildDefaultState(numBranches);

    List<WorkUnitState> workUnits = new ArrayList<>();
    for (int i = 0; i < numBranches; i++) {
      WorkUnitState wuState = new WorkUnitState();
      wuState.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_STR, "foobar");
      addStateToWorkunit(s, wuState);
      workUnits.add(wuState);
    }

    BaseDataPublisher publisher = new BaseDataPublisher(s);
    publisher.publishMetadata(workUnits);

    for (int branch = 0; branch < numBranches; branch++) {
      Assert.assertEquals(readMetadataFile(s, numBranches, branch), "foobar",
          "Expected to read back metadata from string");
    }
  }

  /**
   * Test that an exception is properly thrown if we configure a merger that doesn't actually implement
   * MetadataMerger
   */
  @Test(expectedExceptions = IllegalArgumentException.class)
  public void testBogusMetadataMerger()
      throws IOException {
    State s = buildDefaultState(1);
    s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true");
    // java.lang.String is a real class, but it is not a MetadataMerger
    s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_MERGER_NAME_KEY, "java.lang.String");
    s.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_STR, "foobar");

    WorkUnitState wuState = new WorkUnitState();
    addStateToWorkunit(s, wuState);

    BaseDataPublisher publisher = new BaseDataPublisher(s);
    publisher.publishMetadata(Collections.singletonList(wuState));
  }

  /**
   * This test is testing several things at once:
   * 1. That a merger is called properly for all workunits in a branch
   * 2. That different mergers can be instantiated per branch
   */
  @Test
  public void testMergedMetadata()
      throws IOException {
    final int numBranches = 2;
    final int numWorkUnits = 10;

    State s = buildDefaultState(numBranches);
    for (int i = 0; i < numBranches; i++) {
      String mdKeyName = ForkOperatorUtils
          .getPropertyNameForBranch(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, numBranches, i);
      String mdMergerKeyName = ForkOperatorUtils
          .getPropertyNameForBranch(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_MERGER_NAME_KEY, numBranches, i);

      s.setProp(mdKeyName, "true");
      // Even branches add, odd branches multiply
      s.setProp(mdMergerKeyName,
          (i % 2) == 0 ? TestAdditionMerger.class.getName() : TestMultiplicationMerger.class.getName());
    }

    // For each branch, metadata is (branchId + 1) * (workUnitId + 1) - adding 1 so we don't ever multiply by 0
    List<WorkUnitState> workUnits = new ArrayList<>();
    for (int workUnitId = 0; workUnitId < numWorkUnits; workUnitId++) {
      WorkUnitState wuState = new WorkUnitState();
      addStateToWorkunit(s, wuState);
      for (int branchId = 0; branchId < numBranches; branchId++) {
        String mdForBranchName =
            ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_METADATA_KEY, numBranches, branchId);
        wuState.setProp(mdForBranchName, String.valueOf((branchId + 1) * (workUnitId + 1)));
      }
      workUnits.add(wuState);
    }

    BaseDataPublisher publisher = new BaseDataPublisher(s);
    publisher.publishMetadata(workUnits);

    for (int branch = 0; branch < numBranches; branch++) {
      boolean additive = (branch % 2) == 0;

      // Accumulate in a long: the product for branch 1 is 2^10 * 10! = 3,715,891,200, which does not fit in an int
      long expected = additive ? 0L : 1L;
      for (int i = 0; i < numWorkUnits; i++) {
        long value = (long) (branch + 1) * (i + 1);
        expected = additive ? expected + value : expected * value;
      }

      Assert.assertEquals(readMetadataFile(s, numBranches, branch), String.valueOf(expected),
          "Expected to read back correctly merged metadata from string");
    }
  }

  /**
   * Test that writer metadata set on a workunit is not written out when the job configuration
   * does not enable writer metadata publishing.
   */
  @Test
  public void testNoOutputWhenDisabled()
      throws IOException {
    State s = buildDefaultState(1);

    WorkUnitState wuState = new WorkUnitState();
    addStateToWorkunit(s, wuState);
    wuState.setProp(ConfigurationKeys.WRITER_METADATA_KEY, "abcdefg");

    BaseDataPublisher publisher = new BaseDataPublisher(s);
    publisher.publishMetadata(Collections.singletonList(wuState));

    File mdFile = openMetadataFile(s, 1, 0);
    Assert.assertFalse(mdFile.exists(),
        "Internal metadata from writer should not be written out if no merger is set in config");
  }

  /**
   * Test that, with no explicit metadata output directory configured, a metadata file is written into
   * the directory of each workunit's partition path underneath the publisher's final directory.
   */
  @Test
  public void testWithPartitionKey() throws IOException {
    File publishPath = Files.createTempDir();
    try {
      File part1 = new File(publishPath, "1-2-3-4");
      Assert.assertTrue(part1.mkdir(), "Could not create partition directory " + part1);
      File part2 = new File(publishPath, "5-6-7-8");
      Assert.assertTrue(part2.mkdir(), "Could not create partition directory " + part2);

      State s = buildDefaultState(1);
      String md = new GlobalMetadata().toJson();

      s.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR);
      s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true");
      s.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
      s.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, publishPath.getAbsolutePath());
      s.setProp(ConfigurationKeys.DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR, "false");
      s.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE, "metadata.json");

      WorkUnitState wuState1 = new WorkUnitState();
      wuState1.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "1-2-3-4");
      wuState1.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
      addStateToWorkunit(s, wuState1);

      WorkUnitState wuState2 = new WorkUnitState();
      wuState2.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "5-6-7-8");
      wuState2.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
      addStateToWorkunit(s, wuState2);

      BaseDataPublisher publisher = new BaseDataPublisher(s);
      publisher.publishMetadata(ImmutableList.of(wuState1, wuState2));

      Assert.assertTrue(new File(part1, "metadata.json").exists());
      Assert.assertTrue(new File(part2, "metadata.json").exists());
    } finally {
      FileUtils.deleteDirectory(publishPath);
    }
  }

  /**
   * Test merger that parses every metadata string as an integer and reports the running sum.
   */
  public static class TestAdditionMerger implements MetadataMerger<String> {
    private long sum = 0L;

    @Override
    public void update(String metadata) {
      sum += Long.parseLong(metadata);
    }

    @Override
    public String getMergedMetadata() {
      return String.valueOf(sum);
    }
  }

  /**
   * Test merger that parses every metadata string as an integer and reports the running product.
   * The accumulator is a long so that the products used in this test class do not overflow.
   */
  public static class TestMultiplicationMerger implements MetadataMerger<String> {
    private long product = 1L;

    /**
     * @param config ignored; this constructor only exists so the test covers a merger that
     *               declares a Properties-argument constructor instead of a no-arg one
     */
    public TestMultiplicationMerger(Properties config) {
      // testing ctor call
    }

    @Override
    public void update(String metadata) {
      product *= Long.parseLong(metadata);
    }

    @Override
    public String getMergedMetadata() {
      return String.valueOf(product);
    }
  }

  /**
   * Copies every property currently set on {@code s} into {@code wuState}, overwriting any
   * property of the same name already present there.
   */
  private void addStateToWorkunit(State s, WorkUnitState wuState) {
    for (Map.Entry<Object, Object> prop : s.getProperties().entrySet()) {
      wuState.setProp((String) prop.getKey(), prop.getValue());
    }
  }

  /**
   * Resolves (without opening or creating) the metadata file the test expects for a branch.
   *
   * @param state state holding the metadata output directory and file name
   * @param numBranches total number of branches; with more than one, the file name gets a ".branchId" suffix
   * @param branchId branch whose metadata file should be located
   * @return the expected metadata file, which may or may not exist
   */
  private File openMetadataFile(State state, int numBranches, int branchId) {
    String dir = state.getProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR);
    String fileName = state.getProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE);
    if (numBranches > 1) {
      fileName += "." + String.valueOf(branchId);
    }
    return new File(dir, fileName);
  }

  /**
   * Reads the full contents of a branch's metadata file as a UTF-8 string.
   *
   * @throws IOException if the file does not exist or cannot be read
   */
  private String readMetadataFile(State state, int numBranches, int branchId)
      throws IOException {
    try (InputStream mdStream = new FileInputStream(openMetadataFile(state, numBranches, branchId))) {
      return IOUtils.toString(mdStream, StandardCharsets.UTF_8);
    }
  }

  /**
   * Builds a state configured for {@code numBranches} branches whose metadata output location is a
   * unique, not-yet-existing file name inside the system temp directory.
   *
   * @throws IOException if the unique temp file cannot be created or cannot be removed again
   */
  private State buildDefaultState(int numBranches)
      throws IOException {
    State state = new State();
    state.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, numBranches);

    // createTempFile is only used to reserve a unique name; the file itself must not exist,
    // because some tests assert that nothing gets written to this location
    File tmpLocation = File.createTempFile("metadata", "");
    if (!tmpLocation.delete()) {
      throw new IOException("Could not delete temporary placeholder file " + tmpLocation);
    }
    state.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR, tmpLocation.getParent());
    state.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE, tmpLocation.getName());

    // Don't leave metadata files behind in the temp directory once the test JVM exits
    for (int branchId = 0; branchId < numBranches; branchId++) {
      openMetadataFile(state, numBranches, branchId).deleteOnExit();
    }
    return state;
  }
}