/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.writer;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Type;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.reflect.TypeToken;
import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.State;
/**
 * Unit tests for {@link AvroHdfsDataWriter}.
 *
 * <p>The test builds a writer from {@link TestConstants} settings, writes the JSON test records
 * as Avro {@link GenericRecord}s, commits the output, and then reads the resulting file back to
 * verify its contents.
 *
 * @author Yinan Li
 */
@Test(groups = { "gobblin.writer" })
public class AvroHdfsDataWriterTest {

  /** Gson target type used to deserialize one JSON test record into a field-name to value map. */
  private static final Type FIELD_ENTRY_TYPE = new TypeToken<Map<String, Object>>() {}.getType();

  private Schema schema;
  private DataWriter<GenericRecord> writer;
  private String filePath;

  /**
   * Creates the staging and output directories if they are missing, parses the test schema and
   * builds the writer under test.
   *
   * @throws Exception if the writer cannot be built
   */
  @BeforeClass
  public void setUp() throws Exception {
    // Making the staging and/or output dirs if necessary
    File stagingDir = new File(TestConstants.TEST_STAGING_DIR);
    File outputDir = new File(TestConstants.TEST_OUTPUT_DIR);
    if (!stagingDir.exists()) {
      stagingDir.mkdirs();
    }
    if (!outputDir.exists()) {
      outputDir.mkdirs();
    }

    this.schema = new Schema.Parser().parse(TestConstants.AVRO_SCHEMA);

    // Relative path of the output file: <namespace with '.' replaced by '/'>/<table>/<id>_<pull type>
    this.filePath = TestConstants.TEST_EXTRACT_NAMESPACE.replaceAll("\\.", "/") + "/" + TestConstants.TEST_EXTRACT_TABLE
        + "/" + TestConstants.TEST_EXTRACT_ID + "_" + TestConstants.TEST_EXTRACT_PULL_TYPE;

    State properties = new State();
    properties.setProp(ConfigurationKeys.WRITER_BUFFER_SIZE, ConfigurationKeys.DEFAULT_BUFFER_SIZE);
    properties.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, TestConstants.TEST_FS_URI);
    properties.setProp(ConfigurationKeys.WRITER_STAGING_DIR, TestConstants.TEST_STAGING_DIR);
    properties.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, TestConstants.TEST_OUTPUT_DIR);
    properties.setProp(ConfigurationKeys.WRITER_FILE_PATH, this.filePath);
    properties.setProp(ConfigurationKeys.WRITER_FILE_NAME, TestConstants.TEST_FILE_NAME);

    // Build a writer to write test records
    this.writer = new AvroDataWriterBuilder().writeTo(Destination.of(Destination.DestinationType.HDFS, properties))
        .writeInFormat(WriterOutputFormat.AVRO).withWriterId(TestConstants.TEST_WRITER_ID).withSchema(this.schema)
        .withBranches(1).forBranch(0).build();
  }

  /**
   * Writes every record in {@link TestConstants#JSON_RECORDS}, closes and commits the writer, then
   * reads the output file back and checks the three records field by field.
   *
   * @throws IOException if writing, committing or reading the Avro file fails
   */
  @Test
  public void testWrite() throws IOException {
    // Write all test records
    for (String record : TestConstants.JSON_RECORDS) {
      this.writer.write(convertRecord(record));
    }

    Assert.assertEquals(this.writer.recordsWritten(), 3);

    this.writer.close();
    this.writer.commit();

    File outputFile =
        new File(TestConstants.TEST_OUTPUT_DIR + Path.SEPARATOR + this.filePath, TestConstants.TEST_FILE_NAME);

    // try-with-resources guarantees the reader is closed even when an assertion fails, so the
    // file handle is not still open when tearDown() deletes the test directory.
    try (DataFileReader<GenericRecord> reader =
        new DataFileReader<>(outputFile, new GenericDatumReader<GenericRecord>(this.schema))) {
      // Read the records back and assert they are identical to the ones written
      assertUser(reader.next(), "Alyssa", 256, "yellow");
      assertUser(reader.next(), "Ben", 7, "red");
      assertUser(reader.next(), "Charlie", 68, "blue");
    }
  }

  /**
   * Removes the test root directory, and with it any staging/output data, if it exists.
   *
   * @throws IOException declared for the cleanup call
   */
  @AfterClass
  public void tearDown() throws IOException {
    // Clean up the staging and/or output directories if necessary
    File testRootDir = new File(TestConstants.TEST_ROOT_DIR);
    if (testRootDir.exists()) {
      FileUtil.fullyDelete(testRootDir);
    }
  }

  /**
   * Asserts that a record read back from the output file carries the expected field values.
   *
   * @param user the record read from the Avro file
   * @param expectedName expected value of the {@code name} field
   * @param expectedNumber expected value of the {@code favorite_number} field
   * @param expectedColor expected value of the {@code favorite_color} field
   */
  private static void assertUser(GenericRecord user, String expectedName, int expectedNumber,
      String expectedColor) {
    // Strings are in UTF8, so we have to call toString() on the string fields
    Assert.assertEquals(user.get("name").toString(), expectedName);
    Assert.assertEquals(user.get("favorite_number"), expectedNumber);
    Assert.assertEquals(user.get("favorite_color").toString(), expectedColor);
  }

  /**
   * Converts a JSON-encoded test record into a {@link GenericRecord} of the test schema by
   * copying every top-level JSON field into the record under the same name.
   *
   * @param inputRecord the JSON text of one record
   * @return a record populated with the fields found in {@code inputRecord}
   */
  private GenericRecord convertRecord(String inputRecord) {
    Gson gson = new Gson();
    JsonElement element = gson.fromJson(inputRecord, JsonElement.class);
    Map<String, Object> fields = gson.fromJson(element, FIELD_ENTRY_TYPE);
    GenericRecord outputRecord = new GenericData.Record(this.schema);
    for (Map.Entry<String, Object> entry : fields.entrySet()) {
      outputRecord.put(entry.getKey(), entry.getValue());
    }

    return outputRecord;
  }
}