/**
* Copyright 2015 StreamSets Inc.
*
* Licensed under the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.streamsets.pipeline.lib.parser.avro;
import com.streamsets.pipeline.api.OnRecordError;
import com.streamsets.pipeline.api.Record;
import com.streamsets.pipeline.api.Stage;
import com.streamsets.pipeline.lib.parser.DataParser;
import com.streamsets.pipeline.lib.parser.DataParserException;
import com.streamsets.pipeline.lib.parser.DataParserFactory;
import com.streamsets.pipeline.lib.parser.DataParserFactoryBuilder;
import com.streamsets.pipeline.lib.parser.DataParserFormat;
import com.streamsets.pipeline.lib.util.SdcAvroTestUtil;
import com.streamsets.pipeline.sdk.ContextInfoCreator;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumWriter;
import org.junit.Assert;
import org.junit.Test;
import java.io.File;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import static com.streamsets.pipeline.config.OriginAvroSchemaSource.SOURCE;
import static com.streamsets.pipeline.lib.util.AvroSchemaHelper.SCHEMA_KEY;
import static com.streamsets.pipeline.lib.util.AvroSchemaHelper.SCHEMA_SOURCE_KEY;
/**
 * Tests for the Avro data-file {@link DataParser} obtained through
 * {@link DataParserFactoryBuilder} with {@link DataParserFormat#AVRO}.
 *
 * <p>The tests check the offsets reported after each parsed record, that parsing can be resumed
 * from a previously reported offset, and that every record of a file written with one record per
 * block is returned under a distinct offset.
 */
public class TestAvroDataFileParser {

  /** Schema of the records written by {@link #testIncorrectOffset()}. */
  public static final String AVRO_SCHEMA = "{\n"
    +"\"type\": \"record\",\n"
    +"\"name\": \"Employee\",\n"
    +"\"fields\": [\n"
    +" {\"name\": \"name\", \"type\": \"string\"},\n"
    +" {\"name\": \"id\", \"type\": \"int\"}\n"
    +"]}";

  /** Values cycled through for the {@code name} field of the generated records. */
  private static final String[] NAMES = {
    "Brock", "Hari"
  };

  /** Maximum object length handed to the parser factory by the offset tests. */
  private static final int MAX_OBJECT_LENGTH = 1024;

  /** Number of records written by {@link #testIncorrectOffset()}. */
  private static final int RECORD_COUNT = 5;

  /**
   * Reads the file produced by {@link SdcAvroTestUtil#createAvroDataFile()} with a single parser
   * and checks the offset reported after each of the three records and at end of data.
   */
  @Test
  public void testAvroDataFileParser() throws Exception {
    File avroDataFile = SdcAvroTestUtil.createAvroDataFile();
    DataParser avroDataFileParser = getDataParser(avroDataFile, MAX_OBJECT_LENGTH, null);
    try {
      // NOTE(review): the offset looks like "<position>::<records read>" - confirm against the parser.
      String[] expectedOffsets = {"244::1", "244::2", "244::3"};
      for (String expectedOffset : expectedOffsets) {
        Assert.assertNotNull(avroDataFileParser.parse());
        Assert.assertEquals(expectedOffset, avroDataFileParser.getOffset());
      }
      // Once the data is exhausted parse() yields null and the offset becomes "-1".
      Assert.assertNull(avroDataFileParser.parse());
      Assert.assertEquals("-1", avroDataFileParser.getOffset());
    } finally {
      // Release the underlying file even when an assertion fails.
      avroDataFileParser.close();
    }
  }

  /**
   * Reads the same file one record at a time, creating a fresh parser for every record and
   * starting it at the offset reported by the previous parser.
   */
  @Test
  public void testAvroDataFileParserOffset() throws Exception {
    File avroDataFile = SdcAvroTestUtil.createAvroDataFile();

    String offset = parseOneAndGetOffset(avroDataFile, null, true);
    Assert.assertEquals("244::1", offset);

    offset = parseOneAndGetOffset(avroDataFile, offset, true);
    Assert.assertEquals("244::2", offset);

    offset = parseOneAndGetOffset(avroDataFile, offset, true);
    Assert.assertEquals("244::3", offset);

    // Resuming after the last record must report end of data.
    offset = parseOneAndGetOffset(avroDataFile, offset, false);
    Assert.assertEquals("-1", offset);
  }

  /**
   * Writes {@value #RECORD_COUNT} records, forcing a sync after each one, and verifies that the
   * parser returns every record under its own offset.
   */
  @Test
  public void testIncorrectOffset() throws Exception {
    File avroDataFile = SdcAvroTestUtil.createAvroDataFile();
    // The file is rewritten below with this test's own schema and records.
    avroDataFile.delete();
    Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);

    // try-with-resources closes the writer even if append/sync throws.
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
      dataFileWriter.create(schema, avroDataFile);
      // Loop invariant: the sync interval only needs to be configured once.
      dataFileWriter.setSyncInterval(1073741824);
      for (int i = 0; i < RECORD_COUNT; i++) {
        GenericRecord r = new GenericData.Record(schema);
        r.put("name", NAMES[i % NAMES.length]);
        r.put("id", i);
        dataFileWriter.append(r);
        // Explicit sync after every record, so each record gets its own offset below.
        dataFileWriter.sync();
      }
      dataFileWriter.flush();
    }

    DataParserFactory factory = new DataParserFactoryBuilder(getContext(), DataParserFormat.AVRO)
      .setMaxDataLen(1024 * 1024)
      .setOverRunLimit(1000 * 1000)
      .setConfig(SCHEMA_SOURCE_KEY, SOURCE)
      .build();

    Map<String, Record> records = new HashMap<>();
    DataParser dataParser = factory.getParser(avroDataFile, null);
    try {
      Record record;
      while ((record = dataParser.parse()) != null) {
        // Records sharing an offset would overwrite each other and shrink the map.
        records.put(dataParser.getOffset(), record);
      }
    } finally {
      dataParser.close();
    }

    Assert.assertEquals(String.valueOf(records), RECORD_COUNT, records.size());
    assertIdAtOffset(records, "141::1", 0);
    assertIdAtOffset(records, "166::1", 1);
    assertIdAtOffset(records, "190::1", 2);
    assertIdAtOffset(records, "215::1", 3);
    assertIdAtOffset(records, "239::1", 4);
  }

  /**
   * Asserts that a record was collected under {@code offset} and that its {@code /id} field
   * equals {@code expectedId}.
   *
   * @param records records keyed by the offset reported right after they were parsed
   * @param offset offset the record is expected under
   * @param expectedId expected value of the record's {@code /id} field
   */
  private static void assertIdAtOffset(Map<String, Record> records, String offset, int expectedId) {
    Record record = records.get(offset);
    // Fail with the offsets actually seen instead of a bare NullPointerException.
    Assert.assertNotNull("No record at offset " + offset + ", offsets seen: " + records.keySet(), record);
    Assert.assertEquals("Unexpected id at offset " + offset, expectedId, record.get("/id").getValueAsInteger());
  }

  /**
   * Opens a parser on {@code file} at {@code readerOffset}, performs a single
   * {@link DataParser#parse()} call, and closes the parser again.
   *
   * @param file Avro data file to read
   * @param readerOffset offset to start from, or {@code null} to start at the beginning
   * @param expectRecord whether the parse call is expected to return a record
   * @return the offset reported by the parser after the parse call
   * @throws Exception if creating, reading or closing the parser fails
   */
  private String parseOneAndGetOffset(File file, String readerOffset, boolean expectRecord) throws Exception {
    DataParser parser = getDataParser(file, MAX_OBJECT_LENGTH, readerOffset);
    try {
      Record record = parser.parse();
      if (expectRecord) {
        Assert.assertNotNull(record);
      } else {
        Assert.assertNull(record);
      }
      return parser.getOffset();
    } finally {
      parser.close();
    }
  }

  /** Creates the source context the parser factories in this class are built with. */
  private Stage.Context getContext() {
    return ContextInfoCreator.createSourceContext("i", false, OnRecordError.TO_ERROR,
      Collections.<String>emptyList());
  }

  /**
   * Builds an Avro parser for {@code file} configured with {@link SdcAvroTestUtil#AVRO_SCHEMA}.
   *
   * @param file Avro data file to read
   * @param maxObjectLength value passed to {@code setMaxDataLen}
   * @param readerOffset offset to start from, or {@code null} to start at the beginning
   * @return a parser on {@code file}; the caller is responsible for closing it
   * @throws DataParserException if the factory cannot create the parser
   */
  private DataParser getDataParser(File file, int maxObjectLength, String readerOffset) throws DataParserException {
    DataParserFactory factory = new DataParserFactoryBuilder(getContext(), DataParserFormat.AVRO)
      .setMaxDataLen(maxObjectLength)
      .setConfig(SCHEMA_KEY, SdcAvroTestUtil.AVRO_SCHEMA)
      .setOverRunLimit(1000)
      .build();
    return factory.getParser(file, readerOffset);
  }
}