/** * Copyright 2015 StreamSets Inc. * * Licensed under the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.streamsets.pipeline.lib.util; import com.google.common.collect.ImmutableList; import com.streamsets.pipeline.api.Field; import com.streamsets.pipeline.api.Record; import com.streamsets.pipeline.sdk.RecordCreator; import org.apache.avro.Schema; import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.generic.GenericRecord; import org.apache.avro.io.DatumWriter; import org.apache.avro.util.Utf8; import org.junit.Assert; import java.io.File; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.UUID; public class SdcAvroTestUtil { private static final String AVRO_UNION_TYPE_INDEX_PREFIX = "avro.union.typeIndex."; public static final String AVRO_SCHEMA1 = "{\n" +"\"type\": \"record\",\n" +"\"name\": \"Employee\",\n" +"\"fields\": [\n" +" {\"name\": \"name\", \"type\": \"string\"},\n" +" {\"name\": \"age\", \"type\": \"int\"},\n" +" {\"name\": \"emails\", \"type\": {\"type\": \"array\", \"items\": \"string\"}},\n" +" {\"name\": \"boss\", \"type\": [\"Employee\",\"null\"]}\n" +"]}"; public static final String AVRO_SCHEMA2 = "{\n" +"\"type\": \"record\",\n" +"\"name\": \"Employee\",\n" +"\"fields\": [\n" +" {\"name\": \"name\", \"type\": \"string\"},\n" +" {\"name\": \"emails\", \"type\": {\"type\": \"array\", \"items\": \"string\"}},\n" +" {\"name\": \"boss\", \"type\": [\"Employee\",\"null\"]}\n" +"]}"; public static List<Record> getRecords1() { List<Record> records = new ArrayList<>(); Map<String, Field> bossMap = new HashMap<>(); bossMap.put("name", Field.create("boss")); bossMap.put("age", Field.create(60)); bossMap.put("emails", Field.create(ImmutableList.of(Field.create("boss@company.com"), Field.create("boss2@company.com")))); bossMap.put("boss", Field.create(Field.Type.MAP, null)); Map<String, Field> map = new HashMap<>(); map.put("name", Field.create("a")); map.put("age", Field.create(30)); map.put("emails", Field.create(ImmutableList.of(Field.create("a@company.com"), Field.create("a2@company.com")))); map.put("boss", Field.create(bossMap)); Record record = RecordCreator.create(); record.getHeader().setAttribute(AVRO_UNION_TYPE_INDEX_PREFIX + "/boss", "0"); record.getHeader().setAttribute(AVRO_UNION_TYPE_INDEX_PREFIX + "/boss/boss", "1"); record.set(Field.create(map)); records.add(record); map = new HashMap<>(); map.put("name", Field.create("b")); map.put("age", Field.create(40)); map.put("emails", Field.create(ImmutableList.of(Field.create("b@company.com"), Field.create("b2@company.com")))); map.put("boss", Field.create(bossMap)); record = RecordCreator.create(); record.getHeader().setAttribute(AVRO_UNION_TYPE_INDEX_PREFIX + "/boss", "0"); record.getHeader().setAttribute(AVRO_UNION_TYPE_INDEX_PREFIX + "/boss/boss", "1"); record.set(Field.create(map)); records.add(record); map = new HashMap<>(); map.put("name", Field.create("c")); map.put("age", Field.create(50)); map.put("emails", Field.create(ImmutableList.of(Field.create("c@company.com"), Field.create("c2@company.com")))); map.put("boss", Field.create(bossMap)); record = RecordCreator.create(); record.getHeader().setAttribute(AVRO_UNION_TYPE_INDEX_PREFIX + "/boss", "0"); record.getHeader().setAttribute(AVRO_UNION_TYPE_INDEX_PREFIX + "/boss/boss", "1"); record.set(Field.create(map)); records.add(record); return records; } public static void compare1(List<GenericRecord> genericRecords) { Assert.assertEquals(3, genericRecords.size()); GenericRecord genericRecord = genericRecords.get(0); Assert.assertEquals("a", genericRecord.get("name").toString()); Assert.assertEquals(30, genericRecord.get("age")); Assert.assertTrue(genericRecord.get("emails") instanceof List); Assert.assertEquals(2, ((List<Utf8>) genericRecord.get("emails")).size()); Assert.assertEquals("a@company.com", ((List<Utf8>) genericRecord.get("emails")).get(0).toString()); Assert.assertEquals("a2@company.com", ((List<Utf8>) genericRecord.get("emails")).get(1).toString()); GenericRecord boss = (GenericRecord) genericRecord.get("boss"); Assert.assertNotNull(boss); Assert.assertEquals("boss", boss.get("name").toString()); Assert.assertEquals(60, boss.get("age")); Assert.assertTrue(boss.get("emails") instanceof List); Assert.assertEquals(2, ((List<Utf8>) boss.get("emails")).size()); Assert.assertEquals("boss@company.com", ((List<Utf8>) boss.get("emails")).get(0).toString()); Assert.assertEquals("boss2@company.com", ((List<Utf8>) boss.get("emails")).get(1).toString()); Assert.assertNull(boss.get("boss")); //Record 2 genericRecord = genericRecords.get(1); Assert.assertEquals("b", genericRecord.get("name").toString()); Assert.assertEquals(40, genericRecord.get("age")); Assert.assertTrue(genericRecord.get("emails") instanceof List); Assert.assertEquals(2, ((List<Utf8>) genericRecord.get("emails")).size()); Assert.assertEquals("b@company.com", ((List<Utf8>) genericRecord.get("emails")).get(0).toString()); Assert.assertEquals("b2@company.com", ((List<Utf8>) genericRecord.get("emails")).get(1).toString()); boss = (GenericRecord) genericRecord.get("boss"); Assert.assertNotNull(boss); Assert.assertEquals("boss", boss.get("name").toString()); Assert.assertEquals(60, boss.get("age")); Assert.assertTrue(boss.get("emails") instanceof List); Assert.assertEquals(2, ((List<Utf8>) boss.get("emails")).size()); Assert.assertEquals("boss@company.com", ((List<Utf8>) boss.get("emails")).get(0).toString()); Assert.assertEquals("boss2@company.com", ((List<Utf8>) boss.get("emails")).get(1).toString()); Assert.assertNull(boss.get("boss")); //Record 3 genericRecord = genericRecords.get(2); Assert.assertEquals("c", genericRecord.get("name").toString()); Assert.assertEquals(50, genericRecord.get("age")); Assert.assertTrue(genericRecord.get("emails") instanceof List); Assert.assertEquals(2, ((List<Utf8>) genericRecord.get("emails")).size()); Assert.assertEquals("c@company.com", ((List<Utf8>) genericRecord.get("emails")).get(0).toString()); Assert.assertEquals("c2@company.com", ((List<Utf8>) genericRecord.get("emails")).get(1).toString()); boss = (GenericRecord) genericRecord.get("boss"); Assert.assertNotNull(boss); Assert.assertEquals("boss", boss.get("name").toString()); Assert.assertEquals(60, boss.get("age")); Assert.assertTrue(boss.get("emails") instanceof List); Assert.assertEquals(2, ((List<Utf8>) boss.get("emails")).size()); Assert.assertEquals("boss@company.com", ((List<Utf8>) boss.get("emails")).get(0).toString()); Assert.assertEquals("boss2@company.com", ((List<Utf8>) boss.get("emails")).get(1).toString()); Assert.assertNull(boss.get("boss")); } public static List<Record> getRecords2() { List<Record> records = new ArrayList<>(); Map<String, Field> bossMap = new HashMap<>(); bossMap.put("name", Field.create("boss")); bossMap.put("age", Field.create(60)); bossMap.put("emails", Field.create(ImmutableList.of(Field.create("boss@company.com"), Field.create("boss2@company.com")))); bossMap.put("boss", Field.create(Field.Type.MAP, null)); Map<String, Field> map = new HashMap<>(); map.put("name", Field.create("a")); map.put("age", Field.create(30)); map.put("emails", Field.create(ImmutableList.of(Field.create("a@company.com"), Field.create("a2@company.com")))); map.put("boss", Field.create(bossMap)); Record record = RecordCreator.create(); record.getHeader().setAttribute(AVRO_UNION_TYPE_INDEX_PREFIX + "/boss", "1"); record.set(Field.create(map)); records.add(record); map = new HashMap<>(); map.put("name", Field.create("b")); map.put("age", Field.create(40)); map.put("emails", Field.create(ImmutableList.of(Field.create("b@company.com"), Field.create("b2@company.com")))); map.put("boss", Field.create(bossMap)); record = RecordCreator.create(); record.getHeader().setAttribute(AVRO_UNION_TYPE_INDEX_PREFIX + "/boss", "1"); record.set(Field.create(map)); records.add(record); map = new HashMap<>(); map.put("name", Field.create("c")); map.put("age", Field.create(50)); map.put("emails", Field.create(ImmutableList.of(Field.create("c@company.com"), Field.create("c2@company.com")))); map.put("boss", Field.create(bossMap)); record = RecordCreator.create(); record.getHeader().setAttribute(AVRO_UNION_TYPE_INDEX_PREFIX + "/boss", "1"); record.set(Field.create(map)); records.add(record); return records; } public static void compare2(List<GenericRecord> genericRecords) { Assert.assertEquals(3, genericRecords.size()); //Record 1 GenericRecord genericRecord = genericRecords.get(0); Assert.assertEquals("a", genericRecord.get("name").toString()); Assert.assertNull(genericRecord.get("age")); Assert.assertTrue(genericRecord.get("emails") instanceof List); Assert.assertEquals(2, ((List<Utf8>) genericRecord.get("emails")).size()); Assert.assertEquals("a@company.com", ((List<Utf8>) genericRecord.get("emails")).get(0).toString()); Assert.assertEquals("a2@company.com", ((List<Utf8>) genericRecord.get("emails")).get(1).toString()); GenericRecord boss = (GenericRecord) genericRecord.get("boss"); Assert.assertNull(boss); //Record 2 genericRecord = genericRecords.get(1); Assert.assertEquals("b", genericRecord.get("name").toString()); Assert.assertNull(genericRecord.get("age")); Assert.assertTrue(genericRecord.get("emails") instanceof List); Assert.assertEquals(2, ((List<Utf8>) genericRecord.get("emails")).size()); Assert.assertEquals("b@company.com", ((List<Utf8>) genericRecord.get("emails")).get(0).toString()); Assert.assertEquals("b2@company.com", ((List<Utf8>) genericRecord.get("emails")).get(1).toString()); boss = (GenericRecord) genericRecord.get("boss"); Assert.assertNull(boss); //Record 3 genericRecord = genericRecords.get(2); Assert.assertEquals("c", genericRecord.get("name").toString()); Assert.assertNull(genericRecord.get("age")); Assert.assertTrue(genericRecord.get("emails") instanceof List); Assert.assertEquals(2, ((List<Utf8>) genericRecord.get("emails")).size()); Assert.assertEquals("c@company.com", ((List<Utf8>) genericRecord.get("emails")).get(0).toString()); Assert.assertEquals("c2@company.com", ((List<Utf8>) genericRecord.get("emails")).get(1).toString()); boss = (GenericRecord) genericRecord.get("boss"); Assert.assertNull(boss); } private static String createTestDir() { File f = new File("target", UUID.randomUUID().toString()); Assert.assertTrue(f.mkdirs()); return f.getAbsolutePath(); } public static final String AVRO_SCHEMA = "{\n" +"\"type\": \"record\",\n" +"\"name\": \"Employee\",\n" +"\"fields\": [\n" +" {\"name\": \"name\", \"type\": \"string\"},\n" +" {\"name\": \"age\", \"type\": \"int\"},\n" +" {\"name\": \"emails\", \"type\": {\"type\": \"array\", \"items\": \"string\"}},\n" +" {\"name\": \"boss\", \"type\": [\"Employee\",\"null\"]}\n" +"]}"; public static File createAvroDataFile() throws Exception { File f = new File(createTestDir(), "file-0.avro"); Schema schema = new Schema.Parser().parse(AVRO_SCHEMA); GenericRecord boss = new GenericData.Record(schema); boss.put("name", "boss"); boss.put("age", 60); boss.put("emails", ImmutableList.of("boss@company.com", "boss2@company.com")); boss.put("boss", null); GenericRecord e3 = new GenericData.Record(schema); e3.put("name", "c"); e3.put("age", 50); e3.put("emails", ImmutableList.of("c@company.com", "c2@company.com")); e3.put("boss", boss); GenericRecord e2 = new GenericData.Record(schema); e2.put("name", "b"); e2.put("age", 40); e2.put("emails", ImmutableList.of("b@company.com", "b2@company.com")); e2.put("boss", boss); GenericRecord e1 = new GenericData.Record(schema); e1.put("name", "a"); e1.put("age", 30); e1.put("emails", ImmutableList.of("a@company.com", "a2@company.com")); e1.put("boss", boss); DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema); DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter); dataFileWriter.create(schema, f); dataFileWriter.append(e1); dataFileWriter.append(e2); dataFileWriter.append(e3); dataFileWriter.flush(); dataFileWriter.close(); return f; } }