package gobblin; import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.apache.avro.Schema; import org.apache.avro.file.DataFileReader; import org.apache.avro.file.FileReader; import org.apache.avro.file.SeekableInput; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericRecord; import org.apache.avro.io.DatumReader; import org.apache.avro.mapred.FsInput; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import gobblin.configuration.ConfigurationKeys; import gobblin.configuration.WorkUnitState; import gobblin.source.extractor.DataRecordException; import gobblin.source.extractor.Extractor; public class TestAvroExtractor implements Extractor<Schema, GenericRecord> { private WorkUnitState state; private List<GenericRecord> recordList; private Iterator<GenericRecord> recordIterator; public TestAvroExtractor(WorkUnitState workUnitState) throws IOException { this.state = workUnitState; this.recordList =getRecordFromFile(workUnitState.getProp(ConfigurationKeys.SOURCE_FILEBASED_FILES_TO_PULL)); this.recordIterator = this.recordList.iterator(); } public static List<GenericRecord> getRecordFromFile(String path) throws IOException { Configuration config = new Configuration(); SeekableInput input = new FsInput(new Path(path), config); DatumReader<GenericRecord> reader1 = new GenericDatumReader<>(); FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader1); List<GenericRecord> records = new ArrayList<>(); for (GenericRecord datum : fileReader) { records.add(datum); } fileReader.close(); return records; } @Override public Schema getSchema() throws IOException { if (recordList == null) { return null; } if (recordList.isEmpty()) { return null; } return recordList.get(0).getSchema(); } @Override public GenericRecord readRecord(@Deprecated GenericRecord reuse) throws DataRecordException, IOException { if (this.recordIterator.hasNext()) { return this.recordIterator.next(); } else { return null; } } @Override public long getExpectedRecordCount() { return recordList.size(); } @Override public long getHighWatermark() { return recordList.size(); } @Override public void close() throws IOException { } }