/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package gobblin.writer;

import java.io.IOException;
import java.util.Map;
import java.util.concurrent.ExecutionException;

import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.commons.lang3.reflect.ConstructorUtils;

import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.io.Closer;

import lombok.extern.slf4j.Slf4j;

import gobblin.commit.SpeculativeAttemptAwareConstruct;
import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.State;
import gobblin.instrumented.writer.InstrumentedDataWriterDecorator;
import gobblin.instrumented.writer.InstrumentedPartitionedDataWriterDecorator;
import gobblin.source.extractor.CheckpointableWatermark;
import gobblin.util.AvroUtils;
import gobblin.util.FinalState;
import gobblin.writer.partitioner.WriterPartitioner;

/**
 * {@link DataWriter} that uses a {@link WriterPartitioner} to compute a partition for each record, lazily
 * instantiates an underlying {@link DataWriter} per partition, and routes each record to its partition's writer.
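 *
 * <p>A minimal usage sketch (the partitioner class name, the record/schema types, and the concrete builder below
 * are hypothetical and shown only to illustrate the wiring; any {@link PartitionAwareDataWriterBuilder} works):
 * <pre>{@code
 *   State state = new State();
 *   state.setProp(ConfigurationKeys.WRITER_PARTITIONER_CLASS, "my.company.MyWriterPartitioner");
 *   PartitionAwareDataWriterBuilder<MySchema, MyRecord> builder = ...; // writer-specific builder
 *   DataWriter<MyRecord> writer = new PartitionedDataWriter<>(builder, state);
 *   writer.write(record);  // routed to the writer for this record's partition
 *   writer.commit();
 *   writer.close();
 * }</pre>
 *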
* @param <S> schema type.
* @param <D> record type.
*/
@Slf4j
public class PartitionedDataWriter<S, D> implements DataWriter<D>, FinalState, SpeculativeAttemptAwareConstruct, WatermarkAwareWriter<D> {
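  // Sentinel cache key under which the single writer is stored when partitioning is disabled.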
private static final GenericRecord NON_PARTITIONED_WRITER_KEY =
new GenericData.Record(SchemaBuilder.record("Dummy").fields().endRecord());
private int writerIdSuffix = 0;
private final String baseWriterId;
private final Optional<WriterPartitioner> partitioner;
private final LoadingCache<GenericRecord, DataWriter<D>> partitionWriters;
private final Optional<PartitionAwareDataWriterBuilder> builder;
private final boolean shouldPartition;
private final Closer closer;
private boolean isSpeculativeAttemptSafe;
private boolean isWatermarkCapable;
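  /**
   * If {@link ConfigurationKeys#WRITER_PARTITIONER_CLASS} is set in {@code state}, {@code builder} must be a
   * {@link PartitionAwareDataWriterBuilder} and a writer is created lazily for each partition emitted by the
   * partitioner; otherwise a single non-partitioned writer is built up front and used for every record.
   */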
public PartitionedDataWriter(DataWriterBuilder<S, D> builder, final State state)
throws IOException {
this.isSpeculativeAttemptSafe = true;
this.isWatermarkCapable = true;
this.baseWriterId = builder.getWriterId();
this.closer = Closer.create();
this.partitionWriters = CacheBuilder.newBuilder().build(new CacheLoader<GenericRecord, DataWriter<D>>() {
@Override
public DataWriter<D> load(final GenericRecord key)
throws Exception {
return PartitionedDataWriter.this.closer
.register(new InstrumentedPartitionedDataWriterDecorator<>(createPartitionWriter(key), state, key));
}
});
if (state.contains(ConfigurationKeys.WRITER_PARTITIONER_CLASS)) {
Preconditions.checkArgument(builder instanceof PartitionAwareDataWriterBuilder, String
.format("%s was specified but the writer %s does not support partitioning.",
ConfigurationKeys.WRITER_PARTITIONER_CLASS, builder.getClass().getCanonicalName()));
try {
this.shouldPartition = true;
this.builder = Optional.of(PartitionAwareDataWriterBuilder.class.cast(builder));
this.partitioner = Optional.of(WriterPartitioner.class.cast(ConstructorUtils
.invokeConstructor(Class.forName(state.getProp(ConfigurationKeys.WRITER_PARTITIONER_CLASS)), state,
builder.getBranches(), builder.getBranch())));
Preconditions
.checkArgument(this.builder.get().validatePartitionSchema(this.partitioner.get().partitionSchema()), String
.format("Writer %s does not support schema from partitioner %s",
                  builder.getClass().getCanonicalName(), this.partitioner.get().getClass().getCanonicalName()));
} catch (ReflectiveOperationException roe) {
throw new IOException(roe);
}
} else {
this.shouldPartition = false;
DataWriter<D> dataWriter = builder.build();
InstrumentedDataWriterDecorator<D> writer =
this.closer.register(new InstrumentedDataWriterDecorator<>(dataWriter, state));
this.isSpeculativeAttemptSafe = this.isDataWriterForPartitionSafe(dataWriter);
this.isWatermarkCapable = this.isDataWriterWatermarkCapable(dataWriter);
this.partitionWriters.put(NON_PARTITIONED_WRITER_KEY, writer);
this.partitioner = Optional.absent();
this.builder = Optional.absent();
}
}
private boolean isDataWriterWatermarkCapable(DataWriter<D> dataWriter) {
return (dataWriter instanceof WatermarkAwareWriter) && (((WatermarkAwareWriter) dataWriter).isWatermarkCapable());
}
@Override
public void write(D record)
throws IOException {
try {
DataWriter<D> writer = getDataWriterForRecord(record);
writer.write(record);
} catch (ExecutionException ee) {
throw new IOException(ee);
}
}
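  /**
   * Returns the writer for the record's partition, creating it on first use via the {@link LoadingCache}; when
   * partitioning is disabled every record maps to the writer registered under {@link #NON_PARTITIONED_WRITER_KEY}.
   */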
private DataWriter<D> getDataWriterForRecord(D record)
throws ExecutionException {
GenericRecord partition =
this.shouldPartition ? this.partitioner.get().partitionForRecord(record) : NON_PARTITIONED_WRITER_KEY;
return this.partitionWriters.get(partition);
}
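  /**
   * Commits every partition writer, logging individual failures and only throwing once all writers have been
   * attempted, so a single bad partition does not block the remaining commits.
   */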
@Override
public void commit()
throws IOException {
int writersCommitted = 0;
for (Map.Entry<GenericRecord, DataWriter<D>> entry : this.partitionWriters.asMap().entrySet()) {
try {
entry.getValue().commit();
writersCommitted++;
} catch (Throwable throwable) {
log.error(String.format("Failed to commit writer for partition %s.", entry.getKey()), throwable);
}
}
if (writersCommitted < this.partitionWriters.asMap().size()) {
throw new IOException("Failed to commit all writers.");
}
}
@Override
public void cleanup()
throws IOException {
int writersCleanedUp = 0;
for (Map.Entry<GenericRecord, DataWriter<D>> entry : this.partitionWriters.asMap().entrySet()) {
try {
entry.getValue().cleanup();
writersCleanedUp++;
} catch (Throwable throwable) {
log.error(String.format("Failed to cleanup writer for partition %s.", entry.getKey()));
}
}
if (writersCleanedUp < this.partitionWriters.asMap().size()) {
throw new IOException("Failed to clean up all writers.");
}
}
@Override
public long recordsWritten() {
long totalRecords = 0;
for (Map.Entry<GenericRecord, DataWriter<D>> entry : this.partitionWriters.asMap().entrySet()) {
totalRecords += entry.getValue().recordsWritten();
}
return totalRecords;
}
@Override
public long bytesWritten()
throws IOException {
long totalBytes = 0;
for (Map.Entry<GenericRecord, DataWriter<D>> entry : this.partitionWriters.asMap().entrySet()) {
totalBytes += entry.getValue().bytesWritten();
}
return totalBytes;
}
@Override
public void close()
throws IOException {
this.closer.close();
}
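  /**
   * Builds a writer for the given partition with a unique writer id (base id plus an incrementing suffix), and
   * downgrades the speculative-safety and watermark-capability flags if the new writer does not support them.
   */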
private DataWriter<D> createPartitionWriter(GenericRecord partition)
throws IOException {
if (!this.builder.isPresent()) {
throw new IOException("Writer builder not found. This is an error in the code.");
}
    DataWriter<D> dataWriter = this.builder.get().forPartition(partition)
        .withWriterId(this.baseWriterId + "_" + this.writerIdSuffix++)
        .build();
this.isSpeculativeAttemptSafe = this.isSpeculativeAttemptSafe && this.isDataWriterForPartitionSafe(dataWriter);
this.isWatermarkCapable = this.isWatermarkCapable && this.isDataWriterWatermarkCapable(dataWriter);
return dataWriter;
}
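  /**
   * Merges the final state of every partition writer; when partitioning is enabled each property is additionally
   * recorded under a key suffixed with the partition path so per-partition values stay distinguishable.
   */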
@Override
public State getFinalState() {
State state = new State();
try {
for (Map.Entry<GenericRecord, DataWriter<D>> entry : this.partitionWriters.asMap().entrySet()) {
if (entry.getValue() instanceof FinalState) {
State partitionFinalState = ((FinalState) entry.getValue()).getFinalState();
if (this.shouldPartition) {
for (String key : partitionFinalState.getPropertyNames()) {
              // Suffix each property name with the partition path so final state from different partition writers
              // does not get overwritten when merged below
partitionFinalState.setProp(key + "_" + AvroUtils.serializeAsPath(entry.getKey(), false, true),
partitionFinalState.getProp(key));
}
}
state.addAll(partitionFinalState);
}
}
state.setProp("RecordsWritten", recordsWritten());
state.setProp("BytesWritten", bytesWritten());
} catch (Exception exception) {
log.warn("Failed to get final state." + exception.getMessage());
// If Writer fails to return bytesWritten, it might not be implemented, or implemented incorrectly.
// Omit property instead of failing.
}
return state;
}
@Override
public boolean isSpeculativeAttemptSafe() {
return this.isSpeculativeAttemptSafe;
}
private boolean isDataWriterForPartitionSafe(DataWriter dataWriter) {
return dataWriter instanceof SpeculativeAttemptAwareConstruct
&& ((SpeculativeAttemptAwareConstruct) dataWriter).isSpeculativeAttemptSafe();
}
@Override
public boolean isWatermarkCapable() {
return this.isWatermarkCapable;
}
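  /**
   * Routes the envelope to the writer for its record's partition. Callers are expected to check
   * {@link #isWatermarkCapable()} first, since each partition writer must be a {@link WatermarkAwareWriter}.
   */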
@Override
public void writeEnvelope(AcknowledgableRecordEnvelope<D> recordEnvelope)
throws IOException {
try {
DataWriter<D> writer = getDataWriterForRecord(recordEnvelope.getRecord());
      // Unsafe cast: isWatermarkCapable() is expected to have verified that every wrapped writer is
      // watermark-aware before this path is used.
((WatermarkAwareWriter) writer).writeEnvelope(recordEnvelope);
} catch (ExecutionException ee) {
throw new IOException(ee);
}
}
@Override
public Map<String, CheckpointableWatermark> getCommittableWatermark() {
    // The committable watermark across a collection of committable and unacknowledged watermarks is the highest
    // committable watermark that is less than the lowest unacknowledged watermark.
WatermarkTracker watermarkTracker = new MultiWriterWatermarkTracker();
for (Map.Entry<GenericRecord, DataWriter<D>> entry : this.partitionWriters.asMap().entrySet()) {
if (entry.getValue() instanceof WatermarkAwareWriter) {
        Map<String, CheckpointableWatermark> committableWatermarks =
            ((WatermarkAwareWriter) entry.getValue()).getCommittableWatermark();
        if (!committableWatermarks.isEmpty()) {
          watermarkTracker.committedWatermarks(committableWatermarks);
        }
}
Map<String, CheckpointableWatermark> unacknowledgedWatermark =
((WatermarkAwareWriter) entry.getValue()).getUnacknowledgedWatermark();
if (!unacknowledgedWatermark.isEmpty()) {
watermarkTracker.unacknowledgedWatermarks(unacknowledgedWatermark);
}
}
}
return watermarkTracker.getAllCommitableWatermarks(); //TODO: Change this to use List of committables instead
}
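  /**
   * Merges the unacknowledged watermarks reported by the watermark-aware partition writers into a single view
   * using a {@link MultiWriterWatermarkTracker}.
   */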
@Override
public Map<String, CheckpointableWatermark> getUnacknowledgedWatermark() {
WatermarkTracker watermarkTracker = new MultiWriterWatermarkTracker();
    for (Map.Entry<GenericRecord, DataWriter<D>> entry : this.partitionWriters.asMap().entrySet()) {
      if (entry.getValue() instanceof WatermarkAwareWriter) {
        Map<String, CheckpointableWatermark> unacknowledgedWatermark =
            ((WatermarkAwareWriter) entry.getValue()).getUnacknowledgedWatermark();
        if (!unacknowledgedWatermark.isEmpty()) {
          watermarkTracker.unacknowledgedWatermarks(unacknowledgedWatermark);
        }
      }
    }
return watermarkTracker.getAllUnacknowledgedWatermarks();
}
}