/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.audit.values.sink;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import lombok.Getter;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.google.common.io.Closer;
import com.typesafe.config.Config;
import gobblin.annotation.Alias;
import gobblin.audit.values.auditor.ValueAuditRuntimeMetadata;
import gobblin.configuration.ConfigurationKeys;
import gobblin.util.ConfigUtils;
import gobblin.util.PathUtils;
/**
* A Hadoop {@link FileSystem} based {@link AuditSink} that writes audit {@link GenericRecord}s to a file on {@link FileSystem}.
* <ul>
* <li> The {@link FileSystem} {@link URI} can be set using key {@link ConfigurationKeys#FS_URI_KEY}, {@link ConfigurationKeys#LOCAL_FS_URI} is used by default.
* <li> All audit files are written under the base path. The base path can be set using key {@link #FS_SINK_AUDIT_OUTPUT_PATH_KEY}.
* The default path is,
* <pre>
* <code>System.getProperty("user.dir") + "/lumos_value_audit/local_audit";</code>
* </pre>
* <li> It uses <code>auditMetadata</code> to build the audit file name and path.<br>
* <b>The layout on {@link FileSystem} - </b>
* <pre>
* |-- <Database>
* |-- <Table>
* |-- P=<PHASE>.C=<CLUSTER>.E=<EXTRACT_ID>.S=<SNAPSHOT_ID>.D=<DELTA_ID>
* |-- *.avro
* </pre>
* </ul>
*/
@Alias(value = "FsAuditSink")
public class FsAuditSink implements AuditSink {
private static final String FS_SINK_AUDIT_OUTPUT_PATH_KEY = "fs.outputDirPath";
private static final String FS_SINK_AUDIT_OUTPUT_DEFAULT_PATH = System.getProperty("user.dir") + "/lumos_value_audit/local_audit";
private static final String FILE_NAME_DELIMITTER = "_";
private final FileSystem fs;
private final OutputStream auditFileOutputStream;
private final DataFileWriter<GenericRecord> writer;
private final Closer closer = Closer.create();
private final ValueAuditRuntimeMetadata auditMetadata;
@Getter
private final Path auditDirPath;
public FsAuditSink(Config config, ValueAuditRuntimeMetadata auditMetadata) throws IOException {
this.auditDirPath = new Path(ConfigUtils.getString(config, FS_SINK_AUDIT_OUTPUT_PATH_KEY, FS_SINK_AUDIT_OUTPUT_DEFAULT_PATH));
this.fs = this.auditDirPath.getFileSystem(new Configuration());
this.auditMetadata = auditMetadata;
this.auditFileOutputStream = closer.register(fs.create(getAuditFilePath()));
DataFileWriter<GenericRecord> dataFileWriter = this.closer.register(new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>()));
this.writer = this.closer.register(dataFileWriter.create(this.auditMetadata.getTableMetadata().getTableSchema(), this.auditFileOutputStream));
}
/**
* Returns the complete path of the audit file. Generate the audit file path with format
*
* <pre>
* |-- <Database>
* |-- <Table>
* |-- P=<PHASE>.C=<CLUSTER>.E=<EXTRACT_ID>.S=<SNAPSHOT_ID>.D=<DELTA_ID>
* |-- *.avro
* </pre>
*
*/
public Path getAuditFilePath() {
StringBuilder auditFileNameBuilder = new StringBuilder();
auditFileNameBuilder.append("P=").append(auditMetadata.getPhase()).append(FILE_NAME_DELIMITTER).append("C=")
.append(auditMetadata.getCluster()).append(FILE_NAME_DELIMITTER).append("E=")
.append(auditMetadata.getExtractId()).append(FILE_NAME_DELIMITTER).append("S=")
.append(auditMetadata.getSnapshotId()).append(FILE_NAME_DELIMITTER).append("D=")
.append(auditMetadata.getDeltaId());
return new Path(auditDirPath, PathUtils.combinePaths(auditMetadata.getTableMetadata().getDatabase(), auditMetadata
.getTableMetadata().getTable(), auditFileNameBuilder.toString(), auditMetadata.getPartFileName()));
}
/**
* Append this record to the {@link DataFileWriter}
*
* {@inheritDoc}
* @see gobblin.audit.values.sink.AuditSink#write(org.apache.avro.generic.GenericRecord)
*/
@Override
public void write(GenericRecord record) throws IOException {
this.writer.append(record);
}
@Override
public final void close() throws IOException {
this.closer.close();
}
}