/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package gobblin.converter;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import com.google.common.base.Splitter;
import gobblin.codec.StreamCodec;
import gobblin.configuration.WorkUnitState;
import gobblin.crypto.EncryptionConfigParser;
import gobblin.crypto.EncryptionFactory;
import gobblin.recordaccess.RecordAccessor;
/**
 * Converter that encrypts one or more string fields of a record in place.
 *
 * <p>The fields to encrypt are read from the {@link #FIELDS_TO_ENCRYPT_CONFIG_NAME} property as a
 * comma-separated list. Each configured field is looked up through the {@link RecordAccessor}
 * supplied by the subclass; every entry returned for that field is encrypted and written back
 * under the key the accessor reported.
 *
 * <p>Assumes that the encryption algorithm chosen will output a UTF-8 encoded byte array, because
 * the encrypted bytes are stored back into the record as a UTF-8 decoded {@code String}.
 *
 * @param <SCHEMA> schema type; passed through unchanged
 * @param <DATA> record type; the input record itself is modified and returned
 */
public abstract class StringFieldEncryptorConverter<SCHEMA, DATA> extends Converter<SCHEMA, SCHEMA, DATA, DATA> {
  /** Name of the property holding the comma-separated list of fields to encrypt. */
  public static final String FIELDS_TO_ENCRYPT_CONFIG_NAME = "converter.fieldsToEncrypt";

  private StreamCodec encryptor;
  private List<String> fieldsToEncrypt;

  /**
   * Builds the encryption codec from the converter's encryption configuration and parses the list
   * of fields to encrypt.
   *
   * @param workUnit state from which the encryption config and the field list are read
   * @return this converter
   * @throws IllegalArgumentException if {@link #FIELDS_TO_ENCRYPT_CONFIG_NAME} is not set or does
   *     not name at least one field
   */
  @Override
  public Converter<SCHEMA, SCHEMA, DATA, DATA> init(WorkUnitState workUnit) {
    super.init(workUnit);

    Map<String, Object> config =
        EncryptionConfigParser.getConfigForBranch(EncryptionConfigParser.EntityType.CONVERTER, workUnit);
    encryptor = EncryptionFactory.buildStreamCryptoProvider(config);

    String fieldsToEncryptConfig = workUnit.getProp(FIELDS_TO_ENCRYPT_CONFIG_NAME, null);
    if (fieldsToEncryptConfig == null) {
      throw new IllegalArgumentException("Must fill in the " + FIELDS_TO_ENCRYPT_CONFIG_NAME + " config option!");
    }

    // Tolerate "a, b" and trailing commas: whitespace around a name and empty tokens are never
    // meaningful field names, and leaving them in would make the lookup target the wrong field.
    List<String> parsedFields =
        Splitter.on(',').trimResults().omitEmptyStrings().splitToList(fieldsToEncryptConfig);
    if (parsedFields.isEmpty()) {
      // Fail fast instead of silently running a converter that encrypts nothing.
      throw new IllegalArgumentException("Must fill in the " + FIELDS_TO_ENCRYPT_CONFIG_NAME + " config option!");
    }
    fieldsToEncrypt = parsedFields;

    return this;
  }

  /**
   * Returns the input schema unchanged; encryption replaces string values with string values.
   *
   * @param inputSchema schema of the incoming records
   * @param workUnit unused
   * @return {@code inputSchema}
   */
  @Override
  public SCHEMA convertSchema(SCHEMA inputSchema, WorkUnitState workUnit)
      throws SchemaConversionException {
    return inputSchema;
  }

  /**
   * Encrypts every configured field of {@code inputRecord} in place.
   *
   * @param outputSchema unused
   * @param inputRecord record whose fields are overwritten with their encrypted form
   * @param workUnit unused
   * @return a single-element iterable containing the (mutated) {@code inputRecord}
   * @throws DataConversionException if the encryption stream fails with an {@link IOException}
   */
  @Override
  public Iterable<DATA> convertRecord(SCHEMA outputSchema, DATA inputRecord, WorkUnitState workUnit)
      throws DataConversionException {
    try {
      RecordAccessor accessor = getRecordAccessor(inputRecord);

      for (String field : fieldsToEncrypt) {
        // One configured field may yield several entries; each is written back under the key
        // the accessor reported for it.
        Map<String, String> stringsToEncrypt = accessor.getMultiAsString(field);
        for (Map.Entry<String, String> entry : stringsToEncrypt.entrySet()) {
          accessor.set(entry.getKey(), encryptString(entry.getValue()));
        }
      }

      return Collections.singleton(inputRecord);
    } catch (IOException e) {
      throw new DataConversionException("Error encrypting field", e);
    }
  }

  /**
   * Returns the field names parsed from {@link #FIELDS_TO_ENCRYPT_CONFIG_NAME}.
   *
   * @return the configured fields, or {@code null} if {@link #init(WorkUnitState)} has not run
   */
  protected List<String> getFieldsToEncrypt() {
    return fieldsToEncrypt;
  }

  /**
   * Supplies the accessor used to read and overwrite fields of the given record.
   *
   * @param record record about to be converted
   * @return accessor operating on {@code record}
   */
  protected abstract RecordAccessor getRecordAccessor(DATA record);

  /** Runs one string through the encryption codec and returns the output decoded as UTF-8. */
  private String encryptString(String plainText) throws IOException {
    ByteArrayOutputStream encryptedBytes = new ByteArrayOutputStream();

    // try-with-resources guarantees the codec stream is closed even when write/flush fails.
    try (OutputStream cipherStream = encryptor.encodeOutputStream(encryptedBytes)) {
      cipherStream.write(plainText.getBytes(StandardCharsets.UTF_8));
      cipherStream.flush();
    }

    // Read the buffer only after the stream is closed so any bytes emitted on close are included.
    return new String(encryptedBytes.toByteArray(), StandardCharsets.UTF_8);
  }
}