/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.compliance;
import com.google.common.base.Optional;
import com.google.common.base.Throwables;
import com.google.gson.JsonParseException;
import com.google.gson.JsonParser;
import lombok.extern.slf4j.Slf4j;
/**
* Each Hive dataset that relies on gobblin-compliance for its compliance needs must define a
* dataset.descriptor property in its tblproperties.
*
* A dataset.descriptor is a description of a Hive dataset in the Json format.
*
* A compliance field is a column name in a Hive dataset to decide which records should be purged.
*
* A dataset.descriptor must contain an identifier whose value is the name of the column containing the compliance id.
*
* Path to the identifier must be specified in the job properties file
* via property dataset.descriptor.identifier.
*
* Example : dataset.descriptor = {Database : Repos, Owner : GitHub, ComplianceInfo : {IdentifierType : GitHubId}}
* If IdentifierType corresponds to the identifier and GitHubId is the compliance field, then
* dataset.descriptor.identifier = ComplianceInfo.IdentifierType
*
* @author adsharma
*/
@Slf4j
public abstract class DatasetDescriptor {
  /** Raw text of the dataset descriptor; checked with {@link #checkValidJsonStr(String)} at construction. */
  protected String descriptor;

  /**
   * Optional path to the compliance field inside the descriptor.
   * NOTE(review): presumably the value of the dataset.descriptor.identifier job property — confirm against callers.
   */
  protected Optional<String> complianceFieldPath;

  /**
   * Validates the descriptor text and stores both arguments.
   *
   * <p>Validation goes through the overridable {@link #checkValidJsonStr(String)}, which runs before
   * any subclass constructor body; overrides must therefore not depend on subclass state.
   *
   * @param descriptor the dataset descriptor as a JSON string
   * @param complianceFieldPath optional path to the compliance field within the descriptor
   * @throws NullPointerException if {@code descriptor} is null
   * @throws JsonParseException if {@code descriptor} cannot be parsed as JSON
   */
  public DatasetDescriptor(String descriptor, Optional<String> complianceFieldPath) {
    checkValidJsonStr(descriptor);
    this.descriptor = descriptor;
    this.complianceFieldPath = complianceFieldPath;
  }

  /**
   * Checks that the given string can be parsed by Gson's {@link JsonParser}.
   *
   * <p>NOTE(review): Gson's {@code JsonParser} is lenient, so some input that is not strict JSON
   * may still be accepted — confirm whether stricter validation is wanted.
   *
   * @param jsonStr the string to validate
   * @throws NullPointerException if {@code jsonStr} is null
   * @throws JsonParseException if {@code jsonStr} cannot be parsed
   */
  protected void checkValidJsonStr(String jsonStr) {
    // Fail with an explicit message instead of the bare NullPointerException that the parser's
    // underlying StringReader would raise for a null input.
    if (jsonStr == null) {
      throw new NullPointerException("Dataset descriptor JSON string must not be null");
    }
    try {
      new JsonParser().parse(jsonStr);
    } catch (JsonParseException e) {
      log.warn("Not a valid JSON String : {}", jsonStr);
      // JsonParseException is unchecked, so propagate() rethrows it unchanged; the explicit
      // 'throw' makes it visible that control never continues past this point.
      throw Throwables.propagate(e);
    }
  }

  /**
   * Returns the compliance field for this dataset; how it is resolved is left to the subclass.
   *
   * @return the compliance field
   */
  public abstract String getComplianceField();
}