/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.compaction.verify;
import java.io.Closeable;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableMap;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import gobblin.compaction.dataset.Dataset;
import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.State;
import gobblin.util.ExecutorsUtils;
import gobblin.util.executors.ScalingThreadPoolExecutor;
/**
* A class for verifying data completeness of a {@link Dataset}.
*
* To verify data completeness, one should extend {@link AbstractRunner} and implement {@link AbstractRunner#call()}
* which returns a {@link Results} object. The (fully qualified) name of the class that extends {@link AbstractRunner}
* should be associated with property {@link ConfigurationKeys#COMPACTION_DATA_COMPLETENESS_VERIFICATION_CLASS}.
*
* @author Ziyang Liu
*/
public class DataCompletenessVerifier implements Closeable {
private static final Logger LOG = LoggerFactory.getLogger(DataCompletenessVerifier.class);
private static final String COMPACTION_COMPLETENESS_VERIFICATION_PREFIX = "compaction.completeness.verification.";
/**
* Configuration properties related to data completeness verification.
*/
private static final String COMPACTION_COMPLETENESS_VERIFICATION_CLASS =
COMPACTION_COMPLETENESS_VERIFICATION_PREFIX + "class";
private static final String COMPACTION_COMPLETENESS_VERIFICATION_THREAD_POOL_SIZE =
COMPACTION_COMPLETENESS_VERIFICATION_PREFIX + "thread.pool.size";
private static final int DEFAULT_COMPACTION_COMPLETENESS_VERIFICATION_THREAD_POOL_SIZE = 20;
private final State props;
private final int threadPoolSize;
private final ListeningExecutorService exeSvc;
private final Class<? extends AbstractRunner> runnerClass;
/**
* The given {@link State} object must specify property
* {@link ConfigurationKeys#COMPACTION_DATA_COMPLETENESS_VERIFICATION_CLASS}, and may optionally specify
* {@link ConfigurationKeys#COMPACTION_DATA_COMPLETENESS_VERIFICATION_THREAD_POOL_SIZE}.
*/
public DataCompletenessVerifier(State props) {
this.props = props;
this.threadPoolSize = getDataCompletenessVerificationThreadPoolSize();
this.exeSvc = getExecutorService();
this.runnerClass = getRunnerClass();
}
private ListeningExecutorService getExecutorService() {
return ExecutorsUtils.loggingDecorator(
ScalingThreadPoolExecutor.newScalingThreadPool(0, this.threadPoolSize, TimeUnit.SECONDS.toMillis(10)));
}
private int getDataCompletenessVerificationThreadPoolSize() {
return this.props.getPropAsInt(COMPACTION_COMPLETENESS_VERIFICATION_THREAD_POOL_SIZE,
DEFAULT_COMPACTION_COMPLETENESS_VERIFICATION_THREAD_POOL_SIZE);
}
@SuppressWarnings("unchecked")
private Class<? extends AbstractRunner> getRunnerClass() {
Preconditions.checkArgument(this.props.contains(COMPACTION_COMPLETENESS_VERIFICATION_CLASS),
"Missing required property " + COMPACTION_COMPLETENESS_VERIFICATION_CLASS);
try {
return (Class<? extends AbstractRunner>) Class
.forName(this.props.getProp(COMPACTION_COMPLETENESS_VERIFICATION_CLASS));
} catch (Throwable t) {
LOG.error("Failed to get data completeness verification class", t);
throw Throwables.propagate(t);
}
}
/**
* Verify data completeness for a set of {@link Dataset}s.
*
* @param datasets {@link Dataset}s to be verified.
* @return A {@link ListenableFuture<{@link Results}>} object that contains the result of the verification.
* Callers can add listeners or callbacks to it.
*/
public ListenableFuture<Results> verify(Iterable<Dataset> datasets) {
return this.exeSvc.submit(getRunner(datasets));
}
private AbstractRunner getRunner(Iterable<Dataset> datasets) {
try {
return this.runnerClass.getDeclaredConstructor(Iterable.class, State.class).newInstance(datasets, this.props);
} catch (Throwable t) {
LOG.error("Failed to instantiate data completeness verification class", t);
throw Throwables.propagate(t);
}
}
@Override
public void close() throws IOException {
ExecutorsUtils.shutdownExecutorService(this.exeSvc, Optional.of(LOG));
}
public void closeNow() {
ExecutorsUtils.shutdownExecutorService(this.exeSvc, Optional.of(LOG), 0, TimeUnit.NANOSECONDS);
}
/**
* Results of data completeness verification for a set of datasets.
*/
public static class Results implements Iterable<Results.Result> {
private final Iterable<Result> results;
public Results(Iterable<Result> results) {
this.results = results;
}
@Override
public Iterator<Results.Result> iterator() {
return this.results.iterator();
}
public static class Result {
public enum Status {
PASSED,
FAILED;
}
private final Dataset dataset;
private final Status status;
/**
* Data used to compute this result. A verification context is used to communicate to the caller how this {@link #status()}
* for data completeness was derived.
*/
private final Map<String, Object> verificationContext;
public Result(Dataset dataset, Status status) {
this.dataset = dataset;
this.status = status;
this.verificationContext = ImmutableMap.of();
}
public Result(Dataset dataset, Status status, Map<String, Object> verificationContext) {
this.dataset = dataset;
this.status = status;
this.verificationContext = verificationContext;
}
public Dataset dataset() {
return this.dataset;
}
public Status status() {
return this.status;
}
public Map<String, Object> verificationContext() {
return this.verificationContext;
}
}
}
/**
* Runner class for data completeness verification. Subclasses should implement {@link AbstractRunner#call()}
* which should contain the logic of data completeness verification and returns a {@link Results} object.
*/
public static abstract class AbstractRunner implements Callable<Results> {
protected final Iterable<Dataset> datasets;
protected final State props;
public AbstractRunner(Iterable<Dataset> datasets, State props) {
this.datasets = datasets;
this.props = props;
}
}
}