/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.cluster;
import com.google.common.io.Closer;
import gobblin.metastore.StateStore;
import gobblin.runtime.util.StateStores;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.helix.task.Task;
import org.apache.helix.task.TaskCallbackContext;
import org.apache.helix.task.TaskConfig;
import org.apache.helix.task.TaskResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;
import com.google.common.base.Optional;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import com.typesafe.config.ConfigFactory;
import gobblin.annotation.Alpha;
import gobblin.broker.SharedResourcesBrokerFactory;
import gobblin.broker.iface.SharedResourcesBroker;
import gobblin.configuration.ConfigurationKeys;
import gobblin.runtime.AbstractJobLauncher;
import gobblin.runtime.GobblinMultiTaskAttempt;
import gobblin.runtime.JobState;
import gobblin.runtime.TaskExecutor;
import gobblin.runtime.TaskState;
import gobblin.runtime.TaskStateTracker;
import gobblin.runtime.util.JobMetrics;
import gobblin.source.workunit.MultiWorkUnit;
import gobblin.source.workunit.WorkUnit;
import gobblin.util.Id;
import gobblin.util.JobLauncherUtils;
import gobblin.util.SerializationUtils;
import gobblin.broker.gobblin_scopes.GobblinScopeTypes;
import gobblin.broker.gobblin_scopes.JobScopeInstance;
/**
* An implementation of Helix's {@link org.apache.helix.task.Task} that wraps and runs one or more Gobblin
* {@link gobblin.runtime.Task}s.
*
* <p>
* Upon startup, a {@link GobblinHelixTask} reads the property
* {@link GobblinClusterConfigurationKeys#WORK_UNIT_FILE_PATH} for the path of the file storing a serialized
* {@link WorkUnit} on the {@link FileSystem} of choice and de-serializes the {@link WorkUnit}. Depending on
* whether the serialized {@link WorkUnit} is a {@link MultiWorkUnit}, it then creates one or more Gobblin
* {@link gobblin.runtime.Task}s to run the {@link WorkUnit}(s) (possibly wrapped in the {@link MultiWorkUnit})
* and waits for the Gobblin {@link gobblin.runtime.Task}(s) to finish. Upon completion of the Gobblin
* {@link gobblin.runtime.Task}(s), it persists the {@link TaskState} of each {@link gobblin.runtime.Task} to
* a file that will be collected by the {@link GobblinHelixJobLauncher} later upon completion of the job.
* </p>
*
* @author Yinan Li
*/
@Alpha
public class GobblinHelixTask implements Task {

  private static final Logger LOGGER = LoggerFactory.getLogger(GobblinHelixTask.class);

  // Held only so that the JobMetrics instance stays strongly referenced (see the constructor).
  @SuppressWarnings({"unused", "FieldCanBeLocal"})
  private final Optional<JobMetrics> jobMetrics;
  private final TaskExecutor taskExecutor;
  private final TaskStateTracker taskStateTracker;
  private final TaskConfig taskConfig;
  // An empty JobState instance that will be filled with values read from the serialized JobState
  private final JobState jobState = new JobState();
  private final String jobName;
  private final String jobId;
  private final String jobKey;
  private final String participantId;
  private final FileSystem fs;
  private final StateStores stateStores;

  // Assigned in run() and read in cancel(). cancel() is expected to be invoked from a thread other
  // than the one executing run(), so the field is volatile to make the assignment visible there.
  private volatile GobblinMultiTaskAttempt taskAttempt;

  /**
   * Creates the task and loads the serialized {@link JobState} of the job it belongs to.
   *
   * <p>
   *   The job name and job id are read from the Helix {@link TaskConfig} config map under
   *   {@link ConfigurationKeys#JOB_NAME_KEY} and {@link ConfigurationKeys#JOB_ID_KEY}. The job state is
   *   de-serialized from the file {@code <jobId>.<JOB_STATE_FILE_NAME>} located under {@code appWorkDir}.
   * </p>
   *
   * @param taskCallbackContext Helix callback context supplying the {@link TaskConfig} and the instance name
   * @param containerMetrics optional container metrics; when present, a {@link JobMetrics} is obtained from
   *                         its metric context and retained by this task
   * @param taskExecutor executor handed to the {@link GobblinMultiTaskAttempt} created in {@link #run()}
   * @param taskStateTracker tracker handed to the {@link GobblinMultiTaskAttempt} created in {@link #run()}
   * @param fs file system used to read the serialized job state
   * @param appWorkDir directory that contains the serialized job state file
   * @param stateStores state stores used to load work units and to persist task states
   * @throws IOException declared by the constructor; presumably raised when the job state cannot be read
   */
  public GobblinHelixTask(TaskCallbackContext taskCallbackContext, Optional<ContainerMetrics> containerMetrics,
      TaskExecutor taskExecutor, TaskStateTracker taskStateTracker, FileSystem fs, Path appWorkDir,
      StateStores stateStores)
      throws IOException {
    this.taskExecutor = taskExecutor;
    this.taskStateTracker = taskStateTracker;
    this.taskConfig = taskCallbackContext.getTaskConfig();
    this.jobName = this.taskConfig.getConfigMap().get(ConfigurationKeys.JOB_NAME_KEY);
    this.jobId = this.taskConfig.getConfigMap().get(ConfigurationKeys.JOB_ID_KEY);
    this.jobKey = Long.toString(Id.parse(this.jobId).getSequence());
    this.participantId = taskCallbackContext.getManager().getInstanceName();
    this.fs = fs;
    this.stateStores = stateStores;

    Path jobStateFilePath = new Path(appWorkDir, this.jobId + "." + AbstractJobLauncher.JOB_STATE_FILE_NAME);
    SerializationUtils.deserializeState(this.fs, jobStateFilePath, this.jobState);

    if (containerMetrics.isPresent()) {
      // This must be done after the jobState is deserialized from the jobStateFilePath
      // A reference to jobMetrics is required to ensure it is not evicted from the GobblinMetricsRegistry Cache
      this.jobMetrics = Optional.of(JobMetrics.get(this.jobState, containerMetrics.get().getMetricContext()));
    } else {
      this.jobMetrics = Optional.absent();
    }
  }

  /**
   * Loads the work unit referenced by {@link GobblinClusterConfigurationKeys#WORK_UNIT_FILE_PATH}, flattens
   * it if it is a {@link MultiWorkUnit}, and runs the resulting work units in a single
   * {@link GobblinMultiTaskAttempt} with {@link GobblinMultiTaskAttempt.CommitPolicy#IMMEDIATE}.
   *
   * @return {@link TaskResult.Status#COMPLETED} with the number of work units on success,
   *         {@link TaskResult.Status#CANCELED} if the running thread is interrupted, or
   *         {@link TaskResult.Status#ERROR} carrying the stack trace for any other failure
   */
  @Override
  public TaskResult run() {
    SharedResourcesBroker<GobblinScopeTypes> globalBroker = null;
    // NOTE(review): resources of a try-with-resources statement are closed before its catch blocks run,
    // so the MDC entries registered below are no longer set when the failure is logged in the catch block.
    try (Closer closer = Closer.create()) {
      closer.register(MDC.putCloseable(ConfigurationKeys.JOB_NAME_KEY, this.jobName));
      closer.register(MDC.putCloseable(ConfigurationKeys.JOB_KEY_KEY, this.jobKey));

      Path workUnitFilePath =
          new Path(this.taskConfig.getConfigMap().get(GobblinClusterConfigurationKeys.WORK_UNIT_FILE_PATH));
      WorkUnit workUnit = loadWorkUnit(workUnitFilePath);

      // The list of individual WorkUnits (flattened) to run
      List<WorkUnit> workUnits = Lists.newArrayList();
      if (workUnit instanceof MultiWorkUnit) {
        // Flatten the MultiWorkUnit so the job configuration properties can be added to each individual WorkUnits
        List<WorkUnit> flattenedWorkUnits =
            JobLauncherUtils.flattenWorkUnits(((MultiWorkUnit) workUnit).getWorkUnits());
        workUnits.addAll(flattenedWorkUnits);
      } else {
        workUnits.add(workUnit);
      }

      globalBroker = SharedResourcesBrokerFactory.createDefaultTopLevelBroker(
          ConfigFactory.parseProperties(this.jobState.getProperties()), GobblinScopeTypes.GLOBAL.defaultScopeInstance());
      SharedResourcesBroker<GobblinScopeTypes> jobBroker =
          globalBroker.newSubscopedBuilder(new JobScopeInstance(this.jobState.getJobName(), this.jobState.getJobId())).build();

      // Build the attempt fully before publishing it to the volatile field read by cancel().
      GobblinMultiTaskAttempt attempt =
          new GobblinMultiTaskAttempt(workUnits.iterator(), this.jobId, this.jobState, this.taskStateTracker,
              this.taskExecutor, Optional.of(this.participantId), Optional.of(this.stateStores.taskStateStore),
              jobBroker);
      this.taskAttempt = attempt;

      attempt.runAndOptionallyCommitTaskAttempt(GobblinMultiTaskAttempt.CommitPolicy.IMMEDIATE);
      return new TaskResult(TaskResult.Status.COMPLETED, String.format("completed tasks: %d", workUnits.size()));
    } catch (InterruptedException ie) {
      // Restore the interrupt flag so callers further up the stack can observe the interruption.
      Thread.currentThread().interrupt();
      return new TaskResult(TaskResult.Status.CANCELED, "");
    } catch (Throwable t) {
      LOGGER.error("GobblinHelixTask failed due to " + t.getMessage(), t);
      return new TaskResult(TaskResult.Status.ERROR, Throwables.getStackTraceAsString(t));
    } finally {
      if (globalBroker != null) {
        try {
          globalBroker.close();
        } catch (IOException ioe) {
          // Best effort: a failure to close the broker must not replace the task result.
          LOGGER.error("Could not close shared resources broker.", ioe);
        }
      }
    }
  }

  /**
   * Shuts down the tasks of the current {@link GobblinMultiTaskAttempt}, if {@link #run()} has created one.
   *
   * @throws RuntimeException wrapping the {@link InterruptedException} if the calling thread is interrupted
   *         while the tasks are being shut down; the thread's interrupt flag is restored before throwing
   */
  @Override
  public void cancel() {
    // Read the volatile field once so the null check and the shutdown call use the same instance.
    GobblinMultiTaskAttempt attempt = this.taskAttempt;
    if (attempt == null) {
      LOGGER.error("Task cancelled but taskAttempt is null, so ignoring.");
      return;
    }

    try {
      LOGGER.info("Task cancelled: Shutdown starting for tasks with jobId: {}", this.jobId);
      attempt.shutdownTasks();
      LOGGER.info("Task cancelled: Shutdown complete for tasks with jobId: {}", this.jobId);
    } catch (InterruptedException e) {
      // Preserve the interrupt status; wrapping the exception alone would silently clear it.
      Thread.currentThread().interrupt();
      throw new RuntimeException("Interrupted while shutting down task with jobId: " + this.jobId, e);
    }
  }

  /**
   * Loads the single (multi) work unit stored at the given path from the matching state store.
   *
   * <p>
   *   The parent directory name of the path is used as the store name and the file name as the table name.
   *   Paths ending with {@link AbstractJobLauncher#MULTI_WORK_UNIT_FILE_EXTENSION} are read from the
   *   multi-work-unit store, all others from the work-unit store.
   * </p>
   *
   * @param workUnitFilePath path identifying the serialized work unit
   * @return the first work unit returned by the state store
   * @throws IOException if the state store returns no work unit for the path, or if the store lookup fails
   */
  private WorkUnit loadWorkUnit(Path workUnitFilePath) throws IOException {
    String fileName = workUnitFilePath.getName();
    String storeName = workUnitFilePath.getParent().getName();

    List<? extends WorkUnit> loaded;
    if (fileName.endsWith(AbstractJobLauncher.MULTI_WORK_UNIT_FILE_EXTENSION)) {
      loaded = this.stateStores.mwuStateStore.getAll(storeName, fileName);
    } else {
      loaded = this.stateStores.wuStateStore.getAll(storeName, fileName);
    }

    if (loaded == null || loaded.isEmpty()) {
      // Fail with a descriptive message instead of an IndexOutOfBoundsException from get(0).
      throw new IOException("No work unit found in state store for " + workUnitFilePath);
    }
    return loaded.get(0);
  }
}