/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.job.spawn;
import com.addthis.basis.util.LessStrings;
import com.addthis.hydra.job.Job;
import com.addthis.hydra.job.JobTask;
import com.addthis.hydra.job.JobTaskErrorCode;
import com.addthis.hydra.job.JobTaskState;
import com.addthis.hydra.job.entity.JobCommand;
import com.addthis.hydra.job.mq.CommandTaskReplicate;
import com.addthis.hydra.job.mq.HostState;
import com.addthis.hydra.job.mq.JobKey;
import com.addthis.hydra.job.mq.ReplicaTarget;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class moves a task from a source host to a target host.
* If the target host already had a replica of the task, that
* replica is removed so the task will make a new replica somewhere
* else.
*/
class TaskMover {

    private static final Logger log = LoggerFactory.getLogger(TaskMover.class);

    // Working state populated by execute(); unset until execute() has run.
    private HostState targetHost;
    private Job job;
    private JobTask task;
    private boolean kickOnComplete;
    private boolean isMigration;

    private final JobKey taskKey;
    private final String targetHostUUID;
    private final String sourceHostUUID;
    private final Spawn spawn;
    private final HostManager hostManager;

    /**
     * Creates a mover for a single task. Nothing is looked up or validated until
     * {@link #execute()} is called.
     *
     * @param spawn          spawn instance used to look up the job, change task state and send messages
     * @param hostManager    used to resolve host UUIDs into {@link HostState} objects
     * @param taskKey        key (job uuid and node number) of the task to move
     * @param targetHostUUID UUID of the host that should receive the task
     * @param sourceHostUUID UUID of the host the task is being moved away from
     */
    TaskMover(Spawn spawn, HostManager hostManager, JobKey taskKey, String targetHostUUID, String sourceHostUUID) {
        this.spawn = spawn;
        this.hostManager = hostManager;
        this.taskKey = taskKey;
        this.targetHostUUID = targetHostUUID;
        this.sourceHostUUID = sourceHostUUID;
    }

    /**
     * Sets the migration flag that {@link #execute()} passes on to
     * {@code spawn.prepareTaskStatesForRebalance}.
     *
     * @param isMigration the value to forward
     */
    public void setMigration(boolean isMigration) {
        this.isMigration = isMigration;
    }

    /**
     * Builds the key that is handed to the replicate command created by this mover.
     *
     * @return the target host UUID and the task key joined by {@code "&&&"}
     */
    public String choreWatcherKey() {
        return targetHostUUID + "&&&" + taskKey;
    }

    /**
     * Validates the requested move and, if every check passes, starts replicating the task
     * onto the target host.
     *
     * <p>The move is refused (returning {@code false}) when the inputs fail the status check,
     * the job or task cannot be found, no live copy of the task exists, the source host holds
     * no copy of the task, the target host already holds a copy, the target host lacks the
     * job's minion type, or the task states cannot be prepared for the rebalance. If starting
     * the replication throws, the task is put into {@link JobTaskState#ERROR} with
     * {@link JobTaskErrorCode#EXIT_REPLICATE_FAILURE}.
     *
     * @return {@code true} if the replicate command was sent, {@code false} otherwise
     */
    public boolean execute() {
        targetHost = hostManager.getHostState(targetHostUUID);
        if (taskKey == null || !spawn.checkStatusForMove(targetHostUUID) || !spawn.checkStatusForMove(sourceHostUUID)) {
            log.warn("[task.mover] erroneous input; terminating for: {}", taskKey);
            return false;
        }
        job = spawn.getJob(taskKey);
        // The job may no longer exist; report that as a failed lookup rather than
        // dereferencing a null job.
        task = (job == null) ? null : job.getTask(taskKey.getNodeNumber());
        if (task == null) {
            log.warn("[task.mover] failed to find job or task for: {}", taskKey);
            return false;
        }
        HostState liveHost = hostManager.getHostState(task.getHostUUID());
        if (liveHost == null || !liveHost.hasLive(task.getJobKey())) {
            log.warn("[task.mover] failed to find live task for: {}", taskKey);
            spawn.fixTaskDir(taskKey.getJobUuid(), taskKey.getNodeNumber(), false, false);
            return false;
        }
        if (!task.getHostUUID().equals(sourceHostUUID) && !task.hasReplicaOnHost(sourceHostUUID)) {
            log.warn("[task.mover] failed because the task does not have a copy on the specified source: {}", taskKey);
            return false;
        }
        // Defensive guard placed immediately before the first use of targetHost, so that the
        // checks above behave exactly as before. NOTE(review): checkStatusForMove presumably
        // already rejects unknown hosts -- confirm; this covers the case where it does not.
        if (targetHost == null) {
            log.warn("[task.mover] target host state is unavailable; terminating for: {}", taskKey);
            return false;
        }
        if (task.getAllTaskHosts().contains(targetHostUUID) || targetHost.hasLive(taskKey)) {
            log.warn("[task.mover] cannot move onto a host with an existing version of task: {}", taskKey);
            return false;
        }
        if (!targetHost.getMinionTypes().contains(job.getMinionType())) {
            log.warn("[task.mover] cannot move onto a host that lacks the appropriate minion type: {}", taskKey);
            return false;
        }
        // If the task was rebalanced out of queued state, kick it again when the rebalance completes.
        // This must be captured before prepareTaskStatesForRebalance is given the chance to change the state.
        kickOnComplete = task.getState().isQueuedState();
        if (!spawn.prepareTaskStatesForRebalance(job, task, isMigration)) {
            log.warn("[task.mover] couldn't set task states; terminating for: {}", taskKey);
            return false;
        }
        // Swap to the lightest host to run the rsync operation, assuming swapping is allowed for this job.
        HostState lightestExistingHost = spawn.taskQueuesByPriority.findBestHostToRunTask(
                spawn.getHealthyHostStatesHousingTask(task, !job.getDontAutoBalanceMe()),
                false);
        if (lightestExistingHost != null && !lightestExistingHost.getHostUuid().equals(task.getHostUUID())) {
            spawn.swapTask(task, lightestExistingHost.getHostUuid(), false, 0);
        }
        try {
            task.setRebalanceSource(sourceHostUUID);
            task.setRebalanceTarget(targetHostUUID);
            startReplicate();
            spawn.taskQueuesByPriority.markHostTaskActive(task.getHostUUID());
            spawn.queueJobTaskUpdateEvent(job);
            return true;
        } catch (Exception ex) {
            // A trailing Throwable argument is logged with its stack trace by SLF4J.
            log.warn("[task.mover] exception during replicate initiation; terminating for task: {}", taskKey, ex);
            task.setErrorCode(JobTaskErrorCode.EXIT_REPLICATE_FAILURE);
            task.setState(JobTaskState.ERROR);
            spawn.queueJobTaskUpdateEvent(job);
            return false;
        }
    }

    /**
     * Builds a {@link CommandTaskReplicate} aimed at the target host and sends it as a
     * control message. Expects {@code targetHost}, {@code job} and {@code task} to have been
     * resolved by {@link #execute()}.
     *
     * @throws IllegalStateException if the job's command definition cannot be found
     * @throws Exception             if building or sending the message fails
     */
    private void startReplicate() throws Exception {
        ReplicaTarget[] target = {
                new ReplicaTarget(targetHostUUID,
                                  targetHost.getHost(),
                                  targetHost.getUser(),
                                  targetHost.getPath())
        };
        JobCommand jobcmd = spawn.getJobCommandManager().getEntity(job.getCommand());
        if (jobcmd == null) {
            // Fail with a message naming the command instead of a bare NullPointerException;
            // the caller's catch block handles both the same way.
            throw new IllegalStateException("no job command found for name: " + job.getCommand());
        }
        CommandTaskReplicate replicate = new CommandTaskReplicate(task.getHostUUID(),
                                                                  task.getJobUUID(),
                                                                  task.getTaskID(),
                                                                  target,
                                                                  LessStrings.join(jobcmd.getCommand(), " "),
                                                                  choreWatcherKey(),
                                                                  true,
                                                                  kickOnComplete);
        replicate.setRebalanceSource(sourceHostUUID);
        replicate.setRebalanceTarget(targetHostUUID);
        spawn.sendControlMessage(replicate);
        log.warn("[task.mover] replicating job/task {} from {} onto host {}",
                 task.getJobKey(), sourceHostUUID, targetHostUUID);
    }

    /**
     * Returns a debugging description containing the task key, both host UUIDs, the resolved
     * job and task (null before {@link #execute()} runs) and the kick-on-complete flag.
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        sb.append("TaskMover");
        sb.append("{taskKey=").append(taskKey);
        sb.append(", targetHostUUID='").append(targetHostUUID).append('\'');
        sb.append(", sourceHostUUID='").append(sourceHostUUID).append('\'');
        sb.append(", job=").append(job);
        sb.append(", task=").append(task);
        sb.append(", kickOnComplete=").append(kickOnComplete);
        sb.append('}');
        return sb.toString();
    }
}