package com.hujiang.juice.service.driver;
import com.google.common.collect.Maps;
import com.hujiang.juice.common.exception.CacheException;
import com.hujiang.juice.common.utils.rest.Restty;
import com.hujiang.juice.service.exception.UnrecoverException;
import com.hujiang.juice.service.support.Support;
import com.hujiang.juice.common.vo.TaskResult;
import com.hujiang.juice.service.exception.DriverException;
import com.hujiang.juice.service.model.Host;
import com.hujiang.juice.service.model.SchedulerCalls;
import com.hujiang.juice.service.service.AuxiliaryService;
import com.hujiang.juice.service.service.SchedulerService;
import com.hujiang.juice.service.utils.SendUtils;
import com.hujiang.juice.service.utils.protocol.Protobuf;
import com.hujiang.juice.service.utils.protocol.Protocol;
import com.hujiang.juice.service.utils.zookeeper.CuratorUtils;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.extern.slf4j.Slf4j;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.apache.mesos.v1.scheduler.Protos;
import java.io.InputStream;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import static com.google.common.collect.Lists.newArrayList;
import static com.hujiang.juice.common.vo.TaskResult.Result.ERROR;
import static com.hujiang.juice.service.config.JUICE.*;
import static org.apache.mesos.v1.Protos.*;
/**
* Created by xujia on 16/11/22.
*/
@Slf4j
@Data
@EqualsAndHashCode(callSuper = false)
public class SchedulerDriver {
private FrameworkID frameworkId;
private CuratorUtils curatorUtils;
private volatile Host host = new Host();
private Support support;
private Protocol protocol;
private volatile String streamId;
private final List<OfferID> declines = newArrayList();
private final Map<String, Set<String>> attrMap = Maps.newHashMap();
private final Map<Long, String> killMap = Maps.newConcurrentMap();
public String getUrl() {
return host.getUrl();
}
public SchedulerDriver() {
protocol = new Protobuf();
support = new Support(MESOS_FRAMEWORK_ROLE);
if (StringUtils.isNotBlank(MESOS_SCHEDULER_END_POINT_ZK)) {
curatorUtils = new CuratorUtils(MESOS_SCHEDULER_END_POINT_ZK, host);
curatorUtils.init();
log.info("service is initializing and running by curator(zookeeper)!");
} else {
log.error("mesos host is not config, service will down!");
System.exit(-1);
}
log.info("host : " + host.getHost());
}
public void run() {
// get framework id
frameworkId = SchedulerService.getFrameworkId();
// start a new thread to listen task management list
log.info("start juice auxiliary service");
AuxiliaryService.start(this);
try {
log.info("start juice service");
while (true) {
try {
connecting();
} catch (Exception e) {
if (e instanceof UnrecoverException) {
log.error("server will recover now, cause : " + e);
reset(((UnrecoverException) e).isResetFrameworkId());
}
log.error("server will restart after 30s due to : " + e);
try {
Thread.sleep(30 * 1000L);
} catch (InterruptedException e1) {
log.warn(e1.getMessage());
}
}
}
} catch (Exception e) {
log.error(e.getMessage());
}
}
private void connecting() throws Exception {
InputStream stream = null;
Response res = null;
try {
Protos.Call call = subscribeCall();
res = Restty.create(getUrl())
.addAccept(protocol.mediaType())
.addMediaType(protocol.mediaType())
.addKeepAlive()
.requestBody(protocol.getSendBytes(call))
.post();
streamId = res.header(STREAM_ID);
stream = res.body().byteStream();
log.info("send subscribe, frameworkId : " + frameworkId + " , url " + getUrl() + ", streamId : " + streamId);
log.debug("subscribe call : " + call);
if (null == stream) {
log.warn("stream is null");
throw new DriverException("stream is null");
}
while (true) {
int size = SendUtils.readChunkSize(stream);
byte[] event = SendUtils.readChunk(stream, size);
onEvent(event);
}
} catch (Exception e) {
log.error("service handle error, due to : " + e);
throw e;
} finally {
if (null != stream) {
stream.close();
}
if (null != res) {
res.close();
}
streamId = null;
}
}
private void reset(boolean isResetFrameworkId) {
if (isResetFrameworkId) {
String removingId = frameworkId.getValue();
try {
killMap.clear();
declines.clear();
attrMap.clear();
AuxiliaryService.loggedErrors();
AuxiliaryService.getSendErrors().clear();
SchedulerService.removeFrameworkId();
} catch (CacheException ex) {
log.error("remove framework id : " + removingId + " from db error!");
}
AuxiliaryService.loggedErrors();
frameworkId = SchedulerService.genFrameworkId();
}
}
private Protos.Call subscribeCall() {
String hostName = System.getenv("HOST");
if (StringUtils.isBlank(hostName)) {
try {
hostName = InetAddress.getLocalHost().getHostAddress();
} catch (UnknownHostException e) {
hostName = "unknown host";
e.printStackTrace();
}
}
return SchedulerCalls.subscribe(
FrameworkInfo.newBuilder()
.setId(frameworkId)
.setHostname(hostName)
.setUser(Optional.ofNullable(System.getenv("user")).orElse("root"))
.setName(MESOS_SCHEDULER_NAME)
.setFailoverTimeout(FRAMEWORK_FAILOVER_TIMEOUT)
.setRole(MESOS_FRAMEWORK_ROLE)
.build());
}
private void onEvent(byte[] bytes) {
Protos.Event event = null;
try {
event = (Protos.Event) protocol.getEvent(bytes, Protos.Event.class);
} catch (Exception e) {
log.warn("parser event error, raw event : " + new String(bytes));
throw new DriverException("parser event error!");
}
log.debug("event type : " + event.getType());
switch (event.getType()) {
case SUBSCRIBED:
Protos.Event.Subscribed subscribed = event.getSubscribed();
SchedulerService.subscribed(subscribed, frameworkId);
break;
case OFFERS:
try {
long start = System.currentTimeMillis();
event.getOffers().getOffersList().stream()
.filter(of -> {
if (SchedulerService.filterAndAddAttrSys(of, attrMap)) {
return true;
}
declines.add(of.getId());
return false;
})
.forEach(
of -> {
List<TaskInfo> tasks = newArrayList();
String offerId = of.getId().getValue();
try {
SchedulerService.handleOffers(killMap, support, of, attrMap.get(offerId), declines, tasks);
} catch (Exception e) {
declines.add(of.getId());
tasks.forEach(
t -> {
AuxiliaryService.getTaskErrors()
.push(new TaskResult(com.hujiang.juice.common.model.Task.splitTaskNameId(t.getTaskId().getValue())
, ERROR, "task failed due to exception!"));
}
);
tasks.clear();
}
if (tasks.size() > 0) {
AuxiliaryService.acceptOffer(protocol, streamId, of.getId(), frameworkId, tasks, getUrl());
}
}
);
if (declines.size() > 0) {
AuxiliaryService.declineOffer(protocol, streamId, frameworkId, SchedulerCalls.decline(frameworkId, declines), getUrl());
}
long end = System.currentTimeMillis();
log.debug("accept --> used time : " + (end - start) + " ms");
} finally {
declines.clear();
attrMap.clear();
}
break;
case UPDATE:
TaskStatus status = event.getUpdate().getStatus();
if (status.hasUuid()) {
SchedulerService.update(killMap, status, protocol, frameworkId, streamId, getUrl());
try {
SendUtils.sendCall(acknowledgeCall(status), protocol, streamId, getUrl());
} catch (Exception e) {
log.warn("send acknowledge call error!");
throw new DriverException(e);
}
}
break;
case MESSAGE:
Protos.Event.Message message = event.getMessage();
SchedulerService.message(message.getAgentId(), message.getData().toByteArray());
break;
case ERROR:
SchedulerService.error(event);
case RESCIND:
case FAILURE:
case HEARTBEAT:
break; // ignore
default:
log.warn("Unsupported event : " + event);
throw new DriverException("Unsupported event : " + event);
}
}
private Protos.Call acknowledgeCall(TaskStatus status) {
return SchedulerCalls.ackUpdate(frameworkId, status.getUuid(), status.getAgentId(), status.getTaskId());
}
}