/*******************************************************************************
* Copyright (c) 2011 GigaSpaces Technologies Ltd. All rights reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
package org.cloudifysource.rest.util;
import static com.gigaspaces.log.LogEntryMatchers.regex;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.cloudifysource.dsl.internal.CloudifyConstants;
import org.cloudifysource.dsl.internal.CloudifyConstants.USMState;
import org.cloudifysource.dsl.internal.EventLogConstants;
import org.cloudifysource.dsl.utils.ServiceUtils;
import org.cloudifysource.rest.controllers.RestServiceException;
import org.openspaces.admin.Admin;
import org.openspaces.admin.AdminException;
import org.openspaces.admin.gsc.GridServiceContainer;
import org.openspaces.admin.internal.pu.DefaultProcessingUnit;
import org.openspaces.admin.pu.ProcessingUnit;
import org.openspaces.admin.pu.ProcessingUnitInstance;
import org.openspaces.admin.pu.ProcessingUnitInstanceStatistics;
import org.openspaces.admin.pu.ProcessingUnitType;
import org.openspaces.admin.zone.Zone;
import org.openspaces.pu.service.ServiceMonitors;
import com.gigaspaces.log.LogEntries;
import com.gigaspaces.log.LogEntry;
import com.gigaspaces.log.LogEntryMatcher;
/**
* the RestPollingRunnable provides a service installation polling mechanism for
* lifecycle and instance count changes events. the events will be saved in a
* dedicated LifecycleEventsContainer that will be sampled by the client.
*
* Initialize the Runnable with service names their planned number of instances.
*
* @author adaml
*
*/
public class RestPollingRunnable implements Runnable {
private static final int ONE_SEC = 1;
private static final int FIVE_SECONDS_MILLI = 5000;
// a map containing all of the application services and their planned number
// of instances.
// The services are ordered according to the installation order defined by
// the application.
private final LinkedHashMap<String, Integer> serviceNames;
private final String applicationName;
private Admin admin;
private long endTime;
private static final String USM_EVENT_LOGGER_NAME = ".*.USMEventLogger.{0}\\].*";
private boolean isUninstall = false;
private boolean isSetInstances = false;
private boolean isServiceInstall = false;
private LifecycleEventsContainer lifecycleEventsContainer;
/**
* indicates whether thread threw an exception.
*/
private Throwable executionException;
/**
* future container polling task.
*/
private Future<?> futureTask;
private boolean isDone = false;
private final Map<String, Date> gscStartTimeMap = new HashMap<String, Date>();
private final Object lock = new Object();
private FutureTask<Boolean> undeployTask;
private Exception deploymentExecutionException;
private Set<String> failedServiceInstallationList = new HashSet<String>();
private static final Logger logger = Logger
.getLogger(RestPollingRunnable.class.getName());
/**
* Create a rest polling runnable to poll for a specific service's
* installation lifecycle events with the application name set to the
* "default" application name.
*
* Use this constructor if polling a single service installation.
*
* @param applicationName
* the application name to deploy
* @param timeout
* timeout polling timeout.
* @param timeunit
* polling timeout timeunit.
*/
public RestPollingRunnable(final String applicationName,
final long timeout, final TimeUnit timeunit) {
this.serviceNames = new LinkedHashMap<String, Integer>();
this.applicationName = applicationName;
}
/**
* sets the admin.
*
* @param admin
* admin instance.
*/
public void setAdmin(final Admin admin) {
this.admin = admin;
}
/**
* sets the current lifecycleEventsContainer to be updated by the callable
* task.
*
* @param lifecycleEventsContainer
* ref to a lifecycleEventsContainer.
*/
public void setLifecycleEventsContainer(
final LifecycleEventsContainer lifecycleEventsContainer) {
this.lifecycleEventsContainer = lifecycleEventsContainer;
}
/**
* gets the lifecycle event container.
*
* @return the lifecycle event container of the runnable thread
*/
public LifecycleEventsContainer getLifecycleEventsContainer() {
return this.lifecycleEventsContainer;
}
/**
* Add a service to the polling callable. the service will be sampled until
* it reaches it's planned number of instances or until a timeout exception
* is thrown.
*
* @param serviceName
* The absoulute pu name.
* @param plannedNumberOfInstances
* planned number of instances.
*/
public void addService(final String serviceName,
final int plannedNumberOfInstances) {
this.serviceNames.put(serviceName, plannedNumberOfInstances);
}
/**
* sets the services to poll and their planned number of instances.
*
* @param isServiceInstall
* true if installation is for a single service.
*/
public void setIsServiceInstall(final boolean isServiceInstall) {
this.isServiceInstall = isServiceInstall;
}
/**
* Set to true if the action invoked was setInstances.
*
* @param isSetInstances
* is the action a set instances action. default is set to false.
*/
public void setIsSetInstances(final boolean isSetInstances) {
this.isSetInstances = isSetInstances;
}
/**
* Sets the runnable's lifetime lease.
*
* @param timeout
* timeout period
* @param timeunit
* timeout timeunit
*/
public synchronized void setEndTime(final long timeout,
final TimeUnit timeunit) {
this.endTime = System.currentTimeMillis() + timeunit.toMillis(timeout);
}
/**
* returns true if the task is done.
*
* @return true if thread has ended, false otherwise.
*/
public boolean isDone() {
return this.isDone;
}
/**
* Extends the runnable's lifetime lease.
*
* @param timeout
* timeout period
* @param timeunit
* timeout timeunit
*/
public synchronized void increaseEndTimeBy(final long timeout,
final TimeUnit timeunit) {
this.endTime = endTime + timeunit.toMillis(timeout);
}
/**
* Returns the thread's end time.
*
* @return the thread's end time.
*/
public synchronized long getEndTime() {
return endTime;
}
private void setExecutionException(final Throwable e) {
synchronized (this.lock) {
this.executionException = e;
}
}
/**
* gets the execution exception that occurred on the polling thread.
*
* @return the execution exception that occurred on the polling thread or
* null
*/
public ExecutionException getExecutionException() {
synchronized (this.lock) {
if (this.executionException == null) {
return null;
}
return new ExecutionException(this.executionException);
}
}
public void setFutureTask(final Future<?> future) {
this.futureTask = future;
}
public void run() {
try {
if (this.serviceNames.isEmpty()) {
logger.log(Level.INFO,
"Polling for lifecycle events has ended successfully."
+ " Terminating the polling task");
// We stop the thread from being scheduled again.
throw new RestServiceException("Polling has ended successfully");
}
if (System.currentTimeMillis() > this.endTime) {
throw new TimeoutException("Timed out");
}
if (this.deploymentExecutionException != null) {
throw new Exception(deploymentExecutionException);
}
pollForLogs();
} catch (final Throwable e) {
if (!(e instanceof RestServiceException)) {
logger.log(
Level.INFO,
"Polling task ended unexpectedly. Reason: "
+ e.getMessage(), e);
setExecutionException(e);
} else if (!this.failedServiceInstallationList.isEmpty()) {
// Some service installations finished with errors.
String[] failedServiceNames = this.failedServiceInstallationList.toArray(
new String[failedServiceInstallationList.size()]);
final Exception ex = new ExecutionException(
"Errors occurred during installation of services: "
+ Arrays.toString(failedServiceNames), e);
setExecutionException(ex);
} else {
logger.log(Level.INFO, "Polling task ended successfully.");
}
terminateTaskGracefully();
// this exception should not be caught. it is meant to make the
// scheduler stop
// the thread execution.
throw new RuntimeException(e);
}
}
private void terminateTaskGracefully() {
this.isDone = true;
if (this.futureTask != null) {
this.futureTask.cancel(true);
}
}
/**
* Goes over each service defined prior to the callable execution and polls
* it for lifecycle and instance count events.
*
* @throws ExecutionException
*/
private void pollForLogs() throws ExecutionException {
final LinkedHashMap<String, Integer> serviceNamesClone = new LinkedHashMap<String, Integer>();
serviceNamesClone.putAll(this.serviceNames);
for (final String serviceName : serviceNamesClone.keySet()) {
addServiceLifecycleLogs(serviceName);
final int plannedNumberOfInstances = getPlannedNumberOfInstances(serviceName);
final int numberOfServiceInstances = getNumberOfServiceInstances(serviceName);
final int numberOfFailedInstances = getNumberOfFailedInstances(serviceName);
addServiceInstanceCountEvents(serviceName,
plannedNumberOfInstances, numberOfServiceInstances, numberOfFailedInstances);
removeEndedServicesFromPollingList(serviceName,
plannedNumberOfInstances, numberOfServiceInstances,
numberOfFailedInstances);
if (numberOfFailedInstances != 0 && !isUninstall) {
this.failedServiceInstallationList.add(serviceName);
}
}
}
private void removeEndedServicesFromPollingList(final String serviceName,
final int plannedNumberOfInstances,
final int numberOfServiceInstances,
final int numberOfFailedInstances) throws ExecutionException {
if (isUninstall) {
final String absolutePuName = ServiceUtils.getAbsolutePUName(
applicationName, serviceName);
try {
final Boolean undeployedSuccessfully = this.undeployTask
.get(ONE_SEC, TimeUnit.SECONDS);
if (undeployedSuccessfully) {
logger.info("undeployAndWait for processing unit " + absolutePuName + " has finished");
this.serviceNames.remove(serviceName);
this.lifecycleEventsContainer
.addNonLifecycleEvents("Service \"" + serviceName
+ "\" uninstalled successfully");
}
} catch (final Exception e) {
if (e instanceof TimeoutException) {
logger.info("undeployAndWait for processing unit " + absolutePuName + " has not finished yet");
} else {
final String message = "undeploy task has ended unsuccessfully. "
+ "Some machines may not have been terminated!";
logger.log(Level.WARNING, message, e);
lifecycleEventsContainer.addNonLifecycleEvents(message);
throw new ExecutionException(message, e);
}
}
} else {
if (plannedNumberOfInstances == numberOfServiceInstances
+ numberOfFailedInstances) {
this.serviceNames.remove(serviceName);
}
}
}
private void addServiceLifecycleLogs(final String serviceName) {
List<Map<String, String>> servicesLifecycleEventDetailes;
servicesLifecycleEventDetailes = new ArrayList<Map<String, String>>();
final String absolutePuName = ServiceUtils.getAbsolutePUName(
this.applicationName, serviceName);
logger.log(Level.FINEST, "Polling for lifecycle events on service: "
+ absolutePuName);
final Zone zone = admin.getZones().getByName(absolutePuName);
if (zone == null) {
return;
}
// TODO: this is not very efficient. Maybe possible to move the
// LogEntryMatcher
// as field add to init to call .
final String regex = MessageFormat.format(USM_EVENT_LOGGER_NAME,
absolutePuName);
final LogEntryMatcher matcher = regex(regex);
for (final GridServiceContainer container : zone
.getGridServiceContainers()) {
logger.log(Level.FINEST,
"Polling GSC with uid: " + container.getUid());
final Date pollingStartTime = getGSCSamplingStartTime(container);
LogEntries logEntries = null;
try {
logEntries = container.logEntries(matcher);
} catch (AdminException e) {
logger.log(Level.INFO, "an internal admin exception was thrown. Reason: " + e.getMessage(), e);
}
if (logEntries != null) {
// Get lifecycle events.
for (final LogEntry logEntry : logEntries) {
if (logEntry.isLog()) {
if (pollingStartTime.before(new Date(logEntry
.getTimestamp()))) {
final Map<String, String> serviceEventsMap = getEventDetailes(
logEntry, container, absolutePuName);
servicesLifecycleEventDetailes.add(serviceEventsMap);
}
}
}
}
this.lifecycleEventsContainer
.addLifecycleEvents(servicesLifecycleEventDetailes);
}
}
// Returns the time the polling started for the specific gsc.
private Date getGSCSamplingStartTime(final GridServiceContainer gsc) {
final String uid = gsc.getUid();
if (this.gscStartTimeMap.containsKey(uid)) {
return this.gscStartTimeMap.get(uid);
} else {
final Date date = new Date(new Date().getTime()
+ gsc.getOperatingSystem().getTimeDelta()
- FIVE_SECONDS_MILLI);
this.gscStartTimeMap.put(uid, date);
return date;
}
}
private void addServiceInstanceCountEvents(final String serviceName,
final int plannedNumberOfInstances,
final int numberOfServiceInstances,
final int numberOfFailedInstances) {
final String absolutePuName = ServiceUtils.getAbsolutePUName(
applicationName, serviceName);
if (numberOfServiceInstances == 0) {
if (!isUninstall) {
this.lifecycleEventsContainer
.addNonLifecycleEvents("Deploying " + serviceName
+ " with " + plannedNumberOfInstances
+ " planned instances.");
}
} else {
String event = "[" + serviceName + "] " + "Deployed "
+ numberOfServiceInstances + " planned "
+ plannedNumberOfInstances;
if (numberOfFailedInstances > 0) {
event += " failed " + numberOfFailedInstances;
}
this.lifecycleEventsContainer.addNonLifecycleEvents(event);
}
if (plannedNumberOfInstances == numberOfServiceInstances) {
if (!isServiceInstall) {
if (!isUninstall && !isSetInstances) {
this.lifecycleEventsContainer
.addNonLifecycleEvents("Service \"" + serviceName
+ "\" successfully installed ("
+ numberOfServiceInstances + " Instances)");
}
}
}
final Zone zone = admin.getZones().getByName(absolutePuName);
if (zone == null) {
// now waiting for machine to shutdown
if (isUninstall) {
this.lifecycleEventsContainer
.addNonLifecycleEvents("Service \"" + serviceName
+ "\" was stopped successfully , releasing cloud resources...");
}
}
}
/**
* The planned number of service instances is saved to the serviceNames map
* during initialization of the callable. in case of datagrid deployment,
* the planned number of instances will be reviled only after it's PU has
* been created and so we need to poll the pu to get the correct number of
* planned instances in case the pu is of type datagrid.
*
* @param serviceName
* The service name
* @return planned number of service instances
*/
private int getPlannedNumberOfInstances(final String serviceName) {
if (isUninstall) {
return 0;
}
final String absolutePuName = ServiceUtils.getAbsolutePUName(
applicationName, serviceName);
final ProcessingUnit processingUnit = admin.getProcessingUnits()
.getProcessingUnit(absolutePuName);
if (processingUnit != null) {
final Map<String, String> elasticProperties = ((DefaultProcessingUnit) processingUnit)
.getElasticProperties();
if (elasticProperties.containsKey("schema")) {
final String clusterSchemaValue = elasticProperties
.get("schema");
if ("partitioned-sync2backup".equals(clusterSchemaValue)) {
return processingUnit.getTotalNumberOfInstances();
}
}
}
if (serviceNames.containsKey(serviceName)) {
return serviceNames.get(serviceName);
}
throw new IllegalStateException(
"Service planned number of instances is undefined");
}
/**
* Gets the service instance count for every type of service (USM/Other). if
* the service pu is not running yet, returns 0.
*
* @param serviceName
* The absolute service name.
* @return the service's number of running instances.
*/
private int getNumberOfServiceInstances(final String serviceName) {
final String absolutePuName = ServiceUtils.getAbsolutePUName(
applicationName, serviceName);
final ProcessingUnit processingUnit = admin.getProcessingUnits()
.getProcessingUnit(absolutePuName);
if (processingUnit != null) {
if (processingUnit.getType() == ProcessingUnitType.UNIVERSAL) {
return getNumberOfUSMServicesWithState(absolutePuName,
USMState.RUNNING);
}
return admin.getProcessingUnits().getProcessingUnit(absolutePuName)
.getInstances().length;
}
return 0;
}
private int getNumberOfFailedInstances(final String serviceName) {
final String absolutePuName = ServiceUtils.getAbsolutePUName(
applicationName, serviceName);
final ProcessingUnit processingUnit = admin.getProcessingUnits()
.getProcessingUnit(absolutePuName);
if (processingUnit != null) {
if (processingUnit.getType() == ProcessingUnitType.UNIVERSAL) {
return getNumberOfUSMServicesWithState(absolutePuName,
USMState.ERROR);
}
return 0;
}
return 0;
}
// returns the number of RUNNING processing unit instances.
private int getNumberOfUSMServicesWithState(final String absolutePUName,
final USMState state) {
int puInstanceCounter = 0;
final ProcessingUnit processingUnit = admin.getProcessingUnits()
.getProcessingUnit(absolutePUName);
for (final ProcessingUnitInstance pui : processingUnit) {
// TODO: get the instanceState step
if (isUsmInState(pui, state)) {
puInstanceCounter++;
}
}
return puInstanceCounter;
}
private boolean isUsmInState(final ProcessingUnitInstance pui,
final USMState state) {
final ProcessingUnitInstanceStatistics statistics = pui.getStatistics();
if (statistics == null) {
return false;
}
final Map<String, ServiceMonitors> puMonitors = statistics
.getMonitors();
if (puMonitors == null) {
return false;
}
final ServiceMonitors serviceMonitors = puMonitors.get("USM");
if (serviceMonitors == null) {
return false;
}
final Map<String, Object> monitors = serviceMonitors.getMonitors();
if (monitors == null) {
return false;
}
@SuppressWarnings("boxing")
final int instanceState = (Integer) monitors
.get(CloudifyConstants.USM_MONITORS_STATE_ID);
return (CloudifyConstants.USMState.values()[instanceState] == state);
}
/**
* tells the polling task to expect uninstall or install of service. the
* default value is set to false.
*
* @param isUninstall
* is the task being preformed an uninstall task.
*/
public void setIsUninstall(final boolean isUninstall) {
this.isUninstall = isUninstall;
}
/**
* generates a map containing all of the event's details.
*
* @param logEntry
* The event log entry originated from the GSC log
* @param container
* the GSC of the specified event
* @param absolutePuName
* the absolute processing unit name.
* @return returns a details map containing all of an events details.
*/
private Map<String, String> getEventDetailes(final LogEntry logEntry,
final GridServiceContainer container, final String absolutePuName) {
final Map<String, String> returnMap = new HashMap<String, String>();
returnMap.put(EventLogConstants.getTimeStampKey(),
Long.toString(logEntry.getTimestamp()));
returnMap.put(EventLogConstants.getMachineHostNameKey(), container
.getMachine().getHostName());
returnMap.put(EventLogConstants.getMachineHostAddressKey(), container
.getMachine().getHostAddress());
returnMap.put(EventLogConstants.getServiceNameKey(),
ServiceUtils.getApplicationServiceName(absolutePuName,
this.applicationName));
// The string replacement is done since the service name that is
// received from the USM logs derived from actual PU name.
final String serviceName = returnMap.get(EventLogConstants.getServiceNameKey()) + "-";
final String originalText = logEntry.getText();
final String modifiedText = originalText.replaceFirst(
absolutePuName + "-",
serviceName);
returnMap.put(
EventLogConstants.getEventTextKey(),
modifiedText);
return returnMap;
}
public void setUndeployTask(final FutureTask<Boolean> undeployTask) {
this.undeployTask = undeployTask;
}
/**
* Sets a deployment exception for a specific deployment process.
* @param deploymentExecutionException
* the deployment exception.
*/
public void setDeploymentExecutionException(
final Exception deploymentExecutionException) {
this.deploymentExecutionException = deploymentExecutionException;
}
}