/*
* Copyright (C) 2015 hops.io.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.hops.erasure_coding;
import io.hops.exception.StorageException;
import io.hops.metadata.HdfsStorageFactory;
import io.hops.metadata.hdfs.dal.EncodingStatusDataAccess;
import io.hops.metadata.hdfs.entity.EncodingStatus;
import io.hops.transaction.EntityManager;
import io.hops.transaction.handler.EncodingStatusOperationType;
import io.hops.transaction.handler.HDFSOperationType;
import io.hops.transaction.handler.HopsTransactionalRequestHandler;
import io.hops.transaction.handler.LightWeightRequestHandler;
import io.hops.transaction.lock.LockFactory;
import io.hops.transaction.lock.TransactionLockTypes;
import io.hops.transaction.lock.TransactionLocks;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.INode;
import org.apache.hadoop.util.Daemon;
import org.apache.hadoop.util.StringUtils;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.Collection;
import java.util.List;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Daemon that manages erasure-coded files and their status. It scans for
* requested encodings or repairs and schedules them if resources are available.
* It checks the status of encodings and repairs and adjusts the file states
* accordingly.
*/
public class ErasureCodingManager extends Configured {
static final Log LOG = LogFactory.getLog(ErasureCodingManager.class);
private final FSNamesystem namesystem;
private final Daemon erasureCodingMonitorThread = new Daemon(
new ErasureCodingMonitor());
private EncodingManager encodingManager;
private BlockRepairManager blockRepairManager;
private String parityFolder;
private final long recheckInterval;
private final int activeEncodingLimit;
private int activeEncodings = 0;
private final int activeRepairLimit;
private final int activeParityRepairLimit;
private int activeRepairs = 0;
private int activeParityRepairs = 0;
private final int repairDelay;
private final int parityRepairDelay;
private final int deletionLimit;
private static boolean enabled = false;
/**
 * Creates the manager and reads all of its settings from the given
 * configuration. The monitor thread is only created here, not started;
 * starting happens in {@link #activate()}.
 *
 * @param namesystem the namesystem this manager queries and updates
 * @param conf configuration supplying the parity folder, recheck interval,
 *     limits, delays and the enabled flag
 */
public ErasureCodingManager(FSNamesystem namesystem, Configuration conf) {
  super(conf);
  this.namesystem = namesystem;

  // Location of the parity files.
  this.parityFolder = conf.get(
      DFSConfigKeys.PARITY_FOLDER, DFSConfigKeys.DEFAULT_PARITY_FOLDER);

  // Pause between two monitor rounds.
  this.recheckInterval = conf.getInt(
      DFSConfigKeys.RECHECK_INTERVAL_KEY,
      DFSConfigKeys.DEFAULT_RECHECK_INTERVAL);

  // Upper bounds on concurrently running jobs and on deletions per round.
  this.activeEncodingLimit = conf.getInt(
      DFSConfigKeys.ACTIVE_ENCODING_LIMIT_KEY,
      DFSConfigKeys.DEFAULT_ACTIVE_ENCODING_LIMIT);
  this.activeRepairLimit = conf.getInt(
      DFSConfigKeys.ACTIVE_REPAIR_LIMIT_KEY,
      DFSConfigKeys.DEFAULT_ACTIVE_REPAIR_LIMIT);
  this.activeParityRepairLimit = conf.getInt(
      DFSConfigKeys.ACTIVE_PARITY_REPAIR_LIMIT_KEY,
      DFSConfigKeys.DEFAULT_ACTIVE_PARITY_REPAIR_LIMIT);
  this.deletionLimit = conf.getInt(
      DFSConfigKeys.DELETION_LIMIT_KEY,
      DFSConfigKeys.DEFAULT_DELETION_LIMIT);

  // Minimum age of a status change before a repair is scheduled.
  this.repairDelay = conf.getInt(
      DFSConfigKeys.REPAIR_DELAY_KEY,
      DFSConfigKeys.DEFAULT_REPAIR_DELAY_KEY);
  this.parityRepairDelay = conf.getInt(
      DFSConfigKeys.PARITY_REPAIR_DELAY_KEY,
      DFSConfigKeys.DEFAULT_PARITY_REPAIR_DELAY);

  // Note: this writes the class-wide (static) flag read by isEnabled().
  enabled = conf.getBoolean(
      DFSConfigKeys.ERASURE_CODING_ENABLED_KEY,
      DFSConfigKeys.DEFAULT_ERASURE_CODING_ENABLED_KEY);
}
/**
 * Instantiates the configured {@link EncodingManager} and
 * {@link BlockRepairManager} implementations via reflection and stores them
 * in the corresponding fields.
 *
 * @return {@code true} if both managers were created, {@code false} if any
 *     step failed (the failure is logged)
 */
private boolean loadRaidNodeClasses() {
  try {
    encodingManager = instantiateManager(
        DFSConfigKeys.ENCODING_MANAGER_CLASSNAME_KEY,
        DFSConfigKeys.DEFAULT_ENCODING_MANAGER_CLASSNAME,
        EncodingManager.class);
    blockRepairManager = instantiateManager(
        DFSConfigKeys.BLOCK_REPAIR_MANAGER_CLASSNAME_KEY,
        DFSConfigKeys.DEFAULT_BLOCK_REPAIR_MANAGER_CLASSNAME,
        BlockRepairManager.class);
    return true;
  } catch (Exception e) {
    LOG.error("Could not load erasure coding classes", e);
    return false;
  }
}

/**
 * Resolves a manager class from the configuration (falling back to the given
 * default class name), verifies that it implements the expected type and
 * creates an instance through its {@code (Configuration)} constructor.
 *
 * @param classNameKey configuration key naming the implementation class
 * @param defaultClassName class name to load when the key is not set
 * @param managerType type the implementation has to be assignable to
 * @return the new manager instance
 * @throws Exception if the class cannot be resolved, is of the wrong type or
 *     cannot be instantiated
 */
private <T> T instantiateManager(String classNameKey, String defaultClassName,
    Class<T> managerType) throws Exception {
  Class<?> implementation = getConf().getClass(classNameKey, null);
  if (implementation == null) {
    implementation = Class.forName(defaultClassName);
  }
  if (!managerType.isAssignableFrom(implementation)) {
    throw new ClassNotFoundException(
        implementation + " is not an implementation of " +
            managerType.getCanonicalName());
  }
  Constructor<?> configurationConstructor =
      implementation.getConstructor(Configuration.class);
  return managerType.cast(configurationConstructor.newInstance(getConf()));
}
/**
 * Loads the encoding and repair manager implementations and, if that
 * succeeds, starts the monitor thread. When loading fails the thread is not
 * started and an error is logged.
 */
public void activate() {
  final boolean librariesLoaded = loadRaidNodeClasses();
  if (librariesLoaded) {
    erasureCodingMonitorThread.start();
    LOG.info("ErasureCodingMonitor started");
  } else {
    LOG.error("ErasureCodingMonitor not started. An error occurred during" +
        " the loading of the encoding library.");
  }
}
/**
 * Interrupts the monitor thread and waits up to three seconds for it to
 * terminate.
 *
 * If the calling thread is itself interrupted while waiting, the wait is
 * abandoned and the caller's interrupt status is restored so that code
 * further up the stack can still observe the interruption.
 */
public void close() {
  // The thread field is final and always initialised, so no null check is
  // required here.
  erasureCodingMonitorThread.interrupt();
  try {
    erasureCodingMonitorThread.join(3000);
  } catch (InterruptedException ie) {
    // Do not swallow the interruption: re-assert it for our caller.
    Thread.currentThread().interrupt();
  }
  LOG.info("ErasureCodingMonitor stopped");
}
/**
 * Reads the erasure coding switch directly from the given configuration,
 * without consulting any manager instance.
 *
 * @param conf configuration to read from
 * @return the configured value, or the default when the key is not set
 */
public static boolean isErasureCodingEnabled(Configuration conf) {
  final boolean configuredValue = conf.getBoolean(
      DFSConfigKeys.ERASURE_CODING_ENABLED_KEY,
      DFSConfigKeys.DEFAULT_ERASURE_CODING_ENABLED_KEY);
  return configuredValue;
}
private class ErasureCodingMonitor implements Runnable {
@Override
public void run() {
while (namesystem.isRunning()) {
try {
try {
if (namesystem.isInSafeMode()) {
continue;
}
} catch (IOException e) {
LOG.info("In safe mode skipping this round");
}
if (namesystem.isLeader()) {
checkActiveEncodings();
scheduleEncodings();
checkActiveRepairs();
scheduleSourceRepairs();
scheduleParityRepairs();
garbageCollect();
checkRevoked();
}
try {
Thread.sleep(recheckInterval);
} catch (InterruptedException ie) {
LOG.warn("ErasureCodingMonitor thread received " +
"InterruptedException.", ie);
break;
}
} catch (Throwable e) {
LOG.error(e);
}
}
}
}
/**
 * Asks the encoding manager for its job reports and records the outcome of
 * every job that is no longer active. Each finished, failed or canceled
 * report decrements the active encoding counter.
 *
 * @throws IOException declared for the report retrieval
 */
private void checkActiveEncodings() throws IOException {
  LOG.info("Checking active encoding.");
  for (Report encodingReport : encodingManager.computeReports()) {
    final String filePath = encodingReport.getFilePath();
    switch (encodingReport.getStatus()) {
      case FINISHED:
        LOG.info("Encoding finished for " + filePath);
        finalizeEncoding(filePath);
        activeEncodings--;
        break;
      case FAILED:
        LOG.info("Encoding failed for " + filePath);
        updateEncodingStatus(filePath,
            EncodingStatus.Status.ENCODING_FAILED,
            EncodingStatus.ParityStatus.REPAIR_FAILED);
        activeEncodings--;
        break;
      case CANCELED:
        LOG.info("Encoding canceled for " + filePath);
        updateEncodingStatus(filePath,
            EncodingStatus.Status.ENCODING_CANCELED);
        activeEncodings--;
        break;
      case ACTIVE:
        // Still running, nothing to record yet.
        break;
    }
  }
}
/**
 * Records the result of a finished encoding job for the given source file.
 *
 * Inside one transaction the source and parity inodes are looked up. If the
 * parity file exists the status becomes ENCODED with a HEALTHY parity,
 * otherwise ENCODING_FAILED. Nothing is changed when the source file is
 * gone, when no encoding status is stored for it, or when the stored status
 * is not ENCODING_ACTIVE. IOExceptions are logged and not propagated.
 *
 * @param path path of the source file whose encoding finished
 */
private void finalizeEncoding(final String path) {
  LOG.info("Finalizing encoding for " + path);
  try {
    new HopsTransactionalRequestHandler(HDFSOperationType.GET_INODE) {
      private String parityPath;

      @Override
      public void setUp() throws StorageException, IOException {
        super.setUp();
        // The parity path has to be known before the locks are acquired.
        EncodingStatus status = namesystem.getEncodingStatus(path);
        parityPath = parityFolder + "/" + status.getParityFileName();
      }

      @Override
      public void acquireLock(TransactionLocks locks) throws IOException {
        LockFactory lf = LockFactory.getInstance();
        locks.add(lf.getINodeLock(namesystem.getNameNode(),
            TransactionLockTypes.INodeLockType.WRITE,
            TransactionLockTypes.INodeResolveType.PATH, path, parityPath))
            .add(lf.getEncodingStatusLock(TransactionLockTypes.LockType.WRITE,
                path));
      }

      @Override
      public Object performTask() throws StorageException, IOException {
        INode sourceInode = namesystem.getINode(path);
        INode parityInode = namesystem.getINode(parityPath);
        if (sourceInode == null) {
          return null;
        }
        EncodingStatus encodingStatus = EntityManager
            .find(EncodingStatus.Finder.ByInodeId, sourceInode.getId());
        if (encodingStatus == null) {
          // Without this guard a NullPointerException would escape and
          // abort the whole monitor round.
          LOG.warn("No encoding status found for " + path +
              ". Encoding not finalized.");
          return null;
        }
        // Might get reported a second time after recovery
        if (encodingStatus.getStatus()
            != EncodingStatus.Status.ENCODING_ACTIVE) {
          return null;
        }
        final long now = System.currentTimeMillis();
        if (parityInode == null) {
          encodingStatus.setStatus(EncodingStatus.Status.ENCODING_FAILED);
          encodingStatus.setStatusModificationTime(now);
        } else {
          encodingStatus.setStatus(EncodingStatus.Status.ENCODED);
          encodingStatus.setStatusModificationTime(now);
          encodingStatus.setParityInodeId(parityInode.getId());
          encodingStatus.setParityStatus(EncodingStatus.ParityStatus.HEALTHY);
          encodingStatus.setParityStatusModificationTime(now);
        }
        EntityManager.update(encodingStatus);
        return null;
      }
    }.handle(this);
  } catch (IOException e) {
    LOG.error(StringUtils.stringifyException(e));
  }
}
/**
 * Updates only the source status of the given file; the parity status is
 * passed on as {@code null}.
 */
private void updateEncodingStatus(String filePath,
    EncodingStatus.Status status) {
  updateEncodingStatus(filePath, status, (EncodingStatus.ParityStatus) null);
}

/**
 * Updates only the parity status of the given file; the source status is
 * passed on as {@code null}.
 */
private void updateEncodingStatus(String filePath,
    EncodingStatus.ParityStatus status) {
  updateEncodingStatus(filePath, (EncodingStatus.Status) null, status);
}

/**
 * Forwards a status update to the namesystem. An IOException raised by the
 * namesystem is logged and not propagated to the caller.
 *
 * @param filePath path of the file whose status is updated
 * @param status source status to pass to the namesystem, may be null
 * @param parityStatus parity status to pass to the namesystem, may be null
 */
private void updateEncodingStatus(String filePath,
    EncodingStatus.Status status, EncodingStatus.ParityStatus parityStatus) {
  try {
    namesystem.updateEncodingStatus(filePath, status, parityStatus, null);
  } catch (IOException updateFailure) {
    LOG.error(StringUtils.stringifyException(updateFailure));
  }
}
/**
 * Starts encoding jobs for files with a requested encoding, as long as the
 * active encoding limit leaves room for more jobs.
 *
 * Files whose inode cannot be found, that are still under construction or
 * whose path cannot be resolved are skipped. For each scheduled file a
 * random parity file name is generated, the encoding job is handed to the
 * encoding manager and the status is set to ENCODING_ACTIVE. An IOException
 * for a single file is logged and does not stop the remaining files.
 *
 * @throws IOException if the lookup of requested encodings fails
 */
private void scheduleEncodings() throws IOException {
  LOG.info("Schedule encodings.");
  final int limit = activeEncodingLimit - activeEncodings;
  if (limit <= 0) {
    return;
  }
  LightWeightRequestHandler findHandler = new LightWeightRequestHandler(
      EncodingStatusOperationType.FIND_REQUESTED_ENCODINGS) {
    @Override
    public Object performTask() throws StorageException, IOException {
      EncodingStatusDataAccess<EncodingStatus> dataAccess =
          (EncodingStatusDataAccess) HdfsStorageFactory
              .getDataAccess(EncodingStatusDataAccess.class);
      return dataAccess.findRequestedEncodings(limit);
    }
  };
  Collection<EncodingStatus> requestedEncodings =
      (Collection<EncodingStatus>) findHandler.handle();
  for (EncodingStatus encodingStatus : requestedEncodings) {
    try {
      LOG.info("Trying to schedule encoding for " + encodingStatus);
      INode iNode = namesystem.findInode(encodingStatus.getInodeId());
      if (iNode == null) {
        LOG.error("findInode returned null for id " + encodingStatus.
            getInodeId());
        continue;
      }
      if (iNode.isUnderConstruction()) {
        // It might still be written to the file
        LOG.info("Still under construction. Encoding not scheduled for " +
            iNode.getId());
        continue;
      }
      String path = namesystem.getPath(iNode.getId());
      // The guard has to test the resolved path; the inode was already
      // checked for null above.
      if (path == null) {
        LOG.error("getPath returned null for id " + iNode.getId());
        continue;
      }
      LOG.info("Schedule encoding for " + path);
      UUID parityFileName = UUID.randomUUID();
      encodingManager.encodeFile(
          encodingStatus.getEncodingPolicy(),
          new Path(path),
          new Path(parityFolder + "/" + parityFileName.toString()),
          encodingStatus.getStatus() ==
              EncodingStatus.Status.COPY_ENCODING_REQUESTED);
      namesystem.updateEncodingStatus(path,
          EncodingStatus.Status.ENCODING_ACTIVE, parityFileName.toString());
      activeEncodings++;
    } catch (IOException e) {
      LOG.error(StringUtils.stringifyException(e));
    }
  }
}
/**
 * Asks the block repair manager for its job reports and records the outcome
 * of every repair that is no longer active. Whether a report belongs to a
 * parity or a source repair is decided with {@link #isParityFile(String)};
 * the matching active counter is decremented accordingly.
 *
 * @throws IOException if retrieving the reports or re-checking a finished
 *     repair fails
 */
private void checkActiveRepairs() throws IOException {
  LOG.info("Checking active repairs.");
  for (Report repairReport : blockRepairManager.computeReports()) {
    final String filePath = repairReport.getFilePath();
    switch (repairReport.getStatus()) {
      case FINISHED:
        LOG.info("Repair finished for " + filePath);
        if (!isParityFile(filePath)) {
          checkFixedSource(filePath);
          activeRepairs--;
        } else {
          checkFixedParity(filePath);
          activeParityRepairs--;
        }
        break;
      case FAILED:
        LOG.info("Repair failed for " + filePath);
        if (!isParityFile(filePath)) {
          updateEncodingStatus(filePath,
              EncodingStatus.Status.REPAIR_FAILED);
          activeRepairs--;
        } else {
          updateEncodingStatus(filePath,
              EncodingStatus.ParityStatus.REPAIR_FAILED);
          activeParityRepairs--;
        }
        break;
      case CANCELED:
        LOG.info("Repair canceled for " + filePath);
        if (!isParityFile(filePath)) {
          updateEncodingStatus(filePath,
              EncodingStatus.Status.REPAIR_CANCELED);
          activeRepairs--;
        } else {
          updateEncodingStatus(filePath,
              EncodingStatus.ParityStatus.REPAIR_CANCELED);
          activeParityRepairs--;
        }
        break;
      case ACTIVE:
        // Still running, nothing to record yet.
        break;
    }
  }
}
/**
 * Re-evaluates the status of a source file after its repair job finished.
 *
 * Inside one transaction the stored encoding status is loaded: with no lost
 * blocks left the status becomes ENCODED, otherwise REPAIR_REQUESTED again.
 * If the file or its encoding status no longer exists, nothing is updated
 * and a warning is logged. Without these guards a NullPointerException
 * would escape, and the caller in checkActiveRepairs would never reach its
 * decrement of the active repair counter.
 *
 * @param path path of the repaired source file
 * @throws IOException if the transaction fails
 */
private void checkFixedSource(final String path) throws IOException {
  new HopsTransactionalRequestHandler(HDFSOperationType.CHECK_FIXED_SOURCE) {
    @Override
    public void acquireLock(TransactionLocks locks) throws IOException {
      LockFactory lf = LockFactory.getInstance();
      locks.add(lf.getINodeLock(namesystem.getNameNode(),
          TransactionLockTypes.INodeLockType.WRITE,
          TransactionLockTypes.INodeResolveType.PATH, path)).add(
          lf.getEncodingStatusLock(TransactionLockTypes.LockType.WRITE,
              path));
    }

    @Override
    public Object performTask() throws IOException {
      INode targetNode = namesystem.getINode(path);
      if (targetNode == null) {
        LOG.warn("Repaired source file " + path +
            " not found. Status not updated.");
        return null;
      }
      EncodingStatus status = EntityManager
          .find(EncodingStatus.Finder.ByInodeId, targetNode.getId());
      if (status == null) {
        LOG.warn("No encoding status found for repaired source file " +
            path + ". Status not updated.");
        return null;
      }
      if (status.getLostBlocks() == 0) {
        status.setStatus(EncodingStatus.Status.ENCODED);
      } else {
        // Blocks are still missing: request another repair.
        status.setStatus(EncodingStatus.Status.REPAIR_REQUESTED);
      }
      status.setStatusModificationTime(System.currentTimeMillis());
      EntityManager.update(status);
      return null;
    }
  }.handle();
}
/**
 * Re-evaluates the parity status after a parity repair job finished.
 *
 * Inside one transaction the encoding status is looked up by the parity
 * inode id: with no lost parity blocks left the parity status becomes
 * HEALTHY, otherwise REPAIR_REQUESTED again. If the parity file or the
 * encoding status no longer exists, nothing is updated and a warning is
 * logged. Without these guards a NullPointerException would escape, and the
 * caller in checkActiveRepairs would never reach its decrement of the
 * active parity repair counter.
 *
 * @param path path of the repaired parity file
 * @throws IOException if the transaction fails
 */
private void checkFixedParity(final String path) throws IOException {
  new HopsTransactionalRequestHandler(HDFSOperationType.CHECK_FIXED_PARITY) {
    @Override
    public void acquireLock(TransactionLocks locks) throws IOException {
      LockFactory lf = LockFactory.getInstance();
      locks.add(lf.getINodeLock(namesystem.getNameNode(),
          TransactionLockTypes.INodeLockType.WRITE,
          TransactionLockTypes.INodeResolveType.PATH, path))
          .add(lf.getEncodingStatusLock(
              TransactionLockTypes.LockType.WRITE, path));
    }

    @Override
    public Object performTask() throws IOException {
      INode targetNode = namesystem.getINode(path);
      if (targetNode == null) {
        LOG.warn("Repaired parity file " + path +
            " not found. Parity status not updated.");
        return null;
      }
      EncodingStatus status = EntityManager.find(
          EncodingStatus.Finder.ByParityInodeId, targetNode.getId());
      if (status == null) {
        LOG.warn("No encoding status found for repaired parity file " +
            path + ". Parity status not updated.");
        return null;
      }
      if (status.getLostParityBlocks() == 0) {
        status.setParityStatus(EncodingStatus.ParityStatus.HEALTHY);
      } else {
        // Parity blocks are still missing: request another repair.
        status.setParityStatus(EncodingStatus.ParityStatus.REPAIR_REQUESTED);
      }
      status.setParityStatusModificationTime(System.currentTimeMillis());
      EntityManager.update(status);
      return null;
    }
  }.handle();
}
/**
 * Starts source repair jobs for files with a requested repair, as long as
 * the active repair limit leaves room for more jobs.
 *
 * A file is skipped while its last status change is younger than the repair
 * delay or while a parity repair is active for it. An IOException for a
 * single file is logged and does not stop the remaining files.
 *
 * @throws IOException if the lookup of requested repairs fails
 */
private void scheduleSourceRepairs() throws IOException {
  LOG.info("Scheduling repairs");
  final int freeSlots = activeRepairLimit - activeRepairs;
  if (freeSlots <= 0) {
    return;
  }

  LightWeightRequestHandler repairFinder = new LightWeightRequestHandler(
      EncodingStatusOperationType.FIND_REQUESTED_REPAIRS) {
    @Override
    public Object performTask() throws IOException {
      EncodingStatusDataAccess<EncodingStatus> statusAccess =
          (EncodingStatusDataAccess) HdfsStorageFactory
              .getDataAccess(EncodingStatusDataAccess.class);
      return statusAccess.findRequestedRepairs(freeSlots);
    }
  };
  Collection<EncodingStatus> candidates =
      (Collection<EncodingStatus>) repairFinder.handle();

  for (EncodingStatus candidate : candidates) {
    try {
      LOG.info("Scheduling source repair for " + candidate);

      final long sinceLastChange = System.currentTimeMillis()
          - candidate.getStatusModificationTime();
      if (sinceLastChange < repairDelay) {
        LOG.info("Skipping source repair. Delay not reached: " + repairDelay);
        continue;
      }
      if (candidate.isParityRepairActive()) {
        LOG.info("Skipping source repair. Parity repair is active");
        continue;
      }

      String sourcePath = namesystem.getPath(candidate.getInodeId());
      // Mark the repair as active before starting it, so the status is not
      // left invalid if the file recovers in between. A repair that fails
      // to start is expected to be detected by a timeout later.
      namesystem.updateEncodingStatus(sourcePath,
          EncodingStatus.Status.REPAIR_ACTIVE);
      LOG.info("Status set to source repair active " + candidate);
      blockRepairManager.repairSourceBlocks(
          candidate.getEncodingPolicy().getCodec(),
          new Path(sourcePath),
          new Path(parityFolder + "/" + candidate.getParityFileName()));
      LOG.info("Scheduled job for source repair " + candidate);
      activeRepairs++;
    } catch (IOException repairFailure) {
      LOG.error(StringUtils.stringifyException(repairFailure));
    }
  }
}
/**
 * Starts parity repair jobs for files with a requested parity repair, as
 * long as the active parity repair limit leaves room for more jobs.
 *
 * A file is skipped while its last parity status change is younger than the
 * parity repair delay, or while its source status is not ENCODED. Every
 * scheduled job is counted in {@code activeParityRepairs}, the counter that
 * this method's limit is computed from and that checkActiveRepairs
 * decrements for parity files. An IOException for a single file is logged
 * and does not stop the remaining files; no IOException is propagated.
 */
private void scheduleParityRepairs() {
  LOG.info("Scheduling parity repairs");
  final int limit = activeParityRepairLimit - activeParityRepairs;
  if (limit <= 0) {
    return;
  }
  LightWeightRequestHandler findHandler = new LightWeightRequestHandler(
      EncodingStatusOperationType.FIND_REQUESTED_PARITY_REPAIRS) {
    @Override
    public Object performTask() throws IOException {
      EncodingStatusDataAccess<EncodingStatus> dataAccess =
          (EncodingStatusDataAccess) HdfsStorageFactory
              .getDataAccess(EncodingStatusDataAccess.class);
      return dataAccess.findRequestedParityRepairs(limit);
    }
  };
  final Collection<EncodingStatus> requestedRepairs;
  try {
    requestedRepairs = (Collection<EncodingStatus>) findHandler.handle();
  } catch (IOException e) {
    LOG.error(StringUtils.stringifyException(e));
    return;
  }
  for (EncodingStatus encodingStatus : requestedRepairs) {
    try {
      LOG.info("Scheduling parity repair for " + encodingStatus);
      if (System.currentTimeMillis() -
          encodingStatus.getParityStatusModificationTime() <
          parityRepairDelay) {
        LOG.info("Skipping parity repair. Delay not reached: " +
            parityRepairDelay);
        continue;
      }
      if (encodingStatus.getStatus() != EncodingStatus.Status.ENCODED) {
        // Only repair parity for non-broken source files. Otherwise repair
        // the source file first.
        LOG.info("Skipping parity repair. Source file not healthy.");
        continue;
      }
      String path = namesystem.getPath(encodingStatus.getInodeId());
      // Set the status before doing anything else, so the status is not
      // left invalid if the file recovers in between. A repair that fails
      // to start is expected to be detected by a timeout later.
      namesystem.updateEncodingStatus(path,
          EncodingStatus.ParityStatus.REPAIR_ACTIVE);
      LOG.info("Status set to parity repair active " + encodingStatus);
      blockRepairManager
          .repairParityBlocks(encodingStatus.getEncodingPolicy().getCodec(),
              new Path(path), new Path(
                  parityFolder + "/" + encodingStatus.getParityFileName()));
      LOG.info("Scheduled job for parity repair " + encodingStatus);
      // Count against the parity limit, not the source repair limit.
      activeParityRepairs++;
    } catch (IOException e) {
      LOG.error(StringUtils.stringifyException(e));
    }
  }
}
/**
 * Looks up at most {@code deletionLimit} encoding statuses returned by the
 * FIND_DELETED query, deletes the parity file of each and removes the
 * status itself. An IOException for a single entry is logged and does not
 * stop the remaining entries.
 *
 * @throws IOException if the lookup fails
 */
private void garbageCollect() throws IOException {
  LOG.info("Starting garbage collection");

  LightWeightRequestHandler deletedFinder = new LightWeightRequestHandler(
      EncodingStatusOperationType.FIND_DELETED) {
    @Override
    public Object performTask() throws IOException {
      EncodingStatusDataAccess<EncodingStatus> statusAccess =
          (EncodingStatusDataAccess) HdfsStorageFactory
              .getDataAccess(EncodingStatusDataAccess.class);
      return statusAccess.findDeleted(deletionLimit);
    }
  };
  Collection<EncodingStatus> deletedStatuses =
      (Collection<EncodingStatus>) deletedFinder.handle();

  for (EncodingStatus deleted : deletedStatuses) {
    LOG.info("Trying to collect " + deleted);
    try {
      final String parityPath =
          parityFolder + "/" + deleted.getParityFileName();
      namesystem.deleteWithTransaction(parityPath, false);
      namesystem.removeEncodingStatus(deleted);
    } catch (IOException collectFailure) {
      LOG.error(StringUtils.stringifyException(collectFailure));
    }
  }
}
/**
 * Goes through the encoding statuses returned by the FIND_REVOKED query.
 * For each one the file's replication factor is compared against the
 * locations of all of its blocks; when every block matches, the parity file
 * is deleted and the encoding status is removed.
 *
 * @throws IOException if the lookup or any namesystem call fails; the
 *     remaining entries are then not processed in this round
 */
private void checkRevoked() throws IOException {
  LOG.info("Checking replication for revocations");

  LightWeightRequestHandler revokedFinder = new LightWeightRequestHandler(
      EncodingStatusOperationType.FIND_REVOKED) {
    @Override
    public Object performTask() throws IOException {
      EncodingStatusDataAccess<EncodingStatus> statusAccess =
          (EncodingStatusDataAccess) HdfsStorageFactory
              .getDataAccess(EncodingStatusDataAccess.class);
      return statusAccess.findRevoked();
    }
  };
  Collection<EncodingStatus> revokedStatuses =
      (Collection<EncodingStatus>) revokedFinder.handle();

  for (EncodingStatus revoked : revokedStatuses) {
    LOG.info("Checking replication for revoked status: " + revoked);
    String sourcePath = namesystem.getPath(revoked.getInodeId());
    int expectedReplication =
        namesystem.getFileInfo(sourcePath, true).getReplication();
    LocatedBlocks sourceBlocks = namesystem.getBlockLocations(sourcePath, 0,
        Long.MAX_VALUE, false, true, true);
    if (!checkReplication(sourceBlocks, expectedReplication)) {
      // Not every block has the expected number of locations yet.
      continue;
    }
    LOG.info("Revocation successful for " + revoked);
    namesystem.deleteWithTransaction(
        parityFolder + "/" + revoked.getParityFileName(), false);
    namesystem.removeEncodingStatus(sourcePath, revoked);
  }
}
/**
 * Checks whether every block has exactly the given number of locations.
 *
 * @param blocks located blocks to inspect
 * @param replication number of locations each block must have
 * @return {@code false} as soon as one block has a different number of
 *     locations, {@code true} otherwise (also when there are no blocks)
 */
private boolean checkReplication(LocatedBlocks blocks, int replication) {
  for (LocatedBlock block : blocks.getLocatedBlocks()) {
    final int locationCount = block.getLocations().length;
    if (locationCount != replication) {
      return false;
    }
  }
  return true;
}
/**
 * Tells whether the given path lies under the configured parity folder.
 *
 * The folder name is compared as a literal prefix. It is deliberately not
 * compiled into a regular expression, because characters such as '.' or '+'
 * in the folder name would then act as pattern operators.
 *
 * NOTE(review): like the pattern it replaces, this is a plain prefix test,
 * so a sibling folder whose name merely starts with the parity folder name
 * also matches -- confirm whether a path separator should be required.
 *
 * @param path path to test, must not be null
 * @return {@code true} if the path starts with the parity folder
 */
public boolean isParityFile(String path) {
  return path.startsWith(parityFolder);
}
/**
 * Returns the class-wide enabled flag.
 *
 * The flag is static and initially false; it is overwritten by the
 * constructor with the value read from the configuration each time an
 * ErasureCodingManager is created.
 *
 * @return the current value of the enabled flag
 */
public static boolean isEnabled() {
return enabled;
}
}