/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.snapshot;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CancellationException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.errorhandling.ForeignException;
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
import org.apache.hadoop.hbase.executor.EventHandler;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.SnapshotSentinel;
import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.TableInfoCopyTask;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.zookeeper.KeeperException;
/**
* A handler for taking snapshots from the master.
*
* This is not a subclass of TableEventHandler because using that would incur an extra META scan.
*
* The {@link #snapshotRegions(List)} call should get implemented for each snapshot flavor.
*/
@InterfaceAudience.Private
public abstract class TakeSnapshotHandler extends EventHandler implements SnapshotSentinel,
ForeignExceptionSnare {
private static final Log LOG = LogFactory.getLog(TakeSnapshotHandler.class);
private volatile boolean finished;
// none of these should ever be null
protected final MasterServices master;
protected final MasterMetrics metricsMaster;
protected final SnapshotDescription snapshot;
protected final Configuration conf;
protected final FileSystem fs;
protected final Path rootDir;
private final Path snapshotDir;
protected final Path workingDir;
private final MasterSnapshotVerifier verifier;
protected final ForeignExceptionDispatcher monitor;
protected final MonitoredTask status;
/**
* @param snapshot descriptor of the snapshot to take
* @param masterServices master services provider
* @throws IOException on unexpected error
*/
public TakeSnapshotHandler(SnapshotDescription snapshot, final MasterServices masterServices,
final MasterMetrics metricsMaster) {
super(masterServices, EventType.C_M_SNAPSHOT_TABLE);
assert snapshot != null : "SnapshotDescription must not be nul1";
assert masterServices != null : "MasterServices must not be nul1";
this.master = masterServices;
this.metricsMaster = metricsMaster;
this.snapshot = snapshot;
this.conf = this.master.getConfiguration();
this.fs = this.master.getMasterFileSystem().getFileSystem();
this.rootDir = this.master.getMasterFileSystem().getRootDir();
this.snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
this.workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
this.monitor = new ForeignExceptionDispatcher();
// prepare the verify
this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, rootDir);
// update the running tasks
this.status = TaskMonitor.get().createStatus(
"Taking " + snapshot.getType() + " snapshot on table: " + snapshot.getTable());
}
private HTableDescriptor loadTableDescriptor()
throws FileNotFoundException, IOException {
final String name = snapshot.getTable();
HTableDescriptor htd =
this.master.getTableDescriptors().get(name);
if (htd == null) {
throw new IOException("HTableDescriptor missing for " + name);
}
return htd;
}
public TakeSnapshotHandler prepare() throws Exception {
loadTableDescriptor(); // check that .tableinfo is present
return this;
}
/**
* Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)}
* call should get implemented for each snapshot flavor.
*/
@Override
public void process() {
String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " "
+ eventType + " on table " + snapshot.getTable();
LOG.info(msg);
status.setStatus(msg);
try {
// If regions move after this meta scan, the region specific snapshot should fail, triggering
// an external exception that gets captured here.
// write down the snapshot info in the working directory
SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, this.fs);
new TableInfoCopyTask(monitor, snapshot, fs, rootDir).call();
monitor.rethrowException();
List<Pair<HRegionInfo, ServerName>> regionsAndLocations =
MetaReader.getTableRegionsAndLocations(this.server.getCatalogTracker(),
Bytes.toBytes(snapshot.getTable()), true);
// run the snapshot
snapshotRegions(regionsAndLocations);
// extract each pair to separate lists
Set<String> serverNames = new HashSet<String>();
for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) {
serverNames.add(p.getSecond().toString());
}
// verify the snapshot is valid
status.setStatus("Verifying snapshot: " + snapshot.getName());
verifier.verifySnapshot(this.workingDir, serverNames);
// complete the snapshot, atomically moving from tmp to .snapshot dir.
completeSnapshot(this.snapshotDir, this.workingDir, this.fs);
status.markComplete("Snapshot " + snapshot.getName() + " of table " + snapshot.getTable()
+ " completed");
metricsMaster.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
} catch (Exception e) {
status.abort("Failed to complete snapshot " + snapshot.getName() + " on table " +
snapshot.getTable() + " because " + e.getMessage());
String reason = "Failed taking snapshot " + SnapshotDescriptionUtils.toString(snapshot)
+ " due to exception:" + e.getMessage();
LOG.error(reason, e);
ForeignException ee = new ForeignException(reason, e);
monitor.receive(ee);
// need to mark this completed to close off and allow cleanup to happen.
cancel("Failed to take snapshot '" + SnapshotDescriptionUtils.toString(snapshot)
+ "' due to exception");
} finally {
LOG.debug("Launching cleanup of working dir:" + workingDir);
try {
// if the working dir is still present, the snapshot has failed. it is present we delete
// it.
if (fs.exists(workingDir) && !this.fs.delete(workingDir, true)) {
LOG.error("Couldn't delete snapshot working directory:" + workingDir);
}
} catch (IOException e) {
LOG.error("Couldn't delete snapshot working directory:" + workingDir);
}
}
}
/**
* Reset the manager to allow another snapshot to proceed
*
* @param snapshotDir final path of the snapshot
* @param workingDir directory where the in progress snapshot was built
* @param fs {@link FileSystem} where the snapshot was built
* @throws SnapshotCreationException if the snapshot could not be moved
* @throws IOException the filesystem could not be reached
*/
public void completeSnapshot(Path snapshotDir, Path workingDir, FileSystem fs)
throws SnapshotCreationException, IOException {
LOG.debug("Sentinel is done, just moving the snapshot from " + workingDir + " to "
+ snapshotDir);
if (!fs.rename(workingDir, snapshotDir)) {
throw new SnapshotCreationException("Failed to move working directory(" + workingDir
+ ") to completed directory(" + snapshotDir + ").");
}
finished = true;
}
/**
* Snapshot the specified regions
*/
protected abstract void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regions)
throws IOException, KeeperException;
@Override
public void cancel(String why) {
if (finished) return;
this.finished = true;
LOG.info("Stop taking snapshot=" + SnapshotDescriptionUtils.toString(snapshot) + " because: "
+ why);
CancellationException ce = new CancellationException(why);
monitor.receive(new ForeignException(master.getServerName().toString(), ce));
}
@Override
public boolean isFinished() {
return finished;
}
@Override
public long getCompletionTimestamp() {
return this.status.getCompletionTimestamp();
}
@Override
public SnapshotDescription getSnapshot() {
return snapshot;
}
@Override
public ForeignException getExceptionIfFailed() {
return monitor.getException();
}
@Override
public void rethrowExceptionIfFailed() throws ForeignException {
monitor.rethrowException();
}
@Override
public void rethrowException() throws ForeignException {
monitor.rethrowException();
}
@Override
public boolean hasException() {
return monitor.hasException();
}
@Override
public ForeignException getException() {
return monitor.getException();
}
}