/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.snapshot;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.backup.HFileArchiver;
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.FSVisitor;
import org.apache.hadoop.hbase.util.ModifyRegionUtils;
import org.apache.hadoop.io.IOUtils;
/**
 * Helper to restore or clone a snapshot.
*
 * <p>The helper assumes that the table has already been created; calling restoreHdfsRegions()
 * restores the content present in the snapshot as the new content of the table.
*
 * <p>Clone from Snapshot: If the target table is empty, the restore operation
 * is just a "clone operation", where the only steps are:
 * <ul>
 * <li>for each region in the snapshot create a new region
 * (note that the region will have a different name, since the encoded name contains the table name)
 * <li>for each file in the region create a new HFileLink that points to the original file.
* <li>restore the logs, if any
* </ul>
*
* <p>Restore from Snapshot:
* <ul>
 * <li>for each region in the table, check whether it is present in the snapshot
* <ul>
* <li>if the region is not present in the snapshot, remove it.
* <li>if the region is present in the snapshot
* <ul>
 * <li>for each file in the table region, check whether it is present in the snapshot
* <ul>
* <li>if the hfile is not present in the snapshot, remove it
* <li>if the hfile is present, keep it (nothing to do)
* </ul>
* <li>for each file in the snapshot region but not in the table
* <ul>
 * <li>create a new HFileLink that points to the original file
* </ul>
* </ul>
* </ul>
* <li>for each region in the snapshot not present in the current table state
* <ul>
* <li>create a new region and for each file in the region create a new HFileLink
* (This is the same as the clone operation)
* </ul>
* <li>restore the logs, if any
* </ul>
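 *
 * <p>A minimal usage sketch; the directory lookups shown are illustrative, and
 * conf, fs, snapshot, htd, monitor and status are assumed to come from the caller:
 * <pre>
 *   Path rootDir = FSUtils.getRootDir(conf);
 *   Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
 *   Path tableDir = HTableDescriptor.getTableDir(rootDir, htd.getName());
 *   RestoreSnapshotHelper helper = new RestoreSnapshotHelper(conf, fs, snapshot,
 *     snapshotDir, htd, tableDir, monitor, status);
 *   RestoreMetaChanges metaChanges = helper.restoreHdfsRegions();
 * </pre>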
*/
@InterfaceAudience.Private
public class RestoreSnapshotHelper {
private static final Log LOG = LogFactory.getLog(RestoreSnapshotHelper.class);
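  // Maps the encoded name of each snapshot region to the encoded name of the
  // corresponding cloned region (used when rewriting references and splitting WALs).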
private final Map<byte[], byte[]> regionsMap =
new TreeMap<byte[], byte[]>(Bytes.BYTES_COMPARATOR);
private final ForeignExceptionDispatcher monitor;
private final MonitoredTask status;
private final SnapshotDescription snapshotDesc;
private final Path snapshotDir;
private final HTableDescriptor tableDesc;
private final Path tableDir;
private final Configuration conf;
private final FileSystem fs;
public RestoreSnapshotHelper(final Configuration conf, final FileSystem fs,
final SnapshotDescription snapshotDescription, final Path snapshotDir,
final HTableDescriptor tableDescriptor, final Path tableDir,
final ForeignExceptionDispatcher monitor, final MonitoredTask status)
{
this.fs = fs;
this.conf = conf;
this.snapshotDesc = snapshotDescription;
this.snapshotDir = snapshotDir;
this.tableDesc = tableDescriptor;
this.tableDir = tableDir;
this.monitor = monitor;
this.status = status;
}
/**
   * Restore the on-disk table to a specified snapshot state.
   * @return the {@link RestoreMetaChanges} describing the regions touched by the
   *   restore operation, or null if the snapshot contains no regions
*/
public RestoreMetaChanges restoreHdfsRegions() throws IOException {
LOG.debug("starting restore");
Set<String> snapshotRegionNames = SnapshotReferenceUtil.getSnapshotRegionNames(fs, snapshotDir);
if (snapshotRegionNames == null) {
LOG.warn("Nothing to restore. Snapshot " + snapshotDesc + " looks empty");
return null;
}
RestoreMetaChanges metaChanges = new RestoreMetaChanges();
    // Identify which regions are still available and which are not.
    // NOTE: we rely upon the region name as: "table name, start key, end key"
List<HRegionInfo> tableRegions = getTableRegions();
if (tableRegions != null) {
monitor.rethrowException();
for (HRegionInfo regionInfo: tableRegions) {
String regionName = regionInfo.getEncodedName();
if (snapshotRegionNames.contains(regionName)) {
LOG.info("region to restore: " + regionName);
snapshotRegionNames.remove(regionName);
metaChanges.addRegionToRestore(regionInfo);
} else {
LOG.info("region to remove: " + regionName);
metaChanges.addRegionToRemove(regionInfo);
}
}
// Restore regions using the snapshot data
monitor.rethrowException();
status.setStatus("Restoring table regions...");
restoreHdfsRegions(metaChanges.getRegionsToRestore());
status.setStatus("Finished restoring all table regions.");
// Remove regions from the current table
monitor.rethrowException();
status.setStatus("Starting to delete excess regions from table");
removeHdfsRegions(metaChanges.getRegionsToRemove());
status.setStatus("Finished deleting excess regions from table.");
}
// Regions to Add: present in the snapshot but not in the current table
if (snapshotRegionNames.size() > 0) {
List<HRegionInfo> regionsToAdd = new LinkedList<HRegionInfo>();
monitor.rethrowException();
for (String regionName: snapshotRegionNames) {
LOG.info("region to add: " + regionName);
Path regionDir = new Path(snapshotDir, regionName);
regionsToAdd.add(HRegion.loadDotRegionInfoFileContent(fs, regionDir));
}
// Create new regions cloning from the snapshot
monitor.rethrowException();
status.setStatus("Cloning regions...");
HRegionInfo[] clonedRegions = cloneHdfsRegions(regionsToAdd);
metaChanges.setNewRegions(clonedRegions);
status.setStatus("Finished cloning regions.");
}
// Restore WALs
monitor.rethrowException();
status.setStatus("Restoring WALs to table...");
restoreWALs();
status.setStatus("Finished restoring WALs to table.");
return metaChanges;
}
/**
   * Describes the set of operations needed to update META after the restore.
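   * <p>A sketch of how a caller might apply these changes, using the MetaEditor
   * helpers mentioned in the getter javadocs below (the catalogTracker is assumed
   * to be available at the call site):
   * <pre>
   *   RestoreMetaChanges metaChanges = helper.restoreHdfsRegions();
   *   if (metaChanges.hasRegionsToRemove()) {
   *     MetaEditor.deleteRegions(catalogTracker, metaChanges.getRegionsToRemove());
   *   }
   *   if (metaChanges.hasRegionsToAdd()) {
   *     MetaEditor.addRegionsToMeta(catalogTracker, metaChanges.getRegionsToAdd());
   *   }
   * </pre>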
*/
public static class RestoreMetaChanges {
private List<HRegionInfo> regionsToRestore = null;
private List<HRegionInfo> regionsToRemove = null;
private List<HRegionInfo> regionsToAdd = null;
/**
     * @return true if there are new regions
*/
public boolean hasRegionsToAdd() {
return this.regionsToAdd != null && this.regionsToAdd.size() > 0;
}
/**
* Returns the list of new regions added during the on-disk restore.
     * The caller is responsible for adding the regions to META,
     * e.g. MetaEditor.addRegionsToMeta(...)
* @return the list of regions to add to META
*/
public List<HRegionInfo> getRegionsToAdd() {
return this.regionsToAdd;
}
/**
     * @return true if there are regions to restore
*/
public boolean hasRegionsToRestore() {
return this.regionsToRestore != null && this.regionsToRestore.size() > 0;
}
/**
     * Returns the list of regions restored during the on-disk restore.
     * The caller is responsible for adding the regions to META if they are not already present.
* @return the list of regions restored
*/
public List<HRegionInfo> getRegionsToRestore() {
return this.regionsToRestore;
}
/**
     * @return true if there are regions to remove
*/
public boolean hasRegionsToRemove() {
return this.regionsToRemove != null && this.regionsToRemove.size() > 0;
}
/**
* Returns the list of regions removed during the on-disk restore.
     * The caller is responsible for removing the regions from META,
* e.g. MetaEditor.deleteRegions(...)
* @return the list of regions to remove from META
*/
public List<HRegionInfo> getRegionsToRemove() {
return this.regionsToRemove;
}
void setNewRegions(final HRegionInfo[] hris) {
if (hris != null) {
regionsToAdd = Arrays.asList(hris);
} else {
regionsToAdd = null;
}
}
void addRegionToRemove(final HRegionInfo hri) {
if (regionsToRemove == null) {
regionsToRemove = new LinkedList<HRegionInfo>();
}
regionsToRemove.add(hri);
}
void addRegionToRestore(final HRegionInfo hri) {
if (regionsToRestore == null) {
regionsToRestore = new LinkedList<HRegionInfo>();
}
regionsToRestore.add(hri);
}
}
/**
   * Remove the specified regions from the file-system, using the archiver.
*/
private void removeHdfsRegions(final List<HRegionInfo> regions) throws IOException {
if (regions != null && regions.size() > 0) {
for (HRegionInfo hri: regions) {
HFileArchiver.archiveRegion(conf, fs, hri);
}
}
}
/**
   * Restore the specified regions, bringing their content back to the snapshot state.
*/
private void restoreHdfsRegions(final List<HRegionInfo> regions) throws IOException {
if (regions == null || regions.size() == 0) return;
for (HRegionInfo hri: regions) restoreRegion(hri);
}
/**
   * Restore a region by removing the files not present in the snapshot
   * and adding the missing ones from the snapshot.
*/
private void restoreRegion(HRegionInfo regionInfo) throws IOException {
Path snapshotRegionDir = new Path(snapshotDir, regionInfo.getEncodedName());
Map<String, List<String>> snapshotFiles =
SnapshotReferenceUtil.getRegionHFileReferences(fs, snapshotRegionDir);
Path regionDir = new Path(tableDir, regionInfo.getEncodedName());
String tableName = tableDesc.getNameAsString();
// Restore families present in the table
for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
byte[] family = Bytes.toBytes(familyDir.getName());
Set<String> familyFiles = getTableRegionFamilyFiles(familyDir);
List<String> snapshotFamilyFiles = snapshotFiles.remove(familyDir.getName());
if (snapshotFamilyFiles != null) {
List<String> hfilesToAdd = new LinkedList<String>();
for (String hfileName: snapshotFamilyFiles) {
if (familyFiles.contains(hfileName)) {
// HFile already present
familyFiles.remove(hfileName);
} else {
// HFile missing
hfilesToAdd.add(hfileName);
}
}
// Restore Missing files
for (String hfileName: hfilesToAdd) {
LOG.trace("Adding HFileLink " + hfileName +
" to region=" + regionInfo.getEncodedName() + " table=" + tableName);
restoreStoreFile(familyDir, regionInfo, hfileName);
}
// Remove hfiles not present in the snapshot
for (String hfileName: familyFiles) {
Path hfile = new Path(familyDir, hfileName);
LOG.trace("Removing hfile=" + hfile +
" from region=" + regionInfo.getEncodedName() + " table=" + tableName);
HFileArchiver.archiveStoreFile(fs, regionInfo, conf, tableDir, family, hfile);
}
} else {
        // Family doesn't exist in the snapshot
LOG.trace("Removing family=" + Bytes.toString(family) +
" from region=" + regionInfo.getEncodedName() + " table=" + tableName);
HFileArchiver.archiveFamily(fs, conf, regionInfo, tableDir, family);
fs.delete(familyDir, true);
}
}
// Add families not present in the table
for (Map.Entry<String, List<String>> familyEntry: snapshotFiles.entrySet()) {
Path familyDir = new Path(regionDir, familyEntry.getKey());
if (!fs.mkdirs(familyDir)) {
throw new IOException("Unable to create familyDir=" + familyDir);
}
for (String hfileName: familyEntry.getValue()) {
LOG.trace("Adding HFileLink " + hfileName + " to table=" + tableName);
restoreStoreFile(familyDir, regionInfo, hfileName);
}
}
}
/**
* @return The set of files in the specified family directory.
*/
private Set<String> getTableRegionFamilyFiles(final Path familyDir) throws IOException {
Set<String> familyFiles = new HashSet<String>();
FileStatus[] hfiles = FSUtils.listStatus(fs, familyDir);
if (hfiles == null) return familyFiles;
for (FileStatus hfileRef: hfiles) {
String hfileName = hfileRef.getPath().getName();
familyFiles.add(hfileName);
}
return familyFiles;
}
/**
   * Clone the specified regions. For each region create a new region
   * and create an HFileLink for each hfile.
*/
private HRegionInfo[] cloneHdfsRegions(final List<HRegionInfo> regions) throws IOException {
if (regions == null || regions.size() == 0) return null;
final Map<String, HRegionInfo> snapshotRegions =
new HashMap<String, HRegionInfo>(regions.size());
    // clone the region info (replacing the embedded table name with the new one)
HRegionInfo[] clonedRegionsInfo = new HRegionInfo[regions.size()];
for (int i = 0; i < clonedRegionsInfo.length; ++i) {
// clone the region info from the snapshot region info
HRegionInfo snapshotRegionInfo = regions.get(i);
clonedRegionsInfo[i] = cloneRegionInfo(snapshotRegionInfo);
// add the region name mapping between snapshot and cloned
String snapshotRegionName = snapshotRegionInfo.getEncodedName();
String clonedRegionName = clonedRegionsInfo[i].getEncodedName();
regionsMap.put(Bytes.toBytes(snapshotRegionName), Bytes.toBytes(clonedRegionName));
LOG.info("clone region=" + snapshotRegionName + " as " + clonedRegionName);
// Add mapping between cloned region name and snapshot region info
snapshotRegions.put(clonedRegionName, snapshotRegionInfo);
}
// create the regions on disk
ModifyRegionUtils.createRegions(conf, tableDir.getParent(),
tableDesc, clonedRegionsInfo, new ModifyRegionUtils.RegionFillTask() {
public void fillRegion(final HRegion region) throws IOException {
cloneRegion(region, snapshotRegions.get(region.getRegionInfo().getEncodedName()));
}
});
return clonedRegionsInfo;
}
/**
   * Clone the region directory content from the snapshot info.
   *
   * Each region name is encoded with the table name, so the cloned region will
   * have a different name.
   *
   * Instead of copying the hfiles, an HFileLink is created for each of them.
   *
   * @param region the cloned {@link HRegion} to fill
   * @param snapshotRegionInfo the snapshot region to copy the content from
*/
private void cloneRegion(final HRegion region, final HRegionInfo snapshotRegionInfo)
throws IOException {
final Path snapshotRegionDir = new Path(snapshotDir, snapshotRegionInfo.getEncodedName());
final Path regionDir = new Path(tableDir, region.getRegionInfo().getEncodedName());
final String tableName = tableDesc.getNameAsString();
SnapshotReferenceUtil.visitRegionStoreFiles(fs, snapshotRegionDir,
new FSVisitor.StoreFileVisitor() {
public void storeFile (final String region, final String family, final String hfile)
throws IOException {
LOG.info("Adding HFileLink " + hfile + " to table=" + tableName);
Path familyDir = new Path(regionDir, family);
restoreStoreFile(familyDir, snapshotRegionInfo, hfile);
}
});
}
/**
* Create a new {@link HFileLink} to reference the store file.
* <p>The store file in the snapshot can be a simple hfile, an HFileLink or a reference.
* <ul>
* <li>hfile: abc -> table=region-abc
* <li>reference: abc.1234 -> table=region-abc.1234
* <li>hfilelink: table=region-hfile -> table=region-hfile
* </ul>
* @param familyDir destination directory for the store file
* @param regionInfo destination region info for the table
* @param hfileName store file name (can be a Reference, HFileLink or simple HFile)
*/
private void restoreStoreFile(final Path familyDir, final HRegionInfo regionInfo,
final String hfileName) throws IOException {
if (HFileLink.isHFileLink(hfileName)) {
HFileLink.createFromHFileLink(conf, fs, familyDir, hfileName);
} else if (StoreFile.isReference(hfileName)) {
restoreReferenceFile(familyDir, regionInfo, hfileName);
} else {
HFileLink.create(conf, fs, familyDir, regionInfo, hfileName);
}
}
/**
   * Create a new {@link org.apache.hadoop.hbase.io.Reference} as a copy of the source one.
* <p><blockquote><pre>
* The source table looks like:
* 1234/abc (original file)
* 5678/abc.1234 (reference file)
*
   * After the clone operation it looks like:
* wxyz/table=1234-abc
* stuv/table=1234-abc.wxyz
*
* NOTE that the region name in the clone changes (md5 of regioninfo)
* and the reference should reflect that change.
* </pre></blockquote>
* @param familyDir destination directory for the store file
* @param regionInfo destination region info for the table
* @param hfileName reference file name
*/
private void restoreReferenceFile(final Path familyDir, final HRegionInfo regionInfo,
final String hfileName) throws IOException {
// Extract the referred information (hfile name and parent region)
String tableName = snapshotDesc.getTable();
Path refPath = StoreFile.getReferredToFile(new Path(new Path(new Path(tableName,
regionInfo.getEncodedName()), familyDir.getName()), hfileName));
String snapshotRegionName = refPath.getParent().getParent().getName();
String fileName = refPath.getName();
// The new reference should have the cloned region name as parent, if it is a clone.
String clonedRegionName = Bytes.toString(regionsMap.get(Bytes.toBytes(snapshotRegionName)));
if (clonedRegionName == null) clonedRegionName = snapshotRegionName;
// The output file should be a reference link table=snapshotRegion-fileName.clonedRegionName
String refLink = fileName;
if (!HFileLink.isHFileLink(fileName)) {
refLink = HFileLink.createHFileLinkName(tableName, snapshotRegionName, fileName);
}
Path outPath = new Path(familyDir, refLink + '.' + clonedRegionName);
// Create the new reference
Path linkPath = new Path(familyDir,
HFileLink.createHFileLinkName(tableName, regionInfo.getEncodedName(), hfileName));
InputStream in = new HFileLink(conf, linkPath).open(fs);
OutputStream out = fs.create(outPath);
IOUtils.copyBytes(in, out, conf);
}
/**
* Create a new {@link HRegionInfo} from the snapshot region info.
* Keep the same startKey, endKey, regionId and split information but change
* the table name.
*
* @param snapshotRegionInfo Info for region to clone.
   * @return the new HRegionInfo instance
*/
public HRegionInfo cloneRegionInfo(final HRegionInfo snapshotRegionInfo) {
return new HRegionInfo(tableDesc.getName(),
snapshotRegionInfo.getStartKey(), snapshotRegionInfo.getEndKey(),
snapshotRegionInfo.isSplit(), snapshotRegionInfo.getRegionId());
}
/**
* Restore snapshot WALs.
*
   * A global snapshot keeps a reference to the region server logs present during the snapshot.
   * (/hbase/.snapshot/snapshotName/.logs/hostName/logName)
   *
   * Since each log may contain edits for several tables, the logs must be split
   * to extract only the edits for the table that we are interested in.
*/
private void restoreWALs() throws IOException {
final SnapshotLogSplitter logSplitter = new SnapshotLogSplitter(conf, fs, tableDir,
Bytes.toBytes(snapshotDesc.getTable()), regionsMap);
try {
      // Recovered edits
SnapshotReferenceUtil.visitRecoveredEdits(fs, snapshotDir,
new FSVisitor.RecoveredEditsVisitor() {
public void recoveredEdits (final String region, final String logfile) throws IOException {
Path path = SnapshotReferenceUtil.getRecoveredEdits(snapshotDir, region, logfile);
logSplitter.splitRecoveredEdit(path);
}
});
// Region Server Logs
SnapshotReferenceUtil.visitLogFiles(fs, snapshotDir, new FSVisitor.LogFileVisitor() {
public void logFile (final String server, final String logfile) throws IOException {
logSplitter.splitLog(server, logfile);
}
});
} finally {
logSplitter.close();
}
}
/**
   * @return the list of regions contained in the table, or null if the table has no regions on disk
*/
private List<HRegionInfo> getTableRegions() throws IOException {
LOG.debug("get table regions: " + tableDir);
FileStatus[] regionDirs = FSUtils.listStatus(fs, tableDir, new FSUtils.RegionDirFilter(fs));
if (regionDirs == null) return null;
List<HRegionInfo> regions = new LinkedList<HRegionInfo>();
for (FileStatus regionDir: regionDirs) {
HRegionInfo hri = HRegion.loadDotRegionInfoFileContent(fs, regionDir.getPath());
regions.add(hri);
}
LOG.debug("found " + regions.size() + " regions for table=" + tableDesc.getNameAsString());
return regions;
}
/**
* Create a new table descriptor cloning the snapshot table schema.
*
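   * <p>A usage sketch (the target table name here is illustrative):
   * <pre>
   *   HTableDescriptor clonedHtd =
   *     RestoreSnapshotHelper.cloneTableSchema(snapshotHtd, Bytes.toBytes("clonedTable"));
   * </pre>
   *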
   * @param snapshotTableDescriptor descriptor of the table captured by the snapshot
   * @param tableName name of the new table
   * @return the cloned table descriptor
* @throws IOException
*/
public static HTableDescriptor cloneTableSchema(final HTableDescriptor snapshotTableDescriptor,
final byte[] tableName) throws IOException {
HTableDescriptor htd = new HTableDescriptor(tableName);
for (HColumnDescriptor hcd: snapshotTableDescriptor.getColumnFamilies()) {
htd.addFamily(hcd);
}
for (Map.Entry<ImmutableBytesWritable, ImmutableBytesWritable> e:
snapshotTableDescriptor.getValues().entrySet()) {
htd.setValue(e.getKey(), e.getValue());
}
return htd;
}
}