/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.HBaseFileSystem;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.InvalidFamilyOperationException;
import org.apache.hadoop.hbase.RemoteExceptionHandler;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.backup.HFileArchiver;
import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.regionserver.wal.HLogSplitter;
import org.apache.hadoop.hbase.regionserver.wal.OrphanHLogAfterSplitException;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.hbase.util.FSUtils;

/**
 * This class abstracts a bunch of operations the HMaster needs to interact
 * with the underlying file system, including splitting log files, checking
 * file system status, etc.
 */
public class MasterFileSystem {
  private static final Log LOG = LogFactory.getLog(MasterFileSystem.class.getName());
  // HBase configuration
  Configuration conf;
  // master status
  Server master;
  // metrics for master
  MasterMetrics metrics;
  // Persisted unique cluster ID
  private String clusterId;
  // Keep around for convenience.
  private final FileSystem fs;
  // Is the filesystem ok?
  private volatile boolean fsOk = true;
  // The Path to the old logs dir
  private final Path oldLogDir;
  // root hbase directory on the FS
  private final Path rootdir;
  // hbase temp directory used for table construction and deletion
  private final Path tempdir;
  // lock used to serialize log splitting operations
  final Lock splitLogLock = new ReentrantLock();
  final boolean distributedLogSplitting;
  final SplitLogManager splitLogManager;
  private final MasterServices services;

  private final static PathFilter META_FILTER = new PathFilter() {
    public boolean accept(Path p) {
      return HLog.isMetaFile(p);
    }
  };

  private final static PathFilter NON_META_FILTER = new PathFilter() {
    public boolean accept(Path p) {
      return !HLog.isMetaFile(p);
    }
  };
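  /**
   * Create the master filesystem helper.  Resolves hbase.rootdir, pins the
   * configuration's default filesystem to the root directory's filesystem,
   * starts the SplitLogManager when distributed log splitting is enabled,
   * and creates the initial directory layout.
   * @param master the master server
   * @param services services provided by the master
   * @param metrics master metrics; may be null
   * @param masterRecovery passed through to SplitLogManager#finishInitialization
   */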
  public MasterFileSystem(Server master, MasterServices services,
      MasterMetrics metrics, boolean masterRecovery)
  throws IOException {
    this.conf = master.getConfiguration();
    this.master = master;
    this.services = services;
    this.metrics = metrics;
    // Set filesystem to be that of this.rootdir else we get complaints about
    // mismatched filesystems if hbase.rootdir is hdfs and fs.defaultFS is
    // default localfs.  Presumption is that rootdir is fully-qualified before
    // we get to here with an appropriate fs scheme.
    this.rootdir = FSUtils.getRootDir(conf);
    this.tempdir = new Path(this.rootdir, HConstants.HBASE_TEMP_DIRECTORY);
    // Cover both bases, the old way of setting default fs and the new.
    // We're supposed to run on 0.20 and 0.21 anyway.
    this.fs = this.rootdir.getFileSystem(conf);
    String fsUri = this.fs.getUri().toString();
    conf.set("fs.default.name", fsUri);
    conf.set("fs.defaultFS", fsUri);
    // make sure the fs has the same conf
    fs.setConf(conf);
    this.distributedLogSplitting =
      conf.getBoolean("hbase.master.distributed.log.splitting", true);
    if (this.distributedLogSplitting) {
      this.splitLogManager = new SplitLogManager(master.getZooKeeper(),
        master.getConfiguration(), master, this.services,
        master.getServerName().toString());
      this.splitLogManager.finishInitialization(masterRecovery);
    } else {
      this.splitLogManager = null;
    }
    // set up the archived logs path
    this.oldLogDir = createInitialFileSystemLayout();
  }

  /**
   * Create initial layout in filesystem.
   * <ol>
   * <li>Check if the root region exists and is readable; if not, create it.
   * Create the hbase.version file and the -ROOT- directory if they do not
   * exist.</li>
   * <li>Create a log archive directory for RS to put archived logs</li>
   * </ol>
   * Idempotent.
   */
  private Path createInitialFileSystemLayout() throws IOException {
    // check if the root directory exists
    checkRootDir(this.rootdir, conf, this.fs);

    // check if temp directory exists and clean it
    checkTempDir(this.tempdir, conf, this.fs);

    Path oldLogDir = new Path(this.rootdir, HConstants.HREGION_OLDLOGDIR_NAME);

    // Make sure the region servers can archive their old logs
    if (!this.fs.exists(oldLogDir)) {
      HBaseFileSystem.makeDirOnFileSystem(fs, oldLogDir);
    }

    return oldLogDir;
  }

  public FileSystem getFileSystem() {
    return this.fs;
  }

  /**
   * Get the directory where old logs go
   * @return the dir
   */
  public Path getOldLogDir() {
    return this.oldLogDir;
  }

  /**
   * Checks to see if the file system is still accessible.
   * If not, aborts the master and marks the filesystem as unavailable.
   * @return false if the file system is not available
   */
  public boolean checkFileSystem() {
    if (this.fsOk) {
      try {
        FSUtils.checkFileSystemAvailable(this.fs);
        FSUtils.checkDfsSafeMode(this.conf);
      } catch (IOException e) {
        master.abort("Shutting down HBase cluster: file system not available", e);
        this.fsOk = false;
      }
    }
    return this.fsOk;
  }
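  /*
   * Illustrative usage, not taken from callers in this codebase: callers
   * would typically bail out early when the filesystem check fails, since
   * the master is already aborting at that point, e.g.:
   *
   *   if (!masterFileSystem.checkFileSystem()) {
   *     return; // filesystem gone; master abort is underway
   *   }
   */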
  /**
   * @return HBase root dir.
   */
  public Path getRootDir() {
    return this.rootdir;
  }

  /**
   * @return HBase temp dir.
   */
  public Path getTempDir() {
    return this.tempdir;
  }

  /**
   * @return The unique identifier generated for this cluster
   */
  public String getClusterId() {
    return clusterId;
  }

  /**
   * Inspect the log directory to find dead servers whose logs need splitting.
   * @return the set of servers that have log folders but are no longer online
   */
  Set<ServerName> getFailedServersFromLogFolders() {
    boolean retrySplitting = !conf.getBoolean("hbase.hlog.split.skip.errors",
      HLog.SPLIT_SKIP_ERRORS_DEFAULT);

    Set<ServerName> serverNames = new HashSet<ServerName>();
    Path logsDirPath = new Path(this.rootdir, HConstants.HREGION_LOGDIR_NAME);

    do {
      if (master.isStopped()) {
        LOG.warn("Master stopped while trying to get failed servers.");
        break;
      }
      try {
        if (!this.fs.exists(logsDirPath)) return serverNames;
        FileStatus[] logFolders = FSUtils.listStatus(this.fs, logsDirPath, null);
        // Get online servers after getting log folders to avoid log folder
        // deletion of newly checked-in region servers.  See HBASE-5916.
        Set<ServerName> onlineServers =
          ((HMaster) master).getServerManager().getOnlineServers().keySet();

        if (logFolders == null || logFolders.length == 0) {
          LOG.debug("No log files to split, proceeding...");
          return serverNames;
        }

        for (FileStatus status : logFolders) {
          String sn = status.getPath().getName();
          // truncate splitting suffix if present (for ServerName parsing)
          if (sn.endsWith(HLog.SPLITTING_EXT)) {
            sn = sn.substring(0, sn.length() - HLog.SPLITTING_EXT.length());
          }
          ServerName serverName = ServerName.parseServerName(sn);
          if (!onlineServers.contains(serverName)) {
            LOG.info("Log folder " + status.getPath() + " doesn't belong "
              + "to a known region server, splitting");
            serverNames.add(serverName);
          } else {
            LOG.info("Log folder " + status.getPath()
              + " belongs to an existing region server");
          }
        }
        retrySplitting = false;
      } catch (IOException ioe) {
        LOG.warn("Failed getting failed servers to be recovered.", ioe);
        if (!checkFileSystem()) {
          LOG.warn("Bad Filesystem, exiting");
          Runtime.getRuntime().halt(1);
        }
        try {
          if (retrySplitting) {
            Thread.sleep(conf.getInt("hbase.hlog.split.failure.retry.interval",
              30 * 1000));
          }
        } catch (InterruptedException e) {
          LOG.warn("Interrupted, aborting since cannot return w/o splitting");
          Thread.currentThread().interrupt();
          retrySplitting = false;
          Runtime.getRuntime().halt(1);
        }
      }
    } while (retrySplitting);

    return serverNames;
  }

  public void splitLog(final ServerName serverName) throws IOException {
    List<ServerName> serverNames = new ArrayList<ServerName>();
    serverNames.add(serverName);
    splitLog(serverNames);
  }

  public void splitAllLogs(final ServerName serverName) throws IOException {
    List<ServerName> serverNames = new ArrayList<ServerName>();
    serverNames.add(serverName);
    splitAllLogs(serverNames);
  }

  /**
   * Specialized method to handle the splitting of the meta HLog.
   * @param serverName the server whose meta log should be split
   * @throws IOException
   */
  public void splitMetaLog(final ServerName serverName) throws IOException {
    long splitTime = 0, splitLogSize = 0;
    List<ServerName> serverNames = new ArrayList<ServerName>();
    serverNames.add(serverName);
    List<Path> logDirs = getLogDirs(serverNames);
    if (logDirs.isEmpty()) {
      LOG.info("No meta logs to split");
      return;
    }
    splitLogManager.handleDeadWorkers(serverNames);
    splitTime = EnvironmentEdgeManager.currentTimeMillis();
    splitLogSize = splitLogManager.splitLogDistributed(logDirs, META_FILTER);
    splitTime = EnvironmentEdgeManager.currentTimeMillis() - splitTime;
    if (this.metrics != null) {
      this.metrics.addSplit(splitTime, splitLogSize);
    }
  }
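  /**
   * Resolve the log directories for the given servers, renaming each live
   * directory to its "-splitting" counterpart first so that a rogue, still
   * running region server cannot add new HLogs to it while the split is in
   * progress.  Directories that exist in neither form are skipped.
   * @param serverNames servers whose log directories should be collected
   * @return the (possibly empty) list of "-splitting" directories to process
   */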
  private List<Path> getLogDirs(final List<ServerName> serverNames)
  throws IOException {
    List<Path> logDirs = new ArrayList<Path>();
    for (ServerName serverName : serverNames) {
      Path logDir = new Path(this.rootdir,
        HLog.getHLogDirectoryName(serverName.toString()));
      Path splitDir = logDir.suffix(HLog.SPLITTING_EXT);
      // rename the directory so a rogue RS doesn't create more HLogs
      if (fs.exists(logDir)) {
        if (!HBaseFileSystem.renameDirForFileSystem(fs, logDir, splitDir)) {
          throw new IOException("Failed fs.rename for log split: " + logDir);
        }
        logDir = splitDir;
        LOG.debug("Renamed log directory: " + splitDir);
      } else if (!fs.exists(splitDir)) {
        LOG.info("Log dir for server " + serverName + " does not exist");
        continue;
      }
      logDirs.add(splitDir);
    }
    return logDirs;
  }

  public void splitLog(final List<ServerName> serverNames) throws IOException {
    splitLog(serverNames, NON_META_FILTER);
  }

  public void splitAllLogs(final List<ServerName> serverNames) throws IOException {
    splitLog(serverNames, null); // no filter
  }

  /**
   * This method is the base split method that splits HLog files matching a
   * filter.  Callers should pass the appropriate filter for meta and non-meta
   * HLogs.
   * @param serverNames servers whose logs should be split
   * @param filter selects which HLog files to split; null means all files
   * @throws IOException
   */
  public void splitLog(final List<ServerName> serverNames, PathFilter filter)
  throws IOException {
    long splitTime = 0, splitLogSize = 0;
    List<Path> logDirs = getLogDirs(serverNames);

    if (logDirs.isEmpty()) {
      LOG.info("No logs to split");
      return;
    }

    boolean lockAcquired = false;
    if (distributedLogSplitting) {
      try {
        if (!this.services.isServerShutdownHandlerEnabled()) {
          // Process one log-splitting task at a time before SSH is enabled,
          // because the ROOT SSH and HMaster#assignMeta could both split logs
          // for the same server.
          this.splitLogLock.lock();
          lockAcquired = true;
        }
        splitLogManager.handleDeadWorkers(serverNames);
        splitTime = EnvironmentEdgeManager.currentTimeMillis();
        splitLogSize = splitLogManager.splitLogDistributed(logDirs, filter);
        splitTime = EnvironmentEdgeManager.currentTimeMillis() - splitTime;
      } finally {
        if (lockAcquired) {
          this.splitLogLock.unlock();
        }
      }
    } else {
      for (Path logDir : logDirs) {
        // splitLogLock ensures that dead region servers' logs are processed
        // one at a time
        this.splitLogLock.lock();
        try {
          HLogSplitter splitter = HLogSplitter.createLogSplitter(
            conf, rootdir, logDir, oldLogDir, this.fs);
          try {
            // If FS is in safe mode, just wait till out of it.
            FSUtils.waitOnSafeMode(conf,
              conf.getInt(HConstants.THREAD_WAKE_FREQUENCY, 1000));
            splitter.splitLog();
          } catch (OrphanHLogAfterSplitException e) {
            LOG.warn("Retrying splitting because of:", e);
            // An HLogSplitter instance can only be used once.  Get new instance.
            splitter = HLogSplitter.createLogSplitter(conf, rootdir, logDir,
              oldLogDir, this.fs);
            splitter.splitLog();
          }
          splitTime = splitter.getTime();
          splitLogSize = splitter.getSize();
        } finally {
          this.splitLogLock.unlock();
        }
      }
    }

    if (this.metrics != null) {
      this.metrics.addSplit(splitTime, splitLogSize);
    }
  }
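  /*
   * Which branch the method above takes is fixed at construction time by
   * "hbase.master.distributed.log.splitting" (default true).  As an
   * illustration, a site configuration that forces the master-local,
   * serialized splitter would set:
   *
   *   <property>
   *     <name>hbase.master.distributed.log.splitting</name>
   *     <value>false</value>
   *   </property>
   */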
  /**
   * Get the rootdir.  Make sure it is wholesome and exists before returning.
   * @param rd root directory path
   * @param c configuration
   * @param fs filesystem
   * @return hbase.rootdir (after checks for existence and bootstrapping if
   * needed, populating the directory with necessary bootup files).
   * @throws IOException
   */
  private Path checkRootDir(final Path rd, final Configuration c,
      final FileSystem fs)
  throws IOException {
    // If FS is in safe mode wait till out of it.
    FSUtils.waitOnSafeMode(c, c.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000));
    // Filesystem is good.  Go ahead and check for hbase.rootdir.
    try {
      if (!fs.exists(rd)) {
        HBaseFileSystem.makeDirOnFileSystem(fs, rd);
        // DFS leaves safe mode with 0 DNs when there are 0 blocks.
        // We used to handle this by checking the current DN count and waiting until
        // it is nonzero.  With security, the check for datanode count doesn't work --
        // it is a privileged op.  So instead we adopt the strategy of the jobtracker
        // and simply retry file creation during bootstrap indefinitely.  As soon as
        // there is one datanode it will succeed.  Permission problems should have
        // already been caught by mkdirs above.
        FSUtils.setVersion(fs, rd, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
          10 * 1000), c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
          HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
      } else {
        if (!fs.isDirectory(rd)) {
          throw new IllegalArgumentException(rd.toString() + " is not a directory");
        }
        // as above
        FSUtils.checkVersion(fs, rd, true, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
          10 * 1000), c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
          HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
      }
    } catch (IllegalArgumentException iae) {
      LOG.fatal("Please fix invalid configuration for "
        + HConstants.HBASE_DIR + " " + rd.toString(), iae);
      throw iae;
    }

    // Make sure cluster ID exists
    if (!FSUtils.checkClusterIdExists(fs, rd, c.getInt(
        HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000))) {
      FSUtils.setClusterId(fs, rd, UUID.randomUUID().toString(), c.getInt(
        HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000));
    }
    clusterId = FSUtils.getClusterId(fs, rd);

    // Make sure the root region directory exists!
    if (!FSUtils.rootRegionExists(fs, rd)) {
      bootstrap(rd, c);
    }
    createRootTableInfo(rd);
    return rd;
  }

  private void createRootTableInfo(Path rd) throws IOException {
    // Create ROOT tableInfo if required.
    if (!FSTableDescriptors.isTableInfoExists(fs, rd,
        Bytes.toString(HRegionInfo.ROOT_REGIONINFO.getTableName()))) {
      FSTableDescriptors.createTableDescriptor(HTableDescriptor.ROOT_TABLEDESC,
        this.conf);
    }
  }

  /**
   * Make sure the hbase temp directory exists and is empty.
   * NOTE that this method is only executed once, just after the master becomes
   * the active one.
   */
  private void checkTempDir(final Path tmpdir, final Configuration c,
      final FileSystem fs)
  throws IOException {
    // If the temp directory exists, clear the content (left over from the
    // previous run)
    if (fs.exists(tmpdir)) {
      // Archive tables in temp, maybe left over from a failed deletion;
      // if not, the cleaner will take care of them.
      for (Path tabledir : FSUtils.getTableDirs(fs, tmpdir)) {
        for (Path regiondir : FSUtils.getRegionDirs(fs, tabledir)) {
          HFileArchiver.archiveRegion(fs, this.rootdir, tabledir, regiondir);
        }
      }
      if (!HBaseFileSystem.deleteDirFromFileSystem(fs, tmpdir)) {
        throw new IOException("Unable to clean the temp directory: " + tmpdir);
      }
    }

    // Create the temp directory
    if (!HBaseFileSystem.makeDirOnFileSystem(fs, tmpdir)) {
      throw new IOException("HBase temp directory '" + tmpdir + "' creation failure.");
    }
  }
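  /**
   * Bootstrap a fresh filesystem layout: creates the -ROOT- and first .META.
   * regions, links .META. into -ROOT-, then closes both regions and their
   * logs.  Only called when the root region directory is missing.
   */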
  private static void bootstrap(final Path rd, final Configuration c)
  throws IOException {
    LOG.info("BOOTSTRAP: creating ROOT and first META regions");
    try {
      // Bootstrapping, make sure blockcache is off.  Else, one will be
      // created here in bootstrap and it'll need to be cleaned up.  Better
      // not to make it in the first place.  Turn off block caching for
      // bootstrap.  Enable after.
      HRegionInfo rootHRI = new HRegionInfo(HRegionInfo.ROOT_REGIONINFO);
      setInfoFamilyCachingForRoot(false);
      HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
      setInfoFamilyCachingForMeta(false);
      HRegion root = HRegion.createHRegion(rootHRI, rd, c,
        HTableDescriptor.ROOT_TABLEDESC);
      HRegion meta = HRegion.createHRegion(metaHRI, rd, c,
        HTableDescriptor.META_TABLEDESC);
      setInfoFamilyCachingForRoot(true);
      setInfoFamilyCachingForMeta(true);
      // Add first region from the META table to the ROOT region.
      HRegion.addRegionToMETA(root, meta);
      root.close();
      root.getLog().closeAndDelete();
      meta.close();
      meta.getLog().closeAndDelete();
    } catch (IOException e) {
      e = RemoteExceptionHandler.checkIOException(e);
      LOG.error("bootstrap", e);
      throw e;
    }
  }

  /**
   * Enable in-memory caching for -ROOT-
   */
  public static void setInfoFamilyCachingForRoot(final boolean b) {
    for (HColumnDescriptor hcd :
        HTableDescriptor.ROOT_TABLEDESC.getColumnFamilies()) {
      if (Bytes.equals(hcd.getName(), HConstants.CATALOG_FAMILY)) {
        hcd.setBlockCacheEnabled(b);
        hcd.setInMemory(b);
      }
    }
  }

  /**
   * Enable in-memory caching for .META.
   */
  public static void setInfoFamilyCachingForMeta(final boolean b) {
    for (HColumnDescriptor hcd :
        HTableDescriptor.META_TABLEDESC.getColumnFamilies()) {
      if (Bytes.equals(hcd.getName(), HConstants.CATALOG_FAMILY)) {
        hcd.setBlockCacheEnabled(b);
        hcd.setInMemory(b);
      }
    }
  }

  public void deleteRegion(HRegionInfo region) throws IOException {
    HFileArchiver.archiveRegion(conf, fs, region);
  }

  public void deleteTable(byte[] tableName) throws IOException {
    HBaseFileSystem.deleteDirFromFileSystem(fs,
      new Path(rootdir, Bytes.toString(tableName)));
  }

  /**
   * Move the specified file/directory to the hbase temp directory.
   * @param path The path of the file/directory to move
   * @return The temp location of the file/directory moved
   * @throws IOException in case of file-system failure
   */
  public Path moveToTemp(final Path path) throws IOException {
    Path tempPath = new Path(this.tempdir, path.getName());

    // Ensure temp exists
    if (!fs.exists(tempdir) && !HBaseFileSystem.makeDirOnFileSystem(fs, tempdir)) {
      throw new IOException("HBase temp directory '" + tempdir + "' creation failure.");
    }

    if (!HBaseFileSystem.renameDirForFileSystem(fs, path, tempPath)) {
      throw new IOException("Unable to move '" + path + "' to temp '" + tempPath + "'");
    }

    return tempPath;
  }

  /**
   * Move the specified table to the hbase temp directory
   * @param tableName Table name to move
   * @return The temp location of the table moved
   * @throws IOException in case of file-system failure
   */
  public Path moveTableToTemp(byte[] tableName) throws IOException {
    return moveToTemp(HTableDescriptor.getTableDir(this.rootdir, tableName));
  }

  public void updateRegionInfo(HRegionInfo region) {
    // TODO: implement this.  I think this is currently broken in trunk; I
    // don't see this getting updated.
    // @see HRegion.checkRegioninfoOnFilesystem()
  }
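  /**
   * Remove a column family's data from the filesystem for the given region:
   * the family's store files are archived first, then the family directory
   * itself is deleted.
   * @param region region whose family directory should be removed
   * @param familyName name of the column family to remove
   * @throws IOException if the family directory cannot be deleted
   */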
  public void deleteFamilyFromFS(HRegionInfo region, byte[] familyName)
  throws IOException {
    // archive family store files
    Path tableDir = new Path(rootdir, region.getTableNameAsString());
    HFileArchiver.archiveFamily(fs, conf, region, tableDir, familyName);

    // delete the family folder
    Path familyDir = new Path(tableDir,
      new Path(region.getEncodedName(), Bytes.toString(familyName)));
    if (!HBaseFileSystem.deleteDirFromFileSystem(fs, familyDir)) {
      throw new IOException("Could not delete family "
        + Bytes.toString(familyName) + " from FileSystem for region "
        + region.getRegionNameAsString() + " (" + region.getEncodedName() + ")");
    }
  }

  public void stop() {
    if (splitLogManager != null) {
      this.splitLogManager.stop();
    }
  }

  /**
   * Create a new HTableDescriptor in HDFS.
   * @param htableDescriptor the descriptor to persist
   */
  public void createTableDescriptor(HTableDescriptor htableDescriptor)
  throws IOException {
    FSTableDescriptors.createTableDescriptor(htableDescriptor, conf);
  }

  /**
   * Delete a column family from a table.
   * @param tableName name of the table
   * @param familyName name of the family to delete
   * @return Modified HTableDescriptor with the requested column deleted.
   * @throws IOException
   */
  public HTableDescriptor deleteColumn(byte[] tableName, byte[] familyName)
  throws IOException {
    LOG.info("DeleteColumn. Table = " + Bytes.toString(tableName)
      + " family = " + Bytes.toString(familyName));
    HTableDescriptor htd = this.services.getTableDescriptors().get(tableName);
    htd.removeFamily(familyName);
    this.services.getTableDescriptors().add(htd);
    return htd;
  }

  /**
   * Modify a column family of a table.
   * @param tableName name of the table
   * @param hcd HColumnDescriptor describing the modified family
   * @return Modified HTableDescriptor with the column modified.
   * @throws IOException
   */
  public HTableDescriptor modifyColumn(byte[] tableName, HColumnDescriptor hcd)
  throws IOException {
    LOG.info("AddModifyColumn. Table = " + Bytes.toString(tableName)
      + " HCD = " + hcd.toString());
    HTableDescriptor htd = this.services.getTableDescriptors().get(tableName);
    byte[] familyName = hcd.getName();
    if (!htd.hasFamily(familyName)) {
      throw new InvalidFamilyOperationException("Family '"
        + Bytes.toString(familyName) + "' doesn't exist, so it cannot be modified");
    }
    htd.addFamily(hcd);
    this.services.getTableDescriptors().add(htd);
    return htd;
  }

  /**
   * Add a column family to a table.
   * @param tableName name of the table
   * @param hcd HColumnDescriptor describing the new family
   * @return Modified HTableDescriptor with the new column added.
   * @throws IOException
   */
  public HTableDescriptor addColumn(byte[] tableName, HColumnDescriptor hcd)
  throws IOException {
    LOG.info("AddColumn. Table = " + Bytes.toString(tableName)
      + " HCD = " + hcd.toString());
    HTableDescriptor htd = this.services.getTableDescriptors().get(tableName);
    if (htd == null) {
      throw new InvalidFamilyOperationException("Family '"
        + hcd.getNameAsString() + "' cannot be added as HTD is null");
    }
    htd.addFamily(hcd);
    this.services.getTableDescriptors().add(htd);
    return htd;
  }
}