/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.db;

import static com.google.common.collect.Sets.newHashSet;

import java.io.File;
import java.io.FileFilter;
import java.io.IOError;
import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSet.Builder;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import com.google.common.primitives.Longs;
import com.google.common.util.concurrent.Uninterruptibles;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.cassandra.config.*;
import org.apache.cassandra.io.FSError;
import org.apache.cassandra.io.FSWriteError;
import org.apache.cassandra.io.util.FileUtils;
import org.apache.cassandra.io.sstable.*;
import org.apache.cassandra.service.StorageService;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.cassandra.utils.Pair;

/**
 * Encapsulates handling of paths to the data files.
 *
 * Since v2.1, the directory layout is the following:
 *   /&lt;path_to_data_dir&gt;/ks/cf1-cfId/ks-cf1-ka-1-Data.db
 *                         /cf2-cfId/ks-cf2-ka-1-Data.db
 *                         ...
 *
 * cfId is a hex-encoded CFID.
 *
 * For backward compatibility, Directories uses the older directory layout if it exists.
 *
 * In addition, more than one 'root' data directory can be specified so that
 * &lt;path_to_data_dir&gt; potentially represents multiple locations.
 * Note that in the case of multiple locations, the manifest for the leveled
 * compaction is only in one of the locations.
 *
 * Snapshots (resp. backups) are always created alongside the sstables that are
 * snapshotted (resp. backed up), but inside a subdirectory named 'snapshots'
 * (resp. 'backups') (and snapshots are further inside a subdirectory named after
 * the snapshot).
 *
 * This class abstracts all those details from the rest of the code.
 */
public class Directories
{
    private static final Logger logger = LoggerFactory.getLogger(Directories.class);

    public static final String BACKUPS_SUBDIR = "backups";
    public static final String SNAPSHOT_SUBDIR = "snapshots";
    public static final String SECONDARY_INDEX_NAME_SEPARATOR = ".";

    // One DataDirectory per data_file_directories entry in cassandra.yaml.
    public static final DataDirectory[] dataFileLocations;
    static
    {
        String[] locations = DatabaseDescriptor.getAllDataFileLocations();
        dataFileLocations = new DataDirectory[locations.length];
        for (int i = 0; i < locations.length; ++i)
            dataFileLocations[i] = new DataDirectory(new File(locations[i]));
    }

    /**
     * Checks whether Cassandra has RWX permissions to the specified directory.  Logs an error with
     * the details if it does not.
     *
     * @param dir File object of the directory.
     * @param dataDir String representation of the directory's location
     * @return status representing Cassandra's RWX permissions to the supplied folder location.
     */
    public static boolean verifyFullPermissions(File dir, String dataDir)
    {
        if (!dir.isDirectory())
        {
            logger.error("Not a directory {}", dataDir);
            return false;
        }
        else if (!FileAction.hasPrivilege(dir, FileAction.X))
        {
            logger.error("Doesn't have execute permissions for {} directory", dataDir);
            return false;
        }
        else if (!FileAction.hasPrivilege(dir, FileAction.R))
        {
            logger.error("Doesn't have read permissions for {} directory", dataDir);
            return false;
        }
        // NOTE(review): isDirectory() above already implies exists(); the extra check is
        // kept for safety against TOCTOU races between the checks.
        else if (dir.exists() && !FileAction.hasPrivilege(dir, FileAction.W))
        {
            logger.error("Doesn't have write permissions for {} directory", dataDir);
            return false;
        }
        return true;
    }

    /** Combinations of execute/read/write permissions that can be tested on a file. */
    public enum FileAction
    {
        X, W, XW, R, XR, RW, XRW;

        /**
         * @return true iff {@code file} grants every permission named by {@code action}.
         */
        public static boolean hasPrivilege(File file, FileAction action)
        {
            boolean privilege = false;
            switch (action)
            {
                case X:
                    privilege = file.canExecute();
                    break;
                case W:
                    privilege = file.canWrite();
                    break;
                case XW:
                    privilege = file.canExecute() && file.canWrite();
                    break;
                case R:
                    privilege = file.canRead();
                    break;
                case XR:
                    privilege = file.canExecute() && file.canRead();
                    break;
                case RW:
                    privilege = file.canRead() && file.canWrite();
                    break;
                case XRW:
                    privilege = file.canExecute() && file.canRead() && file.canWrite();
                    break;
            }
            return privilege;
        }
    }

    private final CFMetaData metadata;
    // Per data directory, the sstable directory for this CF (same index as dataFileLocations).
    private final File[] sstableDirectories;

    /**
     * Create Directories of given ColumnFamily.
     * SSTable directories are created under the data_directories defined in cassandra.yaml if they do not exist yet.
     *
     * @param metadata metadata of ColumnFamily
     */
    public Directories(CFMetaData metadata)
    {
        this.metadata = metadata;
        this.sstableDirectories = new File[dataFileLocations.length];

        // Determine SSTable directories.
        // If upgraded from a version less than 2.1, reuse the directories that already exist.
        for (int i = 0; i < dataFileLocations.length; ++i)
        {
            // check if old (pre-2.1, no cfId suffix) SSTable directory exists
            sstableDirectories[i] = new File(dataFileLocations[i].location, join(metadata.ksName, metadata.cfName));
        }
        boolean olderDirectoryExists = Iterables.any(Arrays.asList(sstableDirectories), new Predicate<File>()
        {
            public boolean apply(File file)
            {
                return file.exists();
            }
        });
        if (olderDirectoryExists)
            return;

        // create directory name with the cfId suffix (2.1+ layout)
        String directoryName;
        String cfId = ByteBufferUtil.bytesToHex(ByteBufferUtil.bytes(metadata.cfId));
        int idx = metadata.cfName.indexOf(SECONDARY_INDEX_NAME_SEPARATOR);
        if (idx > 0)
            // secondary index, goes in the same directory as the base cf
            directoryName = metadata.cfName.substring(0, idx) + "-" + cfId;
        else
            directoryName = metadata.cfName + "-" + cfId;

        for (int i = 0; i < dataFileLocations.length; ++i)
            sstableDirectories[i] = new File(dataFileLocations[i].location, join(metadata.ksName, directoryName));

        if (!StorageService.instance.isClientMode())
        {
            for (File dir : sstableDirectories)
            {
                try
                {
                    FileUtils.createDirectory(dir);
                }
                catch (FSError e)
                {
                    // don't just let the default exception handler do this, we need the create loop to continue
                    logger.error("Failed to create {} directory", dir);
                    FileUtils.handleFSError(e);
                }
            }
        }
    }

    /**
     * Returns the SSTable location which is inside the given data directory.
     *
     * @param dataDirectory the data root to resolve against
     * @return SSTable location, or null if no configured location lives under {@code dataDirectory}
     */
    public File getLocationForDisk(DataDirectory dataDirectory)
    {
        for (File dir : sstableDirectories)
        {
            if (dir.getAbsolutePath().startsWith(dataDirectory.location.getAbsolutePath()))
                return dir;
        }
        return null;
    }

    /**
     * @return the Descriptor for {@code filename} in whichever sstable directory contains it,
     *         or null if no directory does.
     */
    public Descriptor find(String filename)
    {
        for (File dir : sstableDirectories)
        {
            if (new File(dir, filename).exists())
                return Descriptor.fromFilename(dir, filename).left;
        }
        return null;
    }

    /**
     * @return a writeable directory for new sstables, possibly after forcing a GC to
     *         unmap/delete compacted sstables, or null if no space could be freed.
     */
    public File getDirectoryForNewSSTables()
    {
        File path = getWriteableLocationAsFile();

        // Requesting GC has a chance to free space only if we're using mmap and a non SUN jvm
        if (path == null
            && (DatabaseDescriptor.getDiskAccessMode() == Config.DiskAccessMode.mmap || DatabaseDescriptor.getIndexAccessMode() == Config.DiskAccessMode.mmap)
            && !FileUtils.isCleanerAvailable())
        {
            logger.info("Forcing GC to free up disk space.  Upgrade to the Oracle JVM to avoid this");
            StorageService.instance.requestGC();
            // retry after GCing has forced unmap of compacted SSTables so they can be deleted
            // Note: GCInspector will do this already, but only sun JVM supports GCInspector so far
            SSTableDeletingTask.rescheduleFailedTasks();
            Uninterruptibles.sleepUninterruptibly(10, TimeUnit.SECONDS);
            path = getWriteableLocationAsFile();
        }

        return path;
    }

    /** @return the sstable directory under the best writeable data directory (see getWriteableLocation). */
    public File getWriteableLocationAsFile()
    {
        return getLocationForDisk(getWriteableLocation());
    }

    /**
     * @return a non-blacklisted directory with the most free space and least current tasks.
     *
     * @throws IOError if all directories are blacklisted.
     */
    public DataDirectory getWriteableLocation()
    {
        List<DataDirectory> candidates = new ArrayList<>();

        // pick directories with enough space and so that resulting sstable dirs aren't blacklisted for writes.
        for (DataDirectory dataDir : dataFileLocations)
        {
            if (BlacklistedDirectories.isUnwritable(getLocationForDisk(dataDir)))
                continue;
            candidates.add(dataDir);
        }

        if (candidates.isEmpty())
            throw new IOError(new IOException("All configured data directories have been blacklisted as unwritable for erroring out"));

        // sort directories by free space, in _descending_ order.
        Collections.sort(candidates);

        // sort directories by load, in _ascending_ order; the sort is stable, so directories
        // with equal task counts keep the free-space ordering established above.
        Collections.sort(candidates, new Comparator<DataDirectory>()
        {
            public int compare(DataDirectory a, DataDirectory b)
            {
                // Integer.compare avoids the overflow pitfall of subtraction-based comparators.
                return Integer.compare(a.currentTasks.get(), b.currentTasks.get());
            }
        });

        return candidates.get(0);
    }

    /** @return the snapshot directory for {@code snapshotName} next to {@code desc}, creating it if needed. */
    public static File getSnapshotDirectory(Descriptor desc, String snapshotName)
    {
        return getOrCreate(desc.directory, SNAPSHOT_SUBDIR, snapshotName);
    }

    /** @return the backups directory next to {@code desc}, creating it if needed. */
    public static File getBackupsDirectory(Descriptor desc)
    {
        return getOrCreate(desc.directory, BACKUPS_SUBDIR);
    }

    /** @return a fresh lister over this CF's sstable components. */
    public SSTableLister sstableLister()
    {
        return new SSTableLister();
    }

    /**
     * A single configured data root.  Tracks the number of in-flight write tasks and their
     * estimated size so the writeable-location picker can account for pending work.
     */
    public static class DataDirectory implements Comparable<DataDirectory>
    {
        public final File location;
        public final AtomicInteger currentTasks = new AtomicInteger();
        public final AtomicLong estimatedWorkingSize = new AtomicLong();

        public DataDirectory(File location)
        {
            this.location = location;
        }

        /**
         * @return estimated available disk space for bounded directory,
         * excluding the expected size written by tasks in the queue.
         */
        public long getEstimatedAvailableSpace()
        {
            // NOTE(review): the original comment mentioned a "load factor of 0.9" so as not to
            // use the entire disk, but no such factor is applied here — confirm intent.
            return location.getUsableSpace() - estimatedWorkingSize.get();
        }

        public int compareTo(DataDirectory o)
        {
            // we want to sort by free space in descending order
            return -1 * Longs.compare(getEstimatedAvailableSpace(), o.getEstimatedAvailableSpace());
        }
    }

    /**
     * Builder-style lister that scans this CF's sstable directories (and optionally backups
     * and snapshots) and collects the components of each sstable, keyed by Descriptor.
     * All configuration methods must be called before list()/listFiles().
     */
    public class SSTableLister
    {
        private boolean skipTemporary;
        private boolean includeBackups;
        private boolean onlyBackups;
        private int nbFiles;
        private final Map<Descriptor, Set<Component>> components = new HashMap<>();
        private boolean filtered;
        private String snapshotName;

        public SSTableLister skipTemporary(boolean b)
        {
            if (filtered)
                throw new IllegalStateException("list() has already been called");
            skipTemporary = b;
            return this;
        }

        public SSTableLister includeBackups(boolean b)
        {
            if (filtered)
                throw new IllegalStateException("list() has already been called");
            includeBackups = b;
            return this;
        }

        public SSTableLister onlyBackups(boolean b)
        {
            if (filtered)
                throw new IllegalStateException("list() has already been called");
            onlyBackups = b;
            includeBackups = b;
            return this;
        }

        public SSTableLister snapshots(String sn)
        {
            if (filtered)
                throw new IllegalStateException("list() has already been called");
            snapshotName = sn;
            return this;
        }

        /** @return an immutable snapshot of the components found, keyed by sstable Descriptor. */
        public Map<Descriptor, Set<Component>> list()
        {
            filter();
            return ImmutableMap.copyOf(components);
        }

        /** @return every component file found, as a flat list of Files. */
        public List<File> listFiles()
        {
            filter();
            List<File> l = new ArrayList<>(nbFiles);
            for (Map.Entry<Descriptor, Set<Component>> entry : components.entrySet())
            {
                for (Component c : entry.getValue())
                {
                    l.add(new File(entry.getKey().filenameFor(c)));
                }
            }
            return l;
        }

        // Performs the scan once; subsequent calls are no-ops (results are cached in `components`).
        private void filter()
        {
            if (filtered)
                return;

            for (File location : sstableDirectories)
            {
                if (BlacklistedDirectories.isUnreadable(location))
                    continue;

                if (snapshotName != null)
                {
                    // Snapshot listing replaces the normal/backup listing entirely.
                    new File(location, join(SNAPSHOT_SUBDIR, snapshotName)).listFiles(getFilter());
                    continue;
                }

                if (!onlyBackups)
                    location.listFiles(getFilter());

                if (includeBackups)
                    new File(location, BACKUPS_SUBDIR).listFiles(getFilter());
            }

            filtered = true;
        }

        private FileFilter getFilter()
        {
            // Note: the prefix needs to include cfname + separator to distinguish between a cfs and its secondary indexes
            final String sstablePrefix = getSSTablePrefix();
            return new FileFilter()
            {
                // This filter always returns false since accept() adds matches to the components map
                // as a side effect; we never need listFiles() itself to build a result array.
                public boolean accept(File file)
                {
                    // we are only interested in the SSTable files that belong to the specific ColumnFamily
                    if (file.isDirectory() || !file.getName().startsWith(sstablePrefix))
                        return false;

                    Pair<Descriptor, Component> pair = SSTable.tryComponentFromFilename(file.getParentFile(), file.getName());
                    if (pair == null)
                        return false;

                    if (skipTemporary && pair.left.temporary)
                        return false;

                    Set<Component> previous = components.get(pair.left);
                    if (previous == null)
                    {
                        previous = new HashSet<>();
                        components.put(pair.left, previous);
                    }
                    previous.add(pair.right);
                    nbFiles++;
                    return false;
                }
            };
        }
    }

    /** @return true if a snapshot named {@code snapshotName} exists in any sstable directory. */
    public boolean snapshotExists(String snapshotName)
    {
        for (File dir : sstableDirectories)
        {
            File snapshotDir = new File(dir, join(SNAPSHOT_SUBDIR, snapshotName));
            if (snapshotDir.exists())
                return true;
        }
        return false;
    }

    /**
     * Deletes the named snapshot from each of the given directories.
     * If snapshotName is empty or null, the entire snapshots directory is deleted.
     */
    public static void clearSnapshot(String snapshotName, List<File> snapshotDirectories)
    {
        // If snapshotName is empty or null, we will delete the entire snapshot directory
        String tag = snapshotName == null ? "" : snapshotName;

        for (File dir : snapshotDirectories)
        {
            File snapshotDir = new File(dir, join(SNAPSHOT_SUBDIR, tag));
            if (snapshotDir.exists())
            {
                if (logger.isDebugEnabled())
                    logger.debug("Removing snapshot directory {}", snapshotDir);
                FileUtils.deleteRecursive(snapshotDir);
            }
        }
    }

    /**
     * @return the last-modified time of the snapshot directory.
     * The snapshot must exist.
     * @throws RuntimeException if the snapshot does not exist in any sstable directory
     */
    public long snapshotCreationTime(String snapshotName)
    {
        for (File dir : sstableDirectories)
        {
            File snapshotDir = new File(dir, join(SNAPSHOT_SUBDIR, snapshotName));
            if (snapshotDir.exists())
                return snapshotDir.lastModified();
        }
        throw new RuntimeException("Snapshot " + snapshotName + " doesn't exist");
    }

    /** @return total on-disk size of all snapshots, counting only files not shared with live sstables. */
    public long trueSnapshotsSize()
    {
        long result = 0L;
        for (File dir : sstableDirectories)
            result += getTrueAllocatedSizeIn(new File(dir, join(SNAPSHOT_SUBDIR)));
        return result;
    }

    // Prefix identifying this CF's sstable files; includes the trailing separator so a cfs
    // is not confused with its secondary indexes.
    private String getSSTablePrefix()
    {
        return metadata.ksName + Component.separator + metadata.cfName + Component.separator;
    }

    /**
     * @return total size of this CF's files under {@code input} that are not shared with
     *         live sstables (see TrueFilesSizeVisitor); 0 if input is not a directory.
     */
    public long getTrueAllocatedSizeIn(File input)
    {
        if (!input.isDirectory())
            return 0;

        TrueFilesSizeVisitor visitor = new TrueFilesSizeVisitor();
        try
        {
            Files.walkFileTree(input.toPath(), visitor);
        }
        catch (IOException e)
        {
            // pass the exception as the final argument (no placeholder) so SLF4J logs the stack trace
            logger.error("Could not calculate the size of {}.", input, e);
        }
        return visitor.getAllocatedSize();
    }

    // Recursively finds all the sub directories in the KS directory.
    public static List<File> getKSChildDirectories(String ksName)
    {
        List<File> result = new ArrayList<>();
        for (DataDirectory dataDirectory : dataFileLocations)
        {
            File ksDir = new File(dataDirectory.location, ksName);
            File[] cfDirs = ksDir.listFiles();
            if (cfDirs == null)
                continue;
            for (File cfDir : cfDirs)
            {
                if (cfDir.isDirectory())
                    result.add(cfDir);
            }
        }
        return result;
    }

    /** @return this CF's sstable directories that actually exist on disk. */
    public List<File> getCFDirectories()
    {
        List<File> result = new ArrayList<>();
        for (File dataDirectory : sstableDirectories)
        {
            if (dataDirectory.isDirectory())
                result.add(dataDirectory);
        }
        return result;
    }

    /**
     * @return {@code base} joined with {@code subdirs}, created as a directory if it does not exist.
     * @throws AssertionError if the path exists but is not a directory
     * @throws FSWriteError if the directory cannot be created
     */
    private static File getOrCreate(File base, String... subdirs)
    {
        File dir = subdirs == null || subdirs.length == 0
                 ? base
                 : new File(base, join(subdirs));
        if (dir.exists())
        {
            if (!dir.isDirectory())
                throw new AssertionError(String.format("Invalid directory path %s: path exists but is not a directory", dir));
        }
        // mkdirs can fail spuriously if another thread created the directory concurrently;
        // re-check before treating it as an error.
        else if (!dir.mkdirs() && !(dir.exists() && dir.isDirectory()))
        {
            throw new FSWriteError(new IOException("Unable to create directory " + dir), dir);
        }
        return dir;
    }

    private static String join(String... s)
    {
        return StringUtils.join(s, File.separator);
    }

    // Hack for tests, don't use otherwise
    static void overrideDataDirectoriesForTest(String loc)
    {
        for (int i = 0; i < dataFileLocations.length; ++i)
            dataFileLocations[i] = new DataDirectory(new File(loc));
    }

    // Hack for tests, don't use otherwise
    static void resetDataDirectoriesAfterTest()
    {
        String[] locations = DatabaseDescriptor.getAllDataFileLocations();
        for (int i = 0; i < locations.length; ++i)
            dataFileLocations[i] = new DataDirectory(new File(locations[i]));
    }

    /**
     * File-tree visitor that sums the size of this CF's files, skipping files whose names
     * match a live sstable component (presumably hardlinked copies in snapshots/backups —
     * TODO confirm) and counting each distinct file name only once.
     */
    private class TrueFilesSizeVisitor extends SimpleFileVisitor<Path>
    {
        private final AtomicLong size = new AtomicLong(0);
        private final Set<String> visited = newHashSet(); //count each file only once
        private final Set<String> alive;
        private final String prefix = getSSTablePrefix();

        public TrueFilesSizeVisitor()
        {
            super();
            Builder<String> builder = ImmutableSet.builder();
            for (File file : sstableLister().listFiles())
                builder.add(file.getName());
            alive = builder.build();
        }

        private boolean isAcceptable(Path file)
        {
            String fileName = file.toFile().getName();
            return fileName.startsWith(prefix)
                    && !visited.contains(fileName)
                    && !alive.contains(fileName);
        }

        @Override
        public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException
        {
            if (isAcceptable(file))
            {
                size.addAndGet(attrs.size());
                visited.add(file.toFile().getName());
            }
            return FileVisitResult.CONTINUE;
        }

        @Override
        public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException
        {
            // best-effort accounting: skip unreadable files rather than aborting the walk
            return FileVisitResult.CONTINUE;
        }

        public long getAllocatedSize()
        {
            return size.get();
        }
    }
}