/*
* #%L
* Common package for I/O and related utilities
* %%
* Copyright (C) 2005 - 2015 Open Microscopy Environment:
* - Board of Regents of the University of Wisconsin-Madison
* - Glencoe Software, Inc.
* - University of Dundee
* %%
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* #L%
*/
package loci.common;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.MapMaker;
/**
* Pseudo-extension of java.io.File that supports reading over HTTP (among
* other things).
* It is strongly recommended to use this instead of java.io.File.
*/
public class Location {
// -- Constants --
/** Logger used by the constructors and by the static and instance methods. */
private static final Logger LOGGER = LoggerFactory.getLogger(Location.class);
/**
 * True when the "os.name" system property starts with "Windows". Consulted by
 * {@link #isHidden()} to decide whether File#isHidden() is also checked.
 */
private static final boolean IS_WINDOWS =
System.getProperty("os.name").startsWith("Windows");
// -- Static fields --
/**
 * Map from given ids to what they stand for: either an actual filename
 * (a String, stored by mapId) or an IRandomAccess handle (stored by mapFile).
 * The map is held in a ThreadLocal, so each thread gets its own map, created
 * empty on first access.
 */
private static ThreadLocal<HashMap<String, Object>> idMap =
new ThreadLocal<HashMap<String, Object>>() {
@Override
protected HashMap<String, Object> initialValue() {
return new HashMap<String, Object>();
}
};
/** Whether list(boolean) reads from and writes to the listing cache. */
private static volatile boolean cacheListings = false;
// Maximum age, in nanoseconds, of a cached listing before
// cleanStaleCacheEntries() discards it. By default, cache for one hour.
private static volatile long cacheNanos = 60L * 60L * 1000L * 1000L * 1000L;
protected class ListingsResult {
public final String [] listing;
public final long time;
ListingsResult(String [] listing, long time) {
this.listing = listing;
this.time = time;
}
}
/**
 * Cache of directory listings. list(boolean) builds each key from the
 * absolute path followed by the string form of its noHiddenFiles flag.
 */
private static final Map<String, ListingsResult> fileListings =
new MapMaker().makeMap(); // like Java's ConcurrentHashMap
// -- Fields --
/** True while this location is treated as a URL; the constructors clear it otherwise. */
private boolean isURL = true;
/** The parsed URL; assigned only by Location(String) when parsing succeeds. */
private URL url;
/** The backing file; assigned by the constructors when this is not a URL. */
private File file;
// -- Constructors --
public Location(String pathname) {
LOGGER.trace("Location({})", pathname);
if (pathname.contains("://")) {
// Avoid expensive exception handling in case when path is
// obviously not an URL
try {
url = new URL(getMappedId(pathname));
}
catch (MalformedURLException e) {
LOGGER.trace("Location is not a URL", e);
isURL = false;
}
} else {
LOGGER.trace("Location is not a URL");
isURL = false;
}
if (!isURL) file = new File(getMappedId(pathname));
}
/**
 * Creates a file-backed Location around the given File. The id map is not
 * consulted.
 *
 * @param file the file to wrap
 */
public Location(File file) {
  LOGGER.trace("Location({})", file);
  this.file = file;
  this.isURL = false;
}
/**
 * Creates a Location for the named child of the given parent path or URL.
 * The two parts are joined with "/" when the parent looks like a URL
 * (contains "://", the same test used by {@link #Location(String)}), and
 * with {@link File#separator} otherwise, so that URL children are not built
 * with a backslash on Windows.
 *
 * @param parent the parent path name or URL
 * @param child the name of the child, relative to the parent
 */
public Location(String parent, String child) {
  this(parent + separatorFor(parent) + child);
}

/** Picks the separator used to append a child name to the given parent. */
private static String separatorFor(String parent) {
  // URLs use forward slashes regardless of the platform's file separator.
  boolean urlLike = parent != null && parent.contains("://");
  return urlLike ? "/" : File.separator;
}
/**
 * Creates a Location for the named child of the given parent Location,
 * using the parent's absolute path as the parent path string.
 *
 * @param parent the parent location; must not be null
 * @param child the name of the child, relative to the parent
 */
public Location(Location parent, String child) {
this(parent.getAbsolutePath(), child);
}
// -- Location API methods --
/**
 * Restores the caching state to its defaults: listing caching off, a one
 * hour cache timeout, an empty listing cache, and an empty id map for the
 * calling thread.
 */
public static void reset() {
  cacheDirectoryListings(false);
  // One hour, expressed in nanoseconds.
  cacheNanos = 3600L * 1000L * 1000L * 1000L;
  clearDirectoryListingsCache();
  getIdMap().clear();
}
/**
 * Turns caching of directory listings on or off.
 * Caching is turned off by default.
 *
 * Reasons to cache - directory listings over network shares
 * can be very expensive, especially in HCS experiments with thousands
 * of files in the same directory. Technically, if you use a directory
 * listing and then go and access the file, you are using stale information.
 * Unlike a database, there's no transactional integrity to file system
 * operations, so the directory could change by the time you access the file.
 *
 * Reasons not to cache - the contents of the directories might change
 * during the program invocation.
 *
 * Turning caching off does not discard listings that are already cached;
 * use {@link #clearDirectoryListingsCache()} for that.
 *
 * @param cache true to turn caching on, false to turn it off.
 */
public static void cacheDirectoryListings(boolean cache) {
cacheListings = cache;
}
/**
 * Sets how long a cached directory listing stays valid before the directory
 * is listed again. The value is converted to nanoseconds and truncated to a
 * long.
 *
 * @param sec use the cache if a directory listing was done within this many
 * seconds.
 */
public static void setCacheDirectoryTimeout(double sec) {
// seconds -> nanoseconds; the cast drops any fractional nanoseconds
cacheNanos = (long) (sec * 1000. * 1000. * 1000.);
}
/**
 * Removes every entry from the directory listings cache, whether or not it
 * has expired.
 *
 * Do this if directory contents might have changed in a significant way.
 */
public static void clearDirectoryListingsCache() {
fileListings.clear();
}
/**
 * Removes any cached directory listings that are older than the configured
 * cache timeout.
 */
public static void cleanStaleCacheEntries() {
  final long now = System.nanoTime();
  final long maxAge = cacheNanos;
  final Iterator<ListingsResult> cacheValues =
    fileListings.values().iterator();
  while (cacheValues.hasNext()) {
    // Compare elapsed time rather than absolute readings: nanoTime() has an
    // arbitrary (possibly negative) origin, so "now - maxAge" can overflow
    // and wrongly expire every entry when the timeout is very large.
    final long age = now - cacheValues.next().time;
    if (age > maxAge) {
      cacheValues.remove();
    }
  }
}
/**
 * Associates the given id with an actual filename on disk, for the calling
 * thread. Usually the id is the filename and no mapping is needed, but a
 * mapping helps when a file has been renamed (e.g. to a standard naming
 * scheme that drops the extension): format handlers can keep using the
 * original name for type identification and pattern matching, while the
 * contents are read from the mapped filename.
 *
 * @param id the id to map; a null id is ignored
 * @param filename the actual filename, or null to remove any mapping for id
 * @see #getMappedId(String)
 */
public static void mapId(String id, String filename) {
  if (id == null) {
    return;
  }
  final HashMap<String, Object> map = getIdMap();
  if (filename != null) {
    map.put(id, filename);
  }
  else {
    map.remove(id);
  }
  LOGGER.debug("Location.mapId: {} -> {}", id, filename);
}
/**
 * Associates the given id with an IRandomAccess object, for the calling
 * thread.
 *
 * @param id the id to map; a null id is ignored
 * @param ira the handle to associate, or null to remove any mapping for id
 */
public static void mapFile(String id, IRandomAccess ira) {
  if (id == null) {
    return;
  }
  final HashMap<String, Object> map = getIdMap();
  if (ira != null) {
    map.put(id, ira);
  }
  else {
    map.remove(id);
  }
  LOGGER.debug("Location.mapFile: {} -> {}", id, ira);
}
/**
 * Looks up the actual filename on disk for the given id. Usually the id is
 * itself the filename, but not always; e.g., if OMEIS has renamed a file to
 * a standard location such as Files/101, the original name is still useful
 * for extension checks and pattern matching while the renamed file is the
 * one that must be read.
 *
 * @param id the id to look up
 * @return the filename mapped to the id, or the id itself when the id is
 *   null, unmapped, or mapped to something other than a String
 * @see #mapId(String, String)
 */
public static String getMappedId(String id) {
  final HashMap<String, Object> map = getIdMap();
  if (map == null || id == null) {
    return id;
  }
  final Object mapped = map.get(id);
  if (mapped instanceof String) {
    return (String) mapped;
  }
  return id;
}
/**
 * Looks up the random access handle mapped to the given id.
 *
 * @param id the id to look up
 * @return the mapped handle, or null when the id is null, unmapped, or
 *   mapped to something other than an IRandomAccess
 */
public static IRandomAccess getMappedFile(String id) {
  final HashMap<String, Object> map = getIdMap();
  if (map == null || id == null) {
    return null;
  }
  final Object mapped = map.get(id);
  if (mapped instanceof IRandomAccess) {
    return (IRandomAccess) mapped;
  }
  return null;
}
/**
 * Returns the id mapping of the calling thread. The returned map is the
 * live map, not a copy, so changes to it affect later lookups.
 */
public static HashMap<String, Object> getIdMap() {
return idMap.get();
}
/**
 * Replaces the calling thread's id mapping with the given HashMap. The map
 * is used as-is, not copied.
 *
 * @param map the new id mapping
 * @throws IllegalArgumentException if the given HashMap is null.
 */
public static void setIdMap(HashMap<String, Object> map) {
  if (map != null) {
    idMap.set(map);
    return;
  }
  throw new IllegalArgumentException("map cannot be null");
}
/**
 * Gets an IRandomAccess object that can read from the given file.
 * Equivalent to {@code getHandle(id, false)}.
 *
 * @param id the id of the file to open
 * @throws IOException if the handle cannot be created
 * @see IRandomAccess
 */
public static IRandomAccess getHandle(String id) throws IOException {
return getHandle(id, false);
}
/**
 * Gets an IRandomAccess object that can read from or write to the given file.
 * Equivalent to {@code getHandle(id, writable, true)}, i.e. archive handles
 * are allowed.
 *
 * @param id the id of the file to open
 * @param writable true to request a handle opened in "rw" mode
 * @throws IOException if the handle cannot be created
 * @see IRandomAccess
 */
public static IRandomAccess getHandle(String id, boolean writable)
throws IOException
{
return getHandle(id, writable, true);
}
/**
 * Gets an IRandomAccess object that can read from or write to the given file.
 * Equivalent to {@code getHandle(id, writable, allowArchiveHandles, 0)}.
 *
 * @param id the id of the file to open
 * @param writable true to request a handle opened in "rw" mode
 * @param allowArchiveHandles true to allow Zip, GZip and BZip2 handles
 * @throws IOException if the handle cannot be created
 * @see IRandomAccess
 */
public static IRandomAccess getHandle(String id, boolean writable,
boolean allowArchiveHandles) throws IOException
{
return getHandle(id, writable, allowArchiveHandles, 0);
}
/**
 * Gets an IRandomAccess object for the given id. A handle registered for
 * the id with mapFile is returned directly. Otherwise a new handle is
 * created for the mapped id: a URLHandle when the id starts with "http://"
 * or "https://", an archive handle when allowed and the mapped id is
 * recognized as Zip, GZip or BZip2, and an NIOFileHandle in all other cases.
 *
 * @param id the id of the file to open; must not be null unless a handle
 *   is mapped to it
 * @param writable true to open a plain file handle in "rw" mode rather
 *   than "r"
 * @param allowArchiveHandles true to allow Zip, GZip and BZip2 handles
 * @param bufferSize buffer size passed to NIOFileHandle when positive;
 *   otherwise the constructor without a buffer size is used
 * @throws IOException if the handle cannot be created
 * @see IRandomAccess
 */
public static IRandomAccess getHandle(String id, boolean writable,
  boolean allowArchiveHandles, int bufferSize) throws IOException
{
  LOGGER.trace("getHandle(id = {}, writable = {})", id, writable);
  IRandomAccess handle = getMappedFile(id);
  if (handle == null) {
    LOGGER.trace("no handle was mapped for this ID");
    final String mapId = getMappedId(id);
    final boolean remote =
      id.startsWith("http://") || id.startsWith("https://");

    if (remote) {
      handle = new URLHandle(mapId);
    }
    else if (allowArchiveHandles && ZipHandle.isZipFile(mapId)) {
      handle = new ZipHandle(mapId);
    }
    else if (allowArchiveHandles && GZipHandle.isGZipFile(mapId)) {
      handle = new GZipHandle(mapId);
    }
    else if (allowArchiveHandles && BZip2Handle.isBZip2File(mapId)) {
      handle = new BZip2Handle(mapId);
    }
    else {
      final String mode = writable ? "rw" : "r";
      if (bufferSize <= 0) {
        handle = new NIOFileHandle(mapId, mode);
      }
      else {
        handle = new NIOFileHandle(new File(mapId), mode, bufferSize);
      }
    }
  }
  LOGGER.trace("Location.getHandle: {} -> {}", id, handle);
  return handle;
}
/**
 * Verifies that the given id refers to a data stream that can be opened.
 *
 * @param id
 *          The id string to validate.
 * @throws IOException
 *           if the id is not valid.
 */
public static void checkValidId(String id) throws IOException {
  // NB: An id that maps directly to an IRandomAccess handle is valid as is.
  // Do not destroy an existing mapped IRandomAccess handle by closing it.
  if (getMappedFile(id) == null) {
    // NB: Try to actually open a handle to make sure it is valid, and close
    // it afterward so we don't leave it dangling. Either step throws
    // IOException if something goes wrong.
    final IRandomAccess probe = Location.getHandle(id);
    probe.close();
  }
}
/**
 * Returns the names of all of the files in this directory. If
 * 'noHiddenFiles' is set to true, then hidden files are omitted.
 *
 * When listing caching is enabled, expired cache entries are purged first
 * and a cached listing for this path and flag is returned if present; a
 * freshly computed, non-null listing is stored in the cache.
 *
 * For a URL the page is fetched and scanned once for <code>a href="..."</code>
 * links; each link that exists as a child of this URL is reported.
 *
 * @param noHiddenFiles true to omit hidden files
 * @return the file names, or null if this is not a directory, if a URL
 *   could not be read, or if a URL listing yielded no files
 * @see java.io.File#list()
 */
public String[] list(boolean noHiddenFiles) {
  LOGGER.trace("list({})", noHiddenFiles);
  final String key = getAbsolutePath() + Boolean.toString(noHiddenFiles);
  if (cacheListings) {
    cleanStaleCacheEntries();
    final ListingsResult cached = fileListings.get(key);
    if (cached != null) {
      return cached.listing;
    }
  }

  final List<String> files =
    isURL ? listURL(noHiddenFiles) : listFile(noHiddenFiles);
  if (files == null) {
    return null;
  }

  final String[] result = files.toArray(new String[files.size()]);
  if (cacheListings) {
    fileListings.put(key, new ListingsResult(result, System.nanoTime()));
  }
  LOGGER.trace(" returning {} files", files.size());
  return result;
}

/** Lists the children linked from this URL's page; null if none or unreadable. */
private List<String> listURL(boolean noHiddenFiles) {
  final List<String> files = new ArrayList<String>();
  try {
    final String page = readURLText();
    // Scan the page exactly once. The previous implementation re-scanned
    // until it saw "</html>", which never terminated for pages without it.
    int pos = 0;
    int hit;
    while ((hit = page.indexOf("a href", pos)) != -1) {
      final int start = hit + 8; // skip over: a href="
      final int end = page.indexOf("\"", start);
      if (end < 0) break;
      final String f = page.substring(start, end);
      if (files.size() > 0 && f.startsWith("/")) {
        return null;
      }
      pos = end + 1;
      if (f.startsWith("?")) continue;
      final Location check = new Location(getAbsolutePath(), f);
      if (check.exists() && (!noHiddenFiles || !check.isHidden())) {
        files.add(check.getName());
      }
    }
  }
  catch (IOException e) {
    LOGGER.trace("Could not retrieve directory listing", e);
    return null;
  }
  return files.isEmpty() ? null : files;
}

/** Reads the whole content of this URL as text, with line breaks removed. */
private String readURLText() throws IOException {
  final URLConnection c = url.openConnection();
  final InputStream is = c.getInputStream();
  try {
    final BufferedReader br = new BufferedReader(
      new InputStreamReader(is, Constants.ENCODING));
    final StringBuilder buffer = new StringBuilder();
    String input;
    while ((input = br.readLine()) != null) {
      buffer.append(input);
    }
    return buffer.toString();
  }
  finally {
    // Close the stream even when reading fails part-way through.
    is.close();
  }
}

/** Lists the children of the backing file; null if it is not a directory. */
private List<String> listFile(boolean noHiddenFiles) {
  if (file == null) return null;
  final String[] names = file.list();
  if (names == null) return null;
  final String path = file.getAbsolutePath();
  final List<String> files = new ArrayList<String>(names.length);
  for (String name : names) {
    // The hidden check is only evaluated when hidden files must be omitted.
    if (!noHiddenFiles || !(name.startsWith(".") ||
      new Location(path, name).isHidden()))
    {
      files.add(name);
    }
  }
  return files;
}
// -- File API methods --
/**
 * Tests whether this location can be read. For a URL, the result is true
 * if the URL is a directory, is a file, or exists. For a file, it is true
 * iff the file exists and is readable.
 *
 * @see java.io.File#canRead()
 */
public boolean canRead() {
  LOGGER.trace("canRead()");
  if (!isURL) {
    return file.canRead();
  }
  return isDirectory() || isFile() || exists();
}
/**
 * Tests whether this location can be written. URLs are never writable; a
 * file is writable iff it exists and is writable.
 *
 * @see java.io.File#canWrite()
 */
public boolean canWrite() {
  LOGGER.trace("canWrite()");
  return !isURL && file.canWrite();
}
/**
 * Creates a new empty file named by this Location's path name iff no file
 * with that name exists yet. Only supported when the path name refers to a
 * file on disk, i.e. is not a URL.
 *
 * @return true if the file was created successfully
 * @throws IOException if an I/O error occurred, or the
 *   abstract pathname is a URL
 * @see java.io.File#createNewFile()
 */
public boolean createNewFile() throws IOException {
  if (!isURL) {
    return file.createNewFile();
  }
  throw new IOException("Unimplemented");
}
/**
 * Creates the directory structure described by this Location's internal
 * {@link File} instance. Nothing is created when there is no internal File.
 *
 * @return <code>true</code> if the directory structure was created
 *   successfully.
 * @see File#mkdirs()
 */
public boolean mkdirs() {
  return file != null && file.mkdirs();
}
/**
 * Deletes this file. A directory ({@link #isDirectory()} returns true) must
 * be empty in order to be deleted. URLs cannot be deleted.
 *
 * @return true if the file was successfully deleted
 * @see java.io.File#delete()
 */
public boolean delete() {
  return !isURL && file.delete();
}
/**
 * Requests that this file be deleted when the JVM terminates.
 * Nothing happens if the pathname represents a URL.
 *
 * @see java.io.File#deleteOnExit()
 */
public void deleteOnExit() {
  if (isURL) {
    return;
  }
  file.deleteOnExit();
}
/**
 * Compares this Location with another object by absolute path. Another
 * Location is equal when its absolute path matches; any other non-null
 * object is compared through its string form. A null argument is never
 * equal.
 *
 * @param obj the object to compare with; may be null
 * @return true if obj is non-null and its path string equals this
 *   Location's absolute path
 * @see java.io.File#equals(Object)
 * @see java.net.URL#equals(Object)
 */
@Override
public boolean equals(Object obj) {
  // equals(null) must return false rather than throw.
  if (obj == null) {
    return false;
  }
  final String absPath = getAbsolutePath();
  final String thatPath;
  if (obj instanceof Location) {
    thatPath = ((Location) obj).getAbsolutePath();
  }
  else {
    thatPath = obj.toString();
  }
  return absPath.equals(thatPath);
}
/**
 * Returns the hash code of the absolute path, the same string that
 * equals(Object) compares.
 */
@Override
public int hashCode() {
return getAbsolutePath().hashCode();
}
/**
 * Returns whether or not the pathname exists.
 * A URL exists if content can be retrieved from it. A file exists if it is
 * present on disk, if its path is mapped to an IRandomAccess handle, or if
 * its path is mapped to a filename that is present on disk.
 *
 * @see java.io.File#exists()
 */
public boolean exists() {
  LOGGER.trace("exists()");
  if (!isURL) {
    if (file.exists()) {
      return true;
    }
    final String path = file.getPath();
    if (getMappedFile(path) != null) {
      return true;
    }
    final String mappedId = getMappedId(path);
    return mappedId != null && new File(mappedId).exists();
  }
  try {
    url.getContent();
  }
  catch (IOException e) {
    LOGGER.trace("Failed to retrieve content from URL", e);
    return false;
  }
  return true;
}
/**
 * Returns a new Location constructed from this Location's absolute path.
 *
 * @see java.io.File#getAbsoluteFile()
 */
public Location getAbsoluteFile() {
return new Location(getAbsolutePath());
}
/**
 * Returns the absolute path of this location: the external form of the URL,
 * or the absolute path of the backing file.
 *
 * @see java.io.File#getAbsolutePath()
 */
public String getAbsolutePath() {
  LOGGER.trace("getAbsolutePath()");
  if (isURL) {
    return url.toExternalForm();
  }
  return file.getAbsolutePath();
}
/**
 * Returns the canonical form of this location. For a URL this is the same
 * as {@link #getAbsoluteFile()}; otherwise the canonical file is wrapped in
 * a new Location.
 *
 * @throws IOException if the canonical file cannot be determined
 * @see java.io.File#getCanonicalFile()
 */
public Location getCanonicalFile() throws IOException {
  if (isURL) {
    return getAbsoluteFile();
  }
  final File canonical = file.getCanonicalFile();
  return new Location(canonical);
}
/**
 * Returns the canonical path to this file.
 * For a URL the canonical path is the absolute path
 * ({@link #getAbsolutePath()}); otherwise this method delegates to
 * {@link java.io.File#getCanonicalPath()}.
 *
 * @throws IOException if the canonical path cannot be determined
 */
public String getCanonicalPath() throws IOException {
  if (isURL) {
    return getAbsolutePath();
  }
  return file.getCanonicalPath();
}
/**
 * Returns the name of this file, i.e. the last name in the path name
 * sequence. For a URL this is whatever follows the last "/" of the URL's
 * file part.
 *
 * @see java.io.File#getName()
 */
public String getName() {
  LOGGER.trace("getName()");
  if (!isURL) {
    return file.getName();
  }
  final String urlFile = url.getFile();
  final int lastSlash = urlFile.lastIndexOf("/");
  return urlFile.substring(lastSlash + 1);
}
/**
 * Returns the name of this file's parent directory, i.e. the path name prefix
 * and every name in the path name sequence except for the last.
 * For a URL this is the absolute path up to, but not including, its last "/".
 * For a file without a parent directory, null is returned.
 *
 * @see java.io.File#getParent()
 */
public String getParent() {
  LOGGER.trace("getParent()");
  if (!isURL) {
    return file.getParent();
  }
  final String absPath = getAbsolutePath();
  final int lastSlash = absPath.lastIndexOf("/");
  return absPath.substring(0, lastSlash);
}
/**
 * Returns a Location for this file's parent directory.
 *
 * @return the parent Location, or null if {@link #getParent()} returns
 *   null because this file has no parent directory
 * @see java.io.File#getParentFile()
 */
public Location getParentFile() {
  final String parent = getParent();
  // A null parent must not reach Location(String), which would throw a
  // NullPointerException; report "no parent" as null instead.
  return parent == null ? null : new Location(parent);
}
/**
 * Returns the path of this location. For a URL this is the host followed
 * by the URL's path; otherwise it is the path of the backing file.
 *
 * @see java.io.File#getPath()
 */
public String getPath() {
  if (isURL) {
    return url.getHost() + url.getPath();
  }
  return file.getPath();
}
/**
 * Tests whether or not this path name is absolute.
 * A URL is always considered absolute.
 *
 * @see java.io.File#isAbsolute()
 */
public boolean isAbsolute() {
  LOGGER.trace("isAbsolute()");
  return isURL || file.isAbsolute();
}
/**
 * Returns true if this pathname exists and represents a directory.
 * A URL is treated as a directory when {@link #list()} returns a listing
 * for it.
 *
 * @see java.io.File#isDirectory()
 */
public boolean isDirectory() {
  LOGGER.trace("isDirectory()");
  if (!isURL) {
    return file.isDirectory();
  }
  return list() != null;
}
/**
 * Returns true if this pathname exists and represents a regular file.
 * A URL is treated as a regular file when it is not a directory and it
 * exists.
 *
 * @see java.io.File#isFile()
 */
public boolean isFile() {
  LOGGER.trace("isFile()");
  if (isURL) {
    return !isDirectory() && exists();
  }
  return file.isFile();
}
/**
 * Returns true if the pathname is 'hidden'. A URL is never hidden. A file
 * is hidden when its name starts with "." or, on Windows only, when
 * {@link java.io.File#isHidden()} reports it as hidden.
 *
 * @see java.io.File#isHidden()
 */
public boolean isHidden() {
  LOGGER.trace("isHidden()");
  if (isURL) {
    return false;
  }
  final boolean dotFile = file.getName().startsWith(".");
  return dotFile || (IS_WINDOWS && file.isHidden());
}
/**
 * Returns the last modification time of this file, in milliseconds since
 * the UNIX epoch.
 * 0 is returned if the file does not exist, or if a connection to the URL
 * could not be opened.
 *
 * @see java.io.File#lastModified()
 * @see java.net.URLConnection#getLastModified()
 */
public long lastModified() {
  LOGGER.trace("lastModified()");
  if (!isURL) {
    return file.lastModified();
  }
  try {
    return url.openConnection().getLastModified();
  }
  catch (IOException e) {
    LOGGER.trace("Could not determine URL's last modification time", e);
  }
  return 0;
}
/**
 * Returns the length of this location's content: the content length
 * reported for the URL, or the length of the backing file. 0 is returned
 * if a connection to the URL could not be opened.
 *
 * @see java.io.File#length()
 * @see java.net.URLConnection#getContentLength()
 */
public long length() {
  LOGGER.trace("length()");
  if (!isURL) {
    return file.length();
  }
  try {
    return url.openConnection().getContentLength();
  }
  catch (IOException e) {
    LOGGER.trace("Could not determine URL's content length", e);
  }
  return 0;
}
/**
 * Return a list of file names in this directory. Hidden files will be
 * included in the list. Equivalent to {@code list(false)}.
 * If this is not a directory, return null.
 */
public String[] list() {
return list(false);
}
/**
 * Returns the entries of this directory as absolute Locations. Hidden files
 * are included.
 * If this is not a directory, null is returned.
 */
public Location[] listFiles() {
  final String[] names = list();
  if (names == null) {
    return null;
  }
  final Location[] entries = new Location[names.length];
  int next = 0;
  for (String name : names) {
    final Location child = new Location(getAbsolutePath(), name);
    entries[next++] = child.getAbsoluteFile();
  }
  return entries;
}
/**
 * Returns the URL corresponding to this pathname: the wrapped URL itself,
 * or the backing file's URI converted to a URL.
 *
 * @throws MalformedURLException if the file's URI cannot be converted
 * @see java.io.File#toURL()
 */
public URL toURL() throws MalformedURLException {
  if (isURL) {
    return url;
  }
  return file.toURI().toURL();
}
/**
 * Returns the string form of the wrapped URL or backing file.
 *
 * @see java.io.File#toString()
 * @see java.net.URL#toString()
 */
@Override
public String toString() {
  if (isURL) {
    return url.toString();
  }
  return file.toString();
}
}