/*******************************************************************************
* Trombone is a flexible text processing and analysis library used
* primarily by Voyant Tools (voyant-tools.org).
*
* Copyright (©) 2007-2012 Stéfan Sinclair & Geoffrey Rockwell
*
* This file is part of Trombone.
*
* Trombone is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Trombone is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Trombone. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
package org.voyanttools.trombone.storage.file;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Reader;
import java.io.Writer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collection;
import java.util.List;
import java.util.UUID;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.store.NIOFSDirectory;
import org.mapdb.DB;
import org.mapdb.DBMaker;
import org.voyanttools.trombone.lucene.LuceneManager;
import org.voyanttools.trombone.nlp.NlpFactory;
import org.voyanttools.trombone.storage.CorpusStorage;
import org.voyanttools.trombone.storage.Storage;
import org.voyanttools.trombone.storage.StoredDocumentSourceStorage;
import org.voyanttools.trombone.util.FlexibleParameters;
import edu.stanford.nlp.util.StringUtils;
/**
* A file-system implementation of {@link Storage}.
*
* @author Stéfan Sinclair
*/
public class FileStorage implements Storage {
/**
* the default file-system location for storage
*/
public static final String DEFAULT_TROMBOME_DIRECTORY_NAME = "trombone5_2";
/**
* the default file-system location for storage
*/
public static final File DEFAULT_TROMBOME_DIRECTORY = new File(System.getProperty("java.io.tmpdir"), DEFAULT_TROMBOME_DIRECTORY_NAME);
/**
* the actual base directory used for storage
*/
public File storageLocation;
/**
* the handler for InputSource operations
*/
private FileStoredDocumentSourceStorage documentSourceStorage = null;
private CorpusStorage corpusStorage = null;
private LuceneManager luceneManager = null;
private NlpFactory nlpAnnotatorFactory = new NlpFactory();
/**
* Create a new instance in the default location.
* @throws IOException
*/
public FileStorage() throws IOException {
this(DEFAULT_TROMBOME_DIRECTORY);
}
/**
* Create a new instance at the File location specified by the dataDirectory parameter
*
* @param storageLocation the file location to use for this storage
* @throws IOException
*/
public FileStorage(FlexibleParameters parameters) throws IOException {
this(parameters.containsKey("dataDirectory") ? new File(parameters.getParameterValue("dataDirectory")) : DEFAULT_TROMBOME_DIRECTORY);
}
/**
* Create a new instance at the specified File location
*
* @param storageLocation the file location to use for this storage
* @throws IOException
*/
public FileStorage(File storageLocation) throws IOException {
System.out.println("Trombone FileStorage location: "+storageLocation);
this.storageLocation = storageLocation;
if (storageLocation.exists()==false) {
if (!storageLocation.mkdirs()) {
throw new IOException("Unable to create data directory: "+storageLocation);
}
}
}
public StoredDocumentSourceStorage getStoredDocumentSourceStorage() {
if (documentSourceStorage==null) {
documentSourceStorage = new FileStoredDocumentSourceStorage(this.storageLocation);
}
return documentSourceStorage;
}
public void destroy() throws IOException {
getLuceneManager().getIndexWriter().close();
FileUtils.deleteDirectory(storageLocation);
}
@Override
public LuceneManager getLuceneManager() throws IOException {
if (luceneManager==null) {
Path path = Paths.get(storageLocation.getPath(), "lucene");
if (Files.exists(path)==false) {
Files.createDirectories(path);
}
luceneManager = new LuceneManager(this, new NIOFSDirectory(path));
}
return luceneManager;
}
@Override
public boolean hasStoredString(String id, Location location) {
return getResourceFile(id, location).exists();
}
@Override
public String storeString(String string, Location location) throws IOException {
String id = DigestUtils.md5Hex(string);
storeString(string, id, location);
return id;
}
@Override
public void storeString(String string, String id, Location location) throws IOException {
storeString(string, id, location, false);
}
@Override
public void storeString(String string, String id, Location location, boolean canOverwrite) throws IOException {
File file = getResourceFile(id, location);
if (file.getParentFile().exists()==false) { // make sure directory exists
file.getParentFile().mkdirs();
}
if (!isStored(id, location) || canOverwrite) {
FileUtils.writeStringToFile(file, string, "UTF-8");
}
}
@Override
public String storeStrings(Collection<String> strings, Location location) throws IOException {
String string = StringUtils.join(strings, "\n");
return storeString(string, location);
}
@Override
public void storeStrings(Collection<String> strings, String id, Location location) throws IOException {
String string = StringUtils.join(strings, "\n");
storeString(string, id, location);
}
@Override
public String retrieveString(String id, Location location) throws IOException {
File file = getResourceFile(id, location);
if (file.exists()==false) throw new IOException("An attempt was made to read a store string that that does not exist: "+id);
return FileUtils.readFileToString(file);
}
@Override
public List<String> retrieveStrings(String id, Location location) throws IOException {
String string = retrieveString(id, location);
return StringUtils.split(string, "\n");
}
@Override
public CorpusStorage getCorpusStorage() {
if (corpusStorage==null) {
corpusStorage = new FileCorpusStorage(this, storageLocation);
}
return corpusStorage;
}
private File getObjectStoreDirectory(Location location) {
switch (location) {
case cache:
case notebook:
return new File(storageLocation, location.name());
default:
return new File(storageLocation,"object-storage");
}
}
File getResourceFile(String id, Location location) {
// package level for migrators
if (id==null) {
throw new IllegalArgumentException("No ID provided for stored resource");
}
File file = new File(getObjectStoreDirectory(location), id+".gz");
if (file.exists()) {return file;}
else {return new File(getObjectStoreDirectory(location), id);}
}
public boolean copyResource(File source, String id, Location location) throws IOException {
File destination = getResourceFile(id, location);
if (destination.exists()) {return false;}
FileUtils.copyFile(source, destination);
return true;
}
@Override
public boolean isStored(String id, Location location) {
return getResourceFile(id, location).exists();
}
@Override
public String store(Object obj, Location location) throws IOException {
String id = UUID.randomUUID().toString();
store(obj, id, location);
return id;
}
@Override
public void store(Object obj, String id, Location location) throws IOException {
File file = getResourceFile(id, location);
if (file.getParentFile().exists()==false) { // make sure directory exists
file.getParentFile().mkdirs();
}
FileOutputStream fileOutputStream = new FileOutputStream(file);
ObjectOutputStream out = new ObjectOutputStream(fileOutputStream);
out.writeObject(obj);
out.close();
}
@Override
public Object retrieve(String id, Location location) throws IOException, ClassNotFoundException {
File file = getResourceFile(id, location);
FileInputStream fileInputStream = new FileInputStream(file);
ObjectInputStream in = new ObjectInputStream(fileInputStream);
Object obj = in.readObject();
in.close();
return obj;
}
public Writer getStoreWriter(String id, Location location) throws IOException {
File file = getResourceFile(id, location);
if (file.getParentFile().exists()==false) { // make sure directory exists
file.getParentFile().mkdirs();
}
return new FileWriter(file);
}
public Reader getStoreReader(String id, Location location) throws IOException {
File file = getResourceFile(id, location);
return new FileReader(file);
}
/*
private File getStoreCacheDirectory() {
return new File(storageLocation, "cache");
}
private File getCachedFile(String id) {
return new File(getStoreCacheDirectory(), id);
}
@Override
public Reader retrieveCachedStringReader(String id) throws IOException {
File file = getCachedFile(id);
return new FileReader(file);
}
@Override
public Writer getStoreCachedStringWriter(String id) throws IOException {
File file = getCachedFile(id);
if (file.getParentFile().exists()==false) { // make sure directory exists
file.getParentFile().mkdirs();
}
return new FileWriter(file);
}
@Override
public boolean isStoredCache(String id) {
File file = getCachedFile(id);
return file.exists();
}
*/
@Override
public DB getDB(String id, boolean readOnly) {
DBMaker maker = DBMaker.newFileDB(getResourceFile(id, Location.object))
.transactionDisable()
.closeOnJvmShutdown()
.mmapFileEnableIfSupported();
if (readOnly) {return maker.readOnly().make();}
else {return maker.make();}
}
public void closeDB(DB db) {
db.close();
}
public boolean existsDB(String id) {
return getResourceFile(id, Location.object).exists();
}
@Override
public FileMigrator getMigrator(String id) throws IOException {
return FileMigrationFactory.getMigrator(this, id);
}
@Override
public NlpFactory getNlpAnnotatorFactory() {
return nlpAnnotatorFactory;
}
}