package com.zillabyte.motherbrain.flow.aggregation;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Stack;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.log4j.Logger;
import org.codehaus.plexus.util.FileUtils;

import com.google.common.base.Throwables;
import com.google.common.io.Files;
import com.zillabyte.motherbrain.flow.MapTuple;
import com.zillabyte.motherbrain.flow.operations.AggregationOperation;
import com.zillabyte.motherbrain.universe.Universe;
import com.zillabyte.motherbrain.utils.Utils;

/***
 * Production aggregate store backed by the local disk. Tuples are grouped
 * under one directory per (batch, key) pair; a key's directory is addressed
 * by the MD5 hash of the serialized key.
 *
 * @author sjarvie
 */
public class DiskBackedStore implements AggregationStore {

  private static final long serialVersionUID = -1837276500478667648L;

  private AggregationOperation _operation;
  private String _extraPrefix;

  private static Logger _log = Utils.getLogger(DiskBackedStore.class);

  /***
   * Creates a store with no extra path prefix.
   *
   * @param o the aggregation operation this store belongs to
   */
  public DiskBackedStore(AggregationOperation o) {
    this(o, "");
  }

  /***
   * Creates a store rooted under an extra path prefix.
   *
   * @param o the aggregation operation this store belongs to
   * @param extraPrefix extra path segment appended under the store root
   */
  public DiskBackedStore(AggregationOperation o, String extraPrefix) {
    _operation = o;
    _extraPrefix = extraPrefix;

    // Make sure the target is clear...
    File root = new File(rootPath());
    if (root.exists()) {
      _log.warn("aggregation root already exists: " + root + " ... deleting");
      try {
        FileUtils.deleteDirectory(root);
      } catch (IOException e) {
        Throwables.propagate(e);
      }
    }
  }

  /***
   * @return the root directory for this operation's aggregation data
   */
  public String rootPath() {
    String root = Universe.instance().fileFactory().getTmp().toString();
    String flowId = _operation.topFlowId();
    String instanceName = _operation.instanceName();
    String extraPrefix = (_extraPrefix.equals("") ? "/" : _extraPrefix + "/");
    return root + "/f" + flowId + "/" + instanceName + "/aggregate_tuples/" + extraPrefix;
  }

  /**
   * @return the directory holding the given batch's aggregate keys and values
   */
  public String dataPath(Object batchId) {
    return rootPath() + batchId;
  }

  /**
   * @return the directory for the given key within the batch, addressed by
   *         the MD5 hash of the serialized key
   */
  public String keyPath(Object batchId, AggregationKey key) {
    String md5 = DigestUtils.md5Hex(Utils.serialize(key));
    return dataPath(batchId) + "/" + md5;
  }
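
  // Illustrative on-disk layout (hypothetical ids; the tmp root, flow id, and
  // instance name come from the running Universe and operation):
  //
  //   <tmp>/f42/agg_1/aggregate_tuples/<batchId>/
  //       <md5-of-serialized-key>/
  //           key.txt      <- serialized AggregationKey
  //           ...          <- tuple pages managed by TuplePage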

  @Override
  public void addToGroup(Object batchId, AggregationKey key, MapTuple tuple) {
    // Add the key file if this is the first tuple for the group...
    String path = keyPath(batchId, key);
    try {
      File keyFile = new File(path + "/key.txt");
      if (!keyFile.exists()) {
        Files.createParentDirs(keyFile);
        Files.touch(keyFile);
        Files.write(Utils.serialize(key), keyFile);
      }
      TuplePage page = new TuplePage(path);
      page.insert(tuple);
    } catch (IOException e) {
      _log.error("unable to add tuple to group: " + path, e);
    }
  }

  @Override
  public boolean hasGroup(Object batch, AggregationKey key) {
    File f = new File(keyPath(batch, key));
    return f.exists() && f.isDirectory() && (f.listFiles() != null);
  }

  @Override
  public Iterator<MapTuple> getGroupIterator(Object batch, AggregationKey key) {
    // Return a custom Iterator that incrementally reads the next tuples...
    TuplePage page = new TuplePage(keyPath(batch, key));
    return page.iterator();
  }

  @Override
  public void deleteGroup(Object batch, AggregationKey key) {
    File keyDir = new File(keyPath(batch, key));
    if (keyDir.exists()) {
      for (File f : keyDir.listFiles()) {
        f.delete();
      }
      keyDir.delete();
    }
  }

  @Override
  public Iterator<AggregationKey> keyIterator(final Object batch) throws AggregationException {
    Iterator<AggregationKey> it = new Iterator<AggregationKey>() {

      private AggregationKey _currentKey = null;

      // Use DFS to locate key files...
      private Stack<File> _stack = null;
      private String _root = dataPath(batch);

      @Override
      public boolean hasNext() {
        // Initialize the DFS...
        if (_stack == null) {
          _stack = new Stack<File>();
          _stack.add(new File(_root));
        }
        if (_currentKey != null) {
          return true;
        }
        // Find a valid key if it exists...
        while (!_stack.empty()) {
          File dir = _stack.pop();
          if (dir.exists() && dir.isDirectory()) {
            for (File f : dir.listFiles()) {
              if (f.isFile() && f.getName().equals("key.txt")) {
                try {
                  _currentKey = (AggregationKey) Utils.deserialize(Files.toByteArray(f));
                  return true;
                } catch (IOException e) {
                  return false;
                }
              } else if (f.isDirectory()) {
                _stack.add(f);
              }
            }
          }
        }
        return false;
      }

      @Override
      public AggregationKey next() {
        if (!hasNext()) {
          throw new NoSuchElementException();
        }
        AggregationKey key = _currentKey;
        _currentKey = null;
        return key;
      }

      @Override
      public void remove() {
        throw new UnsupportedOperationException();
      }
    };
    return it;
  }

  @Override
  public void flush(Object batch) {
  }

  @Override
  public void deleteBatch(Object batch) throws AggregationException {
    try {
      FileUtils.deleteDirectory(new File(dataPath(batch)));
    } catch (IOException e) {
      throw (AggregationException) new AggregationException(e)
          .setUserMessage("Unable to cleanup aggregation data.")
          .adviseRetry();
    }
  }
}
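
// Minimal usage sketch (illustrative; `op`, `batchId`, `key`, and `tuple` are
// assumed to exist in the caller and are not defined in this file):
//
//   DiskBackedStore store = new DiskBackedStore(op);
//   store.addToGroup(batchId, key, tuple);
//   if (store.hasGroup(batchId, key)) {
//     Iterator<MapTuple> tuples = store.getGroupIterator(batchId, key);
//     while (tuples.hasNext()) { /* aggregate tuples.next() */ }
//   }
//   store.deleteBatch(batchId);  // removes all on-disk state for the batch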