/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.metastore;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.I0Itec.zkclient.serialize.ZkSerializer;
import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.io.Text;
import org.apache.helix.AccessOption;
import org.apache.helix.manager.zk.ByteArraySerializer;
import org.apache.helix.store.HelixPropertyStore;
import org.apache.helix.store.zk.ZkHelixPropertyStore;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import gobblin.configuration.State;
import gobblin.util.io.StreamUtils;
/**
 * An implementation of {@link StateStore} backed by ZooKeeper.
 *
 * <p>
 *
 * Each table is stored in a single znode whose data is a blob holding a sequence of records. Every record is a
 * {@link Text} key followed by the serialized {@link State}; the blob is gzip-compressed when the store was
 * constructed with <code>compressedValues</code> set to <em>true</em>.
 * The ZK path is in the format /STORE_ROOT_DIR/STORE_NAME/TABLE_NAME.
 * State keys are state IDs (see {@link State#getId()}), and values are objects of {@link State} or
 * any of its extensions. Keys will be empty strings if state IDs are not set
 * (i.e., {@link State#getId()} returns <em>null</em>). In this case, the
 * {@link ZkStateStore#get(String, String, String)} method may not work.
 * </p>
 *
 * @param <T> state object type
 **/
public class ZkStateStore<T extends State> implements StateStore<T> {

  // Class of the state objects to be put into the store
  private final Class<T> stateClass;
  // Property store through which all znode reads and writes go
  private final HelixPropertyStore<byte[]> propStore;
  // Whether serialized values are gzip-compressed before being written
  private final boolean compressedValues;

  /**
   * State store that stores instances of {@link State}s in a ZooKeeper-backed {@link HelixPropertyStore}
   * storeRootDir will be created when the first entry is written if it does not exist
   * @param connectString ZooKeeper connect string
   * @param storeRootDir The root directory for the state store
   * @param compressedValues should values be compressed for storage?
   * @param stateClass The type of state being stored
   * @throws IOException
   */
  public ZkStateStore(String connectString, String storeRootDir, boolean compressedValues, Class<T> stateClass)
      throws IOException {
    this.compressedValues = compressedValues;
    this.stateClass = stateClass;

    ZkSerializer serializer = new ByteArraySerializer();
    this.propStore = new ZkHelixPropertyStore<byte[]>(connectString, serializer, storeRootDir);
  }

  /**
   * Build the property store path of a store.
   * @param storeName the store name
   * @return the path "/storeName"
   */
  private String formPath(String storeName) {
    return "/" + storeName;
  }

  /**
   * Build the property store path of a table within a store.
   * @param storeName the store name
   * @param tableName the table name
   * @return the path "/storeName/tableName"
   */
  private String formPath(String storeName, String tableName) {
    return "/" + storeName + "/" + tableName;
  }

  /**
   * Create the znode for a store if it does not exist yet.
   * @param storeName the store name
   * @return <em>true</em> if the store znode already exists or was created, <em>false</em> otherwise
   * @throws IOException
   */
  @Override
  public boolean create(String storeName) throws IOException {
    String path = formPath(storeName);

    return this.propStore.exists(path, 0)
        || this.propStore.create(path, ArrayUtils.EMPTY_BYTE_ARRAY, AccessOption.PERSISTENT);
  }

  /**
   * Create an empty znode for a table.
   * @param storeName the store name
   * @param tableName the table name
   * @return the result of the underlying create call
   * @throws IOException if a znode already exists for the table
   */
  @Override
  public boolean create(String storeName, String tableName) throws IOException {
    String path = formPath(storeName, tableName);

    if (this.propStore.exists(path, 0)) {
      throw new IOException(String.format("State already exists for storeName %s tableName %s", storeName,
          tableName));
    }

    return this.propStore.create(path, ArrayUtils.EMPTY_BYTE_ARRAY, AccessOption.PERSISTENT);
  }

  /**
   * Check whether a znode exists for a table.
   * @param storeName the store name
   * @param tableName the table name
   * @return <em>true</em> if the table znode exists
   * @throws IOException
   */
  @Override
  public boolean exists(String storeName, String tableName) throws IOException {
    return this.propStore.exists(formPath(storeName, tableName), 0);
  }

  /**
   * Serializes the state to the {@link DataOutput}. The record consists of the state ID as a {@link Text}
   * (an empty string if the ID is <em>null</em>) followed by the state itself.
   * @param dataOutput output target receiving the serialized data
   * @param state the state to serialize
   * @throws IOException
   */
  private void addStateToDataOutputStream(DataOutput dataOutput, T state) throws IOException {
    new Text(Strings.nullToEmpty(state.getId())).write(dataOutput);
    state.write(dataOutput);
  }

  /**
   * Create a new znode with data if it does not exist otherwise update with data
   * @param storeName storeName portion of znode path
   * @param tableName tableName portion of znode path
   * @param data znode data
   * @throws IOException if the property store reports that the create or the update failed
   */
  private void putData(String storeName, String tableName, byte[] data) throws IOException {
    String path = formPath(storeName, tableName);

    if (!this.propStore.exists(path, 0)) {
      // create with data
      if (!this.propStore.create(path, data, AccessOption.PERSISTENT)) {
        throw new IOException("Failed to create a state file for table " + tableName);
      }
    } else if (!this.propStore.set(path, data, AccessOption.PERSISTENT)) {
      // A failed update must not be dropped silently, otherwise the caller believes the state was persisted
      throw new IOException("Failed to update the state file for table " + tableName);
    }
  }

  /**
   * Store a single state as the entire content of a table, replacing any existing content.
   * @param storeName the store name
   * @param tableName the table name
   * @param state the state to store
   * @throws IOException
   */
  @Override
  public void put(String storeName, String tableName, T state) throws IOException {
    putAll(storeName, tableName, Collections.singletonList(state));
  }

  /**
   * Serialize the given states into one blob and store it as the content of a table, replacing any
   * existing content.
   * @param storeName the store name
   * @param tableName the table name
   * @param states the states to store
   * @throws IOException
   */
  @Override
  public void putAll(String storeName, String tableName, Collection<T> states) throws IOException {
    try (ByteArrayOutputStream byteArrayOs = new ByteArrayOutputStream();
        OutputStream os = this.compressedValues ? new GZIPOutputStream(byteArrayOs) : byteArrayOs;
        DataOutputStream dataOutput = new DataOutputStream(os)) {

      for (T state : states) {
        addStateToDataOutputStream(dataOutput, state);
      }

      // Close before reading the bytes so that all data (including the gzip trailer) reaches byteArrayOs
      dataOutput.close();
      putData(storeName, tableName, byteArrayOs.toByteArray());
    }
  }

  /**
   * Retrieve the state with the given ID from a table.
   * @param storeName the store name
   * @param tableName the table name
   * @param stateId the ID of the state to look up. If <em>null</em>, no filtering is applied and the first
   *                state of the table is returned.
   * @return the matching state, or <em>null</em> if the table holds no data or no state with that ID
   * @throws IOException
   */
  @Override
  public T get(String storeName, String tableName, String stateId) throws IOException {
    byte[] data = this.propStore.get(formPath(storeName, tableName), null, 0);
    List<T> states = Lists.newArrayList();

    deserialize(data, states, stateId);

    return states.isEmpty() ? null : states.get(0);
  }

  /**
   * Retrieve states from the state store based on the store name and a filtering predicate
   * @param storeName The store name enclosing the state files
   * @param predicate The predicate for state file filtering
   * @return list of states from all tables whose name matches the predicate
   * @throws IOException
   */
  protected List<T> getAll(String storeName, Predicate<String> predicate) throws IOException {
    List<T> states = Lists.newArrayList();
    String path = formPath(storeName);
    List<String> children = this.propStore.getChildNames(path, 0);

    if (children == null) {
      return states;
    }

    for (String child : children) {
      if (predicate.apply(child)) {
        deserialize(this.propStore.get(path + "/" + child, null, 0), states);
      }
    }

    return states;
  }

  /**
   * Retrieve all states stored in a table.
   * @param storeName the store name
   * @param tableName the table name
   * @return list of states in the table; empty if the table holds no data
   * @throws IOException
   */
  @Override
  public List<T> getAll(String storeName, String tableName) throws IOException {
    List<T> states = Lists.newArrayList();

    deserialize(this.propStore.get(formPath(storeName, tableName), null, 0), states);

    return states;
  }

  /**
   * Retrieve all states stored in all tables of a store.
   * @param storeName the store name
   * @return list of states in the store
   * @throws IOException
   */
  @Override
  public List<T> getAll(String storeName) throws IOException {
    return getAll(storeName, Predicates.<String>alwaysTrue());
  }

  /**
   * List the names of the tables in a store that match a predicate.
   * @param storeName the store name
   * @param predicate the predicate for table name filtering
   * @return list of matching table names; empty if the store has no children
   * @throws IOException
   */
  @Override
  public List<String> getTableNames(String storeName, Predicate<String> predicate) throws IOException {
    List<String> names = Lists.newArrayList();
    List<String> children = this.propStore.getChildNames(formPath(storeName), 0);

    if (children != null) {
      for (String child : children) {
        if (predicate.apply(child)) {
          names.add(child);
        }
      }
    }

    return names;
  }

  /**
   * Create an alias for a table by copying the data of the original table into the alias table.
   * @param storeName the store name
   * @param original the name of the table to copy from
   * @param alias the name of the table to copy to
   * @throws IOException if the original table does not exist or the copy cannot be written
   */
  @Override
  public void createAlias(String storeName, String original, String alias) throws IOException {
    String pathOriginal = formPath(storeName, original);

    if (!this.propStore.exists(pathOriginal, 0)) {
      throw new IOException(String.format("State does not exist for table %s", original));
    }

    putData(storeName, alias, this.propStore.get(pathOriginal, null, 0));
  }

  /**
   * Remove the znode of a table.
   * @param storeName the store name
   * @param tableName the table name
   * @throws IOException
   */
  @Override
  public void delete(String storeName, String tableName) throws IOException {
    this.propStore.remove(formPath(storeName, tableName), 0);
  }

  /**
   * Remove the znode of a store.
   * @param storeName the store name
   * @throws IOException
   */
  @Override
  public void delete(String storeName) throws IOException {
    this.propStore.remove(formPath(storeName), 0);
  }

  /**
   * Deserialize data into a list of {@link State}s.
   *
   * <p>
   * Without a key filter every state found in the data is appended to the output list. With a key filter only
   * the first state whose key equals the filter is appended, and deserialization stops there.
   * </p>
   *
   * @param data byte array; nothing is done if it is <em>null</em>
   * @param states output list of states
   * @param stateId optional key filter. Set to null for no filtering.
   * @throws IOException if a state cannot be instantiated or read
   */
  private void deserialize(byte[] data, List<T> states, String stateId) throws IOException {
    if (data == null) {
      return;
    }

    Text key = new Text();

    try (ByteArrayInputStream bais = new ByteArrayInputStream(data);
        InputStream is = StreamUtils.isCompressed(data) ? new GZIPInputStream(bais) : bais;
        DataInputStream dis = new DataInputStream(is)) {
      // keep deserializing while we have data
      while (dis.available() > 0) {
        T state = this.stateClass.newInstance();

        key.readFields(dis);
        state.readFields(dis);

        if (stateId == null) {
          states.add(state);
        } else if (key.toString().equals(stateId)) {
          // Only the matching state may reach the caller; non-matching ones are read solely to advance the stream
          states.add(state);
          return;
        }
      }
    } catch (EOFException ignored) {
      // no more data. GZIPInputStream.available() doesn't return 0 until after EOF.
    } catch (RuntimeException e) {
      throw e;
    } catch (Exception e) {
      throw new IOException("failure deserializing state from ZkStateStore", e);
    }
  }

  /**
   * Deserialize data into a list of {@link State}s.
   * @param data byte array
   * @param states output list of states
   * @throws IOException
   */
  private void deserialize(byte[] data, List<T> states) throws IOException {
    deserialize(data, states, null);
  }
}