/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zookeeper.server;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;
import org.apache.cassandra.db.RowMutation;
import org.apache.cassandra.db.commitlog.CommitLog;
import org.apache.cassandra.db.commitlog.MyRowMutationReplayer;
import org.apache.cassandra.net.MessagingService;
import org.apache.cassandra.service.CassandraDaemon;
import org.apache.jute.BinaryOutputArchive;
import org.apache.jute.InputArchive;
import org.apache.jute.OutputArchive;
import org.apache.jute.Record;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooDefs.OpCode;
import org.apache.zookeeper.data.ACL;
import org.apache.zookeeper.data.Stat;
import org.apache.zookeeper.server.DataTree.ProcessTxnResult;
import org.apache.zookeeper.server.persistence.FileTxnSnapLog;
import org.apache.zookeeper.server.persistence.FileTxnSnapLog.PlayBackListener;
import org.apache.zookeeper.server.quorum.Leader;
import org.apache.zookeeper.server.quorum.Leader.Proposal;
import org.apache.zookeeper.server.quorum.QuorumPacket;
import org.apache.zookeeper.server.util.SerializeUtils;
import org.apache.zookeeper.txn.CreateTxn;
import org.apache.zookeeper.txn.TxnHeader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class maintains the in-memory database of zookeeper server state,
* which includes the sessions, the datatree and the committed log. It is
* booted up by reading the logs and snapshots from the disk.
*/
public class ZKDatabase {
private static final Logger LOG = LoggerFactory.getLogger(ZKDatabase.class);
/**
* make sure that a clear() takes care of resetting all of these members.
*/
protected DataTree dataTree;
protected ConcurrentHashMap<Long, Integer> sessionsWithTimeouts;
protected FileTxnSnapLog snapLog;
protected long minCommittedLog, maxCommittedLog;
public static final int commitLogCount = 500;
protected static int commitLogBuffer = 700;
protected LinkedList<Proposal> committedLog = new LinkedList<Proposal>();
protected ReentrantReadWriteLock logLock = new ReentrantReadWriteLock();
private volatile boolean initialized = false;
/**
* the filetxnsnaplog that this zk database maps to. There is a one-to-one
* relationship between a filetxnsnaplog and zkdatabase.
*
* @param snapLog
* the FileTxnSnapLog mapping this zkdatabase
*/
public ZKDatabase(FileTxnSnapLog snapLog) {
dataTree = new DataTree();
sessionsWithTimeouts = new ConcurrentHashMap<Long, Integer>();
this.snapLog = snapLog;
}
/**
* checks to see if the zk database has been initialized or not.
*
* @return true if zk database is initialized and false if not
*/
public boolean isInitialized() {
return initialized;
}
/**
* clear the zkdatabase. Note to developers: make sure the clear method
* clears out all the data structures in the zkdatabase.
*/
public void clear() {
minCommittedLog = 0;
maxCommittedLog = 0;
/*
* to be safe we just create a new datatree.
*/
dataTree = new DataTree();
sessionsWithTimeouts.clear();
WriteLock lock = logLock.writeLock();
try {
lock.lock();
committedLog.clear();
} finally {
lock.unlock();
}
initialized = false;
}
/**
* the datatree for this zkdatabase
*
* @return the datatree for this zkdatabase
*/
public DataTree getDataTree() {
return this.dataTree;
}
/**
* the maximum committed log (zxid) available in memory
*
* @return the maximum committed log for this zkdatabase
*/
public long getmaxCommittedLog() {
return maxCommittedLog;
}
/**
* the minimum committed transaction log available in memory
*
* @return the minimum committed transaction log available in memory
*/
public long getminCommittedLog() {
return minCommittedLog;
}
/**
* Get the lock that controls the committedLog. If you want to get the
* pointer to the committedLog, you need to use this lock to acquire a read
* lock before calling getCommittedLog()
*
* @return the lock that controls the committed log
*/
public ReentrantReadWriteLock getLogLock() {
return logLock;
}
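/*
 * Usage sketch (illustrative only; "zkDb" is a hypothetical reference to
 * this ZKDatabase): to iterate the live committed log without copying it,
 * take the read lock first. getCommittedLog() checks getReadHoldCount()
 * and returns the backing list instead of a copy when the calling thread
 * already holds the read lock.
 *
 *   ReadLock rl = zkDb.getLogLock().readLock();
 *   rl.lock();
 *   try {
 *       for (Proposal p : zkDb.getCommittedLog()) {
 *           // e.g. inspect p.packet.getZxid()
 *       }
 *   } finally {
 *       rl.unlock();
 *   }
 */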
public synchronized LinkedList<Proposal> getCommittedLog() {
ReadLock rl = logLock.readLock();
// only make a copy if this thread isn't already holding a lock
if (logLock.getReadHoldCount() <= 0) {
try {
rl.lock();
return new LinkedList<Proposal>(this.committedLog);
} finally {
rl.unlock();
}
}
return this.committedLog;
}
/**
* get the last processed zxid from a datatree
*
* @return the last processed zxid of a datatree
*/
public long getDataTreeLastProcessedZxid() {
return dataTree.lastProcessedZxid;
}
/**
* set the datatree initialized or not
*
* @param b
* set the datatree initialized to b
*/
public void setDataTreeInit(boolean b) {
dataTree.initialized = b;
}
/**
* return the sessions in the datatree
*
* @return the data tree sessions
*/
public Collection<Long> getSessions() {
return dataTree.getSessions();
}
/**
* get sessions with timeouts
*
* @return the hashmap of sessions with timeouts
*/
public ConcurrentHashMap<Long, Integer> getSessionWithTimeOuts() {
return sessionsWithTimeouts;
}
/**
* load the database from disk into memory and also add the transactions
* to the committedlog in memory.
*
* @return the last valid zxid on disk
* @throws IOException
*/
public long loadDataBase() throws IOException {
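// Replay listener: every transaction restored from the on-disk txn log
// is wrapped in a Request and added to the in-memory committedLog, so it
// is immediately available for fast follower synchronization.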
PlayBackListener listener = new PlayBackListener() {
public void onTxnLoaded(TxnHeader hdr, Record txn) {
Request r = new Request(null, 0, hdr.getCxid(), hdr.getType(),
null, null);
r.txn = txn;
r.hdr = hdr;
r.zxid = hdr.getZxid();
addCommittedProposal(r);
}
};
long zxid = snapLog.restore(dataTree, sessionsWithTimeouts, listener);
initialized = true;
return zxid;
}
/**
* maintains a list of the last <i>commitLogCount</i> committed requests.
* This is used for fast follower synchronization.
*
* @param request
* committed request
*/
public void addCommittedProposal(Request request) {
WriteLock wl = logLock.writeLock();
try {
wl.lock();
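// Bound the list at roughly commitLogCount entries: once it grows past
// the limit, drop the oldest proposal and advance minCommittedLog to the
// zxid at the new head of the list.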
if (committedLog.size() > commitLogCount) {
committedLog.removeFirst();
minCommittedLog = committedLog.getFirst().packet.getZxid();
}
if (committedLog.size() == 0) {
minCommittedLog = request.zxid;
maxCommittedLog = request.zxid;
}
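// Serialize the txn header (and the txn body, if present) into the byte
// payload of the proposal packet; SerializeUtils.deserializeTxn() below
// decodes exactly this layout.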
ByteArrayOutputStream baos = new ByteArrayOutputStream();
BinaryOutputArchive boa = BinaryOutputArchive.getArchive(baos);
try {
request.hdr.serialize(boa, "hdr");
if (request.txn != null) {
request.txn.serialize(boa, "txn");
}
baos.close();
} catch (IOException e) {
LOG.error("This really should be impossible", e);
}
QuorumPacket pp = new QuorumPacket(Leader.PROPOSAL, request.zxid,
baos.toByteArray(), null);
/*
 * pgaref - Follower packet -> this is the hook used to pick up
 * committed transactions on a follower.
 */
if ((request.sessionId == 2285L) // magic session id used by this fork
&& (CassandraDaemon.ZooServer.getServerState()
.equalsIgnoreCase("FOLLOWING"))
&& (request.type == OpCode.create)) {
// Is my create request here?
LOG.debug("PGAREF ZKDATABASE Follower : "
+ CassandraDaemon.ZooServer.getServerState()
.equalsIgnoreCase("FOLLOWING")
+ " request bb? : " + new String(pp.getData())
+ " Create OpCode? : "
+ (request.type == OpCode.create));
TxnHeader hdr = new TxnHeader();
Record txn = null;
try {
txn = SerializeUtils.deserializeTxn(pp.getData(), hdr);
} catch (IOException e) {
LOG.error("pgaref - txn deserialization error", e);
}
// Guard against a failed deserialization (txn left null) before casting.
if (txn instanceof CreateTxn) {
LOG.debug("------------------------> pgaref Deserialising..... ");// +
// new
// String(((CreateTxn) // txn).getData()));
// Deserialize and....
ByteArrayInputStream bInput = new ByteArrayInputStream(
((CreateTxn) txn).getData());
DataInputStream in = new DataInputStream(bInput);
//fix Local counter counter - Parse Path after /cassandra!!!
CommitLog.log_count = Long.parseLong(((CreateTxn) txn).getPath().substring(10));
/* pgaref - replay block, commented out FOR TESTING ONLY:
try {
    final RowMutation tmp = RowMutation.serializer.deserialize(
            in, getVersion());
    LOG.debug("pgaref >>>>>> ROW : " + tmp.toString());
    // LOG.info(String.format("replaying mutation for %s.%s: %s",
    //     tmp.getKeyspaceName(),
    //     ByteBufferUtil.bytesToHex(tmp.key()),
    //     "{" + StringUtils.join(tmp.getColumnFamilies().iterator(), ", ") + "}"));
    MyRowMutationReplayer recovery = new MyRowMutationReplayer();
    recovery.recover(tmp);
    recovery.blockForWrites();
    CommitLog.instance.add(tmp);
} catch (IOException e) {
    LOG.error("pgaref - Deserialization FAILED!");
}
*/
/* pgaref - clean up the previous znode now that this one has been applied */
if (CommitLog.log_count > 1L) {
// Not the first znode, so a predecessor exists and must be deleted.
long tmp = CommitLog.log_count - 1L;
try {
CassandraDaemon.ZooServer.delete("/cassandra" + String.format("%015d", tmp), tmp);
} catch (NoNodeException e) {
LOG.error("pgaref - CaZoo cannot delete previous znode " + tmp, e);
}
}
} // end CreateTxn guard
}
// Ends here!
Proposal p = new Proposal();
p.packet = pp;
p.request = request;
committedLog.add(p);
maxCommittedLog = p.packet.getZxid();
} finally {
wl.unlock();
}
}
protected static final String CUR_VER = System.getProperty(
"cassandra.version", "2.0");
protected static final Map<String, Integer> VERSION_MAP = new HashMap<String, Integer>() {
{
put("0.7", 1);
put("1.0", 3);
put("1.2", MessagingService.VERSION_12);
put("2.0", MessagingService.VERSION_20);
}
};
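/**
 * the messaging version used when deserializing Cassandra row mutations.
 * Note: an unmapped "cassandra.version" value (anything not a key of
 * VERSION_MAP, e.g. -Dcassandra.version=1.1) makes this throw a
 * NullPointerException when the null Integer is unboxed.
 */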
protected final int getVersion() {
return VERSION_MAP.get(CUR_VER);
}
/**
* remove a cnxn from the datatree
*
* @param cnxn
* the cnxn to remove from the datatree
*/
public void removeCnxn(ServerCnxn cnxn) {
dataTree.removeCnxn(cnxn);
}
/**
* kill a given session in the datatree
*
* @param sessionId
* the session id to be killed
* @param zxid
* the zxid of kill session transaction
*/
public void killSession(long sessionId, long zxid) {
dataTree.killSession(sessionId, zxid);
}
/**
* write a text dump of all the ephemerals in the datatree
*
* @param pwriter
* the output to write to
*/
public void dumpEphemerals(PrintWriter pwriter) {
dataTree.dumpEphemerals(pwriter);
}
/**
* the node count of the datatree
*
* @return the node count of datatree
*/
public int getNodeCount() {
return dataTree.getNodeCount();
}
/**
* the ephemeral node paths for a given session id
*
* @param sessionId
* the session id whose ephemeral paths are returned
* @return the ephemeral paths for the session id
*/
public HashSet<String> getEphemerals(long sessionId) {
return dataTree.getEphemerals(sessionId);
}
/**
* set the last processed zxid in the datatree
*
* @param zxid
* the last processed zxid in the datatree
*/
public void setlastProcessedZxid(long zxid) {
dataTree.lastProcessedZxid = zxid;
}
/**
* process the txn on the datatree
*
* @param hdr
* the txnheader for the txn
* @param txn
* the transaction that needs to be processed
* @return the result of processing the transaction on this
* datatree/zkdatabase
*/
public ProcessTxnResult processTxn(TxnHeader hdr, Record txn) {
return dataTree.processTxn(hdr, txn);
}
/**
* stat the path
*
* @param path
* the path for which stat is to be done
* @param serverCnxn
* the servercnxn attached to this request
* @return the stat of this node
* @throws KeeperException.NoNodeException
*/
public Stat statNode(String path, ServerCnxn serverCnxn)
throws KeeperException.NoNodeException {
return dataTree.statNode(path, serverCnxn);
}
/**
* get the datanode for this path
*
* @param path
* the path to lookup
* @return the datanode for the given path
*/
public DataNode getNode(String path) {
return dataTree.getNode(path);
}
/**
* convert from long to the acl entry
*
* @param aclL
* the long for which to get the acl
* @return the acl corresponding to this long entry
*/
public List<ACL> convertLong(Long aclL) {
return dataTree.convertLong(aclL);
}
/**
* get data and stat for a path
*
* @param path
* the path being queried
* @param stat
* the stat for this path
* @param watcher
* the watcher function
* @return the data of the node at this path
* @throws KeeperException.NoNodeException
*/
public byte[] getData(String path, Stat stat, Watcher watcher)
throws KeeperException.NoNodeException {
return dataTree.getData(path, stat, watcher);
}
/**
* set watches on the datatree
*
* @param relativeZxid
* the relative zxid that the client has seen
* @param dataWatches
* the data watches the client wants to reset
* @param existWatches
* the exists watches the client wants to reset
* @param childWatches
* the child watches the client wants to reset
* @param watcher
* the watcher function
*/
public void setWatches(long relativeZxid, List<String> dataWatches,
List<String> existWatches, List<String> childWatches,
Watcher watcher) {
dataTree.setWatches(relativeZxid, dataWatches, existWatches,
childWatches, watcher);
}
/**
* get acl for a path
*
* @param path
* the path to query for acl
* @param stat
* the stat for the node
* @return the acl list for this path
* @throws NoNodeException
*/
public List<ACL> getACL(String path, Stat stat) throws NoNodeException {
return dataTree.getACL(path, stat);
}
/**
* get children list for this path
*
* @param path
* the path of the node
* @param stat
* the stat of the node
* @param watcher
* the watcher function for this path
* @return the list of children for this path
* @throws KeeperException.NoNodeException
*/
public List<String> getChildren(String path, Stat stat, Watcher watcher)
throws KeeperException.NoNodeException {
return dataTree.getChildren(path, stat, watcher);
}
/**
* check if the path is special or not
*
* @param path
* the input path
* @return true if path is special and false if not
*/
public boolean isSpecialPath(String path) {
return dataTree.isSpecialPath(path);
}
/**
* get the acl size of the datatree
*
* @return the acl size of the datatree
*/
public int getAclSize() {
return dataTree.longKeyMap.size();
}
/**
* Truncate the ZKDatabase to the specified zxid
*
* @param zxid
* the zxid to truncate zk database to
* @return true if the truncate is successful and false if not
* @throws IOException
*/
public boolean truncateLog(long zxid) throws IOException {
clear();
// truncate the log
boolean truncated = snapLog.truncateLog(zxid);
if (!truncated) {
return false;
}
loadDataBase();
return true;
}
/**
* deserialize a snapshot from an input archive
*
* @param ia
* the input archive you want to deserialize from
* @throws IOException
*/
public void deserializeSnapshot(InputArchive ia) throws IOException {
clear();
SerializeUtils.deserializeSnapshot(getDataTree(), ia,
getSessionWithTimeOuts());
initialized = true;
}
/**
* serialize the snapshot
*
* @param oa
* the output archive to which the snapshot needs to be
* serialized
* @throws IOException
* @throws InterruptedException
*/
public void serializeSnapshot(OutputArchive oa) throws IOException,
InterruptedException {
SerializeUtils.serializeSnapshot(getDataTree(), oa,
getSessionWithTimeOuts());
}
/**
* append to the underlying transaction log
*
* @param si
* the request to append
* @return true if the append was successful and false if not
*/
public boolean append(Request si) throws IOException {
return this.snapLog.append(si);
}
/**
* roll the underlying log
*/
public void rollLog() throws IOException {
this.snapLog.rollLog();
}
/**
* commit to the underlying transaction log
*
* @throws IOException
*/
public void commit() throws IOException {
this.snapLog.commit();
}
/**
* close this database and free the resources
*
* @throws IOException
*/
public void close() throws IOException {
this.snapLog.close();
}
}