/*
* Sifarish: Recommendation Engine
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.sifarish.realtime;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.chombo.storm.GenericBolt;
import org.chombo.storm.MessageHolder;
import org.chombo.util.ConfigUtility;
import org.hoidla.stream.BaseCountSketch;
import org.hoidla.stream.CountMinSketch;
import org.hoidla.stream.CountMinSketchesFrequent;
import org.hoidla.util.BoundedSortedObjects;
import org.hoidla.util.DailySchedule;
import org.hoidla.util.Expirer;
import org.hoidla.util.SimpleObjectCounter;
import org.hoidla.util.Utility;
import backtype.storm.Config;
import backtype.storm.task.TopologyContext;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
/**
 * Storm bolt that feeds item IDs into a {@link CountMinSketchesFrequent} and,
 * on every tick tuple, emits the current most frequent items.
 *
 * <p>Non-tick tuples contribute their {@code RecommenderBolt.ITEM_ID} field to
 * the sketches. Tick tuples (requested through
 * {@link #getComponentConfiguration()}) drive the configured expiry policy and
 * cause the current top hitters to be serialized with ":" as separator and
 * queued as an output message.
 *
 * <p>Expiry policy is selected with the {@code sketches.expiry.policy}
 * configuration parameter: {@code epoch}, {@code tumble}, or anything else for
 * no expiry.
 *
 * @author pranab
 */
public class TrendingSketchesBolt extends GenericBolt {
    private int tickFrequencyInSeconds;
    private CountMinSketchesFrequent sketches;
    private DailySchedule dailySchedule;
    // single holder instance reused for every emitted message
    private MessageHolder msg = new MessageHolder();

    // NOTE(review): presumably the output field names used when declaring the
    // stream of this bolt; they are not referenced inside this class
    public static final String BOLT_ID = "boltID";
    public static final String FREQ_COUNTS = "freqCounts";

    /**
     * How the content of the sketches is aged out.
     */
    enum ExpiryPolicy {
        None,
        Epoch,
        Tumble
    }

    private ExpiryPolicy expiryPolicy = ExpiryPolicy.None;
    // not read or written anywhere in this class
    private int tumbleTimeHour;
    // number of tick tuples that make up one epoch, always >= 1 once the
    // epoch policy has been configured
    private int ticksPerEpoch;
    // number of tick tuples received so far
    private long tickCount;
    private static final Logger LOG = Logger.getLogger(TrendingSketchesBolt.class);
    private static final long serialVersionUID = 8844719835097201335L;

    /**
     * @param tickFrequencyInSeconds interval in seconds at which tick tuples
     *        are requested for this bolt
     */
    public TrendingSketchesBolt(int tickFrequencyInSeconds) {
        super();
        this.tickFrequencyInSeconds = tickFrequencyInSeconds;
    }

    /**
     * Requests tick tuples at the frequency given to the constructor.
     *
     * @return component configuration containing only the tick tuple frequency
     */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        Config conf = new Config();
        conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, tickFrequencyInSeconds);
        return conf;
    }

    /**
     * Builds the sketches object and the expiry machinery from configuration.
     *
     * @param stormConf configuration map the {@code sketches.*} and
     *        {@code debug.on} parameters are read from
     * @param context topology context, not used here
     */
    @Override
    public void intialize(Map stormConf, TopologyContext context) {
        // the debug flag has to be loaded before it is consulted, otherwise the
        // block below is governed by whatever value the field held previously
        debugOn = ConfigUtility.getBoolean(stormConf,"debug.on", false);
        if (debugOn) {
            LOG.setLevel(Level.INFO);
            LOG.info("TrendingSketchesBolt intialization " );
        }

        //sketches object
        Expirer expirer = null;
        double errorLimit = ConfigUtility.getDouble(stormConf, "sketches.error.lim", 0.05);
        double errorProbLimit =ConfigUtility.getDouble(stormConf, "sketches.error.prob.limit", 0.02);
        int mostFrequentCount = ConfigUtility.getInt(stormConf, "sketches.most.freq.count", 3);
        int freqCountLimitPercent = ConfigUtility.getInt(stormConf, "sketches.freq.count.lim.percent", 20);

        String expiry = ConfigUtility.getString(stormConf, "sketches.expiry.policy", "none");
        if (expiry.equals("epoch")) {
            int maxEpoch = ConfigUtility.getInt(stormConf, "sketches.max.epoch", 5);
            expirer = new Expirer(maxEpoch);
            expiryPolicy = ExpiryPolicy.Epoch;
            int epochSize = ConfigUtility.getInt(stormConf, "sketches.epoch.size", 30);
            ticksPerEpoch = computeTicksPerEpoch(epochSize, tickFrequencyInSeconds);
            LOG.info("ticksPerEpoch:" + ticksPerEpoch);
        } else if (expiry.equals("tumble")) {
            int[] tumbleTimeMin = ConfigUtility.getIntArray(stormConf, "sketches.tumble.time.min");
            expiryPolicy = ExpiryPolicy.Tumble;
            // NOTE(review): first argument looks like a trigger tolerance window of
            // two tick intervals - confirm against DailySchedule
            dailySchedule = new DailySchedule(2*tickFrequencyInSeconds, tumbleTimeMin);
        } else if (!expiry.equals("none")) {
            // behaviour is unchanged (no expiry), but a typo in the policy name
            // should not go unnoticed
            LOG.warn("unrecognized sketches.expiry.policy '" + expiry + "', no expiry will be applied");
        }

        // an expirer only exists for the epoch policy
        sketches = expirer == null ?
            new CountMinSketchesFrequent(errorLimit, errorProbLimit, mostFrequentCount, freqCountLimitPercent) :
            new CountMinSketchesFrequent(errorLimit, errorProbLimit, mostFrequentCount, freqCountLimitPercent, expirer);
        boolean globalTotalCount = ConfigUtility.getBoolean(stormConf, "sketches.global.total.count", false);
        sketches.withGlobalTotalCount(globalTotalCount);
    }

    /**
     * Converts an epoch length into a number of tick tuples, never returning
     * less than 1 so that it is always safe to use as a modulo divisor.
     *
     * @param epochSizeInSeconds configured epoch length
     * @param tickFrequencyInSeconds interval between tick tuples
     * @return ticks per epoch, at least 1
     */
    private static int computeTicksPerEpoch(int epochSizeInSeconds, int tickFrequencyInSeconds) {
        int ticks = tickFrequencyInSeconds > 0 ? epochSizeInSeconds / tickFrequencyInSeconds : 0;
        if (ticks < 1) {
            LOG.warn("epoch size " + epochSizeInSeconds + " is not a positive multiple of tick frequency "
                + tickFrequencyInSeconds + ", sketches will be expired on every tick");
            ticks = 1;
        }
        return ticks;
    }

    /**
     * Handles one tuple. A tick tuple applies the expiry policy and queues the
     * current top hitters, if any, for emission; any other tuple adds its item
     * ID to the sketches.
     *
     * @param input tick tuple or a tuple carrying {@code RecommenderBolt.ITEM_ID}
     * @return always true
     */
    @Override
    public boolean process(Tuple input) {
        outputMessages.clear();
        if (isTickTuple(input)) {
            LOG.info("got tick tuple ");
            ++tickCount;

            if (expiryPolicy == ExpiryPolicy.Epoch) {
                // ticksPerEpoch is guaranteed >= 1 for the epoch policy
                if (tickCount % ticksPerEpoch == 0) {
                    LOG.info("going to expire sketches");
                    sketches.expire();
                    sketches.refreshCount();
                }
            } else if (expiryPolicy == ExpiryPolicy.Tumble) {
                if (dailySchedule.shouldTrigger()) {
                    // presumably resets the sketches to start a new window
                    sketches.intialize();
                }
            }

            List<BoundedSortedObjects.SortableObject> topHitters = sketches.get();
            if (!topHitters.isEmpty()) {
                //send top hitters
                LOG.info("sending top hitters to down stream bolt");
                String serFreqCounts = Utility.join(topHitters, ":");
                msg.setMessage( new Values(getID(), serFreqCounts));
                outputMessages.add(msg);
            }
        } else {
            String itemID = input.getStringByField(RecommenderBolt.ITEM_ID);
            LOG.info("got message tuple ");
            sketches.add(itemID);
        }
        return true;
    }

    /**
     * @return messages queued by the most recent {@link #process(Tuple)} call
     */
    @Override
    public List<MessageHolder> getOutput() {
        return outputMessages;
    }
}