package com.gigaspaces.storm; import java.util.logging.Logger; import storm.trident.TridentState; import storm.trident.TridentTopology; import storm.trident.operation.BaseFunction; import storm.trident.operation.TridentCollector; import storm.trident.operation.builtin.Count; import storm.trident.tuple.TridentTuple; import backtype.storm.Config; import backtype.storm.LocalCluster; import backtype.storm.StormSubmitter; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Values; import com.gigaspaces.storm.spout.XAPConfig; import com.gigaspaces.storm.spout.XAPTridentSpout; import com.gigaspaces.storm.state.XAPState2; public class WordCountTopology { private static final Logger log=Logger.getLogger(WordCountTopology.class.getName()); public static void main(String[] args) throws Exception { if(args.length<3)throw new RuntimeException("requires 3 args: toponame, host, streamName [workerCnt]"); String topoName=args[0]; String xaphost=args[1]; String streamName=args[2]; int workerCnt=4; if(args.length>3)workerCnt=Integer.parseInt(args[3]); log.info(String.format("executing wordcount with %s %s %s",topoName,xaphost,streamName)); XAPConfig config=new XAPConfig(); config.setBatchSize(1000); config.setStreamName(streamName); config.setXapHost(xaphost); config.setFields("sentence"); config.setCollectStats(true); Config conf = new Config(); //conf.setDebug(true); XAPTridentSpout spout=new XAPTridentSpout(config); TridentTopology topology = new TridentTopology(); TridentState wordCounts = topology.newStream("spout1", spout) .each(new Fields("sentence"), new SplitLarge(6), new Fields("word")) .groupBy(new Fields("word")) .persistentAggregate(XAPState2.nonTransactional( String.format("jini://*/*/streamspace?locators=%s",xaphost), true), new Count(), new Fields("count")) ; if(args!=null && args.length > 0) { conf.setNumWorkers(workerCnt); StormSubmitter.submitTopology(topoName, conf, topology.build()); } else { conf.setMaxTaskParallelism(3); LocalCluster cluster = new LocalCluster(); cluster.submitTopology("word-count", conf, topology.build()); Thread.sleep(10000); cluster.shutdown(); } } } /** * Splits and filters out small words, and removes punctuation * * @author DeWayne * */ class SplitLarge extends BaseFunction { private int size; public SplitLarge(){} public SplitLarge(int size){ this.size=size; } @Override public void execute(TridentTuple tuple, TridentCollector collector) { for(String word: tuple.getString(0).split("[ ;,:\\?\\-\\\"\\!\\.\\r()]+")) { if(word.length() > size) { collector.emit(new Values(word.toLowerCase())); } } } }