package io.confluent.examples.consumer;
import JavaSessionize.avro.LogLine;
import io.confluent.kafka.serializers.KafkaAvroDecoder;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;
import kafka.serializer.StringDecoder;
import kafka.utils.VerifiableProperties;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.specific.SpecificData;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
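/**
 * Consumes Avro-encoded LogLine click events from an input topic, assigns each event a
 * per-IP session id based on the time elapsed since that IP's previous click, and
 * republishes the enriched events to an output topic.
 */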
public class AvroClicksSessionizer {
private final ConsumerConnector consumer;
private final KafkaProducer<String, LogLine> producer;
private final String inputTopic;
private final String outputTopic;
private final String zookeeper;
private final String groupId;
private final String url;
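// Per-IP session tracking. SessionState is a small companion class of this example
// (not shown here) holding the last-seen timestamp and the current session id.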
private final Map<String, SessionState> state = new HashMap<String, SessionState>();
private final int sessionLengthMs;
private static AvroClicksSessionizer sessionizer;
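// Expects a single command-line argument: the URL of the schema registry.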
public static void main(String[] args) {
if (args.length != 1) {
System.out.println("Please provide command line arguments: "
+ "schemaRegistryUrl");
System.exit(-1);
}
// currently hardcoding a lot of parameters, for simplicity
String zookeeper = "localhost:2181";
String groupId = "AvroClicksSessionizer";
String inputTopic = "clicks";
String outputTopic = "sessionized_clicks";
String url = args[0];
// Typically events are considered to be part of the same session if they are less than 30 minutes apart
// To make this example show interesting results sooner, we limit the interval to 5 seconds
int sessionLengthMs = 5*1000;
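// On JVM exit, shut down the high-level consumer so the blocking message iterator
// in run() is released and the process can terminate cleanly.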
Runtime.getRuntime().addShutdownHook(new Thread() {
@Override
public void run() {
sessionizer.consumer.shutdown();
}
});
sessionizer = new AvroClicksSessionizer(zookeeper, groupId, inputTopic, outputTopic, url, sessionLengthMs);
sessionizer.run();
}
public AvroClicksSessionizer(String zookeeper, String groupId, String inputTopic, String outputTopic, String url, int sessionLengthMs) {
this.consumer = kafka.consumer.Consumer.createJavaConsumerConnector(
new ConsumerConfig(createConsumerConfig(zookeeper, groupId, url)));
this.producer = getProducer(url);
this.zookeeper = zookeeper;
this.groupId = groupId;
this.inputTopic = inputTopic;
this.outputTopic = outputTopic;
this.url = url;
this.sessionLengthMs = sessionLengthMs;
}
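// Configuration for the old ZooKeeper-based high-level consumer, plus the schema
// registry settings used by the Avro decoder.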
private Properties createConsumerConfig(String zookeeper, String groupId, String url) {
Properties props = new Properties();
props.put("zookeeper.connect", zookeeper);
props.put("group.id", groupId);
props.put("schema.registry.url", url);
props.put("specific.avro.reader", true);
// We configure the consumer to avoid committing offsets and to always start consuming from beginning of topic
// This is not a best practice, but we want the example consumer to show results when running it again and again
props.put("auto.commit.enable", "false");
props.put("auto.offset.reset", "smallest");
return props;
}
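// Consume-transform-produce loop: read each click, update the session state for its IP,
// stamp the event with a session id, and forward it to the output topic.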
private void run() {
Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
// Hard coding single threaded consumer
topicCountMap.put(inputTopic, 1);
Properties props = createConsumerConfig(zookeeper, groupId, url);
VerifiableProperties vProps = new VerifiableProperties(props);
// Create decoders for key and value
KafkaAvroDecoder avroDecoder = new KafkaAvroDecoder(vProps);
StringDecoder stringDecoder = new StringDecoder(new VerifiableProperties());
KafkaStream<String, Object> stream = consumer.createMessageStreams(topicCountMap, stringDecoder, avroDecoder).get(inputTopic).get(0);
ConsumerIterator<String, Object> it = stream.iterator();
System.out.println("Ready to start iterating wih properties: " + props.toString());
System.out.println("Reading topic:" + inputTopic);
while (it.hasNext()) {
MessageAndMetadata<String, Object> messageAndMetadata = it.next();
String ip = messageAndMetadata.key();
// Once we release a new version of the Avro deserializer that can return SpecificRecord instances, the deep copy will be unnecessary
GenericRecord genericEvent = (GenericRecord) messageAndMetadata.message();
LogLine event = (LogLine) SpecificData.get().deepCopy(LogLine.SCHEMA$, genericEvent);
SessionState oldState = state.get(ip);
int sessionId = 0;
if (oldState == null) {
state.put(ip, new SessionState(event.getTimestamp(), 0));
} else {
sessionId = oldState.getSessionId();
// if the previous click from this IP is more than sessionLengthMs older than the new one, start a new session
if (oldState.getLastConnection() < event.getTimestamp() - sessionLengthMs) {
sessionId = sessionId + 1;
}
SessionState newState = new SessionState(event.getTimestamp(), sessionId);
state.put(ip, newState);
}
event.setSessionid(sessionId);
System.out.println(event.toString());
ProducerRecord<String, LogLine> record = new ProducerRecord<String, LogLine>(outputTopic, event.getIp().toString(), event);
producer.send(record);
}
}
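// Producer that Avro-serializes both key and value; by default the serializer also
// registers the LogLine schema with the configured schema registry on first use.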
private KafkaProducer<String, LogLine> getProducer(String url) {
Properties props = new Properties();
// hardcoding the Kafka server URI for this example
props.put("bootstrap.servers", "localhost:9092");
props.put("acks", "all");
props.put("retries", 0);
props.put("key.serializer", "io.confluent.kafka.serializers.KafkaAvroSerializer");
props.put("value.serializer", "io.confluent.kafka.serializers.KafkaAvroSerializer");
props.put("schema.registry.url", url);
KafkaProducer<String, LogLine> producer = new KafkaProducer<String, LogLine>(props);
return producer;
}
}
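// Note: SessionState is defined in its own file in this example. A minimal sketch,
// assuming only the two pieces of state implied by the usage above, might look like:
//
//   public class SessionState {
//       private long lastConnection;
//       private int sessionId;
//
//       public SessionState(long lastConnection, int sessionId) {
//           this.lastConnection = lastConnection;
//           this.sessionId = sessionId;
//       }
//
//       public long getLastConnection() { return lastConnection; }
//
//       public int getSessionId() { return sessionId; }
//   }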