/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.confluent.examples.streams;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.KStreamBuilder;
import org.apache.kafka.streams.kstream.SessionWindows;
import java.util.Collections;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.TimeUnit;
import io.confluent.examples.streams.avro.PlayEvent;
import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig;
import io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde;
/**
 * Demonstrates counting user activity (play events) into session windows.
 * <p>
 * In this example we count play events per session. We define a session as all events from a
 * given user that fall within a specified gap of inactivity, in this case 30 minutes. The
 * sessions are continuously aggregated into the state store "play-events-per-session" and are
 * also written to a topic of the same name.
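 * <p>
 * For example, with a 30-minute inactivity gap, play events from a hypothetical user "jo" at
 * t=0, t=10, and t=55 (timestamps shown as minutes rather than epoch milliseconds, for
 * readability) would fall into two sessions:
 * <pre>
 * {@code
 * jo@0->10  = 2   # t=0 and t=10 are within 30 minutes of each other, so they share a session
 * jo@55->55 = 1   # t=55 is more than 30 minutes after t=10, so a new session starts
 * }</pre>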
* <p>
* <br>
* HOW TO RUN THIS EXAMPLE
* <p>
* 1) Start Zookeeper, Kafka, and Confluent Schema Registry. Please refer to <a href='http://docs.confluent.io/current/quickstart.html#quickstart'>QuickStart</a>.
* <p>
* 2) Create the input/intermediate/output topics used by this example.
* <pre>
* {@code
* $ bin/kafka-topics --create --topic play-events \
* --zookeeper localhost:2181 --partitions 1 --replication-factor 1
* $ bin/kafka-topics --create --topic play-events-per-session \
* --zookeeper localhost:2181 --partitions 1 --replication-factor 1
* }</pre>
 * Note: The above commands are for the Confluent Platform. For Apache Kafka it should be
 * {@code bin/kafka-topics.sh ...}.
* <p>
* 3) Start this example application either in your IDE or on the command line.
* <p>
 * If via the command line, please refer to <a href='https://github.com/confluentinc/examples/tree/master/kafka-streams#packaging-and-running'>Packaging</a>.
* Once packaged you can then run:
* <pre>
* {@code
* $ java -cp target/streams-examples-3.3.0-SNAPSHOT-standalone.jar io.confluent.examples.streams.SessionWindowsExample
* }</pre>
* 4) Write some input data to the source topics (e.g. via {@link SessionWindowsExampleDriver}). The
* already running example application (step 3) will automatically process this input data and write
* the results to the output topic.
* <pre>
* {@code
 * # Here: Write input data using the example driver. The driver will also consume, and print,
 * # the data from the output topic. The driver will stop when it has received all output records.
* $ java -cp target/streams-examples-3.3.0-SNAPSHOT-standalone.jar io.confluent.examples.streams.SessionWindowsExampleDriver
* }</pre>
* You should see output data similar to:
* <pre>
* {@code
* jo@1484823406597->1484823406597 = 1 # new session for jo created
* bill@1484823466597->1484823466597 = 1 # new session for bill created
* sarah@1484823526597->1484823526597 = 1 # new session for sarah created
* jo@1484825207597->1484825207597 = 1 # new session for jo created as event time is after inactivity gap
* bill@1484823466597->1484825206597 = 2 # extend previous session for bill as event time is within inactivity gap
* sarah@1484827006597->1484827006597 = 1 # new session for sarah created as event time is after inactivity gap
* jo@1484823406597->1484825207597 = 3 # new event merges 2 previous sessions for jo
* bill@1484828806597->1484828806597 = 1 # new session for bill created
* sarah@1484827006597->1484827186597 = 2 # extend session for sarah as event time is within inactivity gap
* }
* </pre>
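 * <p>
 * Alternatively, you can inspect the output topic yourself with the console consumer. This is a
 * sketch assuming the Confluent Platform layout from step 2 (for Apache Kafka, use
 * {@code bin/kafka-console-consumer.sh ...}):
 * <pre>
 * {@code
 * $ bin/kafka-console-consumer --topic play-events-per-session --from-beginning \
 *   --bootstrap-server localhost:9092 \
 *   --property print.key=true \
 *   --property value.deserializer=org.apache.kafka.common.serialization.LongDeserializer
 * }</pre>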
* <p>
* 5) Once you're done with your experiments, you can stop this example via {@code Ctrl-C}. If needed,
* also stop the Confluent Schema Registry ({@code Ctrl-C}), then stop the Kafka broker ({@code Ctrl-C}), and
* only then stop the ZooKeeper instance ({@code Ctrl-C}).
* <p>
 * You can also take a look at {@code io.confluent.examples.streams.SessionWindowsExampleTest}
 * for an example of the expected outputs.
*/
public class SessionWindowsExample {
static final String PLAY_EVENTS = "play-events";
static final Long INACTIVITY_GAP = TimeUnit.MINUTES.toMillis(30);
static final String PLAY_EVENTS_PER_SESSION = "play-events-per-session";
public static void main(String[] args) {
final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
final String schemaRegistryUrl = args.length > 1 ? args[1] : "http://localhost:8081";
final KafkaStreams streams = createStreams(bootstrapServers,
schemaRegistryUrl,
"/tmp/kafka-streams");
    // Always (and unconditionally) clean local state prior to starting the processing topology.
    // We opt for this unconditional call here because it makes it easier for you to play around with the example
    // when resetting the application for a re-run (via the Application Reset Tool,
    // http://docs.confluent.io/current/streams/developer-guide.html#application-reset-tool).
    //
    // The drawback of cleaning up local state prior to startup is that your app must rebuild its local state from
    // scratch, which takes time and requires reading all the state-relevant data from the Kafka cluster over the
    // network.
    // Thus in a production scenario you typically do not want to always clean up as we do here, but rather only
    // when it is truly needed, i.e., under certain conditions (e.g., the presence of a command line flag for your
    // app). See `ApplicationResetExample.java` for a production-like example.
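    // A minimal sketch of such a conditional reset, assuming a hypothetical "--reset" command line
    // flag (not one of this example's actual arguments, which are only the bootstrap servers and
    // the schema registry URL):
    //
    //   if (java.util.Arrays.asList(args).contains("--reset")) {
    //     streams.cleanUp();
    //   }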
streams.cleanUp();
streams.start();
// Add shutdown hook to respond to SIGTERM and gracefully close Kafka Streams
Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
}
static KafkaStreams createStreams(final String bootstrapServers,
final String schemaRegistryUrl,
final String stateDir) {
final Properties config = new Properties();
// Give the Streams application a unique name. The name must be unique in the Kafka cluster
// against which the application is run.
config.put(StreamsConfig.APPLICATION_ID_CONFIG, "session-windows-example");
// Where to find Kafka broker(s).
config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
config.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
// Set to earliest so we don't miss any data that arrived in the topics before the process
// started
config.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Disable record caching so that every session update, including intermediate merges, is
    // forwarded downstream and the session-merging behavior is visible in the output
config.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
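    // Note: in production you would typically keep caching enabled (the default cache size is 10 MB),
    // which batches and deduplicates downstream updates, and tune
    // StreamsConfig.COMMIT_INTERVAL_MS_CONFIG to control how often the cache is flushed.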
// create and configure the SpecificAvroSerdes required in this example
final SpecificAvroSerde<PlayEvent> playEventSerde = new SpecificAvroSerde<>();
final Map<String, String> serdeConfig = Collections.singletonMap(
AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    playEventSerde.configure(serdeConfig, false); // `false` because the serde is used for record values, not keys
final KStreamBuilder builder = new KStreamBuilder();
builder.stream(Serdes.String(), playEventSerde, PLAY_EVENTS)
// group by key so we can count by session windows
.groupByKey(Serdes.String(), playEventSerde)
// count play events per session
.count(SessionWindows.with(INACTIVITY_GAP), PLAY_EVENTS_PER_SESSION)
// convert to a stream so we can map the key to a string
.toStream()
        // map the Windowed<String> key to a readable string of the form "user@sessionStart->sessionEnd"
.map((key, value) -> new KeyValue<>(key.key() + "@" + key.window().start() + "->" + key.window().end(), value))
// write to play-events-per-session topic
.to(Serdes.String(), Serdes.Long(), PLAY_EVENTS_PER_SESSION);
return new KafkaStreams(builder, new StreamsConfig(config));
}
}