/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package gobblin.kafka.tool;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.kafka.common.serialization.Deserializer;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;

import lombok.extern.slf4j.Slf4j;

import gobblin.kafka.schemareg.KafkaSchemaRegistry;
import gobblin.kafka.schemareg.KafkaSchemaRegistryFactory;
import gobblin.kafka.serialize.LiAvroDeserializer;
import gobblin.kafka.serialize.MD5Digest;

/**
 * A simple Kafka consumer for debugging purposes. It reads messages from a single topic,
 * deserializes each payload into an Avro {@link GenericRecord}, prints it to stdout, and can
 * optionally persist consumed offsets to a {@link KafkaCheckpoint} file.
 *
 * <p>Usage: {@code java gobblin.kafka.tool.SimpleKafkaConsumer <properties_file> [checkpoint_file]}
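 *
 * <p>A minimal properties file might look like the following (illustrative values only; the
 * schema-registry keys depend on which {@link KafkaSchemaRegistryFactory} implementation is
 * configured, so only the keys read directly by this class are shown):
 * <pre>
 *   topic=my-avro-topic
 *   zookeeper.connect=localhost:2181
 *   # ...plus the keys required by the configured KafkaSchemaRegistry implementation
 * </pre>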
*/
@Slf4j
public class SimpleKafkaConsumer {
private final ConsumerConnector consumer;
private final KafkaStream<byte[], byte[]> stream;
private final ConsumerIterator<byte[], byte[]> iterator;
private final String topic;
private final KafkaSchemaRegistry<MD5Digest, Schema> schemaRegistry;
private final Deserializer deserializer;
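
  /**
   * Builds the consumer from the supplied properties, which must contain at least {@code topic}
   * and {@code zookeeper.connect}. The {@code checkpoint} argument is currently unused here;
   * checkpoints are read and written only in {@link #main} and in the print loop.
   */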
public SimpleKafkaConsumer(Properties props, KafkaCheckpoint checkpoint)
{
Config config = ConfigFactory.parseProperties(props);
topic = config.getString("topic");
String zkConnect = config.getString("zookeeper.connect");
schemaRegistry = KafkaSchemaRegistryFactory.getSchemaRegistry(props);
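    // Deserialize payloads with the LinkedIn-flavored Avro deserializer, which is assumed to
    // resolve writer schemas through the configured schema registry using MD5 schema ids.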
deserializer = new LiAvroDeserializer(schemaRegistry);
/** TODO: Make Confluent schema registry integration configurable
* HashMap<String, String> avroSerDeConfig = new HashMap<>();
* avroSerDeConfig.put("schema.registry.url", "http://localhost:8081");
* deserializer = new io.confluent.kafka.serializers.KafkaAvroDeserializer();
* deserializer.configure(avroSerDeConfig, false);
*
**/
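
    // Configure the old high-level (ZooKeeper-based) consumer: a unique group id per run, offsets
    // reset to the earliest available message, and auto-commit disabled so that progress is
    // tracked only through the KafkaCheckpoint.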
Properties consumeProps = new Properties();
consumeProps.put("zookeeper.connect", zkConnect);
consumeProps.put("group.id", "gobblin-tool-" + System.nanoTime());
consumeProps.put("zookeeper.session.timeout.ms", "10000");
consumeProps.put("zookeeper.sync.time.ms", "10000");
consumeProps.put("auto.commit.interval.ms", "10000");
consumeProps.put("auto.offset.reset", "smallest");
consumeProps.put("auto.commit.enable", "false");
//consumeProps.put("consumer.timeout.ms", "10000");
consumer = Consumer.createJavaConsumerConnector(new ConsumerConfig(consumeProps));
Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer.createMessageStreams(ImmutableMap.of(topic, 1));
List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(this.topic);
stream = streams.get(0);
iterator = stream.iterator();
}
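
  /** Shuts down the underlying consumer connector. */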
public void close()
{
consumer.shutdown();
}
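
  /** Alias for {@link #close()}. */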
public void shutdown()
{
close();
}
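
  /**
   * Entry point. Expects the path to a properties file and, optionally, the path to a checkpoint
   * file used to persist and resume consumption offsets.
   */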
public static void main(String[] args)
throws IOException {
    Preconditions.checkArgument(args.length >= 1,
        "Usage: java " + SimpleKafkaConsumer.class.getName() + " <properties_file> [checkpoint_file]");
String fileName = args[0];
Properties props = new Properties();
    try (FileInputStream propsStream = new FileInputStream(new File(fileName))) {
      props.load(propsStream);
    }
KafkaCheckpoint checkpoint = KafkaCheckpoint.emptyCheckpoint();
File checkpointFile = null;
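
    // If a checkpoint file was supplied and already exists, resume from its stored offsets;
    // otherwise start from an empty checkpoint.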
if (args.length > 1)
{
try {
checkpointFile = new File(args[1]);
if (checkpointFile.exists()) {
          try (FileInputStream fis = new FileInputStream(checkpointFile)) {
            checkpoint = KafkaCheckpoint.deserialize(fis);
          }
} else {
log.info("Checkpoint doesn't exist, we will start with an empty one and store it here.");
}
}
catch (IOException e)
{
log.warn("Could not deserialize the previous checkpoint. Starting with empty", e);
if (!checkpoint.isEmpty())
{
checkpoint = KafkaCheckpoint.emptyCheckpoint();
}
}
}
final SimpleKafkaConsumer consumer = new SimpleKafkaConsumer(props, checkpoint);
Runtime.getRuntime().addShutdownHook(new Thread() {
@Override
public void run()
{
log.info("Shutting down...");
consumer.shutdown();
}
});
consumer.printLoop(checkpoint, checkpointFile);
}
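
  /**
   * Consumes messages indefinitely, printing each deserialized record to stdout. If a checkpoint
   * file was supplied, the checkpoint is re-serialized to that file after every message.
   */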
private void printLoop(KafkaCheckpoint checkpoint, File checkpointFile)
throws IOException {
boolean storeCheckpoints = (checkpointFile != null);
if (storeCheckpoints)
{
boolean newFileCreated = checkpointFile.createNewFile();
if (newFileCreated) {
log.info("Created new checkpoint file: " + checkpointFile.getAbsolutePath());
}
}
while (true)
{
MessageAndMetadata<byte[], byte[]> messagePlusMeta;
try {
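        // With consumer.timeout.ms left unset (see the constructor), hasNext() blocks until a
        // message arrives, so this check only returns false once the consumer is shut down.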
if (!iterator.hasNext()) {
return;
}
messagePlusMeta = iterator.next();
        if (messagePlusMeta != null) {
byte[] payload = messagePlusMeta.message();
System.out.println("Got a message of size " + payload.length + " bytes");
GenericRecord record = (GenericRecord) deserializer.deserialize(topic, payload);
System.out.println(record.toString());
checkpoint.update(messagePlusMeta.partition(), messagePlusMeta.offset());
}
}
catch (RuntimeException e)
{
log.warn("Error detected", e);
}
finally
{
if (storeCheckpoints) {
          if (!checkpoint.isEmpty()) {
System.out.println("Storing checkpoint to file: " + checkpointFile.getAbsolutePath());
KafkaCheckpoint.serialize(checkpoint, checkpointFile);
}
}
}
}
}
}