/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.kafka.client;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentMap;
import kafka.api.PartitionFetchInfo;
import kafka.api.PartitionOffsetRequestInfo;
import kafka.common.TopicAndPartition;
import kafka.javaapi.FetchRequest;
import kafka.javaapi.FetchResponse;
import kafka.javaapi.OffsetRequest;
import kafka.javaapi.OffsetResponse;
import kafka.javaapi.PartitionMetadata;
import kafka.javaapi.TopicMetadata;
import kafka.javaapi.TopicMetadataRequest;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.javaapi.message.ByteBufferMessageSet;
import kafka.message.MessageAndOffset;
import lombok.extern.slf4j.Slf4j;
import com.google.common.base.Function;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.net.HostAndPort;
import com.typesafe.config.Config;
import gobblin.source.extractor.extract.kafka.KafkaOffsetRetrievalFailureException;
import gobblin.source.extractor.extract.kafka.KafkaPartition;
import gobblin.source.extractor.extract.kafka.KafkaTopic;
import gobblin.util.ConfigUtils;
/**
 * A {@link GobblinKafkaConsumerClient} that uses the Kafka 0.8 Scala consumer client
 * ({@link SimpleConsumer}). All the code has been moved from the legacy
 * {@code gobblin.source.extractor.extract.kafka.KafkaWrapper}'s {@code KafkaOldApi}.
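 *
 * <p>A minimal usage sketch, for illustration only. It assumes the broker list is supplied through the
 * configuration consumed by {@link AbstractBaseKafkaConsumerClient}; the {@code kafka.brokers} key shown
 * here is an assumption to be verified against that class.
 * <pre>{@code
 * Config config = ConfigFactory.parseMap(ImmutableMap.of("kafka.brokers", "localhost:9092"));
 * GobblinKafkaConsumerClient client = new Kafka08ConsumerClient.Factory().create(config);
 * try {
 *   for (KafkaTopic topic : client.getTopics()) {
 *     // inspect each topic and its partitions here
 *   }
 * } finally {
 *   client.close();
 * }
 * }</pre>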
*/
@Slf4j
public class Kafka08ConsumerClient extends AbstractBaseKafkaConsumerClient {
public static final String CONFIG_PREFIX = AbstractBaseKafkaConsumerClient.CONFIG_PREFIX;
public static final String CONFIG_KAFKA_BUFFER_SIZE_BYTES = CONFIG_PREFIX + "bufferSizeBytes";
public static final int CONFIG_KAFKA_BUFFER_SIZE_BYTES_DEFAULT = 1024 * 1024; // 1MB
public static final String CONFIG_KAFKA_CLIENT_NAME = CONFIG_PREFIX + "clientName";
public static final String CONFIG_KAFKA_CLIENT_NAME_DEFAULT = "gobblin-kafka";
public static final String CONFIG_KAFKA_FETCH_REQUEST_CORRELATION_ID = CONFIG_PREFIX + "fetchCorrelationId";
private static final int CONFIG_KAFKA_FETCH_REQUEST_CORRELATION_ID_DEFAULT = -1;
public static final String CONFIG_KAFKA_FETCH_TOPIC_NUM_TRIES = CONFIG_PREFIX + "fetchTopicNumTries";
private static final int CONFIG_KAFKA_FETCH_TOPIC_NUM_TRIES_DEFAULT = 3;
public static final String CONFIG_KAFKA_FETCH_OFFSET_NUM_TRIES = CONFIG_PREFIX + "fetchOffsetNumTries";
private static final int CONFIG_KAFKA_FETCH_OFFSET_NUM_TRIES_DEFAULT = 3;
private final int bufferSize;
private final String clientName;
private final int fetchCorrelationId;
private final int fetchTopicRetries;
private final int fetchOffsetRetries;
private final ConcurrentMap<String, SimpleConsumer> activeConsumers = Maps.newConcurrentMap();
private Kafka08ConsumerClient(Config config) {
super(config);
bufferSize = ConfigUtils.getInt(config, CONFIG_KAFKA_BUFFER_SIZE_BYTES, CONFIG_KAFKA_BUFFER_SIZE_BYTES_DEFAULT);
clientName = ConfigUtils.getString(config, CONFIG_KAFKA_CLIENT_NAME, CONFIG_KAFKA_CLIENT_NAME_DEFAULT);
fetchCorrelationId =
ConfigUtils.getInt(config, CONFIG_KAFKA_FETCH_REQUEST_CORRELATION_ID,
CONFIG_KAFKA_FETCH_REQUEST_CORRELATION_ID_DEFAULT);
fetchTopicRetries =
ConfigUtils.getInt(config, CONFIG_KAFKA_FETCH_TOPIC_NUM_TRIES, CONFIG_KAFKA_FETCH_TOPIC_NUM_TRIES_DEFAULT);
fetchOffsetRetries =
ConfigUtils.getInt(config, CONFIG_KAFKA_FETCH_OFFSET_NUM_TRIES, CONFIG_KAFKA_FETCH_OFFSET_NUM_TRIES_DEFAULT);
}
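  /**
   * Returns all topics visible from the configured brokers, each with its partitions. A topic whose
   * partition metadata or partition leader is missing is returned with an empty partition list.
   */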
@Override
public List<KafkaTopic> getTopics() {
List<TopicMetadata> topicMetadataList = getFilteredMetadataList();
List<KafkaTopic> filteredTopics = Lists.newArrayList();
for (TopicMetadata topicMetadata : topicMetadataList) {
List<KafkaPartition> partitions = getPartitionsForTopic(topicMetadata);
filteredTopics.add(new KafkaTopic(topicMetadata.topic(), partitions));
}
return filteredTopics;
}
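  /**
   * Builds the {@link KafkaPartition} list for a topic. If any partition has null metadata or a null
   * leader, the whole topic is ignored by returning an empty list.
   */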
private List<KafkaPartition> getPartitionsForTopic(TopicMetadata topicMetadata) {
List<KafkaPartition> partitions = Lists.newArrayList();
for (PartitionMetadata partitionMetadata : topicMetadata.partitionsMetadata()) {
if (null == partitionMetadata) {
log.error("Ignoring topic with null partition metadata " + topicMetadata.topic());
return Collections.emptyList();
}
if (null == partitionMetadata.leader()) {
log.error("Ignoring topic with null partition leader " + topicMetadata.topic() + " metatada="
+ partitionMetadata);
return Collections.emptyList();
}
partitions.add(new KafkaPartition.Builder().withId(partitionMetadata.partitionId())
.withTopicName(topicMetadata.topic()).withLeaderId(partitionMetadata.leader().id())
.withLeaderHostAndPort(partitionMetadata.leader().host(), partitionMetadata.leader().port()).build());
}
return partitions;
}
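  /**
   * Tries each broker in turn and returns the first non-null topic metadata list; throws a
   * {@link RuntimeException} if every broker fails.
   */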
private List<TopicMetadata> getFilteredMetadataList() {
    // Try brokers one by one until topic metadata is successfully retrieved,
    // i.e. fetchTopicMetadataFromBroker returns a non-null list.
for (String broker : this.brokers) {
List<TopicMetadata> filteredTopicMetadataList = fetchTopicMetadataFromBroker(broker);
if (filteredTopicMetadataList != null) {
return filteredTopicMetadataList;
}
}
throw new RuntimeException("Fetching topic metadata from all brokers failed. See log warning for more information.");
}
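  /**
   * Fetches metadata for the selected topics (all topics when none are given) from a single broker,
   * retrying up to fetchTopicRetries times. Returns null if all attempts fail.
   */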
private List<TopicMetadata> fetchTopicMetadataFromBroker(String broker, String... selectedTopics) {
log.info(String.format("Fetching topic metadata from broker %s", broker));
SimpleConsumer consumer = null;
try {
consumer = getSimpleConsumer(broker);
for (int i = 0; i < this.fetchTopicRetries; i++) {
try {
return consumer.send(new TopicMetadataRequest(Arrays.asList(selectedTopics))).topicsMetadata();
} catch (Exception e) {
log.warn(String.format("Fetching topic metadata from broker %s has failed %d times.", broker, i + 1), e);
try {
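          // Randomized linear backoff: sleep roughly i seconds plus up to one second of jitter before retrying.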
Thread.sleep((long) ((i + Math.random()) * 1000));
          } catch (InterruptedException e2) {
            log.warn("Caught InterruptedException: " + e2);
            // Restore the interrupt status so callers can observe the interruption.
            Thread.currentThread().interrupt();
          }
}
}
} finally {
if (consumer != null) {
consumer.close();
}
}
return null;
}
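  /**
   * Returns the cached {@link SimpleConsumer} for a broker ("host:port" string), creating and caching
   * one on first use.
   */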
  private SimpleConsumer getSimpleConsumer(String broker) {
    SimpleConsumer consumer = this.activeConsumers.get(broker);
    if (consumer != null) {
      return consumer;
    }
    consumer = this.createSimpleConsumer(broker);
    SimpleConsumer existing = this.activeConsumers.putIfAbsent(broker, consumer);
    if (existing != null) {
      // Another thread created and cached a consumer for this broker first; discard the redundant one.
      consumer.close();
      return existing;
    }
    return consumer;
  }
private SimpleConsumer getSimpleConsumer(HostAndPort hostAndPort) {
return this.getSimpleConsumer(hostAndPort.toString());
}
private SimpleConsumer createSimpleConsumer(String broker) {
List<String> hostPort = Splitter.on(':').trimResults().omitEmptyStrings().splitToList(broker);
return createSimpleConsumer(hostPort.get(0), Integer.parseInt(hostPort.get(1)));
}
private SimpleConsumer createSimpleConsumer(String host, int port) {
return new SimpleConsumer(host, port, this.socketTimeoutMillis, this.bufferSize, this.clientName);
}
@Override
public long getEarliestOffset(KafkaPartition partition) throws KafkaOffsetRetrievalFailureException {
Map<TopicAndPartition, PartitionOffsetRequestInfo> offsetRequestInfo =
Collections.singletonMap(new TopicAndPartition(partition.getTopicName(), partition.getId()),
new PartitionOffsetRequestInfo(kafka.api.OffsetRequest.EarliestTime(), 1));
return getOffset(partition, offsetRequestInfo);
}
@Override
public long getLatestOffset(KafkaPartition partition) throws KafkaOffsetRetrievalFailureException {
Map<TopicAndPartition, PartitionOffsetRequestInfo> offsetRequestInfo =
Collections.singletonMap(new TopicAndPartition(partition.getTopicName(), partition.getId()),
new PartitionOffsetRequestInfo(kafka.api.OffsetRequest.LatestTime(), 1));
return getOffset(partition, offsetRequestInfo);
}
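  /**
   * Issues an offset request to the partition leader, retrying up to fetchOffsetRetries times before
   * throwing a {@link KafkaOffsetRetrievalFailureException}.
   */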
private long getOffset(KafkaPartition partition, Map<TopicAndPartition, PartitionOffsetRequestInfo> offsetRequestInfo)
throws KafkaOffsetRetrievalFailureException {
SimpleConsumer consumer = this.getSimpleConsumer(partition.getLeader().getHostAndPort());
for (int i = 0; i < this.fetchOffsetRetries; i++) {
try {
OffsetResponse offsetResponse =
consumer.getOffsetsBefore(new OffsetRequest(offsetRequestInfo, kafka.api.OffsetRequest.CurrentVersion(),
this.clientName));
if (offsetResponse.hasError()) {
throw new RuntimeException("offsetReponse has error: "
+ offsetResponse.errorCode(partition.getTopicName(), partition.getId()));
}
return offsetResponse.offsets(partition.getTopicName(), partition.getId())[0];
} catch (Exception e) {
log.warn(String.format("Fetching offset for partition %s has failed %d time(s). Reason: %s", partition, i + 1,
e));
if (i < this.fetchOffsetRetries - 1) {
try {
Thread.sleep((long) ((i + Math.random()) * 1000));
          } catch (InterruptedException e2) {
            log.error("Caught InterruptedException between retries of getting latest offsets. " + e2);
            Thread.currentThread().interrupt();
          }
}
}
}
throw new KafkaOffsetRetrievalFailureException(String.format("Fetching offset for partition %s has failed.",
partition));
}
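  /**
   * Fetches a batch of messages for the given partition starting at nextOffset. Returns null when
   * nextOffset is greater than maxOffset, or when the fetch fails even after a metadata refresh.
   */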
@Override
public Iterator<KafkaConsumerRecord> consume(KafkaPartition partition, long nextOffset, long maxOffset) {
if (nextOffset > maxOffset) {
return null;
}
FetchRequest fetchRequest = createFetchRequest(partition, nextOffset);
try {
FetchResponse fetchResponse = getFetchResponseForFetchRequest(fetchRequest, partition);
return getIteratorFromFetchResponse(fetchResponse, partition);
} catch (Exception e) {
      log.warn(String.format(
          "Fetch message buffer for partition %s has failed. Will refresh topic metadata and retry.", partition), e);
return refreshTopicMetadataAndRetryFetch(partition, fetchRequest);
}
}
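  /**
   * Sends the fetch request to the current partition leader and fails fast on an error response.
   * Synchronized, presumably because the per-broker {@link SimpleConsumer}s are cached and may be
   * shared by concurrent callers.
   */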
private synchronized FetchResponse getFetchResponseForFetchRequest(FetchRequest fetchRequest, KafkaPartition partition) {
SimpleConsumer consumer = getSimpleConsumer(partition.getLeader().getHostAndPort());
FetchResponse fetchResponse = consumer.fetch(fetchRequest);
if (fetchResponse.hasError()) {
throw new RuntimeException(String.format("error code %d",
fetchResponse.errorCode(partition.getTopicName(), partition.getId())));
}
return fetchResponse;
}
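  /**
   * Wraps the fetched message set in an iterator of {@link Kafka08ConsumerRecord}s. Returns null if the
   * message buffer cannot be read, in which case the remainder of the partition is skipped.
   */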
private Iterator<KafkaConsumerRecord> getIteratorFromFetchResponse(FetchResponse fetchResponse, KafkaPartition partition) {
try {
ByteBufferMessageSet messageBuffer = fetchResponse.messageSet(partition.getTopicName(), partition.getId());
return Iterators.transform(messageBuffer.iterator(),
new Function<kafka.message.MessageAndOffset, KafkaConsumerRecord>() {
@Override
public KafkaConsumerRecord apply(kafka.message.MessageAndOffset input) {
return new Kafka08ConsumerRecord(input);
}
});
} catch (Exception e) {
log.warn(String.format("Failed to retrieve next message buffer for partition %s: %s."
+ "The remainder of this partition will be skipped.", partition, e));
return null;
}
}
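  /**
   * Refreshes the partition's leader metadata and retries the fetch once. Returns null if the retry
   * also fails.
   */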
private Iterator<KafkaConsumerRecord> refreshTopicMetadataAndRetryFetch(KafkaPartition partition,
FetchRequest fetchRequest) {
try {
refreshTopicMetadata(partition);
FetchResponse fetchResponse = getFetchResponseForFetchRequest(fetchRequest, partition);
return getIteratorFromFetchResponse(fetchResponse, partition);
} catch (Exception e) {
log.warn(String.format("Fetch message buffer for partition %s has failed: %s. This partition will be skipped.",
partition, e));
return null;
}
}
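  /**
   * Re-fetches metadata for the partition's topic and updates the partition's leader in place. Assumes
   * the refreshed metadata reports a non-null leader for this partition.
   */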
private void refreshTopicMetadata(KafkaPartition partition) {
for (String broker : this.brokers) {
List<TopicMetadata> topicMetadataList = fetchTopicMetadataFromBroker(broker, partition.getTopicName());
if (topicMetadataList != null && !topicMetadataList.isEmpty()) {
TopicMetadata topicMetadata = topicMetadataList.get(0);
for (PartitionMetadata partitionMetadata : topicMetadata.partitionsMetadata()) {
if (partitionMetadata.partitionId() == partition.getId()) {
partition.setLeader(partitionMetadata.leader().id(), partitionMetadata.leader().host(), partitionMetadata
.leader().port());
break;
}
}
break;
}
}
}
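  /**
   * Builds a single-partition {@link FetchRequest} starting at nextOffset, requesting up to bufferSize
   * bytes of messages.
   */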
private FetchRequest createFetchRequest(KafkaPartition partition, long nextOffset) {
TopicAndPartition topicAndPartition = new TopicAndPartition(partition.getTopicName(), partition.getId());
PartitionFetchInfo partitionFetchInfo = new PartitionFetchInfo(nextOffset, this.bufferSize);
Map<TopicAndPartition, PartitionFetchInfo> fetchInfo =
Collections.singletonMap(topicAndPartition, partitionFetchInfo);
return new FetchRequest(this.fetchCorrelationId, this.clientName, this.fetchTimeoutMillis, this.fetchMinBytes,
fetchInfo);
}
@Override
public void close() throws IOException {
int numOfConsumersNotClosed = 0;
for (SimpleConsumer consumer : this.activeConsumers.values()) {
if (consumer != null) {
try {
consumer.close();
} catch (Exception e) {
log.warn(String.format("Failed to close Kafka Consumer %s:%d", consumer.host(), consumer.port()));
numOfConsumersNotClosed++;
}
}
}
this.activeConsumers.clear();
if (numOfConsumersNotClosed > 0) {
throw new IOException(numOfConsumersNotClosed + " consumer(s) failed to close.");
}
}
public static class Factory implements GobblinKafkaConsumerClientFactory {
@Override
public GobblinKafkaConsumerClient create(Config config) {
return new Kafka08ConsumerClient(config);
}
}
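  /**
   * A {@link ByteArrayBasedKafkaRecord} backed by a Kafka 0.8 {@link MessageAndOffset}.
   */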
public static class Kafka08ConsumerRecord extends BaseKafkaConsumerRecord implements ByteArrayBasedKafkaRecord {
private final MessageAndOffset messageAndOffset;
public Kafka08ConsumerRecord(MessageAndOffset messageAndOffset) {
super(messageAndOffset.offset(), messageAndOffset.message().size());
this.messageAndOffset = messageAndOffset;
}
@Override
public byte[] getMessageBytes() {
return getBytes(this.messageAndOffset.message().payload());
}
@Override
public byte[] getKeyBytes() {
return getBytes(this.messageAndOffset.message().key());
}
    private static byte[] getBytes(ByteBuffer buf) {
      byte[] bytes = null;
      if (buf != null) {
        int size = buf.remaining();
        bytes = new byte[size];
        // The second argument of ByteBuffer.get(byte[], int, int) is the offset into the destination
        // array, not the buffer position. Passing buf.position() here would throw
        // IndexOutOfBoundsException (or copy into the wrong slot) whenever the position is non-zero.
        buf.get(bytes, 0, size);
      }
      return bytes;
    }
}
}