package com.jthink.skyeye.collector.task;

import com.jthink.skyeye.collector.callback.KafkaOffsetCommitCallback;
import com.jthink.skyeye.collector.configuration.es.EsProperties;
import com.jthink.skyeye.collector.configuration.kafka.KafkaProperties;
import com.jthink.skyeye.base.constant.Constants;
import com.jthink.skyeye.base.dto.LogDto;
import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.WakeupException;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import java.util.HashMap;
import java.util.Map;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;

/**
 * JThink@JThink
 *
 * @author JThink
 * @version 0.0.1
 * @desc kafka2es: the task that builds the Elasticsearch index
 * @date 2016-09-20 10:25:13
 */
@Component
public class IndexerTask implements Task {

    private static final Logger LOGGER = LoggerFactory.getLogger(IndexerTask.class);

    @Autowired
    private KafkaConsumer<byte[], String> kafkaConsumerApp;
    @Autowired
    private KafkaProperties kafkaProperties;
    @Autowired
    private TransportClient transportClient;
    @Autowired
    private EsProperties esProperties;

    public static Map<TopicPartition, OffsetAndMetadata> currentOffsets = new HashMap<>();

    private Thread thread;

    @Override
    public void run() {
        this.doTask();
    }

    /**
     * Executes the task. This consumption strategy uses manual offset commits, so there is a
     * very small chance of losing or re-consuming records; guaranteeing neither loss nor
     * duplication would require storing the offsets in external storage and implementing a
     * rollback mechanism. If the process exits unexpectedly, data bound for ES may be lost.
     */
    @Override
    public void doTask() {
        this.thread = Thread.currentThread();

        BulkRequestBuilder bulkRequest = transportClient.prepareBulk();
        int count = 0;
        try {
            while (true) {
                ConsumerRecords<byte[], String> records = this.kafkaConsumerApp.poll(this.kafkaProperties.getPollTimeout());
                if (!records.isEmpty()) {
                    for (ConsumerRecord<byte[], String> record : records) {
                        String value = record.value();
                        XContentBuilder source = this.buildXContentBuilder(value);
                        if (source != null) {
                            bulkRequest.add(transportClient.prepareIndex(this.esProperties.getIndex(), this.esProperties.getDoc())
                                    .setSource(source));
                        } else {
                            LOGGER.warn("record transform error, {}", value);
                        }
                        // track the next offset to commit for this partition
                        currentOffsets.put(new TopicPartition(record.topic(), record.partition()),
                                new OffsetAndMetadata(record.offset() + 1));
                        count++;
                        if (count >= 1000) {
                            // every 1000 records, asynchronously commit the tracked offsets to Kafka
                            kafkaConsumerApp.commitAsync(currentOffsets, new KafkaOffsetCommitCallback());
                            count = 0;
                        }
                    }
                    int size = bulkRequest.numberOfActions();
                    if (size != 0) {
                        bulkRequest.execute().actionGet();
                    }
                    LOGGER.info("total records: {}, indexed {} records to es", records.count(), size);
                    bulkRequest = transportClient.prepareBulk();
                    kafkaConsumerApp.commitAsync(currentOffsets, new KafkaOffsetCommitCallback());
                }
            }
        } catch (WakeupException e) {
            // expected during shutdown: consumer.wakeup() interrupts poll(), nothing to process
            LOGGER.error("wakeup, start to shutdown", e);
        } catch (Exception e) {
            LOGGER.error("process records error", e);
        } finally {
            // commit synchronously on the way out so the latest offsets are not lost
            kafkaConsumerApp.commitSync(currentOffsets);
            LOGGER.info("finally commit the offset");
            // no need to call kafkaConsumer.close() here; the Spring container closes the bean
        }
    }

    @Override
    public Thread executeThread() {
        return this.thread;
    }

    /**
     * Builds the ES document source (XContentBuilder) from a raw log line.
     *
     * @param line the raw log line
     * @return the document source, or null if the line cannot be parsed
     */
    private XContentBuilder buildXContentBuilder(String line) {
        try {
            LogDto logDto = new LogDto(line);
            return jsonBuilder()
                    .startObject()
                    .field(Constants.DAY, logDto.getDay())
                    .field(Constants.TIME, logDto.getTime())
                    .field(Constants.NANOTIME, logDto.getNanoTime())
                    .field(Constants.CREATED, logDto.getCreated())
                    .field(Constants.APP, logDto.getApp())
                    .field(Constants.HOST, logDto.getHost())
                    .field(Constants.THREAD, logDto.getThread())
                    .field(Constants.LEVEL, logDto.getLevel())
                    .field(Constants.EVENT_TYPE, logDto.getEventType())
                    .field(Constants.PACK, logDto.getPack())
                    .field(Constants.CLAZZ, logDto.getClazz())
                    .field(Constants.LINE, logDto.getLine())
                    .field(Constants.MESSAGE_SMART, logDto.getMessageSmart())
                    .field(Constants.MESSAGE_MAX, logDto.getMessageMax())
                    .endObject();
        } catch (Exception e) {
            // malformed log line; the caller logs and skips it
            return null;
        }
    }
}
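
/*
 * A minimal sketch of how this task is typically driven (an assumption about the
 * surrounding wiring, not this project's actual code; it presumes Task extends
 * Runnable). A manager starts run() on a dedicated thread; on shutdown it calls
 * KafkaConsumer.wakeup(), which makes the blocked poll() throw WakeupException,
 * so the finally block above commits the last offsets synchronously:
 *
 *     Thread worker = new Thread(indexerTask);
 *     worker.start();
 *     Runtime.getRuntime().addShutdownHook(new Thread(() -> {
 *         kafkaConsumer.wakeup();                 // unblocks poll() with WakeupException
 *         try {
 *             indexerTask.executeThread().join(); // wait for the final commitSync
 *         } catch (InterruptedException ignored) {
 *         }
 *     }));
 */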