flume 读取kafka 数据
本文介绍flume读取kafka数据的方法
代码:
/*******************************************************************************
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*******************************************************************************/
package org.apache.flume.source.kafka;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.Message;
import kafka.message.MessageAndMetadata;
import org.apache.flume.*;
import org.apache.flume.conf.Configurable;
import org.apache.flume.conf.ConfigurationException;
import org.apache.flume.event.SimpleEvent;
import org.apache.flume.source.AbstractSource;
import org.apache.flume.source.SyslogParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A Flume source that reads messages from a single Kafka topic through the
 * Kafka high-level consumer ({@link ConsumerConnector}).
 * <p>
 * The original author reports using it in production and reading over 100k
 * messages per second from Kafka with one source.
 * <p>
 * Configuration keys:
 * <ul>
 * <li><tt>zookeeper.connect</tt>: the ZooKeeper address(es) Kafka uses.</li>
 * <li><tt>topic</tt>: the topic to read from Kafka (required).</li>
 * <li><tt>group.id</tt>: the group id of the consumer group.</li>
 * </ul>
 * Every emitted event carries a <tt>timestamp</tt> header and a body of the form
 * <tt>&lt;timestamp&gt;|&lt;raw Kafka payload&gt;</tt>.
 */
public class KafkaSource extends AbstractSource implements Configurable, PollableSource {
    private static final Logger log = LoggerFactory.getLogger(KafkaSource.class);

    /** Charset for the ASCII-only timestamp prefix and for debug rendering of the body. */
    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.Charset.forName("UTF-8");

    private ConsumerConnector consumer;
    private ConsumerIterator<byte[], byte[]> it;
    private String topic;

    /**
     * Pulls at most one message from the Kafka stream and hands it to the channel processor.
     *
     * @return {@code Status.READY} when an event was delivered; {@code Status.BACKOFF} when the
     *         iterator had no message or any exception occurred
     * @throws EventDeliveryException declared by the interface; failures are logged and reported
     *         as {@code BACKOFF} instead
     */
    @Override
    public Status process() throws EventDeliveryException {
        try {
            if (!it.hasNext()) {
                // Nothing to deliver: back off instead of pushing an empty batch and spinning.
                return Status.BACKOFF;
            }
            MessageAndMetadata<byte[], byte[]> message = it.next();

            // Sample the clock once so the header and the body prefix always agree.
            String timestamp = String.valueOf(System.currentTimeMillis());
            Map<String, String> headers = new HashMap<String, String>();
            headers.put("timestamp", timestamp);

            Event event = new SimpleEvent();
            event.setBody(prependTimestamp(timestamp, message.message()));
            event.setHeaders(headers);
            if (log.isDebugEnabled()) {
                log.debug("Message: {}", new String(event.getBody(), UTF_8));
            }

            List<Event> eventList = new ArrayList<Event>(1);
            eventList.add(event);
            getChannelProcessor().processEventBatch(eventList);
            return Status.READY;
        } catch (Exception e) {
            // Boundary catch: pass the exception itself so the stack trace is not lost.
            log.error("KafkaSource EXCEPTION, " + e.getMessage(), e);
            return Status.BACKOFF;
        }
    }

    /**
     * Builds {@code <timestamp>|<payload>} at the byte level so the Kafka payload is never
     * decoded and re-encoded with the platform charset.
     */
    private static byte[] prependTimestamp(String timestamp, byte[] payload) {
        byte[] prefix = (timestamp + "|").getBytes(UTF_8);
        byte[] data = (payload == null) ? new byte[0] : payload;
        byte[] body = new byte[prefix.length + data.length];
        System.arraycopy(prefix, 0, body, 0, prefix.length);
        System.arraycopy(data, 0, body, prefix.length, data.length);
        return body;
    }

    /**
     * Reads the topic from the context, creates the Kafka consumer and opens one stream on the
     * topic.
     *
     * @param context the Flume context of this source
     * @throws ConfigurationException if the topic is missing, the consumer cannot be created, or
     *         no stream is returned for the topic
     */
    @Override
    public void configure(Context context) {
        topic = context.getString("topic");
        if (topic == null) {
            throw new ConfigurationException("Kafka topic must be specified.");
        }
        try {
            this.consumer = KafkaSourceUtil.getConsumer(context);
        } catch (IOException e) {
            // Fail fast: continuing with a null consumer would only produce an NPE below.
            throw new ConfigurationException("IOException occur while creating Kafka consumer", e);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new ConfigurationException("InterruptedException occur while creating Kafka consumer", e);
        }

        // Request a single stream for the topic.
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put(topic, Integer.valueOf(1));
        Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap =
                consumer.createMessageStreams(topicCountMap);
        if (consumerMap == null) {
            throw new ConfigurationException("consumerMap is null");
        }
        List<KafkaStream<byte[], byte[]>> topicList = consumerMap.get(topic);
        if (topicList == null || topicList.isEmpty()) {
            throw new ConfigurationException("topicList is null or empty");
        }
        KafkaStream<byte[], byte[]> stream = topicList.get(0);
        it = stream.iterator();
    }

    /** Shuts down the Kafka consumer, if one was created, and then stops the source. */
    @Override
    public synchronized void stop() {
        if (consumer != null) {
            consumer.shutdown();
        }
        super.stop();
    }
}
/*******************************************************************************
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*******************************************************************************/
package org.apache.flume.source.kafka;
import java.io.IOException;
import java.util.Map;
import java.util.Properties;
import com.google.common.collect.ImmutableMap;
import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.javaapi.consumer.ConsumerConnector;
import org.apache.flume.Context;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Static helpers that turn a Flume source {@link Context} into a Kafka
 * high-level consumer.
 */
public class KafkaSourceUtil {
    private static final Logger log = LoggerFactory.getLogger(KafkaSourceUtil.class);

    /**
     * Copies the context parameters into a {@link Properties} object for Kafka's
     * {@link ConsumerConfig}, leaving out the Flume wiring keys.
     *
     * @param context the Flume context of the source
     * @return the properties to hand to the Kafka consumer configuration
     */
    public static Properties getKafkaConfigProperties(Context context) {
        log.info("context={}", context.toString());
        Properties props = new Properties();
        ImmutableMap<String, String> contextMap = context.getParameters();
        for (Map.Entry<String, String> entry : contextMap.entrySet()) {
            String key = entry.getKey();
            if (isFlumeWiringKey(key)) {
                continue;
            }
            props.setProperty(key, entry.getValue());
            log.info("key={},value={}", key, entry.getValue());
        }
        return props;
    }

    /**
     * Tells whether a key is Flume wiring rather than a Kafka setting.
     * The source is bound with the plural key "channels"; "channel" is still
     * skipped as before so existing behavior is a strict superset.
     */
    private static boolean isFlumeWiringKey(String key) {
        return key.equals("type") || key.equals("channel") || key.equals("channels");
    }

    /**
     * Creates a Kafka consumer connector from the context parameters.
     *
     * @param context the Flume context of the source
     * @return a new consumer connector
     * @throws IOException kept in the signature for existing callers that catch it
     * @throws InterruptedException kept in the signature for existing callers that catch it
     */
    public static ConsumerConnector getConsumer(Context context) throws IOException, InterruptedException {
        ConsumerConfig consumerConfig = new ConsumerConfig(getKafkaConfigProperties(context));
        return Consumer.createJavaConsumerConnector(consumerConfig);
    }
}
配置文件:( /etc/flume/conf/flume-kafka-file.properties)
agent_log.sources = kafka0
agent_log.channels = ch0
agent_log.sinks = sink0
agent_log.sources.kafka0.channels = ch0
agent_log.sinks.sink0.channel = ch0
agent_log.sources.kafka0.type = org.apache.flume.source.kafka.KafkaSource
agent_log.sources.kafka0.zookeeper.connect = node3:2181,node4:2181,node5:2181
agent_log.sources.kafka0.topic = kkt-test-topic
agent_log.sources.kafka0.group.id= test
agent_log.channels.ch0.type = memory
agent_log.channels.ch0.capacity = 2048
agent_log.channels.ch0.transactionCapacity = 1000
agent_log.sinks.sink0.type=file_roll
agent_log.sinks.sink0.sink.directory=/data/flumeng/data/test
agent_log.sinks.sink0.sink.rollInterval=300
启动脚本:
sudo su -l -s /bin/bash flume -c '/usr/lib/flume/bin/flume-ng agent --conf /etc/flume/conf --conf-file /etc/flume/conf/flume-kafka-file.properties -name agent_log -Dflume.root.logger=INFO,console '
注意: 原文中以红色字体标出的代码(即 process() 中用 System.currentTimeMillis() 加 "|" 拼接消息内容的部分)的功能是在原始数据前增加时间戳
版本号: flume-1.4.0.2.1.1.0 + kafka_2.8.0-0.8.0
参考资料:https://github.com/baniuyao/flume-kafka
编译用到的库:
flume-ng-configuration-1.4.0.2.1.1.0-385
flume-ng-core-1.4.0.2.1.1.0-385
flume-ng-sdk-1.4.0.2.1.1.0-385
flume-tools-1.4.0.2.1.1.0-385
guava-11.0.2
kafka_2.8.0-0.8.0
log4j-1.2.15
scala-compiler
scala-library
slf4j-api-1.6.1
slf4j-log4j12-1.6.1
zkclient-0.3
zookeeper-3.3.4
flume 读取kafka 数据的更多相关文章
- spark读取kafka数据 createStream和createDirectStream的区别
1.KafkaUtils.createDstream 构造函数为KafkaUtils.createDstream(ssc, [zk], [consumer group id], [per-topic, ...
- SparkStreaming直连方式读取kafka数据,使用MySQL保存偏移量
SparkStreaming直连方式读取kafka数据,使用MySQL保存偏移量 1. ScalikeJDBC 2.配置文件 3.导入依赖的jar包 4.源码测试 通过MySQL保存kafka的偏移量 ...
- Flume下读取kafka数据后再打把数据输出到kafka,利用拦截器解决topic覆盖问题
1:如果在一个Flume Agent中同时使用Kafka Source和Kafka Sink来处理events,便会遇到Kafka Topic覆盖问题,具体表现为,Kafka Source可以正常从指 ...
- 使用Flume消费Kafka数据到HDFS
1.概述 对于数据的转发,Kafka是一个不错的选择.Kafka能够装载数据到消息队列,然后等待其他业务场景去消费这些数据,Kafka的应用接口API非常的丰富,支持各种存储介质,例如HDFS.HBa ...
- 使用flume将kafka数据sink到HBase【转】
1. hbase sink介绍 1.1 HbaseSink 1.2 AsyncHbaseSink 2. 配置flume 3. 运行测试flume 4. 使用RegexHbaseEventSeriali ...
- flink 读取kafka 数据,partition分配
每个并发有个编号,只会读取kafka partition % 总并发数 == 编号 的分区 如: 6 分区, 4个并发 分区: p0 p1 p2 p3 p4 p5 并发: 0 1 2 3 ...
- Logstash读取Kafka数据写入HDFS详解
强大的功能,丰富的插件,让logstash在数据处理的行列中出类拔萃 通常日志数据除了要入ES提供实时展示和简单统计外,还需要写入大数据集群来提供更为深入的逻辑处理,前边几篇ELK的文章介绍过利用lo ...
- 使用spark-streaming实时读取Kafka数据统计结果存入MySQL
在这篇文章里,我们模拟了一个场景,实时分析订单数据,统计实时收益. 场景模拟 我试图覆盖工程上最为常用的一个场景: 1)首先,向Kafka里实时的写入订单数据,JSON格式,包含订单ID-订单类型-订 ...
- SparkStreaming python 读取kafka数据将结果输出到单个指定本地文件
# -*- coding: UTF-8 -*- #!/bin/env python3 # filename readFromKafkaStreamingGetLocation.py import IP ...
随机推荐
- webpack实战---安装操作
什么是webpack? 他有什么优点? 首先对于很多刚接触webpack人来说,肯定会问webpack是什么?它有什么优点?我们为什么要使用它? Webpack是前端一个工具,可以让各 ...
- android启动第一个界面时即闪屏的核心代码(两种方式)
闪屏,就是SplashScreen,也能够说是启动画面,就是启动的时候,闪(展示)一下,持续数秒后.自己主动关闭. 第一种方式: android的实现很easy,使用Handler对象的postDe ...
- Ubuntu: GlusterFS+HBase安装教程
HBase通常安装在Hadoop HDFS上,但也能够安装在其它实现了Hadoop文件接口的分布式文件系统上.如KFS. glusterfs是一个集群文件系统可扩展到几peta-bytes. 它集合了 ...
- Codeforces Round #274 (Div. 2) 解题报告
题目地址:http://codeforces.com/contest/479 这次自己又仅仅能做出4道题来. A题:Expression 水题. 枚举六种情况求最大值就可以. 代码例如以下: #inc ...
- java大数类,两个不超过20位都不为0的十进制字符串相乘,华为笔试题
import java.math.BigInteger; import java.util.*; import java.io.*; public class Main { public static ...
- CSS3个人盲点总结【总结中..........】
~:表示同辈元素之后指定类型的元素,如;elm1 ~ elm2表示,elm1之后的所有elm2元素,且elm1与elm2都是在同一个父级元素. +:表示同辈元素的兄弟元素. \A:一个空白换行符 &l ...
- Sql Server创建主键失败:CREATE UNIQUE INDEX 终止,因为发现对象名称 '[PPR_BasicInformation]' 和索引名称 '[PK_PPR_BasicInformation]' 有重复的键(E)
这种问题是由于主键设置了唯一性,而数据库中主键列的值又有重复的值,重复值为E,改掉其中一个值就可以了.
- Android 国际区号注册手机号编码 以及常用城市列表
附上 国际区号编码:我是定义到arrays.xml里面了 <?xml version="1.0" encoding="utf-8"?> <re ...
- Java文件(io)编程——文件字符流的使用
案例1: 读取一个文件并写入到另一个文件中,char[] 来中转. 首先要在E盘下创建一个文本文档,命名为test.txt,输入一些字符串. public class Demo_5 { public ...
- 【原创】JAVA word转html
import java.io.File; import com.jacob.activeX.ActiveXComponent; import com.jacob.com.Dispatch; impor ...