Differences in usage between spark-streaming-kafka-0-8 and spark-streaming-kafka-0-10
I. spark-streaming-kafka-0-8_2.11-2.0.2.jar
1. pom.xml
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.11 -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>2.0.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-streaming_2.11 -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.11</artifactId>
    <version>2.0.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-streaming-kafka-0-8_2.11 -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
    <version>2.0.2</version>
</dependency>
 
2. Kafka Consumer class
package com.spark.main;

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;

import kafka.serializer.StringDecoder;
import scala.Tuple2;

public class KafkaConsumer {
    public static void main(String[] args) throws InterruptedException {
        /**
         * setMaster("local[2]"): at least two threads are required, one to receive
         * messages and one to process them.
         * Durations.milliseconds(500): a new batch is generated every 500 ms.
         */
        SparkConf sparkConf = new SparkConf().setAppName("KafkaConsumer").setMaster("local[2]");
        JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.milliseconds(500));
        jssc.checkpoint("hdfs://192.168.168.200:9000/checkpoint/KafkaConsumer");
        /**
         * Configure the Kafka connection parameters.
         */
        Set<String> topicsSet = new HashSet<String>(Arrays.asList("TestTopic"));
        Map<String, String> kafkaParams = new HashMap<String, String>();
        kafkaParams.put("metadata.broker.list", "192.168.168.200:9092");
        kafkaParams.put("auto.offset.reset", "smallest"); // smallest: start from the earliest offset; largest: start from the latest
        kafkaParams.put("fetch.message.max.bytes", "524288");
        JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class, String.class,
                StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);
        /**
         * _2() returns the second element of the tuple, i.e. the message value.
         */
        JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
            public String call(Tuple2<String, String> tuple2) {
                return tuple2._2();
            }
        });
        lines.foreachRDD(new VoidFunction<JavaRDD<String>>() {
            public void call(JavaRDD<String> rdd) throws Exception {
                rdd.foreach(new VoidFunction<String>() {
                    public void call(String s) throws Exception {
                        System.out.println(s);
                    }
                });
            }
        });
        // Start the computation
        jssc.start();
        jssc.awaitTermination();
    }
}
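
The 0-8 direct stream also exposes the Kafka offsets of every batch. As a minimal sketch (not from the original post, assuming `messages` is the JavaPairInputDStream created above and is not reassigned), each batch RDD produced by createDirectStream can be cast to HasOffsetRanges, as long as this is done before any transformation:

// Assumes the additional imports:
// import org.apache.spark.api.java.JavaPairRDD;
// import org.apache.spark.streaming.kafka.HasOffsetRanges;
// import org.apache.spark.streaming.kafka.OffsetRange;
messages.foreachRDD(new VoidFunction<JavaPairRDD<String, String>>() {
    public void call(JavaPairRDD<String, String> rdd) throws Exception {
        // The cast only works on the RDD created directly by the input stream
        OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
        for (OffsetRange o : offsetRanges) {
            System.out.println(o.topic() + " partition " + o.partition()
                    + " offsets [" + o.fromOffset() + ", " + o.untilOffset() + ")");
        }
    }
});

With the 0-8 direct approach there is no consumer group commit to Kafka; offsets are tracked through the checkpoint, or stored externally (for example in ZooKeeper) by the application itself.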
 
II. spark-streaming-kafka-0-10_2.11-2.0.2.jar
1. pom.xml
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.11 -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>2.0.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-streaming_2.11 -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.11</artifactId>
    <version>2.0.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-streaming-kafka-0-10_2.11 -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
    <version>2.0.2</version>
</dependency>
 
2. Kafka Consumer class
package com.spark.main;

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;

public class Kafka10Consumer {
    public static void main(String[] args) throws InterruptedException {
        /**
         * setMaster("local[2]"): at least two threads are required, one to receive
         * messages and one to process them.
         * Durations.milliseconds(500): a new batch is generated every 500 ms.
         */
        SparkConf sparkConf = new SparkConf().setAppName("Kafka10Consumer").setMaster("local[2]");
        JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.milliseconds(500));
        jssc.checkpoint("hdfs://192.168.168.200:9000/checkpoint/Kafka10Consumer");
        /**
         * Configure the Kafka connection parameters.
         */
        Set<String> topicsSet = new HashSet<String>(Arrays.asList("TestTopic"));
        Map<String, Object> kafkaParams = new HashMap<String, Object>();
        kafkaParams.put("bootstrap.servers", "192.168.168.200:9092");
        kafkaParams.put("key.deserializer", StringDeserializer.class);
        kafkaParams.put("value.deserializer", StringDeserializer.class);
        kafkaParams.put("group.id", "Kafka10Consumer");
        kafkaParams.put("auto.offset.reset", "earliest"); // earliest: start from the earliest offset; latest: start from the latest
        kafkaParams.put("enable.auto.commit", false);
        // Obtain the Kafka stream via KafkaUtils.createDirectStream(...); the connection parameters are taken from kafkaParams
        JavaInputDStream<ConsumerRecord<Object, Object>> messages = KafkaUtils.createDirectStream(
                jssc,
                LocationStrategies.PreferConsistent(),
                ConsumerStrategies.Subscribe(topicsSet, kafkaParams)
        );
        /**
         * consumerRecord.value() returns the message value of each record.
         */
        JavaDStream<String> lines = messages.map(new Function<ConsumerRecord<Object, Object>, String>() {
            @Override
            public String call(ConsumerRecord<Object, Object> consumerRecord) throws Exception {
                return consumerRecord.value().toString();
            }
        });
        lines.foreachRDD(new VoidFunction<JavaRDD<String>>() {
            public void call(JavaRDD<String> rdd) throws Exception {
                rdd.foreach(new VoidFunction<String>() {
                    public void call(String s) throws Exception {
                        System.out.println(s);
                    }
                });
            }
        });
        // Start the computation
        jssc.start();
        jssc.awaitTermination();
    }
}
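
Because enable.auto.commit is set to false above, the 0-10 integration lets the application commit offsets back to Kafka itself once a batch has been processed, via CanCommitOffsets. A minimal sketch (not from the original post, assuming `messages` is the JavaInputDStream created above and is effectively final so it can be referenced inside the anonymous class):

// Assumes the additional imports:
// import org.apache.spark.streaming.kafka010.CanCommitOffsets;
// import org.apache.spark.streaming.kafka010.HasOffsetRanges;
// import org.apache.spark.streaming.kafka010.OffsetRange;
messages.foreachRDD(new VoidFunction<JavaRDD<ConsumerRecord<Object, Object>>>() {
    public void call(JavaRDD<ConsumerRecord<Object, Object>> rdd) throws Exception {
        // Read the offset ranges of this batch before any shuffle or transformation
        OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
        // ... process the batch here, then commit its offsets asynchronously
        ((CanCommitOffsets) messages.inputDStream()).commitAsync(offsetRanges);
    }
});

This is one of the practical differences from the 0-8 API, which has no commitAsync and relies on checkpoints or external storage to track offsets.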
 