Committing Kafka-Spark offsets to Redis (Kafka 1.0)
Kafka version: 1.0.0
Spark artifact: spark-streaming-kafka-0-10_2.11
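
The class below keeps one Redis hash per topic|groupId (field = partition, value = offset): on startup it seeds the direct stream from that hash, falling back to Kafka's earliest offsets on first run and correcting any stored offset that has fallen out of Kafka's retained range, and after each batch persistOffset writes the end offsets back. The original post omits the imports; roughly the following should cover them (a sketch — JedisUtil is a project-local helper shown further down, and Logging is assumed to be Spark's internal logging trait or an equivalent of your own):

import kafka.utils.ZkUtils
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord, KafkaConsumer, NoOffsetForPartitionException}
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.internal.Logging
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.{HasOffsetRanges, KafkaUtils, OffsetRange}

import scala.collection.JavaConversions._
import scala.collection.mutable
import scala.reflect.ClassTag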
/**
 * Created by imp on 2019/12/21
 */
class KafkaManagerByRedis(zkHost: String, kafkaParams: Map[String, Object]) extends Logging {

  private val (zkClient, zkConnection) = ZkUtils.createZkClientAndConnection(zkHost, 10000, 10000)
  private val zkUtils = new ZkUtils(zkClient, zkConnection, false)
  private val jedis = JedisUtil.getInstance().getJedis
  /**
   * Create a direct stream whose starting offsets are seeded from Redis.
   */
  def createDirectStream[K: ClassTag, V: ClassTag](ssc: StreamingContext, topics: Seq[String]): InputDStream[ConsumerRecord[K, V]] = {
    // 1: read the stored offsets
    val groupId = kafkaParams("group.id").toString
    val topic = topics(0)
    val topicPartition: Map[TopicPartition, Long] = readOffset(topic, groupId)
    KafkaUtils.createDirectStream[K, V](
      ssc,
      PreferConsistent,
      Subscribe[K, V](topics, kafkaParams, topicPartition)
    )
  }
  /**
   * Read offsets for a topic: prefer what is stored in Redis; on first start
   * fall back to the earliest offsets available in Kafka.
   *
   * @param topic   topic name
   * @param groupId consumer group
   * @return Map[TopicPartition, Long]
   **/
  private def readOffset(topic: String, groupId: String): Map[TopicPartition, Long] = {
    val topicPartitionMap = collection.mutable.HashMap.empty[TopicPartition, Long]
    // Fetch the topic's partition list from ZooKeeper
    val topicAndPartitionMaps: mutable.Map[String, Seq[Int]] = zkUtils.getPartitionsForTopics(Seq(topic))
    val redisKey = topic + "|" + groupId
    // One Redis hash per topic|group: field = partition, value = offset.
    // Fetch it once rather than once per partition.
    val offsetMap: mutable.Map[String, String] = mapAsScalaMap(jedis.hgetAll(redisKey))
    topicAndPartitionMaps.foreach { topicPartitions =>
      // Iterate over the partitions
      topicPartitions._2.foreach { partition =>
        if (offsetMap.contains(partition.toString)) {
          logInfo("groupId:" + groupId + " read offsets from Redis")
          topicPartitionMap.put(new TopicPartition(topicPartitions._1, partition), offsetMap(partition.toString).toLong)
        } else {
          logWarning("First start: nothing stored in Redis yet, reading Kafka's beginning offsets")
          val consumer = new KafkaConsumer[String, Object](kafkaParams)
          val topicCollection = List(new TopicPartition(topicPartitions._1, partition))
          consumer.assign(topicCollection)
          val availableOffset: Long = consumer.beginningOffsets(topicCollection).values().head
          consumer.close()
          topicPartitionMap.put(new TopicPartition(topicPartitions._1, partition), availableOffset)
        }
      }
    }
    // Correct stale offsets: if a stored offset has fallen outside Kafka's
    // retained range (current < earliest || current > latest), reset it to earliest
    val earliestOffsets = getEarliestOffsets(kafkaParams, topic)
    val latestOffsets = getLatestOffsets(kafkaParams, List(topic))
    for ((k, v) <- topicPartitionMap) {
      val current = v
      val earliest = earliestOffsets(k)
      val latest = latestOffsets(k)
      if (current < earliest || current > latest) {
        topicPartitionMap.put(k, earliest)
      }
    }
    topicPartitionMap.toMap
  }
  /**
   * Get the earliest available offset of every partition of a topic.
   *
   * @param kafkaParams consumer configuration
   * @param topic       topic name
   * @return Map[TopicPartition, Long]
   */
  private def getEarliestOffsets(kafkaParams: Map[String, Object], topic: String): Map[TopicPartition, Long] = {
    val newKafkaParams = mutable.Map[String, Object]()
    newKafkaParams ++= kafkaParams
    newKafkaParams.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    // Plain Kafka consumer API
    val consumer = new KafkaConsumer[String, Array[Byte]](newKafkaParams)
    // Subscribe
    consumer.subscribe(Seq(topic))
    try {
      consumer.poll(0)
    } catch {
      case e: NoOffsetForPartitionException =>
        logError("no offset for partition " + e.partition(), e)
      // a good place to raise an alert (e.g. email)
    }
    // Fetch the assigned partitions
    val topicp = consumer.assignment().toSet
    // Pause consumption, then seek every partition to its beginning
    consumer.pause(topicp)
    consumer.seekToBeginning(topicp)
    val earliestOffsetMap = topicp.map(tp => tp -> consumer.position(tp)).toMap
    consumer.unsubscribe()
    consumer.close()
    earliestOffsetMap
  }
  /**
   * Get the latest offset of every partition of the given topics.
   */
  private def getLatestOffsets(kafkaParams: Map[String, Object], topic: Seq[String]): Map[TopicPartition, Long] = {
    val newKafkaParams = mutable.Map[String, Object]()
    newKafkaParams ++= kafkaParams
    newKafkaParams.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest")
    // Plain Kafka consumer API
    val consumer = new KafkaConsumer[String, Array[Byte]](newKafkaParams)
    // Subscribe
    consumer.subscribe(topic)
    try {
      consumer.poll(0)
    } catch {
      case e: NoOffsetForPartitionException =>
        logError("no offset for partition " + e.partition(), e)
      // a good place to raise an alert (e.g. email)
    }
    // Fetch the assigned partitions
    val topicp = consumer.assignment().toSet
    // Pause consumption, then seek every partition to its end
    consumer.pause(topicp)
    consumer.seekToEnd(topicp)
    val latestOffsetMap: Map[TopicPartition, Long] = topicp.map(tp => tp -> consumer.position(tp)).toMap
    consumer.unsubscribe()
    consumer.close()
    latestOffsetMap
  }
  /**
   * Persist the offsets of every partition in an RDD to the Redis hash.
   * With storeOffset = true the end offset (untilOffset) is saved; with
   * false the start offset (fromOffset) is saved instead.
   */
  def persistOffset[K, V](rdd: RDD[ConsumerRecord[K, V]], storeOffset: Boolean = true, topic: String): Unit = {
    val groupId = kafkaParams("group.id").toString
    val redisKey = topic + "|" + groupId
    val offsetRanges: Array[OffsetRange] = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
    offsetRanges.foreach { offsetRange =>
      val data = if (storeOffset) offsetRange.untilOffset else offsetRange.fromOffset
      jedis.hset(redisKey, offsetRange.partition.toString, data.toString)
      println("topic:" + offsetRange.topic + " partition:" + offsetRange.partition +
        " consumed from " + offsetRange.fromOffset + " to " + offsetRange.untilOffset +
        ", " + offsetRange.count() + " records in total")
    }
  }
}
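
JedisUtil is not shown in the original post; a minimal pool-backed singleton along the following lines would satisfy the getInstance().getJedis calls above (a sketch — the pool sizing and the Redis address are placeholders):

import redis.clients.jedis.{Jedis, JedisPool, JedisPoolConfig}

object JedisUtil {
  private val config = new JedisPoolConfig
  config.setMaxTotal(20)
  config.setMaxIdle(10)
  // Placeholder address: point this at your Redis instance
  private val pool = new JedisPool(config, "192.168.121.12", 6379)
  private val instance = new JedisUtil(pool)

  def getInstance(): JedisUtil = instance
}

class JedisUtil(pool: JedisPool) {
  def getJedis: Jedis = pool.getResource
}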
object KafkaManagerByRedis {
  def main(args: Array[String]): Unit = {
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "192.168.121.12:9092,192.168.121.12:9093",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "test1",
      "auto.offset.reset" -> "earliest",
      // Disable auto-commit: otherwise offsets could be committed before a batch
      // is fully processed, which corrupts the data on failure
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    val zkServer = ""
    val kafkama = new KafkaManagerByRedis(zkServer, kafkaParams)
    kafkama.getEarliestOffsets(kafkaParams, "cheng_du_gps_topic")
      .foreach(m => println(m._1.topic(), m._1.partition(), m._2))
    kafkama.getLatestOffsets(kafkaParams, List("cheng_du_gps_topic"))
      .foreach(m => println(m._1.topic(), m._1.partition(), m._2))
  }
}
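
Wired into an actual streaming job, the manager would be used roughly as follows (a sketch — the app name GpsStreamingApp, batch interval, and ZooKeeper address are placeholders; the topic and Kafka params are carried over from the example above):

import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object GpsStreamingApp {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("kafka-offsets-in-redis").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(5))

    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "192.168.121.12:9092,192.168.121.12:9093",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "test1",
      "auto.offset.reset" -> "earliest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    val topic = "cheng_du_gps_topic"
    val manager = new KafkaManagerByRedis("zk-host:2181", kafkaParams)

    // Seed the stream from the offsets stored in Redis
    val stream = manager.createDirectStream[String, String](ssc, Seq(topic))

    stream.foreachRDD { rdd =>
      // Process the batch first ...
      rdd.foreach(record => println(record.value()))
      // ... and only then persist the end offsets, so the offsets in Redis
      // never run ahead of what has actually been processed
      manager.persistOffset(rdd, storeOffset = true, topic = topic)
    }

    ssc.start()
    ssc.awaitTermination()
  }
}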