以下是对StreamingListener的研究,由于比较简单,故只贴代码,不做解释

/**
 * Created by gabry.wu on 2016/5/27.
 *
 * A [[StreamingListener]] that logs every receiver, batch and output-operation
 * event at WARN level so the state of a Spark Streaming job can be monitored.
 *
 * Each callback writes two log lines: the callback name, followed by a
 * comma-separated `key=value` dump of the info object carried by the event.
 *
 * @param ssc the streaming context being monitored. No callback in this class
 *            uses it; per the original author's note it is passed in so that a
 *            callback could stop the current streaming job on certain errors.
 */
class StreamingMonitor(ssc: StreamingContext) extends StreamingListener {
  // Imported locally so the helper signatures below resolve regardless of how
  // the file-level imports are written.
  import org.apache.spark.streaming.scheduler.{BatchInfo, OutputOperationInfo, ReceiverInfo}

  private val log = LoggerFactory.getLogger("SparkStreamingMonitor")

  /** Logs `event`, then the fields of the receiver the event refers to. */
  private def logReceiver(event: String, info: ReceiverInfo): Unit = {
    log.warn(event)
    log.warn(
      s"active=${info.active},executorId=${info.executorId}," +
        s"lastError=${info.lastError},lastErrorMessage=${info.lastErrorMessage}," +
        s"location=${info.location},name=${info.name}," +
        s"streamId=${info.streamId}")
  }

  /** Logs `event`, then the timing and size fields of the batch the event refers to. */
  private def logBatch(event: String, info: BatchInfo): Unit = {
    log.warn(event)
    log.warn(
      s"batchTime=${info.batchTime},numRecords=${info.numRecords}," +
        s"processingDelay=${info.processingDelay},processingEndTime=${info.processingEndTime}," +
        s"processingStartTime=${info.processingStartTime},schedulingDelay=${info.schedulingDelay}," +
        s"submissionTime=${info.submissionTime},totalDelay=${info.totalDelay}")
  }

  /** Logs `event`, then the fields of the output operation the event refers to. */
  private def logOutputOperation(event: String, info: OutputOperationInfo): Unit = {
    log.warn(event)
    log.warn(
      s"batchTime=${info.batchTime},description=${info.description}," +
        s"duration=${info.duration},endTime=${info.endTime}," +
        s"failureReason=${info.failureReason},id=${info.id}," +
        s"name=${info.name},startTime=${info.startTime}")
  }

  /** A receiver has started. */
  override def onReceiverStarted(receiverStarted: StreamingListenerReceiverStarted): Unit =
    logReceiver("onReceiverStarted", receiverStarted.receiverInfo)

  /** A receiver reported an error; receiver failures can be handled here. */
  override def onReceiverError(receiverError: StreamingListenerReceiverError): Unit =
    logReceiver("onReceiverError", receiverError.receiverInfo)

  /** A receiver has stopped. */
  override def onReceiverStopped(receiverStopped: StreamingListenerReceiverStopped): Unit =
    logReceiver("onReceiverStopped", receiverStopped.receiverInfo)

  /**
   * A batch has been submitted.
   *
   * Original author's note: the amount of data is already known before the job
   * is submitted; `batchInfo.numRecords` is the record count of this batch.
   */
  override def onBatchSubmitted(batchSubmitted: StreamingListenerBatchSubmitted): Unit =
    logBatch("onBatchSubmitted", batchSubmitted.batchInfo)

  /**
   * A batch has started.
   *
   * Original author's note: `batchInfo.schedulingDelay` is the interval between
   * submission and the actual start of the batch.
   */
  override def onBatchStarted(batchStarted: StreamingListenerBatchStarted): Unit =
    logBatch("onBatchStarted", batchStarted.batchInfo)

  /**
   * A batch has completed.
   *
   * Original author's notes: `batchInfo.processingDelay` is the time spent
   * processing the batch; `batchInfo.totalDelay` is the total time from
   * submission to completion.
   */
  override def onBatchCompleted(batchCompleted: StreamingListenerBatchCompleted): Unit =
    logBatch("onBatchCompleted", batchCompleted.batchInfo)

  /**
   * An output operation has started.
   *
   * Original author's notes: `outputOperationInfo.description` is essentially
   * part of the call stack and can be used to locate the output action;
   * `outputOperationInfo.name` is the name of the action's function.
   */
  override def onOutputOperationStarted(outputOperationStarted: StreamingListenerOutputOperationStarted): Unit =
    logOutputOperation("onOutputOperationStarted", outputOperationStarted.outputOperationInfo)

  /**
   * An output operation has completed.
   *
   * Original author's notes: `outputOperationInfo.duration` is the time the
   * action took; `outputOperationInfo.failureReason` is why the action failed,
   * so batch failures can be handled in this callback.
   */
  override def onOutputOperationCompleted(outputOperationCompleted: StreamingListenerOutputOperationCompleted): Unit =
    logOutputOperation("onOutputOperationCompleted", outputOperationCompleted.outputOperationInfo)
}

下面是添加StreamingListener的代码

// Batch interval for the streaming context, built from the externally defined
// `batchDuration` (presumably a millisecond count; confirm against its definition).
val batchInterval = new Duration(batchDuration)
val ssc = new StreamingContext(sparkConf, batchInterval)

// Register the monitor so that its callbacks receive this context's
// receiver, batch and output-operation events.
val monitor = new StreamingMonitor(ssc)
ssc.addStreamingListener(monitor)

  

各个函数的调用顺序
onReceiverStarted->[接收到数据]->onBatchSubmitted->onBatchStarted->onOutputOperationStarted->onOutputOperationCompleted->onBatchCompleted->[接收到数据]->onBatchSubmitted->onBatchStarted->onOutputOperationStarted->onOutputOperationCompleted->onBatchCompleted->.......->onReceiverStopped
其中[接收到数据]是可选项,并不是每次都会接收到数据。

StreamingListener技术点的更多相关文章

  1. Spark大数据处理技术

    全球首部全面介绍Spark及Spark生态圈相关技术的技术书籍 俯览未来大局,不失精细剖析,呈现一个现代大数据框架的架构原理和实现细节 透彻讲解Spark原理和架构,以及部署模式.调度框架.存储管理及 ...

  2. 关于解决python线上问题的几种有效技术

    工作后好久没上博客园了,虽然不是很忙,但也没学生时代闲了.今天上博客园,发现好多的文章都是年终总结,想想是不是自己也应该总结下,不过现在还没想好,等想好了再写吧.今天写写自己在工作后用到的技术干货,争 ...

  3. SQL Server技术内幕笔记合集

    SQL Server技术内幕笔记合集 发这一篇文章主要是方便大家找到我的笔记入口,方便大家o(∩_∩)o Microsoft SQL Server 6.5 技术内幕 笔记http://www.cnbl ...

  4. 本人提供微软系.NET技术顾问服务,欢迎企业咨询!

    背景: 1:目前微软系.NET技术高端人才缺少. 2:企业很难直接招到高端技术人才. 3:本人提供.NET技术顾问,保障你的产品或项目在正确的技术方向. 技术顾问服务 硬服务项: 1:提供技术.决策. ...

  5. 分布式锁1 Java常用技术方案

    前言:       由于在平时的工作中,线上服务器是分布式多台部署的,经常会面临解决分布式场景下数据一致性的问题,那么就要利用分布式锁来解决这些问题.所以自己结合实际工作中的一些经验和网上看到的一些资 ...

  6. 【大型网站技术实践】初级篇:借助LVS+Keepalived实现负载均衡

    一.负载均衡:必不可少的基础手段 1.1 找更多的牛来拉车吧 当前大多数的互联网系统都使用了服务器集群技术,集群即将相同服务部署在多台服务器上构成一个集群整体对外提供服务,这些集群可以是Web应用服务 ...

  7. 探真无阻塞加载javascript脚本技术,我们会发现很多意想不到的秘密

    下面的图片是我使用firefox和chrome浏览百度首页时候记录的http请求 下面是firefox: 下面是chrome: 在浏览百度首页前我都将浏览器的缓存全部清理掉,让这个场景最接近第一次访问 ...

  8. 关于如何提高Web服务端并发效率的异步编程技术

    最近我研究技术的一个重点是java的多线程开发,在我早期学习java的时候,很多书上把java的多线程开发标榜为简单易用,这个简单易用是以C语言作为参照的,不过我也没有使用过C语言开发过多线程,我只知 ...

  9. 架构设计:远程调用服务架构设计及zookeeper技术详解(下篇)

    一.下篇开头的废话 终于开写下篇了,这也是我写远程调用框架的第三篇文章,前两篇都被博客园作为[编辑推荐]的文章,很兴奋哦,嘿嘿~~~~,本人是个很臭美的人,一定得要截图为证: 今天是2014年的第一天 ...

随机推荐

  1. java基数排序

    代码如下: import java.util.Arrays; public class MultiKeyRadixSort { public static void radixSort(int [] ...

  2. Python利用带权重随机数解决抽奖和游戏爆装备问题

    关于带权随机数 为了帮助理解,先来看三类随机问题的对比: 1.已有n条记录,从中选取m条记录,选取出来的记录前后顺序不管. 实现思路:按行遍历所有记录,约隔n/m条取一个数据即可 2.在1类情况下,还 ...

  3. 如何卸载 win10 自带的“电影和电视”软件

    参考这里: https://answers.microsoft.com/zh-hans/windows/forum/apps_windows_10-movies/win10%E7%9A%84%E7%9 ...

  4. BZOJ 1666 USACO 2006 Oct. 奶牛的数字游戏

    直接模拟2333 #include<cstdio> #include<algorithm> using namespace std; int n,ans; void read( ...

  5. WordCountPro,完结撒花

    WordCountPro,完结撒花 软测第四周作业 一.概述 该项目github地址如下: https://github.com/YuQiao0303/WordCountPro 该项目需求如下: ht ...

  6. JavaSE 学习笔记之IO流(二十二)

    IO流:用于处理设备上数据. 流:可以理解数据的流动,就是一个数据流.IO流最终要以对象来体现,对象都存在IO包中. 流也进行分类: 1:输入流(读)和输出流(写). 2:因为处理的数据不同,分为字节 ...

  7. 零基础到架构师 不花钱学JavaEE(基础篇)- 概述

    Java简单来说是一门语言,Java能干什么? 网站:开发大,中,小型网站. 服务器端程序:企业级程序开发. APP:Android的APP基本使用Java开发. 云:Hadoop就是使用Java语言 ...

  8. HDU 4906 (dp胡乱搞)

    The Romantic Her Problem Description There is an old country and the king fell in love with a devil. ...

  9. zoj 1008 暴力枚举求解dfs+优化

    /* 现将相同的合并计数. 再枚举判断是否符合当cou==n*n是符合就退出 */ #include<stdio.h> #include<string.h> #define N ...

  10. C. Painting Fence 分治

    memory limit per test 512 megabytes input standard input output standard output Bizon the Champion i ...