spark streaming 4: DStreamGraph JobScheduler


final private[streaming] class DStreamGraph extends Serializable with Logging {
private val inputStreams = new ArrayBuffer[InputDStream[_]]()
private val outputStreams = new ArrayBuffer[DStream[_]]()
var rememberDuration: Duration = null
var checkpointInProgress = false
var zeroTime: Time = null
var startTime: Time = null
var batchDuration: Duration = null
def addInputStream(inputStream: InputDStream[_]) {
this.synchronized {
inputStream.setGraph(this)
inputStreams += inputStream
}
}
def addOutputStream(outputStream: DStream[_]) {
this.synchronized {
outputStream.setGraph(this)
outputStreams += outputStream
}
}
def getInputStreams() = this.synchronized { inputStreams.toArray }
def getOutputStreams() = this.synchronized { outputStreams.toArray }
def getReceiverInputStreams() = this.synchronized {
inputStreams.filter(_.isInstanceOf[ReceiverInputDStream[_]])
.map(_.asInstanceOf[ReceiverInputDStream[_]])
.toArray
}
def generateJobs(time: Time): Seq[Job] = {
logDebug("Generating jobs for time " + time)
val jobs = this.synchronized {
outputStreams.flatMap(outputStream => outputStream.generateJob(time))
}
logDebug("Generated " + jobs.length + " jobs for time " + time)
jobs
}
@throws(classOf[IOException])
private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException {
logDebug("DStreamGraph.writeObject used")
this.synchronized {
checkpointInProgress = true
logDebug("Enabled checkpoint mode")
oos.defaultWriteObject()
checkpointInProgress = false
logDebug("Disabled checkpoint mode")
}
}
@throws(classOf[IOException])
private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException {
logDebug("DStreamGraph.readObject used")
this.synchronized {
checkpointInProgress = true
ois.defaultReadObject()
checkpointInProgress = false
}
}
/**
* This class schedules jobs to be run on Spark. It uses the JobGenerator to generate
* the jobs and runs them using a thread pool.
*/
private[streaming]
class JobScheduler(val ssc: StreamingContext) extends Logging {
private val jobSets = new ConcurrentHashMap[Time, JobSet]
private val numConcurrentJobs = ssc.conf.getInt("spark.streaming.concurrentJobs", 1)
private val jobExecutor = Executors.newFixedThreadPool(numConcurrentJobs)
private val jobGenerator = new JobGenerator(this)
val clock = jobGenerator.clock
val listenerBus = new StreamingListenerBus()
// These two are created only when scheduler starts.
// eventActor not being null means the scheduler has been started and not stopped
var receiverTracker: ReceiverTracker = null
private var eventActor: ActorRef = null
def start(): Unit = synchronized {
if (eventActor != null) return // scheduler has already been started
logDebug("Starting JobScheduler")
eventActor = ssc.env.actorSystem.actorOf(Props(new Actor {
def receive = {
case event: JobSchedulerEvent => processEvent(event)
}
}), "JobScheduler")
listenerBus.start()
receiverTracker = new ReceiverTracker(ssc)
receiverTracker.start()
jobGenerator.start()
logInfo("Started JobScheduler")
}
def submitJobSet(jobSet: JobSet) {
if (jobSet.jobs.isEmpty) {
logInfo("No jobs added for time " + jobSet.time)
} else {
jobSets.put(jobSet.time, jobSet)
jobSet.jobs.foreach(job => jobExecutor.execute(new JobHandler(job)))
logInfo("Added jobs for time " + jobSet.time)
}
}
private class JobHandler(job: Job) extends Runnable {
def run() {
eventActor ! JobStarted(job)
job.run()
eventActor ! JobCompleted(job)
}
}
private def handleJobCompletion(job: Job) {
job.result match {
case Success(_) =>
val jobSet = jobSets.get(job.time)
jobSet.handleJobCompletion(job)
logInfo("Finished job " + job.id + " from job set of time " + jobSet.time)
if (jobSet.hasCompleted) {
jobSets.remove(jobSet.time)
jobGenerator.onBatchCompletion(jobSet.time)
logInfo("Total delay: %.3f s for time %s (execution: %.3f s)".format(
jobSet.totalDelay / 1000.0, jobSet.time.toString,
jobSet.processingDelay / 1000.0
))
listenerBus.post(StreamingListenerBatchCompleted(jobSet.toBatchInfo))
}
case Failure(e) =>
reportError("Error running job " + job, e)
}
}
spark streaming 4: DStreamGraph JobScheduler的更多相关文章
- Spark Streaming Backpressure分析
1.为什么引入Backpressure 默认情况下,Spark Streaming通过Receiver以生产者生产数据的速率接收数据,计算过程中会出现batch processing time > ...
- Spark Streaming性能优化: 如何在生产环境下应对流数据峰值巨变
1.为什么引入Backpressure 默认情况下,Spark Streaming通过Receiver以生产者生产数据的速率接收数据,计算过程中会出现batch processing time > ...
- 5. Spark Streaming高级解析
5.1 DStreamGraph对象分析 在Spark Streaming中,DStreamGraph是一个非常重要的组件,主要用来: 1. 通过成员inputStreams持有Spark Strea ...
- 4. Spark Streaming解析
4.1 初始化StreamingContext import org.apache.spark._ import org.apache.spark.streaming._ val conf = new ...
- Spark Streaming揭秘 Day25 StreamingContext和JobScheduler启动源码详解
Spark Streaming揭秘 Day25 StreamingContext和JobScheduler启动源码详解 今天主要理一下StreamingContext的启动过程,其中最为重要的就是Jo ...
- Spark Streaming揭秘 Day3-运行基石(JobScheduler)大揭秘
Spark Streaming揭秘 Day3 运行基石(JobScheduler)大揭秘 引子 作为一个非常强大框架,Spark Streaming兼具了流处理和批处理的特点.还记得第一天的谜团么,众 ...
- Spark Streaming源码分析 – JobScheduler
先给出一个job从被generate到被执行的整个过程在JobGenerator中,需要定时的发起GenerateJobs事件,而每个job其实就是针对DStream中的一个RDD,发起一个Spark ...
- 贯通Spark Streaming JobScheduler内幕实现和深入思考
本节主要内容: 一.SparkStreaming Job生成深度思考 二.SparkStreaming Job生成源码解析 JobScheduler的地位非常的重要,所有的关键都在JobSchedul ...
- Spark Streaming源码解读之JobScheduler内幕实现和深度思考
本期内容 : JobScheduler内幕实现 JobScheduler深度思考 JobScheduler 是整个Spark Streaming调度的核心,需要设置多线程,一条用于接收数据不断的循环, ...
随机推荐
- svn经典总结
大佬的svn:http://www.cnblogs.com/armyfai/p/3985660.html#!comments https://www.cnblogs.com/0zcl/p/730976 ...
- md加密 16位 32位
16位大写 //生成MD5 public static String getMD5(String message) { String md5 = ""; try { Message ...
- vue2.0+按需引入element-ui报错
项目使用vue脚手架自动生成的,vue版本为^2.5.16.项目中需要按需使用element-ui,根据element-ui的官方文档,一开始在babel.config.js文件中修改配置 modul ...
- JVM学习(三):垃圾回收算法
局部性原理和分代回收思想 大学学习操作系统或者计算机组成原理的时候都提到一个重要概念,叫局部性原理. 局部性原理是指CPU访问存储器时,无论是存取指令还是存取数据,所访问的存储单元都趋于聚集在一个较小 ...
- hadoop 中ALL Applications 中Tracking 下History查找不到MapReduce Job 日志
运行一个Map Reduce job 想查看日志: 点击History ,找不到网页 解决办法如下: 1.其中有一个进程是需要启动的: Hadoop自带了一个历史服务器,可以通过历史服务器查看已经运行 ...
- 原创博客>>>解决粘包问题的方法
目录 原创博客>>>解决粘包问题的方法 原创博客>>>解决粘包问题的方法 服务端: import socket import struct service=sock ...
- 【vue lazyload】插件的使用步骤
VUE图片懒加载-vue lazyload插件的简单使用
- Java之正则表达式来判断字符串中是否包含字母
/** * 使用正则表达式来判断字符串中是否包含字母 * @param str 待检验的字符串 * @return 返回是否包含 * true: 包含字母 ;false 不包含字母 */ public ...
- Acwing-169-数独2(搜索, 剪枝)
链接: https://www.acwing.com/problem/content/171/ 题意: 请你将一个16x16的数独填写完整,使得每行.每列.每个4x4十六宫格内字母A~P均恰好出现一次 ...
- 小程序上传base64的图片,可上传多张
微信小程序上传图片转化为base64格式 clickimage: function(e) { var index = e.currentTarget.dataset.index; var count ...