Notes -------- Spark Worker internals and source code analysis

/**
 * Launching the driver: source code analysis
 */
case LaunchDriver(driverId, driverDesc) =>
  logInfo(s"Asked to launch driver $driverId")
  // Create the DriverRunner; it will run the driver on a dedicated thread
  val driver = new DriverRunner(
    conf,
    driverId,
    workDir,
    sparkHome,
    driverDesc.copy(command = Worker.maybeUpdateSSLSettings(driverDesc.command, conf)),
    self,
    workerUri,
    securityMgr)
  // Put the DriverRunner into the drivers hash map
  drivers(driverId) = driver
  // Start the driver (details in Code 1)
  driver.start()
  // Account for the cores and memory the driver occupies
  coresUsed += driverDesc.cores
  memoryUsed += driverDesc.mem
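For orientation: the `case LaunchDriver(...)` clause above (and `case LaunchExecutor(...)` further down) is a branch of the Worker RPC endpoint's receive handler. A minimal, runnable sketch of that dispatch shape — the message classes and ids below are simplified stand-ins, not Spark's real ones:

object ReceiveSketch {
  sealed trait WorkerMessage
  case class LaunchDriver(driverId: String) extends WorkerMessage
  case class LaunchExecutor(appId: String, execId: Int) extends WorkerMessage

  // In the real Worker this is `override def receive: PartialFunction[Any, Unit]`
  // on the RPC endpoint; here it is a plain partial function.
  val receive: PartialFunction[WorkerMessage, Unit] = {
    case LaunchDriver(driverId)        => println(s"would start a DriverRunner for $driverId")
    case LaunchExecutor(appId, execId) => println(s"would start an ExecutorRunner for $appId/$execId")
  }

  def main(args: Array[String]): Unit = {
    receive(LaunchDriver("driver-20240101000000-0000"))   // hypothetical ids
    receive(LaunchExecutor("app-20240101000000-0000", 0))
  }
}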
Code 1

/** Starts a thread to run and manage the driver. */
private[worker] def start() = {
  // DriverRunner mechanics: start a Java thread
  new Thread("DriverRunner for " + driverId) {
    override def run() {
      var shutdownHook: AnyRef = null
      try {
        shutdownHook = ShutdownHookManager.addShutdownHook { () =>
          logInfo(s"Worker shutting down, killing driver $driverId")
          kill()
        }
        // Prepare driver jars and run the driver. Three steps happen here:
        // Step 1: create the DriverRunner's working directory.
        // Step 2: download the user-uploaded jar (the Spark application we wrote:
        //         a Java app is typically packaged as a jar with Maven; a Scala app
        //         is exported as a jar).
        // Step 3: build the ProcessBuilder.
        val exitCode = prepareAndRunDriver() // details in Code 2

        // Set the final state depending on whether the driver was forcibly killed
        // and on the process exit code
        finalState = if (exitCode == 0) {
          Some(DriverState.FINISHED)
        } else if (killed) {
          Some(DriverState.KILLED)
        } else {
          Some(DriverState.FAILED)
        }
      } catch {
        case e: Exception =>
          kill()
          finalState = Some(DriverState.ERROR)
          finalException = Some(e)
      } finally {
        if (shutdownHook != null) {
          ShutdownHookManager.removeShutdownHook(shutdownHook)
        }
      }
      // Notify the worker of the final driver state and any exception: the
      // DriverRunner thread sends a DriverStateChanged message to the Worker
      // endpoint it belongs to (details in Code 3)
      worker.send(DriverStateChanged(driverId, finalState.get, finalException))
    }
  }.start()
}
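The structure of start() — a dedicated thread, a shutdown hook installed only for the duration of the run, and an exit code mapped to a final state — can be reproduced standalone. A hedged sketch of just that pattern, with invented names standing in for Spark's kill(), prepareAndRunDriver() and DriverStateChanged:

object DriverRunnerSketch {
  def main(args: Array[String]): Unit = {
    new Thread("DriverRunner-sketch") {
      override def run(): Unit = {
        // Install a hook so an abrupt JVM shutdown can still kill the child process
        val hook = new Thread(() => println("worker shutting down, would kill the driver"))
        Runtime.getRuntime.addShutdownHook(hook)
        try {
          val exitCode = 0 // stand-in for prepareAndRunDriver()
          val finalState = if (exitCode == 0) "FINISHED" else "FAILED"
          // Stand-in for worker.send(DriverStateChanged(...))
          println(s"reporting final state: $finalState")
        } finally {
          // The hook is only for crashes while the driver runs; remove it on the normal path
          Runtime.getRuntime.removeShutdownHook(hook)
        }
      }
    }.start()
  }
}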
Code 2

private[worker] def prepareAndRunDriver(): Int = {
  // Step 1: create the DriverRunner's working directory
  val driverDir = createWorkingDirectory()
  // Step 2: download the user-uploaded jar
  val localJarFilename = downloadUserJar(driverDir)

  def substituteVariables(argument: String): String = argument match {
    case "{{WORKER_URL}}" => workerUrl
    case "{{USER_JAR}}" => localJarFilename
    case other => other
  }

  // TODO: If we add ability to submit multiple jars they should also be added here

  // Step 3: build the ProcessBuilder, passing in the driver's launch command,
  // the memory it needs, and so on
  val builder = CommandUtils.buildProcessBuilder(driverDesc.command, securityManager,
    driverDesc.mem, sparkHome.getAbsolutePath, substituteVariables)

  runDriver(builder, driverDir, driverDesc.supervise)
}
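buildProcessBuilder applies substituteVariables to every argument of the launch command, so the {{WORKER_URL}} and {{USER_JAR}} placeholders are expanded just before the process is built. A runnable sketch of that expansion; the URL, jar path, and main class below are made-up example values:

object SubstituteSketch {
  val workerUrl = "spark://Worker@worker-host:7078"   // hypothetical
  val localJarFilename = "/work/driver-0001/app.jar"  // hypothetical

  def substituteVariables(argument: String): String = argument match {
    case "{{WORKER_URL}}" => workerUrl
    case "{{USER_JAR}}"   => localJarFilename
    case other            => other
  }

  def main(args: Array[String]): Unit = {
    // Shape of a driver launch argument list with placeholders still in place
    val rawArgs = Seq("org.apache.spark.deploy.worker.DriverWrapper",
      "{{WORKER_URL}}", "{{USER_JAR}}", "com.example.Main")
    println(rawArgs.map(substituteVariables).mkString(" "))
  }
}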
Code 3

// After the driver finishes, the DriverRunner thread sends its state to the worker;
// the worker in turn forwards a DriverStateChanged message to the Master
case driverStateChanged @ DriverStateChanged(driverId, state, exception) =>
  handleDriverStateChanged(driverStateChanged) // details in Code 4
Code 4

private[worker] def handleDriverStateChanged(driverStateChanged: DriverStateChanged): Unit = {
  val driverId = driverStateChanged.driverId
  val exception = driverStateChanged.exception
  val state = driverStateChanged.state
  state match {
    case DriverState.ERROR =>
      logWarning(s"Driver $driverId failed with unrecoverable exception: ${exception.get}")
    case DriverState.FAILED =>
      logWarning(s"Driver $driverId exited with failure")
    case DriverState.FINISHED =>
      logInfo(s"Driver $driverId exited successfully")
    case DriverState.KILLED =>
      logInfo(s"Driver $driverId was killed by user")
    case _ =>
      logDebug(s"Driver $driverId changed state to $state")
  }
  // Forward the DriverStateChanged message to the Master, which updates
  // the driver's state on its side
  sendToMaster(driverStateChanged)
  // Remove the driver from the local cache and move it to the finished-drivers map
  val driver = drivers.remove(driverId).get
  finishedDrivers(driverId) = driver
  trimFinishedDriversIfNecessary()
  // Release the memory and CPU the driver occupied
  memoryUsed -= driver.driverDesc.mem
  coresUsed -= driver.driverDesc.cores
}
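trimFinishedDriversIfNecessary() exists so that finishedDrivers, which only feeds the worker UI, cannot grow without bound. A sketch of the idea — evict the oldest entries once a cap is exceeded. The cap of 3 is arbitrary here, and both the real method's eviction batch size and the config that sets the cap (I believe spark.worker.ui.retainedDrivers) should be checked against your Spark version:

import scala.collection.mutable

object TrimSketch {
  val retainedDrivers = 3 // stand-in for the configured cap
  val finishedDrivers = mutable.LinkedHashMap[String, String]()

  // Evict the oldest entries once the map exceeds the cap (insertion order
  // is preserved by LinkedHashMap, so the head entries are the oldest)
  def trimFinishedDriversIfNecessary(): Unit = {
    if (finishedDrivers.size > retainedDrivers) {
      finishedDrivers.keys.take(finishedDrivers.size - retainedDrivers)
        .toList.foreach(finishedDrivers.remove)
    }
  }

  def main(args: Array[String]): Unit = {
    (1 to 5).foreach { i =>
      finishedDrivers(s"driver-$i") = s"runner-$i"
      trimFinishedDriversIfNecessary()
    }
    println(finishedDrivers.keys.mkString(", ")) // driver-3, driver-4, driver-5
  }
}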
/**
 * Launching an executor: source code analysis
 */
case LaunchExecutor(masterUrl, appId, execId, appDesc, cores_, memory_) =>
  if (masterUrl != activeMasterUrl) {
    logWarning("Invalid Master (" + masterUrl + ") attempted to launch executor.")
  } else {
    try {
      logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name))

      // Create the executor's local working directory
      val executorDir = new File(workDir, appId + "/" + execId)
      if (!executorDir.mkdirs()) {
        throw new IOException("Failed to create directory " + executorDir)
      }

      // Create local dirs for the executor. These are passed to the executor via the
      // SPARK_EXECUTOR_DIRS environment variable, and deleted by the Worker when the
      // application finishes.
      val appLocalDirs = appDirectories.getOrElse(appId,
        Utils.getOrCreateLocalRootDirs(conf).map { dir =>
          val appDir = Utils.createDirectory(dir, namePrefix = "executor")
          Utils.chmod700(appDir)
          appDir.getAbsolutePath()
        }.toSeq)
      appDirectories(appId) = appLocalDirs

      // Create the ExecutorRunner
      val manager = new ExecutorRunner(
        appId,
        execId,
        appDesc.copy(command = Worker.maybeUpdateSSLSettings(appDesc.command, conf)),
        cores_,
        memory_,
        self,
        workerId,
        host,
        webUi.boundPort,
        publicAddress,
        sparkHome,
        executorDir,
        workerUri,
        conf,
        appLocalDirs,
        ExecutorState.RUNNING)
      // Put the ExecutorRunner into the local cache
      executors(appId + "/" + execId) = manager
      // Start the ExecutorRunner (details in Code 5)
      manager.start()
      // Account for the CPU and memory the executor uses
      coresUsed += cores_
      memoryUsed += memory_
      // Send the Master an ExecutorStateChanged event so it can update its state
      sendToMaster(ExecutorStateChanged(appId, execId, manager.state, None, None))
    } catch {
      case e: Exception =>
        logError(s"Failed to launch executor $appId/$execId for ${appDesc.name}.", e)
        if (executors.contains(appId + "/" + execId)) {
          executors(appId + "/" + execId).kill()
          executors -= appId + "/" + execId
        }
        sendToMaster(ExecutorStateChanged(appId, execId, ExecutorState.FAILED,
          Some(e.toString), None))
    }
  }
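So each executor gets its own working directory at workDir/appId/execId, and its log files will land there (see Code 6). A runnable sketch of just that layout, using a temp directory and invented ids:

import java.io.{File, IOException}

object ExecutorDirSketch {
  def main(args: Array[String]): Unit = {
    val workDir = new File(System.getProperty("java.io.tmpdir"), "spark-worker-sketch")
    val appId  = "app-20240101000000-0000" // hypothetical
    val execId = 0
    // Same "appId/execId" layout as the handler above
    val executorDir = new File(workDir, appId + "/" + execId)
    // Unlike the real code, tolerate the directory already existing so the
    // sketch can be re-run
    if (!executorDir.mkdirs() && !executorDir.isDirectory) {
      throw new IOException("Failed to create directory " + executorDir)
    }
    println(s"executor working dir: $executorDir")
  }
}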
Code 5

private[worker] def start() {
  // Start a Java thread that fetches and runs the executor (details in Code 6)
  workerThread = new Thread("ExecutorRunner for " + fullId) {
    override def run() { fetchAndRunExecutor() }
  }
  workerThread.start()

  // Shutdown hook that kills actors on shutdown.
  shutdownHook = ShutdownHookManager.addShutdownHook { () =>
    // It's possible that we arrive here before calling `fetchAndRunExecutor`, then `state` will
    // be `ExecutorState.RUNNING`. In this case, we should set `state` to `FAILED`.
    if (state == ExecutorState.RUNNING) {
      state = ExecutorState.FAILED
    }
    killProcess(Some("Worker shutting down"))
  }
}
Code 6

/**
 * Download and run the executor described in our ApplicationDescription
 */
private def fetchAndRunExecutor() {
  try {
    // Launch the process: build a ProcessBuilder for the executor command
    val builder = CommandUtils.buildProcessBuilder(appDesc.command, new SecurityManager(conf),
      memory, sparkHome.getAbsolutePath, substituteVariables)
    val command = builder.command()
    val formattedCommand = command.asScala.mkString("\"", "\" \"", "\"")
    logInfo(s"Launch command: $formattedCommand")

    builder.directory(executorDir)
    builder.environment.put("SPARK_EXECUTOR_DIRS", appLocalDirs.mkString(File.pathSeparator))
    // In case we are running this from within the Spark Shell, avoid creating a "scala"
    // parent process for the executor command
    builder.environment.put("SPARK_LAUNCH_WITH_SCALA", "")

    // Add webUI log urls
    val baseUrl =
      if (conf.getBoolean("spark.ui.reverseProxy", false)) {
        s"/proxy/$workerId/logPage/?appId=$appId&executorId=$execId&logType="
      } else {
        s"http://$publicAddress:$webUiPort/logPage/?appId=$appId&executorId=$execId&logType="
      }
    builder.environment.put("SPARK_LOG_URL_STDERR", s"${baseUrl}stderr")
    builder.environment.put("SPARK_LOG_URL_STDOUT", s"${baseUrl}stdout")

    // Start the executor process
    process = builder.start()

    // Redirect the process's InputStream and ErrorStream to the stdout and
    // stderr files in the executor's local working directory, respectively
    val header = "Spark Executor Command: %s\n%s\n\n".format(
      formattedCommand, "=" * 40)
    val stdout = new File(executorDir, "stdout")
    stdoutAppender = FileAppender(process.getInputStream, stdout, conf)

    val stderr = new File(executorDir, "stderr")
    Files.write(header, stderr, StandardCharsets.UTF_8)
    stderrAppender = FileAppender(process.getErrorStream, stderr, conf)

    // Wait for it to exit; executor may exit with code 0 (when driver instructs
    // it to shutdown) or with a nonzero exit code. waitFor() blocks until the
    // executor process finishes, then yields its exit code.
    val exitCode = process.waitFor()
    state = ExecutorState.EXITED
    val message = "Command exited with code " + exitCode
    // Send an ExecutorStateChanged message to the Worker endpoint this
    // ExecutorRunner thread belongs to (details in Code 7)
    worker.send(ExecutorStateChanged(appId, execId, state, Some(message), Some(exitCode)))
  } catch {
    case interrupted: InterruptedException =>
      logInfo("Runner thread for executor " + fullId + " interrupted")
      state = ExecutorState.KILLED
      killProcess(None)
    case e: Exception =>
      logError("Error running executor", e)
      state = ExecutorState.FAILED
      killProcess(Some(e.toString))
  }
}
Code 7

// Forward the ExecutorStateChanged event to the master
case executorStateChanged @ ExecutorStateChanged(appId, execId, state, message, exitStatus) =>
  handleExecutorStateChanged(executorStateChanged) // details in Code 8
Code 8

private[worker] def handleExecutorStateChanged(executorStateChanged: ExecutorStateChanged):
    Unit = {
  // Forward the ExecutorStateChanged message straight to the master
  sendToMaster(executorStateChanged)
  val state = executorStateChanged.state
  // If the executor has reached a terminal state
  if (ExecutorState.isFinished(state)) {
    val appId = executorStateChanged.appId
    val fullId = appId + "/" + executorStateChanged.execId
    val message = executorStateChanged.message
    val exitStatus = executorStateChanged.exitStatus
    executors.get(fullId) match {
      case Some(executor) =>
        logInfo("Executor " + fullId + " finished with state " + state +
          message.map(" message " + _).getOrElse("") +
          exitStatus.map(" exitStatus " + _).getOrElse(""))
        // Remove the executor from the local cache and move it to the
        // finished-executors map
        executors -= fullId
        finishedExecutors(fullId) = executor
        trimFinishedExecutorsIfNecessary()
        // Release the memory and CPU the executor occupied
        coresUsed -= executor.cores
        memoryUsed -= executor.memory
      case None =>
        logInfo("Unknown Executor " + fullId + " finished with state " + state +
          message.map(" message " + _).getOrElse("") +
          exitStatus.map(" exitStatus " + _).getOrElse(""))
    }
    maybeCleanupApplication(appId)
  }
}
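For reference, ExecutorState.isFinished treats KILLED, FAILED, LOST, and EXITED as terminal. A small self-contained sketch with a local copy of the enum (the real one lives in org.apache.spark.deploy); the state list matches the Spark versions this walkthrough appears to be based on, but verify against yours:

object ExecutorStateSketch extends Enumeration {
  val LAUNCHING, RUNNING, KILLED, FAILED, LOST, EXITED = Value

  // Terminal states: once here, the worker frees the executor's resources
  def isFinished(state: Value): Boolean = Seq(KILLED, FAILED, LOST, EXITED).contains(state)

  def main(args: Array[String]): Unit = {
    println(isFinished(RUNNING)) // false
    println(isFinished(EXITED))  // true
  }
}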