Notes -------- Spark Worker: how it works, with source code analysis

/**
 * Source code analysis: launching a driver
 */
case LaunchDriver(driverId, driverDesc) =>
  logInfo(s"Asked to launch driver $driverId")
  // Create the DriverRunner (it manages the thread that runs the driver)
  val driver = new DriverRunner(
    conf,
    driverId,
    workDir,
    sparkHome,
    driverDesc.copy(command = Worker.maybeUpdateSSLSettings(driverDesc.command, conf)),
    self,
    workerUri,
    securityMgr)
  // Register the DriverRunner in the local drivers map
  drivers(driverId) = driver
  // Start the driver (see Code 1 below)
  driver.start()
  // Account for the resources the driver occupies
  coresUsed += driverDesc.cores
  memoryUsed += driverDesc.mem
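The launch path above and the state-change handler in Code 4 are two halves of the same bookkeeping: resources are claimed when a runner starts and returned when its terminal state arrives. A minimal, self-contained sketch of that pattern; the class and field names here are illustrative, not Spark's own:

import scala.collection.mutable

// Hypothetical descriptor, standing in for DriverDescription/ExecutorRunner fields.
final case class RunnerDesc(cores: Int, mem: Int)

class ResourceLedger(coresTotal: Int, memoryTotal: Int) {
  private val running = mutable.HashMap.empty[String, RunnerDesc]
  private var coresUsed = 0
  private var memoryUsed = 0

  // Mirrors what LaunchDriver/LaunchExecutor do after starting a runner.
  def launch(id: String, desc: RunnerDesc): Unit = {
    running(id) = desc
    coresUsed += desc.cores
    memoryUsed += desc.mem
  }

  // Mirrors what handleDriverStateChanged/handleExecutorStateChanged do.
  def finish(id: String): Unit = running.remove(id).foreach { desc =>
    coresUsed -= desc.cores
    memoryUsed -= desc.mem
  }

  def coresFree: Int = coresTotal - coresUsed
  def memoryFree: Int = memoryTotal - memoryUsed
}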
Code 1

/** Starts a thread to run and manage the driver. */
private[worker] def start() = {
  // DriverRunner mechanics: start a dedicated Java thread
  new Thread("DriverRunner for " + driverId) {
    override def run() {
      var shutdownHook: AnyRef = null
      try {
        shutdownHook = ShutdownHookManager.addShutdownHook { () =>
          logInfo(s"Worker shutting down, killing driver $driverId")
          kill()
        }
        // Prepare driver jars and run the driver. This does three things:
        // 1. create the DriverRunner's working directory;
        // 2. download the user-uploaded jar (the Spark application we wrote:
        //    packaged with Maven for Java, or exported as a jar for Scala);
        // 3. build the ProcessBuilder and run the driver process.
        val exitCode = prepareAndRunDriver() // see Code 2 below
        // Set the final state depending on whether the driver was forcibly
        // killed and on the process exit code
        finalState = if (exitCode == 0) {
          Some(DriverState.FINISHED)
        } else if (killed) {
          Some(DriverState.KILLED)
        } else {
          Some(DriverState.FAILED)
        }
      } catch {
        case e: Exception =>
          kill()
          finalState = Some(DriverState.ERROR)
          finalException = Some(e)
      } finally {
        if (shutdownHook != null) {
          ShutdownHookManager.removeShutdownHook(shutdownHook)
        }
      }
      // Notify the worker of the final driver state and any exception: the
      // DriverRunner thread sends a DriverStateChanged message to the Worker
      // endpoint it belongs to (see Code 3 below)
      worker.send(DriverStateChanged(driverId, finalState.get, finalException))
    }
  }.start()
}
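Stripped of Spark's plumbing, Code 1 is the classic "worker thread plus JVM shutdown hook" pattern. A runnable sketch under that reading; runDriver and notifyWorker are hypothetical stand-ins, and plain Runtime hooks replace Spark's ShutdownHookManager:

object DriverRunnerSketch {
  @volatile private var killed = false

  def start(driverId: String): Unit = {
    new Thread(s"DriverRunner for $driverId") {
      override def run(): Unit = {
        // If the JVM dies while the driver runs, mark the driver as killed.
        val hook = new Thread(() => { killed = true })
        Runtime.getRuntime.addShutdownHook(hook)
        val finalState =
          try {
            val exitCode = runDriver() // blocks until the driver process exits
            if (exitCode == 0) "FINISHED" else if (killed) "KILLED" else "FAILED"
          } catch {
            case _: Exception => "ERROR"
          } finally {
            // Spark's ShutdownHookManager likewise tolerates removal during shutdown.
            try Runtime.getRuntime.removeShutdownHook(hook)
            catch { case _: IllegalStateException => () }
          }
        notifyWorker(driverId, finalState)
      }
    }.start()
  }

  private def runDriver(): Int = 0 // placeholder for prepareAndRunDriver()
  private def notifyWorker(id: String, state: String): Unit =
    println(s"driver $id -> $state")
}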
Code 2

private[worker] def prepareAndRunDriver(): Int = {
  // Step 1: create the DriverRunner's working directory
  val driverDir = createWorkingDirectory()
  // Step 2: download the user-uploaded jar
  val localJarFilename = downloadUserJar(driverDir)

  def substituteVariables(argument: String): String = argument match {
    case "{{WORKER_URL}}" => workerUrl
    case "{{USER_JAR}}" => localJarFilename
    case other => other
  }

  // TODO: If we add ability to submit multiple jars they should also be added here

  // Step 3: build the ProcessBuilder, passing in the driver's launch command,
  // the memory it needs, and so on
  val builder = CommandUtils.buildProcessBuilder(driverDesc.command, securityManager,
    driverDesc.mem, sparkHome.getAbsolutePath, substituteVariables)

  runDriver(builder, driverDir, driverDesc.supervise)
}
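The substituteVariables hook is just a token-rewriting function applied to each argument of the command template before it reaches the ProcessBuilder. A tiny self-contained illustration; every value below is made up:

val workerUrl = "spark://Worker@192.168.0.10:7078"           // assumed value
val localJarFilename = "/work/driver-20190101-0001/app.jar"  // assumed value

def substituteVariables(argument: String): String = argument match {
  case "{{WORKER_URL}}" => workerUrl
  case "{{USER_JAR}}"   => localJarFilename
  case other            => other
}

val template = Seq("java", "-cp", "{{USER_JAR}}", "org.example.Main", "{{WORKER_URL}}")
println(template.map(substituteVariables).mkString(" "))
// java -cp /work/driver-20190101-0001/app.jar org.example.Main spark://Worker@192.168.0.10:7078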
Code 3

// After the driver finishes, the DriverRunner thread sends the final state to
// its Worker; the Worker in turn forwards a DriverStateChanged message to the Master
case driverStateChanged @ DriverStateChanged(driverId, state, exception) =>
  handleDriverStateChanged(driverStateChanged) // see Code 4 below

Code 4
private[worker] def handleDriverStateChanged(driverStateChanged: DriverStateChanged): Unit = {
  val driverId = driverStateChanged.driverId
  val exception = driverStateChanged.exception
  val state = driverStateChanged.state
  state match {
    case DriverState.ERROR =>
      logWarning(s"Driver $driverId failed with unrecoverable exception: ${exception.get}")
    case DriverState.FAILED =>
      logWarning(s"Driver $driverId exited with failure")
    case DriverState.FINISHED =>
      logInfo(s"Driver $driverId exited successfully")
    case DriverState.KILLED =>
      logInfo(s"Driver $driverId was killed by user")
    case _ =>
      logDebug(s"Driver $driverId changed state to $state")
  }
  // The Worker forwards the DriverStateChanged message to the Master,
  // which updates the driver's state on its side
  sendToMaster(driverStateChanged)
  // Remove the driver from the local cache
  val driver = drivers.remove(driverId).get
  // Move the driver into the finished-drivers collection
  finishedDrivers(driverId) = driver
  trimFinishedDriversIfNecessary()
  // Release the memory and CPU the driver was using
  memoryUsed -= driver.driverDesc.mem
  coresUsed -= driver.driverDesc.cores
}
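trimFinishedDriversIfNecessary keeps the finished-drivers history bounded so the Worker UI does not grow without limit. A sketch of one plausible eviction policy over an insertion-ordered map; the exact policy and limit in Spark may differ, and the retainedDrivers value here is assumed:

import scala.collection.mutable

val retainedDrivers = 1000 // assumed limit, cf. spark.worker.ui.retainedDrivers
val finishedDrivers = mutable.LinkedHashMap.empty[String, String]

def trimFinishedDriversIfNecessary(): Unit = {
  if (finishedDrivers.size > retainedDrivers) {
    // Evict the oldest entries first; LinkedHashMap preserves insertion order.
    val toRemove = math.max(finishedDrivers.size / 10, 1)
    finishedDrivers.keys.take(toRemove).toList.foreach(finishedDrivers.remove)
  }
}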
/**
 * Source code analysis: launching an executor
 */
case LaunchExecutor(masterUrl, appId, execId, appDesc, cores_, memory_) =>
  if (masterUrl != activeMasterUrl) {
    logWarning("Invalid Master (" + masterUrl + ") attempted to launch executor.")
  } else {
    try {
      logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name))

      // Create the executor's local working directory
      val executorDir = new File(workDir, appId + "/" + execId)
      if (!executorDir.mkdirs()) {
        throw new IOException("Failed to create directory " + executorDir)
      }

      // Create local dirs for the executor. These are passed to the executor via the
      // SPARK_EXECUTOR_DIRS environment variable, and deleted by the Worker when the
      // application finishes.
      val appLocalDirs = appDirectories.getOrElse(appId,
        Utils.getOrCreateLocalRootDirs(conf).map { dir =>
          val appDir = Utils.createDirectory(dir, namePrefix = "executor")
          Utils.chmod700(appDir)
          appDir.getAbsolutePath()
        }.toSeq)
      appDirectories(appId) = appLocalDirs

      // Create the ExecutorRunner
      val manager = new ExecutorRunner(
        appId,
        execId,
        appDesc.copy(command = Worker.maybeUpdateSSLSettings(appDesc.command, conf)),
        cores_,
        memory_,
        self,
        workerId,
        host,
        webUi.boundPort,
        publicAddress,
        sparkHome,
        executorDir,
        workerUri,
        conf,
        appLocalDirs,
        ExecutorState.RUNNING)
      // Register the ExecutorRunner in the local cache
      executors(appId + "/" + execId) = manager
      // Start the ExecutorRunner (see Code 5 below)
      manager.start()
      // Account for the CPU and memory the executor will use
      coresUsed += cores_
      memoryUsed += memory_
      // Send the Master an ExecutorStateChanged message so it can update its state
      sendToMaster(ExecutorStateChanged(appId, execId, manager.state, None, None))
    } catch {
      case e: Exception =>
        logError(s"Failed to launch executor $appId/$execId for ${appDesc.name}.", e)
        if (executors.contains(appId + "/" + execId)) {
          executors(appId + "/" + execId).kill()
          executors -= appId + "/" + execId
        }
        sendToMaster(ExecutorStateChanged(appId, execId, ExecutorState.FAILED,
          Some(e.toString), None))
    }
  }
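Utils.createDirectory plus Utils.chmod700 boil down to creating a scratch directory that only the current OS user can access. On a POSIX filesystem the same effect can be sketched with the JDK alone; the /tmp root is only an example:

import java.nio.file.{Files, Paths}
import java.nio.file.attribute.PosixFilePermissions

// rwx------ : the executor's scratch space is private to the worker's OS user.
val perms = PosixFilePermissions.asFileAttribute(
  PosixFilePermissions.fromString("rwx------"))
val appDir = Files.createTempDirectory(Paths.get("/tmp"), "executor", perms)
println(appDir.toAbsolutePath) // e.g. /tmp/executor5712645812700923123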
Code 5

private[worker] def start() {
  // Create a Java thread to fetch and run the executor
  workerThread = new Thread("ExecutorRunner for " + fullId) {
    override def run() { fetchAndRunExecutor() } // see Code 6 below
  }
  workerThread.start()
  // Shutdown hook that kills actors on shutdown.
  shutdownHook = ShutdownHookManager.addShutdownHook { () =>
    // It's possible that we arrive here before calling `fetchAndRunExecutor`, then `state` will
    // be `ExecutorState.RUNNING`. In this case, we should set `state` to `FAILED`.
    if (state == ExecutorState.RUNNING) {
      state = ExecutorState.FAILED
    }
    killProcess(Some("Worker shutting down"))
  }
}
Code 6

/**
 * Download and run the executor described in our ApplicationDescription
 */
private def fetchAndRunExecutor() {
  try {
    // Launch the process: wrap the executor command in a ProcessBuilder
    val builder = CommandUtils.buildProcessBuilder(appDesc.command, new SecurityManager(conf),
      memory, sparkHome.getAbsolutePath, substituteVariables)
    val command = builder.command()
    val formattedCommand = command.asScala.mkString("\"", "\" \"", "\"")
    logInfo(s"Launch command: $formattedCommand")

    builder.directory(executorDir)
    builder.environment.put("SPARK_EXECUTOR_DIRS", appLocalDirs.mkString(File.pathSeparator))
    // In case we are running this from within the Spark Shell, avoid creating a "scala"
    // parent process for the executor command
    builder.environment.put("SPARK_LAUNCH_WITH_SCALA", "")

    // Add webUI log urls
    val baseUrl =
      if (conf.getBoolean("spark.ui.reverseProxy", false)) {
        s"/proxy/$workerId/logPage/?appId=$appId&executorId=$execId&logType="
      } else {
        s"http://$publicAddress:$webUiPort/logPage/?appId=$appId&executorId=$execId&logType="
      }
    builder.environment.put("SPARK_LOG_URL_STDERR", s"${baseUrl}stderr")
    builder.environment.put("SPARK_LOG_URL_STDOUT", s"${baseUrl}stdout")

    // Start the executor process
    process = builder.start()

    // Redirect the process's InputStream (stdout) and ErrorStream (stderr)
    // to the stdout and stderr files in the executor's local working directory
    val header = "Spark Executor Command: %s\n%s\n\n".format(
      formattedCommand, "=" * 40)
    val stdout = new File(executorDir, "stdout")
    stdoutAppender = FileAppender(process.getInputStream, stdout, conf)
    val stderr = new File(executorDir, "stderr")
    Files.write(header, stderr, StandardCharsets.UTF_8)
    stderrAppender = FileAppender(process.getErrorStream, stderr, conf)

    // Wait for the executor process to exit; it may exit with code 0 (when the
    // driver instructs it to shut down) or with a nonzero exit code
    val exitCode = process.waitFor()
    state = ExecutorState.EXITED
    val message = "Command exited with code " + exitCode
    // Send an ExecutorStateChanged message to the Worker endpoint this
    // ExecutorRunner belongs to (see Code 7 below)
    worker.send(ExecutorStateChanged(appId, execId, state, Some(message), Some(exitCode)))
  } catch {
    case interrupted: InterruptedException =>
      logInfo("Runner thread for executor " + fullId + " interrupted")
      state = ExecutorState.KILLED
      killProcess(None)
    case e: Exception =>
      logError("Error running executor", e)
      state = ExecutorState.FAILED
      killProcess(Some(e.toString))
  }
}
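Code 6 combines three techniques: configure a ProcessBuilder (working directory plus environment), drain the child's stdout/stderr into files on background threads (what FileAppender does, minus its log rotation), and block in waitFor() for the exit code. A self-contained sketch, with /bin/echo standing in for the real executor command and /tmp/executor-demo as an example directory:

import java.io.{File, FileOutputStream, InputStream}

object FetchAndRunSketch {
  // FileAppender-style: a daemon thread that copies `in` into `file`.
  private def appendToFile(in: InputStream, file: File): Unit = {
    val t = new Thread(() => {
      val out = new FileOutputStream(file, true) // append
      try {
        val buf = new Array[Byte](8192)
        var n = in.read(buf)
        while (n != -1) { out.write(buf, 0, n); n = in.read(buf) }
      } finally { out.close(); in.close() }
    })
    t.setDaemon(true)
    t.start()
  }

  def main(args: Array[String]): Unit = {
    val executorDir = new File("/tmp/executor-demo") // example path
    executorDir.mkdirs()

    val builder = new ProcessBuilder("/bin/echo", "hello from the child process")
    builder.directory(executorDir)
    builder.environment().put("SPARK_EXECUTOR_DIRS", executorDir.getAbsolutePath)

    val process = builder.start()
    appendToFile(process.getInputStream, new File(executorDir, "stdout"))
    appendToFile(process.getErrorStream, new File(executorDir, "stderr"))

    val exitCode = process.waitFor() // block until the child exits
    println(s"Command exited with code $exitCode")
  }
}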
Code 7

// Forward the ExecutorStateChanged event on to the Master
case executorStateChanged @ ExecutorStateChanged(appId, execId, state, message, exitStatus) =>
  handleExecutorStateChanged(executorStateChanged) // see Code 8 below

Code 8
private[worker] def handleExecutorStateChanged(executorStateChanged: ExecutorStateChanged):
    Unit = {
  // Forward the ExecutorStateChanged message straight on to the Master
  sendToMaster(executorStateChanged)
  val state = executorStateChanged.state
  // If the executor has reached a terminal (finished) state
  if (ExecutorState.isFinished(state)) {
    val appId = executorStateChanged.appId
    val fullId = appId + "/" + executorStateChanged.execId
    val message = executorStateChanged.message
    val exitStatus = executorStateChanged.exitStatus
    executors.get(fullId) match {
      case Some(executor) =>
        logInfo("Executor " + fullId + " finished with state " + state +
          message.map(" message " + _).getOrElse("") +
          exitStatus.map(" exitStatus " + _).getOrElse(""))
        // Remove the executor from the in-memory cache
        executors -= fullId
        finishedExecutors(fullId) = executor
        trimFinishedExecutorsIfNecessary()
        // Release the memory and CPU the executor was using
        coresUsed -= executor.cores
        memoryUsed -= executor.memory
      case None =>
        logInfo("Unknown Executor " + fullId + " finished with state " + state +
          message.map(" message " + _).getOrElse("") +
          exitStatus.map(" exitStatus " + _).getOrElse(""))
    }
    maybeCleanupApplication(appId)
  }
}