Location of the Master class: the Master class under org.apache.spark.deploy.master in spark-core_2.11-2.1.0.jar.
Driver scheduling: source analysis

/**
 * Schedule the currently available resources among waiting apps. This method will be called
 * every time a new app joins or resource availability changes.
 */
private def schedule(): Unit = {
  // First check the Master's state: if it is not ALIVE, return immediately.
  // In other words, a standby Master never schedules applications or any other resources.
  if (state != RecoveryState.ALIVE) {
    return
  }
  // Random.shuffle randomly reorders the elements of the collection it is given.
  // Take all previously registered workers, keep only the ones whose state is ALIVE,
  // and shuffle them with Random.shuffle.
  val shuffledAliveWorkers = Random.shuffle(workers.toSeq.filter(_.state == WorkerState.ALIVE))
  // The number of currently usable workers
  val numWorkersAlive = shuffledAliveWorkers.size
  var curPos = 0
  // waitingDrivers is only populated when an application is submitted to a standalone
  // cluster in cluster deploy mode; in client mode the driver is started directly in the
  // submitting process, so it is never registered with the Master and the Master never
  // schedules it. First, iterate over waitingDrivers.
  for (driver <- waitingDrivers.toList) { // iterate over a copy of waitingDrivers
    // We assign workers to each waiting driver in a round-robin fashion. For each driver, we
    // start from the last worker that was assigned a driver, and continue onwards until we have
    // explored all alive workers.
    var launched = false
    var numWorkersVisited = 0
    // Keep looping while numWorkersVisited < numWorkersAlive, i.e. while some alive worker
    // has not been visited yet, and the current driver has not been launched (launched is false).
    while (numWorkersVisited < numWorkersAlive && !launched) {
      val worker = shuffledAliveWorkers(curPos)
      numWorkersVisited += 1
      // If this worker's free memory is at least the memory the driver needs,
      // and its free cores are at least the cores the driver needs...
      if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) {
        // ...launch the driver on it (see Code 1)
        launchDriver(worker, driver)
        // and remove the driver from the waitingDrivers queue.
        waitingDrivers -= driver
        launched = true
      }
      // Advance the cursor to the next worker.
      curPos = (curPos + 1) % numWorkersAlive
    }
  }
  startExecutorsOnWorkers()
}
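The round-robin walk above is the whole driver-placement policy: shuffle the alive workers once, then advance a cursor worker by worker until one has enough free memory and cores. Below is a minimal, self-contained sketch of the same loop; Worker, Driver, and placeDrivers are hypothetical stand-ins for Spark's WorkerInfo, DriverDescription, and the scheduling code above, not Spark APIs.

import scala.util.Random

// Hypothetical stand-ins for WorkerInfo / DriverDescription.
case class Worker(id: String, var memoryFree: Int, var coresFree: Int)
case class Driver(id: String, mem: Int, cores: Int)

def placeDrivers(workers: Seq[Worker], drivers: Seq[Driver]): Map[String, String] = {
  val alive = Random.shuffle(workers)        // randomize the starting order, as schedule() does
  var curPos = 0
  var placements = Map.empty[String, String] // driverId -> workerId
  for (driver <- drivers) {
    var launched = false
    var visited = 0
    while (visited < alive.size && !launched) {
      val w = alive(curPos)
      visited += 1
      if (w.memoryFree >= driver.mem && w.coresFree >= driver.cores) {
        w.memoryFree -= driver.mem           // mirrors worker.addDriver(driver)
        w.coresFree -= driver.cores
        placements += (driver.id -> w.id)
        launched = true
      }
      curPos = (curPos + 1) % alive.size     // advance to the next worker either way
    }
  }
  placements
}

Because curPos advances even after a successful launch, consecutive drivers land on different workers when resources allow, which is exactly the round-robin behavior the comment describes.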
Code 1

/**
 * Launch the driver on a given worker.
 */
private def launchDriver(worker: WorkerInfo, driver: DriverInfo) {
  logInfo("Launching driver " + driver.id + " on worker " + worker.id)
  // Add the driver to the worker's in-memory bookkeeping: addDriver also adds the
  // driver's required memory and cores to the worker's used memory and used cores.
  worker.addDriver(driver)
  // Record the worker inside the driver as well, so each side holds a reference to the other.
  driver.worker = Some(worker)
  // Send a LaunchDriver message to the worker's RPC endpoint so the worker starts the driver.
  worker.endpoint.send(LaunchDriver(driver.id, driver.desc))
  // Finally, mark the driver as RUNNING.
  driver.state = DriverState.RUNNING
}

Executor scheduling: principle and source analysis
/**
* Schedule and launch executors on workers
*/
private def startExecutorsOnWorkers(): Unit = {
  // Right now this is a very simple FIFO scheduler. We keep trying to fit in the first app
  // in the queue, then the second app, etc.
  // Iterate over the ApplicationInfos in waitingApps, keeping only applications
  // that still need cores.
  for (app <- waitingApps if app.coresLeft > 0) {
    val coresPerExecutor: Option[Int] = app.desc.coresPerExecutor
    // Filter out workers that don't have enough resources to launch an executor:
    // from workers, keep the ones whose state is ALIVE, then keep the ones this
    // application can actually use, and sort them by free cores in descending order.
    val usableWorkers = workers.toArray.filter(_.state == WorkerState.ALIVE)
      .filter(worker => worker.memoryFree >= app.desc.memoryPerExecutorMB &&
        worker.coresFree >= coresPerExecutor.getOrElse(1))
      .sortBy(_.coresFree).reverse
    val assignedCores = scheduleExecutorsOnWorkers(app, usableWorkers, spreadOutApps) // see Code 2
    // Now that we've decided how many cores to allocate on each worker, let's allocate them:
    // walk over the usable workers, and for each worker that was assigned cores,
    // record the executors in the application's bookkeeping and launch them.
    for (pos <- 0 until usableWorkers.length if assignedCores(pos) > 0) {
      allocateWorkerResourceToExecutors(
        app, assignedCores(pos), coresPerExecutor, usableWorkers(pos)) // see Code 3
    }
  }
}

Code 2
private def scheduleExecutorsOnWorkers(
    app: ApplicationInfo,
    usableWorkers: Array[WorkerInfo],
    spreadOutApps: Boolean): Array[Int] = {
  val coresPerExecutor = app.desc.coresPerExecutor
  // The minimum number of cores per executor; defaults to 1.
  val minCoresPerExecutor = coresPerExecutor.getOrElse(1)
  val oneExecutorPerWorker = coresPerExecutor.isEmpty
  val memoryPerExecutor = app.desc.memoryPerExecutorMB
  // The number of usable workers
  val numUsable = usableWorkers.length
  val assignedCores = new Array[Int](numUsable) // Number of cores to give to each worker
  val assignedExecutors = new Array[Int](numUsable) // Number of new executors on each worker
  // Cores to assign: the minimum of what the application still needs and the
  // total free cores across the cluster's usable workers.
  var coresToAssign = math.min(app.coresLeft, usableWorkers.map(_.coresFree).sum)

  /** Return whether the specified worker can launch an executor for this app. */
  def canLaunchExecutor(pos: Int): Boolean = {
    val keepScheduling = coresToAssign >= minCoresPerExecutor
    val enoughCores = usableWorkers(pos).coresFree - assignedCores(pos) >= minCoresPerExecutor
    // If we allow multiple executors per worker, then we can always launch new executors.
    // Otherwise, if there is already an executor on this worker, just give it more cores.
    val launchingNewExecutor = !oneExecutorPerWorker || assignedExecutors(pos) == 0
    // For a new executor, also check that the worker's free cores and memory are sufficient.
    if (launchingNewExecutor) {
      val assignedMemory = assignedExecutors(pos) * memoryPerExecutor
      val enoughMemory = usableWorkers(pos).memoryFree - assignedMemory >= memoryPerExecutor
      val underLimit = assignedExecutors.sum + app.executors.size < app.executorLimit
      keepScheduling && enoughCores && enoughMemory && underLimit
    } else {
      // We're adding cores to an existing executor, so no need
      // to check the memory and executor limits.
      keepScheduling && enoughCores
    }
  }

  // Keep launching executors until no more workers can accommodate any
  // more executors, or if we have reached this application's limits.
  var freeWorkers = (0 until numUsable).filter(canLaunchExecutor)
  while (freeWorkers.nonEmpty) {
    freeWorkers.foreach { pos =>
      var keepScheduling = true
      while (keepScheduling && canLaunchExecutor(pos)) {
        // Take minCoresPerExecutor from the cores left to assign...
        coresToAssign -= minCoresPerExecutor
        // ...and record them as assigned to this worker.
        assignedCores(pos) += minCoresPerExecutor
        // If we are launching one executor per worker, then every iteration assigns 1 core
        // to the executor. Otherwise, every iteration assigns cores to a new executor.
        if (oneExecutorPerWorker) {
          assignedExecutors(pos) = 1
        } else {
          assignedExecutors(pos) += 1
        }
        // Spreading out an application means spreading out its executors across as
        // many workers as possible. If we are not spreading out, then we should keep
        // scheduling executors on this worker until we use all of its resources.
        // Otherwise, just move on to the next worker.
        // In other words, when spreadOut is disabled the loop keeps allocating on one worker
        // until nothing more fits, packing each application onto as few workers as possible:
        // with 10 workers of 10 cores each and an application that needs 20 cores in total,
        // only two workers end up being used.
        if (spreadOutApps) {
          keepScheduling = false
        }
      }
    }
    freeWorkers = freeWorkers.filter(canLaunchExecutor)
  }
  assignedCores
}
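To make the effect of spreadOutApps concrete, here is a minimal, self-contained sketch of just the core-assignment loop above, stripped of the memory and executor-limit checks. assign and its parameter names are illustrative, not Spark's.

// freeCores(i) is worker i's free core count; returns the cores assigned to each worker.
def assign(freeCores: Array[Int], coresNeeded: Int, minCores: Int, spreadOut: Boolean): Array[Int] = {
  val assigned = new Array[Int](freeCores.length)
  var toAssign = math.min(coresNeeded, freeCores.sum)
  def canLaunch(pos: Int): Boolean =
    toAssign >= minCores && freeCores(pos) - assigned(pos) >= minCores
  var free = freeCores.indices.filter(canLaunch)
  while (free.nonEmpty) {
    free.foreach { pos =>
      var keep = true
      while (keep && canLaunch(pos)) {
        toAssign -= minCores
        assigned(pos) += minCores
        if (spreadOut) keep = false // spreadOut: take one slice, then move to the next worker
      }
    }
    free = free.filter(canLaunch)
  }
  assigned
}

With ten workers of 10 free cores each and an application that needs 20 cores in slices of 1:

assign(Array.fill(10)(10), 20, 1, spreadOut = true)  // Array(2, 2, 2, 2, 2, 2, 2, 2, 2, 2)
assign(Array.fill(10)(10), 20, 1, spreadOut = false) // Array(10, 10, 0, 0, 0, 0, 0, 0, 0, 0)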
Code 3

// Dividing a worker's assigned cores among executors
private def allocateWorkerResourceToExecutors(
    app: ApplicationInfo,
    assignedCores: Int,
    coresPerExecutor: Option[Int],
    worker: WorkerInfo): Unit = {
  // If the number of cores per executor is specified, we divide the cores assigned
  // to this worker evenly among the executors with no remainder.
  // Otherwise, we launch a single executor that grabs all the assignedCores on this worker.
  // assigned cores / cores per executor = the number of executors actually launched here.
  val numExecutors = coresPerExecutor.map { assignedCores / _ }.getOrElse(1)
  val coresToAssign = coresPerExecutor.getOrElse(assignedCores)
  for (i <- 1 to numExecutors) {
    // First, record the executor in the application's bookkeeping: addExecutor creates an
    // ExecutorDesc that captures how many cores this executor is given.
    // Note that because allocation is driven by total cores, the final number of executors
    // and the cores per executor may differ from what spark-submit asked for. Under the
    // Spark 1.3.0 launch mechanism, for example, asking for 3 executors with 3 cores each
    // on a cluster of 9 workers with 1 core each meant 9 cores in total to assign; each
    // worker was given 1 core, and the result was 9 executors with 1 core apiece.
    val exec = app.addExecutor(worker, coresToAssign)
    // Then launch the executor on the worker (see Code 4)
    launchExecutor(worker, exec)
    // and mark the application as RUNNING.
    app.state = ApplicationState.RUNNING
  }
}
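The two branches of the numExecutors / coresToAssign arithmetic are easy to check by hand. A sketch with hypothetical values, assuming this worker was assigned 8 cores:

val assignedCores = 8

// coresPerExecutor = Some(2), i.e. spark.executor.cores was set to 2:
// launch 8 / 2 = 4 executors on this worker, 2 cores each.
val some = Option(2)
val n1 = some.map(assignedCores / _).getOrElse(1) // 4 executors
val c1 = some.getOrElse(assignedCores)            // 2 cores each

// coresPerExecutor = None, i.e. spark.executor.cores was not set:
// launch a single executor that grabs all 8 assigned cores.
val none: Option[Int] = None
val n2 = none.map(assignedCores / _).getOrElse(1) // 1 executor
val c2 = none.getOrElse(assignedCores)            // 8 cores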
Code 4

private def launchExecutor(worker: WorkerInfo, exec: ExecutorDesc): Unit = {
  logInfo("Launching executor " + exec.fullId + " on worker " + worker.id)
  // Add the executor to the worker's in-memory bookkeeping
  worker.addExecutor(exec)
  // Send a LaunchExecutor message to the worker
  worker.endpoint.send(LaunchExecutor(masterUrl,
    exec.application.id, exec.id, exec.application.desc, exec.cores, exec.memory))
  // Notify the driver of this executor's application with an ExecutorAdded message
  exec.application.driver.send(
    ExecutorAdded(exec.id, worker.id, worker.hostPort, exec.cores, exec.memory))
}