AppMaster向RM请求资源

MRAppMaster :serviceinit
// service to allocate containers from RM (if non-uber) or to fake it (uber)
containerAllocator = createContainerAllocator(null, context);
addIfService(containerAllocator);
dispatcher.register(ContainerAllocator.EventType.class, containerAllocator); protected ContainerAllocator createContainerAllocator(
final ClientService clientService, final AppContext context) {
return new ContainerAllocatorRouter(clientService, context); //
} private final class ContainerAllocatorRouter extends AbstractService
implements ContainerAllocator, RMHeartbeatHandler {
private final ClientService clientService;
private final AppContext context;
private ContainerAllocator containerAllocator; .....
@Override
protected void serviceStart() throws Exception {
if (job.isUber()) {
this.containerAllocator = new LocalContainerAllocator(
this.clientService, this.context, nmHost, nmPort, nmHttpPort
, containerID);
} else {
this.containerAllocator = new RMContainerAllocator( ///
this.clientService, this.context);
}
((Service)this.containerAllocator).init(getConfig());
((Service)this.containerAllocator).start();
super.serviceStart();
// org.apache.hadoop.mapreduce.v2.app.rm 包中的 RMContainerAllocator 类有该方法
protected synchronized void heartbeat() throws Exception {
scheduleStats.updateAndLogIfChanged("Before Scheduling: ");
List<Container> allocatedContainers = getResources(); //向远程RM发送心跳信息,注意心跳里可能没有新的资源请求信息
//只是告诉RM自己还活着,或者只是从RM取得分配资源
if (allocatedContainers.size() > 0) {
scheduledRequests.assign(allocatedContainers); //获得的container具体分配到任务task (应该是重排序)
}
// 资源请求包括的字段:
// 优先级,期望所在的host,内存大小等 (默认三副本,可能会有7个资源请求:3个local,3个rack,1个随机)
}
// 以下是RMContainerAllocator父类RMCommunicator的方法
protected void startAllocatorThread() {
allocatorThread = new Thread(new Runnable() {
@Override
public void run() {
while (!stopped.get() && !Thread.currentThread().isInterrupted()) {
try {
Thread.sleep(rmPollInterval); //默认每秒
try {
heartbeat(); //发送心跳
... private List<Container> getResources() throws Exception {
int headRoom = getAvailableResources() != null
? getAvailableResources().getMemory() : 0;//first time it would be null
AllocateResponse response;
/*
* If contact with RM is lost, the AM will wait MR_AM_TO_RM_WAIT_INTERVAL_MS
* milliseconds before aborting. During this interval, AM will still try
* to contact the RM.
*/
try {
response = makeRemoteRequest(); //关键 makeRemoteRequest方法为其父类RMContainerRequestor定义的方法 protected AllocateResponse makeRemoteRequest() throws IOException {
ResourceBlacklistRequest blacklistRequest =
ResourceBlacklistRequest.newInstance(new ArrayList<String>(blacklistAdditions),
new ArrayList<String>(blacklistRemovals));
AllocateRequest allocateRequest = //新建个资源请求
AllocateRequest.newInstance(lastResponseID,
super.getApplicationProgress(), new ArrayList<ResourceRequest>(ask), //这个ask是集合类,存ResourceRequest实例,
//这里只看到它的新建(初始化),那么它是在哪里被赋值的呢
new ArrayList<ContainerId>(release), blacklistRequest);
AllocateResponse allocateResponse;
try {
allocateResponse = scheduler.allocate(allocateRequest); //关键,分配资源,此处的scheduler 并非是调度器
//而是ApplicationMasterProtocol,它最终会调用到调度器 scheduler为其父类RMCommunicator新建
protected ApplicationMasterProtocol scheduler;
...
protected void serviceStart() throws Exception {
scheduler= createSchedulerProxy();
.. protected ApplicationMasterProtocol createSchedulerProxy() {
final Configuration conf = getConfig(); try {
return ClientRMProxy.createRMProxy(conf, ApplicationMasterProtocol.class); //ApplicationMasterProtocol协议是关键
//通过他远程调用ApplicationMasterService中的方法
} catch (IOException e) {
throw new YarnRuntimeException(e);
}
}
//下面追踪ask的赋值最终是在哪里被调用的
//ask的赋值最后是由addContainerReq方法触发的,该方法在RMContainerAllocator中被调用
private void addResourceRequestToAsk(ResourceRequest remoteRequest) {
// because objects inside the resource map can be deleted ask can end up
// containing an object that matches new resource object but with different
// numContainers. So existing values must be replaced explicitly
if(ask.contains(remoteRequest)) {
ask.remove(remoteRequest);
}
ask.add(remoteRequest);
} protected void addContainerReq(ContainerRequest req) {
// Create resource requests
for (String host : req.hosts) {
// Data-local
if (!isNodeBlacklisted(host)) {
addResourceRequest(req.priority, host, req.capability);
}
} // Nothing Rack-local for now
for (String rack : req.racks) {
addResourceRequest(req.priority, rack, req.capability);
} // Off-switch
addResourceRequest(req.priority, ResourceRequest.ANY, req.capability);
} RMContainerAllocator内
void addMap(ContainerRequestEvent event) { //addMap方法
ContainerRequest request = null; if (event.getEarlierAttemptFailed()) {
earlierFailedMaps.add(event.getAttemptID());
request = new ContainerRequest(event, PRIORITY_FAST_FAIL_MAP);
LOG.info("Added "+event.getAttemptID()+" to list of failed maps");
} else {
for (String host : event.getHosts()) {
LinkedList<TaskAttemptId> list = mapsHostMapping.get(host);
if (list == null) {
list = new LinkedList<TaskAttemptId>();
mapsHostMapping.put(host, list);
}
list.add(event.getAttemptID());
if (LOG.isDebugEnabled()) {
LOG.debug("Added attempt req to host " + host);
}
}
for (String rack: event.getRacks()) {
LinkedList<TaskAttemptId> list = mapsRackMapping.get(rack);
if (list == null) {
list = new LinkedList<TaskAttemptId>();
mapsRackMapping.put(rack, list);
}
list.add(event.getAttemptID());
if (LOG.isDebugEnabled()) {
LOG.debug("Added attempt req to rack " + rack);
}
}
request = new ContainerRequest(event, PRIORITY_MAP);
}
maps.put(event.getAttemptID(), request);
addContainerReq(request); //调用 //addMap在该方法内被调用
protected synchronized void handleEvent(ContainerAllocatorEvent event) {
recalculateReduceSchedule = true;
..................
scheduledRequests.addMap(reqEvent);//maps are immediately scheduled protected void serviceStart() throws Exception {
this.eventHandlingThread = new Thread() {
@SuppressWarnings("unchecked")
@Override
public void run() { ContainerAllocatorEvent event; while (!stopped.get() && !Thread.currentThread().isInterrupted()) {
try {
event = RMContainerAllocator.this.eventQueue.take(); //取出事件
} catch (InterruptedException e) {
if (!stopped.get()) {
LOG.error("Returning, interrupted : " + e);
}
return;
} try {
handleEvent(event); //调用 // 事件加入在MRAppMaster内,加入的事件在上面的方法被处理,该方法在哪里调用了呢?
public void handle(ContainerAllocatorEvent event) {
this.containerAllocator.handle(event);
}

RM端接受AppMaster心跳请求

 //总结,applicationmaster最终通过ApplicationMasterProtocol#allocate向RM汇报资源需求,RM端的ApplicationMasterService提供服务,并最终调用调度器的allocate
//将新的资源需求写入内存结构,并返回已经分配的资源
public class ApplicationMasterService extends AbstractService implements
ApplicationMasterProtocol {
public AllocateResponse allocate(AllocateRequest request)
throws YarnException, IOException {
..
// Allow only one thread in AM to do heartbeat at a time.
synchronized (lastResponse) { // Send the status update to the appAttempt.
this.rmContext.getDispatcher().getEventHandler().handle(
new RMAppAttemptStatusupdateEvent(appAttemptId, request
.getProgress())); List<ResourceRequest> ask = request.getAskList(); //ask,release为封装的请求
List<ContainerId> release = request.getReleaseList(); // Send new requests to appAttempt.
Allocation allocation =
this.rScheduler.allocate(appAttemptId, ask, release,
blacklistAdditions, blacklistRemovals); //调用RM端的调度器 rScheduler
..
allocateResponse.setUpdatedNodes(updatedNodeReports);
} //封装一个response返回
allocateResponse.setAllocatedContainers(allocation.getContainers());
allocateResponse.setCompletedContainersStatuses(appAttempt
.pullJustFinishedContainers());
allocateResponse.setResponseId(lastResponse.getResponseId() + 1);
allocateResponse.setAvailableResources(allocation.getResourceLimit()); allocateResponse.setNumClusterNodes(this.rScheduler.getNumClusterNodes()); // add preemption to the allocateResponse message (if any)
allocateResponse.setPreemptionMessage(generatePreemptionMessage(allocation)); // Adding NMTokens for allocated containers.
if (!allocation.getContainers().isEmpty()) {
allocateResponse.setNMTokens(rmContext.getNMTokenSecretManager()
.createAndGetNMTokens(app.getUser(), appAttemptId, //FIFO Scheduler的allocate方法
...
// Update application requests
application.updateResourceRequests(ask); //将此次资源请求写入application的请求内存结构,等待nm发送心跳分配完后,写入application的分配内存结构,
//最终要更新到这样的一个内存结构 final Map<Priority, Map<String, ResourceRequest>> requests =
// new HashMap<Priority, Map<String, ResourceRequest>>();
...
return new Allocation(
application.pullNewlyAllocatedContainers(), //application内部的集合类,从分配好的内存结构里取
application.getHeadroom()); //application为FiCaSchedulerApp类
synchronized public List<Container> pullNewlyAllocatedContainers() {
List<Container> returnContainerList = new ArrayList<Container>(
newlyAllocatedContainers.size());
for (RMContainer rmContainer : newlyAllocatedContainers) { //只是从newlyAllocatedContainers里面取,newlyAllocatedContainers的赋值是NM发送心跳后调用assignContainer后赋值的
rmContainer.handle(new RMContainerEvent(rmContainer.getContainerId(),
RMContainerEventType.ACQUIRED));
returnContainerList.add(rmContainer.getContainer());
}
newlyAllocatedContainers.clear();
return returnContainerList;
} synchronized public RMContainer allocate(NodeType type, FiCaSchedulerNode node,
Priority priority, ResourceRequest request,
Container container) { ....
// Add it to allContainers list.
newlyAllocatedContainers.add(rmContainer); //给其赋值 //FIFO scheduler类调用上面方法,该方法是NM发送心跳最终调用的方法
private int assignContainer(FiCaSchedulerNode node, FiCaSchedulerApp application,
Priority priority, int assignableContainers,
ResourceRequest request, NodeType type) {
....
} // Create the container
Container container =
BuilderUtils.newContainer(containerId, nodeId, node.getRMNode()
.getHttpAddress(), capability, priority, containerToken); // Allocate! // Inform the application
RMContainer rmContainer =
application.allocate(type, node, priority, request, container);
//总结以上看到的,也就是appmaster向RM发送请求时,RM是从当前内存结构中返回已分配的资源,这个过程是异步的:当nm发送心跳时,会根据appmaster的资源请求分配资源,
//写到内存结构,等appmaster来取 (发送的资源请求要先保存下来,保存在资源请求的内存结构里,即application(FiCaSchedulerApp)中,参见application.showRequests())

YARN的 AM与RM通信,申请资源分配过程的更多相关文章

  1. 3.19 YARN HA架构及(RM/NM) Restart讲解

    一.ResourceManager HA ResourceManager(RM)负责跟踪集群中的资源,以及调度应用程序(例如,MapReduce作业). 在Hadoop 2.4之前,ResourceM ...

  2. iOS $299刀企业证书申请的过程以及细节补充

    最近申请了iOS的 299刀企业证书,相关过程有些问题,分享出来,以便后来人参考. 申请的过程我主要参考了别人以前的文章,链接如下: 1.https://developer.apple.com/cn/ ...

  3. 黄聪:iOS $299刀企业证书申请的过程以及细节补充

    最近申请了iOS的 299刀企业证书,相关过程有些问题,分享出来,以便后来人参考.申请的过程我主要参考了别人以前的文章,链接如下: 1.https://developer.apple.com/cn/s ...

  4. Hadoop YARN配置参数剖析—RM与NM相关参数

    注意,配置这些参数前,应充分理解这几个参数的含义,以防止误配给集群带来的隐患.另外,这些参数均需要在yarn-site.xml中配置. 1.    ResourceManager相关配置参数 (1) ...

  5. hadoop之 YARN配置参数剖析—RM与NM相关参数

    参数均需要在yarn-site.xml中配置: 1. ResourceManager相关配置参数 (1) yarn.resourcemanager.address 参数解释:ResourceManag ...

  6. 【hadoop2.2(yarn)】基于yarn成功执行分布式map-reduce,记录问题解决过程。

    hadoop2.x改进了hadoop1.x的架构, 具体yarn如何工作以及改进了什么可以在网上学, 这里仅记录我个人搭建的问题和理解,希望能帮助遇到困难的朋友. 在开始前,必须了解yarn版本的ma ...

  7. iOS $299刀企业证书申请的过程以及细节补充(二)

    上篇博客写的过程中,没有图,也没有相应的说明.这次再补充一些信息: 1.从 https://developer.apple.com/ios/enroll/dunsLookupForm.action 申 ...

  8. BLE蓝牙通信指令交互过程配对与绑定

    最简单一次蓝牙通信需要以上相关步骤,包括discovery device,connect,pairing,bond等4个主要部分.BLE中主从机建立连接,到配对和绑定的过程如下图:

  9. jemalloc源码结构分析(一):内存申请处理过程

    一.5种malloc方法 1)tcache_alloc_small 2)arena_malloc_small 3)tcache_alloc_large 4)arena_malloc_large 5)h ...

随机推荐

  1. Dalvik opcodes

    原文地址: http://pallergabor.uw.hu/androidblog/dalvik_opcodes.html Dalvik opcodes Author: Gabor Paller V ...

  2. FireFox Prevent this page from creating addtional dialogs 火狐浏览器 设置 阻止此页面创建更多对话框

    FireFox英文版本老弹出“Prevent this page from creating addtional dialogs”的确认框 FireFox english version alert ...

  3. Winform开发几个常用的开发经验及知识积累(一)

    本人做Winform开发多年,孜孜不倦,略有小成,其中收集或者自己开发一些常用的东西,基本上在各个项目都能用到的一些开发经验及知识积累,现逐步介绍一些,以飨读者,共同进步. 1.窗口[×]关闭按钮变为 ...

  4. scrapy爬虫框架入门教程

    scrapy安装请参考:安装指南. 我们将使用开放目录项目(dmoz)作为抓取的例子. 这篇入门教程将引导你完成如下任务: 创建一个新的Scrapy项目 定义提取的Item 写一个Spider用来爬行 ...

  5. Python脚本控制的WebDriver 常用操作 <七>浏览器前进和后退操作

    下面将使用WebDriver来控制浏览器的前进和后退操作 测试用例场景 此操作和get.url()方法功能相同 Python脚本 # coding=gbk ''' Created on 2013年12 ...

  6. 刀哥多线程之主队列gcd-06-main_queue

    主队列 特点 专门用来在主线程上调度任务的队列 不会开启线程 以先进先出的方式,在主线程空闲时才会调度队列中的任务在主线程执行 如果当前主线程正在有任务执行,那么无论主队列中当前被添加了什么任务,都不 ...

  7. 利用JQ实现的,高仿 彩虹岛官网导航栏(学习HTML过程中的小记录)

    利用JQ实现的,高仿 彩虹岛官网导航栏(学习HTML过程中的小记录)   作者:王可利(Star·星星) 总结: 今天学习的jQ类库的使用,代码重复的比较多需要完善.严格区分大小写,在 $(" ...

  8. 算法系列4《Luhn》

    Luhn算法由IBM的Hans Peter Luhn发明,又称为"模10"算法,是一种简单的校验和算法,用来验证识别号,一般会被用于身份证号码,信用卡号.IMEI号.社会保险号的验 ...

  9. Spring组件扫描<context:component-scan/>使用详解

    1.如果不想在xml文件中配置bean,我们可以给我们的类加上spring组件注解,只需再配置下spring的扫描器就可以实现bean的自动载入. <!-- 注解注入 --> <co ...

  10. Android工具与其它

    文本文件: Tool: NotePad++ 代码工具: Tool:Eclipse+STAN+(乱七八糟c,c++,java,android),Source Insight 3 Log工具: Tool: ...