Flink – JobManager.submitJob
The JobManager runs as an actor; a job arrives as a SubmitJob message (sent by the client, e.g. as the result of env.execute()):
case SubmitJob(jobGraph, listeningBehaviour) =>
  val client = sender()
  val jobInfo = new JobInfo(client, listeningBehaviour, System.currentTimeMillis(),
    jobGraph.getSessionTimeout)

  submitJob(jobGraph, jobInfo)
submitJob does three things:

Generate the ExecutionGraph from the JobGraph
Restore state from CheckpointedState, or from a Savepoint
Submit the ExecutionGraph to the Scheduler for scheduling
ExecutionGraph
executionGraph = ExecutionGraphBuilder.buildGraph(
    executionGraph,      // currentJobs.get(jobGraph.getJobID), an existing ExecutionGraph for this job id, if any
    jobGraph,
    flinkConfiguration,  // configuration
    futureExecutor,      // Executors.newFixedThreadPool(numberProcessors, new NamedThreadFactory("jobmanager-future-", "-thread-")), a thread pool sized by the number of CPU cores
    ioExecutor,          // Executors.newFixedThreadPool(numberProcessors, new NamedThreadFactory("jobmanager-io-", "-thread-"))
    userCodeLoader,      // libraryCacheManager.getClassLoader(jobGraph.getJobID), loads user code from the jars
    checkpointRecoveryFactory, // used for createCheckpointStore and createCheckpointIDCounter; comes in standalone and ZooKeeper variants
    Time.of(timeout.length, timeout.unit),
    restartStrategy,     // the job's restart strategy
    jobMetrics,
    numSlots,            // scheduler.getTotalNumberOfSlots(), the total number of slots across all instances registered with this JM
    log.logger)
ExecutionGraphBuilder.buildGraph
New
// create a new execution graph, if none exists so far
final ExecutionGraph executionGraph;

try {
executionGraph = (prior != null) ? prior :
new ExecutionGraph(
futureExecutor,
ioExecutor,
jobId,
jobName,
jobGraph.getJobConfiguration(),
jobGraph.getSerializedExecutionConfig(),
timeout,
restartStrategy,
jobGraph.getUserJarBlobKeys(),
jobGraph.getClasspaths(),
classLoader,
metrics);
} catch (IOException e) {
throw new JobException("Could not create the execution graph.", e);
}
attachJobGraph builds the graph's vertices and edges. For a simple Source → Map → Sink job, sortedTopology is [Source, Map, Sink].
// topologically sort the job vertices and attach the graph to the existing one
List<JobVertex> sortedTopology = jobGraph.getVerticesSortedTopologicallyFromSources();
executionGraph.attachJobGraph(sortedTopology);
ExecutionGraph.attachJobGraph
       for (JobVertex jobVertex : topologiallySorted) {
            // create the execution job vertex and attach it to the graph
            ExecutionJobVertex ejv =
                    new ExecutionJobVertex(this, jobVertex, 1, timeout, createTimestamp);
            ejv.connectToPredecessors(this.intermediateResults);

            // all job vertices that are part of this graph:
            // ConcurrentHashMap<JobVertexID, ExecutionJobVertex> tasks
            ExecutionJobVertex previousTask = this.tasks.putIfAbsent(jobVertex.getID(), ejv);

            for (IntermediateResult res : ejv.getProducedDataSets()) {
                // all intermediate results that are part of this graph:
                // ConcurrentHashMap<IntermediateDataSetID, IntermediateResult> intermediateResults
                IntermediateResult previousDataSet = this.intermediateResults.putIfAbsent(res.getId(), res);
            }

            // all vertices, in the order in which they were created:
            // List<ExecutionJobVertex> verticesInCreationOrder
            this.verticesInCreationOrder.add(ejv);
        }
Each JobVertex is wrapped into an ExecutionJobVertex.
This creates, in turn, the ExecutionJobVertex, ExecutionVertex and Execution objects, as well as the IntermediateResult and IntermediateResultPartition objects.
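As a rough sanity check of that fan-out, here is a toy calculation (plain Java, not Flink code; parallelism 4 and one produced dataset are hypothetical numbers, the counts follow the constructors shown below):

    // Toy illustration of how one JobVertex fans out into runtime objects.
    public class JobVertexFanOut {
        public static void main(String[] args) {
            int parallelism = 4;       // jobVertex.getParallelism()
            int producedDataSets = 1;  // jobVertex.getNumberOfProducedIntermediateDataSets()

            System.out.println("ExecutionJobVertex: 1");                           // one per JobVertex
            System.out.println("IntermediateResults: " + producedDataSets);        // one per IntermediateDataSet
            System.out.println("IntermediateResultPartitions: "
                    + producedDataSets * parallelism);                             // one per result per subtask
            System.out.println("ExecutionVertices: " + parallelism);               // one per parallel subtask
            System.out.println("Executions (current attempts): " + parallelism);   // one per ExecutionVertex
        }
    }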
ExecutionJobVertex
public ExecutionJobVertex(
        ExecutionGraph graph,
        JobVertex jobVertex,
        int defaultParallelism,
        Time timeout,
        long createTimestamp) throws JobException {

    if (graph == null || jobVertex == null) {
        throw new NullPointerException();
    }

    // the parallelism determines how many ExecutionVertex instances there are
    int vertexParallelism = jobVertex.getParallelism();
    int numTaskVertices = vertexParallelism > 0 ? vertexParallelism : defaultParallelism;

    // the ExecutionVertex instances
    this.taskVertices = new ExecutionVertex[numTaskVertices];
    this.inputs = new ArrayList<>(jobVertex.getInputs().size());

    // take the sharing group
    this.slotSharingGroup = jobVertex.getSlotSharingGroup();
    this.coLocationGroup = jobVertex.getCoLocationGroup();

    // create the IntermediateResults that will hold the intermediate data
    this.producedDataSets = new IntermediateResult[jobVertex.getNumberOfProducedIntermediateDataSets()];

    for (int i = 0; i < jobVertex.getProducedDataSets().size(); i++) {
        final IntermediateDataSet result = jobVertex.getProducedDataSets().get(i);

        // wrap each IntermediateDataSet of the JobGraph into an IntermediateResult
        this.producedDataSets[i] = new IntermediateResult(
                result.getId(),
                this,
                numTaskVertices,
                result.getResultType());
    }

    // create all task vertices
    for (int i = 0; i < numTaskVertices; i++) {
        ExecutionVertex vertex = new ExecutionVertex( // initialize the ExecutionVertex
                this, i, this.producedDataSets, timeout, createTimestamp, maxPriorAttemptsHistoryLength);
        this.taskVertices[i] = vertex;
    }

    finishedSubtasks = new boolean[parallelism];
}
ExecutionVertex
public ExecutionVertex(
        ExecutionJobVertex jobVertex,
        int subTaskIndex, // the subtask index; each subtask corresponds to one ExecutionVertex
        IntermediateResult[] producedDataSets,
        Time timeout,
        long createTimestamp,
        int maxPriorExecutionHistoryLength) {

    this.jobVertex = jobVertex;
    this.subTaskIndex = subTaskIndex;
    this.taskNameWithSubtask = String.format("%s (%d/%d)",
            jobVertex.getJobVertex().getName(), subTaskIndex + 1, jobVertex.getParallelism()); // e.g. "MyMap (3/8)"

    // records the IntermediateResultPartitions produced by this subtask
    this.resultPartitions = new LinkedHashMap<IntermediateResultPartitionID, IntermediateResultPartition>(producedDataSets.length, 1);

    for (IntermediateResult result : producedDataSets) {
        // initialize the IntermediateResultPartition
        IntermediateResultPartition irp = new IntermediateResultPartition(result, this, subTaskIndex);
        result.setPartition(subTaskIndex, irp);
        resultPartitions.put(irp.getPartitionId(), irp);
    }

    this.inputEdges = new ExecutionEdge[jobVertex.getJobVertex().getInputs().size()][];
    this.priorExecutions = new EvictingBoundedList<>(maxPriorExecutionHistoryLength);

    // create the Execution (the current attempt)
    this.currentExecution = new Execution(
            getExecutionGraph().getFutureExecutor(),
            this,
            0,
            createTimestamp,
            timeout);

    this.timeout = timeout;
}
connectToPredecessors wires the vertices together with edges:
    public void connectToPredecessors(Map<IntermediateDataSetID, IntermediateResult> intermediateDataSets) throws JobException {
        List<JobEdge> inputs = jobVertex.getInputs(); // the JobVertex's inputs

        for (int num = 0; num < inputs.size(); num++) {
            JobEdge edge = inputs.get(num); // the corresponding JobEdge
            IntermediateResult ires = intermediateDataSets.get(edge.getSourceId()); // look up the IntermediateResult that is this JobEdge's source

            this.inputs.add(ires); // List<IntermediateResult> inputs

            // register the current vertex as a consumer of each IntermediateResultPartition of the IntermediateResult
            int consumerIndex = ires.registerConsumer();

            for (int i = 0; i < parallelism; i++) {
                ExecutionVertex ev = taskVertices[i];
                // create the ExecutionEdges from each ExecutionVertex to the concrete IntermediateResultPartitions
                ev.connectSource(num, ires, edge, consumerIndex);
            }
        }
    }
connectSource
public void connectSource(int inputNumber, IntermediateResult source, JobEdge edge, int consumerNumber) {
    final DistributionPattern pattern = edge.getDistributionPattern(); // the edge's distribution pattern
    final IntermediateResultPartition[] sourcePartitions = source.getPartitions(); // the source's partitions

    ExecutionEdge[] edges;

    switch (pattern) {
        case POINTWISE:
            edges = connectPointwise(sourcePartitions, inputNumber);
            break;
        case ALL_TO_ALL:
            edges = connectAllToAll(sourcePartitions, inputNumber);
            break;
        default:
            throw new RuntimeException("Unrecognized distribution pattern.");
    }

    this.inputEdges[inputNumber] = edges;

    // add the consumers to the source
    // for now (until the receiver initiated handshake is in place), we need to register the
    // edges as the execution graph
    for (ExecutionEdge ee : edges) {
        ee.getSource().addConsumer(ee, consumerNumber);
    }
}
Let's look at connectPointwise (POINTWISE typically results from forward/rescale connections, ALL_TO_ALL from keyBy or rebalance):
private ExecutionEdge[] connectPointwise(IntermediateResultPartition[] sourcePartitions, int inputNumber) {
    final int numSources = sourcePartitions.length; // number of source partitions
    final int parallelism = getTotalNumberOfParallelSubtasks(); // parallelism of the consuming subtasks

    // simple case same number of sources as targets
    if (numSources == parallelism) { // 1:1 is the simple case
        // take the partition of sourcePartitions whose index matches subTaskIndex
        return new ExecutionEdge[] { new ExecutionEdge(sourcePartitions[subTaskIndex], this, inputNumber) };
    }
    else if (numSources < parallelism) { // higher consumer parallelism: one source partition feeds several tasks
        int sourcePartition;

        // check if the pattern is regular or irregular
        // we use int arithmetics for regular, and floating point with rounding for irregular
        if (parallelism % numSources == 0) { // divisible case: e.g. with 2 sources and 6 tasks, the 3rd task maps to the first source
            // same number of targets per source
            int factor = parallelism / numSources;
            sourcePartition = subTaskIndex / factor;
        }
        else {
            // different number of targets per source
            float factor = ((float) parallelism) / numSources;
            sourcePartition = (int) (subTaskIndex / factor);
        }

        return new ExecutionEdge[] { new ExecutionEdge(sourcePartitions[sourcePartition], this, inputNumber) };
    }
    else {
        //......
    }
}
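The index arithmetic is easier to see with concrete numbers. A small stand-alone sketch (hypothetical numbers; it mirrors the mapping logic above, which in Flink lives inside ExecutionVertex) prints which source partition each subtask reads:

    // Mirrors connectPointwise's source-partition mapping for numSources < parallelism.
    public class PointwiseMapping {

        static int sourcePartition(int subTaskIndex, int numSources, int parallelism) {
            if (parallelism % numSources == 0) {
                int factor = parallelism / numSources;             // same number of targets per source
                return subTaskIndex / factor;
            } else {
                float factor = ((float) parallelism) / numSources; // irregular: float division, round down
                return (int) (subTaskIndex / factor);
            }
        }

        public static void main(String[] args) {
            // regular: 2 sources, 6 tasks -> tasks 0-2 read partition 0, tasks 3-5 read partition 1
            for (int i = 0; i < 6; i++) {
                System.out.println("task " + i + " -> partition " + sourcePartition(i, 2, 6));
            }
            // irregular: 2 sources, 5 tasks -> factor 2.5; tasks 0-2 read partition 0, tasks 3-4 read partition 1
            for (int i = 0; i < 5; i++) {
                System.out.println("task " + i + " -> partition " + sourcePartition(i, 2, 5));
            }
        }
    }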
Configure checkpointing
executionGraph.enableSnapshotCheckpointing(
snapshotSettings.getCheckpointInterval(),
snapshotSettings.getCheckpointTimeout(),
snapshotSettings.getMinPauseBetweenCheckpoints(),
snapshotSettings.getMaxConcurrentCheckpoints(),
snapshotSettings.getExternalizedCheckpointSettings(),
triggerVertices,
ackVertices,
confirmVertices,
checkpointIdCounter,
completedCheckpoints,
externalizedCheckpointsDir,
checkpointStatsTracker);
This starts the CheckpointCoordinator; see the separate blog post that discusses the checkpoint mechanism.
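These snapshot settings originate from the user program's checkpoint configuration. Roughly like this on the DataStream API (a minimal sketch; the interval/timeout values are hypothetical, and exact setter names may vary across Flink versions):

    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

    // Where the snapshotSettings values come from on the user side.
    public class CheckpointConfigExample {
        public static void main(String[] args) {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

            env.enableCheckpointing(60_000);                                // -> getCheckpointInterval()
            env.getCheckpointConfig().setCheckpointTimeout(600_000);        // -> getCheckpointTimeout()
            env.getCheckpointConfig().setMinPauseBetweenCheckpoints(5_000); // -> getMinPauseBetweenCheckpoints()
            env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);       // -> getMaxConcurrentCheckpoints()
        }
    }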
Scheduler
Now let's see how the built ExecutionGraph is scheduled.
      future { // asynchronously
        try {
          submittedJobGraphs.putJobGraph(new SubmittedJobGraph(jobGraph, jobInfo)) // store the graph in submittedJobGraphs
        } catch {
          //
        }
      }

      jobInfo.notifyClients(
        decorateMessage(JobSubmitSuccess(jobGraph.getJobID))) // notify the client that the submission succeeded

      if (leaderElectionService.hasLeadership) {
        executionGraph.scheduleForExecution(scheduler) // schedule it
      }
    } catch {
      //
    }
  }(context.dispatcher)
}
executionGraph.scheduleForExecution
    public void scheduleForExecution(SlotProvider slotProvider) throws JobException {
        switch (scheduleMode) {
            case LAZY_FROM_SOURCES:
                // simply take the vertices without inputs.
                for (ExecutionJobVertex ejv : this.tasks.values()) { // ConcurrentHashMap<JobVertexID, ExecutionJobVertex> tasks; "tasks" is a poor name for this map
                    if (ejv.getJobVertex().isInputVertex()) {
                        ejv.scheduleAll(slotProvider, allowQueuedScheduling);
                    }
                }
                break;
            case EAGER:
                for (ExecutionJobVertex ejv : getVerticesTopologically()) {
                    ejv.scheduleAll(slotProvider, allowQueuedScheduling);
                }
                break;
            default:
                throw new JobException("Schedule mode is invalid.");
        }
    }
For streaming jobs the default is EAGER (batch jobs are scheduled lazily from the sources):
public JobGraph createJobGraph() {
        jobGraph = new JobGraph(streamGraph.getJobName());
        // make sure that all vertices start immediately
        jobGraph.setScheduleMode(ScheduleMode.EAGER);
ExecutionJobVertex.scheduleAll
    public void scheduleAll(SlotProvider slotProvider, boolean queued) throws NoResourceAvailableException {
        ExecutionVertex[] vertices = this.taskVertices;
        // kick off the tasks
        for (ExecutionVertex ev : vertices) {
            ev.scheduleForExecution(slotProvider, queued);
        }
    }
ExecutionVertex.scheduleForExecution
//The current or latest execution attempt of this vertex's task
public boolean scheduleForExecution(SlotProvider slotProvider, boolean queued) throws NoResourceAvailableException {
return this.currentExecution.scheduleForExecution(slotProvider, queued);
}
Execution.scheduleForExecution
    public boolean scheduleForExecution(SlotProvider slotProvider, boolean queued) throws NoResourceAvailableException {
        final SlotSharingGroup sharingGroup = vertex.getJobVertex().getSlotSharingGroup();
        final CoLocationConstraint locationConstraint = vertex.getLocationConstraint();

        if (transitionState(CREATED, SCHEDULED)) {
            ScheduledUnit toSchedule = locationConstraint == null ? // build the ScheduledUnit
                new ScheduledUnit(this, sharingGroup) :
                new ScheduledUnit(this, sharingGroup, locationConstraint);

            final Future<SimpleSlot> slotAllocationFuture = slotProvider.allocateSlot(toSchedule, queued); // request a slot from the slotProvider

            final Future<Void> deploymentFuture = slotAllocationFuture.handle(new BiFunction<SimpleSlot, Throwable, Void>() {
                @Override
                public Void apply(SimpleSlot simpleSlot, Throwable throwable) {
                    if (simpleSlot != null) { // the slot was allocated successfully
                        try {
                            deployToSlot(simpleSlot); // deploy
                        } catch (Throwable t) {
                            try {
                                simpleSlot.releaseSlot();
                            } finally {
                                markFailed(t);
                            }
                        }
                    }
                    else {
                        markFailed(throwable);
                    }
                    return null;
                }
            });

            return true;
        }
        else {
            //......
        }
    }
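The allocate-then-deploy chaining above uses Flink's internal Future abstraction, but the shape of the pattern is the same as this self-contained java.util.concurrent analogue (illustrative only — "slot-1" and the println bodies are stand-ins for allocateSlot/deployToSlot/markFailed, not Flink code):

    import java.util.concurrent.CompletableFuture;

    // Analogue of: allocate a slot asynchronously, then deploy on success or mark failed.
    public class AllocateThenDeploy {
        public static void main(String[] args) throws Exception {
            // stands in for slotProvider.allocateSlot(toSchedule, queued)
            CompletableFuture<String> slotFuture = CompletableFuture.supplyAsync(() -> "slot-1");

            // stands in for slotAllocationFuture.handle(...)
            CompletableFuture<Void> deployment = slotFuture.handle((slot, failure) -> {
                if (slot != null) {
                    System.out.println("deployToSlot(" + slot + ")"); // deploy on success
                } else {
                    System.out.println("markFailed: " + failure);     // no slot -> mark failed
                }
                return null;
            });

            deployment.get(); // wait so the example prints before the JVM exits
        }
    }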
For the slotProvider, see the Flink - Scheduler post.
deployToSlot: the core is sending a submitTask request to the TaskManager, which turns the TaskDeploymentDescriptor into a running Task:
    public void deployToSlot(final SimpleSlot slot) throws JobException {
        ExecutionState previous = this.state;

        if (previous == SCHEDULED || previous == CREATED) {
            if (!transitionState(previous, DEPLOYING)) { // transition the state to DEPLOYING
                throw new IllegalStateException("Cannot deploy task: Concurrent deployment call race.");
            }
        }

        try {
            // good, we are allowed to deploy
            if (!slot.setExecutedVertex(this)) { // associate the slot with this ExecutionVertex
                throw new JobException("Could not assign the ExecutionVertex to the slot " + slot);
            }
            this.assignedResource = slot;

            final TaskDeploymentDescriptor deployment = vertex.createDeploymentDescriptor( // create the DeploymentDescriptor
                attemptId,
                slot,
                taskState,
                attemptNumber);

            // register this execution at the execution graph, to receive call backs
            vertex.getExecutionGraph().registerExecution(this);

            final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway();
            final Future<Acknowledge> submitResultFuture = taskManagerGateway.submitTask(deployment, timeout); // send the request to the TaskManager's actor

            submitResultFuture.exceptionallyAsync(new ApplyFunction<Throwable, Void>() {......}
        }
