job源码分析
- /**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.hadoop.mapreduce;
- import java.io.IOException;
- import java.security.PrivilegedExceptionAction;
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.fs.Path;
- import org.apache.hadoop.io.RawComparator;
- import org.apache.hadoop.mapreduce.TaskAttemptID;
- import org.apache.hadoop.mapred.JobClient;
- import org.apache.hadoop.mapred.JobConf;
- import org.apache.hadoop.mapred.RunningJob;
- import org.apache.hadoop.mapred.TaskCompletionEvent;
- /**
- * The job submitter's view of the Job. It allows the user to configure the
- * job, submit it, control its execution, and query the state. The set methods
- * only work until the job is submitted, afterwards they will throw an
- * IllegalStateException.
- * job 提交者看到的job的视图。它允许用户配置job,提交job,控制job的执行,并且查询他的状态
- * set方法只有在job提交的时候才会工作
- */
- public class Job extends JobContext {
- public static enum JobState {DEFINE, RUNNING};//job的状态,有定义好的和正在运行
- private JobState state = JobState.DEFINE;
- private JobClient jobClient;
- private RunningJob info;
- /**
- * Creates a new {@link Job}
- * A Job will be created with a generic {@link Configuration}.
- *创建一个新的job,用通用的configuration
- * @return the {@link Job}
- * @throws IOException
- */
- public static Job getInstance() throws IOException {
- // create with a null Cluster
- return getInstance(new Configuration());
- }
- /**
- * Creates a new {@link Job} with a given {@link Configuration}.
- * The <code>Job</code> makes a copy of the <code>Configuration</code> so
- * that any necessary internal modifications do not reflect on the incoming
- * parameter.
- *使用给定的configuration创建job
- *这里对configuration进行了备份,如此,任何必要的对configuration内部修改,都不会影响传进来的conf参数
- * @param conf the {@link Configuration}
- * @return the {@link Job}
- * @throws IOException
- */
- public static Job getInstance(Configuration conf) throws IOException {
- // create with a null Cluster 没有任何集群的创建
- JobConf jobConf = new JobConf(conf);
- return new Job(jobConf);
- }
- /**
- * Creates a new {@link Job} with a given {@link Configuration}
- * and a given jobName.
- *用给定的conf和jobname
- * The <code>Job</code> makes a copy of the <code>Configuration</code> so
- * that any necessary internal modifications do not reflect on the incoming
- * parameter.
- *
- * @param conf the {@link Configuration}
- * @param jobName the job instance's name
- * @return the {@link Job}
- * @throws IOException
- */
- public static Job getInstance(Configuration conf, String jobName)
- throws IOException {
- // create with a null Cluster
- Job result = getInstance(conf);
- result.setJobName(jobName);
- return result;
- }
- public Job() throws IOException {
- this(new Configuration());
- }
- public Job(Configuration conf) throws IOException {
- super(conf, null);
- }
- public Job(Configuration conf, String jobName) throws IOException {
- this(conf);
- setJobName(jobName);
- }
- JobClient getJobClient() {
- return jobClient;
- }
- //确保job的状态
- private void ensureState(JobState state) throws IllegalStateException {
- if (state != this.state) {
- throw new IllegalStateException("Job in state "+ this.state +
- " instead of " + state);
- }
- if (state == JobState.RUNNING && jobClient == null) {
- throw new IllegalStateException("Job in state " + JobState.RUNNING +
- " however jobClient is not initialized!");
- }
- }
- /**
- * Set the number of reduce tasks for the job.
- * 设置reducer的个数 常用
- * @param tasks the number of reduce tasks
- * @throws IllegalStateException if the job is submitted
- */
- public void setNumReduceTasks(int tasks) throws IllegalStateException {
- ensureState(JobState.DEFINE);
- conf.setNumReduceTasks(tasks);
- }
- /**
- * Set the current working directory for the default file system.
- * 为默认文件系统 设置当前工作目录
- * @param dir the new current working directory.
- * @throws IllegalStateException if the job is submitted
- */
- public void setWorkingDirectory(Path dir) throws IOException {
- ensureState(JobState.DEFINE);
- conf.setWorkingDirectory(dir);
- }
- /**
- * Set the {@link InputFormat} for the job.
- * @param cls the <code>InputFormat</code> to use
- * @throws IllegalStateException if the job is submitted
- */
- public void setInputFormatClass(Class<? extends InputFormat> cls
- ) throws IllegalStateException {
- ensureState(JobState.DEFINE);
- conf.setClass(INPUT_FORMAT_CLASS_ATTR, cls, InputFormat.class);
- }
- /**
- * Set the {@link OutputFormat} for the job.
- * @param cls the <code>OutputFormat</code> to use
- * @throws IllegalStateException if the job is submitted
- */
- public void setOutputFormatClass(Class<? extends OutputFormat> cls
- ) throws IllegalStateException {
- ensureState(JobState.DEFINE);
- conf.setClass(OUTPUT_FORMAT_CLASS_ATTR, cls, OutputFormat.class);
- }
- /**
- * Set the {@link Mapper} for the job.
- * @param cls the <code>Mapper</code> to use
- * @throws IllegalStateException if the job is submitted
- */
- public void setMapperClass(Class<? extends Mapper> cls
- ) throws IllegalStateException {
- ensureState(JobState.DEFINE);
- conf.setClass(MAP_CLASS_ATTR, cls, Mapper.class);
- }
- /**
- * Set the Jar by finding where a given class came from.
- * 设置jar包,hadoop根据给定的class来寻找他的jar包
- * @param cls the example class
- */
- public void setJarByClass(Class<?> cls) {
- conf.setJarByClass(cls);
- }
- /**
- * Get the pathname of the job's jar.
- * @return the pathname
- */
- public String getJar() {
- return conf.getJar();
- }
- /**
- * Set the combiner class for the job.
- * @param cls the combiner to use
- * @throws IllegalStateException if the job is submitted
- */
- public void setCombinerClass(Class<? extends Reducer> cls
- ) throws IllegalStateException {
- ensureState(JobState.DEFINE);
- conf.setClass(COMBINE_CLASS_ATTR, cls, Reducer.class);
- }
- /**
- * Set the {@link Reducer} for the job.
- * @param cls the <code>Reducer</code> to use
- * @throws IllegalStateException if the job is submitted
- */
- public void setReducerClass(Class<? extends Reducer> cls
- ) throws IllegalStateException {
- ensureState(JobState.DEFINE);
- conf.setClass(REDUCE_CLASS_ATTR, cls, Reducer.class);
- }
- /**
- * Set the {@link Partitioner} for the job.
- * @param cls the <code>Partitioner</code> to use
- * @throws IllegalStateException if the job is submitted
- */
- public void setPartitionerClass(Class<? extends Partitioner> cls
- ) throws IllegalStateException {
- ensureState(JobState.DEFINE);
- conf.setClass(PARTITIONER_CLASS_ATTR, cls, Partitioner.class);
- }
- /**
- * Set the key class for the map output data. This allows the user to
- * specify the map output key class to be different than the final output
- * value class.
- *
- * @param theClass the map output key class.
- * @throws IllegalStateException if the job is submitted
- */
- public void setMapOutputKeyClass(Class<?> theClass
- ) throws IllegalStateException {
- ensureState(JobState.DEFINE);
- conf.setMapOutputKeyClass(theClass);
- }
- /**
- * Set the value class for the map output data. This allows the user to
- * specify the map output value class to be different than the final output
- * value class.
- *
- * @param theClass the map output value class.
- * @throws IllegalStateException if the job is submitted
- */
- public void setMapOutputValueClass(Class<?> theClass
- ) throws IllegalStateException {
- ensureState(JobState.DEFINE);
- conf.setMapOutputValueClass(theClass);
- }
- /**
- * Set the key class for the job output data.
- *
- * @param theClass the key class for the job output data.
- * @throws IllegalStateException if the job is submitted
- */
- public void setOutputKeyClass(Class<?> theClass
- ) throws IllegalStateException {
- ensureState(JobState.DEFINE);
- conf.setOutputKeyClass(theClass);
- }
- /**
- * Set the value class for job outputs.
- *
- * @param theClass the value class for job outputs.
- * @throws IllegalStateException if the job is submitted
- */
- public void setOutputValueClass(Class<?> theClass
- ) throws IllegalStateException {
- ensureState(JobState.DEFINE);
- conf.setOutputValueClass(theClass);
- }
- /**
- * Define the comparator that controls how the keys are sorted before they
- * are passed to the {@link Reducer}.
- * @param cls the raw comparator
- * @throws IllegalStateException if the job is submitted
- */
- public void setSortComparatorClass(Class<? extends RawComparator> cls
- ) throws IllegalStateException {
- ensureState(JobState.DEFINE);
- conf.setOutputKeyComparatorClass(cls);
- }
- /**
- * Define the comparator that controls which keys are grouped together
- * for a single call to
- * {@link Reducer#reduce(Object, Iterable,
- * org.apache.hadoop.mapreduce.Reducer.Context)}
- * @param cls the raw comparator to use
- * @throws IllegalStateException if the job is submitted
- */
- public void setGroupingComparatorClass(Class<? extends RawComparator> cls
- ) throws IllegalStateException {
- ensureState(JobState.DEFINE);
- conf.setOutputValueGroupingComparator(cls);
- }
- /**
- * Set the user-specified job name.
- *
- * @param name the job's new name.
- * @throws IllegalStateException if the job is submitted
- */
- public void setJobName(String name) throws IllegalStateException {
- ensureState(JobState.DEFINE);
- conf.setJobName(name);
- }
- /**
- * Turn speculative execution on or off for this job.
- * 设置推测执行的开关
- * @param speculativeExecution <code>true</code> if speculative execution
- * should be turned on, else <code>false</code>.
- */
- public void setSpeculativeExecution(boolean speculativeExecution) {
- ensureState(JobState.DEFINE);
- conf.setSpeculativeExecution(speculativeExecution);
- }
- /**
- * Turn speculative execution on or off for this job for map tasks.
- *
- * @param speculativeExecution <code>true</code> if speculative execution
- * should be turned on for map tasks,
- * else <code>false</code>.
- */
- public void setMapSpeculativeExecution(boolean speculativeExecution) {
- ensureState(JobState.DEFINE);
- conf.setMapSpeculativeExecution(speculativeExecution);
- }
- /**
- * Turn speculative execution on or off for this job for reduce tasks.
- *
- * @param speculativeExecution <code>true</code> if speculative execution
- * should be turned on for reduce tasks,
- * else <code>false</code>.
- */
- public void setReduceSpeculativeExecution(boolean speculativeExecution) {
- ensureState(JobState.DEFINE);
- conf.setReduceSpeculativeExecution(speculativeExecution);
- }
- /**
- * Get the URL where some job progress information will be displayed.
- * 得到 一些job 进度信息会展示的url地址
- * @return the URL where some job progress information will be displayed.
- */
- public String getTrackingURL() {
- ensureState(JobState.RUNNING);
- return info.getTrackingURL();
- }
- /**
- * Get the <i>progress</i> of the job's setup, as a float between 0.0
- * and 1.0. When the job setup is completed, the function returns 1.0.
- *
- * @return the progress of the job's setup.
- * @throws IOException
- */
- public float setupProgress() throws IOException {
- ensureState(JobState.RUNNING);
- return info.setupProgress();
- }
- /**
- * Get the <i>progress</i> of the job's map-tasks, as a float between 0.0
- * and 1.0. When all map tasks have completed, the function returns 1.0.
- *
- * @return the progress of the job's map-tasks.
- * @throws IOException
- */
- public float mapProgress() throws IOException {
- ensureState(JobState.RUNNING);
- return info.mapProgress();
- }
- /**
- * Get the <i>progress</i> of the job's reduce-tasks, as a float between 0.0
- * and 1.0. When all reduce tasks have completed, the function returns 1.0.
- *
- * @return the progress of the job's reduce-tasks.
- * @throws IOException
- */
- public float reduceProgress() throws IOException {
- ensureState(JobState.RUNNING);
- return info.reduceProgress();
- }
- /**
- * Check if the job is finished or not.
- * This is a non-blocking call.
- *
- * @return <code>true</code> if the job is complete, else <code>false</code>.
- * @throws IOException
- */
- public boolean isComplete() throws IOException {
- ensureState(JobState.RUNNING);
- return info.isComplete();
- }
- /**
- * Check if the job completed successfully.
- *
- * @return <code>true</code> if the job succeeded, else <code>false</code>.
- * @throws IOException
- */
- public boolean isSuccessful() throws IOException {
- ensureState(JobState.RUNNING);
- return info.isSuccessful();
- }
- /**
- * Kill the running job. Blocks until all job tasks have been
- * killed as well. If the job is no longer running, it simply returns.
- * 杀掉正在运行的job 直到所有的job tasks都被杀掉之后 才会停止。
- * 如果job不再运行来 他就会返回
- * @throws IOException
- */
- public void killJob() throws IOException {
- ensureState(JobState.RUNNING);
- info.killJob();
- }
- /**
- * Get events indicating completion (success/failure) of component tasks.
- *
- * @param startFrom index to start fetching events from
- * @return an array of {@link TaskCompletionEvent}s
- * @throws IOException
- */
- public TaskCompletionEvent[] getTaskCompletionEvents(int startFrom
- ) throws IOException {
- ensureState(JobState.RUNNING);
- return info.getTaskCompletionEvents(startFrom);
- }
- /**
- * Kill indicated task attempt.
- *
- * @param taskId the id of the task to be terminated.
- * @throws IOException
- */
- public void killTask(TaskAttemptID taskId) throws IOException {
- ensureState(JobState.RUNNING);
- info.killTask(org.apache.hadoop.mapred.TaskAttemptID.downgrade(taskId),
- false);
- }
- /**
- * Fail indicated task attempt.
- *
- * @param taskId the id of the task to be terminated.
- * @throws IOException
- */
- public void failTask(TaskAttemptID taskId) throws IOException {
- ensureState(JobState.RUNNING);
- info.killTask(org.apache.hadoop.mapred.TaskAttemptID.downgrade(taskId),
- true);
- }
- /**
- * Gets the counters for this job.
- *
- * @return the counters for this job.
- * @throws IOException
- */
- public Counters getCounters() throws IOException {
- ensureState(JobState.RUNNING);
- return new Counters(info.getCounters());
- }
- private void ensureNotSet(String attr, String msg) throws IOException {
- if (conf.get(attr) != null) {
- throw new IOException(attr + " is incompatible with " + msg + " mode.");
- }
- }
- /**
- * Sets the flag that will allow the JobTracker to cancel the HDFS delegation
- * tokens upon job completion. Defaults to true.
- */
- public void setCancelDelegationTokenUponJobCompletion(boolean value) {
- ensureState(JobState.DEFINE);
- conf.setBoolean(JOB_CANCEL_DELEGATION_TOKEN, value);
- }
- /**
- * Default to the new APIs unless they are explicitly set or the old mapper or
- * reduce attributes are used.
- * @throws IOException if the configuration is inconsistant
- */
- private void setUseNewAPI() throws IOException {
- int numReduces = conf.getNumReduceTasks();
- String oldMapperClass = "mapred.mapper.class";
- String oldReduceClass = "mapred.reducer.class";
- conf.setBooleanIfUnset("mapred.mapper.new-api",
- conf.get(oldMapperClass) == null);
- if (conf.getUseNewMapper()) {
- String mode = "new map API";
- ensureNotSet("mapred.input.format.class", mode);
- ensureNotSet(oldMapperClass, mode);
- if (numReduces != 0) {
- ensureNotSet("mapred.partitioner.class", mode);
- } else {
- ensureNotSet("mapred.output.format.class", mode);
- }
- } else {
- String mode = "map compatability";
- ensureNotSet(JobContext.INPUT_FORMAT_CLASS_ATTR, mode);
- ensureNotSet(JobContext.MAP_CLASS_ATTR, mode);
- if (numReduces != 0) {
- ensureNotSet(JobContext.PARTITIONER_CLASS_ATTR, mode);
- } else {
- ensureNotSet(JobContext.OUTPUT_FORMAT_CLASS_ATTR, mode);
- }
- }
- if (numReduces != 0) {
- conf.setBooleanIfUnset("mapred.reducer.new-api",
- conf.get(oldReduceClass) == null);
- if (conf.getUseNewReducer()) {
- String mode = "new reduce API";
- ensureNotSet("mapred.output.format.class", mode);
- ensureNotSet(oldReduceClass, mode);
- } else {
- String mode = "reduce compatability";
- ensureNotSet(JobContext.OUTPUT_FORMAT_CLASS_ATTR, mode);
- ensureNotSet(JobContext.REDUCE_CLASS_ATTR, mode);
- }
- }
- }
- /**
- * Submit the job to the cluster and return immediately.
- * 提交job到集群上面 并且立刻返回
- * @throws IOException
- */
- public void submit() throws IOException, InterruptedException,
- ClassNotFoundException {
- ensureState(JobState.DEFINE);
- setUseNewAPI();
- // Connect to the JobTracker and submit the job
- //连接到jobtracker 并且提交作业
- connect();
- info = jobClient.submitJobInternal(conf);//这里才真正的提交作业
- super.setJobID(info.getID());
- state = JobState.RUNNING;
- }
- /**
- * Open a connection to the JobTracker
- * 打开到jobtracker的连接
- * @throws IOException
- * @throws InterruptedException
- */
- private void connect() throws IOException, InterruptedException {
- ugi.doAs(new PrivilegedExceptionAction<Object>() {
- public Object run() throws IOException {
- jobClient = new JobClient((JobConf) getConfiguration());
- return null;
- }
- });
- }
- /**
- * Submit the job to the cluster and wait for it to finish.
- * @param verbose print the progress to the user
- * @return true if the job succeeded
- * @throws IOException thrown if the communication with the
- * <code>JobTracker</code> is lost
- */
- public boolean waitForCompletion(boolean verbose
- ) throws IOException, InterruptedException,
- ClassNotFoundException {
- if (state == JobState.DEFINE) {
- submit();
- }
- if (verbose) {
- jobClient.monitorAndPrintJob(conf, info);
- } else {
- info.waitForCompletion();
- }
- return isSuccessful();
- }
- }
job源码分析的更多相关文章
- ABP源码分析一:整体项目结构及目录
ABP是一套非常优秀的web应用程序架构,适合用来搭建集中式架构的web应用程序. 整个Abp的Infrastructure是以Abp这个package为核心模块(core)+15个模块(module ...
- HashMap与TreeMap源码分析
1. 引言 在红黑树--算法导论(15)中学习了红黑树的原理.本来打算自己来试着实现一下,然而在看了JDK(1.8.0)TreeMap的源码后恍然发现原来它就是利用红黑树实现的(很惭愧学了Ja ...
- nginx源码分析之网络初始化
nginx作为一个高性能的HTTP服务器,网络的处理是其核心,了解网络的初始化有助于加深对nginx网络处理的了解,本文主要通过nginx的源代码来分析其网络初始化. 从配置文件中读取初始化信息 与网 ...
- zookeeper源码分析之五服务端(集群leader)处理请求流程
leader的实现类为LeaderZooKeeperServer,它间接继承自标准ZookeeperServer.它规定了请求到达leader时需要经历的路径: PrepRequestProcesso ...
- zookeeper源码分析之四服务端(单机)处理请求流程
上文: zookeeper源码分析之一服务端启动过程 中,我们介绍了zookeeper服务器的启动过程,其中单机是ZookeeperServer启动,集群使用QuorumPeer启动,那么这次我们分析 ...
- zookeeper源码分析之三客户端发送请求流程
znode 可以被监控,包括这个目录节点中存储的数据的修改,子节点目录的变化等,一旦变化可以通知设置监控的客户端,这个功能是zookeeper对于应用最重要的特性,通过这个特性可以实现的功能包括配置的 ...
- java使用websocket,并且获取HttpSession,源码分析
转载请在页首注明作者与出处 http://www.cnblogs.com/zhuxiaojie/p/6238826.html 一:本文使用范围 此文不仅仅局限于spring boot,普通的sprin ...
- ABP源码分析二:ABP中配置的注册和初始化
一般来说,ASP.NET Web应用程序的第一个执行的方法是Global.asax下定义的Start方法.执行这个方法前HttpApplication 实例必须存在,也就是说其构造函数的执行必然是完成 ...
- ABP源码分析三:ABP Module
Abp是一种基于模块化设计思想构建的框架.开发人员可以将自定义的功能以模块(module)的形式集成到ABP中.具体的功能都可以设计成一个单独的Module.Abp底层框架提供便捷的方法集成每个Modu ...
- ABP源码分析四:Configuration
核心模块的配置 Configuration是ABP中设计比较巧妙的地方.其通过AbpStartupConfiguration,Castle的依赖注入,Dictionary对象和扩展方法很巧妙的实现了配 ...
随机推荐
- es6从零学习(一)let 和 const 命令
es6从零学习(一):let 和 const 命令 一:let 变量 1.块级作用域{}:let只在自己的块级作用域内有效. for(let i =0;i<3;i++) { console.lo ...
- rsync+inotify实现实时同步,自动触发同步文件
本文参考来自:http://chocolee.blog.51cto.com/8158455/1400596 我的需求和他的略有不同,同时做了一下更改,如下: 需求:两台机器相互为主备,搭建相同的两个服 ...
- 自测之Lesson14:多线程编程
题目:创建一个线程,并理清主线程结束时会发生什么. 代码: #include <stdio.h> #include <pthread.h> #include <unist ...
- 个人在git配置SSH Key遇到的问题以及解决方案
第一次用git上传代码到github,在这过程中遇到很多问题,在输入git命令的时候都小心翼翼,因为一不小心感觉就会出错.. 英语不好..在敲入git命令过程中各种错误提示勉强翻译下才看得懂 最后输入 ...
- 深入理解Java对象序列化(转载)
原文地址:http://developer.51cto.com/art/201202/317181.htm 1. 什么是Java对象序列化 Java平台允许我们在内存中创建可复用的Java对象,但一般 ...
- LintCode-174.删除链表中倒数第n个节点
删除链表中倒数第n个节点 给定一个链表,删除链表中倒数第n个节点,返回链表的头节点. 注意事项 链表中的节点个数大于等于n 样例 给出链表 1->2->3->4->5-> ...
- adb shell input keyevent值所对应的字符
转自:http://blog.csdn.net/chen825919148/article/details/18732041 0 --> "KEYCODE_UNKNOWN" ...
- ERROR 1205 (HY000): Lock wait timeout exceeded; try restarting transaction 表被锁的解决办法
转自:https://blog.csdn.net/mchdba/article/details/38313881 前言:朋友咨询我说执行简单的update语句失效,症状如下:mysql> upd ...
- RT-thread v2.1.0修正版
RT-Thread v2.1.0是v2.0.1正式版这个系列的bug修正版.RT-Thread v2.1.0修正的主要内容包括: 这个版本经历的时间比较长,并且原定的一些目标也还未能完成(更全的POS ...
- BZOJ 1509 逃学的小孩(树的直径)
题意:从树上任找三点u,v,w.使得dis(u,v)+min(dis(u,w),dis(v,w))最大. 有一个结论u,v必是树上直径的两端点. 剩下的枚举w就行了. 具体不会证... # inclu ...