job源码分析

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce;
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TaskCompletionEvent;
/**
 * The job submitter's view of the Job. It allows the user to configure the
 * job, submit it, control its execution, and query the state. The set methods
 * only work until the job is submitted, afterwards they will throw an
 * IllegalStateException.
 */
public class Job extends JobContext {

  /**
   * Lifecycle state of a job as seen by the submitter: a job starts in
   * {@code DEFINE} and is moved to {@code RUNNING} by {@link #submit()}.
   */
  public enum JobState { DEFINE, RUNNING }

  /** Current state; configuration setters require DEFINE, queries require RUNNING. */
  private JobState state = JobState.DEFINE;

  /** Client used to talk to the JobTracker; null until {@link #connect()} has run. */
  private JobClient jobClient;

  /** Handle on the submitted job; null until {@link #submit()} has succeeded. */
  private RunningJob info;

  /**
   * Creates a new {@link Job}.
   * The Job will be created with a generic (default) {@link Configuration}.
   *
   * @return the {@link Job}
   * @throws IOException
   */
  public static Job getInstance() throws IOException {
    return getInstance(new Configuration());
  }

  /**
   * Creates a new {@link Job} with a given {@link Configuration}.
   * The <code>Job</code> makes a copy of the <code>Configuration</code> so
   * that any necessary internal modifications do not reflect on the incoming
   * parameter.
   *
   * @param conf the {@link Configuration}
   * @return the {@link Job}
   * @throws IOException
   */
  public static Job getInstance(Configuration conf) throws IOException {
    // Wrap the caller's configuration in a fresh JobConf before handing it on.
    JobConf jobConf = new JobConf(conf);
    return new Job(jobConf);
  }

  /**
   * Creates a new {@link Job} with a given {@link Configuration}
   * and a given jobName.
   *
   * The <code>Job</code> makes a copy of the <code>Configuration</code> so
   * that any necessary internal modifications do not reflect on the incoming
   * parameter.
   *
   * @param conf the {@link Configuration}
   * @param jobName the job instance's name
   * @return the {@link Job}
   * @throws IOException
   */
  public static Job getInstance(Configuration conf, String jobName)
      throws IOException {
    Job result = getInstance(conf);
    result.setJobName(jobName);
    return result;
  }

  /**
   * Creates a job backed by a default {@link Configuration}.
   *
   * @throws IOException
   */
  public Job() throws IOException {
    this(new Configuration());
  }

  /**
   * Creates a job from the given configuration; no job id is assigned yet
   * (it is set by {@link #submit()}).
   *
   * @param conf the {@link Configuration}
   * @throws IOException
   */
  public Job(Configuration conf) throws IOException {
    super(conf, null);
  }

  /**
   * Creates a job from the given configuration and sets its name.
   *
   * @param conf the {@link Configuration}
   * @param jobName the job instance's name
   * @throws IOException
   */
  public Job(Configuration conf, String jobName) throws IOException {
    this(conf);
    setJobName(jobName);
  }

  /**
   * @return the {@link JobClient} created by {@link #connect()}, or
   *         <code>null</code> if the job has not connected yet
   */
  JobClient getJobClient() {
    return jobClient;
  }

  /**
   * Verifies that the job is currently in the state the caller requires.
   *
   * @param state the required state
   * @throws IllegalStateException if the job is in a different state, or if
   *         RUNNING is required but no {@link JobClient} has been created
   */
  private void ensureState(JobState state) throws IllegalStateException {
    if (state != this.state) {
      throw new IllegalStateException("Job in state "+ this.state +
                                      " instead of " + state);
    }
    if (state == JobState.RUNNING && jobClient == null) {
      throw new IllegalStateException("Job in state " + JobState.RUNNING +
                                      " however jobClient is not initialized!");
    }
  }

  /**
   * Set the number of reduce tasks for the job.
   *
   * @param tasks the number of reduce tasks
   * @throws IllegalStateException if the job is submitted
   */
  public void setNumReduceTasks(int tasks) throws IllegalStateException {
    ensureState(JobState.DEFINE);
    conf.setNumReduceTasks(tasks);
  }

  /**
   * Set the current working directory for the default file system.
   *
   * @param dir the new current working directory.
   * @throws IllegalStateException if the job is submitted
   * @throws IOException declared by the signature; propagated if the
   *         underlying configuration reports one
   */
  public void setWorkingDirectory(Path dir) throws IOException {
    ensureState(JobState.DEFINE);
    conf.setWorkingDirectory(dir);
  }

  /**
   * Set the {@link InputFormat} for the job.
   *
   * @param cls the <code>InputFormat</code> to use
   * @throws IllegalStateException if the job is submitted
   */
  public void setInputFormatClass(Class<? extends InputFormat> cls
                                  ) throws IllegalStateException {
    ensureState(JobState.DEFINE);
    conf.setClass(INPUT_FORMAT_CLASS_ATTR, cls, InputFormat.class);
  }

  /**
   * Set the {@link OutputFormat} for the job.
   *
   * @param cls the <code>OutputFormat</code> to use
   * @throws IllegalStateException if the job is submitted
   */
  public void setOutputFormatClass(Class<? extends OutputFormat> cls
                                   ) throws IllegalStateException {
    ensureState(JobState.DEFINE);
    conf.setClass(OUTPUT_FORMAT_CLASS_ATTR, cls, OutputFormat.class);
  }

  /**
   * Set the {@link Mapper} for the job.
   *
   * @param cls the <code>Mapper</code> to use
   * @throws IllegalStateException if the job is submitted
   */
  public void setMapperClass(Class<? extends Mapper> cls
                             ) throws IllegalStateException {
    ensureState(JobState.DEFINE);
    conf.setClass(MAP_CLASS_ATTR, cls, Mapper.class);
  }

  /**
   * Set the Jar by finding where a given class came from.
   *
   * @param cls the example class
   * @throws IllegalStateException if the job is submitted
   */
  public void setJarByClass(Class<?> cls) {
    // Like every other setter, the jar may only be changed before submission.
    ensureState(JobState.DEFINE);
    conf.setJarByClass(cls);
  }

  /**
   * Get the pathname of the job's jar.
   *
   * @return the pathname
   */
  public String getJar() {
    return conf.getJar();
  }

  /**
   * Set the combiner class for the job.
   *
   * @param cls the combiner to use
   * @throws IllegalStateException if the job is submitted
   */
  public void setCombinerClass(Class<? extends Reducer> cls
                               ) throws IllegalStateException {
    ensureState(JobState.DEFINE);
    conf.setClass(COMBINE_CLASS_ATTR, cls, Reducer.class);
  }

  /**
   * Set the {@link Reducer} for the job.
   *
   * @param cls the <code>Reducer</code> to use
   * @throws IllegalStateException if the job is submitted
   */
  public void setReducerClass(Class<? extends Reducer> cls
                              ) throws IllegalStateException {
    ensureState(JobState.DEFINE);
    conf.setClass(REDUCE_CLASS_ATTR, cls, Reducer.class);
  }

  /**
   * Set the {@link Partitioner} for the job.
   *
   * @param cls the <code>Partitioner</code> to use
   * @throws IllegalStateException if the job is submitted
   */
  public void setPartitionerClass(Class<? extends Partitioner> cls
                                  ) throws IllegalStateException {
    ensureState(JobState.DEFINE);
    conf.setClass(PARTITIONER_CLASS_ATTR, cls, Partitioner.class);
  }

  /**
   * Set the key class for the map output data. This allows the user to
   * specify the map output key class to be different than the final output
   * key class.
   *
   * @param theClass the map output key class.
   * @throws IllegalStateException if the job is submitted
   */
  public void setMapOutputKeyClass(Class<?> theClass
                                   ) throws IllegalStateException {
    ensureState(JobState.DEFINE);
    conf.setMapOutputKeyClass(theClass);
  }

  /**
   * Set the value class for the map output data. This allows the user to
   * specify the map output value class to be different than the final output
   * value class.
   *
   * @param theClass the map output value class.
   * @throws IllegalStateException if the job is submitted
   */
  public void setMapOutputValueClass(Class<?> theClass
                                     ) throws IllegalStateException {
    ensureState(JobState.DEFINE);
    conf.setMapOutputValueClass(theClass);
  }

  /**
   * Set the key class for the job output data.
   *
   * @param theClass the key class for the job output data.
   * @throws IllegalStateException if the job is submitted
   */
  public void setOutputKeyClass(Class<?> theClass
                                ) throws IllegalStateException {
    ensureState(JobState.DEFINE);
    conf.setOutputKeyClass(theClass);
  }

  /**
   * Set the value class for job outputs.
   *
   * @param theClass the value class for job outputs.
   * @throws IllegalStateException if the job is submitted
   */
  public void setOutputValueClass(Class<?> theClass
                                  ) throws IllegalStateException {
    ensureState(JobState.DEFINE);
    conf.setOutputValueClass(theClass);
  }

  /**
   * Define the comparator that controls how the keys are sorted before they
   * are passed to the {@link Reducer}.
   *
   * @param cls the raw comparator
   * @throws IllegalStateException if the job is submitted
   */
  public void setSortComparatorClass(Class<? extends RawComparator> cls
                                     ) throws IllegalStateException {
    ensureState(JobState.DEFINE);
    conf.setOutputKeyComparatorClass(cls);
  }

  /**
   * Define the comparator that controls which keys are grouped together
   * for a single call to
   * {@link Reducer#reduce(Object, Iterable,
   *                       org.apache.hadoop.mapreduce.Reducer.Context)}
   *
   * @param cls the raw comparator to use
   * @throws IllegalStateException if the job is submitted
   */
  public void setGroupingComparatorClass(Class<? extends RawComparator> cls
                                         ) throws IllegalStateException {
    ensureState(JobState.DEFINE);
    conf.setOutputValueGroupingComparator(cls);
  }

  /**
   * Set the user-specified job name.
   *
   * @param name the job's new name.
   * @throws IllegalStateException if the job is submitted
   */
  public void setJobName(String name) throws IllegalStateException {
    ensureState(JobState.DEFINE);
    conf.setJobName(name);
  }

  /**
   * Turn speculative execution on or off for this job.
   *
   * @param speculativeExecution <code>true</code> if speculative execution
   *                             should be turned on, else <code>false</code>.
   * @throws IllegalStateException if the job is submitted
   */
  public void setSpeculativeExecution(boolean speculativeExecution) {
    ensureState(JobState.DEFINE);
    conf.setSpeculativeExecution(speculativeExecution);
  }

  /**
   * Turn speculative execution on or off for this job for map tasks.
   *
   * @param speculativeExecution <code>true</code> if speculative execution
   *                             should be turned on for map tasks,
   *                             else <code>false</code>.
   * @throws IllegalStateException if the job is submitted
   */
  public void setMapSpeculativeExecution(boolean speculativeExecution) {
    ensureState(JobState.DEFINE);
    conf.setMapSpeculativeExecution(speculativeExecution);
  }

  /**
   * Turn speculative execution on or off for this job for reduce tasks.
   *
   * @param speculativeExecution <code>true</code> if speculative execution
   *                             should be turned on for reduce tasks,
   *                             else <code>false</code>.
   * @throws IllegalStateException if the job is submitted
   */
  public void setReduceSpeculativeExecution(boolean speculativeExecution) {
    ensureState(JobState.DEFINE);
    conf.setReduceSpeculativeExecution(speculativeExecution);
  }

  /**
   * Get the URL where some job progress information will be displayed.
   *
   * @return the URL where some job progress information will be displayed.
   * @throws IllegalStateException if the job has not been submitted
   */
  public String getTrackingURL() {
    ensureState(JobState.RUNNING);
    return info.getTrackingURL();
  }

  /**
   * Get the progress of the job's setup, as a float between 0.0
   * and 1.0. When the job setup is completed, the function returns 1.0.
   *
   * @return the progress of the job's setup.
   * @throws IllegalStateException if the job has not been submitted
   * @throws IOException
   */
  public float setupProgress() throws IOException {
    ensureState(JobState.RUNNING);
    return info.setupProgress();
  }

  /**
   * Get the progress of the job's map-tasks, as a float between 0.0
   * and 1.0. When all map tasks have completed, the function returns 1.0.
   *
   * @return the progress of the job's map-tasks.
   * @throws IllegalStateException if the job has not been submitted
   * @throws IOException
   */
  public float mapProgress() throws IOException {
    ensureState(JobState.RUNNING);
    return info.mapProgress();
  }

  /**
   * Get the progress of the job's reduce-tasks, as a float between 0.0
   * and 1.0. When all reduce tasks have completed, the function returns 1.0.
   *
   * @return the progress of the job's reduce-tasks.
   * @throws IllegalStateException if the job has not been submitted
   * @throws IOException
   */
  public float reduceProgress() throws IOException {
    ensureState(JobState.RUNNING);
    return info.reduceProgress();
  }

  /**
   * Check if the job is finished or not.
   * This is a non-blocking call.
   *
   * @return <code>true</code> if the job is complete, else <code>false</code>.
   * @throws IllegalStateException if the job has not been submitted
   * @throws IOException
   */
  public boolean isComplete() throws IOException {
    ensureState(JobState.RUNNING);
    return info.isComplete();
  }

  /**
   * Check if the job completed successfully.
   *
   * @return <code>true</code> if the job succeeded, else <code>false</code>.
   * @throws IllegalStateException if the job has not been submitted
   * @throws IOException
   */
  public boolean isSuccessful() throws IOException {
    ensureState(JobState.RUNNING);
    return info.isSuccessful();
  }

  /**
   * Kill the running job. Blocks until all job tasks have been
   * killed as well. If the job is no longer running, it simply returns.
   *
   * @throws IllegalStateException if the job has not been submitted
   * @throws IOException
   */
  public void killJob() throws IOException {
    ensureState(JobState.RUNNING);
    info.killJob();
  }

  /**
   * Get events indicating completion (success/failure) of component tasks.
   *
   * @param startFrom index to start fetching events from
   * @return an array of {@link TaskCompletionEvent}s
   * @throws IllegalStateException if the job has not been submitted
   * @throws IOException
   */
  public TaskCompletionEvent[] getTaskCompletionEvents(int startFrom
                                                       ) throws IOException {
    ensureState(JobState.RUNNING);
    return info.getTaskCompletionEvents(startFrom);
  }

  /**
   * Kill indicated task attempt.
   *
   * @param taskId the id of the task attempt to be killed.
   * @throws IllegalStateException if the job has not been submitted
   * @throws IOException
   */
  public void killTask(TaskAttemptID taskId) throws IOException {
    ensureState(JobState.RUNNING);
    // Second argument false: kill the attempt without marking it as failed
    // (see RunningJob#killTask).
    info.killTask(org.apache.hadoop.mapred.TaskAttemptID.downgrade(taskId),
                  false);
  }

  /**
   * Fail indicated task attempt.
   *
   * @param taskId the id of the task attempt to be failed.
   * @throws IllegalStateException if the job has not been submitted
   * @throws IOException
   */
  public void failTask(TaskAttemptID taskId) throws IOException {
    ensureState(JobState.RUNNING);
    // Same call as killTask(), but the second argument is true so the attempt
    // is marked as failed (see RunningJob#killTask).
    info.killTask(org.apache.hadoop.mapred.TaskAttemptID.downgrade(taskId),
                  true);
  }

  /**
   * Gets the counters for this job.
   *
   * @return the counters for this job.
   * @throws IllegalStateException if the job has not been submitted
   * @throws IOException
   */
  public Counters getCounters() throws IOException {
    ensureState(JobState.RUNNING);
    return new Counters(info.getCounters());
  }

  /**
   * Fails if the given configuration key has a value.
   *
   * @param attr the configuration key that must be unset
   * @param msg the name of the API mode the key conflicts with; used in the
   *            exception message
   * @throws IOException if <code>attr</code> is set in the configuration
   */
  private void ensureNotSet(String attr, String msg) throws IOException {
    if (conf.get(attr) != null) {
      throw new IOException(attr + " is incompatible with " + msg + " mode.");
    }
  }

  /**
   * Sets the flag that will allow the JobTracker to cancel the HDFS delegation
   * tokens upon job completion. Defaults to true.
   *
   * @param value the new value of the flag
   * @throws IllegalStateException if the job is submitted
   */
  public void setCancelDelegationTokenUponJobCompletion(boolean value) {
    ensureState(JobState.DEFINE);
    conf.setBoolean(JOB_CANCEL_DELEGATION_TOKEN, value);
  }

  /**
   * Default to the new APIs unless they are explicitly set or the old mapper or
   * reduce attributes are used.
   *
   * After choosing the API for the map side (and, when the job has reducers,
   * for the reduce side), verifies that no configuration key belonging to the
   * other API is set.
   *
   * @throws IOException if the configuration is inconsistent
   */
  private void setUseNewAPI() throws IOException {
    int numReduces = conf.getNumReduceTasks();
    String oldMapperClass = "mapred.mapper.class";
    String oldReduceClass = "mapred.reducer.class";

    // Unless already decided, use the new map API only if no old mapper is set.
    conf.setBooleanIfUnset("mapred.mapper.new-api",
                           conf.get(oldMapperClass) == null);
    if (conf.getUseNewMapper()) {
      String mode = "new map API";
      ensureNotSet("mapred.input.format.class", mode);
      ensureNotSet(oldMapperClass, mode);
      if (numReduces != 0) {
        ensureNotSet("mapred.partitioner.class", mode);
      } else {
        // Map-only job: the output format is checked on the map side.
        ensureNotSet("mapred.output.format.class", mode);
      }
    } else {
      // Spelling kept as-is: this text ends up in the IOException message.
      String mode = "map compatability";
      ensureNotSet(JobContext.INPUT_FORMAT_CLASS_ATTR, mode);
      ensureNotSet(JobContext.MAP_CLASS_ATTR, mode);
      if (numReduces != 0) {
        ensureNotSet(JobContext.PARTITIONER_CLASS_ATTR, mode);
      } else {
        ensureNotSet(JobContext.OUTPUT_FORMAT_CLASS_ATTR, mode);
      }
    }

    if (numReduces != 0) {
      // Unless already decided, use the new reduce API only if no old reducer
      // is set.
      conf.setBooleanIfUnset("mapred.reducer.new-api",
                             conf.get(oldReduceClass) == null);
      if (conf.getUseNewReducer()) {
        String mode = "new reduce API";
        ensureNotSet("mapred.output.format.class", mode);
        ensureNotSet(oldReduceClass, mode);
      } else {
        // Spelling kept as-is: this text ends up in the IOException message.
        String mode = "reduce compatability";
        ensureNotSet(JobContext.OUTPUT_FORMAT_CLASS_ATTR, mode);
        ensureNotSet(JobContext.REDUCE_CLASS_ATTR, mode);
      }
    }
  }

  /**
   * Submit the job to the cluster and return immediately.
   *
   * @throws IllegalStateException if the job has already been submitted
   * @throws IOException
   * @throws InterruptedException
   * @throws ClassNotFoundException
   */
  public void submit() throws IOException, InterruptedException,
                              ClassNotFoundException {
    ensureState(JobState.DEFINE);
    setUseNewAPI();

    // Connect to the JobTracker and submit the job
    connect();
    info = jobClient.submitJobInternal(conf); // the actual submission
    super.setJobID(info.getID());
    // Only reached when submission succeeded; on failure the job stays DEFINE.
    state = JobState.RUNNING;
  }

  /**
   * Open a connection to the JobTracker by creating the {@link JobClient}
   * inside <code>ugi.doAs</code> (<code>ugi</code> is inherited from
   * {@link JobContext}).
   *
   * @throws IOException
   * @throws InterruptedException
   */
  private void connect() throws IOException, InterruptedException {
    ugi.doAs(new PrivilegedExceptionAction<Object>() {
      @Override
      public Object run() throws IOException {
        jobClient = new JobClient((JobConf) getConfiguration());
        return null;
      }
    });
  }

  /**
   * Submit the job to the cluster and wait for it to finish. If the job has
   * already been submitted, only waits for it.
   *
   * @param verbose print the progress to the user
   * @return true if the job succeeded
   * @throws IOException thrown if the communication with the
   *         <code>JobTracker</code> is lost
   * @throws InterruptedException
   * @throws ClassNotFoundException
   */
  public boolean waitForCompletion(boolean verbose
                                   ) throws IOException, InterruptedException,
                                            ClassNotFoundException {
    if (state == JobState.DEFINE) {
      submit();
    }
    if (verbose) {
      jobClient.monitorAndPrintJob(conf, info);
    } else {
      info.waitForCompletion();
    }
    return isSuccessful();
  }
}

job源码分析的更多相关文章

ABP源码分析一：整体项目结构及目录
ABP是一套非常优秀的web应用程序架构,适合用来搭建集中式架构的web应用程序. 整个Abp的Infrastructure是以Abp这个package为核心模块(core)+15个模块(module ...
HashMap与TreeMap源码分析
1. 引言在红黑树--算法导论(15)中学习了红黑树的原理.本来打算自己来试着实现一下,然而在看了JDK(1.8.0)TreeMap的源码后恍然发现原来它就是利用红黑树实现的(很惭愧学了Ja ...
nginx源码分析之网络初始化
nginx作为一个高性能的HTTP服务器,网络的处理是其核心,了解网络的初始化有助于加深对nginx网络处理的了解,本文主要通过nginx的源代码来分析其网络初始化. 从配置文件中读取初始化信息与网 ...
zookeeper源码分析之五服务端(集群leader)处理请求流程
leader的实现类为LeaderZooKeeperServer,它间接继承自标准ZookeeperServer.它规定了请求到达leader时需要经历的路径: PrepRequestProcesso ...
zookeeper源码分析之四服务端(单机)处理请求流程
上文: zookeeper源码分析之一服务端启动过程中,我们介绍了zookeeper服务器的启动过程,其中单机是ZookeeperServer启动,集群使用QuorumPeer启动,那么这次我们分析 ...
zookeeper源码分析之三客户端发送请求流程
znode 可以被监控,包括这个目录节点中存储的数据的修改,子节点目录的变化等,一旦变化可以通知设置监控的客户端,这个功能是zookeeper对于应用最重要的特性,通过这个特性可以实现的功能包括配置的 ...
java使用websocket，并且获取HttpSession，源码分析
转载请在页首注明作者与出处 http://www.cnblogs.com/zhuxiaojie/p/6238826.html 一:本文使用范围此文不仅仅局限于spring boot,普通的sprin ...
ABP源码分析二：ABP中配置的注册和初始化
一般来说,ASP.NET Web应用程序的第一个执行的方法是Global.asax下定义的Start方法.执行这个方法前HttpApplication 实例必须存在,也就是说其构造函数的执行必然是完成 ...
ABP源码分析三：ABP Module
Abp是一种基于模块化设计的思想构建的.开发人员可以将自定义的功能以模块(module)的形式集成到ABP中.具体的功能都可以设计成一个单独的Module.Abp底层框架提供便捷的方法集成每个Modu ...
ABP源码分析四：Configuration
核心模块的配置 Configuration是ABP中设计比较巧妙的地方.其通过AbpStartupConfiguration,Castle的依赖注入,Dictionary对象和扩展方法很巧妙的实现了配 ...

随机推荐

Linux下实现Rsync目录同步备份
需求:对于开发机器做目录的数据备份测试机IP:192.168.1.100 WEB目录:/bckup/ 下面我将用一台机器来备份上面测试机 /bckup下的所有数据,并实现实时同步备份机器IP: ...
Block的声明与定义语法
Block的声明 Block的声明与函数指针的声明类似返回值类型(^变量名)(参数列表) Block的定义 ^返回值类型(参数列表) { 表达式 } 其中: 1 如果返回值类型是void,可以省略 ...
2017秋软工1 - 本周PSP
1.本周PSP 2. 本周PSP饼状图 3. 本周进度条 4. 累计进度图
（转）apktool+dex2jar+jd_gui
转:http://www.cnblogs.com/MichaelGuan/archive/2011/10/25/2224578.html apktool: 可以解析资源文件,比如布局文件xml等,方便 ...
mysql入门 — （2）
创建表 CREATE TABLE 表名称 [IF NOT EXISTS]( 字段名1 列类型[属性] [索引] 字段名2 列类型[属性] [索引] ... 字段名n 列类型[属性] [索引] )[表类 ...
vagrant简单学习使用
1.安装vagrant 旧版本的vagrant可以在http://downloads.vagrantup.com/下载,支持的系统平台有mac,debian/ubuntu, centos,window ...
C++并行编程1
what is concurrency 我们可以一边看电视,一边唱歌.人并行非常容易理解,但是计算机呢?是不是我们一边编辑着word文档,一边听着歌,这样计算机就是在并行吗?不一定欧,如果你计算机是单 ...
PokeCats开发者日志（十三）
现在是PokeCats游戏开发的第六十二天的晚上,把软著权登记证书的截图加上,又重新提交审核了一遍,但愿能过吧...
敏捷冲刺DAY3
一. 每日会议 1. 照片 2. 昨日完成工作 3. 今日完成工作登录界面的进一步完善服务器搭建建立数据库下一步任务的规划,展望 4. 工作中遇到的困难工作中的困难:在进行模糊查询时,由于中 ...
【week2】结对编程-四则运算及感想
首先我要说一下,我的作业我尽力了,但是能力有限,还需练习. 四则运算,改进代码流程: 1.手动输入算式(属于中缀表达式) 2.将中缀表达式转化成后缀表达式生成out数组 3.一个操作数栈,一个运算符 ...

job源码分析

job源码分析的更多相关文章

随机推荐

热门专题