#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script runs the hadoop core commands.
#
# Usage: hadoop [--config confdir] [COMMAND | CLASSNAME] [args...]
#
# Flow:
#   1. Resolve this script's directory and source libexec/hadoop-config.sh.
#   2. Dispatch on the first argument: usage flags, deprecated hdfs/mapred
#      commands (delegated to the hdfs/mapred scripts), or core commands.
#   3. For core commands, map the command to a Java class and exec the JVM.
#
# Variables used but not defined here (JAVA, JAVA_HEAP_MAX, CLASSPATH,
# TOOL_PATH, cygwin, HADOOP_*_HOME, ...) are presumably provided by
# hadoop-config.sh or the caller's environment -- confirm against that file.

bin=$(which "$0")
bin=$(dirname "${bin}")
bin=$(cd "$bin" && pwd)

DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. "$HADOOP_LIBEXEC_DIR/hadoop-config.sh"

# Print the command-line help text to stdout.
function print_usage(){
  cat <<'EOF'
Usage: hadoop [--config confdir] [COMMAND | CLASSNAME]
  CLASSNAME            run the class named CLASSNAME
 or
  where COMMAND is one of:
  fs                   run a generic filesystem user client
  version              print the version
  jar <jar>            run a jar file
                       note: please use "yarn jar" to launch
                             YARN applications, not this command.
  checknative [-a|-h]  check native hadoop and compression libraries availability
  distcp <srcurl> <desturl> copy file or directories recursively
  archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive
  classpath            prints the class path needed to get the
                       Hadoop jar and the required libraries
  credential           interact with credential providers
  key                  manage keys via the KeyProvider
  daemonlog            get/set the log level for each daemon
  trace                view and modify Hadoop tracing settings

Most commands print help when invoked w/o parameters.
EOF
}

# No arguments at all: show help and stop.
if [ $# = 0 ]; then
  print_usage
  exit
fi

COMMAND=$1
case "$COMMAND" in
  # usage flags
  --help|-help|-h)
    print_usage
    exit
    ;;

  # hdfs commands
  namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer|fetchdt|oiv|dfsgroups|portmap|nfs3)
    echo "DEPRECATED: Use of this script to execute hdfs command is deprecated." >&2
    echo "Instead use the hdfs command for it." >&2
    echo "" >&2
    # try to locate hdfs and if present, delegate to it.
    # "dfsgroups" is passed on to the hdfs script as "groups".
    shift
    if [ -f "${HADOOP_HDFS_HOME}"/bin/hdfs ]; then
      exec "${HADOOP_HDFS_HOME}"/bin/hdfs "${COMMAND/dfsgroups/groups}" "$@"
    elif [ -f "${HADOOP_PREFIX}"/bin/hdfs ]; then
      exec "${HADOOP_PREFIX}"/bin/hdfs "${COMMAND/dfsgroups/groups}" "$@"
    else
      echo "HADOOP_HDFS_HOME not found!"
      exit 1
    fi
    ;;

  # mapred commands for backwards compatibility
  pipes|job|queue|mrgroups|mradmin|jobtracker|tasktracker)
    echo "DEPRECATED: Use of this script to execute mapred command is deprecated." >&2
    echo "Instead use the mapred command for it." >&2
    echo "" >&2
    # try to locate mapred and if present, delegate to it.
    # "mrgroups" is passed on to the mapred script as "groups".
    shift
    if [ -f "${HADOOP_MAPRED_HOME}"/bin/mapred ]; then
      exec "${HADOOP_MAPRED_HOME}"/bin/mapred "${COMMAND/mrgroups/groups}" "$@"
    elif [ -f "${HADOOP_PREFIX}"/bin/mapred ]; then
      exec "${HADOOP_PREFIX}"/bin/mapred "${COMMAND/mrgroups/groups}" "$@"
    else
      echo "HADOOP_MAPRED_HOME not found!"
      exit 1
    fi
    ;;

  # core commands
  *)
    # Map the command name to the Java class that implements it.
    case "$COMMAND" in
      fs)
        CLASS=org.apache.hadoop.fs.FsShell
        ;;
      version)
        CLASS=org.apache.hadoop.util.VersionInfo
        ;;
      jar)
        CLASS=org.apache.hadoop.util.RunJar
        if [[ -n "${YARN_OPTS}" ]] || [[ -n "${YARN_CLIENT_OPTS}" ]]; then
          echo "WARNING: Use \"yarn jar\" to launch YARN applications." >&2
        fi
        ;;
      key)
        CLASS=org.apache.hadoop.crypto.key.KeyShell
        ;;
      checknative)
        CLASS=org.apache.hadoop.util.NativeLibraryChecker
        ;;
      distcp)
        CLASS=org.apache.hadoop.tools.DistCp
        CLASSPATH=${CLASSPATH}:${TOOL_PATH}
        ;;
      daemonlog)
        CLASS=org.apache.hadoop.log.LogLevel
        ;;
      archive)
        CLASS=org.apache.hadoop.tools.HadoopArchives
        CLASSPATH=${CLASSPATH}:${TOOL_PATH}
        ;;
      credential)
        CLASS=org.apache.hadoop.security.alias.CredentialShell
        ;;
      trace)
        CLASS=org.apache.hadoop.tracing.TraceAdmin
        ;;
      classpath)
        # "$#" still counts the command itself here (no shift yet), so a
        # value above 1 means extra arguments were given to "classpath".
        if [ "$#" -gt 1 ]; then
          CLASS=org.apache.hadoop.util.Classpath
        else
          # No need to bother starting up a JVM for this simple case.
          if $cygwin; then
            # Only stderr is discarded; stdout carries the converted path.
            CLASSPATH=$(cygpath -p -w "$CLASSPATH" 2>/dev/null)
          fi
          echo "$CLASSPATH"
          exit
        fi
        ;;
      -*)
        # class and package names cannot begin with a -
        echo "Error: No command named \`$COMMAND' was found. Perhaps you meant \`hadoop ${COMMAND#-}'"
        exit 1
        ;;
      *)
        # Anything else is treated as a class name to run.
        CLASS=$COMMAND
        ;;
    esac

    # cygwin path translation
    if $cygwin; then
      CLASSPATH=$(cygpath -p -w "$CLASSPATH" 2>/dev/null)
      HADOOP_LOG_DIR=$(cygpath -w "$HADOOP_LOG_DIR" 2>/dev/null)
      HADOOP_PREFIX=$(cygpath -w "$HADOOP_PREFIX" 2>/dev/null)
      HADOOP_CONF_DIR=$(cygpath -w "$HADOOP_CONF_DIR" 2>/dev/null)
      HADOOP_COMMON_HOME=$(cygpath -w "$HADOOP_COMMON_HOME" 2>/dev/null)
      HADOOP_HDFS_HOME=$(cygpath -w "$HADOOP_HDFS_HOME" 2>/dev/null)
      HADOOP_YARN_HOME=$(cygpath -w "$HADOOP_YARN_HOME" 2>/dev/null)
      HADOOP_MAPRED_HOME=$(cygpath -w "$HADOOP_MAPRED_HOME" 2>/dev/null)
    fi

    # Drop the command name; the remaining arguments go to the Java class.
    shift

    # Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS
    HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"

    # make sure security appender is turned off
    HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"

    export CLASSPATH
    # JAVA_HEAP_MAX and HADOOP_OPTS are intentionally unquoted: each holds
    # zero or more space-separated JVM options that must be word-split.
    exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS "$CLASS" "$@"
    ;;
esac

  可以看到,当使用 hadoop 脚本运行 jar 包(即 hadoop jar 命令)时,脚本最终会启动 JVM 并执行 org.apache.hadoop.util.RunJar 这个 Java 类(注意它是一个类,而不是脚本).

package org.apache.hadoop.util;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.lang.reflect.Array;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.List;
import java.util.jar.Attributes;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import java.util.jar.Manifest;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.io.IOUtils;

/**
 * Entry point used to run a jar file: unpacks the jar into a temporary
 * working directory, builds a class loader over the unpacked contents, and
 * invokes the {@code main(String[])} method of the jar's main class.
 *
 * <p>The main class is taken from the jar manifest's {@code Main-Class}
 * attribute, or from the second command-line argument when the manifest
 * does not name one.
 */
@Private
@Unstable
public class RunJar {

  /** Pattern that matches any string. */
  public static final Pattern MATCH_ANY = Pattern.compile(".*");

  /** Priority used when registering the working-directory cleanup hook. */
  public static final int SHUTDOWN_HOOK_PRIORITY = 10;

  /** Environment variable that switches on the client class loader. */
  public static final String HADOOP_USE_CLIENT_CLASSLOADER =
      "HADOOP_USE_CLIENT_CLASSLOADER";

  /** Environment variable holding extra classpath entries. */
  public static final String HADOOP_CLASSPATH = "HADOOP_CLASSPATH";

  /** Environment variable listing the client class loader's system classes. */
  public static final String HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES =
      "HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES";

  /**
   * Unpacks every entry of a jar file into a directory.
   *
   * @param jarFile the jar to unpack
   * @param toDir the directory to unpack into
   * @throws IOException if the jar cannot be read or a file cannot be written
   */
  public static void unJar(File jarFile, File toDir) throws IOException {
    unJar(jarFile, toDir, MATCH_ANY);
  }

  /**
   * Unpacks the non-directory entries of a jar file whose names fully match
   * a pattern.
   *
   * @param jarFile the jar to unpack
   * @param toDir the directory to unpack into
   * @param unpackRegex pattern an entry name must match to be extracted
   * @throws IOException if the jar cannot be read, a directory or file cannot
   *     be created, or an entry would be written outside {@code toDir}
   */
  public static void unJar(File jarFile, File toDir, Pattern unpackRegex)
      throws IOException {
    JarFile jar = new JarFile(jarFile);
    try {
      String targetDirPath = toDir.getCanonicalPath() + File.separator;
      Enumeration<JarEntry> entries = jar.entries();
      while (entries.hasMoreElements()) {
        JarEntry entry = entries.nextElement();
        if (entry.isDirectory()
            || !unpackRegex.matcher(entry.getName()).matches()) {
          continue;
        }
        File file = new File(toDir, entry.getName());
        // Reject entry names (e.g. containing "..") that would resolve to a
        // location outside the target directory.
        if (!file.getCanonicalPath().startsWith(targetDirPath)) {
          throw new IOException("expanding " + entry.getName()
              + " would create file outside of " + toDir);
        }
        InputStream in = jar.getInputStream(entry);
        try {
          ensureDirectory(file.getParentFile());
          OutputStream out = new FileOutputStream(file);
          try {
            IOUtils.copyBytes(in, out, 8192);
          } finally {
            out.close();
          }
        } finally {
          in.close();
        }
      }
    } finally {
      jar.close();
    }
  }

  /**
   * Makes sure a directory exists, creating it and any missing parents.
   *
   * @param dir the directory that must exist
   * @throws IOException if the directory does not exist and cannot be created
   */
  private static void ensureDirectory(File dir) throws IOException {
    if (!dir.mkdirs() && !dir.isDirectory()) {
      throw new IOException("Mkdirs failed to create " + dir.toString());
    }
  }

  /**
   * Command-line entry point; see {@link #run(String[])}.
   *
   * @param args {@code jarFile [mainClass] args...}
   * @throws Throwable whatever the invoked main method throws
   */
  public static void main(String[] args) throws Throwable {
    new RunJar().run(args);
  }

  /**
   * Unpacks the given jar and runs its main class.
   *
   * @param args {@code jarFile [mainClass] args...}; {@code mainClass} is
   *     consumed only when the jar manifest has no {@code Main-Class}
   * @throws Throwable the exception thrown by the invoked main method, or an
   *     error raised while opening, unpacking or loading the jar
   */
  public void run(String[] args) throws Throwable {
    String usage = "RunJar jarFile [mainClass] args...";

    if (args.length < 1) {
      System.err.println(usage);
      System.exit(-1);
    }

    int firstArg = 0;
    String fileName = args[firstArg++];
    File file = new File(fileName);
    if (!file.exists() || !file.isFile()) {
      System.err.println("Not a valid JAR: " + file.getCanonicalPath());
      System.exit(-1);
    }

    String mainClassName = null;
    JarFile jarFile;
    try {
      jarFile = new JarFile(fileName);
    } catch (IOException io) {
      throw new IOException("Error opening job jar: " + fileName, io);
    }
    try {
      Manifest manifest = jarFile.getManifest();
      if (manifest != null) {
        mainClassName = manifest.getMainAttributes().getValue("Main-Class");
      }
    } finally {
      // Close the jar even when reading the manifest fails.
      jarFile.close();
    }

    if (mainClassName == null) {
      if (args.length < 2) {
        System.err.println(usage);
        System.exit(-1);
      }
      mainClassName = args[firstArg++];
    }
    // Accept path-style names such as "org/example/Main".
    mainClassName = mainClassName.replaceAll("/", ".");

    File tmpDir = new File(System.getProperty("java.io.tmpdir"));
    ensureDirectory(tmpDir);

    final File workDir;
    try {
      workDir = File.createTempFile("hadoop-unjar", "", tmpDir);
    } catch (IOException ioe) {
      System.err.println("Error creating temp dir in java.io.tmpdir "
          + tmpDir + " due to " + ioe.getMessage());
      System.exit(-1);
      // Needed so the compiler sees workDir as definitely assigned below.
      return;
    }

    // createTempFile made a regular file; replace it with a directory of
    // the same name.
    if (!workDir.delete()) {
      System.err.println("Delete failed for " + workDir);
      System.exit(-1);
    }
    ensureDirectory(workDir);

    // Remove the unpacked jar contents when the JVM shuts down.
    ShutdownHookManager.get().addShutdownHook(
        new Runnable() {
          public void run() {
            FileUtil.fullyDelete(workDir);
          }
        }, SHUTDOWN_HOOK_PRIORITY);

    unJar(file, workDir);

    ClassLoader loader = createClassLoader(file, workDir);

    Thread.currentThread().setContextClassLoader(loader);
    Class<?> mainClass = Class.forName(mainClassName, true, loader);
    Method main = mainClass.getMethod("main", String[].class);

    // Everything after the jar name (and the main class, if it was taken
    // from the command line) is passed on to the target main method.
    String[] newArgs = Arrays.copyOfRange(args, firstArg, args.length);
    try {
      main.invoke(null, new Object[] { newArgs });
    } catch (InvocationTargetException e) {
      // Surface the target's own exception rather than the reflection wrapper.
      throw e.getTargetException();
    }
  }

  /**
   * Builds the class loader used to load the jar's main class.
   *
   * <p>The classpath is the working directory, the jar itself, the
   * {@code classes/} directory and the jars under {@code lib/}. When
   * {@link #useClientClassLoader()} is true an {@code ApplicationClassLoader}
   * is created (with {@code HADOOP_CLASSPATH} appended when set); otherwise a
   * plain {@link URLClassLoader} is used.
   *
   * @param file the jar file being run
   * @param workDir the directory the jar was unpacked into
   * @return the class loader for the job
   * @throws MalformedURLException if a classpath element cannot be turned
   *     into a URL
   */
  private ClassLoader createClassLoader(File file, File workDir)
      throws MalformedURLException {
    ClassLoader loader;
    if (useClientClassLoader()) {
      StringBuilder sb = new StringBuilder();
      sb.append(workDir + "/")
          .append(File.pathSeparator).append(file)
          .append(File.pathSeparator).append(workDir + "/classes/")
          .append(File.pathSeparator).append(workDir + "/lib/*");

      String hadoopClasspath = getHadoopClasspath();
      if (hadoopClasspath != null && !hadoopClasspath.isEmpty()) {
        sb.append(File.pathSeparator).append(hadoopClasspath);
      }
      String clientClasspath = sb.toString();

      String systemClasses = getSystemClasses();
      List<String> systemClassesList = systemClasses == null
          ? null
          : Arrays.asList(StringUtils.getTrimmedStrings(systemClasses));

      loader = new ApplicationClassLoader(clientClasspath,
          getClass().getClassLoader(), systemClassesList);
    } else {
      List<URL> classPath = new ArrayList<URL>();
      classPath.add(new File(workDir + "/").toURI().toURL());
      classPath.add(file.toURI().toURL());
      classPath.add(new File(workDir, "classes/").toURI().toURL());
      File[] libs = new File(workDir, "lib").listFiles();
      // listFiles() returns null when lib/ does not exist.
      if (libs != null) {
        for (File lib : libs) {
          classPath.add(lib.toURI().toURL());
        }
      }
      loader = new URLClassLoader(classPath.toArray(new URL[0]));
    }
    return loader;
  }

  /**
   * @return true when the {@code HADOOP_USE_CLIENT_CLASSLOADER} environment
   *     variable parses as boolean true
   */
  boolean useClientClassLoader() {
    return Boolean.parseBoolean(System.getenv(HADOOP_USE_CLIENT_CLASSLOADER));
  }

  /** @return the {@code HADOOP_CLASSPATH} environment variable, or null. */
  String getHadoopClasspath() {
    return System.getenv(HADOOP_CLASSPATH);
  }

  /**
   * @return the {@code HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES} environment
   *     variable, or null
   */
  String getSystemClasses() {
    return System.getenv(HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES);
  }
}

  可以看到,这个类先把要运行的 jar 包解压到 java.io.tmpdir 下的一个临时目录,再用解压目录、jar 包本身、classes/ 目录以及 lib/ 下的 jar 组成 classpath 并创建类加载器,然后通过反射获取 jar 包主类的 main 方法并执行它.

hadoop运行作业的脚本解析的更多相关文章

  1. 大数据时代之hadoop(二):hadoop脚本解析

    “兵马未动,粮草先行”,要想深入的了解hadoop,我觉得启动或停止hadoop的脚本是必须要先了解的.说到底,hadoop就是一个分布式存储和计算框架,但是这个分布式环境是如何启动,管理的呢,我就带 ...

  2. 一文了解 Hadoop 运行机制

    大数据技术栈在当下已经是比较成熟的了,Hadoop 作为大数据存储的基石,其重要程度不言而喻,作为一个想从 java 后端转向大数据开发的程序员来说,打好 Hadoop 基础,就相当于夯实建造房屋的地 ...

  3. SQL Server代理(10/12):使用代理账号运行作业

    SQL Server代理是所有实时数据库的核心.代理有很多不明显的用法,因此系统的知识,对于开发人员还是DBA都是有用的.这系列文章会通俗介绍它的很多用法. 在这一系列的上一篇,你查看了msdb库下用 ...

  4. hadoop运行原理之Job运行(二) Job提交及初始化

    本篇主要介绍Job从客户端提交到JobTracker及其被初始化的过程. 以WordCount为例,以前的程序都是通过JobClient.runJob()方法来提交Job,但是现在大多用Job.wai ...

  5. 使用MRUnit,Mockito和PowerMock进行Hadoop MapReduce作业的单元测试

    0.preliminary 环境搭建 Setup development environment Download the latest version of MRUnit jar from Apac ...

  6. 宙斯是一个完整的Hadoop的作业平台[转]

    https://github.com/alibaba/zeus 宙斯(zeus)是什么 宙斯是一个完整的Hadoop的作业平台从Hadoop任务的调试运行到生产任务的周期调度 宙斯支持任务的整个生命周 ...

  7. 在SQL Server Management Studio中可以运行作业但是用T-SQL运行则失败

    原文:在SQL Server Management Studio中可以运行作业但是用T-SQL运行则失败 问题: 在SQL Server Management Studio中可以运行作业但是用T-SQ ...

  8. 老李推荐:第8章5节《MonkeyRunner源码剖析》MonkeyRunner启动运行过程-运行测试脚本

    老李推荐:第8章5节<MonkeyRunner源码剖析>MonkeyRunner启动运行过程-运行测试脚本   poptest是国内唯一一家培养测试开发工程师的培训机构,以学员能胜任自动化 ...

  9. 使用Windows任务计划程序运行Windows PowerShell脚本

    创建计划任务以运行PowerShell脚本 我需要创建一个计划任务来运行Windows PowerShell脚本的第一件事是我将执行的命令行.找到这个的简单方法是使用Run 命令.有时,我需要知道什么 ...

随机推荐

  1. POJ 2773 Happy 2006#素数筛选+容斥原理+二分

    http://poj.org/problem?id=2773 说实话这道题..一点都不Happy好吗 似乎还可以用欧拉函数来解这道题,但正好刚学了容斥原理和二分,就用这个解法吧. 题解:要求输出[1, ...

  2. asp.net正则表达式去除a标签

    if (drr["allow_a"].ToString() == "False") { cont = dr["news_Content"]. ...

  3. 异常处理try-catch-finally笔记

    当程序发生异常时,我们期望:返回到一种安全状态,并能够让用户执行一些其他的命令:或者 允许用户保存所有操作的结果,并以适当的方式终止程序. 异常处理机制:程序的执行过程中如果出现异常,会自动生成一个异 ...

  4. Leetcode 073 Set Matrix Zeroes

    Given a m x n matrix, if an element is 0, set its entire row and column to 0. Do it in place. click ...

  5. LeetCode OJ 123. Best Time to Buy and Sell Stock III

    Say you have an array for which the ith element is the price of a given stock on day i. Design an al ...

  6. shell 之awk 关联数组高级应用

    最近由于数据迁移过,有些用户信息需要再次确认下,也许数据量比较大,但是需要最终确认的比如说是用户ID和其对应的用户积分数,这样就会导致出现文本a(老的数据),文本b(新的数据).比如 这是文本a.tx ...

  7. sql分页带参数,带排序等,动态实现的方法

    USE [YQOBS] GO /****** Object: StoredProcedure [dbo].[PageList] Script Date: 11/06/2014 11:39:35 *** ...

  8. maven入门(上)

    Apache Maven 入门篇 ( 上 ) 作者:George Ma 写这个 maven 的入门篇是因为之前在一个开发者会的动手实验中发现挺多人对于 maven 不是那么了解,所以就有了这个想法.这 ...

  9. go语言实现寻找最大子数组

    题目:给定一个数字序列,寻找其中各元素相加和最大的子数组 /* 寻找最大子数组go语言实现 */ package main import fmt "fmt" func main() ...

  10. 关于css的hack问题

    <!--[if <keywords>? IE <version>?]> HTML代码块 <![endif]--> 取值: <keywords> ...