云计算——实验一 HDFS与MAPREDUCE操作

1、虚拟机集群搭建部署hadoop

利用VMware、CentOS 7、Xshell（或SecureCRT）等软件搭建虚拟机集群并部署Hadoop。

远程连接工具使用Xshell：

2、HDFS文件操作

2.1 HDFS接口编程

调用HDFS文件接口实现对分布式文件系统中文件的访问，如创建、修改、删除等

3、MAPREDUCE并行程序开发

求每年最高气温

本实验在编写完相关代码后，将项目打包成jar包并上传至CentOS，再利用hadoop命令运行。

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce job that computes the highest temperature seen for each year.
 *
 * <p>Every input line is read as fixed-position text: characters 0-3 are the
 * year and everything from index 8 onwards is the temperature (sample line:
 * {@code 2000010115} yields year {@code 2000}, temperature {@code 15}).
 * Characters 4-7 are ignored by this job (presumably month and day).
 *
 * <p>The class is meant to be packaged into a jar and launched with the
 * {@code hadoop jar} command.
 */
public class Temperature {

    /** Number of leading characters that hold the year. */
    private static final int YEAR_LENGTH = 4;

    /** Index of the first character of the temperature field. */
    private static final int TEMPERATURE_OFFSET = 8;

    /**
     * Mapper that turns one text line into a (year, temperature) pair.
     *
     * <p>The four generic types are:
     * <ul>
     *   <li>KeyIn    - byte offset at which the line starts in the input</li>
     *   <li>ValueIn  - the text of the line</li>
     *   <li>KeyOut   - the year extracted from the line</li>
     *   <li>ValueOut - the temperature extracted from the line</li>
     * </ul>
     */
    static class TempMapper extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        /**
         * Emits the year and temperature found in {@code value}.
         *
         * @param key     offset of the line within the input
         * @param value   the line itself
         * @param context sink for the (year, temperature) pair
         * @throws NumberFormatException if the temperature field is not an integer
         */
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            // A blank or truncated line (for example a trailing empty line in the
            // input file) has no temperature field; skip it instead of failing the
            // whole job with a StringIndexOutOfBoundsException.
            if (line.length() <= TEMPERATURE_OFFSET) {
                return;
            }

            // Sample output: Before Mapper: 0, 2000010115
            System.out.print("Before Mapper: " + key + ", " + value);

            String year = line.substring(0, YEAR_LENGTH);
            // trim() tolerates stray whitespace around the number.
            int temperature = Integer.parseInt(line.substring(TEMPERATURE_OFFSET).trim());
            context.write(new Text(year), new IntWritable(temperature));

            // Sample output: After Mapper:2000, 15
            System.out.println("======After Mapper:" + year + ", " + temperature);
        }
    }

    /**
     * Reducer that keeps only the largest temperature of each year.
     */
    static class TempReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        /**
         * Writes {@code (key, max(values))}.
         *
         * @param key     the year
         * @param values  all temperatures emitted for that year
         * @param context sink for the (year, maximum) pair
         */
        @Override
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int maxValue = Integer.MIN_VALUE;
            // Collected only for the debug line below; StringBuilder is enough
            // because the buffer never leaves this method.
            StringBuilder seen = new StringBuilder();
            for (IntWritable value : values) {
                maxValue = Math.max(maxValue, value.get());
                seen.append(value).append(", ");
            }

            // Sample output: Before Reduce: 2000, 15, 23, 99, 12, 22,
            System.out.print("Before Reduce: " + key + ", " + seen);
            context.write(key, new IntWritable(maxValue));
            // Sample output: After Reduce: 2000, 99
            System.out.println("======After Reduce: " + key + ", " + maxValue);
        }
    }

    /**
     * Configures and runs the job against fixed HDFS paths, blocking until it
     * completes. The process exits with status 1 when the job fails.
     *
     * @param args ignored
     * @throws Exception if the job cannot be set up or submitted
     */
    public static void main(String[] args) throws Exception {
        // Input path.
        // NOTE(review): "intput.txt" looks like a typo for "input.txt"; it is kept
        // as-is because it must match the file actually uploaded to HDFS - confirm.
        String dst = "hdfs://localhost:9000/intput.txt";
        // Output path: it must not exist yet; even an empty directory is rejected.
        String dstOut = "hdfs://localhost:9000/output";

        Configuration hadoopConfig = new Configuration();
        hadoopConfig.set("fs.hdfs.impl",
            org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()
        );
        hadoopConfig.set("fs.file.impl",
            org.apache.hadoop.fs.LocalFileSystem.class.getName()
        );

        // Job.getInstance replaces the deprecated Job(Configuration) constructor.
        Job job = Job.getInstance(hadoopConfig);
        // Needed when the job is run from a jar: tells Hadoop which jar to ship.
        // Must name this class (the original referenced a non-existent class).
        job.setJarByClass(Temperature.class);

        // Input and output locations of the job.
        FileInputFormat.addInputPath(job, new Path(dst));
        FileOutputFormat.setOutputPath(job, new Path(dstOut));

        // Custom Mapper and Reducer for the two phases.
        job.setMapperClass(TempMapper.class);
        job.setReducerClass(TempReducer.class);

        // Key and value types of the final output.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Run the job and wait for it to finish.
        boolean succeeded = job.waitForCompletion(true);
        System.out.println("Finished");
        if (!succeeded) {
            // Surface the failure to the calling shell or scheduler.
            System.exit(1);
        }
    }
}

词频统计

mapper：

import java.io.IOException;

import org.apache.commons.lang.StringUtils;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper of the word-count job: splits each input line on spaces and emits
 * {@code (word, 1)} for every resulting token.
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable>{

    /** The count attached to every emitted word; shared because it never changes. */
    private static final LongWritable ONE = new LongWritable(1);

    /**
     * Output key reused across records instead of allocating a new Text per
     * word, following the pattern of the official Hadoop WordCount example.
     */
    private final Text outKey = new Text();

    /**
     * Emits one {@code (word, 1)} pair per space-separated token of the line.
     *
     * @param key     offset of the line within the input (unused)
     * @param value   the line to tokenize
     * @param context sink for the emitted pairs
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        // Only the space character is a separator here; tabs and other
        // whitespace stay inside the tokens.
        String[] words = StringUtils.split(value.toString(), " ");
        for (String word : words) {
            outKey.set(word);
            context.write(outKey, ONE);
        }
    }
}

reducer：

package cn.edu.bupt.wcy.wordcount;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer of the word-count job: adds up all counts received for a word and
 * emits {@code (word, total)}.
 */
public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    /**
     * Sums the counts of one word.
     *
     * @param arg0    the word
     * @param arg1    the counts emitted for that word
     * @param context sink for the {@code (word, total)} pair
     */
    @Override
    protected void reduce(Text arg0, Iterable<LongWritable> arg1,
            Reducer<Text, LongWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        // Accumulate in a long: the values are longs, and an int accumulator
        // with "+=" would silently narrow and overflow past Integer.MAX_VALUE.
        long sum = 0L;
        for (LongWritable num : arg1) {
            sum += num.get();
        }
        context.write(arg0, new LongWritable(sum));
    }
}

runner：

package cn.edu.bupt.wcy.wordcount;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

/**
 * Driver that configures and submits the word-count job built from
 * {@link WordCountMapper} and {@link WordCountReducer}.
 */
public class WordCountRunner {

    /** Index of the input path within the argument array. */
    private static final int INPUT_ARG = 1;

    /** Index of the output path within the argument array. */
    private static final int OUTPUT_ARG = 2;

    /**
     * Runs the job and waits for it to finish; the process exits with status 1
     * when the job fails.
     *
     * @param args {@code args[1]} is the input path and {@code args[2]} the output
     *             path; {@code args[0]} is not read (presumably it holds the class
     *             name passed on the {@code hadoop jar} command line - confirm
     *             against how the jar is launched)
     * @throws IllegalArgumentException if fewer than three arguments are given
     */
    public static void main(String[] args) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args.length <= OUTPUT_ARG) {
            throw new IllegalArgumentException(
                    "expected at least 3 arguments: <ignored> <input path> <output path>, got "
                            + args.length);
        }

        Configuration conf = new Configuration();
        // Job.getInstance replaces the deprecated Job(Configuration) constructor.
        Job job = Job.getInstance(conf);
        job.setJarByClass(WordCountRunner.class);
        job.setJobName("wordcount");

        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[INPUT_ARG]));
        FileOutputFormat.setOutputPath(job, new Path(args[OUTPUT_ARG]));

        // Report a failed job through the exit status rather than ignoring it.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
    }
}

云计算——实验一 HDFS与MAPREDUCE操作的更多相关文章

Linux实验：hdfs shell基本命令操作（一）
[实验目的] 1)熟练hdfs shell命令操作 2)理解hdfs shell和linux shell命令 [实验原理] 安装好hadoop环境之后,可以执行hdfs shell命令对hdfs 的空 ...
Linux实验：hdfs shell基本命令操作（二）
[实验目的] 1)熟练hdfs shell命令操作 2)理解hdfs shell和linux shell命令[实验原理] 安装好hadoop环境之后,可以执行hdfs shell命令 ...
4 weekend110的hdfs&mapreduce测试 + hdfs的实现机制初始 + hdfs的shell操作 + 无密登陆配置
Hdfs是根/目录,windows是每一个盘符, 1 从Linux里传一个到,hdfs里去 2 从hdfs里下一个到,linux里去想从hdfs里,下载到linux, 涨知识,记住,hdfs是建 ...
高可用,完全分布式Hadoop集群HDFS和MapReduce安装配置指南
原文:http://my.oschina.net/wstone/blog/365010#OSC_h3_13 (WJW)高可用,完全分布式Hadoop集群HDFS和MapReduce安装配置指南 [X] ...
大数据开发实战：HDFS和MapReduce优缺点分析
一. HDFS和MapReduce优缺点 1.HDFS的优势 HDFS的英文全称是 Hadoop Distributed File System,即Hadoop分布式文件系统,它是Hadoop的核心子 ...
hadoop之HDFS与MapReduce
Hadoop历史雏形开始于2002年的Apache的Nutch,Nutch是一个开源Java 实现的搜索引擎.它提供了我们运行自己的搜索引擎所需的全部工具.包括全文搜索和Web爬虫. 随后在2003 ...
HBase 相关API操练(三)：MapReduce操作HBase
MapReduce 操作 HBase 在 HBase 系统上运行批处理运算,最方便和实用的模型依然是 MapReduce,如下图所示. HBase Table 和 Region 的关系类似 HDFS ...
Hadoop平台上HDFS和MapReduce的功能
1.用自己的话阐明Hadoop平台上HDFS和MapReduce的功能.工作原理和工作过程. HDFS (1)第一次启动 namenode 格式化后,创建 fsimage 和 edits 文件.如果不 ...
7.MapReduce操作Hbase
7 HBase的MapReduce HBase中Table和Region的关系,有些类似HDFS中File和Block的关系.由于HBase提供了配套的与MapReduce进行交互的API如 Ta ...

随机推荐

springboot发送邮件（含附件）
引入maven <dependency> <groupId>org.springframework.boot</groupId> <artifactId> ...
java源码——统计字符串中字符出现的次数
对于任意输入的一段字符串,读取并且计算其中所有字符出现的次数. 使用HashMap存储字符和其对应的出现的次数,输出时,对HashMap进行遍历. 难点在于对HashMap的遍历,第一次使用,也是学习 ...
【LeetCode】935. Knight Dialer 解题报告（Python）
作者: 负雪明烛 id: fuxuemingzhu 个人博客: http://fuxuemingzhu.cn/ 目录题目描述题目大意解题方法动态规划TLE 空间换时间,利用对称性优化空间复杂 ...
【LeetCode】129. Sum Root to Leaf Numbers 解题报告（Python）
[LeetCode]129. Sum Root to Leaf Numbers 解题报告(Python) 标签(空格分隔): LeetCode 题目地址:https://leetcode.com/pr ...
【LeetCode】556. Next Greater Element III 解题报告（Python）
[LeetCode]556. Next Greater Element III 解题报告(Python) 标签(空格分隔): LeetCode 作者: 负雪明烛 id: fuxuemingzhu 个人 ...
react hooks 如何自定义组件（react函数组件的封装）
前言这里写一下如何封装可复用组件.首先技术栈 react hooks + props-type + jsx封装纯函数组件.类组件和typeScript在这不做讨论,大家别白跑一趟. 接下来会说一下封 ...
【模型推理】量化实现分享三：详解 ACIQ 对称量化算法实现
欢迎关注我的公众号 [极智视界],回复001获取Google编程规范 O_o >_< o_O O_o ~_~ o_O 大家好,我是极智视界,本文剖析一下AC ...
[JNI开发]使用javah命令生成.h的头文件
第一步:进入对应的.java目录 javac xxx.java 生成对应的xxx.class文件第二步:退回到/java目录 javah -classpath . -jni 包名.类名
Java初学者作业——编写Java程序，输出1～100之间能够同时被3和4整除的最大的五个数字。
返回本章节返回作业目录需求说明: 编写Java程序,输出1-100之间能够同时被3和4整除的最大的五个数字. 实现思路: 声明变量count,用于存储满足条件的数据个数,设置初始值为0. 在区间1 ...
NPM镜像地址
NPM镜像地址 npm ---- https://registry.npmjs.org/ cnpm --- http://r.cnpmjs.org/ taobao - https://registry ...

云计算——实验一 HDFS与MAPREDUCE操作

1、虚拟机集群搭建部署hadoop

利用VMware、centOS-7、Xshell(secureCrt)等软件搭建集群部署hadoop

远程连接工具使用Xshell：

HDFS文件操作

2.1 HDFS接口编程

调用HDFS文件接口实现对分布式文件系统中文件的访问，如创建、修改、删除等

三、MAPREDUCE并行程序开发

求每年最高气温

云计算——实验一 HDFS与MAPREDUCE操作的更多相关文章

随机推荐

热门专题