初学storm,有不足的地方还请纠正。

网上看了很多wordcount实例,发现都不是我想要的。

实现场景:统计shengjing.txt词频到集合,一次打印结果。

● 消息源Spout
  继承BaseRichSpout类 / 实现IRichSpout接口
    open,初始化动作;
    nextTuple,消息接入,执行数据发射;
    ack,tuple成功处理后调用;
    fail,tuple处理失败后调用;
    declareOutputFields,声明输出字段;

● 处理单元Bolt
  继承BaseBasicBolt类 / BaseWindowedBolt / 实现IRichBolt接口
    prepare,worker启动时初始化;
    execute,接受一个tuple / tupleWindow并执行逻辑处理,发射出去;
    cleanup,关闭前调用;
    declareOutputFields,字段声明;

● 项目结构

● pom.xml文件,配置项目jar依赖

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Project coordinates for the Storm WordCount example -->
<groupId>com.scps.storm</groupId>
<artifactId>storm-example</artifactId>
<version>0.0.1</version>
<name>storm.example</name>
<dependencies>
<!-- Apache Storm core library; the only dependency this example needs -->
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-core</artifactId>
<version>1.1.0</version>
</dependency>
</dependencies>
</project>

● WordTopology.java文件,入口类,实例Topology、Spout、Bolt,配置等

 package com.scps.storm.helloword;

 import java.util.concurrent.TimeUnit;

 import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseWindowedBolt.Duration;
import org.apache.storm.tuple.Fields; import com.scps.storm.helloword.bolt.SlidingWindowBolt;
import com.scps.storm.helloword.bolt.WordCountBolt;
import com.scps.storm.helloword.bolt.WordFinalBolt;
import com.scps.storm.helloword.bolt.WordSplitBolt;
import com.scps.storm.helloword.spout.WordReaderSpout; public class WordTopology { public static void main(String[] args) { TopologyBuilder builder = new TopologyBuilder(); // 1个task去读文件
builder.setSpout("word-reader", new WordReaderSpout(), 1); // 2个task分割行
builder.setBolt("word-split", new WordSplitBolt(), 2).shuffleGrouping("word-reader"); // 2个task分批统计,并发送相同的word到同一个task
builder.setBolt("word-count", new WordCountBolt(), 2).fieldsGrouping("word-split", new Fields("word")); // 1个task汇总,每隔3秒统计最近5秒的tuple,SlidingWindow滑动窗口(间隔)
// builder.setBolt("sliding-window-bolt", new SlidingWindowBolt().withWindow(new Duration(5, TimeUnit.SECONDS), new Duration(3, TimeUnit.SECONDS)), 1).shuffleGrouping("word-count");
// 1个task汇总,统计5秒内的tuple,不能超过15秒?提示超时错误,TumblingWindow滚动窗口
builder.setBolt("sliding-window-bolt", new SlidingWindowBolt().withTumblingWindow(new Duration(5, TimeUnit.SECONDS)), 1).shuffleGrouping("word-count"); // 1个task输出
builder.setBolt("word-final", new WordFinalBolt(), 1).shuffleGrouping("sliding-window-bolt"); Config conf = new Config(); conf.setDebug(false); if (args != null && args.length > 0) { // 在集群运行,需要mvn package编译
// bin/storm jar "/root/storm-example-0.0.1.jar" com.scps.storm.helloword.WordTopology "http://nimbus:8080/uploads/shengjing.txt" wordcount try { String file = args[0];
String name = args[1]; conf.put("file", file);
// conf.setNumWorkers(2); StormSubmitter.submitTopology(name, conf, builder.createTopology()); } catch (AlreadyAliveException e) { e.printStackTrace(); } catch (InvalidTopologyException e) { e.printStackTrace(); } catch (AuthorizationException e) { e.printStackTrace();
} } else { // 直接在eclipse中运行 conf.put("file", "C:\\Users\\Administrator\\Downloads\\shengjing1.txt");
// conf.put("file", "http://192.168.100.170:8080/uploads/shengjing.txt");
// conf.setMaxTaskParallelism(2); // 设置最大task数
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("wordcount", conf, builder.createTopology());
}
}
}

● WordReaderSpout.java文件,读取txt文件,发送行

 package com.scps.storm.helloword.spout;

 import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map; import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichSpout;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils; public class WordReaderSpout implements IRichSpout { private static final long serialVersionUID = 1L;
private SpoutOutputCollector outputCollector;
private String filePath;
private boolean completed = false; public void ack(Object arg0) { } public void activate() { } public void close() { } public void deactivate() { } public void fail(Object arg0) { } @SuppressWarnings("rawtypes")
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { filePath = conf.get("file").toString();
outputCollector = collector;
} public void nextTuple() { if (!completed) { String time = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
System.out.println("WordReaderSpout nextTuple, " + time); String line = "";
InputStream inputStream = null;
InputStreamReader inputStreamReader = null;
BufferedReader reader = null; try { // filePath = "http://192.168.100.170:8080/uploads/shengjing.txt";
// filePath = "C:\\Users\\Administrator\\Downloads\\shengjing.txt"; if (filePath.startsWith("http://")) { // 远程文件
URL url = new URL(filePath);
URLConnection urlConn = url.openConnection();
inputStream = urlConn.getInputStream();
} else { // 本地文件
inputStream = new FileInputStream(filePath);
} inputStreamReader = new InputStreamReader(inputStream, "utf-8");
reader = new BufferedReader(inputStreamReader);
while ((line = reader.readLine()) != null) {
outputCollector.emit(new Values(line));
} } catch (MalformedURLException e) {
e.printStackTrace();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
completed = true;
try {
if (reader != null) {
reader.close();
}
if (inputStreamReader != null) {
inputStreamReader.close();
}
if (inputStream != null) {
inputStream.close();
}
} catch (IOException e) {
e.printStackTrace();
}
}
} Utils.sleep(20000);
} public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("line"));
} public Map<String, Object> getComponentConfiguration() { return null;
}
}

使用集群测试时,先把txt文件上传到nimbus的ui里,随机指派supervisor远程读取文件。

● WordSplitBolt.java文件,接收行,分割行,发送词

 package com.scps.storm.helloword.bolt;

 import java.util.Map;

 import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

/**
 * Bolt that splits each incoming line into words. Punctuation
 * (, . : ; ? ! ( ) [ ]) and whitespace are treated as delimiters;
 * every non-empty word is emitted on the "word" field.
 */
public class WordSplitBolt implements IRichBolt {

    private static final long serialVersionUID = 1L;

    private OutputCollector outputCollector;

    @SuppressWarnings("rawtypes")
    public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
        outputCollector = collector;
    }

    public void execute(Tuple input) {
        String line = input.getStringByField("line");
        // One regex split replaces the former chain of replace() calls and
        // also handles tabs and runs of several delimiters in a row.
        for (String word : line.trim().split("[,.:;?!()\\[\\]\\s]+")) {
            if (!word.isEmpty()) {
                outputCollector.emit(new Values(word));
            }
        }
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }

    public void cleanup() {
    }

    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}

● WordCountBolt.java文件,接收词,统计词,发送集合

 package com.scps.storm.helloword.bolt;

 import java.util.HashMap;
import java.util.Map; import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

/**
 * Bolt that keeps a running count per word and emits the updated
 * (word, count) pair on every arrival. The upstream fieldsGrouping
 * guarantees a given word is always counted by the same task.
 */
public class WordCountBolt implements IRichBolt {

    private static final long serialVersionUID = 1L;

    // Running totals for the words routed to this task.
    Map<String, Integer> counter;
    private OutputCollector outputCollector;

    @SuppressWarnings("rawtypes")
    public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
        counter = new HashMap<String, Integer>();
        outputCollector = collector;
    }

    public void execute(Tuple input) {
        String word = input.getStringByField("word");
        // Single map lookup instead of containsKey + get.
        Integer previous = counter.get(word);
        int count = (previous == null) ? 1 : previous + 1;
        counter.put(word, count);
        outputCollector.emit(new Values(word, count));
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }

    public void cleanup() {
    }

    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}

● SlidingWindowBolt.java文件,接收集合,合并集合,发送集合

 package com.scps.storm.helloword.bolt;

 import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map; import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseWindowedBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.windowing.TupleWindow;

/**
 * Windowed bolt that folds the (word, count) tuples of each window into a
 * cumulative map and emits that map on the "counter" field. Later counts
 * for a word overwrite earlier ones, so the map holds the latest totals.
 */
public class SlidingWindowBolt extends BaseWindowedBolt {

    private static final long serialVersionUID = 1L;

    // Latest known count per word, accumulated across windows.
    Map<String, Integer> counter;
    private OutputCollector outputCollector;

    @SuppressWarnings("rawtypes")
    public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
        counter = new HashMap<String, Integer>();
        outputCollector = collector;
    }

    public void execute(TupleWindow inputWindow) {
        String time = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
        System.out.println("SlidingWindowBolt execute, " + time);
        for (Tuple input : inputWindow.get()) {
            String word = input.getStringByField("word");
            int count = input.getIntegerByField("count");
            counter.put(word, count); // later tuples carry the newer total
        }
        // Emit a defensive copy: the internal map keeps mutating in later
        // windows, and tuples handed downstream must not change under the
        // receiving bolt.
        outputCollector.emit(new Values(new HashMap<String, Integer>(counter)));
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("counter"));
    }
}

● WordFinalBolt.java文件,接收集合,打印集合

 package com.scps.storm.helloword.bolt;

 import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Map; import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Tuple;

/**
 * Terminal bolt: receives the aggregated word-count map on the "counter"
 * field and prints one "word : count" line per word in alphabetical order,
 * framed by begin/end markers with a timestamp. Emits nothing.
 */
public class WordFinalBolt implements IRichBolt {

    private static final long serialVersionUID = 1L;

    @SuppressWarnings("rawtypes")
    public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
    }

    @SuppressWarnings("unchecked")
    public void execute(Tuple input) {
        Map<String, Integer> wordCounts = (Map<String, Integer>) input.getValueByField("counter");
        List<String> sortedWords = new ArrayList<String>(wordCounts.keySet());
        Collections.sort(sortedWords);
        String timestamp = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
        System.out.println("-----------------begin------------------, " + timestamp);
        for (String word : sortedWords) {
            System.out.println(word + " : " + wordCounts.get(word));
        }
        System.out.println("-----------------end--------------------, " + timestamp);
    }

    public void cleanup() {
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
    }

    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}

● 项目源码文件地址:https://pan.baidu.com/s/1mhZtvq4 密码:ypbc

Storm之路-WordCount-实例的更多相关文章

  1. Hadoop3 在eclipse中访问hadoop并运行WordCount实例

    前言:       毕业两年了,之前的工作一直没有接触过大数据的东西,对hadoop等比较陌生,所以最近开始学习了.对于我这样第一次学的人,过程还是充满了很多疑惑和不解的,不过我采取的策略是还是先让环 ...

  2. Storm系列(二):使用Csharp创建你的第一个Storm拓扑(wordcount)

    WordCount在大数据领域就像学习一门语言时的hello world,得益于Storm的开源以及Storm.Net.Adapter,现在我们也可以像Java或Python一样,使用Csharp创建 ...

  3. hadoop运行wordcount实例,hdfs简单操作

    1.查看hadoop版本 [hadoop@ltt1 sbin]$ hadoop version Hadoop -cdh5.12.0 Subversion http://github.com/cloud ...

  4. hadoop2.6.5运行wordcount实例

    运行wordcount实例 在/tmp目录下生成两个文本文件,上面随便写两个单词. cd /tmp/ mkdir file cd file/ echo "Hello world" ...

  5. 执行hadoop自带的WordCount实例

    hadoop 自带的WordCount实例可以统计一批文本文件中各单词出现的次数.下面介绍如何执行WordCount实例. 1.启动hadoop [root@hadoop ~]# start-all. ...

  6. Python实现MapReduce,wordcount实例,MapReduce实现两表的Join

    Python实现MapReduce 下面使用mapreduce模式实现了一个简单的统计日志中单词出现次数的程序: from functools import reduce from multiproc ...

  7. wordcount实例

    scala的wordcount实例 package com.wondersgroup.myscala import scala.actors.{Actor, Future} import scala. ...

  8. Spark源码编译并在YARN上运行WordCount实例

    在学习一门新语言时,想必我们都是"Hello World"程序开始,类似地,分布式计算框架的一个典型实例就是WordCount程序,接触过Hadoop的人肯定都知道用MapRedu ...

  9. Storm手写WordCount

    建立一个maven项目,在pom.xml中进行如下配置: <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:x ...

随机推荐

  1. python一行写不下,变多行

    python里一行写不下,拆成多行, \和() 两种方法 在一行末尾 加上" \",也就是空格加上\ a= 'sdfaf' \      'test' 注意两个对象都要独立,字符串 ...

  2. 【云计算 Hadoop】Hadoop 版本 生态圈 MapReduce模型

    忘的差不多了, 先补概念, 然后开始搭建集群实战 ... . 一 Hadoop版本 和 生态圈 1. Hadoop版本 (1) Apache Hadoop版本介绍 Apache的开源项目开发流程 : ...

  3. 动态获取html页面的内容,并且取其中的某块元素的方法

     $.ajax({  url: "http://192.168.1.59:8888/app-tpl-webapp/tpl/design.html",  async:false, ...

  4. BDA大数据处理流程

    可以看出,数据处理用云,可以高效完成.而分析部分应该利用传统的bi工具.

  5. Chrome浏览器开发调试系列(一)

    // 计划写一个 Chrome 浏览器以及 调试器的系列文章,我慢慢写. 边写边改,发觉博客真是个打草稿的好地方. // 本文针对的是当前最新的浏览器Chrome34,如果你的版本不够新,希望你能够更 ...

  6. Android驱动中的Kconfig文件与Makefile文件

    内核源码树的目录下都有两个文档Kconfig(2.4版本是Config.in)和Makefile.分布到各目录的Kconfig构成了一个分布式的内核配置数据库,每个Kconfig分别描述了所属目录源文 ...

  7. 推荐一些用CRF做图像语义分割的资源

    原文地址:http://blog.sina.com.cn/s/blog_5309cefc01014nri.html 首先是code,以前找了很多,但发现比较好用的有: 1. Matlab版的UGM:h ...

  8. Android进阶(十七)AndroidAPP开发问题汇总(一)

    首先来看一下猎头公司对于Android方向人才招聘的需求: 猎头公司推荐------资深Java软件工程师(Android方向) 岗位职责: 1.熟悉Java语言,熟悉B/S开发的基本结构 2.能运用 ...

  9. 采用UltraISO制作U盘启动盘

    采用UltraISO制作U盘启动盘 打开UltralSO,选择"文件"--->"打开",如下图: 图1 打开WIN7操作系统的ISO文件,如下图: 图2 ...

  10. PHP获取指定地区的天气

    在开发网站的时候用到天气查询,由于是基于Wordpress的 所以有很多限制,先建一个[weather.php]的文件,然后看代码: <?php //获取天气 $url = 'http://m. ...