Storm入门(四)WordCount示例
一、关联代码
使用maven,代码如下。
pom.xml 和Storm入门(三)HelloWorld示例相同
RandomSentenceSpout.java

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cn.ljh.storm.wordcount; import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map;
import java.util.Random; public class RandomSentenceSpout extends BaseRichSpout {
private static final Logger LOG = LoggerFactory.getLogger(RandomSentenceSpout.class); SpoutOutputCollector _collector;
Random _rand; public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
_collector = collector;
_rand = new Random();
} public void nextTuple() {
Utils.sleep(100);
String[] sentences = new String[]{
sentence("the cow jumped over the moon"),
sentence("an apple a day keeps the doctor away"),
sentence("four score and seven years ago"),
sentence("snow white and the seven dwarfs"),
sentence("i am at two with nature")};
final String sentence = sentences[_rand.nextInt(sentences.length)]; LOG.debug("Emitting tuple: {}", sentence); _collector.emit(new Values(sentence));
} protected String sentence(String input) {
return input;
} @Override
public void ack(Object id) {
} @Override
public void fail(Object id) {
} public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("word"));
} // Add unique identifier to each tuple, which is helpful for debugging
public static class TimeStamped extends RandomSentenceSpout {
private final String prefix; public TimeStamped() {
this("");
} public TimeStamped(String prefix) {
this.prefix = prefix;
} protected String sentence(String input) {
return prefix + currentDate() + " " + input;
} private String currentDate() {
return new SimpleDateFormat("yyyy.MM.dd_HH:mm:ss.SSSSSSSSS").format(new Date());
}
}
}

WordCountTopology.java

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cn.ljh.storm.wordcount; import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values; import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map; public class WordCountTopology {
public static class SplitSentence implements IRichBolt {
private OutputCollector _collector;
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("word"));
} public Map<String, Object> getComponentConfiguration() {
return null;
} public void prepare(Map stormConf, TopologyContext context,
OutputCollector collector) {
_collector = collector;
} public void execute(Tuple input) {
String sentence = input.getStringByField("word");
String[] words = sentence.split(" ");
for(String word : words){
this._collector.emit(new Values(word));
}
} public void cleanup() {
// TODO Auto-generated method stub }
} public static class WordCount extends BaseBasicBolt {
Map<String, Integer> counts = new HashMap<String, Integer>(); public void execute(Tuple tuple, BasicOutputCollector collector) {
String word = tuple.getString(0);
Integer count = counts.get(word);
if (count == null)
count = 0;
count++;
counts.put(word, count);
collector.emit(new Values(word, count));
} public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("word", "count"));
}
} public static class WordReport extends BaseBasicBolt {
Map<String, Integer> counts = new HashMap<String, Integer>(); public void execute(Tuple tuple, BasicOutputCollector collector) {
String word = tuple.getStringByField("word");
Integer count = tuple.getIntegerByField("count");
this.counts.put(word, count);
} public void declareOutputFields(OutputFieldsDeclarer declarer) { } @Override
public void cleanup() {
System.out.println("-----------------FINAL COUNTS START-----------------------");
List<String> keys = new ArrayList<String>();
keys.addAll(this.counts.keySet());
Collections.sort(keys); for(String key : keys){
System.out.println(key + " : " + this.counts.get(key));
} System.out.println("-----------------FINAL COUNTS END-----------------------");
} } public static void main(String[] args) throws Exception { TopologyBuilder builder = new TopologyBuilder(); builder.setSpout("spout", new RandomSentenceSpout(), 5); //ShuffleGrouping:随机选择一个Task来发送。
builder.setBolt("split", new SplitSentence(), 8).shuffleGrouping("spout");
//FiledGrouping:根据Tuple中Fields来做一致性hash,相同hash值的Tuple被发送到相同的Task。
builder.setBolt("count", new WordCount(), 12).fieldsGrouping("split", new Fields("word"));
//GlobalGrouping:所有的Tuple会被发送到某个Bolt中的id最小的那个Task。
builder.setBolt("report", new WordReport(), 6).globalGrouping("count"); Config conf = new Config();
conf.setDebug(true); if (args != null && args.length > 0) {
conf.setNumWorkers(3); StormSubmitter.submitTopologyWithProgressBar(args[0], conf, builder.createTopology());
}
else {
conf.setMaxTaskParallelism(3); LocalCluster cluster = new LocalCluster();
cluster.submitTopology("word-count", conf, builder.createTopology()); Thread.sleep(20000); cluster.shutdown();
}
}
}

二、执行效果

Storm入门(四)WordCount示例的更多相关文章
- 【第四篇】ASP.NET MVC快速入门之完整示例(MVC5+EF6)
目录 [第一篇]ASP.NET MVC快速入门之数据库操作(MVC5+EF6) [第二篇]ASP.NET MVC快速入门之数据注解(MVC5+EF6) [第三篇]ASP.NET MVC快速入门之安全策 ...
- 《Storm入门》中文版
本文翻译自<Getting Started With Storm>译者:吴京润 编辑:郭蕾 方腾飞 本书的译文仅限于学习和研究之用,没有原作者和译者的授权不能用于商业用途. 译者序 ...
- 【原创】NIO框架入门(四):Android与MINA2、Netty4的跨平台UDP双向通信实战
概述 本文演示的是一个Android客户端程序,通过UDP协议与两个典型的NIO框架服务端,实现跨平台双向通信的完整Demo. 当前由于NIO框架的流行,使得开发大并发.高性能的互联网服务端成为可能. ...
- Storm系列(二):使用Csharp创建你的第一个Storm拓扑(wordcount)
WordCount在大数据领域就像学习一门语言时的hello world,得益于Storm的开源以及Storm.Net.Adapter,现在我们也可以像Java或Python一样,使用Csharp创建 ...
- 脑残式网络编程入门(四):快速理解HTTP/2的服务器推送(Server Push)
本文原作者阮一峰,作者博客:ruanyifeng.com. 1.前言 新一代HTTP/2 协议的主要目的是为了提高网页性能(有关HTTP/2的介绍,请见<从HTTP/0.9到HTTP/2:一文读 ...
- 大数据入门第七天——MapReduce详解(一)入门与简单示例
一.概述 1.map-reduce是什么 Hadoop MapReduce is a software framework for easily writing applications which ...
- hadoop学习第三天-MapReduce介绍&&WordCount示例&&倒排索引示例
一.MapReduce介绍 (最好以下面的两个示例来理解原理) 1. MapReduce的基本思想 Map-reduce的思想就是“分而治之” Map Mapper负责“分”,即把复杂的任务分解为若干 ...
- MapReduce 编程模型 & WordCount 示例
学习大数据接触到的第一个编程思想 MapReduce. 前言 之前在学习大数据的时候,很多东西很零散的做了一些笔记,但是都没有好好去整理它们,这篇文章也是对之前的笔记的整理,或者叫输出吧.一来是加 ...
- WordCount示例深度学习MapReduce过程(1)
我们都安装完Hadoop之后,按照一些案例先要跑一个WourdCount程序,来测试Hadoop安装是否成功.在终端中用命令创建一个文件夹,简单的向两个文件中各写入一段话,然后运行Hadoop,Wou ...
随机推荐
- 三、 redis进阶篇
1. redis事务 使用方法:方法为先发送multi命令告诉redis,下面所有的命令属于同一个事务,先不要执行,而是把他们暂时存起来,redis返回OK,然后后面执行需要放在同一个事务里的命令,可 ...
- Hyper-v虚拟机联网配置
最近想做点练手的项目部署到虚拟机的服务器上,然后关于虚拟机联网问题着实把贫道坑了一把.下面做一下记录防止以后忘了.... 1.新建虚拟交换机 输入交换机名称和选择外部网络,可以看到外部网络的下拉框的选 ...
- OKR能解决996吗?德鲁克怎么看?
最近网络上热议的“996”,不由让人想起我们的邻国日本.他们在20年前就有过一个热词“过劳死”,就是职场加班太严重导致的猝死. 最近有一本书新书<过劳时代>,说的就是日本20年前的过劳死. ...
- Python算法与数据结构--求所有子数组的和的最大值
Python算法与数据结构--求所有子数组的和的最大值 玄魂工作室-玄魂 玄魂工作室秘书 玄魂工作室 昨天 题目:输入一个整形数组,数组里有正数也有负数.数组中连续的一个或多个整数组成一个子数组,每个 ...
- Appium在Android7.0及以上系统运行时报错的解决方案
背景:在使用Samsung S系列手机进行自动化测试时,发现同样脚本的情况下华为荣耀系列可以正常运行,最终发现差异在于Android7.0及以上系统和appium版本不匹配,需要升级appium.但需 ...
- 解决关于:Oracle数据库 插入数据中文乱码 显示问号???
问题: oracle数据库,通过接口插入的中文数据乱码,中文变成了问号??? 解决方案: 计算机=>属性=>高级系统设置=>环境变量=>新建 变量名:NLS_LANG 值:SI ...
- qml demo分析(clocks-时钟)
一.效果展示 效果如图1所示,时钟列表支持鼠标左右拖动,带有黑色背景的是晚上时钟,无黑色背景的是白天时钟 二.源码分析 1.main.cpp文件中只包含了一个宏,该宏的具体解释请看qml 示例中的关键 ...
- WebApi生成在线API文档--Swagger
1.前言 1.1 SwaggerUI SwaggerUI 是一个简单的Restful API 测试和文档工具.简单.漂亮.易用(官方demo).通过读取JSON 配置显示API. 项目本身仅仅也只依赖 ...
- 分析 js构造函数:对象方法 、类方法 、原型方法
构造函数方法有对象方法.类方法.原型方法,这些方法在什么时候可以调用,什么时候不能调用,为什么? function Func(name){ this.name=name; this.ff=functi ...
- 【转载】SQL语句中Union和Union All的区别
在使用到SQL语句进行数据库查询的过程中,如果需要求两个数据集合的并集,一般会使用到联合查询关键字Union或者Union All,其实Union和Union All两者的使用有一定差别,查出来的数据 ...