一、本地模式

1、WordCountSpout类

package com.demo.wc;

import java.util.Map;

import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values; /**
* 需求:单词计数 hello world hello Beijing China
*
* 实现接口: IRichSpout IRichBolt
* 继承抽象类:BaseRichSpout BaseRichBolt 常用*/
public class WordCountSpout extends BaseRichSpout { //定义收集器
private SpoutOutputCollector collector; //发送数据
@Override
public void nextTuple() {
//1.发送数据 到bolt
collector.emit(new Values("I like China very much")); //2.设置延迟
try {
Thread.sleep(500);
} catch (InterruptedException e) {
e.printStackTrace();
}
} //创建收集器
@Override
public void open(Map arg0, TopologyContext arg1, SpoutOutputCollector collector) {
this.collector = collector;
} //声明描述
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
//起别名
declarer.declare(new Fields("wordcount"));
}
}

2、WordCountSplitBolt类

package com.demo.wc;

import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values; public class WordCountSplitBolt extends BaseRichBolt { //数据继续发送到下一个bolt
private OutputCollector collector; //业务逻辑
@Override
public void execute(Tuple in) {
//1.获取数据
String line = in.getStringByField("wordcount"); //2.切分数据
String[] fields = line.split(" "); //3.<单词,1> 发送出去 下一个bolt(累加求和)
for (String w : fields) {
collector.emit(new Values(w, 1));
}
} //初始化
@Override
public void prepare(Map arg0, TopologyContext arg1, OutputCollector collector) {
this.collector = collector;
} //声明描述
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("word", "sum"));
}
}

3、WordCountBolt类

package com.demo.wc;

import java.util.HashMap;
import java.util.Map; import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple; public class WordCountBolt extends BaseRichBolt{ private Map<String, Integer> map = new HashMap<>(); //累加求和
@Override
public void execute(Tuple in) {
//1.获取数据
String word = in.getStringByField("word");
Integer sum = in.getIntegerByField("sum"); //2.业务处理
if (map.containsKey(word)) {
//之前出现几次
Integer count = map.get(word);
//已有的
map.put(word, count + sum);
} else {
map.put(word, sum);
} //3.打印控制台
System.out.println(Thread.currentThread().getName() + "\t 单词为:" + word + "\t 当前已出现次数为:" + map.get(word));
} @Override
public void prepare(Map arg0, TopologyContext arg1, OutputCollector arg2) {
} @Override
public void declareOutputFields(OutputFieldsDeclarer arg0) {
}
}

4、WordCountDriver类

package com.demo.wc;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields; public class WordCountDriver {
public static void main(String[] args) {
//1.hadoop->Job storm->topology 创建拓扑
TopologyBuilder builder = new TopologyBuilder();
//2.指定设置
builder.setSpout("WordCountSpout", new WordCountSpout(), 1);
builder.setBolt("WordCountSplitBolt", new WordCountSplitBolt(), 4).fieldsGrouping("WordCountSpout", new Fields("wordcount"));
builder.setBolt("WordCountBolt", new WordCountBolt(), 2).fieldsGrouping("WordCountSplitBolt", new Fields("word")); //3.创建配置信息
Config conf = new Config(); //4.提交任务
LocalCluster localCluster = new LocalCluster();
localCluster.submitTopology("wordcounttopology", conf, builder.createTopology());
}
}

5、直接运行(4)里面的main方法即可启动本地模式。

二、集群模式

前三个类和上面本地模式一样,第4个类WordCountDriver和本地模式有点区别

package com.demo.wc;

import org.apache.storm.Config;
import org.apache.storm.StormSubmitter;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields; public class WordCountDriver {
public static void main(String[] args) {
//1.hadoop->Job storm->topology 创建拓扑
TopologyBuilder builder = new TopologyBuilder();
//2.指定设置
builder.setSpout("WordCountSpout", new WordCountSpout(), 1);
builder.setBolt("WordCountSplitBolt", new WordCountSplitBolt(), 4).fieldsGrouping("WordCountSpout", new Fields("wordcount"));
builder.setBolt("WordCountBolt", new WordCountBolt(), 2).fieldsGrouping("WordCountSplitBolt", new Fields("word")); //3.创建配置信息
Config conf = new Config();
//conf.setNumWorkers(10); //集群模式
try {
StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
} catch (Exception e) {
e.printStackTrace();
} //4.提交任务
//LocalCluster localCluster = new LocalCluster();
//localCluster.submitTopology("wordcounttopology", conf, builder.createTopology());
}
}

把程序打成jar包放在启动了Storm集群的机器里,在stormwordcount.jar所在目录下执行

storm jar stormwordcount.jar com.demo.wc.WordCountDriver wordcount01

即可启动程序。

三、并发度和分组策略

1、WordCountDriver_Shuffle类

package com.demo.wc;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.topology.TopologyBuilder; public class WordCountDriver_Shuffle {
public static void main(String[] args) {
//1.hadoop->Job storm->topology 创建拓扑
TopologyBuilder builder = new TopologyBuilder();
//2.指定设置
builder.setSpout("WordCountSpout", new WordCountSpout(), 2);
builder.setBolt("WordCountSplitBolt", new WordCountSplitBolt(), 2).setNumTasks(4).shuffleGrouping("WordCountSpout");
builder.setBolt("WordCountBolt", new WordCountBolt(), 6).shuffleGrouping("WordCountSplitBolt"); //3.创建配置信息
Config conf = new Config();
//conf.setNumWorkers(2); //集群模式
// try {
// StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
// } catch (Exception e) {
// e.printStackTrace();
// } //4.提交任务
LocalCluster localCluster = new LocalCluster();
localCluster.submitTopology("wordcounttopology", conf, builder.createTopology());
}
}

2、并发度与分组策略

Storm-wordcount实时统计单词次数的更多相关文章

  1. lucene 统计单词次数(词频tf)并进行排序

    public class WordCount { static Directory directory; // 创建分词器 static Analyzer analyzer = new IKAnaly ...

  2. 大数据学习day32-----spark12-----1. sparkstreaming(1.1简介,1.2 sparkstreaming入门程序(统计单词个数,updateStageByKey的用法,1.3 SparkStreaming整合Kafka,1.4 SparkStreaming获取KafkaRDD的偏移量,并将偏移量写入kafka中)

    1. Spark Streaming 1.1 简介(来源:spark官网介绍) Spark Streaming是Spark Core API的扩展,其是支持可伸缩.高吞吐量.容错的实时数据流处理.Sp ...

  3. Storm基础概念与单词统计示例

    Storm基本概念 Storm是一个分布式的.可靠地.容错的数据流处理系统.Storm分布式计算结构称为Topology(拓扑)结构,顾名思义,与拓扑图十分类似.该拓扑图主要由数据流Stream.数据 ...

  4. Storm+HBase实时实践

    1.HBase Increment计数器 hbase counter的原理: read+count+write,正好完成,就是讲key的value读出,若存在,则完成累加,再写入,若不存在,则按&qu ...

  5. 3、SpringBoot 集成Storm wordcount

    WordCountBolt public class WordCountBolt extends BaseBasicBolt { private Map<String,Integer> c ...

  6. Storm WordCount Topology学习

    1,分布式单词计数的流程 首先要有数据源,在SentenceSpout中定义了一个字符串数组sentences来模拟数据源.字符串数组中的每句话作为一个tuple发射.其实,SplitBolt接收Se ...

  7. 使用HDFS完成wordcount词频统计

    任务需求 统计HDFS上文件的wordcount,并将统计结果输出到HDFS 功能拆解 读取HDFS文件 业务处理(词频统计) 缓存处理结果 将结果输出到HDFS 数据准备 事先往HDFS上传需要进行 ...

  8. C++读取文件统计单词个数及频率

    1.Github链接 GitHub链接地址https://github.com/Zzwenm/PersonProject-C2 2.PSP表格 PSP2.1 Personal Software Pro ...

  9. Hadoop基础学习(一)分析、编写并执行WordCount词频统计程序

    版权声明:本文为博主原创文章,未经博主同意不得转载. https://blog.csdn.net/jiq408694711/article/details/34181439 前面已经在我的Ubuntu ...

随机推荐

  1. 安装语言包(LANGUAGE PACKAGE)

    by 枫竹丹青 一.说明 在SAP ECC 安装好后,如果是生产版本,通常只有两种语言--英语和德语.这时如需中文环境,则需定义支持的语言类型并导入语言包.如果安装的是IDES版,则系统中已包含了几十 ...

  2. Ubuntu 15.04 开启远程桌面

    1.安装Xrdp Windows远程桌面使用的是RDP协议,所以ubuntu上就要先安装Xrdp,终端命令行输入安装: sudo apt-get install xrdp vnc4server xba ...

  3. spring boot热部署pom.xml配置

    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/20 ...

  4. 等边三角形---dfs

    蒜头君手上有一些小木棍,它们长短不一,蒜头君想用这些木棍拼出一个等边三角形,并且每根木棍都要用到. 例如,蒜头君手上有长度为 11,22,33,33 的4根木棍,他可以让长度为11,22 的木棍组成一 ...

  5. Python教程:[43]Word基本操作

    使用python操作Word用到了win32com模块,我们现在就要介绍一下python对Word的基本操作,文章涉及到如何与Word程序建立连接.如果与Word文档建立连接的,以及对Word文档的基 ...

  6. 一款纯css3实现的发光屏幕旋转特效

    今天给大家带来一款纯css3实现的发光屏幕旋转特效.该屏幕由纯css3实现带发光旋转特效,效果图如下: 在线预览   源码下载 实现的代码. html代码: <div class="s ...

  7. myeclipse 上安装 Maven3<转>

    环境准备: JDK 1.6 Maven 3.0.4 myeclipse 8.6.1 安装 Maven 之前要求先确定你的 JDK 已经安装配置完成.Maven是 Apache 下的一个项目,目前最新版 ...

  8. Lucene 工作原理<转>

    Lucene是一个高性能的java全文检索工具包,它使用的是倒排文件索引结构.该结构及相应的生成算法如下: 0)设有两篇文章1和2 文章1的内容为:Tom lives in Guangzhou,I l ...

  9. VisualStudio快捷键

    ctrl+k,c 注释选中行 ctrl+k,u 取消对选中行的注释 CTRL + SHIFT + B:生成解决方案 CTRL + F7 :生成编译 CTRL + O :打开文件 CTRL + SHIF ...

  10. multimap 小例子

    场景: 按DDX值倒序取前十的板块代码,用 map<float, string, greater<float> > mapBKDDX; 存储时,相同DDX值的板块只能存储第一个 ...