Storm wordcount Read from file
source code:
package stormdemo;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map; import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values; public class WordCountTopology {
public static class WordReader extends BaseRichSpout {
private static final long serialVersionUID = 1L;
private SpoutOutputCollector collector;
private FileReader fileReader;
private boolean completed = false;
public void ack(Object msgId) {
System.out.println("OK:"+msgId);
}
public void close() {}
public void fail(Object msgId) {
System.out.println("FAIL:"+msgId);
}
/**The only thing that the methods will do It is emit each file line*/
public void nextTuple() {
/**
* The nextuple it is called forever, so if we have been readed the file
* we will wait and then return
*/
if(completed){
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
//Do nothing
}
return;
}
String str;
//Open the reader
BufferedReader reader = new BufferedReader(fileReader);
try{
//Read all lines
while((str = reader.readLine()) != null){
/**
* By each line emmit a new value with the line as a their
*/
this.collector.emit(new Values(str),str);
}
}catch(Exception e){
throw new RuntimeException("Error reading tuple",e);
}finally{
completed = true;
}
} /**
* We will create the file and get the collector object
*/
public void open(@SuppressWarnings("rawtypes") Map conf, TopologyContext context,
SpoutOutputCollector collector) {
try {
this.fileReader = new FileReader(conf.get("wordsFile").toString());
} catch (FileNotFoundException e) {
throw new RuntimeException("Error reading file ["+conf.get("wordsFile")+"]");
}
this.collector = collector;
} /**
* Declare the output field "line"
*/
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("line"));
}
} public static class WordNormalizer extends BaseBasicBolt { private static final long serialVersionUID = 3L; public void cleanup() {}
public void execute(Tuple input, BasicOutputCollector collector) {
String sentence = input.getString(0);
String[] words = sentence.split(" ");
for(String word : words){
word = word.trim();
if(!word.isEmpty()){
word = word.toLowerCase();
collector.emit(new Values(word));
}
}
} /**
* The bolt will only emit the field "word"
*/
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("word"));
}
}
public static class WordCount extends BaseBasicBolt {
private static final long serialVersionUID = 2L;
Map<String, Integer> counts = new HashMap<String, Integer>();
BufferedWriter output = null;
public void execute(Tuple tuple, BasicOutputCollector collector) {
String word = tuple.getString(0);
Integer count = counts.get(word);
if (count == null)
count = 0;
count++;
counts.put(word, count);
//collector.emit(new Values(word, count));
try {
output = new BufferedWriter(new FileWriter("/home/hadoop/wordcounts.txt",false ));
} catch (IOException e) {
e.printStackTrace();
try {
output.close();
} catch (IOException e1) { e1.printStackTrace(); }
}
for(Map.Entry<String, Integer> entry : counts.entrySet()){
try {
output.write(entry.getKey()+": "+entry.getValue());
output.newLine();
output.flush();
} catch (IOException e) {
e.printStackTrace();
}
}
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("word", "count"));
}
} public static void main(String[] args) throws Exception { TopologyBuilder builder = new TopologyBuilder(); builder.setSpout("spout", new WordReader());
builder.setBolt("split", new WordNormalizer()).shuffleGrouping("spout");
builder.setBolt("count", new WordCount()).globalGrouping("split"); Config conf = new Config();
conf.put("wordsFile", args[0]);
conf.setDebug(false);
//Topology run
if (args != null && args.length > 1) {
conf.setNumWorkers(2);
StormSubmitter.submitTopology(args[1], conf, builder.createTopology());
}
else {
conf.put(Config.TOPOLOGY_MAX_SPOUT_PENDING, 1);
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("wordcount", conf, builder.createTopology());
Thread.sleep(1000);
cluster.shutdown();
} }
}
start zookeeper.(zkServer.sh start at namenode,datanode01,datanode02)
start storm nimbus at namenode.
start storm supervisor at datanode01 and datanode02;
at namenode:
cd /home/hadoop/workspace
cd /stormsample
mvn install
storm jar storm-example-0.0.1-SNAPSHOT.jar stormdemo.WordCountTopology /home/hadoop/wordinput.txt wordcount
first, you should prepare text file for the source, I put one txt file wordinput.txt in datanode01 /02 /home/hadoop/.
after running job, I found wordcount.txt at datanode01 node.
Storm wordcount Read from file的更多相关文章
- 3、SpringBoot 集成Storm wordcount
WordCountBolt public class WordCountBolt extends BaseBasicBolt { private Map<String,Integer> c ...
- [Storm] java.io.FileNotFoundException: File '../stormconf.ser' does not exist
This bug will kill supervisors Affects Version/s: 0.9.2-incubating, 0.9.3, 0.9.4 Fix Version/s: 0.10 ...
- Storm WordCount
特别注意,在本地运行的时候应该去掉<scope>provided</scope>,否则会报java.lang.ClassNotFoundException: org.apach ...
- Storm WordCount Topology学习
1,分布式单词计数的流程 首先要有数据源,在SentenceSpout中定义了一个字符串数组sentences来模拟数据源.字符串数组中的每句话作为一个tuple发射.其实,SplitBolt接收Se ...
- Storm入门(四)WordCount示例
一.关联代码 使用maven,代码如下. pom.xml 和Storm入门(三)HelloWorld示例相同 RandomSentenceSpout.java /** * Licensed to t ...
- 基于Storm的WordCount
Storm WordCount 工作过程 Storm 版本: 1.Spout 从外部数据源中读取数据,随机发送一个元组对象出去: 2.SplitBolt 接收 Spout 中输出的元组对象,将元组中的 ...
- storm教程
二.安装部署 一.storm伪分布式安装 (一)环境准备1.OS:debian 72.JDK 7.0 (二)安装zookeeper1.下载zookeeper并解压 wget http://mirr ...
- Storm之路-WordCount-实例
初学storm,有不足的地方还请纠正. 网上看了很多wordcount实例,发现都不是我想要的. 实现场景:统计shengjing.txt词频到集合,一次打印结果. ● 消息源Spout 继承Base ...
- Storm实现单词计数
package com.mengyao.storm; import java.io.File; import java.io.IOException; import java.util.Collect ...
随机推荐
- iOS开发系列通讯录、蓝牙、内购、GameCenter、iCloud、Passbook系统服务开
--系统应用与系统服务 iOS开发过程中有时候难免会使用iOS内置的一些应用软件和服务,例如QQ通讯录.微信电话本会使用iOS的通讯录,一些第三方软件会在应用内发送短信等.今天将和大家一起学习如何使用 ...
- knockout.js的简介和简单使用
1.knockout简介knockout是一个轻量级的UI类库,通过MVVM模式使JavaScript前端UI简单化knockout有四大重要概念:1)声明式绑定:使用简明移读的语法很容易地将模型(m ...
- X3DOM 1.6.1 发布注记
X3DOM 1.6.1 主要包含了一些新的功能特性.bug修复,是1.6的维护性更新版本. 特性 ClipPlane 支持 实例 here 及文档 here TwoSidedMaterial 支持 实 ...
- 开发机多用户 xdebug 远程调试 PhpStorm
在公司都用的远程开发机开发,每次有错误调试就得dd(xxx)然后保存真是,让我在本地开发用惯xdebug的情何以堪,所以有了下文. 1.安装配置xdebug 直接使用pecl安装即可 # pecl i ...
- ArrayList实现源码分析
本文将以以下几个问题来探讨ArrayList的源码实现 1.ArrayList的大小是如何自动增加的 2.什么情况下你会使用ArrayList?什么时候你会选择LinkedList? 3.如何复制某个 ...
- oschina github使用指南
我的github仓库开通,https://git.oschina.net/zhjh256. 1.打开https://git.oschina.net/signup,没有账号的话,则新创建账号. 2.从h ...
- [JS,NodeJs]个人网站效果代码集合
上次发的个人网站效果代码集合: 代码集合: 1.彩色文字墙[鼠标涟漪痕迹] 2.彩色旋转圆环 [模仿http://www.moma.org/interactives/exhibitions/2012/ ...
- XML的文档声明
1.XML的文档声明 <?xml version="1.0" encoding="utf-8"?> 文档声明必须写在第一行第一列 属性: versi ...
- IOS6学习笔记(四)
1.GCD设置一个timer计时器 - (void)awakeFromNib { __weak id weakSelf = self; double delayInSeconds = 0.25; _t ...
- ASP.NET MVC下判断用户登录和授权的方法
日常开发的绝大多数系统中,都涉及到管理用户的登录和授权问题.登录功能(Authentication),针对于所有用户都开放:而授权(Authorization),则对于某种用户角色才开放. 在asp. ...