以单词分割计数为例实现Storm的DirectGroup分组:

1、Spout实现

Spout是Storm的数据源头,使用DirectGroup方式可将Spout数据发送到指定的Bolt Task,需注意:

1)、Spout需要知道下游消费Bolt的Task Id(Task是Spout/Bolt在Executor中运行的实例,每个Task在拓扑内有唯一的编号),在如下代码中于Spout.open()初始化时拿到消费Task

2)、需使用SpoutOutputCollector.emitDirect()方法

3)、将Spout的输出流声明为直接流(direct stream),即在Spout.declareOutputFields()中调用declarer.declareStream(streamId, true, fields)声明

/**
 * Fixed Cycle Spout.
 *
 * <p>Cycles forever over a fixed array of tuples, emitting one tuple per {@link #nextTuple()}
 * call on the stream id given at construction. Every emitted tuple is remembered by message id
 * until it is acked, so that a failed tuple can be emitted again.
 *
 * <p>In direct mode the stream is declared as a direct stream and each tuple is sent with
 * {@code emitDirect} to every task of every component that subscribed to this spout's stream
 * with a direct grouping.
 *
 * @author hanhan.zhang
 */
public class FixedCycleSpout implements IRichSpout {

    /** Name of the single output field. */
    private final String _fieldName;

    /** Whether the output stream is a direct stream. */
    private final boolean _direct;

    /** Id of the stream this spout declares and emits on. */
    private final String _streamId;

    /** Tuples that are emitted round-robin. */
    private final List<Object>[] _sendTuple;

    /** Position of the next tuple to emit inside {@link #_sendTuple}. */
    private int _index;

    /** key = msgId, value = tuple that was sent and not yet acked. */
    private Map<String, List<Object>> _pendingTuple;

    private SpoutOutputCollector _collector;

    private CountMetric _sendMetric;

    private CountMetric _failMetric;

    /** Task ids of the direct consumers of {@link #_streamId}; only populated in direct mode. */
    private List<Integer> _consumeTaskIdList;

    /**
     * @param _streamId  id of the stream to declare and emit on
     * @param _fieldName name of the single output field
     * @param _direct    {@code true} to declare a direct stream and use {@code emitDirect}
     * @param _sendTuple tuples to emit in a cycle
     */
    public FixedCycleSpout(String _streamId, String _fieldName, boolean _direct, List<Object>... _sendTuple) {
        this._streamId = _streamId;
        this._fieldName = _fieldName;
        this._direct = _direct;
        this._sendTuple = _sendTuple;
    }

    /**
     * Initializes runtime state, registers the send/fail metrics and, in direct mode, collects
     * the task ids of the components that consume this spout's stream with a direct grouping.
     */
    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this._index = 0;
        this._pendingTuple = Maps.newHashMap();

        // register metric
        this._sendMetric = context.registerMetric("cycle.spout.send.tuple.metric", new CountMetric(), 60);
        this._failMetric = context.registerMetric("cycle.spout.fail.tuple.metric", new CountMetric(), 60);

        this._collector = collector;

        // get consume task id
        if (this._direct) {
            this._consumeTaskIdList = Lists.newLinkedList();
            // outer key = stream id, inner key = consuming component id
            Map<String, Map<String, Grouping>> consumeTargets = context.getThisTargets();
            // Only consumers of the stream we actually emit on are valid emitDirect targets.
            Map<String, Grouping> streamTargets =
                    consumeTargets == null ? null : consumeTargets.get(this._streamId);
            if (streamTargets != null) {
                streamTargets.forEach((componentId, group) -> {
                    if (group.is_set_direct()) {
                        this._consumeTaskIdList.addAll(context.getComponentTasks(componentId));
                    }
                });
            }
        }
    }

    @Override
    public void close() {
    }

    @Override
    public void activate() {
    }

    @Override
    public void deactivate() {
    }

    /** Emits the next tuple of the cycle under a freshly generated message id. */
    @Override
    public void nextTuple() {
        // Nothing to cycle over: avoid indexing into an empty array.
        if (this._sendTuple == null || this._sendTuple.length == 0) {
            return;
        }
        if (this._index >= this._sendTuple.length) {
            this._index = 0;
        }
        String msgId = UUID.randomUUID().toString();
        List<Object> tuple = this._sendTuple[this._index++];
        sendTuple(msgId, tuple);
        this._sendMetric.incr();
    }

    /** Forgets the pending tuple once it is acked. */
    @Override
    public void ack(Object msgId) {
        String msgIdStr = (String) msgId;
        System.out.println("ack tuple with msgId " + msgIdStr);
        this._pendingTuple.remove(msgIdStr);
    }

    /** Counts the failure and emits the pending tuple again under the same message id. */
    @Override
    public void fail(Object msgId) {
        this._failMetric.incr();
        String msgIdStr = (String) msgId;
        System.out.println("fail tuple with msgId " + msgIdStr);
        List<Object> tuple = this._pendingTuple.get(msgIdStr);
        if (tuple == null) {
            // The id is no longer pending (e.g. it was already acked and removed), so there is
            // nothing to replay; emitting a null tuple would only fail inside the collector.
            return;
        }
        sendTuple(msgIdStr, tuple);
    }

    /** Declares the single output stream, marked direct when this spout runs in direct mode. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declareStream(this._streamId, this._direct, new Fields(_fieldName));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

    /**
     * Records the tuple as pending and emits it on this spout's stream.
     *
     * @param msgId message id the tuple is tracked under
     * @param tuple values to emit
     * @throws IllegalStateException in direct mode when no direct consumer task is known
     */
    protected void sendTuple(String msgId, List<Object> tuple) {
        if (this._direct
                && (this._consumeTaskIdList == null || this._consumeTaskIdList.isEmpty())) {
            throw new IllegalStateException("direct task is empty !");
        }
        this._pendingTuple.put(msgId, tuple);
        if (this._direct) {
            // NOTE(review): the same msgId is used for every consumer task, so ack/fail are
            // presumably reported once per task for one msgId - confirm this is intended.
            this._consumeTaskIdList.forEach(taskId ->
                    this._collector.emitDirect(taskId, this._streamId, tuple, msgId));
        } else {
            // Emit on the declared stream; the two-argument emit would use the default stream,
            // which this spout never declares.
            this._collector.emit(this._streamId, tuple, msgId);
        }
    }
}

2、Bolt实现

/**
 * Sentence Split Bolt.
 *
 * <p>Splits the string in field 0 of every input tuple with the configured separator and emits
 * one {@code (word, 1)} tuple per non-empty token, anchored to the input tuple.
 *
 * @author hanhan.zhang
 */
public class SentenceSplitBolt implements IRichBolt {

    private OutputCollector _collector;

    private CountMetric _ackMetric;

    private CountMetric _failMetric;

    /** Separator read from the topology configuration; passed to {@link String#split(String)}. */
    private String _separator;

    /** Whether the output stream is a direct stream. */
    private boolean _direct;

    /** Id of the stream this bolt declares and emits on. */
    private String _streamId;

    /** Task ids of the direct consumers of {@link #_streamId}; stays empty in non-direct mode. */
    private java.util.List<Integer> _consumeTaskIdList;

    /**
     * @param _streamId id of the stream to declare and emit on
     * @param _direct   {@code true} to declare a direct stream and use {@code emitDirect}
     */
    public SentenceSplitBolt(String _streamId, boolean _direct) {
        this._streamId = _streamId;
        this._direct = _direct;
    }

    /**
     * Registers the ack/fail metrics, reads the separator from the configuration and, in direct
     * mode, collects the task ids of the components that consume this bolt's stream with a
     * direct grouping.
     *
     * @param stormConf topology configuration; {@code Const.SEPARATOR} is read from it
     * @param context   used to register metrics and to look up consumer tasks
     * @param collector used to emit, ack and fail tuples
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this._collector = collector;
        // register metric for monitor
        this._ackMetric = context.registerMetric("sentence.split.ack.metric", new CountMetric(), 60);
        this._failMetric = context.registerMetric("sentence.split.fail.metric", new CountMetric(), 60);
        this._separator = (String) stormConf.get(Const.SEPARATOR);

        this._consumeTaskIdList = new java.util.ArrayList<>();
        if (this._direct
                && context.getThisTargets() != null
                && context.getThisTargets().get(this._streamId) != null) {
            // key = consuming component id, value = grouping it subscribed with
            context.getThisTargets().get(this._streamId).forEach((componentId, grouping) -> {
                if (grouping.is_set_direct()) {
                    this._consumeTaskIdList.addAll(context.getComponentTasks(componentId));
                }
            });
        }
    }

    /**
     * Splits the sentence and emits its words, then acks the input. An empty sentence is acked
     * without emitting anything, so it is not left pending. Any exception fails the input tuple.
     */
    @Override
    public void execute(Tuple input) {
        try {
            String sentence = input.getString(0);
            if (sentence != null && !sentence.isEmpty()) {
                for (String field : sentence.split(_separator)) {
                    // Consecutive separators yield empty tokens; they carry no word.
                    if (!field.isEmpty()) {
                        emitWord(input, field);
                    }
                }
            }
            this._collector.ack(input);
            this._ackMetric.incr();
        } catch (Exception e) {
            System.err.println("fail to split sentence : " + e);
            this._collector.fail(input);
            this._failMetric.incr();
        }
    }

    /** Emits one {@code (word, 1)} tuple anchored to {@code input} on this bolt's stream. */
    private void emitWord(Tuple input, String word) {
        if (!this._direct) {
            this._collector.emit(this._streamId, input, new Values(word, 1));
            return;
        }
        if (this._consumeTaskIdList.isEmpty()) {
            // No component subscribed to the direct stream, so there is no valid target task.
            return;
        }
        // emitDirect must address a consumer task, not this bolt's own task. Choosing the task
        // from the word's hash keeps every occurrence of a word on the same consumer task.
        int slot = Math.floorMod(word.hashCode(), this._consumeTaskIdList.size());
        this._collector.emitDirect(
                this._consumeTaskIdList.get(slot), this._streamId, input, new Values(word, 1));
    }

    @Override
    public void cleanup() {
    }

    /** Declares the output stream with the fields {@code word} and {@code count}. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declareStream(this._streamId, this._direct, new Fields("word", "count"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}

/**
 * Word Sum Bolt.
 *
 * <p>Accumulates the count in field 1 per word in field 0 inside a size- and time-bounded cache
 * and prints the accumulated sum of a word when its cache entry is removed.
 *
 * @author hanhan.zhang
 */
public class WordSumBolt extends BaseRichBolt {

    private OutputCollector _collector;

    private int _taskId;

    /** key = word, value = running sum for that word. */
    private Cache<String, AtomicInteger> _wordCache;

    /** Stores the collector and task id and builds the word cache with its printing listener. */
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this._collector = collector;
        this._taskId = context.getThisTaskId();
        this._wordCache = CacheBuilder.newBuilder()
                .maximumSize(1024)
                .expireAfterWrite(3, TimeUnit.SECONDS)
                .removalListener((removalNotification) -> {
                    String key = (String) removalNotification.getKey();
                    AtomicInteger sum = (AtomicInteger) removalNotification.getValue();
                    System.out.println("word sum result : [" + key + "," + sum.get() + "]");
                })
                .build();
    }

    /**
     * Adds the tuple's count to the running sum of its word, then acks the input. A tuple with
     * an empty word is acked without being counted, so it is not left pending. Any exception
     * fails the input tuple.
     */
    @Override
    public void execute(Tuple input) {
        try {
            String word = input.getString(0);
            // Plain null/empty check: no dependency on which Strings utility class is imported.
            if (word != null && !word.isEmpty()) {
                int count = input.getInteger(1);
                AtomicInteger counter = this._wordCache.getIfPresent(word);
                if (counter == null) {
                    this._wordCache.put(word, new AtomicInteger(count));
                } else {
                    counter.addAndGet(count);
                }
            }
            this._collector.ack(input);
        } catch (Exception e) {
            System.err.println("fail to sum word : " + e);
            this._collector.fail(input);
        }
    }

    /** This bolt emits nothing, so no output stream is declared. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
    }
}

3、Storm运行

/**
 * Tuple Split-Flow Topology.
 *
 * <p>Wires a sentence spout to a split bolt through a direct grouping and the split bolt to a
 * word-sum bolt through a fields grouping on {@code word}, then submits the topology to a
 * local cluster.
 *
 * @author hanhan.zhang
 */
public class FlowTopology {

    private static final String SPOUT_ID = "sentence.cycle.spout";
    private static final String SPLIT_BOLT_ID = "sentence.split.bolt";
    private static final String SUM_BOLT_ID = "word.sum.bolt";

    public static void main(String[] args) {
        // stream names
        final String sentenceStream = "topology.flow.cycle.spout.stream";
        final String wordStream = "topology.flow.split.bolt.stream";

        // sentences the spout cycles through
        List<Object>[] sentences = new List[] {
            new Values("the cow jumped over the moon"),
            new Values("the man went to the store and bought some candy"),
            new Values("four score and seven years ago"),
            new Values("how many apples can you eat")
        };

        TopologyBuilder builder = new TopologyBuilder();

        // spout: direct stream, parallelism 1
        builder.setSpout(SPOUT_ID, new FixedCycleSpout(sentenceStream, "sentence", true, sentences), 1);

        // split bolt: receives the spout stream through a direct grouping
        builder.setBolt(SPLIT_BOLT_ID, new SentenceSplitBolt(wordStream, false), 1)
                .directGrouping(SPOUT_ID, sentenceStream);

        // sum bolt: same word always goes to the same task
        builder.setBolt(SUM_BOLT_ID, new WordSumBolt(), 3)
                .fieldsGrouping(SPLIT_BOLT_ID, wordStream, new Fields("word"));

        Config topologyConf = new Config();
        topologyConf.put(Const.SEPARATOR, " ");

        new LocalCluster().submitTopology("flowTopology", topologyConf, builder.createTopology());
    }
}

Storm---DirectGroup(直接分组)的更多相关文章

  1. 简单聊聊Storm的流分组策略

    简单聊聊Storm的流分组策略 首先我要强调的是,Storm的分组策略对结果有着直接的影响,不同的分组的结果一定是不一样的.其次,不同的分组策略对资源的利用也是有着非常大的不同,本文主要讲一讲loca ...

  2. Storm Topology及分组原理

    Storm的通信机制,需要满足如下一些条件以满足Storm的语义. 1.建立数据传输的缓冲区.在通信连接没有建立之前把发送的数据缓存起来.数据发送方可以在连接建立之前发送消息,而不需要等连接建立起来, ...

  3. Storm Grouping —— 流分组策略

    Storm Grouping: Shuffle Grouping :随机分组,尽量均匀分布到下游Bolt中 将流分组定义为混排.这种混排分组意味着来自Spout的输入将混排,或随机分发给此Bolt中的 ...

  4. storm的流分组

    用的是ShuffleGrouping分组方式,并行度设置为3 这是跑下来的结果 参考代码StormTopologyShufferGrouping.java package yehua.storm; i ...

  5. 大数据量场景下storm自定义分组与Hbase预分区完美结合大幅度节省内存空间

    前言:在系统中向hbase中插入数据时,常常通过设置region的预分区来防止大数据量插入的热点问题,提高数据插入的效率,同时可以减少当数据猛增时由于Region split带来的资源消耗.大量的预分 ...

  6. storm自定义分组与Hbase预分区结合节省内存消耗

    Hbas预分区 在系统中向hbase中插入数据时,常常通过设置region的预分区来防止大数据量插入的热点问题,提高数据插入的效率,同时可以减少当数据猛增时由于Region split带来的资源消耗. ...

  7. storm Tutorial 的解读 + 个人理解

    参考链接: Tutorial storm Tutorial 中文解读+分析 导读.摘要: .hadoop有master与slave,Storm与之对应的节点是什么? .Storm控制节点上面运行一个后 ...

  8. [转载] 使用 Twitter Storm 处理实时的大数据

    转载自http://www.ibm.com/developerworks/cn/opensource/os-twitterstorm/ 流式处理大数据简介 Storm 是一个开源的.大数据处理系统,与 ...

  9. Storm日志分析调研及其实时架构

    1.Storm第一个Demo 2.Windows下基于eclipse的Storm应用开发与调试 3.Storm实例+mysql数据库保存 4.Storm原理介绍 5. flume+kafka+stor ...

  10. Storm知识点

    1. 离线计算是什么? 离线计算:批量获取数据.批量传输数据.周期性批量计算数据.数据展示 代表技术:Sqoop批量导入数据.HDFS批量存储数据.MapReduce批量计算数据.Hive批量计算数据 ...

随机推荐

  1. 常用的smarty变量操作

    php模板引擎smarty的变量操作符可用于操作变量,自定义函数和字符.语法中使用"|"应用变量操作符,多个参数用":"分隔. capita ...

  2. Linux下使进程在后台运行

    怎么样使程序在后台执行 ///////////////////  nohup ./nn > nn.log 2>&1 &   //////////// 方法有很多, ...

  3. POJ 2777.Count Color-线段树(区间染色+区间查询颜色数量二进制状态压缩)-若干年之前的一道题目。。。

    Count Color Time Limit: 1000MS   Memory Limit: 65536K Total Submissions: 53312   Accepted: 16050 Des ...

  4. django-BBS(2)

    昨天设计了数据库和数据表,今天来进行页面前端的设计, 1.首先去bootstrap上,下载相应的模板和配置文件,添加到对应的位置 2.在templates中添加许多许多的html页面 如下     并 ...

  5. DelegatingFilterProxy干了什么?

    org.springframework.web.filter.DelegatingFilterProxy 一般情况,创建一个Filter是交给自己来实现的.基于servlet规范,在web.xml中配 ...

  6. [BZOJ3238][AHOI2013]差异(后缀数组)

    求和式的前两项可以直接算,问题是对于每对i,j计算LCP. 一个比较显然的性质是,LCP(i,j)是h[rk[i]+1~rk[j]]中的最小值. 从h的每个元素角度考虑,就是对每个h计算有多少对i,j ...

  7. 调用sort段错误问题

    问题:sort的比较函数实现有问题导致进程调用sort时core了. 结论:特别要注意,sort的比较函数必须遵循严格弱排序(strict weak ordering)的规则.   这是最近在工作中遇 ...

  8. 【推导】【暴力】Codeforces Round #432 (Div. 2, based on IndiaHacks Final Round 2017) C. Five Dimensional Points

    题意:给你五维空间内n个点,问你有多少个点不是坏点. 坏点定义:如果对于某个点A,存在点B,C,使得角BAC为锐角,那么A是坏点. 结论:如果n维空间内已经存在2*n+1个点,那么再往里面添加任意多个 ...

  9. ES6 标签模板

    标签模板其实不是模板,而是函数调用的一种特殊形式."标签"指的是函数,紧跟在后面的模板字符串就是它的参数. var a = 5; var b = 10; tag `Hello ${ ...

  10. 使用idea搭建Spring boot开发初始环境

    准备工作 将以下代码加入idea的live template,命名为springbootStartup <parent> <groupId>org.springframewor ...