Storm---DirectGroup(直接分组)
以单词分割计数为例实现Storm的DirectGroup分组:
1、Spout实现
Spout是Storm的数据源头。使用DirectGroup(直接分组)方式将Spout的数据发送到指定的Bolt时,需注意:
1)、需要拿到消费该Spout数据的Bolt的Task Id(Task是运行在Executor线程中的组件实例,每个Task都有唯一的Task Id),如下代码在Spout.open()初始化时通过TopologyContext获取消费方的Task Id
2)、需使用SpoutOutputCollector.emitDirect()方法
3)、将Spout的输出流声明为直接流,即在Spout.declareOutputFields()中调用declareStream()并将direct参数设为true
/**
* Fixed Cycle Spout
*
* @author hanhan.zhang
* */
public class FixedCycleSpout implements IRichSpout {

    /** Name of the single output field carried by every emitted tuple. */
    private String _fieldName;

    /** Whether the output stream is declared (and emitted) as a direct stream. */
    private boolean _direct;

    /** Id of the stream this spout declares and emits on. */
    private String _streamId;

    /** Position of the next tuple to emit inside {@link #_sendTuple}. */
    private int _index;

    /** key = msgId, value = tuple that was sent under that id and is not acked yet. */
    private Map<String, List<Object>> _pendingTuple;

    /** Fixed set of tuples that is emitted over and over, in order. */
    private List<Object>[] _sendTuple;

    private SpoutOutputCollector _collector;

    private CountMetric _sendMetric;

    private CountMetric _failMetric;

    /** Task ids of every component that subscribed to this spout with a direct grouping. */
    private List<Integer> _consumeTaskIdList;

    /**
     * @param _streamId  id of the stream to declare and emit on
     * @param _fieldName name of the single output field
     * @param _direct    {@code true} to declare the stream as direct and use {@code emitDirect}
     * @param _sendTuple tuples to emit cyclically
     */
    public FixedCycleSpout(String _streamId, String _fieldName, boolean _direct, List<Object> ... _sendTuple) {
        this._streamId = _streamId;
        this._fieldName = _fieldName;
        this._direct = _direct;
        this._sendTuple = _sendTuple;
    }

    /**
     * Resets the cycle position, registers the send/fail metrics and, in direct mode,
     * collects the task ids of all consumers that use a direct grouping.
     */
    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this._index = 0;
        this._pendingTuple = Maps.newHashMap();

        // register metric
        this._sendMetric = context.registerMetric("cycle.spout.send.tuple.metric", new CountMetric(), 60);
        this._failMetric = context.registerMetric("cycle.spout.fail.tuple.metric", new CountMetric(), 60);

        this._collector = collector;

        // get consume task id
        if (this._direct) {
            this._consumeTaskIdList = Lists.newArrayList();
            // outer key = stream id, inner key = id of the consuming component
            Map<String, Map<String, Grouping>> consumeTargets = context.getThisTargets();
            if (consumeTargets != null && !consumeTargets.isEmpty()) {
                consumeTargets.forEach((streamId, target) -> {
                    if (target != null && !target.isEmpty()) {
                        target.forEach((componentId, group) -> {
                            if (group.is_set_direct()) {
                                this._consumeTaskIdList.addAll(context.getComponentTasks(componentId));
                            }
                        });
                    }
                });
            }
        }
    }

    @Override
    public void close() {
    }

    @Override
    public void activate() {
    }

    @Override
    public void deactivate() {
    }

    /**
     * Emits the next tuple of the fixed cycle under a fresh random message id.
     * Does nothing when the spout was constructed without any tuple.
     */
    @Override
    public void nextTuple() {
        // Nothing to cycle through: indexing the array below would throw.
        if (this._sendTuple == null || this._sendTuple.length == 0) {
            return;
        }
        this._sendMetric.incr();
        if (this._index >= this._sendTuple.length) {
            this._index = 0;
        }
        String msgId = UUID.randomUUID().toString();
        List<Object> tuple = this._sendTuple[this._index++];
        sendTuple(msgId, tuple);
    }

    /** Forgets the tuple that was pending under {@code msgId}. */
    @Override
    public void ack(Object msgId) {
        String msgIdStr = (String) msgId;
        System.out.println("ack tuple with msgId " + msgIdStr);
        this._pendingTuple.remove(msgIdStr);
    }

    /**
     * Re-sends the tuple that was pending under {@code msgId}.
     * An id that is no longer pending (for example already acked) is only counted and logged.
     */
    @Override
    public void fail(Object msgId) {
        this._failMetric.incr();
        String msgIdStr = (String) msgId;
        System.out.println("fail tuple with msgId " + msgIdStr);
        List<Object> tuple = this._pendingTuple.get(msgIdStr);
        if (tuple == null) {
            // The payload is gone, so there is nothing to replay; emitting null would be an error.
            return;
        }
        sendTuple(msgIdStr, tuple);
    }

    /** Declares the single output stream, flagged as direct when this spout runs in direct mode. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declareStream(this._streamId, this._direct, new Fields(_fieldName));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

    /**
     * Records the tuple as pending and emits it on the declared stream.
     *
     * <p>In direct mode one copy is emitted to every collected consumer task, all under the same
     * message id, so {@link #ack(Object)} / {@link #fail(Object)} may be invoked more than once
     * for that id.
     *
     * @param msgId message id used for ack/fail tracking
     * @param tuple values to emit
     * @throws IllegalStateException in direct mode when no direct consumer task is known
     */
    protected void sendTuple(String msgId, List<Object> tuple) {
        this._pendingTuple.put(msgId, tuple);
        if (this._direct) {
            if (this._consumeTaskIdList == null || this._consumeTaskIdList.isEmpty()) {
                throw new IllegalStateException("direct task is empty !");
            }
            this._consumeTaskIdList.forEach(taskId ->
                    this._collector.emitDirect(taskId, this._streamId, tuple, msgId));
        } else {
            // Emit on the declared stream; the default stream is never declared by this spout.
            this._collector.emit(this._streamId, tuple, msgId);
        }
    }
}
2、Bolt实现
/**
* Sentence Split Bolt
*
* @author hanhan.zhang
* */
public class SentenceSplitBolt implements IRichBolt {

    /** Separator used when the topology configuration does not provide one. */
    private static final String DEFAULT_SEPARATOR = " ";

    private OutputCollector _collector;

    private CountMetric _ackMetric;

    private CountMetric _failMetric;

    /** Regular expression handed to {@link String#split(String)}. */
    private String _separator;

    /** Whether the output stream is declared (and emitted) as a direct stream. */
    private boolean _direct;

    /** Id of the stream this bolt declares and emits on. */
    private String _streamId;

    /** Sorted task ids of the components subscribed to {@link #_streamId} with a direct grouping. */
    private java.util.List<Integer> _consumeTaskIdList;

    /**
     * @param _streamId id of the stream to declare and emit on
     * @param _direct   {@code true} to declare the stream as direct and use {@code emitDirect}
     */
    public SentenceSplitBolt(String _streamId, boolean _direct) {
        this._streamId = _streamId;
        this._direct = _direct;
    }

    /**
     * Stores the collector, registers the ack/fail metrics, reads the separator from the
     * topology configuration and, in direct mode, collects the consumer task ids.
     *
     * @param stormConf topology configuration; {@code Const.SEPARATOR} holds the split separator
     * @param context   used to register metrics and to look up the downstream consumers
     * @param collector used to emit, ack and fail tuples
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this._collector = collector;

        // register metric for monitor
        this._ackMetric = context.registerMetric("sentence.split.ack.metric", new CountMetric(), 60);
        this._failMetric = context.registerMetric("sentence.split.fail.metric", new CountMetric(), 60);

        // A missing separator would make every split() call throw, so fall back to a space.
        String separator = (String) stormConf.get(Const.SEPARATOR);
        this._separator = separator != null ? separator : DEFAULT_SEPARATOR;

        if (this._direct) {
            java.util.List<Integer> consumers = new java.util.ArrayList<>();
            // outer key = stream id, inner key = id of the consuming component
            context.getThisTargets().forEach((streamId, targets) -> {
                if (this._streamId.equals(streamId) && targets != null) {
                    targets.forEach((componentId, grouping) -> {
                        if (grouping.is_set_direct()) {
                            consumers.addAll(context.getComponentTasks(componentId));
                        }
                    });
                }
            });
            // Sorted so that the word -> task choice does not depend on map iteration order.
            java.util.Collections.sort(consumers);
            this._consumeTaskIdList = consumers;
        }
    }

    /**
     * Splits the sentence in field 0 and emits one {@code (word, 1)} tuple per token, anchored
     * to the input. The input is acked on success, including when the sentence is null or empty;
     * on any exception the error is reported and the input is failed.
     */
    @Override
    public void execute(Tuple input) {
        try {
            String sentence = input.getString(0);
            if (!Strings.isNullOrEmpty(sentence)) {
                for (String field : sentence.split(this._separator)) {
                    emitWord(input, field);
                }
            }
            // Ack even when there was nothing to split, otherwise the tuple would time out.
            this._collector.ack(input);
            this._ackMetric.incr();
        } catch (Exception e) {
            this._collector.reportError(e);
            this._collector.fail(input);
            this._failMetric.incr();
        }
    }

    /**
     * Emits {@code (word, 1)} anchored to {@code anchor}. In direct mode the target is one of
     * the consumer tasks, chosen by the word's hash so equal words reach the same task.
     *
     * @throws IllegalStateException in direct mode when no direct consumer task is known
     */
    private void emitWord(Tuple anchor, String word) {
        Values values = new Values(word, 1);
        if (!this._direct) {
            this._collector.emit(this._streamId, anchor, values);
            return;
        }
        if (this._consumeTaskIdList == null || this._consumeTaskIdList.isEmpty()) {
            throw new IllegalStateException("direct task is empty !");
        }
        int slot = Math.floorMod(word.hashCode(), this._consumeTaskIdList.size());
        this._collector.emitDirect(this._consumeTaskIdList.get(slot), this._streamId, anchor, values);
    }

    @Override
    public void cleanup() {
    }

    /** Declares the {@code (word, count)} output stream, flagged as direct in direct mode. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declareStream(this._streamId, this._direct, new Fields("word", "count"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}

/**
 * Word Sum Bolt
 *
 * @author hanhan.zhang
 * */
public class WordSumBolt extends BaseRichBolt {

    private OutputCollector _collector;

    /** Running sum per word; an entry is printed by the removal listener when it leaves the cache. */
    private Cache<String, AtomicInteger> _wordCache;

    /**
     * Stores the collector and builds the word cache (at most 1024 entries, each expiring
     * 3 seconds after it was written) whose removal listener prints the final sum.
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this._collector = collector;
        this._wordCache = CacheBuilder.newBuilder()
                .maximumSize(1024)
                .expireAfterWrite(3, TimeUnit.SECONDS)
                .removalListener((removalNotification) -> {
                    String key = (String) removalNotification.getKey();
                    AtomicInteger sum = (AtomicInteger) removalNotification.getValue();
                    System.out.println("word sum result : [" + key + "," + sum.get() + "]");
                })
                .build();
    }

    /**
     * Adds the count in field 1 to the running sum of the word in field 0.
     * The input is acked on success, including when the word is null or empty;
     * on any exception the error is reported and the input is failed.
     */
    @Override
    public void execute(Tuple input) {
        try {
            String word = input.getString(0);
            if (word != null && !word.isEmpty()) {
                int count = input.getInteger(1);
                AtomicInteger counter = this._wordCache.getIfPresent(word);
                if (counter == null) {
                    this._wordCache.put(word, new AtomicInteger(count));
                } else {
                    counter.addAndGet(count);
                }
            }
            // Ack even when the word was skipped, otherwise the tuple would time out.
            this._collector.ack(input);
        } catch (Exception e) {
            this._collector.reportError(e);
            this._collector.fail(input);
        }
    }

    /** This bolt does not emit anything, so no stream is declared. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
    }
}
3、Storm运行
/**
* Tuple Split-Flow Topology
*
* @author hanhan.zhang
* */
public class FlowTopology {

    /**
     * Wires spout -> split bolt -> sum bolt and submits the topology to an in-process cluster.
     *
     * <p>The spout emits on a direct stream consumed by the split bolt through a direct
     * grouping; the split bolt emits on a regular stream that the sum bolt consumes grouped by
     * the "word" field.
     */
    public static void main(String[] args) {
        // stream names
        final String spoutStreamId = "topology.flow.cycle.spout.stream";
        final String splitStreamId = "topology.flow.split.bolt.stream";

        // component ids
        final String spoutId = "sentence.cycle.spout";
        final String splitBoltId = "sentence.split.bolt";
        final String sumBoltId = "word.sum.bolt";

        // spout: the sentences are handed over directly as the varargs tuple list
        FixedCycleSpout cycleSpout = new FixedCycleSpout(
                spoutStreamId,
                "sentence",
                true,
                new Values("the cow jumped over the moon"),
                new Values("the man went to the store and bought some candy"),
                new Values("four score and seven years ago"),
                new Values("how many apples can you eat"));

        // bolts
        SentenceSplitBolt splitBolt = new SentenceSplitBolt(splitStreamId, false);
        WordSumBolt sumBolt = new WordSumBolt();

        // wiring
        TopologyBuilder topologyBuilder = new TopologyBuilder();
        topologyBuilder.setSpout(spoutId, cycleSpout, 1);
        topologyBuilder
                .setBolt(splitBoltId, splitBolt, 1)
                .directGrouping(spoutId, spoutStreamId);
        topologyBuilder
                .setBolt(sumBoltId, sumBolt, 3)
                .fieldsGrouping(splitBoltId, splitStreamId, new Fields("word"));

        // configuration read by the split bolt
        Config config = new Config();
        config.put(Const.SEPARATOR, " ");

        LocalCluster localCluster = new LocalCluster();
        localCluster.submitTopology("flowTopology", config, topologyBuilder.createTopology());
    }
}
Storm---DirectGroup(直接分组)的更多相关文章
- 简单聊聊Storm的流分组策略
简单聊聊Storm的流分组策略 首先我要强调的是,Storm的分组策略对结果有着直接的影响,不同的分组的结果一定是不一样的.其次,不同的分组策略对资源的利用也是有着非常大的不同,本文主要讲一讲loca ...
- Storm Topology及分组原理
Storm的通信机制,需要满足如下一些条件以满足Storm的语义. 1.建立数据传输的缓冲区.在通信连接没有建立之前把发送的数据缓存起来.数据发送方可以在连接建立之前发送消息,而不需要等连接建立起来, ...
- Storm Grouping —— 流分组策略
Storm Grouping: Shuffle Grouping :随机分组,尽量均匀分布到下游Bolt中 将流分组定义为混排.这种混排分组意味着来自Spout的输入将混排,或随机分发给此Bolt中的 ...
- storm的流分组
用的是ShuffleGrouping分组方式,并行度设置为3 这是跑下来的结果 参考代码StormTopologyShufferGrouping.java package yehua.storm; i ...
- 大数据量场景下storm自定义分组与Hbase预分区完美结合大幅度节省内存空间
前言:在系统中向hbase中插入数据时,常常通过设置region的预分区来防止大数据量插入的热点问题,提高数据插入的效率,同时可以减少当数据猛增时由于Region split带来的资源消耗.大量的预分 ...
- storm自定义分组与Hbase预分区结合节省内存消耗
Hbas预分区 在系统中向hbase中插入数据时,常常通过设置region的预分区来防止大数据量插入的热点问题,提高数据插入的效率,同时可以减少当数据猛增时由于Region split带来的资源消耗. ...
- storm Tutorial 的解读 + 个人理解
参考链接: Tutorial storm Tutorial 中文解读+分析 导读.摘要: .hadoop有master与slave,Storm与之对应的节点是什么? .Storm控制节点上面运行一个后 ...
- [转载] 使用 Twitter Storm 处理实时的大数据
转载自http://www.ibm.com/developerworks/cn/opensource/os-twitterstorm/ 流式处理大数据简介 Storm 是一个开源的.大数据处理系统,与 ...
- Storm日志分析调研及其实时架构
1.Storm第一个Demo 2.Windows下基于eclipse的Storm应用开发与调试 3.Storm实例+mysql数据库保存 4.Storm原理介绍 5. flume+kafka+stor ...
- Storm知识点
1. 离线计算是什么? 离线计算:批量获取数据.批量传输数据.周期性批量计算数据.数据展示 代表技术:Sqoop批量导入数据.HDFS批量存储数据.MapReduce批量计算数据.Hive批量计算数据 ...
随机推荐
- windows 安装tp5 composer方式
1.下载windows composer-setup.exe(我已下载一个Composer-Setup.exe); 2.我电脑使用的是phpstudy2018版 php-7.0.12-NTS 3.然后 ...
- CF1025C Plasticine zebra【环状字符串/思维】
给你一个长度为 \(\left|s\right|\) 的01串 \(s\) ,每次操作你可以任选一个 \(k\) ,使01串的 \([1,k]\) 和 \((k,\left|s\right|]\) 分 ...
- HDU 6162 Ch’s gift (树剖 + 离线线段树)
Ch’s gift Time Limit: 6000/3000 MS (Java/Others) Memory Limit: 65536/65536 K (Java/Others)Total S ...
- CSS页面排版的一点笔记
CSS页面排版 字体族 字体族的值是一个字体备选列表,多个字体使用英文逗号隔开,字体名称如果有空格则需要引号. font-family: "Georgia Pro", " ...
- 【爬虫】python 多线程知识
第一段代码: __author__ = 'Administrator' import threading import time index = 0 class MyThread(threading. ...
- Rxjava与Retrofit的使用
韩梦飞沙 韩亚飞 313134555@qq.com yue31313 han_meng_fei_sha ---- -----
- [USACO 2017 Open Gold] Tutorial
Link: 传送门 A: 由于每个颜色只染色一次就确定了所有要染色的区间 要求染色的次数其实就是求区间最多嵌套多少层,如果有区间相交则无解 以上操作明显可以将左端点排序后用栈来维护 #include ...
- [bzoj1009](HNOI2008)GT考试 (kmp+矩阵快速幂加速递推)
Description 阿 申准备报名参加GT考试,准考证号为N位数X1X2....Xn(0<=Xi<=9),他不希望准考证号上出现不吉利的数字.他的不吉利数学 A1A2...Am(0&l ...
- Semaphore(信号量)源码分析
1. Semaphore Semaphore和ReentrantReadWriteLock.ReadLock(读锁)都采用AbstractOwnableSynchronizer共享排队的方式实现. 关 ...
- 原生js实现Ajax请求
总的来说,Ajax是与服务器交换数据并更新部分网页的艺术,在不重新加载整个网页的情况下,异步请求数据并刷新页面.举一个小的例子:Goole搜索页面.当用户在输入框输入关键字的时候,JavaScript ...