以单词分割计数为例实现Storm的DirectGroup分组:

1、Spout实现

Spout是Storm的数据源头。使用DirectGroup(直接分组)方式将Spout数据发送到指定的Bolt Task时,需注意:

1)、需要先拿到消费Spout数据的Bolt的Task Id(Task是组件的运行实例,由Executor线程执行,Task Id并不是Executor的编号),如下代码在Spout.open()初始化时拿到消费方的Task Id列表

2)、需使用SpoutOutputCollector.emitDirect()方法

3)、将Spout的输出流声明为直接流(direct stream),即在Spout.declareOutputFields()中通过declareStream(streamId, true, fields)声明

/**
 * Fixed Cycle Spout.
 *
 * <p>Endlessly cycles over a fixed array of tuples and emits them on a single
 * named stream. Every emitted tuple is tracked by a random UUID message id so
 * that it can be re-sent when Storm reports it as failed.
 *
 * <p>In direct mode the stream is declared as a direct stream and each tuple is
 * emitted with {@code emitDirect} to <b>every</b> task of every component that
 * subscribed to this spout with a direct grouping. In non-direct mode the tuple
 * is emitted on the same named stream with a regular {@code emit}.
 *
 * @author hanhan.zhang
 */
public class FixedCycleSpout implements IRichSpout {

    private String _fieldName;

    private boolean _direct;

    // stream mark
    private String _streamId;

    // position of the next tuple to send inside _sendTuple
    private int _index;

    // key = msgId, value = sending tuple
    private Map<String, List<Object>> _pendingTuple;

    // send tuple
    private List<Object>[] _sendTuple;

    private SpoutOutputCollector _collector;

    private CountMetric _sendMetric;
    private CountMetric _failMetric;

    // consume task set (only populated in direct mode)
    private List<Integer> _consumeTaskIdList;

    /**
     * @param _streamId  id of the only stream this spout declares and emits on
     * @param _fieldName name of the single output field
     * @param _direct    {@code true} to declare the stream as direct and use {@code emitDirect}
     * @param _sendTuple tuples to cycle through, in order
     */
    public FixedCycleSpout(String _streamId, String _fieldName, boolean _direct, List<Object>... _sendTuple) {
        this._streamId = _streamId;
        this._fieldName = _fieldName;
        this._direct = _direct;
        this._sendTuple = _sendTuple;
    }

    /**
     * Initializes runtime state, registers the send/fail metrics and, in direct
     * mode, collects the task ids of all components subscribed via direct grouping.
     */
    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this._index = 0;
        this._pendingTuple = Maps.newHashMap();

        // register metric
        this._sendMetric = context.registerMetric("cycle.spout.send.tuple.metric", new CountMetric(), 60);
        this._failMetric = context.registerMetric("cycle.spout.fail.tuple.metric", new CountMetric(), 60);

        this._collector = collector;

        // get consume task id
        if (this._direct) {
            this._consumeTaskIdList = Lists.newLinkedList();
            Map<String, Map<String, Grouping>> consumeTargets = context.getThisTargets();
            if (consumeTargets != null && !consumeTargets.isEmpty()) {
                // streamId = this._streamId (the only stream this spout declares)
                consumeTargets.forEach((streamId, target) -> {
                    if (target != null && !target.isEmpty()) {
                        // componentId = consume target component Id
                        target.forEach((componentId, group) -> {
                            if (group.is_set_direct()) {
                                this._consumeTaskIdList.addAll(context.getComponentTasks(componentId));
                            }
                        });
                    }
                });
            }
        }
    }

    @Override
    public void close() { }

    @Override
    public void activate() { }

    @Override
    public void deactivate() { }

    /**
     * Emits the next tuple of the cycle under a freshly generated message id.
     * Does nothing when the spout was constructed without any tuple.
     */
    @Override
    public void nextTuple() {
        // nothing to cycle over: indexing an empty array would throw
        if (this._sendTuple == null || this._sendTuple.length == 0) {
            return;
        }
        this._sendMetric.incr();
        if (this._index == this._sendTuple.length) {
            this._index = 0;
        }
        String msgId = UUID.randomUUID().toString();
        List<Object> tuple = this._sendTuple[this._index++];
        sendTuple(msgId, tuple);
    }

    /** Forgets the pending tuple registered under {@code msgId}. */
    @Override
    public void ack(Object msgId) {
        String msgIdStr = (String) msgId;
        System.out.println("ack tuple with msgId " + msgIdStr);
        this._pendingTuple.remove(msgIdStr);
    }

    /**
     * Re-sends the pending tuple registered under {@code msgId}.
     *
     * <p>In direct mode one message id is shared by the emits to all consumer
     * tasks, so an ack for one of them may already have removed the pending
     * entry; in that case there is nothing left to replay and the call is ignored
     * instead of emitting a {@code null} tuple.
     */
    @Override
    public void fail(Object msgId) {
        this._failMetric.incr();
        String msgIdStr = (String) msgId;
        System.out.println("fail tuple with msgId " + msgIdStr);
        List<Object> tuple = this._pendingTuple.get(msgIdStr);
        if (tuple == null) {
            return;
        }
        sendTuple(msgIdStr, tuple);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declareStream(this._streamId, this._direct, new Fields(_fieldName));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

    /**
     * Records the tuple as pending and emits it on {@code _streamId}.
     *
     * @param msgId message id used for ack/fail tracking
     * @param tuple values to emit
     * @throws IllegalStateException in direct mode when no consumer task was found in {@code open}
     */
    protected void sendTuple(String msgId, List<Object> tuple) {
        if (this._direct) {
            // validate before registering so a rejected send leaves no pending entry behind
            if (this._consumeTaskIdList == null || this._consumeTaskIdList.isEmpty()) {
                throw new IllegalStateException("direct task is empty !");
            }
            this._pendingTuple.put(msgId, tuple);
            for (Integer taskId : this._consumeTaskIdList) {
                this._collector.emitDirect(taskId, this._streamId, tuple, msgId);
            }
        } else {
            this._pendingTuple.put(msgId, tuple);
            // emit on the declared stream; the default stream is never declared by this spout
            this._collector.emit(this._streamId, tuple, msgId);
        }
    }
}

2、Bolt实现

/**
 * Sentence Split Bolt.
 *
 * <p>Reads the sentence in field 0 of each input tuple, splits it with the
 * separator configured under {@code Const.SEPARATOR} and emits one
 * {@code (word, 1)} tuple per fragment, anchored to the input, on the
 * configured stream.
 *
 * <p>In direct mode the stream is declared as a direct stream; each word is
 * then sent with {@code emitDirect} to one task of every component that
 * subscribed to this stream with a direct grouping. The task is chosen from the
 * word's hash so that the same word always reaches the same task.
 *
 * @author hanhan.zhang
 */
public class SentenceSplitBolt implements IRichBolt {

    // used when the topology configuration carries no Const.SEPARATOR entry
    private static final String DEFAULT_SEPARATOR = " ";

    private OutputCollector _collector;

    private CountMetric _ackMetric;

    private CountMetric _failMetric;

    // passed to String.split, i.e. interpreted as a regular expression
    private String _separator;

    private boolean _direct;

    private String _streamId;

    // direct mode only: sorted task ids of each component consuming _streamId via direct grouping
    private List<List<Integer>> _consumeTaskGroups;

    /**
     * @param _streamId id of the only stream this bolt declares and emits on
     * @param _direct   {@code true} to declare the stream as direct and use {@code emitDirect}
     */
    public SentenceSplitBolt(String _streamId, boolean _direct) {
        this._streamId = _streamId;
        this._direct = _direct;
    }

    /**
     * Stores the collector, registers the ack/fail metrics, reads the separator
     * from the topology configuration and, in direct mode, resolves the
     * downstream consumer tasks.
     *
     * @param stormConf topology configuration
     * @param context   used to register metrics and to look up downstream consumers
     * @param collector used to emit, ack and fail tuples
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this._collector = collector;
        // register metric for monitor
        this._ackMetric = context.registerMetric("sentence.split.ack.metric", new CountMetric(), 60);
        this._failMetric = context.registerMetric("sentence.split.fail.metric", new CountMetric(), 60);

        // a missing separator would otherwise make every split throw and every tuple fail
        Object separator = stormConf.get(Const.SEPARATOR);
        this._separator = separator == null ? DEFAULT_SEPARATOR : (String) separator;

        this._consumeTaskGroups = new java.util.ArrayList<>();
        if (this._direct) {
            Map<String, Map<String, Grouping>> allTargets = context.getThisTargets();
            Map<String, Grouping> targets = allTargets == null ? null : allTargets.get(this._streamId);
            if (targets != null) {
                for (Map.Entry<String, Grouping> target : targets.entrySet()) {
                    if (target.getValue().is_set_direct()) {
                        List<Integer> taskIds = new java.util.ArrayList<>(context.getComponentTasks(target.getKey()));
                        // fixed order so every task of this bolt maps a word to the same consumer task
                        java.util.Collections.sort(taskIds);
                        if (!taskIds.isEmpty()) {
                            this._consumeTaskGroups.add(taskIds);
                        }
                    }
                }
            }
        }
    }

    /**
     * Splits the sentence and emits one tuple per fragment, then acks the input.
     * A null or empty sentence produces no output but is still acked, so the
     * tuple tree completes instead of timing out. Any exception fails the input.
     */
    @Override
    public void execute(Tuple input) {
        try {
            String sentence = input.getString(0);
            if (!Strings.isNullOrEmpty(sentence)) {
                for (String field : sentence.split(_separator)) {
                    emitWord(input, field);
                }
            }
            this._collector.ack(input);
            this._ackMetric.incr();
        } catch (Exception e) {
            System.err.println("fail to split sentence tuple : " + e);
            this._collector.fail(input);
            this._failMetric.incr();
        }
    }

    @Override
    public void cleanup() { }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declareStream(this._streamId, this._direct, new Fields("word", "count"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

    /** Emits {@code (word, 1)} anchored to {@code anchor}, directly or via the regular grouping. */
    private void emitWord(Tuple anchor, String word) {
        if (!this._direct) {
            this._collector.emit(this._streamId, anchor, new Values(word, 1));
            return;
        }
        // the target must be a downstream consumer task, never this bolt's own task id
        for (List<Integer> taskIds : this._consumeTaskGroups) {
            int slot = Math.floorMod(word.hashCode(), taskIds.size());
            this._collector.emitDirect(taskIds.get(slot), this._streamId, anchor, new Values(word, 1));
        }
    }
}

/**
 * Word Sum Bolt
 *
 * @author hanhan.zhang
 */
public class WordSumBolt extends BaseRichBolt {

    private OutputCollector _collector;

    // word -> running count; the total is printed when the entry is removed from the cache
    private Cache<String, AtomicInteger> _wordCache;

    /**
     * Stores the collector and builds the word cache (at most 1024 entries,
     * expiring 3 seconds after write) whose removal listener prints the sum
     * accumulated for the removed word.
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this._collector = collector;
        this._wordCache = CacheBuilder.newBuilder()
                .maximumSize(1024)
                .expireAfterWrite(3, TimeUnit.SECONDS)
                .removalListener((removalNotification) -> {
                    String key = (String) removalNotification.getKey();
                    AtomicInteger sum = (AtomicInteger) removalNotification.getValue();
                    System.out.println("word sum result : [" + key + "," + sum.get() + "]");
                })
                .build();
    }

    /**
     * Adds the count in field 1 to the running sum of the word in field 0 and
     * acks the input. A null or empty word is not counted but is still acked so
     * that the anchored tuple tree can complete. Any exception fails the input.
     */
    @Override
    public void execute(Tuple input) {
        try {
            String word = input.getString(0);
            int count = input.getInteger(1);
            if (word != null && !word.isEmpty()) {
                AtomicInteger counter = this._wordCache.getIfPresent(word);
                if (counter == null) {
                    this._wordCache.put(word, new AtomicInteger(count));
                } else {
                    // updated in place: the cached entry itself is not rewritten
                    counter.addAndGet(count);
                }
            }
            this._collector.ack(input);
        } catch (Exception e) {
            this._collector.fail(input);
        }
    }

    /** This bolt is a sink and declares no output stream. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) { }
}

3、Storm运行

/**
 * Tuple Split-Flow Topology.
 *
 * <p>Wires a {@code FixedCycleSpout} (direct stream) to one
 * {@code SentenceSplitBolt} task through a direct grouping, then feeds the
 * split words to three {@code WordSumBolt} tasks through a fields grouping on
 * "word", and submits the result to a local cluster.
 *
 * @author hanhan.zhang
 */
public class FlowTopology {

    public static void main(String[] args) {
        // stream names
        final String sentenceStream = "topology.flow.cycle.spout.stream";
        final String wordStream = "topology.flow.split.bolt.stream";

        // component ids
        final String spoutId = "sentence.cycle.spout";
        final String splitBoltId = "sentence.split.bolt";
        final String sumBoltId = "word.sum.bolt";

        // sentences the spout cycles through
        List<Object>[] sentences = new List[] {
                new Values("the cow jumped over the moon"),
                new Values("the man went to the store and bought some candy"),
                new Values("four score and seven years ago"),
                new Values("how many apples can you eat")
        };

        // spout emits on a direct stream; the split bolt uses a regular stream
        FixedCycleSpout sentenceSpout = new FixedCycleSpout(sentenceStream, "sentence", true, sentences);
        SentenceSplitBolt sentenceSplitter = new SentenceSplitBolt(wordStream, false);
        WordSumBolt wordCounter = new WordSumBolt();

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout(spoutId, sentenceSpout, 1);
        builder.setBolt(splitBoltId, sentenceSplitter, 1)
                .directGrouping(spoutId, sentenceStream);
        builder.setBolt(sumBoltId, wordCounter, 3)
                .fieldsGrouping(splitBoltId, wordStream, new Fields("word"));

        // the split bolt reads its separator from the topology configuration
        Config topologyConf = new Config();
        topologyConf.put(Const.SEPARATOR, " ");

        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("flowTopology", topologyConf, builder.createTopology());
    }
}

Storm---DirectGroup(直接分组)的更多相关文章

  1. 简单聊聊Storm的流分组策略

    简单聊聊Storm的流分组策略 首先我要强调的是,Storm的分组策略对结果有着直接的影响,不同的分组的结果一定是不一样的.其次,不同的分组策略对资源的利用也是有着非常大的不同,本文主要讲一讲loca ...

  2. Storm Topology及分组原理

    Storm的通信机制,需要满足如下一些条件以满足Storm的语义. 1.建立数据传输的缓冲区.在通信连接没有建立之前把发送的数据缓存起来.数据发送方可以在连接建立之前发送消息,而不需要等连接建立起来, ...

  3. Storm Grouping —— 流分组策略

    Storm Grouping: Shuffle Grouping :随机分组,尽量均匀分布到下游Bolt中 将流分组定义为混排.这种混排分组意味着来自Spout的输入将混排,或随机分发给此Bolt中的 ...

  4. storm的流分组

    用的是ShuffleGrouping分组方式,并行度设置为3 这是跑下来的结果 参考代码StormTopologyShufferGrouping.java package yehua.storm; i ...

  5. 大数据量场景下storm自定义分组与Hbase预分区完美结合大幅度节省内存空间

    前言:在系统中向hbase中插入数据时,常常通过设置region的预分区来防止大数据量插入的热点问题,提高数据插入的效率,同时可以减少当数据猛增时由于Region split带来的资源消耗.大量的预分 ...

  6. storm自定义分组与Hbase预分区结合节省内存消耗

    Hbas预分区 在系统中向hbase中插入数据时,常常通过设置region的预分区来防止大数据量插入的热点问题,提高数据插入的效率,同时可以减少当数据猛增时由于Region split带来的资源消耗. ...

  7. storm Tutorial 的解读 + 个人理解

    参考链接: Tutorial storm Tutorial 中文解读+分析 导读.摘要: .hadoop有master与slave,Storm与之对应的节点是什么? .Storm控制节点上面运行一个后 ...

  8. [转载] 使用 Twitter Storm 处理实时的大数据

    转载自http://www.ibm.com/developerworks/cn/opensource/os-twitterstorm/ 流式处理大数据简介 Storm 是一个开源的.大数据处理系统,与 ...

  9. Storm日志分析调研及其实时架构

    1.Storm第一个Demo 2.Windows下基于eclipse的Storm应用开发与调试 3.Storm实例+mysql数据库保存 4.Storm原理介绍 5. flume+kafka+stor ...

  10. Storm知识点

    1. 离线计算是什么? 离线计算:批量获取数据.批量传输数据.周期性批量计算数据.数据展示 代表技术:Sqoop批量导入数据.HDFS批量存储数据.MapReduce批量计算数据.Hive批量计算数据 ...

随机推荐

  1. OpenStack 存储服务 Cinder存储节点部署LVM (十四)

    部署在block(10.0.0.103)主机 一)配置lvm 1.安装lvm2软件包 yum install lvm2 -y 2.启动LVM的metadata服务并且设置该服务随系统启动 system ...

  2. 删除DOM节点应用

    <!-- HTML结构 --> <ul id="test-list"> <li>JavaScript</li> <li> ...

  3. redux saga学习

    来源地址:https://www.youtube.com/watch?v=o3A9EvMspig Saga的基本写法 takeEvery与takeLatest的区别 takeEvery是指响应每一个请 ...

  4. Linux命令之rlogin

    rlogin [-8EKLdx] [-e char] [-l username] host rlogin在远程主机host上开始一个终端会话. (1).选项 -8 选项允许进行8位的输入数据传送:否则 ...

  5. oracle中 char,varchar,varchar2的区别

    区别:      1. CHAR的长度是固定的,而VARCHAR2的长度是可以变化的, 比如,存储字符串“abc",对于CHAR (20),表示你存储的字符将占20个字节(包括17个空字符) ...

  6. UTF-8字符「EF BF BD」-备胎

    在众多的utf-8码点值中,除了ascii,你还应该记住「EF BF BD」,因为它是很多编程语言以及库中的备胎,即无效的码点值在编码的时候会默认用这个码点值进行替换,即utf-8中的超级「备胎」(R ...

  7. 子查询在DELETE语句中的应用

    子查询在DELETE 中唯一可以应用的位置就是WHERE 子句,使用子查询可以完成复杂的数据删除控制.其使用方式与SELECT 语句中的子查询基本相同,而且也可以使用相关子查询等高级的特性.下面的SQ ...

  8. hdu 1011(Starship Troopers,树形dp)

    Starship Troopers Time Limit: 10000/5000 MS (Java/Others) Memory Limit: 65536/32768 K (Java/Others) ...

  9. 20162327实验一Java开发环境的熟悉实验报告

    20162327 <程序设计与数据结构>第一次实验报告 基础知识 1.JDB的使用 使用JDB调试程序需要以下五个步骤: 1.设置断点 stop in 2.run 3.print 4.ne ...

  10. js处理时间戳

    工具类 function add0(m){return m<10?'0'+m:m } function format(shijianchuo) { var time = new Date(shi ...