Goal: read messages from Kafka, filter them, append an extra field, and print the result.

SentenceSpout:

package Trident;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.serializer.StringDecoder;
import kafka.utils.VerifiableProperties;

/**
 * Reads data from Kafka and emits it as tuples.
 * @author BFD-593
 */
public class SentenceSpout extends BaseRichSpout {

    private SpoutOutputCollector collector;
    private ConsumerConnector consumer;
    private ConsumerIterator<String, String> it;

    @Override
    public void open(Map map, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;

        Properties props = new Properties();
        // ZooKeeper connection
        props.put("zookeeper.connect", "192.168.170.185:2181");
        // consumer group
        props.put("group.id", "testgroup");
        // ZooKeeper timeouts
        props.put("zookeeper.session.timeout.ms", "4000");
        props.put("zookeeper.sync.time.ms", "200");
        props.put("auto.commit.interval.ms", "1000");
        props.put("auto.offset.reset", "smallest");
        // serializer class
        props.put("serializer.class", "kafka.serializer.StringEncoder");

        ConsumerConfig config = new ConsumerConfig(props);
        this.consumer = Consumer.createJavaConsumerConnector(config);

        // createMessageStreams may only be called once per connector, so the
        // stream and its iterator are created here rather than in nextTuple().
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put("helloworld", new Integer(1));
        StringDecoder keyDecoder = new StringDecoder(new VerifiableProperties());
        StringDecoder valueDecoder = new StringDecoder(new VerifiableProperties());
        Map<String, List<KafkaStream<String, String>>> consumerMap =
                consumer.createMessageStreams(topicCountMap, keyDecoder, valueDecoder);
        KafkaStream<String, String> stream = consumerMap.get("helloworld").get(0);
        this.it = stream.iterator();
    }

    @Override
    public void nextTuple() {
        // hasNext() blocks until a message is available
        if (it.hasNext()) {
            // append two placeholder tokens so the tuple always has three fields
            String[] parts = (it.next().message() + " 1" + " 2").split(" ");
            String name = parts[0];
            String value = parts.length > 1 ? parts[1] : "";
            String value2 = parts.length > 2 ? parts[2] : "";
            this.collector.emit(new Values(name, value, value2));
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("name", "sentence", "sentence2"));
    }
}
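To have something for the spout to read, it helps to push a few test messages onto the helloworld topic first. The sketch below is not part of the original post; it uses the plain kafka-clients producer API, and the broker address 192.168.170.185:9092 is an assumption (only the ZooKeeper address appears above), so adjust host and port for your cluster.

    package Trident;

    import java.util.Properties;

    import org.apache.kafka.clients.producer.KafkaProducer;
    import org.apache.kafka.clients.producer.ProducerRecord;

    // Test helper (hypothetical, not from the original post): publishes a few
    // words to the "helloworld" topic so SentenceSpout has something to read.
    public class TestProducer {
        public static void main(String[] args) {
            Properties props = new Properties();
            // assumed broker address; the post only gives the ZooKeeper address
            props.put("bootstrap.servers", "192.168.170.185:9092");
            props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
            props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

            KafkaProducer<String, String> producer = new KafkaProducer<String, String>(props);
            for (String word : new String[]{"a", "b", "hello"}) {
                producer.send(new ProducerRecord<String, String>("helloworld", word));
            }
            producer.close();
        }
    }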

FunctionBolt:

package Trident;

import org.apache.storm.trident.operation.BaseFunction;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Values;

/**
 * Trident function operation: appends a "gender" field to the data emitted by the spout.
 * A function adds fields to the tuple; it does not replace the original tuple.
 * @author BFD-593
 */
public class FunctionBolt extends BaseFunction {

    @Override
    public void execute(TridentTuple tuple, TridentCollector collector) {
        String str = tuple.getStringByField("name");
        if ("a".equals(str)) {
            collector.emit(new Values("男"));
        } else {
            collector.emit(new Values("女"));
        }
    }
}
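For example, if the spout emits the tuple (name="a", sentence="1", sentence2="2"), the stream after this function carries (name="a", sentence="1", sentence2="2", gender="男"): the new field is appended and the original fields remain available downstream.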

MyFilter:

package Trident;

import java.util.Map;

import org.apache.storm.trident.operation.BaseFilter;
import org.apache.storm.trident.operation.TridentOperationContext;
import org.apache.storm.trident.tuple.TridentTuple;

/**
 * Trident filter operation: drops tuples from the spout whose first field ("name")
 * is "a" and whose second field ("sentence") is "b"; everything else is kept.
 * @author BFD-593
 */
public class MyFilter extends BaseFilter {

    private TridentOperationContext context;

    @Override
    public void prepare(Map conf, TridentOperationContext context) {
        super.prepare(conf, context);
        this.context = context;
    }

    @Override
    public boolean isKeep(TridentTuple tuple) {
        String name = tuple.getStringByField("name");
        String value = tuple.getStringByField("sentence");
        // keep the tuple unless name == "a" and sentence == "b"
        return (!"a".equals(name)) || (!"b".equals(value));
    }
}
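For example, a Kafka message "a b" becomes the tuple (a, b, 1) once the spout appends its placeholder tokens, and is dropped because name is "a" and sentence is "b"; a message "hello" becomes (hello, 1, 2) and is kept.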

PrintFilter:

package Trident;

import java.util.Iterator;
import java.util.Map;

import org.apache.storm.trident.operation.BaseFilter;
import org.apache.storm.trident.operation.TridentOperationContext;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Fields;

/**
 * A filter that prints every field name and value, then keeps the tuple.
 * @author BFD-593
 */
public class PrintFilter extends BaseFilter {

    private TridentOperationContext context = null;

    @Override
    public void prepare(Map conf, TridentOperationContext context) {
        super.prepare(conf, context);
        this.context = context;
    }

    @Override
    public boolean isKeep(TridentTuple tuple) {
        Fields fields = tuple.getFields();
        Iterator<String> iterator = fields.iterator();
        String str = "";
        while (iterator.hasNext()) {
            String next = iterator.next();
            Object value = tuple.getValueByField(next);
            str = str + next + ":" + value + ",";
        }
        System.out.println(str);
        return true;
    }
}
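Each tuple that reaches this filter is printed on one line, for example: name:hello,sentence:1,sentence2:2,gender:女,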

TopologyTrident:

package Trident;

import org.apache.kafka.common.utils.Utils;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.trident.operation.builtin.Count;
import org.apache.storm.tuple.Fields;

/**
 * Trident filter, function, and partition-aggregation operations.
 * @author BFD-593
 */
public class TopologyTrident {

    public static void main(String[] args) {
        SentenceSpout spout = new SentenceSpout();

        TridentTopology topology = new TridentTopology();
        topology.newStream("spout", spout)
                .each(new Fields("name"), new FunctionBolt(), new Fields("gender"))
                .each(new Fields("name", "sentence"), new MyFilter())
                .each(new Fields("name", "sentence", "sentence2", "gender"), new PrintFilter());

        Config conf = new Config();
        LocalCluster clu = new LocalCluster();
        clu.submitTopology("mytopology", conf, topology.build());

        Utils.sleep(100000000);
        clu.killTopology("mytopology");
        clu.shutdown();
    }
}
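Reading the chain in order: the first each() passes the name field to FunctionBolt, which appends gender; the second each() applies MyFilter to name and sentence, dropping the (a, b, ...) tuples; the last each() hands all four fields to PrintFilter, which prints them and keeps everything. The listed fields only control what the function or filter can see, the tuple itself keeps all of its fields. The topology then runs on a LocalCluster until the Utils.sleep() call expires, after which it is killed and the cluster shuts down.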

  

