MongoDB是大数据技术中常用的NoSQL型数据库,它提供了大量的查询、聚合等操作函数,对于需要大量查询的日志系统来说,MongoDB是大数据日志存储的福音。Storm的高级编程技术Trident也提供了与MongoDB集成的方法,但官方只提供了新增(插入)的处理,对于常用的修改操作并未提供接口。本文提供了一种使用Trident进行MongoDB修改操作的方式,并且对持久化的数据提供了输出的拓展操作,具体代码见下方:

import java.util.Objects;

/**
 * Enumeration of request method types (GET and POST).
 *
 * <p>
 * Date-Time: 2018/09/05 15:14
 * Company: Baiqu
 * </p>
 *
 * @author fangyuanjie
 * @version 1.0.0
 */
public enum MethodTypeEnum {

    /** GET request. */
    GET("GET", "GET请求"),

    /** POST request. */
    POST("POST", "POST请求");

    /** Code that identifies the constant; used by {@link #getByCode(String)}. */
    private String code;

    /** Description text attached to the constant. */
    private String desc;

    MethodTypeEnum(String code, String desc) {
        this.desc = desc;
        this.code = code;
    }

    /**
     * Looks up the constant whose code equals the given value.
     *
     * @param code the code to search for; may be {@code null}
     * @return the first constant with an equal code, or {@code null} when none matches
     */
    public static MethodTypeEnum getByCode(String code) {
        MethodTypeEnum[] candidates = values();
        for (int i = 0; i < candidates.length; i++) {
            MethodTypeEnum candidate = candidates[i];
            if (Objects.equals(candidate.getCode(), code)) {
                return candidate;
            }
        }
        return null;
    }

    /** @return the code of this constant */
    public String getCode() {
        return this.code;
    }

    /** @param code the new code of this constant */
    public void setCode(String code) {
        this.code = code;
    }

    /** @return the description of this constant */
    public String getDesc() {
        return this.desc;
    }

    /** @param desc the new description of this constant */
    public void setDesc(String desc) {
        this.desc = desc;
    }
}

import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject;
import org.apache.commons.lang.StringUtils;
import org.apache.storm.trident.operation.BaseFilter;
import org.apache.storm.trident.tuple.TridentTuple; /**
* <p>
* Copyright: Copyright (c) 2018/9/10 14:28
* <p>
* Company: 百趣
* <p>
* 格式过滤
* @author tangzhe
* @version 1.0.0
*/
// Trident filter that keeps only tuples whose "str" field holds a well-formed
// request-log JSON object; every rejection is reported on standard output.
public class FormatFilter extends BaseFilter {

    /**
     * Decides whether a raw log message may continue down the stream.
     *
     * <p>A tuple is kept only when its {@code str} field is non-blank, parses to a
     * non-null JSON object, has non-null {@code reqTime}, {@code headers} and
     * {@code reqURI} entries, and its {@code uriArgs} and {@code bodyData} entries can
     * be read as JSON objects. Malformed input never throws: it is logged and dropped.
     *
     * @param tuple the tuple carrying the raw message in its {@code str} field
     * @return {@code true} to keep the tuple, {@code false} to drop it
     */
    @Override
    public boolean isKeep(TridentTuple tuple) {
        String message = tuple.getStringByField("str");
        System.out.println(this.getClass().getSimpleName() + "->message:" + message);
        if (StringUtils.isBlank(message)) {
            return reject(": 消息不能为空!");
        }

        JSONObject jsonObject;
        try {
            jsonObject = JSONObject.parseObject(message);
        } catch (Exception e) {
            return reject(": 消息格式有误!");
        }
        // parseObject may return null instead of throwing (e.g. for the JSON literal
        // "null"); treat that as a malformed message rather than dereferencing it.
        if (jsonObject == null) {
            return reject(": 消息格式有误!");
        }

        try {
            // The typed getters can fail with runtime exceptions when a value has an
            // unexpected type, so the presence check lives inside the try as well.
            if (jsonObject.getLong("reqTime") == null
                    || jsonObject.getJSONObject("headers") == null
                    || jsonObject.getString("reqURI") == null) {
                return reject(": 请求信息不能为空!");
            }
            // Optional sections: only verify that they are readable as JSON objects.
            jsonObject.getJSONObject("uriArgs");
            jsonObject.getJSONObject("bodyData");
        } catch (RuntimeException e) {
            // JSONException is a RuntimeException; catching the parent also covers
            // cast and number-format failures raised by the getters.
            return reject(": 请求信息格式有误!");
        }
        return true;
    }

    // Prints the rejection reason prefixed with the class name and returns false.
    private boolean reject(String reason) {
        System.out.println(this.getClass().getSimpleName() + reason);
        return false;
    }
}

import com.alibaba.fastjson.JSONObject;
import net.baiqu.storm.trident.enums.MethodTypeEnum;
import org.apache.storm.trident.operation.BaseFunction;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Values; import java.util.Date; /**
* <p>
* Copyright: Copyright (c) 2018/9/10 14:34
* <p>
* Company: 百趣
* <p>
* 日志解析函数
* @author tangzhe
* @version 1.0.0
*/
// Trident function that parses the JSON log message in the "str" field and emits
// its individual attributes as a flat tuple.
public class OperateLogParseFunction extends BaseFunction {

    /**
     * Parses one log message and emits, in this order: appId, host, requestURI,
     * method, ip, requestTime, userId, username, role, memo and the current time.
     *
     * <p>User attributes are read from {@code uriArgs} for GET requests and from
     * {@code bodyData} for POST requests; for any other method they are emitted as
     * {@code null}.
     *
     * @param tuple     the tuple carrying the raw message in its {@code str} field
     * @param collector the collector receiving the parsed values
     */
    @Override
    public void execute(TridentTuple tuple, TridentCollector collector) {
        final String raw = tuple.getStringByField("str");
        final JSONObject log = JSONObject.parseObject(raw);
        System.out.println(this.getClass().getSimpleName() + "->message: " + raw);

        final JSONObject headers = log.getJSONObject("headers");
        final String method = log.getString("method");
        final JSONObject params = requestParams(log, method);

        // reqTime is scaled by 1000 before building the Date — presumably epoch
        // seconds converted to milliseconds; confirm against the log producer.
        final long requestMillis = log.getLong("reqTime") * 1000;

        collector.emit(new Values(
                log.getString("appId"),
                headers.getString("host"),
                log.getString("reqURI"),
                method,
                log.getString("ip"),
                new Date(requestMillis),
                params.getString("userId"),
                params.getString("username"),
                params.getString("role"),
                params.getString("memo"),
                new Date()));
    }

    // Selects the parameter object that matches the request method; never null.
    private static JSONObject requestParams(JSONObject log, String method) {
        JSONObject params = null;
        if (MethodTypeEnum.GET.getCode().equals(method)) {
            params = log.getJSONObject("uriArgs");
        } else if (MethodTypeEnum.POST.getCode().equals(method)) {
            params = log.getJSONObject("bodyData");
        }
        if (params == null) {
            return new JSONObject();
        }
        return params;
    }
}

import org.apache.storm.trident.operation.BaseFunction;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.tuple.TridentTuple; /**
* <p>
* Copyright: Copyright (c) 2018/9/10 16:33
* <p>
* Company: 百趣
* <p>
* 结果记录函数
* @author tangzhe
* @version 1.0.0
*/
// Trident function that prints whether the preceding persist step reported success.
public class OperatePrintFunction extends BaseFunction {

    /**
     * Reads the {@code result} field and prints a success line when it equals
     * "success" (ignoring case), otherwise a failure line. Emits nothing.
     *
     * @param input     the tuple carrying the {@code result} field
     * @param collector unused
     */
    @Override
    public void execute(TridentTuple input, TridentCollector collector) {
        boolean succeeded = "success".equalsIgnoreCase(input.getStringByField("result"));
        String suffix = succeeded ? "->: 插入mongo成功" : "->: 插入mongo失败";
        System.out.println(this.getClass().getSimpleName() + suffix);
    }
}

import org.apache.storm.mongodb.trident.state.MongoState;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.state.BaseStateUpdater;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Values; import java.util.List; /**
* <p>
* Copyright: Copyright (c) 2018/9/10 16:29
* <p>
* Company: 百趣
* <p>
*
* @author tangzhe
* @version 1.0.0
*/
// State updater that delegates to the stock MongoState and reports the outcome of
// each batch as a single "success" or "fail" value.
public class MyMongoStateUpdater extends BaseStateUpdater<MongoState> {

    /**
     * Writes the batch through {@link MongoState#updateState} and emits "success".
     * If anything in that sequence throws, the stack trace is printed and "fail" is
     * emitted instead; the exception is not propagated.
     *
     * @param state     the Mongo state to write to
     * @param tuples    the tuples of the current batch
     * @param collector the collector receiving the outcome value
     */
    @Override
    public void updateState(MongoState state, List<TridentTuple> tuples,
                            TridentCollector collector) {
        try {
            state.updateState(tuples, collector);
            report(collector, "success");
        } catch (Exception failure) {
            failure.printStackTrace();
            report(collector, "fail");
        }
    }

    // Emits a one-field tuple carrying the outcome text.
    private static void report(TridentCollector collector, String outcome) {
        collector.emit(new Values(outcome));
    }
}

import com.google.common.collect.Lists;
import com.mongodb.client.model.Filters;
import org.apache.commons.lang.Validate;
import org.apache.storm.mongodb.common.MongoDBClient;
import org.apache.storm.mongodb.common.mapper.MongoMapper;
import org.apache.storm.mongodb.trident.state.MongoState;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.state.State;
import org.apache.storm.trident.tuple.TridentTuple;
import org.bson.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import java.io.Serializable;
import java.util.List;
import java.util.Map; /**
* <p>
* Date-Time: 2018/09/10 13:50
* Company: 百趣
* </p>
*
* @author tangzhe
* @version 1.0.0
*/
// Trident State that upserts mapped tuples into a MongoDB collection, matching
// existing documents on their "logDate" value.
public class OperateMongoState implements State {

    // Bound to this class (it was previously bound to MongoState, which mislabelled
    // every log line).
    private static final Logger LOG = LoggerFactory.getLogger(OperateMongoState.class);

    private OperateMongoState.Options options;
    private MongoDBClient mongoClient;
    private Map map;

    /**
     * Creates an unprepared state; {@link #prepare()} must run before any update.
     *
     * @param map     the configuration map handed over by the state factory
     * @param options connection and mapping options
     */
    protected OperateMongoState(Map map, OperateMongoState.Options options) {
        this.options = options;
        this.map = map;
    }

    /** Fluent, serializable holder of the Mongo URL, collection name and tuple mapper. */
    public static class Options implements Serializable {
        private String url;
        private String collectionName;
        private MongoMapper mapper;

        /**
         * @param url the MongoDB connection string
         * @return this instance, for chaining
         */
        public OperateMongoState.Options withUrl(String url) {
            this.url = url;
            return this;
        }

        /**
         * @param collectionName the target collection
         * @return this instance, for chaining
         */
        public OperateMongoState.Options withCollectionName(String collectionName) {
            this.collectionName = collectionName;
            return this;
        }

        /**
         * @param mapper converts a tuple into the document to store
         * @return this instance, for chaining
         */
        public OperateMongoState.Options withMapper(MongoMapper mapper) {
            this.mapper = mapper;
            return this;
        }
    }

    /**
     * Validates the options and opens the MongoDB client.
     *
     * @throws IllegalArgumentException if the url or collection name is empty, or the
     *                                  mapper is {@code null}
     */
    protected void prepare() {
        Validate.notEmpty(options.url, "url can not be blank or null");
        Validate.notEmpty(options.collectionName, "collectionName can not be blank or null");
        Validate.notNull(options.mapper, "MongoMapper can not be null");

        this.mongoClient = new MongoDBClient(options.url, options.collectionName);
    }

    @Override
    public void beginCommit(Long txid) {
        LOG.debug("beginCommit is noop.");
    }

    @Override
    public void commit(Long txid) {
        LOG.debug("commit is noop.");
    }

    /**
     * Upserts every tuple of the batch: a document whose {@code logDate} equals the
     * tuple's {@code logDate} gets its fields replaced via {@code $set}; if none
     * exists, a new one is inserted.
     *
     * <p>Previously only the first tuple of a batch was written and an empty batch
     * raised {@link IndexOutOfBoundsException}.
     *
     * @param tuples    the tuples of the current batch; may be empty
     * @param collector unused
     */
    public void updateState(List<TridentTuple> tuples, TridentCollector collector) {
        if (tuples == null || tuples.isEmpty()) {
            LOG.debug("updateState received an empty batch; nothing to write.");
            return;
        }
        for (TridentTuple tuple : tuples) {
            Document document = options.mapper.toDocument(tuple);
            // getValueByField keeps the stored type: a String matches as before, and
            // a non-String value (such as a Date) no longer fails the String cast.
            this.mongoClient.update(
                    Filters.eq("logDate", tuple.getValueByField("logDate")),
                    new Document("$set", document), true);
        }
    }
}

import org.apache.storm.task.IMetricsContext;
import org.apache.storm.trident.state.State;
import org.apache.storm.trident.state.StateFactory; import java.util.Map; /**
* <p>
* Date-Time: 2018/09/10 13:50
* Company: 百趣
* </p>
*
* @author tangzhe
* @version 1.0.0
*/
// StateFactory that creates and prepares an OperateMongoState for each request.
public class OperateMongoStateFactory implements StateFactory {

    /** Options forwarded to every state this factory creates. */
    private OperateMongoState.Options options;

    /**
     * @param options the options for the states to be created
     */
    public OperateMongoStateFactory(OperateMongoState.Options options) {
        this.options = options;
    }

    /**
     * Builds a new {@link OperateMongoState} from the given configuration and the
     * stored options, prepares it, and returns it.
     *
     * @param conf           the configuration map passed to the state
     * @param metrics        unused
     * @param partitionIndex unused
     * @param numPartitions  unused
     * @return the prepared state
     */
    @Override
    public State makeState(Map conf, IMetricsContext metrics,
                           int partitionIndex, int numPartitions) {
        OperateMongoState prepared = new OperateMongoState(conf, this.options);
        prepared.prepare();
        return prepared;
    }
}
package net.baiqu.storm.trident.state; import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.state.BaseStateUpdater;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Values; import java.util.List; /**
* <p>
* Date-Time: 2018/09/10 13:50
* Company: 百趣
* </p>
*
* @author tangzhe
* @version 1.0.0
*/
// State updater that writes a batch through OperateMongoState and then emits the
// userId of the batch's first tuple.
public class OperateMongoStateUpdater extends BaseStateUpdater<OperateMongoState> {

    /**
     * Persists the batch and emits the {@code userId} of its first tuple.
     *
     * <p>An empty (or {@code null}) batch is skipped entirely; without this guard
     * {@code tuples.get(0)} would throw {@link IndexOutOfBoundsException}.
     *
     * @param state     the state to write to
     * @param tuples    the tuples of the current batch; may be empty
     * @param collector the collector receiving the emitted userId
     */
    @Override
    public void updateState(OperateMongoState state, List<TridentTuple> tuples,
                            TridentCollector collector) {
        if (tuples == null || tuples.isEmpty()) {
            return;
        }
        state.updateState(tuples, collector);
        String userId = tuples.get(0).getStringByField("userId");
        collector.emit(new Values(userId));
    }
}
package net.baiqu.storm.trident.topology; import kafka.api.OffsetRequest;
import net.baiqu.storm.trident.filter.FormatFilter;
import net.baiqu.storm.trident.function.OperateLogParseFunction;
import net.baiqu.storm.trident.function.OperatePrintFunction;
import net.baiqu.storm.trident.state.MyMongoStateUpdater;
import net.baiqu.storm.trident.util.TridentMongoFactory;
import net.baiqu.storm.utils.Constants;
import org.apache.commons.lang.StringUtils;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.kafka.BrokerHosts;
import org.apache.storm.kafka.StringScheme;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.kafka.trident.TransactionalTridentKafkaSpout;
import org.apache.storm.kafka.trident.TridentKafkaConfig;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.apache.storm.trident.Stream;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.tuple.Fields; /**
* <p>
* Date-Time: 2018/09/10 13:50
* Company: 百趣
* </p>
*
* @author tangzhe
* @version 1.0.0
*/
// Launcher that wires the pipeline: Kafka spout -> FormatFilter ->
// OperateLogParseFunction -> Mongo insert state -> OperatePrintFunction.
public class OperateLogTridentTopology {

    /** Topology name used in demo mode and when no name is given on the command line. */
    private static final String DEFAULT_TOPOLOGY_NAME = "operateLogTridentTopology";

    /** Fields produced by the parse function and handed to the Mongo state. */
    private static final Fields LOG_FIELDS = new Fields("appId", "host", "requestURI", "method", "ip",
            "requestTime", "userId", "username", "role", "memo", "logDate");

    /**
     * Builds the topology and runs it in a local cluster when {@code Constants.MODE}
     * is "demo" (ignoring case); otherwise submits it to the configured Nimbus.
     *
     * @param args optional; {@code args[0]} is the topology name used for a cluster
     *             submission, defaulting to {@value #DEFAULT_TOPOLOGY_NAME}
     * @throws IllegalStateException if the cluster submission fails
     */
    public static void main(String[] args) {
        TridentTopology topology = new TridentTopology();

        BrokerHosts hosts = new ZkHosts(Constants.ZK_HOSTS);
        String topic = Constants.KAFKA_LOG_TOPIC;
        String id = Constants.KAFKA_SPOUT_ID;

        TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(hosts, topic, id);
        kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());

        boolean demoMode = StringUtils.equalsIgnoreCase("demo", Constants.MODE);
        // In demo mode, start reading from the latest Kafka offset.
        if (demoMode) {
            kafkaConfig.startOffsetTime = OffsetRequest.LatestTime();
        }

        TransactionalTridentKafkaSpout kafkaSpout = new TransactionalTridentKafkaSpout(kafkaConfig);

        Stream stream = topology.newStream("kafkaSpout", kafkaSpout).parallelismHint(1);
        stream.shuffle().each(new Fields("str"), new FormatFilter())
                .parallelismHint(1)
                .shuffle().each(new Fields("str"), new OperateLogParseFunction(), LOG_FIELDS)
                .parallelismHint(1)
                .partitionPersist(TridentMongoFactory.getMongoInsertState(),
                        LOG_FIELDS,
                        new MyMongoStateUpdater(),
                        new Fields("result"))
                .parallelismHint(1)
                .newValuesStream().shuffle().each(
                        new Fields("result"), new OperatePrintFunction(), new Fields("none"))
                .parallelismHint(1);

        Config config = new Config();
        if (demoMode) {
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology(DEFAULT_TOPOLOGY_NAME, config, topology.build());
        } else {
            config.setNumWorkers(1);
            config.put(Config.NIMBUS_HOST, Constants.NIMBUS_HOST);
            config.put(Config.NIMBUS_THRIFT_PORT, Constants.NIMBUS_THRIFT_PORT);
            config.put(Config.TOPOLOGY_ACKER_EXECUTORS, 1);

            // Fall back to the default name instead of failing on a missing argument.
            String topologyName = (args != null && args.length > 0) ? args[0] : DEFAULT_TOPOLOGY_NAME;
            try {
                StormSubmitter.submitTopology(topologyName, config, topology.build());
            } catch (Exception e) {
                // Surface the failure so the launcher exits with an error instead of
                // printing a trace and terminating as if the submission had worked.
                throw new IllegalStateException("Failed to submit topology " + topologyName, e);
            }
        }
    }
}
package net.baiqu.storm.trident.util; import net.baiqu.storm.trident.state.OperateMongoState;
import net.baiqu.storm.trident.state.OperateMongoStateFactory;
import net.baiqu.storm.utils.Constants;
import org.apache.commons.lang.StringUtils;
import org.apache.storm.mongodb.common.mapper.MongoMapper;
import org.apache.storm.mongodb.common.mapper.SimpleMongoMapper;
import org.apache.storm.mongodb.trident.state.MongoState;
import org.apache.storm.mongodb.trident.state.MongoStateFactory;
import org.apache.storm.trident.state.StateFactory; /**
* <p>
* Copyright: Copyright (c) 2018/9/10 14:56
* <p>
* Company: 百趣
* <p>
* trident mongo 工厂类
* @author tangzhe
* @version 1.0.0
*/
// Factory for the Trident state factories that insert into or update the
// operate-log MongoDB collection.
public class TridentMongoFactory {

    // Host, port, database and timeout part shared by both connection strings.
    private static final String LOCATION = Constants.MONGODB_HOSTS + ":" + Constants.MONGODB_PORT + "/"
            + Constants.MONGODB_DATABASE + "?connectTimeoutMS=" + Constants.MONGODB_TIMEOUT;

    /** Connection string with credentials; each "@" in the password is written as "%40". */
    public static final String URL = "mongodb://" + Constants.MONGODB_USERNAME + ":"
            + Constants.MONGODB_PASSWORD.replace("@", "%40")
            + "@" + LOCATION;

    /** Connection string without credentials. */
    public static final String URL2 = "mongodb://" + LOCATION;

    /** Name passed to the state options as the collection name. */
    public static final String OPERATE_LOG_DB = "operate_log";

    /**
     * Builds a state factory based on the stock {@link MongoState}.
     *
     * @return a {@link MongoStateFactory} configured for the operate-log collection
     */
    public static StateFactory getMongoInsertState() {
        MongoState.Options insertOptions = new MongoState.Options()
                .withUrl(getUrl())
                .withCollectionName(OPERATE_LOG_DB)
                .withMapper(operateLogMapper());
        return new MongoStateFactory(insertOptions);
    }

    /**
     * Builds a state factory based on the custom {@link OperateMongoState}.
     *
     * @return an {@link OperateMongoStateFactory} configured for the operate-log collection
     */
    public static StateFactory getMongoUpdateState() {
        OperateMongoState.Options updateOptions = new OperateMongoState.Options()
                .withUrl(getUrl())
                .withCollectionName(OPERATE_LOG_DB)
                .withMapper(operateLogMapper());
        return new OperateMongoStateFactory(updateOptions);
    }

    // Creates a fresh mapper covering every operate-log field.
    private static MongoMapper operateLogMapper() {
        return new SimpleMongoMapper()
                .withFields("appId", "host", "requestURI", "method", "ip", "requestTime",
                        "userId", "username", "role", "memo", "logDate");
    }

    // Picks the credentialed URL when a user name is configured, the plain one otherwise.
    private static String getUrl() {
        return StringUtils.isNotBlank(Constants.MONGODB_USERNAME) ? URL : URL2;
    }
}

Trident整合MongoDB的更多相关文章

  1. spring MVC 整合mongodb

    Spring Mongodb 目录 1 SPRING整合MONGODB 1 1.1 环境准备 1 1.2 包依赖 1 1.3 配置 2 2 案列 5 2.1 SPRING MVC整合MONGODB代码 ...

  2. MongoDB系列:四、spring整合mongodb,带用户验证

    在前面的两篇博客 MongoDB常用操作练习.springboot整合mongoDB的简单demo中,我们基本上熟悉了mongodb,也把它与spring boot进行了整合并且简单使用.在本篇博客中 ...

  3. java操作mongodb & springboot整合mongodb

    简单的研究原生API操作MongoDB以及封装的工具类操作,最后也会研究整合spring之后作为dao层的完整的操作. 1.原生的API操作 pom.xml <!-- https://mvnre ...

  4. SpringBoot整合mongoDB

    MongoDB 是一个介于关系数据库和非关系数据库之间的产品,是非关系数据库当中功能最丰富,最像关系数据库的. 这一片文章介绍一个springboot整合mongodb,如果你了解整合mysql之类的 ...

  5. springboot 学习之路 14(整合mongodb的Api操作)

    springboot整合mongodb: mongodb的安装和权限配置  请点击连接参考 mongodb集成 : 第一步:引如pom文件 第二步:配置文件配置mongodb路径: 第三步:关于mon ...

  6. SpringMVC整合Mongodb开发,高级操作

    开发环境: 操作系统:windows xpMongodb:2.0.6依 赖 包:Spring3.2.2 + spring-data-mongodb-1.3.0 + Spring-data-1.5 +  ...

  7. spring整合mongodb

    使用spring整合mongodb maven 依赖 <dependency> <groupId>org.mongodb</groupId> <artifac ...

  8. SpringBoot非官方教程 | 第八篇:springboot整合mongodb

    转载请标明出处: 原文首发于:https://www.fangzhipeng.com/springboot/2017/07/11/springboot8-mongodb/ 本文出自方志朋的博客 这篇文 ...

  9. Trident整合Kafka

    首先编写一个打印函数KafkaPrintFunction import org.apache.storm.trident.operation.BaseFunction; import org.apac ...

随机推荐

  1. Lucene——索引过程分析Index

    Lucene索引过程分为3个主要操作步骤:将原始文档转换成文本.分析文本.将分析好的文本保存至索引中 一.提取文本和创建文档 从 pdf.word等非纯文本格式文件中,提取文本格式信息.建立起对应的, ...

  2. 【转载】SQL执行计划

    要理解执行计划,怎么也得先理解,那各种各样的名词吧.鉴于自己还不是很了解.本文打算作为只写懂的,不懂的懂了才写. 在开头要先说明,第一次看执行计划要注意,SQL Server的执行计划是从右向左看的. ...

  3. CODESOFT条码设计软件如何隐藏数据源方法

    作为强大的条码标签设计软件,用户在用CODESOFT设计条码标签时,有时需要根据实际情况,将条码数据源隐藏,也就是使设计与打印出来的条形码下不带有数据.那么这要怎么在CODESOFT中实现呢?下面,小 ...

  4. 笨办法学Python(十八)

    习题 18: 命名.变量.代码.函数 标题包含的内容够多的吧?接下来我要教你“函数(function)”了!咚咚锵!说到函数,不一样的人会对它有不一样的理解和使用方法,不过我只会教你现在能用到的最简单 ...

  5. 音乐代码 (DNF天空之城、欢乐颂)。

    太感人了 DNF天空之城 #include <cstdio> #include <windows.h> #define qdo 262 #define qre 294 #def ...

  6. April 19 2017 Week 16 Wednesday

    What would life be if we had no courage to attempt anything? 如果我们都没有勇气去尝试点什么,生活会变成什么样子呢? I remembere ...

  7. 如何下载YouTube 8K视频

    随着科技的进步,人们对高清视频的要求越来越高,因此视频的分辨率也越来越高.从最开始的720P,到1080P,再到2K,进而到如今4K,不断地满足人们挑剔的胃口.4K分辨率的视频已经逐渐进入人们的生活中 ...

  8. 你真的会用ABAP, Java和JavaScript里的constructor么?

    如果constructor里调用了一个成员方法,这个方法被子类override了,当初始化一个子类实例时,父类的构造函数被的调用,此时父类构造函数的上下文里调用的成员方法,是父类的实现还是子类的实现? ...

  9. py常见模块

    1.系统相关的信息模块: import sys sys.argv 是一个 list,包含所有的命令行参数. sys.stdout sys.stdin sys.stderr 分别表示标准输入输出,错误输 ...

  10. Selenium入门系列4 选择并操作一组元素

    选中一组元素的方式也是8种,与选中单个元素一一对应.区别只在于element与elements.elements取到的是一个数组,element取符合条件的第一个元素. 首先在脚本的目录下新建test ...