MongoDB是大数据技术中常用的NoSql型数据库,它提供了大量的查询、聚合等操作函数,对于查询量大的日志系统来说,MongoDB是大数据日志存储的福音。Storm的高级编程技术Trident也提供了与MongoDB集成的方法,但官方只提供了新增(插入)的处理,对于常用的修改操作并未提供接口。本文提供了一种使用Trident进行MongoDB修改操作的方式,并且对持久化的数据提供了输出的拓展操作,具体代码见下方:

import java.util.Objects;

/**
* <p>
* Date-Time: 2018/09/05 15:14
* Company: 百趣
* </p>
* 请求类型枚举
*
* @author fangyuanjie
* @version 1.0.0
*/ public enum MethodTypeEnum { // GET请求
GET("GET", "GET请求"), // POST请求
POST("POST", "POST请求"); private String code;
private String desc; public String getCode() {
return code;
} public void setCode(String code) {
this.code = code;
} public String getDesc() {
return desc;
} public void setDesc(String desc) {
this.desc = desc;
} MethodTypeEnum(String code, String desc) {
this.code = code;
this.desc = desc;
} public static MethodTypeEnum getByCode(String code) {
for (MethodTypeEnum methodTypeEnum : values()) {
if (Objects.equals(methodTypeEnum.getCode(), code)) {
return methodTypeEnum;
}
}
return null;
} }import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject;
import org.apache.commons.lang.StringUtils;
import org.apache.storm.trident.operation.BaseFilter;
import org.apache.storm.trident.tuple.TridentTuple; /**
* <p>
* Copyright: Copyright (c) 2018/9/10 14:28
* <p>
* Company: 百趣
* <p>
* 格式过滤
* @author tangzhe
* @version 1.0.0
*/
public class FormatFilter extends BaseFilter { @Override
public boolean isKeep(TridentTuple tuple) {
String message = tuple.getStringByField("str");
System.out.println(this.getClass().getSimpleName() + "->message:" + message);
if (StringUtils.isBlank(message)) {
System.out.println(this.getClass().getSimpleName() + ": 消息不能为空!");
return false;
}
JSONObject jsonObject;
try {
jsonObject = JSONObject.parseObject(message);
} catch (Exception e) {
System.out.println(this.getClass().getSimpleName() + ": 消息格式有误!");
return false;
}
if (jsonObject.getLong("reqTime") == null ||
jsonObject.getJSONObject("headers") == null ||
jsonObject.getString("reqURI") == null) {
System.out.println(this.getClass().getSimpleName() + ": 请求信息不能为空!");
return false;
}
try {
jsonObject.getJSONObject("headers");
jsonObject.getJSONObject("uriArgs");
jsonObject.getJSONObject("bodyData");
} catch (JSONException e) {
System.out.println(this.getClass().getSimpleName() + ": 请求信息格式有误!");
return false;
}
return true;
} }import com.alibaba.fastjson.JSONObject;
import net.baiqu.storm.trident.enums.MethodTypeEnum;
import org.apache.storm.trident.operation.BaseFunction;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Values; import java.util.Date; /**
* <p>
* Copyright: Copyright (c) 2018/9/10 14:34
* <p>
* Company: 百趣
* <p>
* 日志解析函数
* @author tangzhe
* @version 1.0.0
*/
public class OperateLogParseFunction extends BaseFunction { @Override
public void execute(TridentTuple tuple, TridentCollector collector) {
String message = tuple.getStringByField("str");
JSONObject jsonObject = JSONObject.parseObject(message);
System.out.println(this.getClass().getSimpleName() + "->message: " + message);
JSONObject headers = jsonObject.getJSONObject("headers");
JSONObject uriArgs = null;
String method = jsonObject.getString("method");
if (MethodTypeEnum.GET.getCode().equals(method)) {
uriArgs = jsonObject.getJSONObject("uriArgs");
} else if (MethodTypeEnum.POST.getCode().equals(method)) {
uriArgs = jsonObject.getJSONObject("bodyData");
}
uriArgs = uriArgs != null ? uriArgs : new JSONObject();
String appId = jsonObject.getString("appId");
String userId = uriArgs.getString("userId");
String ip = jsonObject.getString("ip");
String host = headers.getString("host");
String requestURI = jsonObject.getString("reqURI");
String username = uriArgs.getString("username");
String role = uriArgs.getString("role");
String memo = uriArgs.getString("memo");
Date requestTime = new Date(jsonObject.getLong("reqTime") * 1000);
collector.emit(new Values(appId, host, requestURI, method, ip, requestTime,
userId, username, role, memo, new Date()));
} }import org.apache.storm.trident.operation.BaseFunction;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.tuple.TridentTuple; /**
* <p>
* Copyright: Copyright (c) 2018/9/10 16:33
* <p>
* Company: 百趣
* <p>
* 结果记录函数
* @author tangzhe
* @version 1.0.0
*/
public class OperatePrintFunction extends BaseFunction { @Override
public void execute(TridentTuple input, TridentCollector collector) {
String result = input.getStringByField("result");
if ("success".equalsIgnoreCase(result)) {
System.out.println(this.getClass().getSimpleName() + "->: 插入mongo成功");
} else {
System.out.println(this.getClass().getSimpleName() + "->: 插入mongo失败");
}
}
}import org.apache.storm.mongodb.trident.state.MongoState;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.state.BaseStateUpdater;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Values; import java.util.List; /**
* <p>
* Copyright: Copyright (c) 2018/9/10 16:29
* <p>
* Company: 百趣
* <p>
*
* @author tangzhe
* @version 1.0.0
*/
public class MyMongoStateUpdater extends BaseStateUpdater<MongoState> { @Override
public void updateState(MongoState state, List<TridentTuple> tuples,
TridentCollector collector) {
try {
state.updateState(tuples, collector);
collector.emit(new Values("success"));
} catch (Exception e) {
e.printStackTrace();
collector.emit(new Values("fail"));
}
}
}import com.google.common.collect.Lists;
import com.mongodb.client.model.Filters;
import org.apache.commons.lang.Validate;
import org.apache.storm.mongodb.common.MongoDBClient;
import org.apache.storm.mongodb.common.mapper.MongoMapper;
import org.apache.storm.mongodb.trident.state.MongoState;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.state.State;
import org.apache.storm.trident.tuple.TridentTuple;
import org.bson.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import java.io.Serializable;
import java.util.List;
import java.util.Map; /**
* <p>
* Date-Time: 2018/09/10 13:50
* Company: 百趣
* </p>
*
* @author tangzhe
* @version 1.0.0
*/
public class OperateMongoState implements State { private static final Logger LOG = LoggerFactory.getLogger(MongoState.class); private OperateMongoState.Options options;
private MongoDBClient mongoClient;
private Map map; protected OperateMongoState(Map map, OperateMongoState.Options options) {
this.options = options;
this.map = map;
} public static class Options implements Serializable {
private String url;
private String collectionName;
private MongoMapper mapper; public OperateMongoState.Options withUrl(String url) {
this.url = url;
return this;
} public OperateMongoState.Options withCollectionName(String collectionName) {
this.collectionName = collectionName;
return this;
} public OperateMongoState.Options withMapper(MongoMapper mapper) {
this.mapper = mapper;
return this;
}
} protected void prepare() {
Validate.notEmpty(options.url, "url can not be blank or null");
Validate.notEmpty(options.collectionName, "collectionName can not be blank or null");
Validate.notNull(options.mapper, "MongoMapper can not be null"); this.mongoClient = new MongoDBClient(options.url, options.collectionName);
} @Override
public void beginCommit(Long txid) {
LOG.debug("beginCommit is noop.");
} @Override
public void commit(Long txid) {
LOG.debug("commit is noop.");
} public void updateState(List<TridentTuple> tuples, TridentCollector collector) {
List<Document> documents = Lists.newArrayList();
for (TridentTuple tuple : tuples) {
Document document = options.mapper.toDocument(tuple);
documents.add(document);
}
this.mongoClient.update(
Filters.eq("logDate",
tuples.get(0).getStringByField("logDate")),
new Document("$set", documents.get(0)), true);
} }import org.apache.storm.task.IMetricsContext;
import org.apache.storm.trident.state.State;
import org.apache.storm.trident.state.StateFactory; import java.util.Map; /**
* <p>
* Date-Time: 2018/09/10 13:50
* Company: 百趣
* </p>
*
* @author tangzhe
* @version 1.0.0
*/
public class OperateMongoStateFactory implements StateFactory { private OperateMongoState.Options options; public OperateMongoStateFactory(OperateMongoState.Options options) {
this.options = options;
} @Override
public State makeState(Map conf, IMetricsContext metrics,
int partitionIndex, int numPartitions) {
OperateMongoState state = new OperateMongoState(conf, options);
state.prepare();
return state;
} }
package net.baiqu.storm.trident.state; import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.state.BaseStateUpdater;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Values; import java.util.List; /**
* <p>
* Date-Time: 2018/09/10 13:50
* Company: 百趣
* </p>
*
* @author tangzhe
* @version 1.0.0
*/
public class OperateMongoStateUpdater extends BaseStateUpdater<OperateMongoState> { @Override
public void updateState(OperateMongoState state, List<TridentTuple> tuples, TridentCollector collector) {
state.updateState(tuples, collector);
String userId = tuples.get(0).getStringByField("userId");
collector.emit(new Values(userId));
} }
package net.baiqu.storm.trident.topology; import kafka.api.OffsetRequest;
import net.baiqu.storm.trident.filter.FormatFilter;
import net.baiqu.storm.trident.function.OperateLogParseFunction;
import net.baiqu.storm.trident.function.OperatePrintFunction;
import net.baiqu.storm.trident.state.MyMongoStateUpdater;
import net.baiqu.storm.trident.util.TridentMongoFactory;
import net.baiqu.storm.utils.Constants;
import org.apache.commons.lang.StringUtils;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.kafka.BrokerHosts;
import org.apache.storm.kafka.StringScheme;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.kafka.trident.TransactionalTridentKafkaSpout;
import org.apache.storm.kafka.trident.TridentKafkaConfig;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.apache.storm.trident.Stream;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.tuple.Fields; /**
* <p>
* Date-Time: 2018/09/10 13:50
* Company: 百趣
* </p>
*
* @author tangzhe
* @version 1.0.0
*/
public class OperateLogTridentTopology { public static void main(String[] args) {
TridentTopology topology = new TridentTopology(); BrokerHosts hosts = new ZkHosts(Constants.ZK_HOSTS);
String topic = Constants.KAFKA_LOG_TOPIC;
String zkRoot = Constants.ZK_KAFKA_ROOT;
String id = Constants.KAFKA_SPOUT_ID; TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(hosts, topic, id);
kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme()); // demo模式设置读取偏移量的操作
if (StringUtils.equalsIgnoreCase("demo", Constants.MODE)) {
kafkaConfig.startOffsetTime = OffsetRequest.LatestTime();
} TransactionalTridentKafkaSpout kafkaSpout = new TransactionalTridentKafkaSpout(kafkaConfig); Stream stream = topology.newStream("kafkaSpout", kafkaSpout).parallelismHint(1);
stream.shuffle().each(new Fields("str"), new FormatFilter())
.parallelismHint(1)
.shuffle().each(new Fields("str"), new OperateLogParseFunction(),
new Fields("appId", "host", "requestURI", "method", "ip",
"requestTime", "userId", "username", "role", "memo", "logDate"))
.parallelismHint(1)
.partitionPersist(TridentMongoFactory.getMongoInsertState(),
new Fields("appId", "host", "requestURI", "method", "ip",
"requestTime", "userId", "username", "role", "memo", "logDate"),
new MyMongoStateUpdater(),
new Fields("result"))
.parallelismHint(1)
.newValuesStream().shuffle().each(
new Fields("result"), new OperatePrintFunction(), new Fields("none"))
.parallelismHint(1); Config config = new Config();
if (StringUtils.equalsIgnoreCase("demo", Constants.MODE)) {
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("operateLogTridentTopology", config, topology.build());
} else {
config.setNumWorkers(1);
config.put(Config.NIMBUS_HOST, Constants.NIMBUS_HOST);
config.put(Config.NIMBUS_THRIFT_PORT, Constants.NIMBUS_THRIFT_PORT);
config.put(Config.TOPOLOGY_ACKER_EXECUTORS, 1);
try {
StormSubmitter.submitTopology(args[0], config, topology.build());
} catch (Exception e) {
e.printStackTrace();
}
}
} }
package net.baiqu.storm.trident.util; import net.baiqu.storm.trident.state.OperateMongoState;
import net.baiqu.storm.trident.state.OperateMongoStateFactory;
import net.baiqu.storm.utils.Constants;
import org.apache.commons.lang.StringUtils;
import org.apache.storm.mongodb.common.mapper.MongoMapper;
import org.apache.storm.mongodb.common.mapper.SimpleMongoMapper;
import org.apache.storm.mongodb.trident.state.MongoState;
import org.apache.storm.mongodb.trident.state.MongoStateFactory;
import org.apache.storm.trident.state.StateFactory; /**
* <p>
* Copyright: Copyright (c) 2018/9/10 14:56
* <p>
* Company: 百趣
* <p>
* trident mongo 工厂类
* @author tangzhe
* @version 1.0.0
*/
public class TridentMongoFactory { public static final String URL = "mongodb://" + Constants.MONGODB_USERNAME + ":"
+ Constants.MONGODB_PASSWORD.replace("@", "%40")
+ "@" + Constants.MONGODB_HOSTS + ":" + Constants.MONGODB_PORT + "/"
+ Constants.MONGODB_DATABASE + "?connectTimeoutMS=" + Constants.MONGODB_TIMEOUT; public static final String URL2 = "mongodb://" + Constants.MONGODB_HOSTS + ":" + Constants.MONGODB_PORT + "/"
+ Constants.MONGODB_DATABASE + "?connectTimeoutMS=" + Constants.MONGODB_TIMEOUT; public static final String OPERATE_LOG_DB = "operate_log"; /**
* 使用自带state实现插入mongo
*/
public static StateFactory getMongoInsertState() {
String url = getUrl(); MongoMapper mapper = new SimpleMongoMapper()
.withFields("appId", "host", "requestURI", "method", "ip", "requestTime",
"userId", "username", "role", "memo", "logDate"); MongoState.Options options = new MongoState.Options()
.withUrl(url)
.withCollectionName(OPERATE_LOG_DB)
.withMapper(mapper); return new MongoStateFactory(options);
} /**
* 自定义state实现更新mongo
*/
public static StateFactory getMongoUpdateState() {
String url = getUrl();
MongoMapper mapper = new SimpleMongoMapper()
.withFields("appId", "host", "requestURI", "method", "ip", "requestTime",
"userId", "username", "role", "memo", "logDate"); OperateMongoState.Options options = new OperateMongoState.Options()
.withUrl(url)
.withCollectionName(OPERATE_LOG_DB)
.withMapper(mapper); return new OperateMongoStateFactory(options);
} /**
* 获取mongo url
*/
private static String getUrl() {
String url;
if (StringUtils.isNotBlank(Constants.MONGODB_USERNAME)) {
url = URL;
} else {
url = URL2;
}
return url;
} }

Trident整合MongoDB的更多相关文章

  1. spring MVC 整合mongodb

    Spring Mongodb 目录 1 SPRING整合MONGODB 1 1.1 环境准备 1 1.2 包依赖 1 1.3 配置 2 2 案列 5 2.1 SPRING MVC整合MONGODB代码 ...

  2. MongoDB系列:四、spring整合mongodb,带用户验证

    在前面的两篇博客 MongoDB常用操作练习.springboot整合mongoDB的简单demo中,我们基本上熟悉了mongodb,也把它与spring boot进行了整合并且简单使用.在本篇博客中 ...

  3. java操作mongodb & springboot整合mongodb

    简单的研究原生API操作MongoDB以及封装的工具类操作,最后也会研究整合spring之后作为dao层的完整的操作. 1.原生的API操作 pom.xml <!-- https://mvnre ...

  4. SpringBoot整合mongoDB

    MongoDB 是一个介于关系数据库和非关系数据库之间的产品,是非关系数据库当中功能最丰富,最像关系数据库的. 这一片文章介绍一个springboot整合mongodb,如果你了解整合mysql之类的 ...

  5. springboot 学习之路 14(整合mongodb的Api操作)

    springboot整合mongodb: mongodb的安装和权限配置  请点击连接参考 mongodb集成 : 第一步:引如pom文件 第二步:配置文件配置mongodb路径: 第三步:关于mon ...

  6. SpringMVC整合Mongodb开发,高级操作

    开发环境: 操作系统:windows xpMongodb:2.0.6依 赖 包:Spring3.2.2 + spring-data-mongodb-1.3.0 + Spring-data-1.5 +  ...

  7. spring整合mongodb

    使用spring整合mongodb maven 依赖 <dependency> <groupId>org.mongodb</groupId> <artifac ...

  8. SpringBoot非官方教程 | 第八篇:springboot整合mongodb

    转载请标明出处: 原文首发于:https://www.fangzhipeng.com/springboot/2017/07/11/springboot8-mongodb/ 本文出自方志朋的博客 这篇文 ...

  9. Trident整合Kafka

    首先编写一个打印函数KafkaPrintFunction import org.apache.storm.trident.operation.BaseFunction; import org.apac ...

随机推荐

  1. IO文件操作

    × 目录 [1]IO文件的操作 [2]Directory类 [3]File类 [4]FileStream类 [5]文本文件的操作 一.IO文件的操作:   .net中对文件操作,经常会用到这样几个类: ...

  2. sql分组数据去重

    #分组获得每个机柜里服务器占用的机架总数,如552807e6-b428-4184-b219-ae368c68ddb3占用4个 mysql> select cabinet_uuid, count( ...

  3. strdup和strndup函数

    首先说明一下:这两个函数不建议使用,原因是返回内存地址把释放权交给别的变量,容易忘记释放. 一.strdup函数 函数原型 头文件:#include <string.h> char *st ...

  4. QT学习之QScript

    QT中有解析Json的一个类叫QScript.貌似还有一个QJson,但听说解析的方便性不如QScript,具体没有深入探究,这里仅简单记录一下QScript的使用. 首先,主要使用到的类有QScri ...

  5. Nginx启用Gzip压缩js无效的原因

    Nginx启用gzip很简单,只需要设置一下配置文件即可完成,可以参考文章Nginx如何配置Gzip压缩功能.不过,在群里常有人提到,他们的网站Gzip压缩虽然成功了,但检测到JS仍然没有压缩成功,这 ...

  6. linq 和 lmabda 表达式 的用法 和优劣 转自 农码一生

    https://www.cnblogs.com/zhaopei/p/5746414.html

  7. Visual Studio Code快捷键_Linux

    Keyboard shortcuts for Linux Basic editing Ctrl + X Cut line(empty selection) Ctrk + C   Copy line(e ...

  8. mac系统的几种u盘启动制作方式

    先拿一个U盘,格式化好(guid分区表之类的选项弄好) 1.通过终端制作: 1>下载好自己要安装的系统镜像,最新的在App Store上下,以前的可以去pc 苹果等地方下载 2>在终端输入 ...

  9. JS常见内置对象和方法

    JS中常用的内置对象:Array对象.Date对象.正则表达式对象.string对象.Global对象  Array对象中常用方法: concat() 表示把几个数组合并成一个数组join()   设 ...

  10. REST Adapter实现SAP PI中的增强XML/JSON格式转换(转载)

    SAP标准的REST adapter有着XML/JSON转换的功能,它很有用,因为一方面SAP PI/PO内部以XML格式处理数据,而另一方面,在处理REST架构风格的时候,JSON才是事实上的格式. ...