自定义http source

config

a1.sources.r1.type=http
a1.sources.r1.bind=localhost
a1.sources.r1.port=
a1.sources.r1.channels=c1
#自定义source Handler
a1.sources.r1.handler = org.apache.flume.sw.source.http.JSONHandler
a1.sources.r1.handler.configHome = /home/www/logs/datareport

handler

public class JSONHandler implements HTTPSourceHandler {

  private static final Logger LOG = LoggerFactory.getLogger(JSONHandler.class);

  public static final String PARA_SIGN = "sign";
public static final String PARA_PROJECT_ID = "projectId";
public static final String PARA_REPORT_MSG = "reportMsg"; private final Type mapType = new TypeToken<LinkedHashMap<String, Object>>() {}.getType();
private final Gson gson; //可以获取外部参数
private Context context = null; public JSONHandler() {
gson = new GsonBuilder().disableHtmlEscaping().create();
} /**
* {@inheritDoc}
*/
@Override
public List<Event> getEvents(HttpServletRequest request) throws Exception {
BufferedReader reader = request.getReader();
String charset = request.getCharacterEncoding();
//UTF-8 is default for JSON. If no charset is specified, UTF-8 is to
//be assumed.
if (charset == null) {
LOG.debug("Charset is null, default charset of UTF-8 will be used.");
charset = "UTF-8";
} else if (!(charset.equalsIgnoreCase("utf-8")
|| charset.equalsIgnoreCase("utf-16")
|| charset.equalsIgnoreCase("utf-32"))) {
LOG.error("Unsupported character set in request {}. "
+ "JSON handler supports UTF-8, "
+ "UTF-16 and UTF-32 only.", charset);
throw new UnsupportedCharsetException("JSON handler supports UTF-8, "
+ "UTF-16 and UTF-32 only.");
} /*
* Gson throws Exception if the data is not parseable to JSON.
* Need not catch it since the source will catch it and return error.
*/
LinkedHashMap<String, Object> map = new LinkedHashMap<String, Object>();
try {
map = gson.fromJson(reader, mapType);
} catch (JsonSyntaxException ex) {
throw new HTTPBadRequestException("Request has invalid JSON Syntax.", ex);
} String configHome = this.context.getString("configHome");
LOG.info(configHome);
String projectId = map.get(PARA_PROJECT_ID).toString();
String reportMsg = map.get(PARA_REPORT_MSG).toString();
Map<String, String> headers = new HashMap<String, String>();
headers.put(PARA_PROJECT_ID, projectId);
headers.put(PARA_SIGN, "");
JSONEvent jsonEvent = new JSONEvent();
jsonEvent.setHeaders(headers);
jsonEvent.setBody(reportMsg.getBytes()); return getSimpleEvents(jsonEvent);
} @Override
public void configure(Context context) {
this.context = context;
} private List<Event> getSimpleEvents(Event e) {
List<Event> newEvents = new ArrayList<Event>(1);
newEvents.add(EventBuilder.withBody(e.getBody(), e.getHeaders()));
return newEvents;
}
}

自定义Sink

config

#自定义Sink
a1.sinks.k1.type = org.apache.flume.sw.sink.RollingFileSink
a1.sinks.k1.channel = c1
a1.sinks.k1.sink.rollInterval = 15
a1.sinks.k1.sink.directory = D:/var/log/flume
#自定义pathManager类型
a1.sinks.k1.sink.pathManager = CUSTOM
#文件创建频率 (null or yyyyMMddHHmmss), 默认值null->不创建
a1.sinks.k1.sink.pathManager.dirNameFormatter = yyyyMMdd
a1.sinks.k1.sink.pathManager.prefix = log_
a1.sinks.k1.sink.pathManager.extension = txt

自定义RollingFileSink

    if(pathManagerType.equals("CUSTOM")) {
//如果外部配置的PathManager是CUSTOM,则直接new出自定义的SimplePathManager
pathController = new SimplePathManager(pathManagerContext);
} else {
pathController = PathManagerFactory.getInstance(pathManagerType, pathManagerContext);
}

自定义pathManager类型

public class SimplePathManager extends DefaultPathManager {
private static final Logger logger = LoggerFactory
.getLogger(SimplePathManager.class);
private final DateTimeFormatter formatter = DateTimeFormat.forPattern("yyyyMMddHHmmss");
private DateTimeFormatter dirNameFormatter = null; private String lastRoll; public SimplePathManager(Context context) {
super(context); String dirNameFormatterStr = context.getString("dirNameFormatter");
if(dirNameFormatterStr == null || "null".equals(dirNameFormatterStr)){
dirNameFormatter = null;
} else {
dirNameFormatter = DateTimeFormat.forPattern(dirNameFormatterStr);
} } @Override
public File nextFile() {
LocalDateTime now = LocalDateTime.now();
StringBuilder sb = new StringBuilder();
String date = formatter.print(now);
if (!date.equals(lastRoll)) {
getFileIndex().set(0);
lastRoll = date;
}
sb.append(getPrefix()).append(date).append("-");
sb.append(getFileIndex().incrementAndGet());
if (getExtension().length() > 0) {
sb.append(".").append(getExtension());
} File dir = dirNameFormatter != null ? new File(getBaseDirectory(), dirNameFormatter.print(now)) :
getBaseDirectory(); try {
FileUtils.forceMkdir(dir);
currentFile = new File(dir, sb.toString());
} catch (IOException e) {
currentFile = new File(getBaseDirectory(), sb.toString());
logger.error(e.toString(), e);
} return currentFile;
} public static class Builder implements PathManager.Builder {
@Override
public PathManager build(Context context) {
return new SimplePathManager(context);
}
} }

Apache Flume 1.7.0 自定义输入输出的更多相关文章

  1. Apache Flume 1.7.0 发布,日志服务器

    Apache Flume 1.7.0 发布了,Flume 是一个分布式.可靠和高可用的服务,用于收集.聚合以及移动大量日志数据,使用一个简单灵活的架构,就流数据模型.这是一个可靠.容错的服务. 本次更 ...

  2. Apache Flume 1.7.0 源码编译 导入Eclipse

    前言 最近看了看Apache Flume,在虚拟机里跑了一下flume + kafka + storm + mysql架构的demo,功能很简单,主要是用flume收集数据源(http上报信息),放入 ...

  3. Apache Flume 1.6.0 发布,日志服务器

    Apache Flume 1.6.0 发布,此版本现已提供下载: http://flume.apache.org/download.html 更新日志和文档: http://flume.apache. ...

  4. Apache Flume 1.7.0 各个模块简介

    Flume简介 Apache Flume是一个分布式.可靠.高可用的日志收集系统,支持各种各样的数据来源,如http,log文件,jms,监听端口数据等等,能将这些数据源的海量日志数据进行高效收集.聚 ...

  5. Flume 1.5.0简单部署试用

    ================================================================================ 一.Flume简介 ========= ...

  6. Flume官方文档翻译——Flume 1.7.0 User Guide (unreleased version)中一些知识点

    Flume官方文档翻译--Flume 1.7.0 User Guide (unreleased version)(一) Flume官方文档翻译--Flume 1.7.0 User Guide (unr ...

  7. Flume官方文档翻译——Flume 1.7.0 User Guide (unreleased version)(二)

    Flume官方文档翻译--Flume 1.7.0 User Guide (unreleased version)(一) Logging raw data(记录原始数据) Logging the raw ...

  8. Flume官方文档翻译——Flume 1.7.0 User Guide (unreleased version)(一)

    Flume 1.7.0 User Guide Introduction(简介) Overview(综述) System Requirements(系统需求) Architecture(架构) Data ...

  9. Apache Spark 2.2.0 中文文档 - Spark Streaming 编程指南 | ApacheCN

    Spark Streaming 编程指南 概述 一个入门示例 基础概念 依赖 初始化 StreamingContext Discretized Streams (DStreams)(离散化流) Inp ...

随机推荐

  1. 使用signalr实现网页和微信公众号实时聊天(上)

    最近项目中需要实现客户在公众号中和客服(客服使用后台网站系统)进行实时聊天的功能.折腾了一段时间,实现了这个功能.现在将过程记录下,以便有相同需求的同行可以参考,也是自己做个总结.这篇是上,用手机编辑 ...

  2. HDU-6060 RXD and dividing

    题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=6060   多校的题目,每次只能写两道SB题,剩下的要么想不到,要么想到了,代码不知道怎么实现,还是写的 ...

  3. 获取window.location.href中传的值,并且转换成json数据使用

    做个记录保存一下,以免以后再次用到忘记了. function locVal(){ var url=window.location.href; if (url.indexOf('?')==-1)retu ...

  4. vue.js报错:Module build failed: Error: No parser and no file path given, couldn't infer a parser.

    ERROR Failed to compile with 2 errors 12:00:33 error in ./src/App.vue Module build failed: Error: No ...

  5. Django路由分配及渲染

    一.url路由分配 1.url配置的本质是将浏览器传递过来的路径和参数与服务器中的视图向匹配,并返回浏览器相应的视图函数. 2.url路径path规则 path(route, view, kwargs ...

  6. Dev gridcontrol 添加多列按钮

    gridcontrol中添加按钮的步骤: 1. 把列的ColumnEdit属性设置为RepositoryItemButtonEdit 2. 把TextEditStyle属性设置为HideTextEdi ...

  7. 线程中的join方法

    join方法的作用是同步线程. 1.不使用join方法:当设置多个线程时,在一般情况下(无守护线程,setDeamon=False),多个线程同时启动,主线程执行完,会等待其他子线程执行完,程序才会退 ...

  8. pycharm clion phpstorn全家桶激活码(可以用到2019年4月)

    SXXI7H41YN-eyJsaWNlbnNlSWQiOiJTWFhJN0g0MVlOIiwibGljZW5zZWVOYW1lIjoicGF5bmUgd2FuZyIsImFzc2lnbmVlTmFtZ ...

  9. 值得推荐的C/C++框架和库 (真的很强大)〔转〕

    http://m.blog.csdn.net/article/details?id=42541419

  10. LinkeList 特有方法

    LinkedList:特有方法:addFirst();addLast();添加元素到集合,添加到头尾,getFirst();getLast();获取元素,但不删除元素.如果集合中没有元素,会出现NoS ...