Storm real-time computation example (real-time socket ingestion)
Introduction
This example implements a simple end-to-end flow: a log file is monitored in real time, new lines are written to a socket server, and from there the data is fed into Storm for computation.
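In outline, the pipeline is: log directory → LogViewToSocket (tails the newest log file) → MyServerMulti (socket server on port 5678) → SocketSpout → Process bolt → output file.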
Source code
Log monitor that writes to the socket server in real time
package socket;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.RandomAccessFile;
import java.net.Socket;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

/*
 * Tails log data and sends it over a socket to a remote server (see MyServerMulti).
 * ClientRead then reads it back from the server over its own socket.
 */
public class LogViewToSocket {
    private long lastTimeFileSize = 0; // file size at the last read

    /**
     * Returns the most recently modified file in the given directory,
     * so the tail always follows the newest (rotated) log file.
     */
    public String getNewFile(File file) {
        File[] fs = file.listFiles();
        long maxtime = 0;
        String newfilename = "";
        for (int i = 0; i < fs.length; i++) {
            if (fs[i].lastModified() > maxtime) {
                maxtime = fs[i].lastModified();
                newfilename = fs[i].getAbsolutePath();
            }
        }
        return newfilename;
    }

    RandomAccessFile randomFile = null;
    String newfile = null;
    String thisfile = null;

    /**
     * Streams new log lines in real time.
     * @param logFile the log directory to watch
     * @throws IOException
     */
    public void realtimeShowLog(final File logFile, final PrintWriter out) throws IOException {
        newfile = getNewFile(logFile);
        // open the current log file read-only
        randomFile = new RandomAccessFile(new File(newfile), "r");
        // start a thread that reads newly appended log lines every second
        ScheduledExecutorService exec =
                Executors.newScheduledThreadPool(1);
        exec.scheduleWithFixedDelay(new Runnable() {
            public void run() {
                try {
                    // seek past the part that has already been read
                    randomFile.seek(lastTimeFileSize);
                    String tmp = "";
                    while ((tmp = randomFile.readLine()) != null) {
                        // readLine() decodes each byte as ISO-8859-1; re-encoding
                        // recovers the raw bytes for the platform charset to decode
                        System.out.println(new String(tmp.getBytes("ISO8859-1")));
                        out.println(new String(tmp.getBytes("ISO8859-1")));
                        out.flush();
                    }
                    thisfile = getNewFile(logFile);
                    if (!thisfile.equals(newfile)) {
                        // the log rotated: switch to the new file and read from the start
                        newfile = thisfile;
                        randomFile = new RandomAccessFile(new File(newfile), "r");
                        lastTimeFileSize = 0;
                    } else {
                        lastTimeFileSize = randomFile.length();
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
        }, 0, 1, TimeUnit.SECONDS);
    }

    public static void main(String[] args) throws Exception {
        LogViewToSocket view = new LogViewToSocket();
        Socket socket = new Socket("192.168.27.100", 5678);
        PrintWriter out = new PrintWriter(socket.getOutputStream());
        final File tmpLogFile = new File("/home/hadoop/test");
        view.realtimeShowLog(tmpLogFile, out);
        // socket.close();
    }
}
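To exercise the tail end-to-end, a throwaway writer like the one below can append test lines under the watched directory. This is a minimal sketch: the directory /home/hadoop/test comes from main() above, but the file name app.log and the one-line-per-second pace are assumptions.

import java.io.FileWriter;
import java.io.PrintWriter;

// Minimal sketch: append one test line per second to a file in the
// watched directory, so LogViewToSocket (which polls every second)
// picks the lines up and forwards them to the socket server.
public class TestLogWriter {
    public static void main(String[] args) throws Exception {
        // app.log is a hypothetical file name under the watched directory
        PrintWriter pw = new PrintWriter(new FileWriter("/home/hadoop/test/app.log", true));
        for (int i = 0; i < 100; i++) {
            pw.println("log line " + i);
            pw.flush();
            Thread.sleep(1000);
        }
        pw.close();
    }
}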
Socket server handling
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.ServerSocket;
import java.net.Socket;
import java.util.ArrayList;

public class MyServerMulti {
    public static void main(String[] args) throws IOException {
        ServerSocket server = new ServerSocket(5678);
        int i = 0;
        ArrayList<PrintWriter> outs = new ArrayList<PrintWriter>();
        /*
         * One client socket sends data in; the server then forwards it
         * to the other connected client sockets.
         */
        Socket socket1 = null;
        while (true) {
            Socket socket = server.accept();
            i++;
            System.out.println(i);
            System.out.println(socket.getInetAddress());
            PrintWriter out = new PrintWriter(socket.getOutputStream());
            outs.add(out);
            if (i == 1)
                socket1 = socket; // the first connection is the log producer
            if (i == 2)
                invoke(socket1, outs); // once a consumer connects, start forwarding
        }
    }

    private static void invoke(final Socket client, final ArrayList<PrintWriter> outs) throws IOException {
        new Thread(new Runnable() {
            public void run() {
                BufferedReader in = null;
                PrintWriter out = null;
                PrintWriter out1 = null;
                try {
                    in = new BufferedReader(new InputStreamReader(client.getInputStream()));
                    out = new PrintWriter(client.getOutputStream());
                    while (true) {
                        String msg = in.readLine();
                        if (msg == null) // the producer disconnected
                            break;
                        System.out.println(msg);
                        out.println("Server received " + msg);
                        out.flush();
                        /* forward the data to every connected client */
                        for (int i = 0; i < outs.size(); i++) {
                            out1 = outs.get(i);
                            System.out.println(i);
                            System.out.println("send msg:" + msg);
                            out1.println(msg);
                            out1.flush();
                        }
                        System.out.println(client.getInetAddress());
                        if (msg.equals("bye")) {
                            break;
                        }
                    }
                } catch (IOException ex) {
                    ex.printStackTrace();
                } finally {
                    try {
                        in.close();
                    } catch (Exception e) {}
                    try {
                        out.close();
                    } catch (Exception e) {}
                    try {
                        client.close();
                    } catch (Exception e) {}
                }
            }
        }).start();
    }
}
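The comment at the top of LogViewToSocket mentions a ClientRead that reads the forwarded lines back from the server. Its source is not included here, but a minimal sketch could look like the following (the class name comes from that comment; the rest is an assumption about its behavior):

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.Socket;

// Minimal sketch of a consumer: connect to MyServerMulti and print
// every line the server forwards, stopping when the stream ends.
public class ClientRead {
    public static void main(String[] args) throws Exception {
        Socket sock = new Socket("192.168.27.100", 5678);
        BufferedReader in = new BufferedReader(new InputStreamReader(sock.getInputStream()));
        String line;
        while ((line = in.readLine()) != null) {
            System.out.println(line);
        }
        sock.close();
    }
}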
Storm topology
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.Socket;
import java.net.UnknownHostException;
import java.util.Map;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

/*
 * Run with:
 * storm jar stormtest.jar socket.SocketProcess /home/hadoop/out_socket.txt true
 */
public class SocketProcess {

    public static class SocketSpout extends BaseRichSpout {
        private static final long serialVersionUID = 1L;
        static Socket sock = null;
        static BufferedReader in = null;
        String str = null;
        private SpoutOutputCollector _collector;

        SocketSpout() {
        }

        // connect to the socket server when the spout opens
        @Override
        public void open(Map conf, TopologyContext context,
                SpoutOutputCollector collector) {
            _collector = collector;
            try {
                sock = new Socket("192.168.27.100", 5678);
                in = new BufferedReader(new InputStreamReader(sock.getInputStream()));
            } catch (UnknownHostException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        // read lines from the socket and emit each one as a tuple
        @Override
        public void nextTuple() {
            if (sock == null) {
                try {
                    sock = new Socket("192.168.27.100", 5678);
                    in = new BufferedReader(new InputStreamReader(sock.getInputStream()));
                } catch (UnknownHostException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            while (true) {
                try {
                    str = in.readLine();
                } catch (IOException e) {
                    e.printStackTrace();
                }
                if (str == null) {
                    break; // the server closed the connection
                }
                System.out.println(str);
                _collector.emit(new Values(str));
                if (str.equals("end")) {
                    break;
                }
            }
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("line"));
        }
    }

    public static class Process extends BaseRichBolt {
        private String _outFile;
        private OutputCollector _collector;
        private BufferedWriter bw;

        public Process(String outFile) {
            this._outFile = outFile;
        }

        // open the output file so results can be appended to it
        @Override
        public void prepare(Map stormConf, TopologyContext context,
                OutputCollector collector) {
            this._collector = collector;
            File out = new File(_outFile);
            try {
                bw = new BufferedWriter(new OutputStreamWriter(
                        new FileOutputStream(out, true)));
            } catch (IOException e1) {
                e1.printStackTrace();
            }
        }

        // the bolt appends ",bkeep" and a newline to each tuple,
        // then writes the result to the file given by outFile
        @Override
        public void execute(Tuple input) {
            String line = input.getString(0);
            try {
                bw.write(line + ",bkeep" + "\n");
                bw.flush();
            } catch (IOException e) {
                e.printStackTrace();
            }
            _collector.emit(new Values(line));
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("line"));
        }
    }

    public static void main(String[] argv) throws AlreadyAliveException, InvalidTopologyException {
        String outFile = argv[0];                        // output file
        boolean distribute = Boolean.valueOf(argv[1]);   // cluster mode (true) or local mode (false)
        TopologyBuilder builder = new TopologyBuilder(); // build a topology
        builder.setSpout("spout", new SocketSpout(), 1); // set the spout
        builder.setBolt("bolt", new Process(outFile), 1).shuffleGrouping("spout"); // set the bolt and its grouping
        Config conf = new Config();
        if (distribute) {
            StormSubmitter.submitTopology("SocketProcess", conf, builder.createTopology());
        } else {
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("SocketProcess", conf, builder.createTopology());
        }
    }
}
storm jar stormtest.jar socket.SocketProcess /home/hadoop/out_socket.txt true
The spout receives the data streamed in real time from the socket server; the topology processes it, and the result is finally written to the out_socket.txt file.
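Since main() treats argv[1] as the cluster/local switch, passing false instead of true runs the same topology in a LocalCluster for testing:

storm jar stormtest.jar socket.SocketProcess /home/hadoop/out_socket.txt false

As a concrete (hypothetical) example of the bolt's transformation: an incoming line "2014-10-01 10:00:00 ERROR foo" would be appended to out_socket.txt as "2014-10-01 10:00:00 ERROR foo,bkeep".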