1. Project structure

  

====================== The program needs to be debugged step by step ======================

Part I: Step 1, the KafkaSpout and the driver class

1. Services running at this point

  

2. The main driver class

package com.jun.it2;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.generated.StormTopology;
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.TopologyBuilder;
import storm.kafka.*;

import java.util.UUID;

public class WebLogStatictis {

    /**
     * Main entry point
     * @param args
     */
    public static void main(String[] args) {
        WebLogStatictis webLogStatictis = new WebLogStatictis();
        StormTopology stormTopology = webLogStatictis.createTopology();
        Config config = new Config();
        // Run locally or on the cluster
        // config.setNumAckers(4);
        if (args == null || args.length == 0) {
            // Local mode
            LocalCluster localCluster = new LocalCluster();
            localCluster.submitTopology("webloganalyse", config, stormTopology);
        } else {
            // Submit to the cluster
            config.setNumWorkers(4); // number of worker processes for this topology
            try {
                StormSubmitter.submitTopology(args[0], config, stormTopology);
            } catch (AlreadyAliveException e) {
                e.printStackTrace();
            } catch (InvalidTopologyException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Build a KafkaSpout
     * @return
     */
    private IRichSpout generateSpout() {
        BrokerHosts hosts = new ZkHosts("linux-hadoop01.ibeifeng.com:2181");
        String topic = "nginxlog";
        String zkRoot = "/" + topic;
        String id = UUID.randomUUID().toString();
        SpoutConfig spoutConf = new SpoutConfig(hosts, topic, zkRoot, id);
        spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme()); // decode Kafka messages as strings
        spoutConf.forceFromStart = true;
        KafkaSpout kafkaSpout = new KafkaSpout(spoutConf);
        return kafkaSpout;
    }

    public StormTopology createTopology() {
        TopologyBuilder topologyBuilder = new TopologyBuilder();
        // Spout
        topologyBuilder.setSpout(WebLogConstants.KAFKA_SPOUT_ID, generateSpout());
        // WebLogParserBolt
        topologyBuilder.setBolt(WebLogConstants.WEB_LOG_PARSER_BOLT, new WebLogParserBolt())
                .shuffleGrouping(WebLogConstants.KAFKA_SPOUT_ID);
        return topologyBuilder.createTopology();
    }
}

3.WebLogParserBolt

  This first version simply prints what the KafkaSpout emits so we can check that the data arrives correctly. The field name "str" is the single output field declared by storm-kafka's StringScheme, which is why the bolt reads it with getStringByField("str").

package com.jun.it2;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Tuple;

import java.util.Map;

public class WebLogParserBolt implements IRichBolt {
    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
    }

    @Override
    public void execute(Tuple tuple) {
        String webLog = tuple.getStringByField("str");
        System.out.println(webLog);
    }

    @Override
    public void cleanup() {
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}

4. Run Main

  It first consumes the data already sitting in the topic (forceFromStart is set to true, so the spout reads from the beginning).

  

5. Start the Kafka console producer

   bin/kafka-console-producer.sh --topic nginxlog --broker-list linux-hadoop01.ibeifeng.com:9092

  

6. Paste data into the Kafka producer console
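  If you do not have real nginx access logs at hand, a made-up line shaped like the one below (all values are hypothetical) satisfies the regex used by WebLogParserBolt in Part II: an IP, two ignored fields, an epoch-millisecond timestamp in square brackets, the quoted request line, a status code, a byte count, and three quoted fields (referer, user agent, trailing field).

 1.2.3.4 - - [1523371200000] "GET /index.html HTTP/1.1" 200 1024 "http://www.example.com/" "Mozilla/5.0" "-"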

  

7. Console output under Main

  

Part II: Step 2, parsing the log

1.WebLogParserBolt

  To verify the parsing on its own, remove two parts and re-enable one comment (a sketch of the resulting execute() follows this list):

    Remove the stream splitting.

    Remove the emits.

    Uncomment the print statements.
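  Roughly, the verification variant of execute() looks like the sketch below. It assumes the pattern and outputCollector fields initialized in prepare() of the full class shown in subsection 3; only the debug print remains and the four emits are gone.

 @Override
 public void execute(Tuple tuple) {
     String webLog = tuple.getStringByField("str");
     if (webLog != null && !"".equals(webLog)) {
         Matcher matcher = pattern.matcher(webLog);
         if (matcher.find()) {
             // print the raw line and the extracted groups instead of emitting
             System.err.println(webLog);
             System.err.println("ip=" + matcher.group(1)
                     + ", serverTimeStr=" + matcher.group(2)
                     + ", requestUrl=" + matcher.group(3)
                     + ", httpRefer=" + matcher.group(4)
                     + ", userAgent=" + matcher.group(5));
         }
     }
     this.outputCollector.ack(tuple);
 }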

2. Result

  This can be verified simply by running the Main class.

  

3.WebLogParserBolt

package com.jun.it2;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.jun.it2.WebLogConstants.*;

public class WebLogParserBolt implements IRichBolt {
    private Pattern pattern;
    private OutputCollector outputCollector;

    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
        pattern = Pattern.compile("([^ ]*) [^ ]* [^ ]* \\[([\\d+]*)\\] \\\"[^ ]* ([^ ]*) [^ ]*\\\" \\d{3} \\d+ \\\"([^\"]*)\\\" \\\"([^\"]*)\\\" \\\"[^ ]*\\\"");
        this.outputCollector = outputCollector;
    }

    @Override
    public void execute(Tuple tuple) {
        String webLog = tuple.getStringByField("str");
        if (webLog != null && !"".equals(webLog)) {
            Matcher matcher = pattern.matcher(webLog);
            if (matcher.find()) {
                String ip = matcher.group(1);
                String serverTimeStr = matcher.group(2);
                // Turn the server time (epoch milliseconds) into day/hour/minute buckets
                long timestamp = Long.parseLong(serverTimeStr);
                Date date = new Date();
                date.setTime(timestamp);
                DateFormat df = new SimpleDateFormat("yyyyMMddHHmm");
                String dateStr = df.format(date);
                String day = dateStr.substring(0, 8);
                String hour = dateStr.substring(0, 10);
                String minute = dateStr;

                String requestUrl = matcher.group(3);
                String httpRefer = matcher.group(4);
                String userAgent = matcher.group(5);

                // Uncomment to verify that the regex matches correctly
                // System.err.println(webLog);
                // System.err.println("ip=" + ip
                //         + ", serverTimeStr=" + serverTimeStr
                //         + ", requestUrl=" + requestUrl
                //         + ", httpRefer=" + httpRefer
                //         + ", userAgent=" + userAgent);

                // Split the tuple into one stream per KPI
                this.outputCollector.emit(WebLogConstants.IP_COUNT_STREAM, tuple, new Values(day, hour, minute, ip));
                this.outputCollector.emit(WebLogConstants.URL_PARSER_STREAM, tuple, new Values(day, hour, minute, requestUrl));
                this.outputCollector.emit(WebLogConstants.HTTPREFER_PARSER_STREAM, tuple, new Values(day, hour, minute, httpRefer));
                this.outputCollector.emit(WebLogConstants.USERAGENT_PARSER_STREAM, tuple, new Values(day, hour, minute, userAgent));
            }
        }
        this.outputCollector.ack(tuple);
    }

    @Override
    public void cleanup() {
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declareStream(WebLogConstants.IP_COUNT_STREAM, new Fields(DAY, HOUR, MINUTE, IP));
        outputFieldsDeclarer.declareStream(WebLogConstants.URL_PARSER_STREAM, new Fields(DAY, HOUR, MINUTE, REQUEST_URL));
        outputFieldsDeclarer.declareStream(WebLogConstants.HTTPREFER_PARSER_STREAM, new Fields(DAY, HOUR, MINUTE, HTTP_REFER));
        outputFieldsDeclarer.declareStream(WebLogConstants.USERAGENT_PARSER_STREAM, new Fields(DAY, HOUR, MINUTE, USERAGENT));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
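  To make the time bucketing concrete, take the hypothetical sample line from Part I: group(2) is 1523371200000, and formatting that timestamp with yyyyMMddHHmm gives a 12-character string (201804101440 in UTC; the exact value depends on the JVM timezone), so:

 day    = "20180410"       (first 8 characters, daily bucket)
 hour   = "2018041014"     (first 10 characters, hourly bucket)
 minute = "201804101440"   (the full string, per-minute bucket)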

Part III: Step 3, a generic counter bolt

1.CountKpiBolt

package com.jun.it2;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

public class CountKpiBolt implements IRichBolt {

    private String kpiType;
    private Map<String, Integer> kpiCounts;
    private String currentDay = "";
    private OutputCollector outputCollector;

    public CountKpiBolt(String kpiType) {
        this.kpiType = kpiType;
    }

    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
        this.kpiCounts = new HashMap<>();
        this.outputCollector = outputCollector;
    }

    @Override
    public void execute(Tuple tuple) {
        String day = tuple.getStringByField("day");
        String hour = tuple.getStringByField("hour");
        String minute = tuple.getStringByField("minute");
        String kpi = tuple.getString(3);
        // Combine each date bucket with the KPI value
        String kpiByDay = day + "_" + kpi;
        String kpiByHour = hour + "_" + kpi;
        String kpiByMinute = minute + "_" + kpi;
        // Keep the running counts in an in-memory map
        int kpiCountByDay = 0;
        int kpiCountByHour = 0;
        int kpiCountByMinute = 0;
        if (kpiCounts.containsKey(kpiByDay)) {
            kpiCountByDay = kpiCounts.get(kpiByDay);
        }
        if (kpiCounts.containsKey(kpiByHour)) {
            kpiCountByHour = kpiCounts.get(kpiByHour);
        }
        if (kpiCounts.containsKey(kpiByMinute)) {
            kpiCountByMinute = kpiCounts.get(kpiByMinute);
        }
        kpiCountByDay++;
        kpiCountByHour++;
        kpiCountByMinute++;
        kpiCounts.put(kpiByDay, kpiCountByDay);
        kpiCounts.put(kpiByHour, kpiCountByHour);
        kpiCounts.put(kpiByMinute, kpiCountByMinute);
        // Clear the previous day's entries once the day rolls over
        if (!currentDay.equals(day)) {
            // a new day has started
            Iterator<Map.Entry<String, Integer>> iter = kpiCounts.entrySet().iterator();
            while (iter.hasNext()) {
                Map.Entry<String, Integer> entry = iter.next();
                if (entry.getKey().startsWith(currentDay)) {
                    iter.remove();
                }
            }
        }
        currentDay = day;
        // Emit two fields per bucket: the composed key and its count
        this.outputCollector.emit(tuple, new Values(kpiType + "_" + kpiByDay, kpiCountByDay));
        this.outputCollector.emit(tuple, new Values(kpiType + "_" + kpiByHour, kpiCountByHour));
        this.outputCollector.emit(tuple, new Values(kpiType + "_" + kpiByMinute, kpiCountByMinute));
        this.outputCollector.ack(tuple);
    }

    @Override
    public void cleanup() {
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields(WebLogConstants.SERVERTIME_KPI, WebLogConstants.KPI_COUNTS));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
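  As an illustration (values made up): when CountKpiBolt("I") receives a tuple with day=20180410, hour=2018041014, minute=201804101440 and ip=1.2.3.4, it increments three counters and emits three two-field tuples on its default stream:

 ("I_20180410_1.2.3.4",     countForThatDay)
 ("I_2018041014_1.2.3.4",   countForThatHour)
 ("I_201804101440_1.2.3.4", countForThatMinute)

  These composed keys later become the HBase row keys in SaveBolt.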

2. SaveBolt.java

  At this stage it only prints the incoming tuples.

package com.jun.it2;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Tuple;

import java.util.Map;

public class SaveBolt implements IRichBolt {
    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
    }

    @Override
    public void execute(Tuple tuple) {
        String serverTimeAndKpi = tuple.getStringByField(WebLogConstants.SERVERTIME_KPI);
        Integer kpiCounts = tuple.getIntegerByField(WebLogConstants.KPI_COUNTS);
        System.err.println("serverTimeAndKpi=" + serverTimeAndKpi + ", kpiCounts=" + kpiCounts);
    }

    @Override
    public void cleanup() {
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}

3. Result

  

Part IV: Saving to HBase

1. SaveBolt.java

package com.jun.it2;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Tuple;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.Map;

import static com.jun.it2.WebLogConstants.HBASE_TABLENAME;

public class SaveBolt implements IRichBolt {

    private HTable table;
    private OutputCollector outputCollector;

    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
        Configuration configuration = HBaseConfiguration.create();
        try {
            table = new HTable(configuration, HBASE_TABLENAME);
        } catch (IOException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
        this.outputCollector = outputCollector;
    }

    @Override
    public void execute(Tuple tuple) {
        String serverTimeAndKpi = tuple.getStringByField(WebLogConstants.SERVERTIME_KPI);
        Integer kpiCounts = tuple.getIntegerByField(WebLogConstants.KPI_COUNTS);
        // System.err.println("serverTimeAndKpi=" + serverTimeAndKpi + ", kpiCounts=" + kpiCounts);
        if (serverTimeAndKpi != null && kpiCounts != null) {
            // Row key: the composed key; column qualifier: the KPI type prefix
            Put put = new Put(Bytes.toBytes(serverTimeAndKpi));
            String columnQualifier = serverTimeAndKpi.split("_")[0];
            put.add(Bytes.toBytes(WebLogConstants.COLUMN_FAMILY),
                    Bytes.toBytes(columnQualifier), Bytes.toBytes("" + kpiCounts));
            try {
                table.put(put);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
        this.outputCollector.ack(tuple);
    }

    @Override
    public void cleanup() {
        if (table != null) {
            try {
                table.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}

2. Services currently running

  

3. Create the table in HBase
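  The screenshot is omitted here; based on the constants class in Part V (HBASE_TABLENAME = "weblogstatictis", COLUMN_FAMILY = "info"), the table can be created in the hbase shell with something like:

   create 'weblogstatictis', 'info'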

  

4. Run the program

  The following error is reported:

 ERROR org.apache.hadoop.util.Shell - Failed to locate the winutils binary in the hadoop binary path
java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:355) [hadoop-common-2.5.0-cdh5.3.6.jar:na]
at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:370) [hadoop-common-2.5.0-cdh5.3.6.jar:na]
at org.apache.hadoop.util.Shell.<clinit>(Shell.java:363) [hadoop-common-2.5.0-cdh5.3.6.jar:na]
at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:79) [hadoop-common-2.5.0-cdh5.3.6.jar:na]
at org.apache.hadoop.security.Groups.parseStaticMapping(Groups.java:104) [hadoop-common-2.5.0-cdh5.3.6.jar:na]
at org.apache.hadoop.security.Groups.<init>(Groups.java:86) [hadoop-common-2.5.0-cdh5.3.6.jar:na]
at org.apache.hadoop.security.Groups.<init>(Groups.java:66) [hadoop-common-2.5.0-cdh5.3.6.jar:na]
at org.apache.hadoop.security.Groups.getUserToGroupsMappingService(Groups.java:280) [hadoop-common-2.5.0-cdh5.3.6.jar:na]
at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:269) [hadoop-common-2.5.0-cdh5.3.6.jar:na]
at org.apache.hadoop.security.UserGroupInformation.ensureInitialized(UserGroupInformation.java:246) [hadoop-common-2.5.0-cdh5.3.6.jar:na]
at org.apache.hadoop.security.UserGroupInformation.loginUserFromSubject(UserGroupInformation.java:775) [hadoop-common-2.5.0-cdh5.3.6.jar:na]
at org.apache.hadoop.security.UserGroupInformation.getLoginUser(UserGroupInformation.java:760) [hadoop-common-2.5.0-cdh5.3.6.jar:na]
at org.apache.hadoop.security.UserGroupInformation.getCurrentUser(UserGroupInformation.java:633) [hadoop-common-2.5.0-cdh5.3.6.jar:na]
at org.apache.hadoop.hbase.security.User$SecureHadoopUser.<init>(User.java:260) [hbase-common-0.98.6-cdh5.3.6.jar:na]
at org.apache.hadoop.hbase.security.User$SecureHadoopUser.<init>(User.java:256) [hbase-common-0.98.6-cdh5.3.6.jar:na]
at org.apache.hadoop.hbase.security.User.getCurrent(User.java:160) [hbase-common-0.98.6-cdh5.3.6.jar:na]
at org.apache.hadoop.hbase.security.UserProvider.getCurrent(UserProvider.java:89) [hbase-common-0.98.6-cdh5.3.6.jar:na]
at org.apache.hadoop.hbase.client.HConnectionKey.<init>(HConnectionKey.java:70) [hbase-client-0.98.6-cdh5.3.6.jar:na]
at org.apache.hadoop.hbase.client.HConnectionManager.getConnection(HConnectionManager.java:267) [hbase-client-0.98.6-cdh5.3.6.jar:na]
at org.apache.hadoop.hbase.client.HTable.<init>(HTable.java:199) [hbase-client-0.98.6-cdh5.3.6.jar:na]
at org.apache.hadoop.hbase.client.HTable.<init>(HTable.java:161) [hbase-client-0.98.6-cdh5.3.6.jar:na]
at com.jun.it2.SaveBolt.prepare(SaveBolt.java:27) [classes/:na]
at backtype.storm.daemon.executor$fn__3439$fn__3451.invoke(executor.clj:699) [storm-core-0.9.6.jar:0.9.6]
at backtype.storm.util$async_loop$fn__460.invoke(util.clj:461) [storm-core-0.9.6.jar:0.9.6]
at clojure.lang.AFn.run(AFn.java:24) [clojure-1.5.1.jar:na]
at java.lang.Thread.run(Thread.java:748) [na:1.8.0_144]

5. Fix found online

  1. Download the Windows build of winutils

    Someone on GitHub provides a Windows build of winutils; the project is at https://github.com/srccodes/hadoop-common-2.2.0-bin. Download the project's zip (the file is named hadoop-common-2.2.0-bin-master.zip) and unpack it to any directory.

  2. Configure the environment variable

    Add a user variable HADOOP_HOME whose value is the directory the zip was unpacked to, then append %HADOOP_HOME%\bin to the Path system variable.

    Run the program again and it executes normally.
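  An alternative that avoids editing the system environment (the path below is only an example) is to set the hadoop.home.dir system property, which Hadoop's Shell class checks before falling back to HADOOP_HOME, at the very top of main():

 // Assumption: the winutils package was unpacked to D:\hadoop-common-2.2.0-bin-master
 System.setProperty("hadoop.home.dir", "D:\\hadoop-common-2.2.0-bin-master");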

6. Screenshot

  

7. Add the configuration files

  This step is required when running on Windows.
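  The original screenshot of the configuration files is not included. Assuming the same single-node setup used elsewhere in this post, the file that matters most is an hbase-site.xml on the classpath that points the HBase client at ZooKeeper, roughly:

 <configuration>
     <property>
         <name>hbase.zookeeper.quorum</name>
         <value>linux-hadoop01.ibeifeng.com</value>
     </property>
     <property>
         <name>hbase.zookeeper.property.clientPort</name>
         <value>2181</value>
     </property>
 </configuration>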

  

8. Final result
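  The screenshot is omitted; the rows written by SaveBolt can also be inspected directly from the hbase shell, for example:

   scan 'weblogstatictis'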

  

Part V: PS, the complete program

1. The main driver class

package com.jun.it2;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.generated.StormTopology;
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
import org.apache.hadoop.fs.Path;
import storm.kafka.*;

import java.io.File;
import java.io.IOException;
import java.util.UUID;

public class WebLogStatictis {

    /**
     * Main entry point
     * @param args
     */
    public static void main(String[] args) throws IOException {
        WebLogStatictis webLogStatictis = new WebLogStatictis();
        StormTopology stormTopology = webLogStatictis.createTopology();
        Config config = new Config();
        // Run locally or on the cluster
        // config.setNumAckers(4);
        if (args == null || args.length == 0) {
            // Local mode
            LocalCluster localCluster = new LocalCluster();
            localCluster.submitTopology("webloganalyse2", config, stormTopology);
        } else {
            // Submit to the cluster
            config.setNumWorkers(4); // number of worker processes for this topology
            try {
                StormSubmitter.submitTopology(args[0], config, stormTopology);
            } catch (AlreadyAliveException e) {
                e.printStackTrace();
            } catch (InvalidTopologyException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Build a KafkaSpout
     * @return
     */
    private IRichSpout generateSpout() {
        BrokerHosts hosts = new ZkHosts("linux-hadoop01.ibeifeng.com:2181");
        String topic = "nginxlog";
        String zkRoot = "/" + topic;
        String id = UUID.randomUUID().toString();
        SpoutConfig spoutConf = new SpoutConfig(hosts, topic, zkRoot, id);
        spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme()); // decode Kafka messages as strings
        spoutConf.forceFromStart = true;
        KafkaSpout kafkaSpout = new KafkaSpout(spoutConf);
        return kafkaSpout;
    }

    public StormTopology createTopology() {
        TopologyBuilder topologyBuilder = new TopologyBuilder();
        // Spout
        topologyBuilder.setSpout(WebLogConstants.KAFKA_SPOUT_ID, generateSpout());
        // WebLogParserBolt
        topologyBuilder.setBolt(WebLogConstants.WEB_LOG_PARSER_BOLT, new WebLogParserBolt())
                .shuffleGrouping(WebLogConstants.KAFKA_SPOUT_ID);
        // CountKpiBolt: fieldsGrouping takes the upstream component, the stream id, and the grouping fields
        topologyBuilder.setBolt(WebLogConstants.COUNT_IP_BOLT, new CountKpiBolt(WebLogConstants.IP_KPI))
                .fieldsGrouping(WebLogConstants.WEB_LOG_PARSER_BOLT, WebLogConstants.IP_COUNT_STREAM, new Fields(WebLogConstants.IP));
        // SaveBolt: aggregates and persists the results
        topologyBuilder.setBolt(WebLogConstants.SAVE_BOLT, new SaveBolt(), 3)
                .shuffleGrouping(WebLogConstants.COUNT_IP_BOLT);
        return topologyBuilder.createTopology();
    }
}
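  Since main() takes the topology name as args[0], submitting the packaged topology to a cluster looks roughly like this (the jar name is hypothetical):

   bin/storm jar weblogstatictis.jar com.jun.it2.WebLogStatictis webloganalyse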

2. The constants class

package com.jun.it2;

public class WebLogConstants {
    // Spout and bolt IDs
    public static String KAFKA_SPOUT_ID = "kafkaSpoutId";
    public static final String WEB_LOG_PARSER_BOLT = "webLogParserBolt";
    public static final String COUNT_IP_BOLT = "countIpBolt";
    public static final String COUNT_BROWSER_BOLT = "countBrowserBolt";
    public static final String COUNT_OS_BOLT = "countOsBolt";
    public static final String USER_AGENT_PARSER_BOLT = "userAgentParserBolt";
    public static final String SAVE_BOLT = "saveBolt";

    // Stream IDs
    public static final String IP_COUNT_STREAM = "ipCountStream";
    public static final String URL_PARSER_STREAM = "urlParserStream";
    public static final String HTTPREFER_PARSER_STREAM = "httpReferParserStream";
    public static final String USERAGENT_PARSER_STREAM = "userAgentParserStream";
    public static final String BROWSER_COUNT_STREAM = "browserCountStream";
    public static final String OS_COUNT_STREAM = "osCountStream";

    // Tuple field names
    public static final String DAY = "day";
    public static final String HOUR = "hour";
    public static final String MINUTE = "minute";
    public static final String IP = "ip";
    public static final String REQUEST_URL = "requestUrl";
    public static final String HTTP_REFER = "httpRefer";
    public static final String USERAGENT = "userAgent";
    public static final String BROWSER = "browser";
    public static final String OS = "os";
    public static final String SERVERTIME_KPI = "serverTimeAndKpi";
    public static final String KPI_COUNTS = "kpiCounts";

    // KPI types
    public static final String IP_KPI = "I";
    public static final String URL_KPI = "U";
    public static final String BROWSER_KPI = "B";
    public static final String OS_KPI = "O";

    // HBase
    public static final String HBASE_TABLENAME = "weblogstatictis";
    public static final String COLUMN_FAMILY = "info";
}

3. The parser class

package com.jun.it2;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.jun.it2.WebLogConstants.*;

public class WebLogParserBolt implements IRichBolt {
    private Pattern pattern;
    private OutputCollector outputCollector;

    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
        pattern = Pattern.compile("([^ ]*) [^ ]* [^ ]* \\[([\\d+]*)\\] \\\"[^ ]* ([^ ]*) [^ ]*\\\" \\d{3} \\d+ \\\"([^\"]*)\\\" \\\"([^\"]*)\\\" \\\"[^ ]*\\\"");
        this.outputCollector = outputCollector;
    }

    @Override
    public void execute(Tuple tuple) {
        String webLog = tuple.getStringByField("str");
        if (webLog != null && !"".equals(webLog)) {
            Matcher matcher = pattern.matcher(webLog);
            if (matcher.find()) {
                String ip = matcher.group(1);
                String serverTimeStr = matcher.group(2);
                // Turn the server time (epoch milliseconds) into day/hour/minute buckets
                long timestamp = Long.parseLong(serverTimeStr);
                Date date = new Date();
                date.setTime(timestamp);
                DateFormat df = new SimpleDateFormat("yyyyMMddHHmm");
                String dateStr = df.format(date);
                String day = dateStr.substring(0, 8);
                String hour = dateStr.substring(0, 10);
                String minute = dateStr;

                String requestUrl = matcher.group(3);
                String httpRefer = matcher.group(4);
                String userAgent = matcher.group(5);

                // Uncomment to verify that the regex matches correctly
                // System.err.println(webLog);
                // System.err.println("ip=" + ip
                //         + ", serverTimeStr=" + serverTimeStr
                //         + ", requestUrl=" + requestUrl
                //         + ", httpRefer=" + httpRefer
                //         + ", userAgent=" + userAgent);

                // Split the tuple into one stream per KPI
                this.outputCollector.emit(WebLogConstants.IP_COUNT_STREAM, tuple, new Values(day, hour, minute, ip));
                this.outputCollector.emit(WebLogConstants.URL_PARSER_STREAM, tuple, new Values(day, hour, minute, requestUrl));
                this.outputCollector.emit(WebLogConstants.HTTPREFER_PARSER_STREAM, tuple, new Values(day, hour, minute, httpRefer));
                this.outputCollector.emit(WebLogConstants.USERAGENT_PARSER_STREAM, tuple, new Values(day, hour, minute, userAgent));
            }
        }
        this.outputCollector.ack(tuple);
    }

    @Override
    public void cleanup() {
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declareStream(WebLogConstants.IP_COUNT_STREAM, new Fields(DAY, HOUR, MINUTE, IP));
        outputFieldsDeclarer.declareStream(WebLogConstants.URL_PARSER_STREAM, new Fields(DAY, HOUR, MINUTE, REQUEST_URL));
        outputFieldsDeclarer.declareStream(WebLogConstants.HTTPREFER_PARSER_STREAM, new Fields(DAY, HOUR, MINUTE, HTTP_REFER));
        outputFieldsDeclarer.declareStream(WebLogConstants.USERAGENT_PARSER_STREAM, new Fields(DAY, HOUR, MINUTE, USERAGENT));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}

4. The counter class

package com.jun.it2;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

public class CountKpiBolt implements IRichBolt {

    private String kpiType;
    private Map<String, Integer> kpiCounts;
    private String currentDay = "";
    private OutputCollector outputCollector;

    public CountKpiBolt(String kpiType) {
        this.kpiType = kpiType;
    }

    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
        this.kpiCounts = new HashMap<>();
        this.outputCollector = outputCollector;
    }

    @Override
    public void execute(Tuple tuple) {
        String day = tuple.getStringByField("day");
        String hour = tuple.getStringByField("hour");
        String minute = tuple.getStringByField("minute");
        String kpi = tuple.getString(3);
        // Combine each date bucket with the KPI value
        String kpiByDay = day + "_" + kpi;
        String kpiByHour = hour + "_" + kpi;
        String kpiByMinute = minute + "_" + kpi;
        // Keep the running counts in an in-memory map
        int kpiCountByDay = 0;
        int kpiCountByHour = 0;
        int kpiCountByMinute = 0;
        if (kpiCounts.containsKey(kpiByDay)) {
            kpiCountByDay = kpiCounts.get(kpiByDay);
        }
        if (kpiCounts.containsKey(kpiByHour)) {
            kpiCountByHour = kpiCounts.get(kpiByHour);
        }
        if (kpiCounts.containsKey(kpiByMinute)) {
            kpiCountByMinute = kpiCounts.get(kpiByMinute);
        }
        kpiCountByDay++;
        kpiCountByHour++;
        kpiCountByMinute++;
        kpiCounts.put(kpiByDay, kpiCountByDay);
        kpiCounts.put(kpiByHour, kpiCountByHour);
        kpiCounts.put(kpiByMinute, kpiCountByMinute);
        // Clear the previous day's entries once the day rolls over
        if (!currentDay.equals(day)) {
            // a new day has started
            Iterator<Map.Entry<String, Integer>> iter = kpiCounts.entrySet().iterator();
            while (iter.hasNext()) {
                Map.Entry<String, Integer> entry = iter.next();
                if (entry.getKey().startsWith(currentDay)) {
                    iter.remove();
                }
            }
        }
        currentDay = day;
        // Emit two fields per bucket: the composed key and its count
        this.outputCollector.emit(tuple, new Values(kpiType + "_" + kpiByDay, kpiCountByDay));
        this.outputCollector.emit(tuple, new Values(kpiType + "_" + kpiByHour, kpiCountByHour));
        this.outputCollector.emit(tuple, new Values(kpiType + "_" + kpiByMinute, kpiCountByMinute));
        this.outputCollector.ack(tuple);
    }

    @Override
    public void cleanup() {
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields(WebLogConstants.SERVERTIME_KPI, WebLogConstants.KPI_COUNTS));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}

5. The save class

package com.jun.it2;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Tuple;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.Map;

import static com.jun.it2.WebLogConstants.HBASE_TABLENAME;

public class SaveBolt implements IRichBolt {

    private HTable table;
    private OutputCollector outputCollector;

    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
        Configuration configuration = HBaseConfiguration.create();
        try {
            table = new HTable(configuration, HBASE_TABLENAME);
        } catch (IOException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
        this.outputCollector = outputCollector;
    }

    @Override
    public void execute(Tuple tuple) {
        String serverTimeAndKpi = tuple.getStringByField(WebLogConstants.SERVERTIME_KPI);
        Integer kpiCounts = tuple.getIntegerByField(WebLogConstants.KPI_COUNTS);
        System.err.println("serverTimeAndKpi=" + serverTimeAndKpi + ", kpiCounts=" + kpiCounts);
        if (serverTimeAndKpi != null && kpiCounts != null) {
            // Row key: the composed key; column qualifier: the KPI type prefix
            Put put = new Put(Bytes.toBytes(serverTimeAndKpi));
            String columnQualifier = serverTimeAndKpi.split("_")[0];
            put.add(Bytes.toBytes(WebLogConstants.COLUMN_FAMILY),
                    Bytes.toBytes(columnQualifier), Bytes.toBytes("" + kpiCounts));
            try {
                table.put(put);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
        this.outputCollector.ack(tuple);
    }

    @Override
    public void cleanup() {
        if (table != null) {
            try {
                table.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}

  

  
