Setting Up a Storm Cluster on CentOS 7 and Basic Operations
Prerequisites
Both Storm and the Kafka used in the later example depend on ZooKeeper, so set up a ZooKeeper cluster first; the detailed installation steps are covered in my other article.
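Before moving on, it is worth confirming that the ZooKeeper ensemble is healthy. A minimal check, assuming zkServer.sh is on the PATH of each ZooKeeper node (adjust the path to your installation), looks like this:
# Run on every ZooKeeper node: one node should report Mode: leader, the others Mode: follower
zkServer.sh status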
Installing Storm
Download
wget https://mirrors.tuna.tsinghua.edu.cn/apache/storm/apache-storm-1.1.0/apache-storm-1.1.0.tar.gz
Extract
tar -zxvf apache-storm-1.1.0.tar.gz
Move the directory
mv apache-storm-1.1.0 /usr/local/hadoop/
Edit the configuration file
vim /usr/local/hadoop/apache-storm-1.1.0/conf/storm.yaml
storm.zookeeper.servers:
  - "192.168.174.200"
  - "192.168.174.201"
nimbus.seeds: ["192.168.174.200"]
storm.local.dir: "/usr/local/hadoop/apache-storm-1.1.0/data"
- storm.zookeeper.servers: the addresses of the ZooKeeper ensemble; if ZooKeeper does not listen on its default port, storm.zookeeper.port must also be set (see the sketch after this list)
- storm.zookeeper.port: the port used by the ZooKeeper ensemble
- storm.local.dir: a local directory where Storm stores a small amount of working state
- nimbus.seeds: the addresses of the master (Nimbus) nodes; more than one can be listed
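Putting these options together, a minimal conf/storm.yaml might look like the sketch below; the storm.zookeeper.port line is only needed when ZooKeeper listens on a non-default port, and 2181 is shown purely as an illustration:
storm.zookeeper.servers:
  - "192.168.174.200"
  - "192.168.174.201"
storm.zookeeper.port: 2181
nimbus.seeds: ["192.168.174.200"]
storm.local.dir: "/usr/local/hadoop/apache-storm-1.1.0/data"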
Copy the installation to the remaining worker nodes
scp -r /usr/local/hadoop/apache-storm-1.1.0 salver1:/usr/local/hadoop/
Storm Operations
Start the master node (Nimbus); the commands below are run from the bin directory of the Storm installation
./storm nimbus 1>/dev/null 2>&1 &
Start the web UI on the master node
./storm ui 1>/dev/null 2>&1 &
Start the worker nodes (Supervisors)
./storm supervisor 1>/dev/null 2>&1 &
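A rough way to confirm the daemons started is to list the Java processes on each node; the process names below are what Storm 1.x typically shows and may vary between versions:
# On the master node, jps should list nimbus and core (the UI process)
# On each worker node, jps should list supervisor
jps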
Web UI address (replace 127.0.0.1 with the master node's IP when accessing from another machine)
http://127.0.0.1:8080
Submit a topology
./storm jar storm-book.jar com.TopologyMain /usr/words.txt
Kill a topology
./storm kill Getting-Started-Toplogie
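To see which topologies are currently running, for example to confirm that a kill has taken effect, the list subcommand can be used:
./storm list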
Complete example: a topology that reads messages from Kafka, counts them by type, and writes the result to Redis
package com;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.kafka.KafkaSpout;
import org.apache.storm.kafka.SpoutConfig;
import org.apache.storm.kafka.StringScheme;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.redis.bolt.RedisStoreBolt;
import org.apache.storm.redis.common.config.JedisPoolConfig;
import org.apache.storm.redis.common.mapper.RedisStoreMapper;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
public class MykafkaSpout {
/**
 * Topology entry point: wires a Kafka spout to a pass-through bolt, a per-type
 * counting bolt, a reporting bolt, and a Redis store bolt. When a command-line
 * argument is given, the topology is submitted to the cluster under that name;
 * otherwise it runs in a LocalCluster.
 *
 * @param args optional topology name
 * @throws AuthorizationException
 */
public static void main(String[] args) throws AuthorizationException {
String host = "127.0.0.1";
int port = 6385;
String topic = "test" ;
ZkHosts zkHosts = new ZkHosts("192.168.174.200:2181,192.168.174.201:2181");
SpoutConfig spoutConfig = new SpoutConfig(zkHosts, topic,
"",
UUID.randomUUID().toString()) ;
List<String> zkServers = new ArrayList<String>() ;
zkServers.add("192.168.174.200");
zkServers.add("192.168.174.201");
spoutConfig.zkServers = zkServers;
spoutConfig.zkPort = 2181;
spoutConfig.socketTimeoutMs = 60 * 1000 ;
spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme()) ;
spoutConfig.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
TopologyBuilder builder = new TopologyBuilder() ;
builder.setSpout("spout", new KafkaSpout(spoutConfig) ,1) ;
builder.setBolt("bolt1", new MyKafkaBolt(), 2).shuffleGrouping("spout") ;
builder.setBolt("MyCountBolt", new MyCountBolt(), 2).fieldsGrouping("bolt1", new Fields("type"));
// Aggregate all the per-type counts and output the summary
builder.setBolt("MyReportBolt", new MyReportBolt(), 2).globalGrouping("MyCountBolt");
JedisPoolConfig poolConfig = new JedisPoolConfig.Builder().setHost(host).setPort(port).setPassword("Apple05101314").build();
RedisStoreMapper storeMapper = new MyCountStoreMapper();
RedisStoreBolt storeBolt = new RedisStoreBolt(poolConfig, storeMapper);
// Save the result to Redis
builder.setBolt("redis-store-bolt", storeBolt).globalGrouping("MyReportBolt");
Config conf = new Config ();
conf.setDebug(false) ;
if (args.length > 0) {
try {
StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
} catch (AlreadyAliveException e) {
e.printStackTrace();
} catch (InvalidTopologyException e) {
e.printStackTrace();
}
}else {
LocalCluster localCluster = new LocalCluster();
localCluster.submitTopology("mytopology", conf, builder.createTopology());
}
}
}
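The class above is the entry point. As a rough usage sketch, assuming the project has been packaged as kafka-storm-example.jar (a placeholder name; in practice it should be a shaded jar that bundles the storm-kafka and storm-redis dependencies), it can be submitted to the cluster with a topology name as the first argument, or run locally by omitting the argument:
./storm jar kafka-storm-example.jar com.MykafkaSpout mytopology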
package com;
import java.util.HashMap;
import java.util.Map;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
public class MyKafkaBolt extends BaseRichBolt {
private OutputCollector outputCollector;
// key: message id, value: message data (reserved for manual ack tracking; not used in this example)
private HashMap<String, String> waitAck = new HashMap<String, String>();
public void prepare(Map map, TopologyContext context,
OutputCollector collector) {
this.outputCollector = collector;
}
public void execute(Tuple input) {
// Forward the raw Kafka message downstream under the field name "type"
String kafkaMsg = input.getString(0);
if (kafkaMsg != null) {
this.outputCollector.emit(new Values(kafkaMsg));
this.outputCollector.ack(input);
}
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("type"));
}
}
package com;
import java.util.HashMap;
import java.util.Map;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
public class MyCountBolt extends BaseRichBolt {
private OutputCollector outputCollector;
private HashMap<String, Integer> count;
public void prepare(Map stormConf, TopologyContext context,
OutputCollector collector) {
this.outputCollector = collector;
this.count = new HashMap<String, Integer>();
}
public void execute(Tuple input) {
// Increment the running count for this message type and emit the updated total
String type = input.getStringByField("type");
int cnt = 1;
if (count.containsKey(type)) {
cnt = count.get(type) + 1;
}
count.put(type, cnt);
this.outputCollector.emit(new Values(type, cnt));
this.outputCollector.ack(input);
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("type", "cnt"));
}
}
package com;
import org.apache.storm.redis.common.mapper.RedisDataTypeDescription;
import org.apache.storm.redis.common.mapper.RedisStoreMapper;
import org.apache.storm.tuple.ITuple;
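/**
 * Maps tuples emitted by MyReportBolt into a Redis hash named "myCount":
 * the "zs" field becomes the hash field and the "cnt" field its value.
 */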
public class MyCountStoreMapper implements RedisStoreMapper {
private RedisDataTypeDescription description;
private final String hashKey = "myCount";
public MyCountStoreMapper() {
description = new RedisDataTypeDescription(
RedisDataTypeDescription.RedisDataType.HASH, hashKey);
}
public RedisDataTypeDescription getDataTypeDescription() {
return description;
}
public String getKeyFromTuple(ITuple tuple) {
return tuple.getStringByField("zs");
}
public String getValueFromTuple(ITuple tuple) {
return tuple.getIntegerByField("cnt")+"";
}
}
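Once the topology has processed some messages, the stored result can be checked directly in Redis; the host, port, and password below are the ones configured in the main class:
redis-cli -h 127.0.0.1 -p 6385 -a Apple05101314 HGETALL myCount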
package com;
import java.util.HashMap;
import java.util.Map;
import org.apache.log4j.Logger;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
/**
* Created by gzx on 17-2-6.
*/
public class MyReportBolt extends BaseRichBolt {
private static Logger logger = Logger.getLogger(MyReportBolt.class);
private OutputCollector outputCollector;
private HashMap<String, Integer> count;
public void prepare(Map map, TopologyContext topologyContext,
OutputCollector collector) {
this.count = new HashMap<String, Integer>();
this.outputCollector = collector;
}
/**
 * Tracks the latest count for each type; once both "join" and "out" have been
 * seen, computes and emits the current remaining total (join - out) under the
 * key "zs".
 *
 * @param tuple
 */
public void execute(Tuple tuple) {
String type = tuple.getStringByField("type");
int cnt = tuple.getIntegerByField("cnt");
count.put(type, cnt);
if (count.containsKey("join") && count.containsKey("out")) {
int join = count.get("join");
int out = count.get("out");
int sy = join - out;
System.out.println("join=" + join);
System.out.println("out=" + out);
logger.debug("=== current remaining total === " + sy);
this.outputCollector.emit(new Values("zs", sy));
}
// Ack every tuple, even when the remaining total cannot be computed yet
this.outputCollector.ack(tuple);
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("zs", "cnt"));
}
}
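To drive the pipeline end to end, messages whose whole payload is the string join or out can be written to the test topic, for example with the Kafka console producer (the broker address and port 9092 are assumptions; adjust them to your Kafka setup):
bin/kafka-console-producer.sh --broker-list 192.168.174.200:9092 --topic test
# then type messages, one per line:
join
join
out
After a short while the myCount hash in Redis should contain the field zs holding the current remaining total (joins minus outs).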