Hadoop ecosystem setup (3 nodes) - 10. Spark configuration
# https://www.scala-lang.org/download/2.12.4.html
# ==================================================================install scala
tar -zxvf ~/scala-2.12.4.tgz -C /usr/local
rm -r ~/scala-2.12.4.tgz
# http://archive.apache.org/dist/spark/spark-2.3.0/
# ==================================================================install spark
tar -zxf ~/spark-2.3.0-bin-hadoop2.7.tgz -C /usr/local
mv /usr/local/spark-2.3.0-bin-hadoop2.7 /usr/local/spark-2.3.0
rm -r ~/spark-2.3.0-bin-hadoop2.7.tgz
# environment variables
# ==================================================================node1 node2 node3
vi /etc/profile
# add the following below the line "export PATH USER LOGNAME MAIL HOSTNAME HISTSIZE HISTCONTROL":
export JAVA_HOME=/usr/java/jdk1.8.0_111
export ZOOKEEPER_HOME=/usr/local/zookeeper-3.4.12
export HADOOP_HOME=/usr/local/hadoop/hadoop-2.7.6
export MYSQL_HOME=/usr/local/mysql
export HBASE_HOME=/usr/local/hbase-1.2.4
export HIVE_HOME=/usr/local/hive-2.1.1
export SCALA_HOME=/usr/local/scala-2.12.4
export KAFKA_HOME=/usr/local/kafka_2.12-0.10.2.1
export FLUME_HOME=/usr/local/flume-1.8.0
export SPARK_HOME=/usr/local/spark-2.3.0

export PATH=$PATH:$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$ZOOKEEPER_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$MYSQL_HOME/bin:$HBASE_HOME/bin:$HIVE_HOME/bin:$SCALA_HOME/bin:$KAFKA_HOME/bin:$FLUME_HOME/bin:$SPARK_HOME/bin:$SPARK_HOME/sbin
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
# ==================================================================node1
# apply the environment variables
source /etc/profile

# check the result
echo $SPARK_HOME
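
# optional sanity check (not in the original steps): confirm the unpacked scala and spark are on PATH
scala -version
$SPARK_HOME/bin/spark-submit --version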
# ==================================================================node1
cp $SPARK_HOME/conf/docker.properties.template $SPARK_HOME/conf/docker.properties
vi $SPARK_HOME/conf/docker.properties
spark.mesos.executor.home: /usr/local/spark-2.3.0

cp $SPARK_HOME/conf/fairscheduler.xml.template $SPARK_HOME/conf/fairscheduler.xml
cp $SPARK_HOME/conf/log4j.properties.template $SPARK_HOME/conf/log4j.properties
cp $SPARK_HOME/conf/metrics.properties.template $SPARK_HOME/conf/metrics.properties
cp $SPARK_HOME/conf/slaves.template $SPARK_HOME/conf/slaves
vi $SPARK_HOME/conf/slaves
node1
node2
node3

cp $SPARK_HOME/conf/spark-defaults.conf.template $SPARK_HOME/conf/spark-defaults.conf
vi $SPARK_HOME/conf/spark-defaults.conf
spark.eventLog.enabled true
spark.eventLog.dir hdfs://appcluster/spark/eventslog
# directory the history server page reads; requires the two event log settings above to be enabled first
spark.history.fs.logDirectory hdfs://appcluster/spark
spark.eventLog.compress true
# if the YARN ResourceManager should be able to reach the Spark History Server, also add:
# spark.yarn.historyServer.address http://node1:19888
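
# optional (not in the original steps): once HDFS is up and the event log directory exists,
# the bundled history server script can be started; it serves on port 18080 by default
# (assumption: default port not overridden)
# hdfs dfs -mkdir -p /spark/eventslog
# $SPARK_HOME/sbin/start-history-server.sh
# http://node1:18080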
cp $SPARK_HOME/conf/spark-env.sh.template $SPARK_HOME/conf/spark-env.sh

vi $SPARK_HOME/conf/spark-env.sh
export SPARK_MASTER_PORT=7077            # job submission port, default 7077
export SPARK_MASTER_WEBUI_PORT=8070      # master web UI port, changed from the default 8080 to 8070
export SPARK_WORKER_CORES=1              # number of cores each worker may use
export SPARK_WORKER_MEMORY=1g            # amount of memory each worker may use
export SPARK_WORKER_PORT=7078            # worker port (optional)
export SPARK_WORKER_WEBUI_PORT=8071      # worker web UI port (optional)
export SPARK_WORKER_INSTANCES=1          # number of worker instances per node (optional)
export JAVA_HOME=/usr/java/jdk1.8.0_111
export SCALA_HOME=/usr/local/scala-2.12.4
export HADOOP_HOME=/usr/local/hadoop/hadoop-2.7.6
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_PID_DIR=/usr/local/spark-2.3.0/pids
export SPARK_LOCAL_DIR=/usr/local/spark-2.3.0/tmp
export LD_LIBRARY_PATH=$HADOOP_HOME/lib/native
export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=node1:2181,node2:2181,node3:2181 -Dspark.deploy.zookeeper.dir=/spark"

vi $SPARK_HOME/sbin/start-master.sh
SPARK_MASTER_WEBUI_PORT=8070

cp $HADOOP_HOME/etc/hadoop/hdfs-site.xml $SPARK_HOME/conf/

# silence the "Unable to load native-hadoop library" NativeCodeLoader warning
vi $HADOOP_HOME/etc/hadoop/log4j.properties
log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR

scp -r $HADOOP_HOME/etc/hadoop/log4j.properties node2:$HADOOP_HOME/etc/hadoop/
scp -r $HADOOP_HOME/etc/hadoop/log4j.properties node3:$HADOOP_HOME/etc/hadoop/
# ==================================================================node1
scp -r $SPARK_HOME node2:/usr/local/
scp -r $SPARK_HOME node3:/usr/local/
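
# note (assumption, not covered by the original steps): scala was only unpacked on node1 above;
# if node2/node3 should also have the scala command on PATH, copy it over as well
# scp -r /usr/local/scala-2.12.4 node2:/usr/local/
# scp -r /usr/local/scala-2.12.4 node3:/usr/local/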
# ==================================================================node2 node3
# apply the environment variables
source /etc/profile

# check the result
echo $SPARK_HOME
# startup
# ==================================================================node1 node2 node3
# start zookeeper and hdfs first
zkServer.sh start
zkServer.sh status

# ==================================================================node1
zkCli.sh
create /spark ''
quit

$HADOOP_HOME/sbin/start-all.sh
$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc

# ==================================================================node2
$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc
$HADOOP_HOME/sbin/yarn-daemon.sh start resourcemanager
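
# optional check (not in the original steps): jps on each node should now list the expected daemons
# (QuorumPeerMain, NameNode/DataNode, DFSZKFailoverController, ResourceManager/NodeManager,
# depending on the node)
jps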
# start spark
# ==================================================================node1
$SPARK_HOME/sbin/start-master.sh
$SPARK_HOME/sbin/start-slaves.sh

# ==================================================================node2
$SPARK_HOME/sbin/start-master.sh

# ==================================================================node1
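
# optional HA check (not in the original steps): with both masters up, the web UIs at
# http://node1:8070 and http://node2:8070 should show one master as ALIVE and the other as STANDBY;
# a rough command-line check (assumes the status text appears in the page HTML):
curl -s http://node1:8070 | grep -oE 'ALIVE|STANDBY'
curl -s http://node2:8070 | grep -oE 'ALIVE|STANDBY'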
# check the safe mode status:
hdfs dfsadmin -safemode get
# turn safe mode on:
# hdfs dfsadmin -safemode enter
# turn safe mode off:
# hdfs dfsadmin -safemode leave

hdfs dfs -mkdir -p /spark/eventslog

$SPARK_HOME/bin/spark-shell
# http://node1:4040
# http://node1:8070

> :quit
# test
# the output directory must not already exist on hdfs
# hdfs dfs -mkdir -p /spark/output
# hdfs dfs -rmr /spark/output

vi ~/sparkdata.txt
hello man
what are you doing now
my running
hello
kevin
hi man

hdfs dfs -mkdir -p /usr/file/input
hdfs dfs -put ~/sparkdata.txt /usr/file/input
hdfs dfs -ls /usr/file/input

# in spark-shell: word count on the local file
val file1 = sc.textFile("file:///root/sparkdata.txt")
val count1 = file1.flatMap(line => line.split(" ")).map(word => (word, 1)).reduceByKey(_ + _)
count1.saveAsTextFile("hdfs://node1:8020/spark/output1")

# word count on the file in hdfs
val file = sc.textFile("hdfs://appcluster/usr/file/input/sparkdata.txt")
val count = file.flatMap(line => line.split(" ")).map(word => (word, 1)).reduceByKey(_ + _)
count.saveAsTextFile("hdfs://node1:8020/spark/output")

hdfs dfs -ls /spark/output
hdfs dfs -cat /spark/output/part-00000
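
# optional smoke test (not part of the original steps): submit the bundled SparkPi example to the
# standalone master to confirm scheduling works end to end (the examples jar path below assumes the
# stock spark-2.3.0-bin-hadoop2.7 layout)
$SPARK_HOME/bin/spark-submit --class org.apache.spark.examples.SparkPi \
  --master spark://node1:7077 \
  $SPARK_HOME/examples/jars/spark-examples_2.11-2.3.0.jar 10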
# stop the processes started above
# ==================================================================node1
$SPARK_HOME/sbin/stop-slaves.sh
$SPARK_HOME/sbin/stop-master.sh
$HADOOP_HOME/sbin/stop-all.sh

# ==================================================================node1 node2 node3
# stop zookeeper
zkServer.sh stop

# ==================================================================node2
$HADOOP_HOME/sbin/yarn-daemon.sh stop resourcemanager
$HADOOP_HOME/sbin/hadoop-daemon.sh stop zkfc

# ==================================================================node1
$HADOOP_HOME/sbin/hadoop-daemon.sh stop zkfc

shutdown -h now
# take a vm snapshot: spark