Key configuration files, based on Hadoop 2.6.0-cdh5.4.0, Hive 1.1.0, and HBase 1.0.0-cdh5.4.0
core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://gagcluster</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/data1/hadoop_tmp/tmp</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>master.hadoop:2181,whoami:2181,slave1.hadoop:2181,slave2.hadoop:2181,slave3.hadoop:2181</value>
</property>
</configuration>
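fs.defaultFS points at the logical nameservice gagcluster rather than a single NameNode host; the nameservice itself is resolved through the HA properties in hdfs-site.xml below. A quick sanity check once the configuration is deployed (a sketch, assuming the Hadoop client scripts are on PATH):
# Confirm the client resolves the logical nameservice and the ZK quorum
hdfs getconf -confKey fs.defaultFS          # expected: hdfs://gagcluster
hdfs getconf -confKey ha.zookeeper.quorum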
hdfs-site.xml
<configuration>
<property>
<name>dfs.nameservices</name>
<value>gagcluster</value>
</property>
<property>
<name>dfs.ha.namenodes.gagcluster</name>
<value>master.hadoop,whoami</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/data1/hadoop_tmp/hdfs/datanode</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.gagcluster.master.hadoop</name>
<value>master.hadoop:9000</value>
</property>
<property>
<name>dfs.namenode.rpc-address.gagcluster.whoami</name>
<value>whoami:9000</value>
</property>
<property>
<name>dfs.namenode.http-address.gagcluster.master.hadoop</name>
<value>master.hadoop:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.gagcluster.whoami</name>
<value>whoami:50070</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/data1/hadoop_tmp/hdfs/namenode</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://master.hadoop:8485;whoami:8485;slave1.hadoop:8485;slave2.hadoop:8485;slave3.hadoop:8485/gagcluster</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data1/hadoop/tmp/journal</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.gagcluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence(hdfs)
shell(/bin/true)</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.datanode.max.transfer.threads</name>
<value>8192</value>
</property>
</configuration>
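With quorum-journal HA and automatic failover enabled, first-time initialization has a fixed order: JournalNodes first, then format and bootstrap the two NameNodes, then format the failover znode in ZooKeeper. A hedged sketch of that sequence (one-time setup; hostnames match the values above, daemon scripts assumed on PATH):
# On every JournalNode host (master.hadoop, whoami, slave1-3.hadoop)
hadoop-daemon.sh start journalnode

# On the first NameNode (master.hadoop)
hdfs namenode -format
hadoop-daemon.sh start namenode

# On the second NameNode (whoami): copy the metadata, then start it
hdfs namenode -bootstrapStandby
hadoop-daemon.sh start namenode

# Initialize the failover znode, then start a ZKFC on both NameNode hosts
hdfs zkfc -formatZK
hadoop-daemon.sh start zkfc

# Verify: the NameNode IDs are the values of dfs.ha.namenodes.gagcluster
hdfs haadmin -getServiceState master.hadoop
hdfs haadmin -getServiceState whoami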
mapred-site.xml
<configuration>
<!--
<property>
<name>mapred.job.tracker</name>
<value>master.hadoop:9001</value>
</property>
-->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!--<property>
<name>mapreduce.cluster.local.dir</name>
<value>/data1/hadoop_tmp/tmp/map</value>
</property>
-->
<property>
<name>mapreduce.jobhistory.address</name>
<value>0.0.0.0:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>0.0.0.0:19888</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
</configuration>
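Note that dfs.permissions is an HDFS-side setting and normally lives in hdfs-site.xml; keeping it here is harmless but has no effect on MapReduce itself. The jobhistory addresses above only matter once the history server is actually running; a minimal sketch, assuming $HADOOP_HOME/sbin is on PATH:
# Start the MapReduce JobHistory Server (serves ports 10020 and 19888 configured above)
mr-jobhistory-daemon.sh start historyserver
# Web UI: http://<jobhistory-host>:19888/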
yarn-site.xml
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>rm-cluster</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.ha.automatic-failover.recover.enabled</name>
<value>true</value>
</property>
<!-- Enable recovery of ResourceManager state -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>master.hadoop</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>whoami</value>
</property>
<property>
<name>yarn.resourcemanager.ha.id</name>
<value>rm1</value>
<description>If we want to launch more than one RM in single node, we need this configuration</description>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>master.hadoop:2181,whoami:2181,slave1.hadoop:2181,slave2.hadoop:2181,slave3.hadoop:2181</value>
</property>
<!-- Per-RM service addresses for rm1 and rm2 -->
<property>
<name>yarn.resourcemanager.scheduler.address.rm1</name>
<value>master.hadoop:8030</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm2</name>
<value>whoami:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm1</name>
<value>master.hadoop:8031</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm2</name>
<value>whoami:8031</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm1</name>
<value>master.hadoop:8032</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm2</name>
<value>whoami:8032</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm1</name>
<value>master.hadoop:8033</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm2</name>
<value>whoami:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>master.hadoop:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>whoami:8088</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.connect.retry-interval.ms</name>
<value>2000</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>12288</value>
</property>
</configuration>
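yarn.resourcemanager.ha.id is per-host: it must be rm1 on master.hadoop and rm2 on whoami, so this is the one property that differs between the two ResourceManager nodes. After starting both RMs, the HA state can be checked with rmadmin (a sketch; daemon scripts assumed on PATH):
# Start one ResourceManager on each RM host, and a NodeManager on each worker
yarn-daemon.sh start resourcemanager
yarn-daemon.sh start nodemanager

# One RM should report active, the other standby
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2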
hadoop-env.sh
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Set Hadoop-specific environment variables here.

# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.

# The java implementation to use.
export JAVA_HOME=/usr/local/java/jdk1.7.0_65

# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol. Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
#export JSVC_HOME=${JSVC_HOME}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}

# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
if [ "$HADOOP_CLASSPATH" ]; then
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
else
export HADOOP_CLASSPATH=$f
fi
done

# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""

# Extra Java runtime options. Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"

# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"
export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"

# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"

# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol. This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}

# Where log files are stored. $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER
export HADOOP_LOG_DIR=/data1/logs/

# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}

###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_MOVER_OPTS=""

###
# Advanced Users Only!
###

# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by
# the user that will run the hadoop daemons. Otherwise there is the
# potential for a symlink attack.
export HADOOP_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}

# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER
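The environment file redirects logs to /data1/logs/, and the XML files above reference several local directories (note that dfs.journalnode.edits.dir uses /data1/hadoop/tmp/journal, not /data1/hadoop_tmp, which may or may not be intentional). These paths must exist and be writable by the daemon user on every node; a minimal sketch, assuming the daemons run as a hadoop user:
# Run on every node; adjust ownership to whichever user runs the daemons
mkdir -p /data1/logs /data1/hadoop_tmp/tmp \
         /data1/hadoop_tmp/hdfs/namenode /data1/hadoop_tmp/hdfs/datanode \
         /data1/hadoop/tmp/journal
chown -R hadoop:hadoop /data1/logs /data1/hadoop_tmp /data1/hadoop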
hive-env.sh
export HIVE_HOME=/usr/local/hive
export JAVA_HOME=/usr/local/java/jdk1.7.0_65
export HADOOP_HOME=/usr/local/hadoop
export HIVE_CONF_DIR=/usr/local/hive/conf
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Set Hive and Hadoop environment variables here. These variables can be used
# to control the execution of Hive. It should be used by admins to configure
# the Hive installation (so that users do not have to set environment variables
# or set command line parameters to get correct behavior).
#
# The hive service being invoked (CLI/HWI etc.) is available via the environment
# variable SERVICE

# Hive Client memory usage can be an issue if a large number of clients
# are running at the same time. The flags below have been useful in
# reducing memory usage:
#
# if [ "$SERVICE" = "cli" ]; then
# if [ -z "$DEBUG" ]; then
# export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:+UseParNewGC -XX:-UseGCOverheadLimit"
# else
# export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:-UseGCOverheadLimit"
# fi
# fi

# The heap size of the jvm started by hive shell script can be controlled via:
#
export HADOOP_HEAPSIZE=512
#
# Larger heap size may be required when running queries over large number of files or partitions.
# By default hive shell scripts use a heap size of 256 (MB). Larger heap size would also be
# appropriate for hive server (hwi etc).

# Set HADOOP_HOME to point to a specific hadoop install directory
# HADOOP_HOME=${bin}/../../hadoop
hive-site.xml
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://127.0.0.1:3306/hive</value>
<description>JDBC connection URL for the metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
<description>metastore database user name</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>buzhidao</value>
<description>metastore database password</description>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/data1/cloud/hive</value>
<description>warehouse directory (a path on HDFS)</description>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://192.168.0.103:9083</value>
<description>host and port of the Hive metastore service</description>
</property>
<property>
<name>hive.hwi.war.file</name>
<value>lib/hive-hwi-1.2.0.war</value>
<description>path to the HWI war file</description>
</property>
<property>
<name>mapred.input.dir.recursive</name>
<value>true</value>
</property>
<property>
<name>hive.mapred.supports.subdirectories</name>
<value>true</value>
</property>
<property>
<name>hive.exec.mode.local.auto</name>
<value>true</value>
</property>
<property>
<name>hive.exec.parallel</name>
<value>true</value>
</property>
<property>
<name>hive.limit.optimize.enable</name>
<value>true</value>
</property>
<property>
<name>mapred.job.reuse.jvm.num.tasks</name>
<value>10</value>
</property>
</configuration>
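With a remote metastore URI configured, the metastore service must be started before clients connect, and the warehouse directory has to exist on HDFS. The MySQL database and the hive user/password above, plus the MySQL JDBC driver jar in $HIVE_HOME/lib, are assumed to already be in place. A sketch:
# Create the warehouse path declared in hive.metastore.warehouse.dir
hdfs dfs -mkdir -p /data1/cloud/hive
hdfs dfs -chmod g+w /data1/cloud/hive

# Start the metastore (listens on the thrift port from hive.metastore.uris)
hive --service metastore &

# Optional: HiveServer2 for JDBC/beeline clients
hive --service hiveserver2 &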
hbase-site.xml
<configuration>
<property>
<name>hbase.rootdir</name>
<value>hdfs://gagcluster/hbase</value>
</property>
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/data1/cloud/zookeeper/data</value>
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.master.port</name>
<value>60000</value>
</property>
<property>
<name>hbase.master.info.port</name>
<value>60010</value>
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>master.hadoop,slave1.hadoop,slave2.hadoop,slave3.hadoop,whoami</value>
</property>
<property>
<name>hbase.regionserver.handler.count</name>
<value>100</value>
</property>
<property>
<name>hfile.block.cache.size</name>
<value>0.4</value>
</property>
<property>
<name>hbase.hregion.memstore.flush.size</name>
<value>266666</value>
</property>
<property>
<name>hbase.hregion.max.filesize</name>
<value>1000000</value>
</property>
<property>
<name>hbase.hstore.compactionThreshold</name>
<value>2</value>
</property>
<property>
<name>hbase.client.write.buffer</name>
<value>20971520</value>
</property>
<property>
<name>hbase.client.scanner.caching</name>
<value>500</value>
</property>
</configuration>
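hbase.rootdir points at the HDFS nameservice (hdfs://gagcluster/hbase), so HBase must be able to resolve gagcluster. The usual approach is to put Hadoop's core-site.xml and hdfs-site.xml on HBase's classpath; a hedged sketch, where HADOOP_CONF_DIR and HBASE_HOME stand in for the actual install paths on each node:
# On every HBase node, expose the HA client configuration to HBase
ln -s "$HADOOP_CONF_DIR/core-site.xml" "$HBASE_HOME/conf/core-site.xml"
ln -s "$HADOOP_CONF_DIR/hdfs-site.xml" "$HBASE_HOME/conf/hdfs-site.xml"

# Start the cluster and check that all RegionServers report in
start-hbase.sh
echo "status" | hbase shell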
zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/data1/cloud/zookeeper/data
dataLogDir=/data1/cloud/zookeeper/logs
# the port at which the clients will connect
clientPort=2181
server.0=master.hadoop:2888:3888
server.1=whoami:2888:3888
server.2=slave1.hadoop:2888:3888
server.3=slave2.hadoop:2888:3888
server.4=slave3.hadoop:2888:3888
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
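Each server.N line must be matched by a myid file under dataDir on that host, containing just the number N. A sketch of bringing the ensemble up (zkServer.sh assumed on PATH):
# On master.hadoop (server.0); use 1..4 on whoami and slave1-3.hadoop respectively
echo 0 > /data1/cloud/zookeeper/data/myid

# Start and verify each node
zkServer.sh start
zkServer.sh status    # one node reports leader, the rest follower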