Note: copy the MySQL driver jar into spark/lib, copy hive-site.xml into the project's resources directory, and do not use hostnames when debugging against the remote cluster (use IP addresses, as in setMaster below).
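For reference, a minimal hive-site.xml sketch for the project's resources directory, assuming the remote metastore service runs at thrift://192.168.66.66:9083 (the same address as the commented-out hive.metastore.uris setting in the code below); adjust the host and port to your cluster:

<configuration>
  <!-- address of the remote Hive metastore service (assumed; must match your cluster) -->
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://192.168.66.66:9083</value>
  </property>
</configuration>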

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.hive.HiveContext
import java.io.FileNotFoundException
import java.io.IOException

object HiveSelect {
  def main(args: Array[String]) {
    System.setProperty("hadoop.home.dir", "D:\\hadoop") // point Spark at the local Hadoop binaries (needed on Windows)
    val conf = new SparkConf().setAppName("HiveApp").setMaster("spark://192.168.66.66:7077")
      .set("spark.executor.memory", "1g")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .setJars(Seq("D:\\workspace\\scala\\out\\scala.jar")) // ship the application jar to the remote Spark cluster
      //.set("hive.metastore.uris", "thrift://192.168.66.66:9083") // remote Hive metastore address
      //.set("spark.driver.extraClassPath", "D:\\json\\mysql-connector-java-5.1.39.jar")
    val sparkContext = new SparkContext(conf)
    try {
      val hiveContext = new HiveContext(sparkContext)
      hiveContext.sql("use siat") // switch to the target database
      hiveContext.sql("DROP TABLE IF EXISTS src") // drop the table if it exists
      hiveContext.sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING) " +
        "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'") // create a tab-delimited table
      hiveContext.sql("LOAD DATA LOCAL INPATH 'D:\\workspace\\scala\\src.txt' INTO TABLE src") // load data
      hiveContext.sql("SELECT * FROM src").collect().foreach(println) // query the data and print each row
    } catch {
      // specific exceptions must come first: a Throwable case listed before them
      // would match everything and make the later cases unreachable
      case e: FileNotFoundException => println("Missing file exception")
      case e: IOException => println("IO Exception")
      case e: ArithmeticException => println(e)
      case e: NumberFormatException => println(e)
      case e: IllegalArgumentException => println("illegal arg. exception")
      case e: IllegalStateException => println("illegal state exception")
      case e: Exception => println(e)
      case e: Throwable => println("found an unknown exception: " + e)
    } finally {
      sparkContext.stop()
    }
  }
}
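Because the table is created with FIELDS TERMINATED BY '\t', src.txt is expected to hold one tab-separated key/value pair per line. A hypothetical sample file (values made up for illustration):

1	apple
2	banana
3	cherry

With input like this, the final SELECT prints one Row per line, e.g. [1,apple].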

Appendix 1: Scala Spark API: http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.package

org.apache.spark

org.apache.spark.api.java

org.apache.spark.api.java.function

org.apache.spark.broadcast

org.apache.spark.graphx

org.apache.spark.graphx.impl

org.apache.spark.graphx.lib

org.apache.spark.graphx.util

org.apache.spark.input

org.apache.spark.internal

org.apache.spark.internal.io

org.apache.spark.io

org.apache.spark.launcher

org.apache.spark.mapred

org.apache.spark.metrics.source

org.apache.spark.ml

org.apache.spark.ml.attribute

org.apache.spark.ml.classification

org.apache.spark.ml.clustering

org.apache.spark.ml.evaluation

org.apache.spark.ml.feature

org.apache.spark.ml.fpm

org.apache.spark.ml.linalg

org.apache.spark.ml.param

org.apache.spark.ml.recommendation

org.apache.spark.ml.regression

org.apache.spark.ml.source.libsvm

org.apache.spark.ml.stat

org.apache.spark.ml.stat.distribution

org.apache.spark.ml.tree

org.apache.spark.ml.tuning

org.apache.spark.ml.util

org.apache.spark.mllib

org.apache.spark.mllib.classification

org.apache.spark.mllib.clustering

org.apache.spark.mllib.evaluation

org.apache.spark.mllib.feature

org.apache.spark.mllib.fpm

org.apache.spark.mllib.linalg

org.apache.spark.mllib.linalg.distributed

org.apache.spark.mllib.optimization

org.apache.spark.mllib.pmml

org.apache.spark.mllib.random

org.apache.spark.mllib.rdd

org.apache.spark.mllib.recommendation

org.apache.spark.mllib.regression

org.apache.spark.mllib.stat

org.apache.spark.mllib.stat.distribution

org.apache.spark.mllib.stat.test

org.apache.spark.mllib.tree

org.apache.spark.mllib.tree.configuration

org.apache.spark.mllib.tree.impurity

org.apache.spark.mllib.tree.loss

org.apache.spark.mllib.tree.model

org.apache.spark.mllib.util

org.apache.spark.partial

org.apache.spark.rdd

org.apache.spark.scheduler

org.apache.spark.scheduler.cluster

org.apache.spark.security

org.apache.spark.serializer

org.apache.spark.sql

org.apache.spark.sql.api.java

org.apache.spark.sql.catalog

org.apache.spark.sql.expressions

org.apache.spark.sql.expressions.javalang

org.apache.spark.sql.expressions.scalalang

org.apache.spark.sql.hive

org.apache.spark.sql.hive.execution

org.apache.spark.sql.hive.orc

org.apache.spark.sql.jdbc

org.apache.spark.sql.sources

org.apache.spark.sql.streaming

org.apache.spark.sql.types

org.apache.spark.sql.util

org.apache.spark.status.api.v1

org.apache.spark.status.api.v1.streaming

org.apache.spark.storage

org.apache.spark.streaming

org.apache.spark.streaming.api.java

org.apache.spark.streaming.dstream

org.apache.spark.streaming.flume

org.apache.spark.streaming.kafka

org.apache.spark.streaming.kinesis

org.apache.spark.streaming.receiver

org.apache.spark.streaming.scheduler

org.apache.spark.streaming.scheduler.rate

org.apache.spark.streaming.util

org.apache.spark.ui.env

org.apache.spark.ui.exec

org.apache.spark.ui.jobs

org.apache.spark.ui.storage

org.apache.spark.util

org.apache.spark.util.random

org.apache.spark.util.sketch