sparkSQL中的example学习(1)
SparkSQLDemo.scala
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.{StringType, StructField, StructType}
/**
 * Walk-through of the basic Spark SQL examples: untyped DataFrame operations,
 * Dataset creation, schema inference via reflection, and programmatic schemas.
 *
 * `main` starts a local SparkSession, runs the selected demo, and always stops
 * the session afterwards. The other demos can be enabled by uncommenting the
 * corresponding call in `main`.
 */
object SparkSQLDemo {

  // $example on:create_ds$
  /** Record type used by the Dataset / schema-inference demos. */
  case class Person(name: String, age: Long)
  // $example off:create_ds$

  // Sample input files read by the demos (same locations as the original literals).
  private val ResourcesDir = "/Users/hadoop/app/spark/examples/src/main/resources"
  private val PeopleJsonPath = s"$ResourcesDir/people.json"
  private val PeopleTxtPath = s"$ResourcesDir/people.txt"

  /**
   * Entry point: builds the SparkSession, runs the programmatic-schema demo,
   * and stops the session.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Start the SparkSession
    // $example on: init_session$
    val spark = SparkSession
      .builder()
      .appName("SparkSQLDemo")
      .master("local")
      .config("spark.some.config.option", "some-value")
      .getOrCreate()
    // $example off: init_session$

    // Stop the session even if a demo throws, so the local context is released.
    try {
      // runBasicDataFrameDemo(spark)
      // runDatasetCreationDemo(spark)
      // runInferSchemaDemo(spark)
      runProgrammaticSchemaDemo(spark)
    } finally {
      // Stop the SparkSession
      spark.stop()
    }
  }

  /**
   * Reads people.json into a DataFrame and demonstrates untyped operations,
   * temporary views, and global temporary views.
   *
   * @param spark the active session
   */
  private def runBasicDataFrameDemo(spark: SparkSession): Unit = {
    val df = spark.read.json(PeopleJsonPath)

    // Display the content of the DataFrame to stdout
    df.show()
    // Print the schema in a tree format
    df.printSchema()
    // Select only the "name" column
    df.select("name").show()

    // This import is needed to use the $-notation
    import spark.implicits._
    // Select everybody, but increment the age by 1
    df.select($"name", $"age" + 1).show()
    // Select people older than 21.
    // `filter` keeps only the matching rows; `select($"age" > 21)` would instead
    // project a single boolean column for every row.
    df.filter($"age" > 21).show()
    // Count people by age
    df.groupBy("age").count().show()

    // $example on: global_temp_view$
    // Register the DataFrame as a SQL temporary view
    df.createOrReplaceTempView("people")
    val sqlDF = spark.sql("select * from people")
    sqlDF.show()

    // Register the DataFrame as a global temporary view
    df.createGlobalTempView("people")
    // Global temporary view is tied to a system preserved database `global_temp`
    spark.sql("select * from global_temp.people").show()
    // Global temporary view is cross-session
    spark.newSession().sql("select * from global_temp.people").show()
    // $example off: global_temp_view$
  }

  /**
   * Demonstrates creating Datasets from a case class, from primitives, and from
   * a JSON file mapped onto [[Person]].
   *
   * @param spark the active session
   */
  private def runDatasetCreationDemo(spark: SparkSession): Unit = {
    // spark.implicits is a container of implicit conversions and encoders for
    // [[org.apache.spark.sql.Dataset]]; import it to get `.toDS()` and friends.
    import spark.implicits._

    // Note on `.toDS()`: it is declared with parentheses to prevent the Scala
    // compiler from treating `rdd.toDS("1")` as invoking toDS and then applying
    // the argument to the returned Dataset.

    // Encoders are created for case classes
    val caseClassDS = Seq(Person("Andy", 32)).toDS()
    caseClassDS.show()

    // Encoders for most common types are automatically provided by importing spark.implicits._
    val primitiveDS = Seq(1, 2, 3).toDS()
    primitiveDS.map(_ + 1).foreach(println(_)) // alternatively: .collect()

    // DataFrames can be converted to a Dataset by providing a class.
    // Mapping will be done by name.
    val peopleDS = spark.read.json(PeopleJsonPath).as[Person]
    peopleDS.show()
  }

  /**
   * Builds a DataFrame from people.txt by mapping each line onto [[Person]]
   * (schema inferred from the case class) and queries it with SQL.
   *
   * @param spark the active session
   */
  private def runInferSchemaDemo(spark: SparkSession): Unit = {
    // $example on: schema_inferring$
    // For implicit conversions from RDDs to DataFrames
    import spark.implicits._

    // Create an RDD of Person objects from a text file, convert it to a DataFrame.
    // The age is parsed as Long to match the declared type of Person.age.
    val peopleDF = spark.sparkContext
      .textFile(PeopleTxtPath)
      .map(_.split(","))
      .map(fields => Person(fields(0), fields(1).trim.toLong))
      .toDF()
    // Register the DataFrame as a temporary view
    peopleDF.createOrReplaceTempView("people")

    // SQL statements can be run by using the sql methods provided by Spark
    val teenagersDF = spark.sql("select name, age from people where age between 13 and 19")

    // The columns of a row in the result can be accessed by field index
    teenagersDF.map(teenager => s"Name: ${teenager(0)}").show()
    // or by field name
    teenagersDF.map(teenager => s"Name: ${teenager.getAs[String]("name")}").show()

    // No pre-defined encoders for Dataset[Map[K,V]], define explicitly
    implicit val mapEncoder: org.apache.spark.sql.Encoder[Map[String, Any]] =
      org.apache.spark.sql.Encoders.kryo[Map[String, Any]]
    // Primitive types and case classes can be also defined as
    // implicit val stringIntMapEncoder: Encoder[Map[String, Any]] = ExpressionEncoder()

    // row.getValuesMap[T] retrieves multiple columns at once into a Map[String, T]
    teenagersDF
      .map(teenager => teenager.getValuesMap[Any](List("name", "age")))
      .foreach(println(_)) // alternatively: .collect()
    // $example off: schema_inferring$
  }

  /**
   * Builds a DataFrame from people.txt using a schema constructed at runtime
   * (every column typed as a nullable string) and queries it with SQL.
   *
   * @param spark the active session
   */
  private def runProgrammaticSchemaDemo(spark: SparkSession): Unit = {
    // Provides the String encoder required by `results.map` below
    import spark.implicits._
    // $example on: programmatic_schema$
    // Create an RDD
    val peopleRDD = spark.sparkContext.textFile(PeopleTxtPath)

    // The schema is encoded in a string
    val schemaString = "name age"

    // Generate the schema based on the string of schema
    val fields = schemaString
      .split(" ")
      .map(fieldName => StructField(fieldName, StringType, nullable = true))
    val schema = StructType(fields)

    // Convert records of the RDD (people) to Rows
    val rowRDD = peopleRDD
      .map(_.split(","))
      .map(attributes => Row(attributes(0), attributes(1).trim))

    // Apply the schema to the RDD
    val peopleDF = spark.createDataFrame(rowRDD, schema)

    // Create a temporary view using the DataFrame
    peopleDF.createOrReplaceTempView("people")

    // SQL can be run over a temporary view created using DataFrames
    val results = spark.sql("select name from people")

    // The results of SQL queries are DataFrames and support all the normal RDD operations.
    // The columns of a row in the result can be accessed by field index or by field name.
    results.map(attributes => s"Name: ${attributes(0)}").show()
    // $example off: programmatic_schema$
  }
}


sparkSQL中的example学习(1)的更多相关文章
- sparkSQL中的example学习(3)
UserDefinedTypedAggregation.scala(用户可自定义类型) import org.apache.spark.sql.expressions.Aggregator impor ...
- sparkSQL中的example学习(2)
UserDefinedUntypedAggregate.scala(默认返回类型为空,不能更改) import org.apache.spark.sql.{Row, SparkSession} imp ...
- PHP中的Libevent学习
wangbin@2012,1,3 目录 Libevent在php中的应用学习 1. Libevent介绍 2. 为什么要学习libevent 3. Php libeven ...
- JS中childNodes深入学习
原文:JS中childNodes深入学习 <html xmlns="http://www.w3.org/1999/xhtml"> <head> <ti ...
- CNCC2017中的深度学习与跨媒体智能
CNCC2017中的深度学习与跨媒体智能 转载请注明作者:梦里茶 目录 机器学习与跨媒体智能 传统方法与深度学习 图像分割 小数据集下的深度学习 语音前沿技术 生成模型 基于贝叶斯的视觉信息编解码 珠 ...
- 【Spark篇】---SparkSQL中自定义UDF和UDAF,开窗函数的应用
一.前述 SparkSQL中的UDF相当于是1进1出,UDAF相当于是多进一出,类似于聚合函数. 开窗函数一般分组取topn时常用. 二.UDF和UDAF函数 1.UDF函数 java代码: Spar ...
- 图解BERT(NLP中的迁移学习)
目录 一.例子:句子分类 二.模型架构 模型的输入 模型的输出 三.与卷积网络并行 四.嵌入表示的新时代 回顾一下词嵌入 ELMo: 语境的重要性 五.ULM-FiT:搞懂NLP中的迁移学习 六.Tr ...
- python中configparser模块学习
python中configparser模块学习 ConfigParser模块在python中用来读取配置文件,配置文件的格式跟windows下的ini配置文件相似,可以包含一个或多个节(section ...
- Scala中的类学习
Scala中的类学习 从java了解类的情况下,了解Scala的类并不难.Scala类中的字段自动带getter和setter方法,用@BeanProperty注解生成javaBean对象的getXX ...
随机推荐
- Asp.Net Core 开发之旅之NLog日志
NLog已是日志库的一员大佬,使用也简单方便,本文介绍的环境是基于.NET CORE 3.0 1.安装 Install-Package NLog.Web.AspNetCore 2.创建配置文件 在we ...
- 【洛谷P4251】[SCOI2015]小凸玩矩阵(二分+二分图匹配)
洛谷 题意: 给出一个\(n*m\)的矩阵\(A\).现要从中选出\(n\)个数,任意两个数不能在同一行或者同一列. 现在问选出的\(n\)个数中第\(k\)大的数的最小值是多少. 思路: 显然二分一 ...
- ES3、ES5、ES6对象代理的写法差异
ES3的对象代理写法: console.log('定义私有变量ES3写法:') // ES3 var Person = function (){ var data = { name:'ES3', ag ...
- luoguP3979 遥远的国度
换根的树剖 https://www.luogu.org/problem/P3979 题意: (出题人口活好.... 给定一棵以 root 为根的 n 个点的有根树,对于任意一个点 x, 给定他 的点权 ...
- Python基础之猜数游戏
例题一:猜数游戏.在程序中预设一个0~9之间的整数,让用户通过键盘输入所猜的数,如果大于预设的数,显示“遗憾,太大了”:小于预设的数,显示“遗憾,太小了”,如此循环,直至猜中该数,显示“预测N次,你猜 ...
- 配置Nginx1.15.11+php5.4出现502 Bad Gateway问题
今天在调试Nginx1.15.11+php5.4网站时候,因为网站数据和并发过大,出现502 Bad Gateway问题,所以记下笔记. 只需要修改php-fpm.conf的request_termi ...
- keras 学习笔记(二) ——— data_generator
data_generator 每次输出一个batch,基于keras.utils.Sequence Base object for fitting to a sequence of data, suc ...
- MySQL学习笔记2——DML
DML(数据操作语言,它是对表记录的操作(增,删,改)!) 1.插入数据 *INSERT INTO 表名(列名1,列名2,...) VALUES(列值1,列值2,...); >在表名后给出要插入 ...
- 物联网架构成长之路(33)-EMQ数据存储到influxDB
一.前言 时隔一年半,技术变化特别快,学习也要跟上才行.以前写过EMQ数据转存问题,当时用了比较笨的方法,通过写插件的方式,把MQTT里面的数据发送到数据库进行存储.当时也是为了学习erlang和em ...
- github上方便的小工具
目录 python中的fire模块 Install Reference python中的fire模块 它可以对所有Python 对象,包括functions, classes, modules, ob ...