Spark SQL 处理嵌套 JSON 数据
一、数据准备:
{
"dc_id": "dc-101",
"source": {
"sensor-igauge": {
"id": 10,
"ip": "68.28.91.22",
"description": "Sensor attached to the container ceilings",
"temp":35,
"c02_level": 1475,
"geo": {"lat":38.00, "long":97.00}
},
"sensor-ipad": {
"id": 13,
"ip": "67.185.72.1",
"description": "Sensor ipad attached to carbon cylinders",
"temp": 34,
"c02_level": 1370,
"geo": {"lat":47.41, "long":-122.00}
},
"sensor-inest": {
"id": 8,
"ip": "208.109.163.218",
"description": "Sensor attached to the factory ceilings",
"temp": 40,
"c02_level": 1346,
"geo": {"lat":33.61, "long":-111.89}
},
"sensor-istick": {
"id": 5,
"ip": "204.116.105.67",
"description": "Sensor embedded in exhaust pipes in the ceilings",
"temp": 40,
"c02_level": 1574,
"geo": {"lat":35.93, "long":-85.46}
}
}
}
代码示例:
package spark.project_1

import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
import org.apache.spark.{SparkConf, sql}

/*
 * Author Mr. Guo
 * Create 2018/10/19 - 14:36
 */

/**
 * One flattened sensor reading produced by [[dispose_json]].
 *
 * @param dcId       value of the top-level `dc_id` field
 * @param deviceType key of the entry inside the `source` map (e.g. "sensor-ipad")
 * @param ip         the sensor's `ip` field
 * @param deviceId   the sensor's `id` field
 * @param temp       the sensor's `temp` field
 * @param c02_level  the sensor's `c02_level` field
 * @param lat        the sensor's `geo.lat` field
 * @param lon        the sensor's `geo.long` field
 */
case class DeviceAlert(dcId: String, deviceType: String, ip: String, deviceId: Long, temp: Long, c02_level: Long,
                       lat: Double, lon: Double)

/**
 * Example 1: read a JSON document whose `source` object is keyed by device name,
 * model that object as a map, explode it into one row per device and convert the
 * rows into a typed `Dataset[DeviceAlert]`.
 */
object dispose_json {

  /**
   * Runs the example on a local two-thread Spark session and prints the
   * intermediate schemas and tables to stdout.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    // Named `session` rather than `spark` so it cannot be confused with the enclosing package `spark`.
    val session = sql.SparkSession
      .builder()
      .config(conf)
      .master("local[2]")
      .appName("dispose_json")
      .getOrCreate()

    // Stop the session even when one of the steps below throws.
    try {
      session.sparkContext.setLogLevel("error")
      println("--------------------------------------------------------------------")
      // Import implicit conversions (toDS, the $"col" interpolator, encoders)
      import session.implicits._

      // A single-element Dataset[String]; the element is the whole JSON document.
      val dataDS1 = Seq(
        """
          |{
          |"dc_id": "dc-101",
          |"source": {
          | "sensor-igauge": {
          | "id": 10,
          | "ip": "68.28.91.22",
          | "description": "Sensor attached to the container ceilings",
          | "temp":35,
          | "c02_level": 1475,
          | "geo": {"lat":38.00, "long":97.00}
          | },
          | "sensor-ipad": {
          | "id": 13,
          | "ip": "67.185.72.1",
          | "description": "Sensor ipad attached to carbon cylinders",
          | "temp": 34,
          | "c02_level": 1370,
          | "geo": {"lat":47.41, "long":-122.00}
          | },
          | "sensor-inest": {
          | "id": 8,
          | "ip": "208.109.163.218",
          | "description": "Sensor attached to the factory ceilings",
          | "temp": 40,
          | "c02_level": 1346,
          | "geo": {"lat":33.61, "long":-111.89}
          | },
          | "sensor-istick": {
          | "id": 5,
          | "ip": "204.116.105.67",
          | "description": "Sensor embedded in exhaust pipes in the ceilings",
          | "temp": 40,
          | "c02_level": 1574,
          | "geo": {"lat":35.93, "long":-85.46}
          | }
          | }
          |}
        """.stripMargin).toDS()

      // Define the schema: `source` is a map from device name to a sensor struct,
      // so the device names do not have to be known in advance.
      val geoType = new StructType()
        .add("lat", DoubleType)
        .add("long", DoubleType)
      val sensorType = new StructType()
        .add("description", StringType)
        .add("ip", StringType)
        .add("id", LongType)
        .add("temp", LongType)
        .add("c02_level", LongType)
        .add("geo", geoType)
      val schema1 = new StructType()
        .add("dc_id", StringType)
        .add("source", MapType(StringType, sensorType))

      // Read straight from the Dataset[String]; the json(RDD[String]) overload used
      // previously is deprecated since Spark 2.2.
      val df1 = session.read.schema(schema1).json(dataDS1)
      df1.printSchema()
      df1.show(truncate = false)
      println("=======================================")

      // Exploding the map yields one row per entry, exposed as columns `key` and `value`.
      val explodeDF = df1.select($"dc_id", explode($"source"))
      explodeDF.printSchema()
      explodeDF.show(10, truncate = false)
      println("=======================================")

      // Rename and flatten the columns so they line up with DeviceAlert's field names.
      val notifydevicesDS = explodeDF
        .select(
          $"dc_id".as("dcId"),
          $"key".as("deviceType"),
          $"value".getItem("ip").as("ip"),
          $"value".getItem("id").as("deviceId"),
          $"value".getItem("c02_level").as("c02_level"),
          $"value".getItem("temp").as("temp"),
          $"value".getItem("geo").getItem("lat").as("lat"),
          $"value".getItem("geo").getItem("long").as("lon"))
        .as[DeviceAlert]
      notifydevicesDS.printSchema()
      notifydevicesDS.show(20, truncate = false)
    } finally {
      session.stop()
    }
  }
}
二、数据准备:
{
"devices": {
"thermostats": {
"peyiJNo0IldT2YlIVtYaGQ": {
"device_id": "peyiJNo0IldT2YlIVtYaGQ",
"locale": "en-US",
"software_version": "4.0",
"structure_id": "VqFabWH21nwVyd4RWgJgNb292wa7hG_dUwo2i2SG7j3-BOLY0BA4sw",
"where_name": "Hallway Upstairs",
"last_connection": "2016-10-31T23:59:59.000Z",
"is_online": true,
"can_cool": true,
"can_heat": true,
"is_using_emergency_heat": true,
"has_fan": true,
"fan_timer_active": true,
"fan_timer_timeout": "2016-10-31T23:59:59.000Z",
"temperature_scale": "F",
"target_temperature_f": 72,
"target_temperature_high_f": 80,
"target_temperature_low_f": 65,
"eco_temperature_high_f": 80,
"eco_temperature_low_f": 65,
"away_temperature_high_f": 80,
"away_temperature_low_f": 65,
"hvac_mode": "heat",
"humidity": 40,
"hvac_state": "heating",
"is_locked": true,
"locked_temp_min_f": 65,
"locked_temp_max_f": 80
}
},
"smoke_co_alarms": {
"RTMTKxsQTCxzVcsySOHPxKoF4OyCifrs": {
"device_id": "RTMTKxsQTCxzVcsySOHPxKoF4OyCifrs",
"locale": "en-US",
"software_version": "1.01",
"structure_id": "VqFabWH21nwVyd4RWgJgNb292wa7hG_dUwo2i2SG7j3-BOLY0BA4sw",
"where_name": "Jane's Room",
"last_connection": "2016-10-31T23:59:59.000Z",
"is_online": true,
"battery_health": "ok",
"co_alarm_state": "ok",
"smoke_alarm_state": "ok",
"is_manual_test_active": true,
"last_manual_test_time": "2016-10-31T23:59:59.000Z",
"ui_color_state": "gray"
}
},
"cameras": {
"awJo6rH0IldT2YlIVtYaGQ": {
"device_id": "awJo6rH",
"software_version": "4.0",
"structure_id": "VqFabWH21nwVyd4RWgJgNb292wa7hG_dUwo2i2SG7j3-BOLY0BA4sw",
"where_name": "Foyer",
"is_online": true,
"is_streaming": true,
"is_audio_input_enabled": true,
"last_is_online_change": "2016-12-29T18:42:00.000Z",
"is_video_history_enabled": true,
"web_url": "https://home.nest.com/cameras/device_id?auth=access_token",
"app_url": "nestmobile://cameras/device_id?auth=access_token",
"is_public_share_enabled": true,
"activity_zones": { "name": "Walkway", "id": 244083 },
"last_event": "2016-10-31T23:59:59.000Z"
}
}
}
}
代码示例:
package spark.project_1

import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
import org.apache.spark.{SparkConf, sql}

/*
 * Author Mr. Guo
 * Create 2018/10/19 - 14:36
 */

/**
 * Flattened sensor reading. Kept so the file's public definitions stay unchanged;
 * it is not referenced by the code in this example.
 */
case class DeviceAlert(dcId: String, deviceType: String, ip: String, deviceId: Long, temp: Long, c02_level: Long,
                       lat: Double, lon: Double)

/**
 * Example 2: read a JSON document whose `devices` object holds three device
 * families (`thermostats`, `smoke_co_alarms`, `cameras`), each keyed by device id.
 * Every family is modelled as a map, exploded into one row per device, projected
 * to a few columns, and the thermostat and camera rows are joined on their
 * software version.
 */
object dispose_json {

  /**
   * Runs the example on a local two-thread Spark session and prints the
   * selected schemas and tables to stdout.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    // Named `session` rather than `spark` so it cannot be confused with the enclosing package `spark`.
    val session = sql.SparkSession
      .builder()
      .config(conf)
      .master("local[2]")
      .appName("dispose_json")
      .getOrCreate()

    // Stop the session even when one of the steps below throws.
    try {
      session.sparkContext.setLogLevel("error")
      println("--------------------------------------------------------------------")
      // Import implicit conversions (toDS, the $"col" interpolator, encoders)
      import session.implicits._

      // A single-element Dataset[String]; the element is the whole JSON document.
      val dataDS2 = Seq(
        """
          |{
          | "devices": {
          | "thermostats": {
          | "peyiJNo0IldT2YlIVtYaGQ": {
          | "device_id": "peyiJNo0IldT2YlIVtYaGQ",
          | "locale": "en-US",
          | "software_version": "4.0",
          | "structure_id": "VqFabWH21nwVyd4RWgJgNb292wa7hG_dUwo2i2SG7j3-BOLY0BA4sw",
          | "where_name": "Hallway Upstairs",
          | "last_connection": "2016-10-31T23:59:59.000Z",
          | "is_online": true,
          | "can_cool": true,
          | "can_heat": true,
          | "is_using_emergency_heat": true,
          | "has_fan": true,
          | "fan_timer_active": true,
          | "fan_timer_timeout": "2016-10-31T23:59:59.000Z",
          | "temperature_scale": "F",
          | "target_temperature_f": 72,
          | "target_temperature_high_f": 80,
          | "target_temperature_low_f": 65,
          | "eco_temperature_high_f": 80,
          | "eco_temperature_low_f": 65,
          | "away_temperature_high_f": 80,
          | "away_temperature_low_f": 65,
          | "hvac_mode": "heat",
          | "humidity": 40,
          | "hvac_state": "heating",
          | "is_locked": true,
          | "locked_temp_min_f": 65,
          | "locked_temp_max_f": 80
          | }
          | },
          | "smoke_co_alarms": {
          | "RTMTKxsQTCxzVcsySOHPxKoF4OyCifrs": {
          | "device_id": "RTMTKxsQTCxzVcsySOHPxKoF4OyCifrs",
          | "locale": "en-US",
          | "software_version": "1.01",
          | "structure_id": "VqFabWH21nwVyd4RWgJgNb292wa7hG_dUwo2i2SG7j3-BOLY0BA4sw",
          | "where_name": "Jane's Room",
          | "last_connection": "2016-10-31T23:59:59.000Z",
          | "is_online": true,
          | "battery_health": "ok",
          | "co_alarm_state": "ok",
          | "smoke_alarm_state": "ok",
          | "is_manual_test_active": true,
          | "last_manual_test_time": "2016-10-31T23:59:59.000Z",
          | "ui_color_state": "gray"
          | }
          | },
          | "cameras": {
          | "awJo6rH0IldT2YlIVtYaGQ": {
          | "device_id": "awJo6rH",
          | "software_version": "4.0",
          | "structure_id": "VqFabWH21nwVyd4RWgJgNb292wa7hG_dUwo2i2SG7j3-BOLY0BA4sw",
          | "where_name": "Foyer",
          | "is_online": true,
          | "is_streaming": true,
          | "is_audio_input_enabled": true,
          | "last_is_online_change": "2016-12-29T18:42:00.000Z",
          | "is_video_history_enabled": true,
          | "web_url": "https://home.nest.com/cameras/device_id?auth=access_token",
          | "app_url": "nestmobile://cameras/device_id?auth=access_token",
          | "is_public_share_enabled": true,
          | "activity_zones": { "name": "Walkway", "id": 244083 },
          | "last_event": "2016-10-31T23:59:59.000Z"
          | }
          | }
          | }
          | }
        """.stripMargin).toDS()

      // Per-device struct types, one for each device family.
      val thermostatType = new StructType()
        .add("device_id", StringType)
        .add("locale", StringType)
        .add("software_version", StringType)
        .add("structure_id", StringType)
        .add("where_name", StringType)
        .add("last_connection", StringType)
        .add("is_online", BooleanType)
        .add("can_cool", BooleanType)
        .add("can_heat", BooleanType)
        .add("is_using_emergency_heat", BooleanType)
        .add("has_fan", BooleanType)
        .add("fan_timer_active", BooleanType)
        .add("fan_timer_timeout", StringType)
        .add("temperature_scale", StringType)
        .add("target_temperature_f", DoubleType)
        .add("target_temperature_high_f", DoubleType)
        .add("target_temperature_low_f", DoubleType)
        .add("eco_temperature_high_f", DoubleType)
        .add("eco_temperature_low_f", DoubleType)
        .add("away_temperature_high_f", DoubleType)
        .add("away_temperature_low_f", DoubleType)
        .add("hvac_mode", StringType)
        .add("humidity", DoubleType)
        .add("hvac_state", StringType)
        .add("is_locked", BooleanType)
        .add("locked_temp_min_f", DoubleType)
        .add("locked_temp_max_f", DoubleType)

      val smokeCoAlarmType = new StructType()
        .add("device_id", StringType)
        .add("locale", StringType)
        .add("software_version", StringType)
        .add("structure_id", StringType)
        .add("where_name", StringType)
        .add("last_connection", StringType)
        .add("is_online", BooleanType)
        .add("battery_health", StringType)
        .add("co_alarm_state", StringType)
        .add("smoke_alarm_state", StringType)
        .add("is_manual_test_active", BooleanType)
        .add("last_manual_test_time", StringType)
        .add("ui_color_state", StringType)

      val activityZonesType = new StructType()
        .add("name", StringType)
        .add("id", LongType)

      val cameraType = new StructType()
        .add("device_id", StringType)
        .add("software_version", StringType)
        .add("structure_id", StringType)
        .add("where_name", StringType)
        .add("is_online", BooleanType)
        .add("is_streaming", BooleanType)
        .add("is_audio_input_enabled", BooleanType)
        .add("last_is_online_change", StringType)
        .add("is_video_history_enabled", BooleanType)
        .add("web_url", StringType)
        .add("app_url", StringType)
        .add("is_public_share_enabled", BooleanType)
        .add("activity_zones", activityZonesType)
        .add("last_event", StringType)

      // Each family is a map from device id to its struct, so the ids do not
      // have to be known in advance.
      val schema2 = new StructType()
        .add("devices",
          new StructType()
            .add("thermostats", MapType(StringType, thermostatType))
            .add("smoke_co_alarms", MapType(StringType, smokeCoAlarmType))
            .add("cameras", MapType(StringType, cameraType)))

      // Read straight from the Dataset[String]; the json(RDD[String]) overload used
      // previously is deprecated since Spark 2.2.
      val df2 = session.read.schema(schema2).json(dataDS2)

      // The whole row serialised back into one JSON string column.
      // Built for illustration only; it is never displayed below.
      val stringJsonDF = df2.select(to_json(struct($"*"))).toDF("nestDevice")

      // Pull the three maps out of the `devices` struct into top-level columns.
      val mapColumnsDF = df2.select(
        $"devices".getItem("smoke_co_alarms").alias("smoke_alarms"),
        $"devices".getItem("cameras").alias("cameras"),
        $"devices".getItem("thermostats").alias("thermostats"))

      // Exploding a map yields one row per entry, exposed as columns `key` and `value`.
      val explodeThermostatsDF = mapColumnsDF.select(explode($"thermostats"))
      val explodeCamerasDF = mapColumnsDF.select(explode($"cameras"))
      // Same result shape, but addressing the nested map directly by its dotted path.
      val explodeSmokedAlarmsDF = df2.select(explode($"devices.smoke_co_alarms"))
      explodeSmokedAlarmsDF.printSchema()

      val thermostatDF = explodeThermostatsDF.select(
        $"value".getItem("device_id").alias("device_id"),
        $"value".getItem("locale").alias("locale"),
        $"value".getItem("where_name").alias("location"),
        $"value".getItem("last_connection").alias("last_connection"),
        $"value".getItem("humidity").alias("humidity"),
        $"value".getItem("target_temperature_f").alias("target_temperature_f"),
        $"value".getItem("hvac_mode").alias("mode"),
        $"value".getItem("software_version").alias("version"))

      val cameraDF = explodeCamerasDF.select(
        $"value".getItem("device_id").alias("device_id"),
        $"value".getItem("where_name").alias("location"),
        $"value".getItem("software_version").alias("version"),
        $"value".getItem("activity_zones").getItem("name").alias("name"),
        $"value".getItem("activity_zones").getItem("id").alias("id"))

      // Built for illustration only; it is never displayed or joined below.
      val smokedAlarmsDF = explodeSmokedAlarmsDF.select(
        $"value".getItem("device_id").alias("device_id"),
        $"value".getItem("where_name").alias("location"),
        $"value".getItem("software_version").alias("version"),
        $"value".getItem("last_connection").alias("last_connected"),
        $"value".getItem("battery_health").alias("battery_health"))

      cameraDF.show()

      // Join thermostats and cameras on the shared `version` column.
      val joinedDF = thermostatDF.join(cameraDF, "version")
      joinedDF.show(10, truncate = false)
    } finally {
      session.stop()
    }
  }
}
SparkSql处理嵌套json数据的更多相关文章
- ASP.NET提取多层嵌套json数据的方法
本文实例讲述了ASP.NET利用第三方类库Newtonsoft.Json提取多层嵌套json数据的方法,具体例子如下. 假设需要提取的json字符串如下: {"name":&quo ...
- 详解ASP.NET提取多层嵌套json数据的方法
本篇文章主要介绍了ASP.NET提取多层嵌套json数据的方法,利用第三方类库Newtonsoft.Json提取多层嵌套json数据的方法,有兴趣的可以了解一下. 本文实例讲述了ASP.NET利用第三 ...
- 利用FastJson,拼接复杂嵌套json数据&&直接从json字符串中(不依赖实体类)解析出键值对
1.拼接复杂嵌套json FastJson工具包中有两主要的类: JSONObject和JSONArray ,前者表示json对象,后者表示json数组.他们两者都能添加Object类型的对象,但是J ...
- 提取多层嵌套Json数据
在.net 2.0中提取这样的json {"name":"lily","age":23,"addr":{"ci ...
- 转换嵌套JSON数据为TABLE
先准备一些数据: 创建一张临时表来存储: DECLARE @json_table AS TABLE ( [type] NVARCHAR(MAX), [desc] NVARCHAR(MAX) ) Sou ...
- Gson 解析多层嵌套JSON数据
http://stackoverflow.com/questions/14139437/java-type-generic-as-argument-for-gson
- 【golang】json数据解析 - 嵌套json解析
@ 目录 1. 通过结构体映射解析 2. 嵌套json解析-map 1. 通过结构体映射解析 原数据结构 解析 // 结构体 type contractJson struct { Data []tra ...
- extJS4.2.0 Json数据解析,嵌套及非嵌套(二)
Ext.data.reader.Reader Readers通常用于翻译数据,使其被加载为 Model 实例或Store, 该数据一般是一个AJAX请求的响应数据. 一般情况下不需要直接创建一个Rea ...
- 用JAVA进行Json数据解析(对象数组的相互嵌套)
这段时间我们在做一个英语翻译软件的小小小APP,涉及到了对Json数据的解析,所以特地来总结一下! 假设我们要对如下数据进行解析,其实在平时,返回的Json数据是很乱的,很难分清数据的关系,这是经过相 ...
随机推荐
- python-获取当前工作路径
1.sys.argv[0] import sys print sys.argv[0]#获得的是当前执行脚本的位置(若在命令行执行的该命令,则为空) 运行结果(在python脚本中执行的结果): F:/ ...
- nessus无法访问https://localhost:8834/#/,解决方法。
之前没弄明白为啥经常访问不了https://localhost:8834/#/,后面才发现是服务关闭了. 首先netstat -an 查看8834是否开启, 直接运行一下nessus目录下的nessu ...
- winfrom弹出窗口用timer控件控制倒计时20秒后关闭
功能描述: 因为在程序退出时需要确认是否是误操作,所以加了密码输入的子窗体,子窗体在20秒内会自动关闭 代码如下: private int count; private void Form2_Load ...
- Spring Boot 入门day01
一.Spring Boot入门 1.Spring Boot简介 Spring Boot是由Pivotal团队提供的全新框架,其设计目的是用来简化新Spring应用的初始搭建以及开发过程.该框架使用了特 ...
- NOIP2018提高组初赛知识点
(传说,在神秘的初赛中,选手们经常互相爆零以示友好……) 历年真题:ti.luogu.com.cn 以下标题中打*的是我认为的重点内容 一.关于计算机 (一)计算机组成 硬件组成: 1. 控制器(C ...
- word文档发布至博客wordpress网站系统
今天ytkah接到一个需求:将word文档发布到wordpress网站上,因为客户那边习惯用word来编辑文章,想直接将内容导入到wp网站中,其实 Word 已经提供了这样的功能,并且能够保留 Wor ...
- Redis入门到高可用(十八)—— 主从复制
一.单机有什么问题 1.机器故障 2.容量瓶颈 3.QPS瓶颈 二.主从复制 1.数据副本(高可用.分布式基础) 2.拓展读性能(读写分离) 简单总结: 三.主从复制配置 四.主从复制配置-实验演示 ...
- 十一、无事勿扰,有事通知(2)——KVO
概述 Key-Value-Observe,简称KVO,和上节介绍的Notification师出同门,主要目的都是为了实现观察者模式. 虽说是同门师兄弟,但是各自精通的技艺却是各不相同的. 不像Noti ...
- fastjson java类、字符串、jsonObject之前的转换
json对象转成json字符串 JSONObject json = new JSONObject(); json.put("page",1); json.put("pag ...
- Django进阶之中间件
中间件简介 django 中的中间件(middleware),在django中,中间件其实就是一个类,在请求到来和结束后,django会根据自己的规则在合适的时机执行中间件中相应的方法. 在djang ...