Spark 写入数据到 GeoMesa（HBase）

package com.grady.geomesa

import org.apache.spark.sql.jts.PointUDT

import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

import org.apache.spark.sql.{Row, SparkSession}

import org.apache.spark.{SparkConf, sql}

import org.geotools.data.DataStoreFinder

import org.geotools.geometry.jts.JTSFactoryFinder

import org.locationtech.geomesa.utils.geotools.SchemaBuilder

import org.locationtech.jts.geom.Coordinate

import org.locationtech.geomesa.spark.jts._

import scala.collection.JavaConversions._

/**
 * Example Spark job that builds a small in-memory DataFrame of students, each
 * with a point geometry, and writes it to a GeoMesa catalog stored in HBase.
 *
 * Usage: an optional first program argument overrides the ZooKeeper quorum
 * (see [[DefaultZookeepers]]).
 */
object SparkWriteGeomesa {

  /** GeoMesa catalog the features are written to. */
  val ToGeomesaCatalog: String = "gradytest"

  /** Name of the feature type created inside the catalog. */
  val ToGeomesaCatalogFeature: String = "student"

  /** ZooKeeper quorum used when the caller does not supply one. */
  val DefaultZookeepers: String = "10.82.232.64:2181"

  /**
   * Entry point: builds the sample data, prints it and writes it to GeoMesa.
   *
   * @param args optional; `args(0)` is the ZooKeeper quorum (`host:port[,host:port...]`)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("SparkWriteGeomesa")
    // withJTS registers the JTS geometry types (e.g. PointUDT) with Spark SQL.
    val ss = SparkSession.builder().config(conf).getOrCreate().withJTS
    val zookeepers = args.headOption.getOrElse(DefaultZookeepers)

    // Stop the session even when building or saving the data fails.
    try {
      // Assemble the data to write.
      val inDataFrame = gainData(ss)
      inDataFrame.show()

      // Write the data.
      saveData(inDataFrame, ToGeomesaCatalog, ToGeomesaCatalogFeature, zookeepers)
    } finally {
      ss.stop()
    }
  }

  /**
   * Builds the sample DataFrame from hard-coded `id,name,age,lon,lat` records.
   *
   * @param ss active Spark session
   * @return a DataFrame with columns `geom` (point), `studentId`, `name`, `age`
   *         (the last three are strings)
   * @throws IllegalArgumentException (on the executor) if a record does not have
   *                                  exactly five comma-separated fields
   */
  def gainData(ss: SparkSession): sql.DataFrame = {
    val inDataRDD = ss.sparkContext.makeRDD(
      Seq("1,jack,15,11.10,12.10", "2,Lily,16,12.10,13.10", "3,mike,16,14.10,15.10"))

    val rowRDD = inDataRDD.mapPartitions { lines =>
      // Look the geometry factory up once per partition instead of once per row.
      val factory = JTSFactoryFinder.getGeometryFactory()
      lines.map { line =>
        line.split(',') match {
          case Array(id, name, age, lon, lat) =>
            val geom = factory.createPoint(new Coordinate(lon.toDouble, lat.toDouble))
            Row(geom, id, name, age)
          case _ =>
            throw new IllegalArgumentException(
              s"Expected 5 comma-separated fields (id,name,age,lon,lat) but got: $line")
        }
      }
    }

    // Column order must match the order of the values in each Row above.
    val structType = StructType(
      Seq(
        StructField("geom", PointUDT, nullable = true),
        StructField("studentId", StringType, nullable = true),
        StructField("name", StringType, nullable = true),
        StructField("age", StringType, nullable = true)
      )
    )

    ss.createDataFrame(rowRDD, structType)
  }

  /**
   * Creates the feature type in the GeoMesa HBase catalog and writes the
   * DataFrame to it through the `geomesa` Spark data source.
   *
   * @param inDataFrame             data to write; columns must match the schema built here
   *                                (`geom`, `studentId`, `name`, `age`)
   * @param toGeomesaCatalog        GeoMesa catalog name (`hbase.catalog`)
   * @param toGeomesaCatalogFeature feature type name to create and write to
   * @param zookeepers              ZooKeeper quorum (`hbase.zookeepers`); defaults to
   *                                [[DefaultZookeepers]]
   * @throws IllegalStateException if no GeoMesa data store can be created from the
   *                               connection parameters
   */
  def saveData(
      inDataFrame: sql.DataFrame,
      toGeomesaCatalog: String,
      toGeomesaCatalogFeature: String,
      zookeepers: String = DefaultZookeepers): Unit = {
    // Explicit .asJava instead of the deprecated implicit JavaConversions.
    import scala.collection.JavaConverters._

    val sft = SchemaBuilder.builder()
      .addPoint("geom", true)
      // "id" cannot be used as an attribute name: it is a reserved field.
      .addString("studentId")
      .addString("name")
      .addString("age")
      .build(toGeomesaCatalogFeature)

    // geomesa.mixed.geometries: declares that both point and non-point
    // geometries may be stored together.
    sft.getUserData.put("geomesa.mixed.geometries", Boolean.box(true))

    val params = Map(
      "hbase.zookeepers" -> zookeepers,
      "hbase.catalog" -> toGeomesaCatalog)

    // getDataStore returns null when no data store factory accepts the parameters
    // (e.g. the GeoMesa HBase runtime is missing from the classpath).
    val dataStore = Option(DataStoreFinder.getDataStore(params.asJava)).getOrElse(
      throw new IllegalStateException(
        s"Could not create a GeoMesa data store for catalog '$toGeomesaCatalog' " +
          s"(zookeepers: $zookeepers); check the connection parameters and the classpath"))

    // Create the feature type, then release the data store: the Spark writer
    // below builds its own store from the same options.
    try {
      dataStore.createSchema(sft)
    } finally {
      dataStore.dispose()
    }

    inDataFrame.write.format("geomesa")
      .options(params)
      .option("geomesa.feature", toGeomesaCatalogFeature)
      .save()
  }

}

在 HBase shell 中查看表是否已生成：

hbase(main):001:0> list

TABLE

gradytest

gradytest_student_id_v4

gradytest_student_z2_geom_v5

## GeoMesa 会把数据序列化后再写入 HBase，下面 scan 看到的是序列化后的内容

hbase(main):003:0> scan 'gradytest_student_z2_geom_v5'

ROW                               COLUMN+CELL

 \x000\x85\xD7\x8C\x9B\xE0\xE7\xF column=d:, timestamp=2022-02-09T19:21:38.287, value=\x03\x00\x04\x02\x00\x0E\x00 \x00"\x00&\x00(\

 F000017ed-e37a-4e60-b90f-93fc81e x00\x00\x00\x00\x01\x01@&333333@(333333\x821jac\xEB1\xB5

 0ab0e

 \x000\x922Q\xB1\xD4\x1E\xFF00001 column=d:, timestamp=2022-02-09T19:19:35.603, value=\x03\x00\x04\x02\x00\x0E\x00 \x00"\x00&\x00(\

 7ed-e35c-4d77-99e7-c6918a06c008  x00\x00\x00\x00\x01\x01@(333333@*333333\x822Lil\xF91\xB6

 \x000\x99\x8A\xA5\xB6\xEBQ\x0200 column=d:, timestamp=2022-02-09T19:21:38.332, value=\x03\x00\x04\x02\x00\x0E\x00 \x00"\x00&\x00(\

 0017ed-e37a-4ebd-b3a5-a9c7399a63 x00\x00\x00\x00\x01\x01@,333333@.333333\x823mik\xE51\xB6

 5b

 \x000\x99\x8A\xA5\xB6\xEBQ\x0200 column=d:, timestamp=2022-02-09T20:02:28.707, value=\x03\x00\x04\x02\x00\x0E\x00 \x00"\x00&\x00(\

 0017ed-e5d1-4257-a75d-b0e2372954 x00\x00\x00\x00\x01\x01@,333333@.333333\x823mik\xE51\xB6

 2e

 \x020\x85\xD7\x8C\x9B\xE0\xE7\xF column=d:, timestamp=2022-02-09T19:19:35.334, value=\x03\x00\x04\x02\x00\x0E\x00 \x00"\x00&\x00(\

 F000017ed-e35c-4d77-a841-b3bcf6f x00\x00\x00\x00\x01\x01@&333333@(333333\x821jac\xEB1\xB5

 aa8ac

 \x020\x922Q\xB1\xD4\x1E\xFF00001 column=d:, timestamp=2022-02-09T19:21:38.285, value=\x03\x00\x04\x02\x00\x0E\x00 \x00"\x00&\x00(\

 7ed-e37a-4e60-9d7f-66988be48234  x00\x00\x00\x00\x01\x01@(333333@*333333\x822Lil\xF91\xB6

 \x020\x99\x8A\xA5\xB6\xEBQ\x0200 column=d:, timestamp=2022-02-09T19:19:35.335, value=\x03\x00\x04\x02\x00\x0E\x00 \x00"\x00&\x00(\

 0017ed-e35c-4e9a-8600-97ed8d92c4 x00\x00\x00\x00\x01\x01@,333333@.333333\x823mik\xE51\xB6

 8b

 \x030\x85\xD7\x8C\x9B\xE0\xE7\xF column=d:, timestamp=2022-02-09T20:02:28.622, value=\x03\x00\x04\x02\x00\x0E\x00 \x00"\x00&\x00(\

 F000017ed-e5d1-41f8-ae71-84db58b x00\x00\x00\x00\x01\x01@&333333@(333333\x821jac\xEB1\xB5

 9478f

 \x030\x922Q\xB1\xD4\x1E\xFF00001 column=d:, timestamp=2022-02-09T20:02:28.662, value=\x03\x00\x04\x02\x00\x0E\x00 \x00"\x00&\x00(\

 7ed-e5d1-41f8-a308-efcee8b70bf9  x00\x00\x00\x00\x01\x01@(333333@*333333\x822Lil\xF91\xB6

9 row(s)

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>spark-practise</artifactId>
        <groupId>org.example</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>geomesa</artifactId>

    <!-- NOTE: hadoop.version, spark.version, scala.version and scala.binary.version are
         not defined in this file; presumably they are inherited from the parent POM. -->
    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <!-- Single source of truth for the GeoMesa version; both GeoMesa dependencies
             below reference it so they cannot drift apart. -->
        <geomesa.version>3.3.0</geomesa.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>

        <!-- GeoMesa HBase Spark runtime. Its hadoop-hdfs is excluded so that the
             version declared above is the one used. The Scala suffix follows
             scala.binary.version so it always matches the Spark artifacts. -->
        <dependency>
            <groupId>org.locationtech.geomesa</groupId>
            <artifactId>geomesa-hbase-spark-runtime-hbase2_${scala.binary.version}</artifactId>
            <version>${geomesa.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.hadoop</groupId>
                    <artifactId>hadoop-hdfs</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.locationtech.geomesa</groupId>
            <artifactId>geomesa-spark-core_${scala.binary.version}</artifactId>
            <version>${geomesa.version}</version>
        </dependency>

        <!-- Spark itself is supplied by the cluster at runtime, hence "provided". -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-yarn_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>

    <build>
        <resources>
            <resource>
                <directory>src/main/resources</directory>
                <filtering>true</filtering>
            </resource>
        </resources>

        <plugins>
            <!-- Compiles the Scala sources. -->
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.2.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <scalaVersion>${scala.version}</scalaVersion>
                </configuration>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
                        <phase>process-resources</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>scala-test-compile</id>
                        <phase>process-test-resources</phase>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

            <!-- Builds a shaded (fat) jar so the non-provided dependencies, such as the
                 GeoMesa HBase Spark runtime, are shipped with the job. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.2.1</version>
                <configuration>
                    <artifactSet>
                        <excludes>
                            <exclude>org.slf4j:*</exclude>
                        </excludes>
                    </artifactSet>
                </configuration>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <createDependencyReducedPom>false</createDependencyReducedPom>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <!-- Drop jar signature files: they no longer match the
                                         contents of the merged jar. -->
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <!-- Merges META-INF/services files from all dependencies
                                     instead of letting one overwrite the others. -->
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

PS：一定要使用 maven-shade-plugin（或其他打包插件）把相关依赖（比如 geomesa-hbase-spark-runtime-hbase2_2.12）打进最终的 jar 包，否则作业运行时会因为找不到这些依赖中的类而报错。

spark 写入数据到Geomesa(Hbase)的更多相关文章

MapReduce和Spark写入Hbase多表总结
作者:Syn良子出处:http://www.cnblogs.com/cssdongl 转载请注明出处大家都知道用mapreduce或者spark写入已知的hbase中的表时,直接在mapreduc ...
Spark Streaming实时写入数据到HBase
一.概述在实时应用之中,难免会遇到往NoSql数据如HBase中写入数据的情景.题主在工作中遇到如下情景,需要实时查询某个设备ID对应的账号ID数量.踩过的坑也挺多,举其中之一,如一开始选择使用NE ...
spark 读取Geomesa(Hbase)数据
package com.grady.geomesa import org.apache.hadoop.conf.Configuration import org.apache.spark.SparkC ...
Spark Streaming接收Kafka数据存储到Hbase
Spark Streaming接收Kafka数据存储到Hbase fly spark hbase kafka 主要参考了这篇文章https://yq.aliyun.com/articles/60712 ...
spark读取hdfs上的文件和写入数据到hdfs上面
def main(args: Array[String]): Unit = { val conf = new SparkConf() conf.set("spark.master" ...
HBase BulkLoad批量写入数据实战
1.概述在进行数据传输中,批量加载数据到HBase集群有多种方式,比如通过HBase API进行批量写入数据.使用Sqoop工具批量导数到HBase集群.使用MapReduce批量导入等.这些方式, ...
spark（三）从hbase取数据
前言通过spark获取hbase数据的过程中,遇到了InputFormat.文章主要围绕InputFormat介绍.会牵扯到spark,mapreduce,hbase相关内容 InputFormat ...
聊一聊 HBase 是如何写入数据的？
hi,大家好,我是大D.今天继续了解下 HBase 是如何写入数据的,然后再讲解一下一个比较经典的面试题. Region Server 寻址 HBase Client 访问 ZooKeeper: 获取 ...
Spark：DataFrame批量导入Hbase的两种方式(HFile、Hive)
Spark处理后的结果数据resultDataFrame可以有多种存储介质,比较常见是存储为文件.关系型数据库,非关系行数据库. 各种方式有各自的特点,对于海量数据而言,如果想要达到实时查询的目的,使 ...

随机推荐

关于我用python表白成功这件事【表白成功】
520,并非情人所属, 我们可以表白万物, 不管什么时候, 这都是一个特别的日子, 今天,我要表白所有, 心里有我的人! 在这个充满幸福的日子里, 我要把最美好的祝福, 送给心里有我的每一个人: 祝愿 ...
Future源码一观-JUC系列
背景介绍在程序中,主线程启动一个子线程进行异步计算,主线程是不阻塞继续执行的,这点看起来是非常自然的,都已经选择启动子线程去异步执行了,主线程如果是阻塞的话,那还不如主线程自己去执行不就好了.那会不 ...
protobuf 的交叉编译使用（C++）
前言为了提高通信效率,可以采用 protobuf 替代 XML 和 Json 数据交互格式,protobuf 相对来说数据量小,在进程间通信或者设备之间通信能够提高通信速率.下面介绍 protobu ...
构建 API 的7个建议【翻译】
迄今为止,越来越多的企业依靠API来为客户提供服务,以确保竞争的优势和业务可见性.出现这个情况的原因是微服务和无服务器架构正变得越来越普遍,API作为其中的关键节点,继承和承载了更多业务. 在这个前提 ...
Note -「模拟退火」
随机化算法属于省选芝士体系 0x01 前置芝士你只需要会 rand 就可以啦! 当然如果你想理解的更透彻也可以先看看爬山算法 0x02 关于退火退火是一种金属热处理工艺,指的是将金属缓慢加热到一 ...
Mysql 系列 | 日志模块
了解了 SQL 执行的流程,知道每一条语句都经过连接器.查询存储.分析器.优化器.执行器最后到存储引擎的过程.查询语句是如此,更新语句也不例外. 不同的是,更新语句会修改表数据,这里就涉及到两个重要的 ...
logrotate command in Linux
背景在生产过程中,由于磁盘空间.保留周期等因素,会对系统.应用等日志提出要求,要求系统日志定期进行轮转.压缩和删除,从而减少开销,而系统自带的logrotate 则是一个简单又实用的小工具,下面着 ...
BufferedWriter字符缓冲输出流和BufferedReader字符缓冲输入流
package com.yang.Test.BufferedStudy; import java.io.BufferedWriter; import java.io.FileWriter; impor ...
md文档使用小技巧
简介在日常写readme文档中,可能会遇到一些小问题,此处记录一下md文档编写过程中的一些小技巧. 插入图片在md文档中插入图片,目前有三种方式,本地导入.网络导入.base64导入. 本地导入 ...
Python3.7将普通图片(png)转换为SVG图片格式并且让你的网站Logo(图标)从此”动”起来
原文转载自「刘悦的技术博客」https://v3u.cn/a_id_148 在之前的几篇文章中,介绍了业界中比较火爆的图片技术SVG(Scalable Vector Graphics),比如Iconf ...

Spark 写入数据到 GeoMesa（HBase）

pom.xml

PS: 一定要使用 maven-shade-plugin 插件或其他插件将相关依赖（比如：geomesa-hbase-spark-runtime-hbase2_2.12）打入，否则会报错

spark 写入数据到Geomesa(Hbase)的更多相关文章

随机推荐

热门专题

PS: 一定要使用maven-shade-plugin 插件或其他插件将相关依赖（比如：geomesa-hbase-spark-runtime-hbase2_2.12）打入，否则会报错