package es

import java.io.InputStream
import java.text.SimpleDateFormat
import java.util.{Calendar, Date, Properties}

import org.elasticsearch.spark.rdd.EsSpark
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.slf4j.LoggerFactory

object ShoppingcartMarketToEs {

  private val log = LoggerFactory.getLogger(ShoppingcartMarketToEs.getClass)

  // Elasticsearch hosts are read from a properties file on the classpath.
  val prop = new Properties()
  val is: InputStream = this.getClass.getResourceAsStream("/elastic.properties")
  prop.load(is)
  val ENVIRONMENT_SETTING = "es_host_sit"
  val host = prop.getProperty(ENVIRONMENT_SETTING)

  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("ReadSnCategoryToEs")
    // sparkConf.set("spark.sql.hive.metastorePartitionPruning", "false")
    sparkConf.set("es.nodes", host)
    sparkConf.set("es.nodes.wan.only", "true")
    // sparkConf.set("es.port", "9200")
    // sparkConf.set("es.index.auto.create", "true")
    // sparkConf.set("es.batch.size.entries", "5000")
    // sparkConf.set("es.write.operation", "upsert")

    val session = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate()
    session.sql("use sospdm")

    // Registers a UDF that returns the current time in UTC, formatted like
    // "2019-05-01T12:34:56.789+0000", so Elasticsearch can index it as a date.
    session.udf.register("get_utc_time", () => {
      val cal = Calendar.getInstance()
      cal.setTime(new Date())
      val zoneOffset = cal.get(Calendar.ZONE_OFFSET)
      val dstOffset = cal.get(Calendar.DST_OFFSET)
      cal.add(Calendar.MILLISECOND, -(zoneOffset + dstOffset))
      val utcTime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS").format(cal.getTime)
      utcTime.replace(" ", "T") + "+0000"
    })

    val querySql = "select pid,shop_id,gds_cd,gds_nm,gds_add_num,gds_add_time,gds_price," +
      "expect_tran_price,l4_gds_grp_cd,l4_gds_grp_nm,category_cd,category_nm,brand_cd,brand_nm," +
      "'null' as create_user,'null' as update_user,create_time,update_time," +
      "get_utc_time() as `@timestamp` " +
      "from sospdm.tdm_wbank_opts_t_goods_info_newest_ed"
    val resultDF = session.sql(querySql)

    // Preview a few rows in non-production environments.
    if (!ENVIRONMENT_SETTING.contains("prd")) {
      resultDF.show(10)
    }

    val tuple = resultDF.rdd.map(row => {
      val pid: String = row.getAs[String]("pid").toString
      val shop_id: String = row.getAs[String]("shop_id").toString
      val gds_cd: String = row.getAs[String]("gds_cd").toString
      val gds_nm: String = row.getAs[String]("gds_nm").toString
      val gds_add_num: String = row.getAs[String]("gds_add_num").toString
      val gds_add_time: String = row.getAs[String]("gds_add_time").toString
      val gds_price: String = row.getAs[String]("gds_price").toString
      val expect_tran_price: String = row.getAs[String]("expect_tran_price").toString
      val l4_gds_grp_cd: String = row.getAs[String]("l4_gds_grp_cd").toString
      val l4_gds_grp_nm: String = row.getAs[String]("l4_gds_grp_nm").toString
      val category_cd: String = row.getAs[String]("category_cd").toString
      val category_nm: String = row.getAs[String]("category_nm").toString
      val brand_cd: String = row.getAs[String]("brand_cd").toString
      val brand_nm: String = row.getAs[String]("brand_nm").toString
      val create_user: String = row.getAs[String]("create_user").toString
      val update_user: String = row.getAs[String]("update_user").toString
      val create_time: String = row.getAs[String]("create_time").toString
      val update_time: String = row.getAs[String]("update_time").toString
      val `@timestamp`: String = row.getAs[String]("@timestamp").toString

      var map = Map[String, Object]()
      map += ("pid" -> pid)
      map += ("shop_id" -> shop_id)
      map += ("gds_cd" -> gds_cd)
      map += ("gds_nm" -> gds_nm)
      map += ("gds_add_num" -> gds_add_num)
      map += ("gds_add_time" -> gds_add_time)
      map += ("gds_price" -> gds_price)
      map += ("expect_tran_price" -> expect_tran_price)
      map += ("l4_gds_grp_cd" -> l4_gds_grp_cd)
      map += ("l4_gds_grp_nm" -> l4_gds_grp_nm)
      map += ("category_cd" -> category_cd)
      map += ("category_nm" -> category_nm)
      map += ("brand_cd" -> brand_cd)
      map += ("brand_nm" -> brand_nm)
      map += ("create_user" -> create_user)
      map += ("update_user" -> update_user)
      map += ("create_time" -> create_time)
      map += ("update_time" -> update_time)
      map += ("@timestamp" -> `@timestamp`)

      // The first tuple element is the document metadata: a plain String key
      // becomes the _id, so rows sharing the same shop_id + gds_cd + gds_add_time
      // overwrite each other instead of creating duplicates.
      (shop_id + gds_cd + gds_add_time, map)
    })
    EsSpark.saveToEsWithMeta(tuple, "idx_shop_goods_addcart/idx_shop_goods_addcart")
  }
}
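
The job expects an elastic.properties file on the classpath (typically src/main/resources). A minimal sketch of that file, with placeholder host values (the real SIT/PRD addresses are environment-specific):

es_host_sit=es-sit-node1:9200,es-sit-node2:9200
es_host_prd=es-prd-node1:9200,es-prd-node2:9200

For reference, the Calendar arithmetic in get_utc_time can be written more directly on Java 8+ with java.time. A minimal sketch of an equivalent registration, not the original code; the formatter is built inside the lambda to keep the closure serializable:

session.udf.register("get_utc_time", () =>
  java.time.format.DateTimeFormatter
    .ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSZ")   // "Z" prints "+0000" at UTC
    .withZone(java.time.ZoneOffset.UTC)
    .format(java.time.Instant.now()))

The accompanying pom.xml: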
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>spark-hive</groupId>
<artifactId>spark-hive</artifactId>
<version>1.0-SNAPSHOT</version>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<scala.version>2.11.8</scala.version>
<spark.version>2.1.0.9</spark.version>
<spark.artifactId.version>2.11</spark.artifactId.version>
</properties>
<dependencies>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.1.1</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.1</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.2</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.21</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>2.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
<version>2.1.0</version>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.8.2</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>2.1.0</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.29</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_${spark.artifactId.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<!--flink dependency-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>1.5.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.11</artifactId>
<version>1.5.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.11</artifactId>
<version>1.5.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-wikiedits_2.11</artifactId>
<version>1.5.0</version>
</dependency>
<!--hbase dependency-->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase</artifactId>
<version>0.98.8-hadoop2</version>
<type>pom</type>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>0.98.8-hadoop2</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>0.98.8-hadoop2</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>0.98.8-hadoop2</version>
</dependency>
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-spark-20_${spark.artifactId.version}</artifactId>
<version>6.7.1</version>
</dependency>
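<!-- Note: elasticsearch-spark-20_2.11 6.7.1 targets Spark 2.x / Scala 2.11,
     matching spark.artifactId.version above; the connector version should
     generally track the Elasticsearch cluster version. -->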
<!-- https://mvnrepository.com/artifact/org.elasticsearch/elasticsearch -->
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<version>1.8</version>
<executions>
<execution>
<id>add-source</id>
<phase>generate-sources</phase>
<goals>
<goal>add-source</goal>
</goals>
<configuration>
<sources>
<source>src/main/scala</source>
<source>src/test/scala</source>
</sources>
</configuration>
</execution>
<execution>
<id>add-test-source</id>
<phase>generate-sources</phase>
<goals>
<goal>add-test-source</goal>
</goals>
<configuration>
<sources>
<source>src/test/scala</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<source>1.7</source>
<target>1.7</target>
<encoding>${project.build.sourceEncoding}</encoding>
</configuration>
</plugin>
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>compile</goal>
<goal>add-source</goal>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
<configuration>
<scalaVersion>2.11.8</scalaVersion>
<sourceDir>src/main/scala</sourceDir>
<jvmArgs>
<jvmArg>-Xms64m</jvmArg>
<jvmArg>-Xmx1024m</jvmArg>
</jvmArgs>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-release-plugin</artifactId>
<version>2.5.3</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-deploy-plugin</artifactId>
<configuration>
<skip>false</skip>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.4.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<minimizeJar>false</minimizeJar>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
<resources>
<resource>
<directory>src/main/resources</directory>
<filtering>true</filtering>
</resource>
<resource>
<directory>src/main/resources/${profiles.active}</directory>
</resource>
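<!-- ${profiles.active} is not defined in this pom; presumably it is supplied
     by a Maven profile or -Dprofiles.active at build time. -->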
</resources>
<!-- Fix for the m2e "Plugin execution not covered by lifecycle configuration" warning in Eclipse -->
<pluginManagement>
<plugins>
<plugin>
<groupId>org.eclipse.m2e</groupId>
<artifactId>lifecycle-mapping</artifactId>
<version>1.0.0</version>
<configuration>
<lifecycleMappingMetadata>
<pluginExecutions>
<pluginExecution>
<pluginExecutionFilter>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<versionRange>[1.8,)</versionRange>
<goals>
<goal>add-source</goal>
<goal>add-test-source</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore></ignore>
</action>
</pluginExecution>
<pluginExecution>
<pluginExecutionFilter>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<versionRange>[1.8,)</versionRange>
<goals>
<goal>compile</goal>
<goal>add-source</goal>
<goal>testCompile</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore></ignore>
</action>
</pluginExecution>
</pluginExecutions>
</lifecycleMappingMetadata>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>
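
With the shade plugin bound to the package phase (and the assembly plugin's jar-with-dependencies descriptor configured), mvn package produces a jar that bundles the job's dependencies. Note that spark-hive is declared with provided scope, so the cluster's Spark distribution is expected to supply it at runtime, while the Elasticsearch connector is bundled at its default compile scope.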
