titlesplit
/**
* Created by lkl on 2017/6/26.
*///spark-shell --driver-class-path /home/hadoop/test/mysqljdbc.jar
import java.sql.{DriverManager, ResultSet}
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import java.util.Date
object titlesplit {
val rl= "jdbc:mysql://10.19.65.17:54321/emotion?user=emotion&password=qingxu&useUnicode=true&characterEncoding=utf8&autoReconnect=true&failOverReadOnly=false"
classOf[com.mysql.jdbc.Driver]
val conn = DriverManager.getConnection(rl)
val statement = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_UPDATABLE)
def main(args: Array[String]) {
val conf = new SparkConf()
// val conf = new SparkConf().setAppName("test").setMaster("local")
val sc = new SparkContext(conf)
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
val format = new java.text.SimpleDateFormat("yyyy-MM-dd")
val yearformat = new java.text.SimpleDateFormat("yyyy")
val year = yearformat.format(new java.util.Date().getTime())
val format2 = new java.text.SimpleDateFormat("yyyy/MM/dd")
val dat = format2.format(new java.util.Date().getTime() - 0 * 24 * 60 * 60 * 1000).toString
val st=sqlContext.read.json("hdfs://ns1/user/datacenter/home/datacenter/datacollect/logs/dataplatform/Crawler/Crawler_Common_WebPageNews/"+dat+"/*.gz")
// val st=sqlContext.read.json("hdfs://192.168.0.211:9000/user/datacenter/home/datacenter/datacollect/logs/dataplatform/Crawler/Crawler_Common_WebPageNews/"+dat+"/*.gz")
val j=st.toDF().registerTempTable("job")
val ed = sqlContext.sql("select `innerSessionId`,SUBSTR(`time`,1,10) AS time,`channelType`,`sourcetitle`,`title` from job")
val pp = ed.map(p => {
val v0 = p.getString(0)
val v2 = p.getString(2)
val v1 = p.get(1)
val v3 = p.getString(3)
val v4 = p.getString(4)
val v5 = p.getString(4).split("\\|")
(v0, v1, v2, v3, v4, v5)
}) pp.foreach(p => {
for (i <- 0 until p._6.size) {
val v1 = p._2.toString val v0 = p._1
val v2 = p._3
val v3 = p._4
val v4 = p._5
val v5 = p._6(i).split(" ")
if (v5.size == 4) {
val now = new Date()
val a = now.getTime.toInt
insert(v0, v1, v2, v3, v4, v5(0), v5(1), v5(2), v5(3),a)
}
}
})
conn.close()
}
def insert(value0: String, value1: String, value2: String, value3: String, value4: String, value5: String,
value6: String, value7: String, value8: String,value9:Int): Unit = {
try {
val prep = conn.prepareStatement("INSERT INTO titlesplit(innserSessionid,times,channelType,sourcetitle,title,words,characters,refer,role,Nowtime) VALUES (?,?,?,?,?,?,?,?,?,?) ")
prep.setString(1, value0)
prep.setString(2, value1)
prep.setString(3, value2)
prep.setString(4, value3)
prep.setString(5, value4)
prep.setString(6, value5)
prep.setString(7, value6)
prep.setString(8, value7)
prep.setString(9, value8)
prep.setInt(10,value9)
prep.executeUpdate
} catch {
case e: Exception => e.printStackTrace
}
finally {
}
}
}
titlesplit的更多相关文章
- titlesplit源码
) UNSIGNED NOT NULL AUTO_INCREMENT, innserSessionid ), times ), channelType ), sourcetitle ), title ...
- middle
/** * Created by lkl on 2017/7/31. *//** * Created by lkl on 2017/6/26. *///spark-shell --driver-cla ...
- result源码
CREATE TABLE `result` (`id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,`thetime` CHAR(100) , `category ...
- middle源码
CREATE TABLE `middle` ( `id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT, `innserSessionid` VARCHAR(250 ...
随机推荐
- 潭州课堂25班:Ph201805201 并发(协程) 第十五课 (课堂笔记)
#斐波那契 def fid(n): res = [] indx = 0 a = 0 b = 1 while indx < n : res.append(b) a,b = b,a+b indx + ...
- 在Win7 64位旗舰版下,利用Vs2008编译64位的Qt 4.8.2
1.下载qt-everywhere-opensource-src-4.8.2.zip. 2.VS2008需要安装x64编译器. 3.将qt-everywhere-opensource-src-4.8. ...
- 解决wsl不能安装z.sh问题
z.sh是韦大很推崇的类似autojump的bash插件,能够很方便的寻找目录,然而wsl下不能直接使用,解决方法在其github仓库(z)的issue中找到: Reproduce it at Mic ...
- spyder 安装
https://www.cnblogs.com/pcat/p/5398997.html
- 【转】Angular之constructor和ngOnInit差异及适用场景
原文:http://liuwenzhuang.github.io/2016/03/04/angular2-constructor-versus-ngOnInit.html -------------- ...
- 【Yaml】Yaml学习笔记
转载:https://blog.csdn.net/moshenglv/article/details/52084899 YAML何许物也?在XML泛滥的情况下,YAML的出现的确让人眼前一亮,在初步学 ...
- 原创:vsphere概念深入系列三:vSphere命令行管理
假设无法近距离接触物理主机,只能远程命令行管理,. 以下命令行可以起到点作用. 首先需要安装vSphere CLI工具. 启动后界面: 1.查看datastore内容 所有命令行工具都可以加上-ser ...
- [Done]SnowFlake生成Long类型主键返回前台过长导致精度缺失的问题
问题描述: 在开发过程中,项目的主键生成器是SnowFlake,其生成的long主键是28位, 但是js中Long的最大值:https://blog.csdn.net/sunmerZeal/artic ...
- 【微信上传素材接口--永久性】微信永久性上传、获取返回的medie_id 和url
上传图片到微信服务器获得media_id和url (永久性) 其他接口类:https://www.cnblogs.com/gjw-hsf/p/7375261.html 转载地址:https://blo ...
- InnoDB 存储引擎的主要知识点介绍
本文转载自:Draveness,略有修改 原文链接:『浅入浅出』MySQL 和 InnoDB · 面向信仰编程 作为一名开发人员,在日常的工作中会难以避免地接触到数据库,无论是基于文件的 sqlite ...