/**
* Created by lkl on 2017/6/26.
*///spark-shell --driver-class-path /home/hadoop/test/mysqljdbc.jar
import java.sql.{DriverManager, ResultSet}
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import java.util.Date
object titlesplit {

val rl= "jdbc:mysql://10.19.65.17:54321/emotion?user=emotion&password=qingxu&useUnicode=true&characterEncoding=utf8&autoReconnect=true&failOverReadOnly=false"
classOf[com.mysql.jdbc.Driver]

val conn = DriverManager.getConnection(rl)
val statement = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_UPDATABLE)
def main(args: Array[String]) {
val conf = new SparkConf()
// val conf = new SparkConf().setAppName("test").setMaster("local")
val sc = new SparkContext(conf)
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
val format = new java.text.SimpleDateFormat("yyyy-MM-dd")
val yearformat = new java.text.SimpleDateFormat("yyyy")
val year = yearformat.format(new java.util.Date().getTime())
val format2 = new java.text.SimpleDateFormat("yyyy/MM/dd")
val dat = format2.format(new java.util.Date().getTime() - 0 * 24 * 60 * 60 * 1000).toString
val st=sqlContext.read.json("hdfs://ns1/user/datacenter/home/datacenter/datacollect/logs/dataplatform/Crawler/Crawler_Common_WebPageNews/"+dat+"/*.gz")
// val st=sqlContext.read.json("hdfs://192.168.0.211:9000/user/datacenter/home/datacenter/datacollect/logs/dataplatform/Crawler/Crawler_Common_WebPageNews/"+dat+"/*.gz")
val j=st.toDF().registerTempTable("job")
val ed = sqlContext.sql("select `innerSessionId`,SUBSTR(`time`,1,10) AS time,`channelType`,`sourcetitle`,`title` from job")
val pp = ed.map(p => {
val v0 = p.getString(0)
val v2 = p.getString(2)
val v1 = p.get(1)
val v3 = p.getString(3)
val v4 = p.getString(4)
val v5 = p.getString(4).split("\\|")
(v0, v1, v2, v3, v4, v5)
}) pp.foreach(p => {

for (i <- 0 until p._6.size) {
val v1 = p._2.toString val v0 = p._1

val v2 = p._3
val v3 = p._4
val v4 = p._5
val v5 = p._6(i).split(" ")
if (v5.size == 4) {
val now = new Date()
val a = now.getTime.toInt
insert(v0, v1, v2, v3, v4, v5(0), v5(1), v5(2), v5(3),a)
}

}

})
conn.close()
}
def insert(value0: String, value1: String, value2: String, value3: String, value4: String, value5: String,
value6: String, value7: String, value8: String,value9:Int): Unit = {

try {
val prep = conn.prepareStatement("INSERT INTO titlesplit(innserSessionid,times,channelType,sourcetitle,title,words,characters,refer,role,Nowtime) VALUES (?,?,?,?,?,?,?,?,?,?) ")
prep.setString(1, value0)
prep.setString(2, value1)
prep.setString(3, value2)
prep.setString(4, value3)
prep.setString(5, value4)
prep.setString(6, value5)
prep.setString(7, value6)
prep.setString(8, value7)
prep.setString(9, value8)
prep.setInt(10,value9)
prep.executeUpdate
} catch {
case e: Exception => e.printStackTrace
}
finally {
}
}
}

titlesplit的更多相关文章

  1. titlesplit源码

    ) UNSIGNED NOT NULL AUTO_INCREMENT, innserSessionid ), times ), channelType ), sourcetitle ), title ...

  2. middle

    /** * Created by lkl on 2017/7/31. *//** * Created by lkl on 2017/6/26. *///spark-shell --driver-cla ...

  3. result源码

    CREATE TABLE `result` (`id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,`thetime` CHAR(100) , `category ...

  4. middle源码

    CREATE TABLE `middle` ( `id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT, `innserSessionid` VARCHAR(250 ...

随机推荐

  1. Windows平台交叉编译Arm Linux平台的QT5.7库

    1.准备交叉编译环境 环境说明:Windows 7 64位 此过程需要: (1)Qt库开源代码,我使用的是5.7.0版本: (2)Perl语言环境5.12版本以上: (3)Python语言环境 2.7 ...

  2. UITableView滚动优化(RunLoop)

    链接: 利用RunLoop优化tableView RunLoop方式优化加载tableview RunLoop总结:RunLoop的应用场景(三)滚动视图流畅性优化 TableView加载图片的优化逻 ...

  3. 迭代函数:zip、enumerate,list解析

    #encoding:utf-8 """ 并行迭代: zip enumerate 获取元素及下标 list解析 iter """ #zip # ...

  4. Structured Streaming教程(3) —— 与Kafka的集成

    Structured Streaming最主要的生产环境应用场景就是配合kafka做实时处理,不过在Strucured Streaming中kafka的版本要求相对搞一些,只支持0.10及以上的版本. ...

  5. jdk 10.0.2 bug修复

    之前记录过jdk9+版本的1个bug,某些情况下会导致方法执行二遍,今天早上打开笔记本(mac),弹出一个框提示jdk升级10.0.2,顺手点了一下,然后验证了下该bug,发现居然fix掉了,推荐大家 ...

  6. Swift中String与NSDate的互相转换

    其实每种编程语言,我都觉得String和日期对象的相互转换是一种十分麻烦的事情,Swift也不例外.这篇博客记录了我学到的String与NSDate的互相转换方法,供大家参考. 从String转为NS ...

  7. BTrace使用简介

    很多时候在online的应用出现问题时,很多时候我们需要知道更多的程序的运行细节,但又不可能在开发的时候就把程序中所有的运行细节都打印到日志上,通常这个时候能采取的就是修改代码,重新部署,然后再观察, ...

  8. goto语句引起的crosses initialization of XXX

    1. 背景 goto语句虽然目前已经不提倡使用,但是用起来还是很方便,尤其是老代码中见的比较多. 在改动有goto语句的老代码时需要特别注意,是否跳过来资源的释放.有用变量的初始化等等. 很久之前写c ...

  9. C# System.IO.File

    using System; using System.IO; class Test { public static void Main() { string path = @"c:\temp ...

  10. sublime text 3-right click context menu

    dd a system wide windows explorer button " Edit with Sublime" similar to how Notepad++ doe ...