package step3;

 import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List; import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements; import bean.Porn;
import util.DBConnection; /**
*
* @ClassName: porn91
* @Description: Resolves 91porn page addresses into playable video URLs
* @author zeze
* @date 2016-06-30 19:55:31
*
*/
public class porn91 { private static String cookie = "incap_ses_401_649914=31EbXVOgx0r6Ql5TmqOQBdjxdFcAAAAAu7MrrqICFZvpjsIw5VriGQ==; incap_ses_434_649914=wx2HcnWH7GDQCChRweAFBt/xdFcAAAAAczn9Ohl2VBPqxEd8kRi2GA==; incap_ses_407_649914=U4VYNM5iO1l1H0VP7/SlBWXydFcAAAAAifL73Yq/OnIgRqKWiWPqUg==; incap_ses_406_649914=8Ub/DfvqEGs9L9gFemeiBWEKdVcAAAAA+aBeDqKyWw37Sv+KZ4cdlA==; incap_ses_432_649914=bLzAYBXvVG0kSU6wyMX+BWUKdVcAAAAAZW+uykXgylzu/dZOu7IDWw==; _ga=GA1.2.1738858661.1466764840; _gat=1; visid_incap_649914=2hb3ym0OQ9C7sr1krqKCQTUObVcAAAAAQUIPAAAAAADQQCM/QP5jhCXO3+mlIKmg; incap_ses_199_649914=RkWbbfybyCoL2fxKs/3CAqIbdVcAAAAAOa+RJFdt35NV8xtM8MbP8Q==; session=eyJfZnJlc2giOmZhbHNlLCJjc3JmX3Rva2VuIjp7IiBiIjoiTkdFek9HRmtNakkxTldVM05EVXpZMkZoTldKaE5tWXpOV014TlRBNU1UZ3dPVGcyTkRNMU5BPT0ifX0.ClatMQ.INJmWYMZ8T220CgsSTcfpHhTxXI";
private static String cookie2 = "incap_ses_401_649914=31EbXVOgx0r6Ql5TmqOQBdjxdFcAAAAAu7MrrqICFZvpjsIw5VriGQ==; incap_ses_434_649914=wx2HcnWH7GDQCChRweAFBt/xdFcAAAAAczn9Ohl2VBPqxEd8kRi2GA==; incap_ses_407_649914=U4VYNM5iO1l1H0VP7/SlBWXydFcAAAAAifL73Yq/OnIgRqKWiWPqUg==; incap_ses_406_649914=8Ub/DfvqEGs9L9gFemeiBWEKdVcAAAAA+aBeDqKyWw37Sv+KZ4cdlA==; incap_ses_432_649914=bLzAYBXvVG0kSU6wyMX+BWUKdVcAAAAAZW+uykXgylzu/dZOu7IDWw==; _ga=GA1.2.1738858661.1466764840; _gat=1; visid_incap_649914=2hb3ym0OQ9C7sr1krqKCQTUObVcAAAAAQUIPAAAAAADQQCM/QP5jhCXO3+mlIKmg; incap_ses_199_649914=RkWbbfybyCoL2fxKs/3CAqIbdVcAAAAAOa+RJFdt35NV8xtM8MbP8Q==; session=eyJfZnJlc2giOmZhbHNlLCJjc3JmX3Rva2VuIjp7IiBiIjoiTkdFek9HRmtNakkxTldVM05EVXpZMkZoTldKaE5tWXpOV014TlRBNU1UZ3dPVGcyTkRNMU5BPT0ifX0.ClatMw.6MGC1jX7mgjsChpGFBd-xHTv9ZU"; private static String Token = "1467296187##60ecf40d9328862cc6cd6a478adfc72ee0554050"; private static String Url = "http://freeget.co/video/extraction";
private static String url001 = null;
private static String dirfile = "F:/91porn/91url.csv";
private static String destfile = "F:/91porn/data.txt"; private static int cnt0 = 0; private static String num = null;
private static String title = null;
private static String time = null;
private static String longtime = null;
private static String viewnum = null;
private static String Parurl = null;// "http://www.91porn.com/view_video.php?viewkey=c5ec60d0da8c8fbdb180&page=4&viewtype=basic&category=mr"; public static void main(String[] args) throws InterruptedException { File file = new File(dirfile);
FileReader reader = null;
BufferedReader br = null;
try {
reader = new FileReader(file);
br = new BufferedReader(reader);
String str = null;
String[] strArr = null;
int cnt = 0;
while ((str = br.readLine()) != null) {
// System.out.println(str);
strArr = str.split(",");
if (strArr.length != 7)
continue;
num = strArr[0];
title = strArr[1];
time = strArr[2];
longtime = strArr[4];
viewnum = strArr[5];
Parurl = strArr[6];
cnt++;
System.out.println(num + "," + title + "," + time);
func_step1();
}
System.out.println("采集结束,总共:" + cnt + "条,成功写入" + cnt0 + "条"); } catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
} finally {
if (br != null) {
try {
br.close();
} catch (Exception e2) {
// TODO: handle exception
e2.printStackTrace();
}
}
if (reader != null) {
try {
reader.close();
} catch (Exception e2) {
// TODO: handle exception
e2.printStackTrace();
}
}
} } private static void func_step1() {
HttpClient httpClient = new HttpClient();
try {
PostMethod postMethod = new PostMethod(Url);
postMethod.getParams().setContentCharset("utf-8");
// 每次访问需授权的网址时需 cookie 作为通行证
postMethod.setRequestHeader("cookie", cookie);
postMethod.setRequestHeader("X-CSRFToken", Token);
postMethod.setRequestHeader("Accept-Language", "zh-CN,zh;q=0.8");
postMethod.setRequestHeader("Host", "freeget.co");
postMethod.setRequestHeader("Referer", "http://freeget.co/");
postMethod.setRequestHeader("User-Agent",
"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) QQBrowser/9.2.5063.400");
postMethod.setParameter("url", Parurl);
int statusCode = httpClient.executeMethod(postMethod);// 返回状态码200为成功,500为服务器端发生运行错误
System.out.println("返回状态码:" + statusCode);
// 打印出返回数据,检验一下是否成功
String result = postMethod.getResponseBodyAsString();
if (statusCode == 200) {
// 解析成功,取得token和view_key
JSONObject a = new JSONObject(result);
url001 = "http://freeget.co/video/" + a.get("view_key") + "/" + a.get("token");
System.out.println("视频解析地址:" + url001);
func_step2(url001);
}
} catch (Exception e) {
e.printStackTrace();
}
} private static void func_step2(String url) {
HttpClient httpClient = new HttpClient();
try {
GetMethod getMethod = new GetMethod(url);
getMethod.getParams().setContentCharset("utf-8");
getMethod.setRequestHeader("cookie", cookie2);
getMethod.setRequestHeader("Accept-Language", "zh-cn");
getMethod.setRequestHeader("User-Agent",
"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) QQBrowser/9.2.5063.400");
int statusCode = httpClient.executeMethod(getMethod);// 返回状态码200为成功,500为服务器端发生运行错误
// System.out.println("返回状态码:" + statusCode);
// 打印出返回数据,检验一下是否成功
InputStream inputStream = getMethod.getResponseBodyAsStream();
BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
StringBuffer stringBuffer = new StringBuffer();
String str = "";
while ((str = br.readLine()) != null) {
stringBuffer.append(str);
}
if (statusCode == 200) {
Document doc = Jsoup.parse(stringBuffer.toString());
Elements name = doc.select("a");
String playurl = name.get(4).text();
System.out.println("在线播放地址:" + playurl);
writefile(playurl);
cnt0++;
}
} catch (Exception e) {
e.printStackTrace();
}
} private static void writefile(String url) {
FileWriter fw = null;
BufferedWriter bw = null;
PrintWriter pw = null;
try {
fw = new FileWriter(new File(destfile), true);
bw = new BufferedWriter(fw);
pw = new PrintWriter(bw);
pw.write(num + ',' + title + ',' + time + ',' + longtime + ',' + viewnum + ',' + url + "\r\n");
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} finally {
if (pw != null) {
pw.close();
}
if (bw != null) {
try {
bw.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
if (fw != null) {
try {
fw.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
} public List<Porn> QueryAllBook() {
java.sql.Connection connection = DBConnection.getConnection();
String sql = "select * from porn where status=0";
java.sql.PreparedStatement pstmt = DBConnection.getPreparedStatement(connection, sql);
List<Porn> pornlist = new ArrayList<Porn>();
System.out.println(sql);
try {
Statement stmt = connection.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE, ResultSet.CONCUR_READ_ONLY);
java.sql.ResultSet rs = stmt.executeQuery(sql);
while (rs.next()) {
Porn porn = new Porn();
porn.setNum(rs.getString(1));
porn.setTitle(rs.getString(2));
porn.setTime(rs.getString(3));
porn.setViewkey(rs.getString(4));
porn.setLongtime(rs.getString(5));
porn.setViewnum(rs.getString(6));
porn.setParurl(rs.getString(7));
pornlist.add(porn);
}
rs.last();
} catch (SQLException e) {
e.printStackTrace();
} finally {
DBConnection.close(connection, pstmt, null);
}
return pornlist;
}
}

Java解析采集模块的更多相关文章

  1. java解析xml的三种方法

    java解析XML的三种方法 1.SAX事件解析 package com.wzh.sax; import org.xml.sax.Attributes; import org.xml.sax.SAXE ...

  2. atitit.java解析sql语言解析器解释器的实现

    atitit.java解析sql语言解析器解释器的实现 1. 解析sql的本质:实现一个4gl dsl编程语言的编译器 1 2. 解析sql的主要的流程,词法分析,而后进行语法分析,语义分析,构建sq ...

  3. java 解析XML文档

    Java 解析XML文档 一.解析XML文档方式: 1.DOM方式:将整个XML文档读取到内存中,按照XML文件的树状结构图进行解析. 2.SAX方式:基于事件的解析,只需要加载XML中的部分数据,优 ...

  4. Java 解析 XML

    Java 解析 XML 标签: Java基础 XML解析技术有两种 DOM SAX DOM方式 根据XML的层级结构在内存中分配一个树形结构,把XML的标签,属性和文本等元素都封装成树的节点对象 优点 ...

  5. JAVA解析XML的四种方式

    java解析xml文件四种方式 1.介绍 1)DOM(JAXP Crimson解析器) DOM是用与平台和语言无关的方式表示XML文档的官方W3C标准.DOM是以层次结构组织的节点或信息片断的集合.这 ...

  6. JSON 之JAVA 解析

    一.   JSON (JavaScript Object Notation)一种简单的数据格式,比xml更轻巧. Json建构于两种结构:     1.“名称/值”对的集合(A collection ...

  7. Java解析XML汇总(DOM/SAX/JDOM/DOM4j/XPath)

    [目录] 一.[基础知识——扫盲] 二.[DOM.SAX.JDOM.DOM4j简单使用介绍] 三.[性能测试] 四.[对比] 五.[小插曲XPath] 六.[补充] 关键字:Java解析xml.解析x ...

  8. Java 解析epub格式电子书,helloWorld程序,附带源程序和相关jar包

    秀才坤坤出品 一.epub格式电子书 相关材料和源码均在链接中可以下载:http://pan.baidu.com/s/1bnm8YXT 包括 1.JAVA项目工程test_epub,里面包括了jar包 ...

  9. Java解析XML文档(简单实例)——dom解析xml

      一.前言 用Java解析XML文档,最常用的有两种方法:使用基于事件的XML简单API(Simple API for XML)称为SAX和基于树和节点的文档对象模型(Document Object ...

随机推荐

  1. javac 及 java命令的使用问题(错误或无法加载主类)

    一.问题 使用 javac 命令编译完.java源文件后,用 java 命令运行.class文件时,通常会遇到如下或类似的问题: 错误: 找不到或无法加载主类 HelloWorld.class 二.解 ...

  2. 《深入理解bootstrap》读书笔记:第4章 CSS组件(下)

    十. 标签(.label类,label-xxx) 高亮一些标题部分. 1 2 3 4 5 6 <h1>HELLO<span class="label label-defau ...

  3. Windows Server 2008修改IE浏览器级别便于使用

    1.降低IE安全级别  Win 2008默认IE的安全级别为“高”,并且不能随意调整,在浏览网页的时候有些会有一些限制,可以打开注册表编辑器进行设置,定位到 [HKEY_LOCAL_MACHINE\S ...

  4. 新浪微博客户端(63)-使用block进行链式编程

    Person.h #import <Foundation/Foundation.h> @interface Person : NSObject - (Person *(^)())study ...

  5. [CentOS7]安装mysql遇到的问题

    摘要 在安装mysql的时候,遇到了一些问题,这里列出所遇到的问题. yum list mysql-server 在使用命令yum list mysql-server安装mysql的时候,遇到如图所示 ...

  6. 今天依然是 JQ点击事件之“点击淡入淡出事件”

    废话不多说,先贴代码,再解释 <script> $(document).ready(function(){ $("button").click(function(){ ...

  7. mysql 简单练习

    1.查找全部学生的信息 [SQL]select * from student 受影响的行: 0 时间: 0.000s 2.查出成绩及格的所有人 [SQL]select * from student w ...

  8. 关于Windows下安装mongodb和加入Windows系统启动项

    .首先:在http://www.mongodb.org/downloads官网下载最新的win版本的mongodb下载包(我下载到d盘) .加压缩,修改文件夹名字为mongodb,建立放数据库文件夹w ...

  9. CF461B Appleman and Tree (树DP)

    CF462D Codeforces Round #263 (Div. 2) D Codeforces Round #263 (Div. 1) B B. Appleman and Tree time l ...

  10. highcharts图表中级入门之xAxis label:X(横)坐标刻度值过长截断多行(换行)显示问题说明

    在使用highcharts图表的过程中,总会碰到这样一个很是棘手的问题,横坐标刻度值太长,在不换行显示的情况下显得格外拥挤.虽然针对这一问题是可以对其刻度值进行旋转以此来避开显示拥挤问题[如何让hig ...