用java实现新浪爬虫,代码完整剖析(仅针对当前SinaSignOn有效)
先来看我们的web.xml文件,如下
<!DOCTYPE web-app PUBLIC
"-//Sun Microsystems, Inc.//DTD Web Application 2.3//EN"
"http://java.sun.com/dtd/web-app_2_3.dtd" > <web-app>
<display-name>MySinaSpider</display-name>
<!-- Registers StartSpiderLisenter as a servlet context listener; per the
     surrounding article this is what launches the crawler when Tomcat starts. -->
<listener>
<listener-class>main.java.sina.spider.StartSpiderLisenter</listener-class>
</listener>
</web-app>
这样的配置当启动tomcat的时候,就会运行爬虫,然后再看我们的StartSpiderLisenter类,如下
package main.java.sina.spider; import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;
import main.java.sina.bean.info.LoginInfo;
import main.java.sina.utils.Constant; public class StartSpiderLisenter implements ServletContextListener{ public void contextDestroyed(ServletContextEvent arg0) { } public void contextInitialized(ServletContextEvent arg0) {
Constant.personalHomePage = "http://weibo.com/zhaoyao2012/home"; //填写你自己的新浪微博个人主页
LoginInfo.username = "***"; //填写你的新浪微博用户名
LoginInfo.password = "***"; //填写你的新浪微博密码
Constant.enableProxy = false; //是否使用代理
Spider.start();
} }
很明显,StartSpiderLisenter类实现了ServletContextListener这个接口,因此必须实现它的两个方法:contextInitialized和contextDestroyed,它们分别在Web应用上下文初始化和销毁的时候被容器调用。可以看到,在contextInitialized这个初始化上下文的方法中调用了Spider.start()方法。那么我们来看看Spider这个类,如下:
package main.java.sina.spider; import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.quartz.JobBuilder;
import org.quartz.JobDetail;
import org.quartz.Scheduler;
import org.quartz.SchedulerException;
import org.quartz.SchedulerFactory;
import org.quartz.SimpleScheduleBuilder;
import org.quartz.SimpleTrigger;
import org.quartz.TriggerBuilder;
import org.quartz.impl.StdSchedulerFactory;
import main.java.sina.bean.info.LoginInfo;
import main.java.sina.httpclient.LoginSina;
import main.java.sina.httpclient.SpiderSina;
import main.java.sina.job.KeywordSearchJob;
import main.java.sina.utils.Constant;
import main.java.sina.utils.HttpHelper;
import main.java.test.SpiderTest;

/**
 * Crawler entry point: schedules the hourly keyword-search job, creates
 * logged-in {@link SpiderSina} instances and collects message ids ("mids")
 * from search result pages.
 */
public class Spider {

    /** Captures the numeric id in each {@code <div mid="...">} of a result page. */
    private static final Pattern MID_PATTERN = Pattern.compile("<div mid=\"([0-9]*)\"");

    /** Marker that precedes the search-result section of the returned page. */
    private static final String RESULT_MARKER = "\"pid\":\"pl_weibo_direct\"";

    /** Highest result page requested for one search. */
    private static final int MAX_PAGES = 50;

    /** After this page the crawl pauses for {@link #PAUSE_MILLIS}. */
    private static final int PAUSE_AFTER_PAGE = 37;

    /** Length of the pause: 30 minutes. */
    private static final long PAUSE_MILLIS = 1000L * 60 * 30;

    /**
     * Stand-alone entry point: fills in the static configuration and starts the scheduler.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Constant.personalHomePage = "****";
        LoginInfo.username = "****";
        LoginInfo.password = "****";
        Constant.enableProxy = false;
        Constant.hourbefore = 0; // hour offset later passed to Utils.getDateOfSpecifiedPreHour
        start();
    }

    /**
     * Schedules {@link KeywordSearchJob} to repeat hourly, forever, and starts
     * the Quartz scheduler. Scheduler failures are printed, not rethrown.
     */
    public static void start() {
        final SchedulerFactory factory = new StdSchedulerFactory();
        try {
            Scheduler scheduler = factory.getScheduler();
            JobDetail jobDetail = JobBuilder.newJob(KeywordSearchJob.class)
                    .withIdentity("keywordSearch", "weibo")
                    .build();
            SimpleTrigger trigger = TriggerBuilder.newTrigger()
                    .withIdentity("keywordSearch", "weibo")
                    .withSchedule(SimpleScheduleBuilder.repeatHourlyForever())
                    .build();
            scheduler.scheduleJob(jobDetail, trigger);
            scheduler.start();
        } catch (SchedulerException e) {
            e.printStackTrace();
        }
    }

    /**
     * Logs in with the credentials held in {@link LoginInfo}, follows the
     * post-login redirect and wraps the session in a {@link SpiderSina}.
     *
     * @return a spider bound to the new login session
     */
    public static SpiderSina createSpider() {
        LoginSina ls = new LoginSina(LoginInfo.username, LoginInfo.password);
        ls.dologinSina();
        ls.redirect();
        return new SpiderSina(ls);
    }

    /**
     * Searches up to {@value #MAX_PAGES} result pages for {@code keyword},
     * extracts every message id and posts the comma-separated list (each id
     * followed by a comma) to {@code Constant.CommentUrl}.
     *
     * <p>An {@link IOException} from searching or posting is printed and ends
     * the run without sending anything.
     *
     * @param spider   logged-in spider used to run the searches
     * @param keyword  search keyword, passed to {@code spider.search} as is
     * @param fromdate start of the search window, passed through unchanged
     * @param todate   end of the search window, passed through unchanged
     */
    public static void sendMidsofDays(SpiderSina spider, String keyword, String fromdate,
            String todate) {
        try {
            StringBuilder mids = new StringBuilder();
            for (int page = 1; page <= MAX_PAGES; page++) {
                String htmlContent = spider.search(keyword, page, fromdate, todate);
                if (htmlContent.contains("noresult_support")) {
                    break; // the page reports that there are no (more) results
                }
                System.out.println(page);

                // Restrict matching to the result section when the marker is present.
                // Without the marker the whole page is scanned; the previous code relied
                // on a caught exception here and could itself throw on empty content.
                int markerAt = htmlContent.indexOf(RESULT_MARKER);
                if (markerAt >= 0) {
                    htmlContent = htmlContent.substring(markerAt);
                }

                // The section is embedded as an escaped string: undo \" and \/ first.
                htmlContent = htmlContent.replace("\\\"", "\"");
                htmlContent = htmlContent.replace("\\/", "/");

                Matcher matcher = MID_PATTERN.matcher(htmlContent);
                while (matcher.find()) {
                    System.out.println(matcher.group(1));
                    mids.append(matcher.group(1)).append(',');
                }

                if (page == PAUSE_AFTER_PAGE) {
                    // NOTE(review): presumably a throttle against rate limiting - confirm.
                    try {
                        Thread.sleep(PAUSE_MILLIS);
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                        Thread.currentThread().interrupt(); // keep the interrupt visible to callers
                    }
                }
            }
            String midsString = mids.toString();
            System.out.println(midsString);
            HttpHelper.getLiveData(midsString, Constant.CommentUrl);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
我们在Spider.start()方法中,看到了作业KeywordSearchJob.class,那么我们来看看这个KeywordSearchJob类的实现,如下:
package main.java.sina.job; import org.quartz.Job;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;
import main.java.sina.httpclient.SpiderSina;
import main.java.sina.spider.Spider;
import main.java.sina.utils.Constant;
import main.java.sina.utils.Utils;

/**
 * Quartz job that logs in, opens the Weibo page and collects the message ids
 * found for one fixed keyword within a single hour slot.
 */
public class KeywordSearchJob implements Job {

    /** Search keyword, already URL-encoded twice ("%25" is an encoded '%'). */
    private static final String ENCODED_KEYWORD =
            "%25E5%25AE%2581%25E6%25B3%25A2%25E5%25A4%25A7%25E5%25AD%25A6";

    /**
     * Runs one crawl; the same formatted hour is used as both the start and
     * the end of the search window.
     *
     * @param arg0 Quartz execution context (unused)
     * @throws JobExecutionException declared by the {@code Job} contract
     */
    public void execute(JobExecutionContext arg0) throws JobExecutionException {
        // This crawler does not go through a proxy.
        Constant.enableProxy = false;

        // Hour slot to search, shifted back by the configured hour offset.
        String hourSlot = Utils.getDateOfSpecifiedPreHour(Constant.hourbefore);

        SpiderSina session = Spider.createSpider();
        session.forwardToWeiboPage();
        Spider.sendMidsofDays(session, ENCODED_KEYWORD, hourSlot, hourSlot);
    }
}
接下来,我们看几个工具类的实现:首先来看下Utils.java这个类,如下:它实现了日期的格式的一些转换
package main.java.sina.utils; import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Properties; import org.htmlparser.Parser;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.lexer.Page;
import org.htmlparser.util.DefaultParserFeedback;
/**
 * Assorted helpers: parsing Weibo-style relative dates, copying streams and
 * strings to files, reading streams as text, loading the database
 * configuration and creating an HTML parser.
 */
public class Utils {

    /**
     * Converts a Weibo timestamp label into a {@link Date}.
     *
     * <p>Recognised forms: "N秒前" (N seconds ago), "N分钟前" (N minutes ago),
     * "今天 HH:mm" (today), "MM月dd日 HH:mm" (year taken from
     * {@code fileCreateDate}) and "yyyy-MM-dd HH:mm". Relative forms are
     * resolved against {@code fileCreateDate}.
     *
     * @param dtext          label to parse
     * @param fileCreateDate reference instant for relative labels
     * @return the parsed date, or {@code null} if an argument is {@code null}
     *         or the label cannot be parsed
     */
    public static Date getDateFromString(String dtext, Date fileCreateDate) {
        if (dtext == null || fileCreateDate == null) {
            return null;
        }
        Date date = null;
        Calendar c = Calendar.getInstance();
        c.setTime(fileCreateDate);
        if (dtext.contains("秒前")) {
            int seconds = leadingNumber(dtext);
            if (seconds >= 0) {
                // The lenient calendar rolls minutes/hours back when this goes negative.
                c.set(Calendar.SECOND, c.get(Calendar.SECOND) - seconds);
                date = c.getTime();
            }
        } else if (dtext.contains("分钟前")) {
            int minutes = leadingNumber(dtext);
            if (minutes >= 0) {
                c.set(Calendar.MINUTE, c.get(Calendar.MINUTE) - minutes);
                date = c.getTime();
            }
        } else if (dtext.contains("今天")) {
            String[] hourMinute = dtext.replace("今天 ", "").trim().split(":");
            if (hourMinute.length == 2) {
                try {
                    int hour = Integer.parseInt(hourMinute[0]);
                    int minute = Integer.parseInt(hourMinute[1]);
                    c.set(Calendar.HOUR_OF_DAY, hour);
                    c.set(Calendar.MINUTE, minute);
                    date = c.getTime();
                } catch (NumberFormatException e) {
                    e.printStackTrace(); // unparseable time: report and return null
                }
            }
        } else if (dtext.contains("月")) {
            // The label has no year; borrow it from the reference date.
            String withYear = c.get(Calendar.YEAR) + "年" + dtext;
            date = parseOrNull(new SimpleDateFormat("yyyy年MM月dd日 HH:mm"), withYear);
        } else if (dtext.contains("-")) {
            date = parseOrNull(new SimpleDateFormat("yyyy-MM-dd HH:mm"), dtext);
        }
        return date;
    }

    /** Returns the number formed by the leading decimal digits of {@code text}, or -1 if there is none. */
    private static int leadingNumber(String text) {
        int end = 0;
        while (end < text.length() && text.charAt(end) >= '0' && text.charAt(end) <= '9') {
            end++;
        }
        if (end == 0) {
            return -1;
        }
        try {
            return Integer.parseInt(text.substring(0, end));
        } catch (NumberFormatException e) {
            return -1; // too many digits for an int
        }
    }

    /** Parses {@code text} with {@code format}; prints the error and returns {@code null} on failure. */
    private static Date parseOrNull(SimpleDateFormat format, String text) {
        try {
            return format.parse(text);
        } catch (ParseException e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Closes {@code resource} if it is not {@code null}, printing any failure. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Copies {@code in} to the named file, replacing existing content. Both
     * streams are closed once a copy has been attempted. Nothing happens (and
     * {@code in} is left untouched) when the file name is blank or {@code in}
     * is {@code null}. I/O errors are printed, not rethrown.
     *
     * @param filename destination path
     * @param in       source stream
     */
    public static void writeFileFromStream(String filename, InputStream in) {
        if (filename == null || filename.trim().length() == 0 || in == null) {
            return;
        }
        FileOutputStream fou = null;
        try {
            fou = new FileOutputStream(new File(filename)); // creates the file when missing
            byte[] buffer = new byte[1024 * 4];
            int len;
            while ((len = in.read(buffer)) != -1) {
                fou.write(buffer, 0, len);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(in);
            closeQuietly(fou);
        }
    }

    /**
     * Writes {@code str} to the named file, replacing existing content. Every
     * line is terminated with {@code "\n"} regardless of its original line
     * ending. A blank file name falls back to {@code "tmp.txt"}; a
     * {@code null} string is written as empty content. I/O errors are printed.
     *
     * @param filename destination path, or blank for {@code "tmp.txt"}
     * @param str      text to write
     */
    public static void writeFileFromString(String filename, String str) {
        if (filename == null || filename.trim().length() == 0) {
            filename = "tmp.txt";
        }
        BufferedWriter writer = null;
        BufferedReader reader = null;
        try {
            writer = new BufferedWriter(new FileWriter(new File(filename)));
            reader = new BufferedReader(new StringReader(str == null ? "" : str));
            StringBuilder buffer = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                buffer.append(line).append('\n');
            }
            writer.write(buffer.toString());
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Closed independently so one failure (or a null) cannot skip the other.
            closeQuietly(reader);
            closeQuietly(writer);
        }
    }

    /**
     * Reads the whole stream as UTF-8 text, terminating every line with
     * {@code "\n"}, and closes the stream.
     *
     * <p>The bytes are decoded as UTF-8 directly. The earlier version decoded
     * them with the platform charset and re-encoded the result, which corrupts
     * text whenever the platform charset is not UTF-8.
     *
     * @param in stream to read; {@code null} yields an empty string
     * @return the text read so far (partial if an I/O error occurred), or
     *         {@code "error:" + message} if UTF-8 were unsupported
     */
    public static String getStringFromStream(InputStream in) {
        if (in == null) {
            return "";
        }
        StringBuilder buffer = new StringBuilder();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(in, "utf-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                buffer.append(line).append('\n');
            }
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            return "error:" + e.getMessage();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (reader != null) {
                closeQuietly(reader);
            } else {
                closeQuietly(in);
            }
        }
        return buffer.toString();
    }

    /**
     * Loads the database settings from {@code config/dbconfig.ini}.
     *
     * @return the loaded properties; empty if the file is missing or unreadable
     */
    public static Properties getDBconfig() {
        Properties properties = new Properties();
        InputStream in = null;
        try {
            in = new FileInputStream(new File("config/dbconfig.ini"));
            properties.load(in);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(in);
        }
        return properties;
    }

    /**
     * Creates an htmlparser {@link Parser} over the given HTML text, using quiet feedback.
     *
     * @param inputHTML HTML source to parse
     * @return a parser positioned at the start of the text
     */
    public static Parser createParser(String inputHTML) {
        Lexer mLexer = new Lexer(new Page(inputHTML));
        return new Parser(mLexer, new DefaultParserFeedback(DefaultParserFeedback.QUIET));
    }

    /**
     * Formats the current time minus {@code hourNum} hours as
     * {@code yyyy-MM-dd-HH}; also prints both values for debugging.
     *
     * @param hourNum number of hours to go back (0 for the current hour)
     * @return the formatted hour slot
     */
    public static String getDateOfSpecifiedPreHour(int hourNum) {
        SimpleDateFormat sdFormat = new SimpleDateFormat("yyyy-MM-dd-HH");
        Date date = new Date();
        System.out.println("date -" + date + " " + hourNum);
        Calendar calendar = Calendar.getInstance();
        calendar.setTime(date);
        calendar.add(Calendar.HOUR_OF_DAY, -1 * hourNum);
        String formatted = sdFormat.format(calendar.getTime());
        System.out.println("date2 -" + formatted);
        return formatted;
    }
}
再来看一下ThreadPool.java这个类,如下:这是一个线程工具类,定义了线程的一些动作
package main.java.sina.utils; import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors; /** * 线程池工具类
*/
public class ThreadPool {
    private ExecutorService service;
    private List<Thread> threadList;

    /**
     * Creates a fixed-size pool and remembers the tasks to run on it.
     *
     * @param limite     number of worker threads in the pool
     * @param threadList tasks to submit; may be {@code null} or empty
     */
    public ThreadPool(int limite, List<Thread> threadList) {
        this.threadList = threadList;
        this.service = Executors.newFixedThreadPool(limite);
    }

    /**
     * Submits every task in list order. Each {@code Thread} is handed to the
     * executor as a plain {@code Runnable}; it is never started itself.
     */
    public void execute() {
        if (threadList != null && !threadList.isEmpty()) {
            for (Thread task : threadList) {
                service.execute(task);
            }
        }
    }

    /** @return whether the underlying executor reports itself as terminated */
    public boolean isTerminated() {
        return service.isTerminated();
    }

    /** Requests an orderly shutdown of the underlying executor. */
    public void shutDown() {
        service.shutdown();
    }
}
然后再看一下Constant.java这个常量类,如下:常量类把系统中用到的一些常量集中写在这里,以后项目维护需要更改的时候,方便统一修改
package main.java.sina.utils; /**
* @ClassName: Constant
*
*/
// Shared, mutable configuration values, assigned at start-up and read by the
// other classes through static access.
public class Constant {
// When true, HttpUtils.createHttpClient routes requests through its proxy host.
public static boolean enableProxy = false;
// Not referenced in the code shown here. NOTE(review): presumably a second endpoint for live data - confirm.
public static String liveCommentUrl = "http://localhost:8080/social-hub-connector/loadingLiveData";
// Endpoint that Spider.sendMidsofDays posts the collected message ids to.
public static String CommentUrl = "http://localhost:8080/social-hub-connector/loadingData";
// Placeholder; overwritten at start-up with the account's Weibo home page URL.
public static String personalHomePage = "******";
// Placeholders; the code shown here logs in with LoginInfo.username/password instead.
public static String weiboUsername = "*********";
public static String weiboPassword = "*********";
// Number of hours subtracted from "now" when the search hour slot is computed.
public static int hourbefore = 0;
}
再来看一下Base64Encoder.java类,它用于对一些字段进行Base64编码,如下:
package main.java.sina.utils; /**
* */
/**
 * Base64 encoder using the standard alphabet (A-Z, a-z, 0-9, '+', '/') with
 * '=' padding.
 */
public class Base64Encoder {
    /** The 64 output symbols, indexed by 6-bit value. */
    private static final char[] ALPHABET =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();

    public Base64Encoder() {
    }

    /**
     * Encodes the bytes as Base64 text without line breaks.
     *
     * @param from bytes to encode
     * @return the encoded text, padded with '=' to a multiple of four characters
     */
    public static String encode(byte[] from) {
        StringBuilder encoded = new StringBuilder(((from.length + 2) / 3) * 4);
        // Every group of up to three input bytes becomes four output characters.
        for (int offset = 0; offset < from.length; offset += 3) {
            boolean hasSecond = offset + 1 < from.length;
            boolean hasThird = offset + 2 < from.length;
            int first = from[offset] & 0xFF;
            int second = hasSecond ? from[offset + 1] & 0xFF : 0;
            int third = hasThird ? from[offset + 2] & 0xFF : 0;

            encoded.append(ALPHABET[first >>> 2]);
            encoded.append(ALPHABET[((first & 0x03) << 4) | (second >>> 4)]);
            if (hasSecond) {
                encoded.append(ALPHABET[((second & 0x0F) << 2) | (third >>> 6)]);
            } else {
                encoded.append('=');
            }
            if (hasThird) {
                encoded.append(ALPHABET[third & 0x3F]);
            } else {
                encoded.append('=');
            }
        }
        return encoded.toString();
    }
}
接下来是EncodeSuAndSp.java这个类,它针对新浪的一些特殊加密规则封装了相应的方法,在拼接最终的URL的时候会用到,例如根据servertime和nonce两个参数来生成加密后的字符串:
package main.java.sina.utils;
import java.io.File;
import java.io.FileReader; import javax.script.Invocable;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager; /**
* */
/**
 * Produces the encoded user name and hashed password for the login request by
 * calling functions defined in the JavaScript file {@code js/encrypt.js}.
 */
public class EncodeSuAndSp {
    static ScriptEngineManager mgr = new ScriptEngineManager();
    static ScriptEngine engine = mgr.getEngineByExtension("js");
    static Invocable inv = (Invocable) engine;

    /**
     * Computes {@code hex_sha1(hex_sha1(hex_sha1(password)) + servertime + nonce)}
     * using the script's {@code hex_sha1} function.
     *
     * @param password   plain-text password
     * @param servertime server time appended before the last hash
     * @param nonce      one-time string appended before the last hash
     * @return the last value computed; an empty string if the script could not
     *         be loaded (the failure is printed)
     */
    public static String getEncryptedP(String password, String servertime, String nonce) {
        String value1 = "";
        try {
            loadEncryptScript();
            value1 = String.valueOf(inv.invokeFunction("hex_sha1", password));
            value1 = String.valueOf(inv.invokeFunction("hex_sha1", value1));
            value1 = String.valueOf(inv.invokeFunction("hex_sha1", value1 + servertime + nonce));
        } catch (Exception e) {
            e.printStackTrace();
        }
        return value1;
    }

    /**
     * Encodes the user name with the script's {@code encode} function and
     * prints the result.
     *
     * @param username plain user name
     * @return the encoded user name, or an empty string on failure (the
     *         failure is printed)
     */
    public static String getEncodedUsername(String username) {
        String value1 = "";
        try {
            loadEncryptScript();
            value1 = String.valueOf(inv.invokeFunction("encode", username));
            System.out.println(value1);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return value1;
    }

    /**
     * Evaluates {@code js/encrypt.js} in the shared engine and always closes
     * the file reader (it used to be leaked on every call).
     */
    private static void loadEncryptScript()
            throws java.io.IOException, javax.script.ScriptException {
        if (engine == null) {
            throw new IllegalStateException("No script engine is registered for the \"js\" extension");
        }
        FileReader script = new FileReader(new File("js/encrypt.js"));
        try {
            engine.eval(script);
        } finally {
            script.close();
        }
    }
}
package main.java.sina.utils;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
/**
 * URL encoding helpers and decoders for text that contains literal
 * {@code \}{@code uXXXX} escape sequences.
 */
public class EncodeUtils {

    /**
     * URL-encodes {@code str} using the given charset name.
     *
     * @throws RuntimeException wrapping the cause if the charset is unsupported
     */
    public static final String encodeURL(String str, String enc) {
        try {
            return URLEncoder.encode(str, enc);
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * URL-decodes {@code str} using the given charset name.
     *
     * @throws RuntimeException wrapping the cause if the charset is unsupported
     */
    public static final String decodeURL(String str, String enc) {
        try {
            return URLDecoder.decode(str, enc);
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Decodes backslash-u escapes, then removes literal {@code \r}, {@code \n}
     * and {@code \t} sequences, spaces and {@code >} characters, and turns
     * {@code [} and {@code ]} into double quotes.
     *
     * <p>An escape whose four characters are not valid hex becomes a NUL
     * character, as before. An escape cut short by the end of the text is kept
     * literally; it used to end in a {@code NullPointerException}.
     *
     * @param str text to decode; {@code null} yields an empty string
     * @return the decoded and cleaned text
     */
    public static String unicdoeToGB2312(String str) {
        if (str == null) {
            return "";
        }
        return decodeUnicodeEscapes(str, true)
                .replaceAll("\\\\r", "")
                .replaceAll("\\\\n", "")
                .replaceAll("\\\\t", "")
                .replaceAll(" ", "")
                .replaceAll(">", "")
                .replaceAll("\\[", "\"")
                .replaceAll("\\]", "\"");
    }

    /**
     * Decodes backslash-u escapes, then removes literal {@code \r}, {@code \t}
     * and {@code \n} sequences, spaces and {@code >} characters.
     *
     * <p>Malformed or truncated escapes are kept literally and {@code null}
     * yields an empty string; both used to end in a
     * {@code NullPointerException}.
     *
     * @param str text to decode
     * @return the decoded and cleaned text
     */
    public static String unicodeTogb2312(String str) {
        if (str == null) {
            return "";
        }
        return decodeUnicodeEscapes(str, false)
                .replaceAll("\\\\r", "")
                .replaceAll("\\\\t", "")
                .replaceAll(" ", "")
                .replaceAll(">", "")
                .replaceAll("\\\\n", "");
    }

    /**
     * Replaces every complete backslash-u escape with the character it names,
     * scanning by index in a single pass.
     *
     * @param text        input text, not {@code null}
     * @param nulOnBadHex if true, a six-character escape with invalid hex
     *                    digits is consumed and replaced by a NUL character;
     *                    otherwise it is copied through unchanged
     */
    private static String decodeUnicodeEscapes(String text, boolean nulOnBadHex) {
        int length = text.length();
        StringBuilder out = new StringBuilder(length);
        int pos = 0;
        while (pos < length) {
            if (text.startsWith("\\u", pos) && pos + 6 <= length) {
                try {
                    int codeUnit = Integer.parseInt(text.substring(pos + 2, pos + 6), 16);
                    out.append((char) codeUnit);
                    pos += 6;
                    continue;
                } catch (NumberFormatException e) {
                    if (nulOnBadHex) {
                        out.append((char) 0);
                        pos += 6;
                        continue;
                    }
                    // otherwise fall through and copy the backslash literally
                }
            }
            out.append(text.charAt(pos));
            pos++;
        }
        return out.toString();
    }
}
HttpUtils.java这个类很关键,它封装了doGet()和doPost()等HTTP请求方法,如下:
package main.java.sina.utils; import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.HttpVersion;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.conn.params.ConnRouteParams;
import org.apache.http.cookie.Cookie;
import org.apache.http.entity.InputStreamEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.impl.cookie.BasicClientCookie;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.CoreProtocolPNames;
import org.apache.http.params.HttpParams;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.ExecutionContext;
import org.apache.http.protocol.HTTP;
import org.apache.http.protocol.HttpContext; /**
* http操作相关的类
*/
public class HttpUtils {

    /**
     * Sends a GET request.
     *
     * <p>The request now runs with the local {@code HttpContext}, so the
     * request recorded in that context can be printed. Previously the context
     * was never passed to {@code execute}, the lookup always returned
     * {@code null} and the resulting exception was silently swallowed.
     *
     * @param url     target address
     * @param headers request headers to add; may be {@code null}
     * @return the response, or {@code null} if the request failed (the failure is printed)
     */
    public static HttpResponse doGet(String url, Map<String, String> headers) {
        HttpClient client = createHttpClient();
        HttpGet getMethod = new HttpGet(url);
        HttpResponse response = null;
        HttpContext httpContext = new BasicHttpContext();
        try {
            setHeaders(headers, getMethod);
            response = client.execute(getMethod, httpContext);
            Object realRequest = httpContext.getAttribute(ExecutionContext.HTTP_REQUEST);
            if (realRequest instanceof HttpUriRequest) {
                System.out.println(((HttpUriRequest) realRequest).getURI());
            }
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            String msg = e.getMessage();
            if (msg != null && msg.contains("Truncated chunk")) {
                System.out.println(msg + " 数据获取不完整,需要重新获取。");
            } else {
                System.out.println(msg + " 连接被拒绝,需要降低爬取频率。");
            }
        } catch (RuntimeException e) {
            // Still best-effort (null is returned), but no longer silent.
            e.printStackTrace();
        }
        System.out.println(response);
        return response;
    }

    /**
     * Sends a form POST encoded as UTF-8.
     *
     * @param url     target address
     * @param headers request headers to add; may be {@code null}
     * @param params  form fields; may be {@code null} or empty, in which case no body is sent
     * @return the response, or {@code null} if the request failed (the failure is printed)
     */
    public static HttpResponse doPost(String url, Map<String, String> headers, Map<String, String> params) {
        HttpClient client = createHttpClient();
        HttpPost postMethod = new HttpPost(url);
        HttpResponse response = null;
        try {
            setHeaders(headers, postMethod);
            if (params != null && params.size() > 0) {
                List<NameValuePair> form = new ArrayList<NameValuePair>();
                for (Map.Entry<String, String> field : params.entrySet()) {
                    form.add(new BasicNameValuePair(field.getKey(), field.getValue()));
                }
                postMethod.setEntity(new UrlEncodedFormEntity(form, HTTP.UTF_8));
            }
            response = client.execute(postMethod);
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return response;
    }

    /**
     * Uploads one file as a hand-built form-data body. The boundary is taken
     * from the text after the first '=' of the {@code Content-Type} header.
     *
     * <p>NOTE(review): the file is read as text before being Base64-encoded,
     * which alters binary content. The payload is kept exactly as before
     * because the receiving endpoint's expectations are not visible here.
     *
     * @param url      target address
     * @param headers  request headers to add; may be {@code null}
     * @param fileName path of the file to upload
     * @return the response, or {@code null} if the request failed (the failure is printed)
     */
    public static HttpResponse doPost(String url, Map<String, String> headers, String fileName) {
        HttpClient client = createHttpClient();
        HttpPost postMethod = new HttpPost(url);
        String boundary = "";
        HttpResponse response = null;
        InputStream fileIn = null;
        try {
            setHeaders(headers, postMethod);
            if (headers != null) {
                String contentType = headers.get("Content-Type");
                if (contentType != null) {
                    boundary = contentType.substring(contentType.indexOf("=") + 1);
                }
            }
            File file = new File(fileName);
            fileIn = new FileInputStream(file);

            StringBuffer buffer = new StringBuffer();
            buffer.append(boundary).append("\n")
                    .append("Content-Disposition: form-data; name=\"pic1\"; filename=\"" + file.getName()).append("\"\n")
                    .append("Content-Type: image/pjpeg").append("\n")
                    .append("\n");
            System.out.println(buffer.toString());

            String tmpstr = Utils.getStringFromStream(fileIn);
            tmpstr = Base64Encoder.encode(tmpstr.getBytes());
            buffer.append(tmpstr).append("\n");
            buffer.append(boundary + "--").append("\n");
            System.out.println(buffer.toString());

            byte[] body = buffer.toString().getBytes();
            postMethod.setEntity(new InputStreamEntity(new ByteArrayInputStream(body), body.length));
            response = client.execute(postMethod);
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (fileIn != null) {
                try {
                    fileIn.close(); // harmless if the stream was already closed while reading
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return response;
    }

    /**
     * Lists the headers of a response.
     *
     * @param response response to inspect
     * @return the headers in response order, or {@code null} if there are none
     */
    public static List<Header> getReponseHeaders(HttpResponse response) {
        Header[] hds = response.getAllHeaders();
        if (hds == null || hds.length == 0) {
            return null;
        }
        List<Header> headers = new ArrayList<Header>(hds.length);
        for (Header header : hds) {
            headers.add(header);
        }
        return headers;
    }

    /**
     * Adds every entry of {@code headers} to the request.
     *
     * @param headers header names and values; may be {@code null} or empty
     * @param request request to modify
     */
    public static void setHeaders(Map<String, String> headers, HttpUriRequest request) {
        if (headers == null || headers.isEmpty()) {
            return;
        }
        for (Map.Entry<String, String> header : headers.entrySet()) {
            request.addHeader(header.getKey(), header.getValue());
        }
    }

    /**
     * Extracts the name/value pair of every {@code Set-Cookie} response
     * header; cookie attributes after the first ';' are ignored.
     *
     * @param response response to inspect; may be {@code null}
     * @return the cookies, or {@code null} if the response is {@code null} or
     *         carries no usable {@code Set-Cookie} header
     */
    public static List<Cookie> getResponseCookies(HttpResponse response) {
        if (response == null) {
            return null;
        }
        List<Cookie> cookies = null;
        Header[] hds = response.getAllHeaders();
        if (hds != null) {
            for (Header header : hds) {
                if (!header.getName().equalsIgnoreCase("Set-Cookie")) {
                    continue;
                }
                String nameValue = header.getValue().split(";")[0];
                String[] parts = nameValue.split("=", 2);
                if (parts.length < 2) {
                    continue; // no '=': skip instead of failing on parts[1]
                }
                if (cookies == null) {
                    cookies = new ArrayList<Cookie>();
                }
                cookies.add(new BasicClientCookie(parts[0], parts[1]));
            }
        }
        return cookies;
    }

    /**
     * Joins cookies into a {@code Cookie} header value ({@code name=value; name=value}).
     *
     * @param cookies cookies to join
     * @return the joined text, or {@code null} if the list is {@code null} or empty
     */
    public static String setCookie2String(List<Cookie> cookies) {
        if (cookies == null || cookies.isEmpty()) {
            return null;
        }
        StringBuilder builder = new StringBuilder();
        for (int j = 0; j < cookies.size(); j++) {
            if (j > 0) {
                builder.append("; ");
            }
            Cookie c = cookies.get(j);
            builder.append(c.getName()).append('=').append(c.getValue());
        }
        return builder.toString();
    }

    /**
     * Opens the body stream of a response.
     *
     * @param response response to read; may be {@code null}
     * @return the content stream, or {@code null} if the response or its
     *         entity is {@code null} or the stream cannot be obtained
     */
    public static InputStream getInputStreamFromResponse(HttpResponse response) {
        if (response == null) {
            return null;
        }
        HttpEntity entity = response.getEntity();
        if (entity == null) {
            return null;
        }
        InputStream in = null;
        try {
            in = entity.getContent();
        } catch (IllegalStateException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return in;
    }

    /**
     * Reads the body of a response as text.
     *
     * @param response response to read; may be {@code null}
     * @return {@code null} for a {@code null} response, an empty string when
     *         no body stream is available, otherwise the body text
     */
    public static String getStringFromResponse(HttpResponse response) {
        if (response == null) {
            return null;
        }
        InputStream in = getInputStreamFromResponse(response);
        return in != null ? Utils.getStringFromStream(in) : "";
    }

    /**
     * Creates a client backed by a thread-safe connection manager (at most 20
     * connections), using HTTP/1.1 and UTF-8 content. Automatic redirect
     * handling is switched off. The fixed proxy is applied only when
     * {@code Constant.enableProxy} is true.
     *
     * <p>NOTE(review): every call builds a new client and connection manager
     * that is never shut down here.
     */
    private final static HttpClient createHttpClient() {
        String proxyHost = "web-proxy-sha.chn.hp.com";
        int proxyPort = 8080;
        HttpHost proxy = new HttpHost(proxyHost, proxyPort);
        HttpParams params = new BasicHttpParams();
        if (Constant.enableProxy) {
            params.setParameter(ConnRouteParams.DEFAULT_PROXY, proxy);
        }
        HttpProtocolParams.setVersion(params, HttpVersion.HTTP_1_1);
        HttpProtocolParams.setContentCharset(params, "UTF-8");

        ThreadSafeClientConnManager clientmanager = new ThreadSafeClientConnManager();
        clientmanager.setMaxTotal(20);
        HttpClient client = new DefaultHttpClient(clientmanager, params);
        // whether circular redirects (back to the same location) are allowed
        client.getParams().setParameter("http.protocol.allow-circular-redirects", true);
        // maximum number of redirects to follow
        client.getParams().setParameter("http.protocol.max-redirects", 50);
        // whether redirects are handled automatically
        client.getParams().setParameter("http.protocol.handle-redirects", false);
        return client;
    }

    /**
     * Creates a client that goes through a proxy picked at random from
     * {@code proxy.properties} (entries of the form {@code ip=port}).
     *
     * @return the configured client
     * @throws RuntimeException if the proxy file yields no entries
     */
    public static HttpClient getDefaultHttpClientByProxy() {
        HttpClient httpclient = createHttpClient();
        Map<String, String> map = ReadIni.getDbini("proxy.properties");
        if (map.size() == 0) {
            throw new RuntimeException("无可用代理");
        }
        Set<String> hosts = map.keySet();
        String[] candidates = hosts.toArray(new String[hosts.size()]);
        String ip = candidates[new Random().nextInt(candidates.length)];
        HttpHost proxy = new HttpHost(ip, Integer.parseInt(map.get(ip)));
        httpclient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy);
        httpclient.getParams().setParameter(CoreProtocolPNames.PROTOCOL_VERSION, HttpVersion.HTTP_1_1);
        return httpclient;
    }
}
接下来看一个HttpHelper辅助类,如下:
/**
*
*/
package main.java.sina.utils; import java.io.IOException;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.methods.PostMethod; /**
* @ClassName: HttpHelper
*
*/
public class HttpHelper {

    /**
     * Posts the collected message ids to {@code url} as the form field {@code mids}.
     *
     * @param requestData comma-separated message ids
     * @param url         endpoint to post to
     * @return the response body (also printed)
     * @throws HttpException on a protocol error
     * @throws IOException   on a transport error
     */
    public static String getLiveData(String requestData, String url)
            throws HttpException, IOException {
        PostMethod postMethod = new PostMethod(url);
        postMethod.setParameter("mids", requestData);
        return executeAndRead(postMethod);
    }

    /**
     * Posts a user's hobbies to the fixed {@code loadingHobby} endpoint.
     *
     * @param userid user identifier, sent as the form field {@code userid}
     * @param hobbys hobbies, sent as the form field {@code hobbys}
     * @return the response body (also printed)
     * @throws HttpException on a protocol error
     * @throws IOException   on a transport error
     */
    public static String getHobbyData(String userid, String hobbys)
            throws HttpException, IOException {
        PostMethod postMethod = new PostMethod("http://c0048925.itcs.hp.com:8080/connector/loadingHobby");
        postMethod.setParameter("userid", userid);
        postMethod.setParameter("hobbys", hobbys);
        return executeAndRead(postMethod);
    }

    /**
     * Executes the request on a fresh client, prints and returns the body.
     * The connection is released in {@code finally}, so it is no longer
     * leaked when executing or reading fails.
     */
    private static String executeAndRead(PostMethod postMethod)
            throws HttpException, IOException {
        try {
            new HttpClient().executeMethod(postMethod);
            String response = postMethod.getResponseBodyAsString();
            System.out.println(response);
            return response;
        } finally {
            postMethod.releaseConnection();
        }
    }
}
ReadIni.java类,在读文本文件中使用,如下:
package main.java.sina.utils; import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;

/**
 * Reads simple {@code key=value} text files.
 */
public class ReadIni {

    /**
     * Loads every {@code key=value} line of a file into a map.
     *
     * <p>Blank lines and lines without '=' are skipped. A blank line used to
     * cause an endless loop, because the next line was only read inside the
     * non-blank branch. A missing or unreadable file yields the entries read
     * so far (possibly none); the error is printed.
     *
     * @param file path of the file to read
     * @return the parsed entries, never {@code null}
     */
    public static Map<String, String> getDbini(String file) {
        Map<String, String> map = new HashMap<String, String>();
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
            String line;
            while ((line = br.readLine()) != null) {
                if (line.trim().length() == 0) {
                    continue;
                }
                String[] pair = getIni(line);
                if (pair.length < 2) {
                    continue; // no value part: ignore instead of aborting the whole file
                }
                map.put(pair[0], pair[1]);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (java.io.IOException e) {
            e.printStackTrace();
        } finally {
            if (br != null) {
                try {
                    br.close(); // also closes the wrapped file stream
                } catch (java.io.IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return map;
    }

    /**
     * Splits one line at every '='.
     *
     * @param str line to split
     * @return the parts in order
     */
    public static String[] getIni(String str) {
        return str.split("=");
    }
}
然后,我们跳转到登录sina,来看一下LoginSina这个类的实现:
package main.java.sina.httpclient; import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.security.InvalidKeyException;
import java.security.KeyFactory;
import java.security.NoSuchAlgorithmException;
import java.security.interfaces.RSAPublicKey;
import java.security.spec.InvalidKeySpecException;
import java.security.spec.RSAPublicKeySpec;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner; import javax.crypto.BadPaddingException;
import javax.crypto.Cipher;
import javax.crypto.IllegalBlockSizeException;
import javax.crypto.NoSuchPaddingException; import org.apache.commons.codec.binary.Hex;
import org.apache.commons.httpclient.params.HttpParams;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.cookie.Cookie;
import org.springframework.core.io.ClassPathResource; import main.java.sina.json.msg.PreLoginResponseMessage;
import main.java.sina.utils.Base64Encoder;
import main.java.sina.utils.EncodeUtils;
import main.java.sina.utils.HttpUtils;
import main.java.sina.utils.JsonUtils;
import main.java.sina.utils.Utils; public class LoginSina {
private String username;
private String password;
private String rsakv;
private String pubkey; //servertime和nonce都是在登录时需要使用的,用于post信息的加密
private String servertime;//服务器的时间
private String nonce;//一次性字符串
private String userid;//用户微博ID
private String pcid;//若需要输入验证码时用到
private String userdomainname;//用于域名
private String door; // captcha text typed by the user

// Headers of the most recent login request; null until a login was attempted.
private Map<String,String> headers=null;

// Cookies returned by the most recent login request; null until then.
private List<Cookie> cookies=null;

/**
 * Stores the credentials and immediately runs {@code init()}.
 *
 * @param username Weibo user name
 * @param password Weibo password
 */
public LoginSina(String username,String password){
    this.username=username;
    this.password=password;
    init();
}

/**
 * @return a copy of the stored request headers, or {@code null} if there are none
 */
public Map<String,String> getHeaders(){
    if(headers==null || headers.isEmpty()){
        return null;
    }
    return new HashMap<String,String>(headers);
}

/**
 * @return a copy of the stored cookie list, or {@code null} if there are none
 */
public List<Cookie> getCookies(){
    if(cookies==null || cookies.isEmpty()){
        return null;
    }
    return new ArrayList<Cookie>(cookies);
}
/**
 * Logs in to Weibo by posting the SSO login form.
 *
 * <p>When the response does not contain {@code retcode=0}, the captcha image
 * is downloaded, a fresh nonce is generated, the captcha text is read from
 * standard input and the login is attempted again. The result of that retry
 * is now returned; previously it was discarded, so the failed response was
 * returned and the success message was printed after a failure.
 *
 * <p>NOTE(review): {@code door} is stored but not added to the form in the
 * code visible here - confirm whether the retry needs it.
 *
 * @return the response text of the successful login
 * @throws IllegalStateException if the login request produced no response
 */
public String dologinSina(){
    System.out.println("---do login, please hold on...---");
    String url="http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.5)";//v1.3.17
    Map<String,String> headers=new HashMap<String,String>();
    Map<String,String> params=new HashMap<String,String>();
    /* Background on HTTP headers: http://www.cnblogs.com/yuzhongwusan/archive/2011/10/20/2218954.html */
    headers.put("Accept", "text/html, application/xhtml+xml, */*");
    headers.put("Referer", "http://login.sina.com.cn/member/my.php?entry=sso");
    headers.put("Accept-Language", "zh-cn");
    headers.put("User-Agent", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; BOIE9;ZHCN");
    headers.put("Host", "login.sina.com.cn");
    headers.put("Connection", "Keep-Alive");
    headers.put("Content-Type", "application/x-www-form-urlencoded");
    headers.put("Cache-Control", "no-cache");
    params.put("encoding", "UTF-8");
    params.put("entry", "weibo");
    params.put("from", "");
    params.put("prelt", "112");
    params.put("gateway", "1");
    params.put("nonce", nonce);
    params.put("pwencode", "rsa2");//wsse
    params.put("returntype", "META");
    params.put("pagerefer", "");
    params.put("savestate", "7");
    params.put("servertime", servertime);
    params.put("rsakv", rsakv);
    params.put("service", "miniblog");
    params.put("sp", getEncryptedP());
    params.put("ssosimplelogin", "1");
    params.put("su", getEncodedU());
    params.put("url", "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack");
    params.put("useticket", "1");
    params.put("vsnf", "1");

    HttpResponse response=HttpUtils.doPost(url, headers, params);
    if(response==null){
        // doPost returns null when the request failed; fail with a clear message.
        throw new IllegalStateException("Sina login request failed: no response received");
    }
    this.cookies=HttpUtils.getResponseCookies(response);
    this.headers=headers;
    String responseText=HttpUtils.getStringFromResponse(response);
    try {
        responseText=new String(responseText.getBytes(),"GBK");
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
    }

    if(!responseText.contains("retcode=0")){
        downloadCheckImage();
        this.nonce=getnonce();
        Scanner s=new Scanner(System.in);
        if(responseText.contains("retcode=4049")){
            System.out.println("请输入验证码:");
        }else if(responseText.contains("retcode=2070")){
            System.out.println("验证码不正确,请再次输入验证码:");
        }
        this.door=s.next();
        return dologinSina();
    }
    System.out.println("Congratulations, you have login success!");
    return responseText;
}
/**
 * Requests the Weibo ajax-login page after a successful login, sending the
 * cookies received during login.
 *
 * <p>The browser identification is now sent as {@code User-Agent}; it was
 * sent under the header name {@code User}, unlike the other requests of this
 * class. The unused lookup of the {@code ALF} and {@code tgc} cookies was
 * removed, which also removes a failure when no cookies were received.
 *
 * @return the response text of the last request
 */
public String redirect(){
    String cookieValue=HttpUtils.setCookie2String(this.cookies);
    this.headers.clear();
    this.headers.put("Accept", "image/gif, image/jpeg, image/pjpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*");
    this.headers.put("Accept-Language", "zh-cn");
    this.headers.put("Connection", "Keep-Alive");
    this.headers.put("Host", "sina.com.cn");
    this.headers.put("Referer", "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.15)");
    this.headers.put("User-Agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; QQDownload 691)");
    if(cookieValue!=null){
        this.headers.put("Cookie", cookieValue);
    }
    String url="http://weibo.com/ajaxlogin.php?" +
            "framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack&" +
            "sudaref=weibo.com";
    // NOTE(review): the request is issued twice and the first response is dropped;
    // kept as is because it is unclear whether the server relies on the first hit.
    HttpResponse response=HttpUtils.doGet(url, this.headers);
    response=HttpUtils.doGet(url, this.headers);
    return HttpUtils.getStringFromResponse(response);
}
/**
 * Generates a random one-time string of 6 characters, drawn from the
 * upper-case letters and digits; per the original note it is used for encryption.
 *
 * @return the generated 6-character string
 */
private String getnonce() {
    final String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    StringBuilder generated = new StringBuilder();
    int remaining = 6;
    while (remaining-- > 0) {
        int pick = (int) Math.ceil(Math.random() * 1000000) % alphabet.length();
        generated.append(alphabet.charAt(pick));
    }
    return generated.toString();
}
/**
 * Initialisation step: calls the pre-login URL and stores the values found in
 * its response (nonce, servertime, pubkey, rsakv and pcid) on this object.
 */
private void init(){
    String preloginUrl = compositeUrl();

    Map<String,String> requestHeaders = new HashMap<String,String>();
    requestHeaders.put("Accept", "*/*");
    requestHeaders.put("Referer", "http://weibo.com/");
    requestHeaders.put("Accept-Language", "zh-cn");
    requestHeaders.put("User-Agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; QQDownload 691)");
    requestHeaders.put("Host", "login.sina.com.cn");
    requestHeaders.put("Connection", "Keep-Alive");

    String body = HttpUtils.getStringFromResponse(HttpUtils.doGet(preloginUrl, requestHeaders));

    // Keep only the text from the first '{' to the last '}' before handing it to the JSON helper.
    int firstBrace = body.indexOf("{");
    int lastBrace = body.lastIndexOf("}");
    String json = body.substring(firstBrace, lastBrace + 1);

    PreLoginResponseMessage preLogin = JsonUtils.jsontoPreLoginResponseMessage(json);
    this.nonce = preLogin.getNonce();
    this.servertime = preLogin.getServertime() + "";
    this.pubkey = preLogin.getPubkey();
    this.rsakv = preLogin.getRsakv();
    this.pcid = preLogin.getPcid();
}
/**
 * Downloads the captcha image for the current {@code pcid} and writes it to the
 * file resolved from the class-path resource {@code checkImage.jpeg}.
 *
 * <p>Returns immediately when no {@code pcid} is set. The stored cookies are
 * cleared before the request, so the Cookie header is rebuilt from the emptied list.
 */
private void downloadCheckImage() {
    if (pcid == null) {
        return;
    }
    this.headers.remove("Content-Type");
    try {
        if (this.cookies != null) {
            this.cookies.clear();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    String cookieValue = HttpUtils.setCookie2String(this.cookies);
    this.headers.put("Cookie", cookieValue);
    String url = "http://login.sina.com.cn/cgi/pin.php?r=" + (long)(Math.random()*100000000) + "&s=0&p=" + this.pcid;
    HttpResponse response = HttpUtils.doGet(url, headers);
    InputStream in = HttpUtils.getInputStreamFromResponse(response);
    try {
        Utils.writeFileFromStream(new ClassPathResource("checkImage.jpeg").getFile().getPath(), in);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // The response stream was never closed before; release it whether or not the write succeeded.
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing fails.
            }
        }
    }
}
/**
 * Assembles the pre-login URL, embedding the encoded user name and the
 * current time in milliseconds.
 *
 * @return the complete pre-login URL
 */
private String compositeUrl(){
    String encodedUser = getEncodedU();
    long timestamp = System.currentTimeMillis();
    return "http://login.sina.com.cn/sso/prelogin.php?"
            + "entry=weibo&callback=sinaSSOController.preloginCallBack&"
            + "su=" + encodedUser
            + "&rsakt=mod&checkpin=1&client=ssologin.js(v1.4.5)&_=" + timestamp;
}
/**
 * Encodes the user name: it is first passed through {@code EncodeUtils.encodeURL}
 * with "utf-8" and the result is then Base64-encoded.
 *
 * @return the encoded user name, or an empty string when no user name is set
 */
private String getEncodedU() {
    if (username == null || username.length() == 0) {
        return "";
    }
    String urlEncoded = EncodeUtils.encodeURL(username, "utf-8");
    return Base64Encoder.encode(urlEncoded.getBytes());
}
/**
 * Encrypts the password for login.
 *
 * <p>The plaintext is {@code servertime + TAB + nonce + LF + password}; it is
 * encrypted with {@link #rsaCrypt(String, String, String)} using the stored
 * public key and the hexadecimal exponent "10001".
 *
 * @return the encrypted value as hex text, or an empty string if encryption fails
 */
private String getEncryptedP(){
    String data = servertime + "\t" + nonce + "\n" + password;
    return rsaCrypt(pubkey, "10001", data);
}

/**
 * Convenience overload that builds the plaintext
 * {@code servertime + TAB + nonce + LF + pwd} and encrypts it.
 *
 * @param pubkey      RSA modulus as hexadecimal text
 * @param exponentHex RSA public exponent as hexadecimal text
 * @param pwd         password to protect
 * @param servertime  server time value placed in front of the password
 * @param nonce       one-time string placed in front of the password
 * @return the encrypted value as hex text, or an empty string if encryption fails
 */
public static String rsaCrypt(String pubkey, String exponentHex, String pwd,String servertime,String nonce) {
    String data = servertime + "\t" + nonce + "\n" + pwd;
    return rsaCrypt(pubkey, exponentHex, data);
}

/**
 * RSA-encrypts a message with a public key given as hexadecimal modulus and exponent.
 *
 * @param pubkey      RSA modulus as hexadecimal text
 * @param exponentHex RSA public exponent as hexadecimal text
 * @param messageg    plaintext to encrypt
 * @return the ciphertext encoded with {@code Hex.encodeHex}, or an empty string
 *         when the key cannot be built or the encryption fails
 */
public static String rsaCrypt(String pubkey, String exponentHex, String messageg) {
    try {
        // RSAPublicKeySpec takes (modulus, publicExponent). The original passed the
        // same values but had the two local names and their comments swapped.
        BigInteger modulus = new BigInteger(pubkey, 16);
        BigInteger publicExponent = new BigInteger(exponentHex, 16);
        RSAPublicKeySpec spec = new RSAPublicKeySpec(modulus, publicExponent);
        RSAPublicKey pub = (RSAPublicKey) KeyFactory.getInstance("RSA").generatePublic(spec);

        Cipher enc = Cipher.getInstance("RSA");
        enc.init(Cipher.ENCRYPT_MODE, pub);
        // Encode with an explicit charset so the ciphertext input does not depend
        // on the platform default encoding.
        byte[] plain = messageg.getBytes(java.nio.charset.Charset.forName("UTF-8"));
        byte[] encrypted = enc.doFinal(plain);
        return new String(Hex.encodeHex(encrypted));
    } catch (java.security.GeneralSecurityException e) {
        // Covers every checked failure of key construction and encryption; all of
        // them are now reported (two of them used to return "" silently).
        System.out.println(e.getMessage());
        return "";
    }
}
/**
 * Stores the user id.
 *
 * @param userid the value to store
 */
public void setUserid(String userid) {
    this.userid = userid;
}

/**
 * @return the stored user id
 */
public String getUserid() {
    return this.userid;
}

/**
 * Stores the user domain name.
 *
 * @param userdomainname the value to store
 */
public void setUserdomainname(String userdomainname) {
    this.userdomainname = userdomainname;
}

/**
 * @return the stored user domain name
 */
public String getUserdomainname() {
    return this.userdomainname;
}
} // closes the enclosing class
SpiderSina类如下:
package main.java.sina.httpclient;
import java.util.HashMap;
import java.util.List;
import java.util.Map; import org.apache.http.HttpResponse;
import org.apache.http.cookie.Cookie; import main.java.sina.utils.Constant;
import main.java.sina.utils.EncodeUtils;
import main.java.sina.utils.HttpUtils;
import main.java.sina.utils.Utils; public class SpiderSina {
// Login object whose cookies are used for the crawler's requests.
private LoginSina ls;
// Header map shared (and mutated) by every request method of this class.
private Map<String,String> headers;
// Option codes understood by addorcancleFollowing.
private final int ADDFOLLOWING =1;
private final int CANCELFOLLOWING =2;

/**
 * Creates a crawler bound to the given login and prepares the default request
 * headers, including a Cookie header built from the login's cookies.
 *
 * @param ls the login object that supplies the cookies
 */
public SpiderSina(LoginSina ls){
    this.ls = ls;
    Map<String,String> defaults = new HashMap<String,String>();
    defaults.put("Accept", "text/html, application/xhtml+xml, */*");
    defaults.put("Accept-Language", "zh-cn");
    defaults.put("User-Agent", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; BOIE9;ZHCN");
    defaults.put("Connection", "Keep-Alive");
    defaults.put("Cache-Control", "no-cache");
    defaults.put("Cookie", HttpUtils.setCookie2String(ls.getCookies()));
    this.headers = defaults;
}
/**
 * Requests the q.weibo.com front page.
 *
 * @return the response text after it has been passed through
 *         {@code EncodeUtils.unicdoeToGB2312}
 */
public String getGroupCategory(){
    this.headers.put("Host", "q.weibo.com");
    HttpResponse response = HttpUtils.doGet("http://q.weibo.com/", headers);
    String rawText = HttpUtils.getStringFromResponse(response);
    return EncodeUtils.unicdoeToGB2312(rawText);
}
/**
 * Requests one page of search results for a keyword from s.weibo.com.
 *
 * <p>The keyword is now placed in the URL. Previously the URL was pinned to one
 * fixed, pre-encoded search term and the {@code keyword} argument was ignored.
 * The request also no longer overwrites the Cookie header with a hard-coded
 * browser cookie; the Cookie header already present in the shared header map is used.
 *
 * @param keyword search term, inserted into the URL unchanged
 *                (NOTE(review): presumably expected to be URL-encoded by the caller - confirm)
 * @param pageNo  page number to request
 * @return the response text after it has been passed through
 *         {@code EncodeUtils.unicdoeToGB2312}
 */
public String search(String keyword, int pageNo){
    String url = "http://s.weibo.com/weibo/" + keyword + "&page=" + pageNo;
    headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    headers.put("Accept-Language", "zh-CN");
    headers.put("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");
    headers.put("Connection", "Keep-Alive");
    headers.put("Cache-Control", "max-age=0");
    headers.put("Referer", "http://login.sina.com.cn/sso/login.php?url=http%3A%2F%2Fs.weibo.com%2Fweibo%2F%2525E6%252583%2525A0%2525E6%252599%2525AE%26page%3D2&_rand=1419173756.6387&gateway=1&service=weibo&entry=miniblog&useticket=1&returntype=META");
    this.headers.put("Host", "s.weibo.com");
    HttpResponse response = HttpUtils.doGet(url, headers);
    String responseText = HttpUtils.getStringFromResponse(response);
    return EncodeUtils.unicdoeToGB2312(responseText);
}

/**
 * Requests the page {@code http://www.weibo.com/u/<uid>}.
 *
 * <p>The hard-coded browser cookie that used to replace the Cookie header has
 * been removed; the Cookie header already present in the shared header map is used.
 *
 * @param uid user id appended to the URL
 * @return the response text after it has been passed through
 *         {@code EncodeUtils.unicdoeToGB2312}
 */
public String searchCommentsByUid(String uid){
    String url = "http://www.weibo.com/u/" + uid;
    headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    headers.put("Accept-Language", "zh-CN");
    headers.put("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");
    headers.put("Connection", "Keep-Alive");
    headers.put("Cache-Control", "max-age=0");
    this.headers.put("Host", "www.weibo.com");
    HttpResponse response = HttpUtils.doGet(url, headers);
    String responseText = HttpUtils.getStringFromResponse(response);
    return EncodeUtils.unicdoeToGB2312(responseText);
}
/**
 * Requests one page of search results for a keyword restricted to a custom time
 * range, and prints the resulting page text to standard output.
 *
 * <p>The query parameter {@code &timescope=custom:} is spelled out here; in the
 * previous text it had been mangled into the character sequence "×cope", both
 * in the request URL and in the Referer header.
 *
 * @param keyword  search term, inserted into the URL unchanged
 * @param pageNo   page number to request
 * @param fromdate start of the time range, inserted into the URL unchanged
 * @param todate   end of the time range, inserted into the URL unchanged
 * @return the response text after it has been passed through
 *         {@code EncodeUtils.unicdoeToGB2312}
 */
public String search(String keyword, int pageNo, String fromdate,String todate){
    StringBuilder urlBuilder = new StringBuilder(200);
    urlBuilder.append("http://s.weibo.com/weibo/").append(keyword).append("&page=");
    urlBuilder.append(pageNo);
    urlBuilder.append("&typeall=1&suball=1&timescope=custom:");
    urlBuilder.append(fromdate);
    urlBuilder.append(":");
    urlBuilder.append(todate);
    urlBuilder.append("&Refer=g");
    String url = urlBuilder.toString();

    // The Cookie header already stored in the shared map is sent unchanged.
    headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    headers.put("Accept-Language", "zh-CN");
    headers.put("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");
    headers.put("Connection", "Keep-Alive");
    headers.put("Cache-Control", "max-age=0");
    headers.put("Referer", "http://s.weibo.com/weibo/%25E5%25AE%2581%25E6%25B3%25A2%25E5%25A4%25A7%25E5%25AD%25A6&typeall=1&suball=1&timescope=custom:"+fromdate+":"+todate+"&Refer=g");
    this.headers.put("Host", "s.weibo.com");

    HttpResponse response = HttpUtils.doGet(url, headers);
    String responseText = HttpUtils.getStringFromResponse(response);
    responseText = EncodeUtils.unicdoeToGB2312(responseText);
    System.out.println("************htmlContent start***********");
    System.out.println(responseText);
    System.out.println("************htmlContent end***********");
    return responseText;
}
/**
 * Requests the page configured in {@code Constant.personalHomePage} and replaces
 * the shared Cookie header with the cookies taken from that response.
 *
 * <p>NOTE(review): the Host header is set to "s.weibo.com" regardless of the
 * configured URL - confirm that this is intended.
 */
public void forwardToWeiboPage(){
    headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    headers.put("Accept-Language", "zh-CN");
    headers.put("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");
    headers.put("Connection", "Keep-Alive");
    this.headers.put("Host", "s.weibo.com");

    HttpResponse response = HttpUtils.doGet(Constant.personalHomePage, headers);
    // The body is read and converted exactly as before, although the result is not used.
    EncodeUtils.unicdoeToGB2312(HttpUtils.getStringFromResponse(response));

    List<Cookie> responseCookies = HttpUtils.getResponseCookies(response);
    headers.put("Cookie", HttpUtils.setCookie2String(responseCookies));
}
/**
 * Requests the q.weibo.com category page for the given id.
 *
 * @param id category id placed in the query string
 * @return the response text after it has been passed through
 *         {@code EncodeUtils.unicdoeToGB2312}
 */
public String getGroupCategory(int id){
    this.headers.put("Host", "q.weibo.com");
    HttpResponse response = HttpUtils.doGet("http://q.weibo.com/class/category/?id=" + id, headers);
    String rawText = HttpUtils.getStringFromResponse(response);
    return EncodeUtils.unicdoeToGB2312(rawText);
}
/**
 * Requests the "members/all" page of a group. Per the original author's note,
 * this first page of members is an HTML page that carries the group
 * administrators' ids.
 *
 * @param groupid id of the group
 * @return the response text
 */
public String getGroupAdministrator(String groupid) {
    // This is a plain page request, so drop the headers left over from AJAX calls.
    this.headers.remove("Referer");
    this.headers.remove("Content-Type");
    this.headers.remove("x-requested-with");
    this.headers.put("Host", "q.weibo.com");
    String membersUrl = "http://q.weibo.com/" + groupid + "/members/all";
    return HttpUtils.getStringFromResponse(HttpUtils.doGet(membersUrl, headers));
}
/**
 * Posts a request for one page of a group's members. Per the original author's
 * note the response is JSON data with the members' ids.
 *
 * @param groupid    id of the group
 * @param pagenumber page number to request
 * @return the response text
 */
public String getGroupMembers(String groupid,int pagenumber){
    this.headers.put("Referer", "http://q.weibo.com/"+groupid+"/members/all");
    this.headers.put("Host", "q.weibo.com");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("x-requested-with", "XMLHttpRequest");

    Map<String,String> form = new HashMap<String,String>();
    form.put("_t", "0");
    form.put("page", String.valueOf(pagenumber));
    form.put("gid", groupid);
    form.put("query", "");
    form.put("tab", "all");
    form.put("vip", "1");

    HttpResponse response = HttpUtils.doPost("http://q.weibo.com/ajax/members/page", headers, form);
    return HttpUtils.getStringFromResponse(response);
}
/**
 * Posts a request for the posts of a group.
 *
 * <p>Original author's notes: this worked after many attempts; 50 posts are
 * obtained per call; {@code count} may be between 1 and 75, but each page still
 * starts at a multiple of 50.
 *
 * @param page  page number
 * @param count number of posts requested
 * @param gid   id of the group
 * @return the response text
 */
public String getGroupTopic(int page,int count,String gid){
    this.headers.put("Referer", "http://q.weibo.com/"+gid);
    this.headers.put("Host", "q.weibo.com");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("x-requested-with", "XMLHttpRequest");

    // For the first page the "previous page" value sent is 2; otherwise it is page - 1.
    int previousPage = (page == 1) ? 2 : page - 1;

    Map<String,String> form = new HashMap<String,String>();
    form.put("_k", String.valueOf(System.currentTimeMillis()));
    form.put("_t", "0");
    form.put("count", String.valueOf(count));
    form.put("gid", gid);
    form.put("is_search", "");
    form.put("key_word", "");
    form.put("me", "0");
    form.put("mids", "");
    form.put("new", "0");
    form.put("page", String.valueOf(page));
    form.put("pagebar", "0");
    form.put("pre_page", String.valueOf(previousPage));
    form.put("since_id", "0");
    form.put("uid", "0");

    HttpResponse response = HttpUtils.doPost("http://q.weibo.com/ajax/mblog/groupfeed", headers, form);
    return HttpUtils.getStringFromResponse(response);
}
/**
 * Requests the group profile data of a group. Per the original author's note,
 * the JSON response holds the number of posts in the group and also the group's
 * basic information.
 *
 * @param gid id of the group
 * @return the response text
 */
public String getGroupMessageNumber(String gid){
    this.headers.put("Referer", "http://q.weibo.com/"+gid);
    this.headers.put("Host", "q.weibo.com");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("x-requested-with", "XMLHttpRequest");
    String profileUrl = "http://q.weibo.com/ajax/rightnav/groupprofile?gid=" + gid
            + "&_t=0&__rnd=" + System.currentTimeMillis();
    return HttpUtils.getStringFromResponse(HttpUtils.doGet(profileUrl, headers));
}
/**
 * Requests the main page of a group. Per the original author's note, the HTML
 * page is fetched mainly to obtain the MID of the first post.
 *
 * @param groupid id of the group
 * @return the response text
 */
public String getgroupMainPage(String groupid) {
    // This is a plain page request, so drop the headers left over from AJAX calls.
    this.headers.remove("Referer");
    this.headers.remove("Content-Type");
    this.headers.remove("x-requested-with");
    this.headers.put("Host", "q.weibo.com");
    String mainPageUrl = "http://q.weibo.com/" + groupid + "?topnav=1";
    return HttpUtils.getStringFromResponse(HttpUtils.doGet(mainPageUrl, headers));
}
/**
 * Posts a request for the groups that belong to a category.
 *
 * @param categroyID category id
 * @param pagenumber page number
 * @param sort       ordering; per the original note: 1 by member count,
 *                   2 by number of posts, 3 by creation time
 * @param count      number of records per page
 * @return the response text after it has been passed through
 *         {@code EncodeUtils.unicdoeToGB2312}
 */
public String getGroupByCategroy(int categroyID,int pagenumber,int sort,int count){
    this.headers.put("Referer", "http://q.weibo.com/class/category/?id="+categroyID);
    this.headers.put("Host", "q.weibo.com");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("x-requested-with", "XMLHttpRequest");

    Map<String,String> form = new HashMap<String,String>();
    form.put("_t", "0");
    form.put("page", String.valueOf(pagenumber));
    form.put("id", String.valueOf(categroyID));
    form.put("sort", String.valueOf(sort));
    form.put("count", String.valueOf(count));

    HttpResponse response = HttpUtils.doPost("http://q.weibo.com/ajax/class/category", headers, form);
    String rawText = HttpUtils.getStringFromResponse(response);
    return EncodeUtils.unicdoeToGB2312(rawText);
}
/**
 * Requests the emoticon ("face") list, prints the response to standard output
 * and writes it to {@code tmpFile/faceList.txt}.
 *
 * @return the response text
 */
public String getFaceList(){
    String faceUrl = "http://weibo.com/aj/mblog/face?type=face&_t=0&__rnd=" + System.currentTimeMillis();
    this.headers.put("Referer", "http://weibo.com/");
    this.headers.put("Host", "weibo.com");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("x-requested-with", "XMLHttpRequest");
    String faceList = HttpUtils.getStringFromResponse(HttpUtils.doGet(faceUrl, headers));
    System.out.println(faceList);
    Utils.writeFileFromString("tmpFile/faceList.txt", faceList);
    return faceList;
}
/**
 * Requests the "info" page of a user. Per the original author's note, the
 * encoded content at the bottom of the page is what will later be parsed for
 * the user's basic information.
 *
 * @param memberID id of the user
 * @return the response text
 */
public String getMemberInfo(String memberID){
    this.headers.put("Host", "weibo.com");
    this.headers.put("Referer", "http://weibo.com/u/"+memberID);
    String infoUrl = "http://weibo.com/" + memberID + "/info";
    return HttpUtils.getStringFromResponse(HttpUtils.doGet(infoUrl, headers));
}
/**
 * Requests one page of a user's fans. Per the original author's note the
 * response is an HTML page with 20 fans per call.
 *
 * @param memberID id of the user
 * @param page     page number
 * @return the response text
 */
public String getMemberFans(String memberID,int page){
    this.headers.put("Host", "weibo.com");
    this.headers.put("Referer", "http://weibo.com/"+memberID+"/fans");
    // NOTE(review): the uid query parameter is a fixed number rather than memberID - confirm.
    String fansUrl = "http://weibo.com/" + memberID + "/fans?&uid=1689219395&tag=&page=" + page;
    return HttpUtils.getStringFromResponse(HttpUtils.doGet(fansUrl, headers));
}
/**
 * Requests one page of the users that a user follows (an HTML page, per the
 * original author's note).
 *
 * @param memberID id of the user
 * @param page     page number
 * @return the response text
 */
public String getMemberFollowing(String memberID,int page){
    String url = "http://weibo.com/" + memberID + "/follow?page=" + page;
    this.headers.put("Host", "weibo.com");
    this.headers.put("Referer", "http://weibo.com/"+memberID+"/follow");
    HttpResponse response = HttpUtils.doGet(url, headers);
    return HttpUtils.getStringFromResponse(response);
}

/**
 * Requests the posts published by a user.
 *
 * <p>The hard-coded browser cookie that used to replace the Cookie header has
 * been removed; the Cookie header already present in the shared header map is used.
 *
 * <p>Original author's notes on the AJAX URL that is assembled below:
 * <ul>
 *   <li>max_id: the last id returned by the previous AJAX call; it may simply be
 *       set equal to end_id.</li>
 *   <li>k: a random number.</li>
 *   <li>count: 50 when there is no max_id, otherwise 15.</li>
 *   <li>Every call returns data in the same format, so one parser can be used.</li>
 *   <li>One page holds 45 records in total and needs three calls of 15 records each.</li>
 *   <li>For the first call end_id may be null (the page-turn request); the
 *       following scroll requests must carry end_id, which is the id of the first
 *       post on the first page.</li>
 * </ul>
 *
 * @param memberID id of the user
 * @param end_id   id of the user's newest post, or null for the first call
 * @param page     page number
 * @param pagebar  per the original note: 0 for the first AJAX call, 1 for the second
 * @return the response text, or an empty string when no response was obtained
 */
public String getMemberReleaseTopic(String memberID,String end_id,Integer page,Integer pagebar){
    String url = "";
    Integer pre_page = 1;
    Integer count = 0;
    String k = System.currentTimeMillis()+""+(int)(Math.random()*100000)%100;
    if (end_id == null) {
        count = 50;
        if (page == 1) {
            pre_page = 2;
        } else {
            pre_page = page - 1;
        }
        url = "http://weibo.com/aj/mblog/mbloglist?" +
                "page="+page+"&count="+count+"&pre_page="+pre_page+"&" +
                "_k="+ k+"&uid="+memberID+
                "&_t=0&__rnd="+System.currentTimeMillis();
    } else {
        count = 15;
        pre_page = page;
        url = "http://weibo.com/aj/mblog/mbloglist?" +
                "page="+page+"&count="+count+"&max_id="+end_id+"&" +
                "pre_page="+pre_page+"&end_id="+end_id+"&" +
                "pagebar="+pagebar+"&_k="+k+"&" +
                "uid="+memberID+"&_t=0&__rnd="+System.currentTimeMillis();
    }
    this.headers.put("Referer", "http://weibo.com/u/"+memberID);
    this.headers.put("Host", "www.weibo.com");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("x-requested-with", "XMLHttpRequest");
    // NOTE(review): the AJAX URL assembled above is overwritten here, so the request
    // actually fetches the user's profile page. Kept as in the original - confirm
    // which of the two URLs callers rely on.
    url = "http://weibo.com/u/"+memberID;
    HttpResponse response = HttpUtils.doGet(url, headers);
    if (response == null) {
        return "";
    }
    return HttpUtils.getStringFromResponse(response);
}
/*
* ~~~~~~~~~~~~~~~~~~~~~获取用户的一些信息~~~end~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/ //********************************************************************************** /*
* 名人堂与达人信息
*/
public String getVerified(String url){
this.headers.put("Host", "verified.weibo.com");
this.headers.put("Referer", "http://plaza.weibo.com/?topnav=1&wvr=4");
HttpResponse response=HttpUtils.doGet(url, headers);
String responseText=HttpUtils.getStringFromResponse(response);
return responseText;
} public String getVerifiedMember(String path,Integer g_index){
String url="http://verified.weibo.com/aj/getgrouplist?g_index="+g_index+
"&path="+path+"&_t=0&__rnd="+System.currentTimeMillis();
this.headers.put("Host", "verified.weibo.com");
this.headers.put("Referer", path);
this.headers.put("Content-Type", "application/x-www-form-urlencoded");
this.headers.put("x-requested-with", "XMLHttpRequest");
HttpResponse response=HttpUtils.doGet(url, headers);
String responseText=HttpUtils.getStringFromResponse(response); return responseText;
} public String setArea(Integer provinceID){
this.headers.put("Referer", "http://club.weibo.com/list");
this.headers.put("Host", "club.weibo.com");
this.headers.put("Content-Type", "application/x-www-form-urlencoded");
this.headers.put("x-requested-with", "XMLHttpRequest"); Map<String,String> params=new HashMap<String,String>(); params.put("_t", "0");
params.put("city", "1000");
params.put("prov", provinceID+""); String url="http://club.weibo.com/ajax_setArea.php";
HttpResponse response=HttpUtils.doPost(url, headers, params); List<Cookie> cks=HttpUtils.getResponseCookies(response);
List<Cookie> cookies=ls.getCookies();
cookies.addAll(cks);
String cookieValue=HttpUtils.setCookie2String(cookies);
this.headers.put("Cookie", cookieValue); return HttpUtils.getStringFromResponse(response);
} public String getDaRen(Integer page){
String op="ltime";
String url="http://club.weibo.com/list?sex=3&op="+op+"&page="+page+"&";
Integer pre_page=(page<=1? 2:page-1);
this.headers.put("Host", "club.weibo.com");
this.headers.put("Referer", "http://club.weibo.com/list?sex=3&op=ltime&page="+pre_page+"&");
this.headers.remove("Content-Type");
this.headers.remove("x-requested-with"); HttpResponse response=HttpUtils.doGet(url, headers);
if(response!= null){
return HttpUtils.getStringFromResponse(response);
}
return ""; }
/**
 * Publishes a text-only post.
 *
 * @param content text of the post
 * @return the response text
 */
public String releaseTopic(String content){
    this.headers.put("Referer", "http://weibo.com/");
    this.headers.put("Host", "weibo.com");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("x-requested-with", "XMLHttpRequest");

    Map<String,String> form = new HashMap<String,String>();
    form.put("_t", "0");
    form.put("location", "home");
    form.put("module", "stissue");
    form.put("pic_id", "");
    form.put("text", content);

    String postUrl = "http://weibo.com/aj/mblog/add?__rnd=" + System.currentTimeMillis();
    return HttpUtils.getStringFromResponse(HttpUtils.doPost(postUrl, headers, form));
}

/**
 * Intended to return the members the current user follows.
 * Not implemented: always returns an empty string.
 */
public String getSelfFollowIngs(){
    return "";
}

/**
 * Intended to return the current user's fans.
 * Not implemented: always returns an empty string.
 */
public String getSelfFollowers(){
    return "";
}

/**
 * Intended to return the groups the current user has joined.
 * Not implemented: always returns an empty string.
 */
public String getSelfJoinedGroups(){
    return "";
}

/**
 * Intended to return the current user's tags.
 * Not implemented: always returns an empty string.
 */
public String getSelfTags(){
    return "";
}

/**
 * Intended to return the posts published by the current user.
 * Not implemented: always returns an empty string.
 */
public String getSelfReleaseTopics(){
    return "";
}

/**
 * Intended to return the posts shown on the current user's home page.
 * Not implemented: always returns an empty string.
 */
public String getSelfPageTopics(){
    return "";
}
/**
 * Follows a user.
 *
 * @param memberid id of the user to follow
 * @return the response text
 */
public String addFollowing(String memberid){
    String result = addorcancleFollowing(memberid, ADDFOLLOWING);
    return result;
}

/**
 * Stops following a user.
 *
 * @param memberid id of the user to unfollow
 * @return the response text
 */
public String cancelFollowing(String memberid){
    String result = addorcancleFollowing(memberid, CANCELFOLLOWING);
    return result;
}
/**
 * Posts a follow or unfollow request for a user.
 *
 * @param memberid id of the target user
 * @param option   {@code ADDFOLLOWING} to follow, {@code CANCELFOLLOWING} to
 *                 unfollow; for any other value the request URL stays empty
 * @return the response text
 */
private String addorcancleFollowing(String memberid,int option){
    String url = "";
    if (option == ADDFOLLOWING) {
        url = "http://weibo.com/aj/f/followed?__rnd=" + System.currentTimeMillis();
    } else if (option == CANCELFOLLOWING) {
        url = "http://weibo.com/aj/f/unfollow?__rnd=" + System.currentTimeMillis();
    }

    this.headers.put("Referer", "http://weibo.com/");
    this.headers.put("Host", "weibo.com");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("x-requested-with", "XMLHttpRequest");

    Map<String,String> form = new HashMap<String,String>();
    form.put("_t", "0");
    form.put("f", "1");
    form.put("location", "profile");
    form.put("refer_flag", "");
    form.put("refer_sort", "profile");
    form.put("uid", memberid);

    return HttpUtils.getStringFromResponse(HttpUtils.doPost(url, headers, form));
}
/**
 * Requests tag suggestions; per the original author's note each call yields 10 tags.
 *
 * @return the response text
 */
public String getTags(){
    this.headers.put("Referer", "http://account.weibo.com/set/tag#");
    this.headers.put("Host", "account.weibo.com");
    String tagUrl = "http://account.weibo.com/set/aj/tagsuggest?__rnd=" + System.currentTimeMillis();
    return HttpUtils.getStringFromResponse(HttpUtils.doGet(tagUrl, headers));
}

/**
 * Requests the hot keywords of a category.
 *
 * @param k category of hot keywords; it is sent as query parameter {@code k}
 *          when it parses as an integer and as query parameter {@code t} otherwise
 * @return the response text
 */
public String getHotWords(String k){
    String parameterName;
    try {
        Integer.parseInt(k);
        parameterName = "k";
    } catch (NumberFormatException notNumeric) {
        parameterName = "t";
    }
    String keywordUrl = "http://data.weibo.com/top/keyword?" + parameterName + "=" + k;
    this.headers.put("Referer", "http://data.weibo.com/top/keyword");
    this.headers.put("Host", "data.weibo.com");
    return HttpUtils.getStringFromResponse(HttpUtils.doGet(keywordUrl, headers));
}

/**
 * Requests one page of hot posts of a category.
 *
 * @param cat  category of hot posts
 * @param page page number
 * @return the response text
 */
public String getHotWeibo(String cat,int page){
    String feedUrl = "http://data.weibo.com/hot/ajax/catfeed?page=" + page + "&cat=" + cat
            + "&_t=0&__rnd=" + System.currentTimeMillis();
    this.headers.put("Referer", "http://data.weibo.com/hot/minibloghot");
    this.headers.put("Host", "data.weibo.com");
    return HttpUtils.getStringFromResponse(HttpUtils.doGet(feedUrl, headers));
}

/**
 * Step one of the weiba crawl: requests the weiba names of a category.
 *
 * @param ctgid category id
 * @param p     page number
 * @return the response text
 */
public String getWeiBar(String ctgid,int p){
    String listUrl = "http://weiba.weibo.com/aj_f/CategoryList?sort=post&p=" + p
            + "&ctgid=" + ctgid + "&_t=0&__rnd=" + System.currentTimeMillis();
    this.headers.put("Referer", "http://weiba.weibo.com/ct/"+ctgid);
    this.headers.put("Host", "weiba.weibo.com");
    this.headers.put("Accept", "*/*");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("X-Requested-With", "XMLHttpRequest");
    return HttpUtils.getStringFromResponse(HttpUtils.doGet(listUrl, headers));
}
/**
 * Step two of the weiba crawl: requests the post titles inside one weiba.
 *
 * @param bid identifier of the weiba, sent as query parameter {@code bid}
 * @param p   page number
 * @return the response text
 */
public String getWeiBarByWeibarName(String bid,int p){
    String postListUrl = "http://weiba.weibo.com/aj_t/postlist?bid=" + bid + "&p=" + p
            + "&_t=all&__rnd=" + System.currentTimeMillis();
    this.headers.put("Referer", "http://weiba.weibo.com/");
    this.headers.put("Host", "weiba.weibo.com");
    this.headers.put("Accept", "*/*");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("X-Requested-With", "XMLHttpRequest");
    return HttpUtils.getStringFromResponse(HttpUtils.doGet(postListUrl, headers));
}

/**
 * Requests the member list of a Sina micro-charity project.
 *
 * @param page      page number
 * @param projectID id of the project
 * @param type      list type; the original note names "donate" and "discuss"
 * @return the response text
 */
public String getWeiGongYiMember(int page,int projectID,String type){
    String memberUrl = "http://gongyi.weibo.com/aj_personal_helpdata?page=" + page
            + "&type=" + type + "&project_id=" + projectID
            + "&_t=0&__rnd=" + System.currentTimeMillis();
    this.headers.put("Referer", "http://gongyi.weibo.com/"+projectID);
    this.headers.put("Host", "gongyi.weibo.com");
    this.headers.put("Accept", "*/*");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("X-Requested-With", "XMLHttpRequest");
    return HttpUtils.getStringFromResponse(HttpUtils.doGet(memberUrl, headers));
}
}
用java实现新浪爬虫,代码完整剖析(仅针对当前SinaSignOn有效)的更多相关文章
- Java 模拟新浪登录 2016
想学习一下网络爬虫。涉及到模拟登录,查阅了一番资料以后发现大部分都有点过时了,就使用前辈们给的经验,用Firefox抓包调试,采用httpclient模拟了一下新浪登录。不正确之处多多包涵。需要的可以 ...
- 【python网络编程】新浪爬虫:关键词搜索爬取微博数据
上学期参加了一个大数据比赛,需要抓取大量数据,于是我从新浪微博下手,本来准备使用新浪的API的,无奈新浪并没有开放关键字搜索的API,所以只能用爬虫来获取了.幸运的是,新浪提供了一个高级搜索功能,为我 ...
- java调用新浪接口根据Ip查询所属地区
import java.io.BufferedReader; import java.io.InputStream; import java.io.InputStreamReader; import ...
- 新浪天气api
package com.smartdot.dcu; /** * java获取新浪天气预报代码 */ import java.io.FileNotFoundException; import java. ...
- android新浪分享实例
新浪分享比较简单,新浪有提供完整的demo. android实现新浪的分享功能,分3种分享情况: 纯文本的,带图片的,图片为本地图片(传入的是图片在手机的地址),第2种带图片的是,网络图片,图片地址为 ...
- Java 8新特性-4 方法引用
对于引用来说我们一般都是用在对象,而对象引用的特点是:不同的引用对象可以操作同一块内容! Java 8的方法引用定义了四种格式: 引用静态方法 ClassName :: staticMetho ...
- Thinkcmf 在新浪云上的部署问题
最近要开发一个社团主页,于是想到了CMF内容管理系统的,但是直接在自己的服务器测试成本太高,于是选择了在新浪云上进行部署测试. 但是在安装Thinkcmf的过程中产生了一些技术性的问题.但最后终于在自 ...
- Java编程的逻辑 (90) - 正则表达式 (下 - 剖析常见表达式)
本系列文章经补充和完善,已修订整理成书《Java编程的逻辑》,由机械工业出版社华章分社出版,于2018年1月上市热销,读者好评如潮!各大网店和书店有售,欢迎购买,京东自营链接:http:/ ...
- python3.4学习笔记(十四) 网络爬虫实例代码,抓取新浪爱彩双色球开奖数据实例
python3.4学习笔记(十四) 网络爬虫实例代码,抓取新浪爱彩双色球开奖数据实例 新浪爱彩双色球开奖数据URL:http://zst.aicai.com/ssq/openInfo/ 最终输出结果格 ...
随机推荐
- Android 在View中更新View
直接用Invalidate()方法会导致错误:只有主线程才能更新UI 取而代之的是可以使用postInvalidate(); 原因: 最终会调用ViewRootImpl类的dispatchInvali ...
- CAR
24.编写一个Car类,具有String类型的属性品牌,具有功能drive: 定义其子类Aodi和Benchi,具有属性:价格.型号:具有功能:变速: 定义主类E,在其main方法中分别创建Aodi和 ...
- vue.js学习之入门实例
之前一直看过vue.js官网api,但是很少实践,这里抽出时间谢了个入门级的demo,记录下一些知识点,防止后续踩坑,牵扯到的的知识点:vue.vue-cli.vue-router.webpack等. ...
- 修改WebBrowser控件的内核解决方案
方法一 加入你想让WebBrowser控件的渲染模式编程IE8的标准模式, 你可以通过设置注册表FEATURE_BROWSER_EMULATION 来实现. 示例: 注册表中注明当前本机装的IE版本H ...
- WebClient.DownloadFile(线程机制,异步下载文件)
线程机制(避免卡屏),异步下载文件. 我做网站的监控,WebClient.DownloadFile这个方法是我经常用到的,必要的时候肯定是要从网上下载些什么(WebRequest 也可以下载网络文件, ...
- 练习2 练习目标-使用引用类型的成员变量:在本练习中,将扩展银行项目,添加一个(客户类)Customer类。Customer类将包含一个Account对象。
package banking; public class Customer { private String firstName; private String lastName; private ...
- javascript基础语法——词法结构
× 目录 [1]java [2]定义 [3]大小写[4]保留字[5]注释[6]空白[7]分号 前面的话 javascript是一门简单的语言,也是一门复杂的语言.说它简单,是因为学会使用它只需片刻功夫 ...
- CSS中常见的6种文本样式
前面的话 CSS文本样式是相对于内容进行的样式修饰.由于在层叠关系中,内容要高于背景.所以文本样式相对而言更加重要.有些人对文本和字体样式之间的不同不太了解,简单地讲,文本是内容,而字体则用于显示这个 ...
- java多线程执行问题
class Demo extends Thread{ public Demo(String name){ super(name); } public void run(){ for(int i=0; ...
- CSS3入门之转换
CSS3入门之转换 *:first-child { margin-top: 0 !important; } body>*:last-child { margin-bottom: 0 !impor ...