验证码识别,爬虫永远的话题~

使用打码兔的总体体验是:单线程下速度太慢~

import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Date; import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements; import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlButton;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput; import cn.smy.dama2.Dama2Web;
import cn.smy.dama2.Dama2Web.DecodeResult;
import cn.smy.dama2.Dama2Web.ReadBalanceResult; /**
* @Title: main.java
* @Package
* @Description: TODO(用一句话描述该文件做什么)
* @author A18ccms A18ccms_gmail_com
* @date 2017年2月15日 下午3:42:00
* @version V1.0
*/ /**
* @ClassName: main
* @Description: TODO
* @author zeze
* @date 2017年2月15日 下午3:42:00
*
*/
public class main {
    // Checks a fixed batch of e-mail addresses against eBay's "forgot password" page, one
    // address at a time, solving any captcha through the Dama2 service, and prints the
    // verdict and the elapsed time for every address.

    private static Logger logger = Logger.getLogger(main.class);

    // NOTE(review): the arguments below contain "*" masking in this listing; presumably they
    // must be replaced with real Dama2 credentials before the class compiles - confirm.
    private static Dama2Web dama2 = new Dama2Web(46****, "41c5a58de6********d23b67f61645e3a7", "***", "****");

    // DecodeResult.ret of the most recent successful decode; handed to dama2.reportError
    // when eBay rejects the code that decode produced.
    private static int id;

    // Status codes returned by checkEbayAccount.
    private static final int STATUS_PAGE_UNAVAILABLE = -1;
    private static final int STATUS_NOT_EBAY_ACCOUNT = 0;
    private static final int STATUS_EBAY_ACCOUNT = 1;
    private static final int STATUS_UNKNOWN_AFTER_CAPTCHA = 2;
    private static final int STATUS_UNKNOWN_PAGE = 3;
    private static final int STATUS_CAPTCHA_WRONG = 101;

    // Text fragments used to recognise which eBay page came back.
    private static final String TEXT_RESET_PROMPT = "Select how you want to reset your password";
    private static final String TEXT_NO_MATCH = "Oops, that's not a match. Try again?";
    private static final String TEXT_CAPTCHA_PAGE = "Security Measure";
    private static final String TEXT_CAPTCHA_REJECTED =
            "the verification code you entered doesn't match against the image";

    private static final long MILLIS_PER_HOUR = 1000 * 60 * 60;
    private static final long MILLIS_PER_MINUTE = 1000 * 60;
    private static final long MILLIS_PER_SECOND = 1000;

    /**
     * Checks "asd@qq.com" and "asd1@qq.com" .. "asd9@qq.com" sequentially and prints the
     * result and the time spent on each address.
     */
    public static void main(String[] agrs) {
        for (int i = 0; i < 10; i++) {
            long startMillis = System.currentTimeMillis();
            String emailAccount = (i == 0) ? "asd@qq.com" : "asd" + i + "@qq.com";

            int statusCode = checkEbayAccount(emailAccount);
            // A rejected captcha is retried until some other status comes back; the final
            // status is then reported below instead of being dropped.
            while (statusCode == STATUS_CAPTCHA_WRONG) {
                System.out.println("打码错误!");
                statusCode = checkEbayAccount(emailAccount);
            }

            if (statusCode == STATUS_NOT_EBAY_ACCOUNT) {
                System.out.println(emailAccount + " 该邮箱号不是ebay账号");
            } else if (statusCode == STATUS_EBAY_ACCOUNT) {
                System.out.println(emailAccount + " 该账号是eBay账号!");
            } else {
                System.out.println(statusCode);
            }

            System.out.println(formatElapsed(System.currentTimeMillis() - startMillis));
        }
    }

    /**
     * Checks whether an e-mail address belongs to an eBay account by submitting it to the
     * "forgot password" page.
     *
     * @param emailAccount the e-mail address to check
     * @return 1 if the password-reset prompt is shown (address is an eBay account);
     *         0 if eBay answers that the address does not match;
     *         2 if the page after the captcha round is not recognised;
     *         3 if the page after the first submission is not recognised;
     *         101 if the captcha could not be decoded or eBay rejected the decoded code;
     *         -1 if the start page could not be loaded
     */
    public static int checkEbayAccount(String emailAccount) {
        System.out.println("开始验证账号:" + emailAccount);
        WebClient webClient = new WebClient(BrowserVersion.FIREFOX_17);
        try {
            return runCheck(webClient, emailAccount);
        } finally {
            // Release the windows and background threads held by this client.
            webClient.closeAllWindows();
        }
    }

    /** Drives the eBay pages for one address; see checkEbayAccount for the result codes. */
    private static int runCheck(WebClient webClient, String emailAccount) {
        HtmlPage page;
        try {
            page = webClient.getPage("http://fyp.ebay.com/");
        } catch (FailingHttpStatusCodeException e) {
            logger.error("Failed to load http://fyp.ebay.com/", e);
            return STATUS_PAGE_UNAVAILABLE;
        } catch (IOException e) {
            // Also covers MalformedURLException, which is an IOException.
            logger.error("Failed to load http://fyp.ebay.com/", e);
            return STATUS_PAGE_UNAVAILABLE;
        }

        page = submitEmail(page, emailAccount);
        if (pageContains(page, TEXT_RESET_PROMPT)) {
            return STATUS_EBAY_ACCOUNT;
        }
        if (pageContains(page, TEXT_NO_MATCH)) {
            return STATUS_NOT_EBAY_ACCOUNT;
        }

        while (pageContains(page, TEXT_CAPTCHA_PAGE)) {
            Document doc = Jsoup.parse(page.asXml());
            Elements imgSrc = doc.getElementsByTag("iframe");
            String imgUrl = imgSrc.attr("src");
            System.out.println("验证码图片链接:" + imgUrl);

            String code = getCode(imgUrl);
            if (code == null) {
                // Decoding failed, so there is nothing to type in and no decode id to
                // report; let the caller retry with a fresh captcha.
                return STATUS_CAPTCHA_WRONG;
            }

            // Submit the captcha text.
            HtmlForm form = page.getForms().get(0);
            form.getInputByName("tokenText").setValueAttribute(code);
            HtmlSubmitInput input = (HtmlSubmitInput) form.getElementsByTagName("input").get(5);
            try {
                page = input.click();
            } catch (IOException e) {
                logger.error("Failed to submit the captcha form", e);
            }

            if (pageContains(page, TEXT_CAPTCHA_REJECTED)) {
                dama2.reportError(id);
                return STATUS_CAPTCHA_WRONG;
            }

            // Submit the e-mail address again now that the captcha has been accepted.
            page = submitEmail(page, emailAccount);

            if (pageContains(page, TEXT_CAPTCHA_PAGE)) {
                // Still on the captcha page: solve another one.
                System.out.println("提交还是验证码页面!");
                continue;
            }

            if (pageContains(page, TEXT_NO_MATCH)) {
                return STATUS_NOT_EBAY_ACCOUNT;
            } else if (pageContains(page, TEXT_RESET_PROMPT)) {
                return STATUS_EBAY_ACCOUNT;
            } else {
                System.out.println(page.asText());
                return STATUS_UNKNOWN_AFTER_CAPTCHA;
            }
        }
        return STATUS_UNKNOWN_PAGE;
    }

    /**
     * Types the address into the second form of the page and clicks its first button.
     * Returns the resulting page, or the unchanged page if the click fails.
     */
    private static HtmlPage submitEmail(HtmlPage page, String emailAccount) {
        HtmlForm form = page.getForms().get(1);
        form.getInputByName("input").setValueAttribute(emailAccount);
        HtmlButton button = (HtmlButton) form.getElementsByTagName("button").get(0);
        try {
            HtmlPage next = button.click();
            return next;
        } catch (IOException e) {
            logger.error("Failed to submit the e-mail form", e);
            return page;
        }
    }

    /** Tells whether the rendered text of the page contains the given fragment. */
    private static boolean pageContains(HtmlPage page, String fragment) {
        return page.asText().indexOf(fragment) != -1;
    }

    /**
     * Formats a duration as minutes, seconds and milliseconds. Whole hours are dropped,
     * so the minutes part is always below 60.
     */
    private static String formatElapsed(long diff) {
        long min = diff % MILLIS_PER_HOUR / MILLIS_PER_MINUTE;
        long sec = diff % MILLIS_PER_MINUTE / MILLIS_PER_SECOND;
        long ms = diff % MILLIS_PER_SECOND;
        return min + "分钟" + sec + "秒" + ms + "毫秒";
    }

    /**
     * Asks the Dama2 service to decode the captcha image at the given URL.
     *
     * @param imgUrl URL of the captcha image
     * @return the decoded text; presumably null when decoding failed (ret is negative) -
     *         verify against the Dama2 client
     */
    public static String getCode(String imgUrl) {
        // Values passed straight to decodeUrlAndGetResult; their meaning is defined by the
        // Dama2 client (the timeout is presumably in seconds - confirm).
        int type = 6;
        int timeout = 30;
        // The balance is fetched but not used here; the call is kept in case the client
        // relies on it being made first - TODO confirm and drop if it is not needed.
        ReadBalanceResult balanceResult = dama2.getBalance();
        DecodeResult res = dama2.decodeUrlAndGetResult(imgUrl, type, timeout);
        String s;
        if (res.ret >= 0) {
            // Remember the decode id so a wrong answer can be reported later.
            id = res.ret;
            s = "success: result=" + res.result + "; id=" + res.ret;
            System.out.println(s);
        } else {
            s = "failed: ret = " + res.ret + "; desc=" + res.desc;
            System.err.println(s);
        }
        return res.result;
    }
}

测试结果如下:

用多线程测试,明显快多了

package test;

import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Date;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future; import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements; import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlButton;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput; import cn.smy.dama2.Dama2Web;
import cn.smy.dama2.Dama2Web.DecodeResult;
import cn.smy.dama2.Dama2Web.ReadBalanceResult; /***
*
* @ClassName: EbayMultiplyThreadCheck
* @Description: TODO
* @author zeze
* @date 2017年2月16日 上午8:49:46
*
*/
public class EbayMultiplyThreadCheck {
    // Runs the eBay account check for a batch of addresses on a fixed thread pool, one
    // task per address, and prints the total elapsed time.

    // Number of addresses to check; also the size of the thread pool.
    private static int threadNum = 30;

    private static final long MILLIS_PER_HOUR = 1000 * 60 * 60;
    private static final long MILLIS_PER_MINUTE = 1000 * 60;
    private static final long MILLIS_PER_SECOND = 1000;

    /**
     * Submits one CheckEbayAccount task per address, waits for all of them and prints the
     * elapsed time as minutes, seconds and milliseconds (whole hours are dropped).
     */
    public static void main(String[] args) {
        long startMillis = System.currentTimeMillis();

        ExecutorService exec = Executors.newFixedThreadPool(threadNum);
        ArrayList<Future<Integer>> results = new ArrayList<Future<Integer>>();
        for (int i = 0; i < threadNum; i++) {
            String email = (i == 0) ? "asd@qq.com" : "asd" + i + "@qq.com";
            results.add(exec.submit(new CheckEbayAccount(email)));
        }
        // No further tasks will be submitted; the ones already queued keep running.
        exec.shutdown();

        // Block on each future instead of polling with a sleep: this returns as soon as
        // the last task ends and surfaces tasks that died with an exception.
        for (Future<Integer> future : results) {
            try {
                future.get();
            } catch (InterruptedException e) {
                // Preserve the interrupt and stop the remaining work.
                Thread.currentThread().interrupt();
                exec.shutdownNow();
                break;
            } catch (java.util.concurrent.ExecutionException e) {
                System.err.println("验证任务异常终止: " + e.getCause());
            }
        }

        long diff = System.currentTimeMillis() - startMillis;
        long min = diff % MILLIS_PER_HOUR / MILLIS_PER_MINUTE;
        long sec = diff % MILLIS_PER_MINUTE / MILLIS_PER_SECOND;
        long ms = diff % MILLIS_PER_SECOND;
        System.out.println(min + "分钟" + sec + "秒" + ms + "毫秒");
    }
}

class CheckEbayAccount implements Callable<Integer> {
    // One task = one e-mail address checked against eBay's "forgot password" page.

    private String email;
    private static Logger logger = Logger.getLogger(CheckEbayAccount.class);

    // NOTE(review): the arguments below contain "*" masking in this listing; presumably they
    // must be replaced with real Dama2 credentials before the class compiles - confirm.
    // NOTE(review): this single client is shared by every worker thread; whether Dama2Web
    // tolerates concurrent use is not visible here - confirm.
    private static Dama2Web dama2 = new Dama2Web(****, "41c5a58de68ebe2*******", "***", "****");

    // DecodeResult.ret of the calling thread's most recent successful decode. Kept per
    // thread so that one worker never reports another worker's captcha as wrong.
    private static final ThreadLocal<Integer> LAST_DECODE_ID = new ThreadLocal<Integer>() {
        @Override
        protected Integer initialValue() {
            return Integer.valueOf(0);
        }
    };

    // Result codes returned by call().
    private static final int STATUS_PAGE_UNAVAILABLE = -1;
    private static final int STATUS_NOT_EBAY_ACCOUNT = 0;
    private static final int STATUS_EBAY_ACCOUNT = 1;
    private static final int STATUS_UNKNOWN_AFTER_CAPTCHA = 2;
    private static final int STATUS_UNKNOWN_PAGE = 3;

    // Text fragments used to recognise which eBay page came back.
    private static final String TEXT_RESET_PROMPT = "Select how you want to reset your password";
    private static final String TEXT_NO_MATCH = "Oops, that's not a match. Try again?";
    private static final String TEXT_CAPTCHA_PAGE = "Security Measure";
    private static final String TEXT_CAPTCHA_REJECTED = "Sorry";

    public CheckEbayAccount(String email) {
        this.email = email;
    }

    /**
     * Checks whether this task's address belongs to an eBay account.
     *
     * @return 1 if the password-reset prompt is shown (address is an eBay account);
     *         0 if eBay answers that the address does not match;
     *         2 if the page after the captcha round is not recognised;
     *         3 if the page after the first submission is not recognised;
     *         -1 if the start page could not be loaded
     */
    @Override
    public Integer call() {
        System.out.println(threadName() + " 开始验证账号:" + email);
        WebClient webClient = new WebClient(BrowserVersion.FIREFOX_17);
        try {
            return Integer.valueOf(runCheck(webClient));
        } finally {
            // Release the windows and background threads held by this client.
            webClient.closeAllWindows();
        }
    }

    /** Drives the eBay pages for this task's address; see call() for the result codes. */
    private int runCheck(WebClient webClient) {
        HtmlPage page;
        try {
            page = webClient.getPage("http://fyp.ebay.com/");
        } catch (FailingHttpStatusCodeException e) {
            logger.error("Failed to load http://fyp.ebay.com/", e);
            return STATUS_PAGE_UNAVAILABLE;
        } catch (IOException e) {
            // Also covers MalformedURLException, which is an IOException.
            logger.error("Failed to load http://fyp.ebay.com/", e);
            return STATUS_PAGE_UNAVAILABLE;
        }

        page = submitEmail(page);
        if (pageContains(page, TEXT_RESET_PROMPT)) {
            System.out.println(threadName() + " " + email + " 该账号是eBay账号!");
            return STATUS_EBAY_ACCOUNT;
        } else if (pageContains(page, TEXT_NO_MATCH)) {
            System.out.println(threadName() + " " + email + " 该邮箱号不是ebay账号");
            return STATUS_NOT_EBAY_ACCOUNT;
        }

        while (pageContains(page, TEXT_CAPTCHA_PAGE)) {
            page = solveAndSubmitCaptcha(page);
            // eBay rejected the code: report the bad decode and try again on the new page.
            while (pageContains(page, TEXT_CAPTCHA_REJECTED)) {
                System.out.println(threadName() + " 打码错误!重试");
                dama2.reportError(LAST_DECODE_ID.get().intValue());
                page = solveAndSubmitCaptcha(page);
            }

            // Submit the e-mail address again now that the captcha has been accepted.
            page = submitEmail(page);

            if (pageContains(page, TEXT_CAPTCHA_PAGE)) {
                // Still on the captcha page: solve another one.
                System.out.println(threadName() + " 提交还是验证码页面!");
                continue;
            }

            if (pageContains(page, TEXT_NO_MATCH)) {
                System.out.println(threadName() + " " + email + " 该邮箱号不是ebay账号");
                return STATUS_NOT_EBAY_ACCOUNT;
            } else if (pageContains(page, TEXT_RESET_PROMPT)) {
                System.out.println(threadName() + " " + email + " 该账号是eBay账号!");
                return STATUS_EBAY_ACCOUNT;
            } else {
                System.out.println(threadName() + " " + page.asText());
                return STATUS_UNKNOWN_AFTER_CAPTCHA;
            }
        }
        System.out.println(threadName() + " " + page.asText());
        return STATUS_UNKNOWN_PAGE;
    }

    /**
     * Types the address into the second form of the page and clicks its first button.
     * Returns the resulting page, or the unchanged page if the click fails.
     */
    private HtmlPage submitEmail(HtmlPage page) {
        HtmlForm form = page.getForms().get(1);
        form.getInputByName("input").setValueAttribute(email);
        HtmlButton button = (HtmlButton) form.getElementsByTagName("button").get(0);
        try {
            HtmlPage next = button.click();
            return next;
        } catch (IOException e) {
            logger.error("Failed to submit the e-mail form", e);
            return page;
        }
    }

    /**
     * Decodes the captcha referenced by the page's iframe, types the text into the first
     * form and submits it. Returns the resulting page, or the unchanged page if the click
     * fails.
     */
    private HtmlPage solveAndSubmitCaptcha(HtmlPage page) {
        Document doc = Jsoup.parse(page.asXml());
        Elements imgSrc = doc.getElementsByTag("iframe");
        String imgUrl = imgSrc.attr("src");
        System.out.println(threadName() + " " + "验证码图片链接:" + imgUrl);
        String code = getCode(imgUrl);

        HtmlForm form = page.getForms().get(0);
        form.getInputByName("tokenText").setValueAttribute(code);
        HtmlSubmitInput input = (HtmlSubmitInput) form.getElementsByTagName("input").get(5);
        try {
            HtmlPage next = input.click();
            return next;
        } catch (IOException e) {
            logger.error("Failed to submit the captcha form", e);
            return page;
        }
    }

    /** Tells whether the rendered text of the page contains the given fragment. */
    private static boolean pageContains(HtmlPage page, String fragment) {
        return page.asText().indexOf(fragment) != -1;
    }

    private static String threadName() {
        return Thread.currentThread().getName();
    }

    /**
     * Asks the Dama2 service to decode the captcha image at the given URL, retrying for
     * as long as a failed attempt leaves the result null.
     *
     * @param imgUrl URL of the captcha image
     * @return the decoded captcha text
     */
    public static String getCode(String imgUrl) {
        // Values passed straight to decodeUrlAndGetResult; their meaning is defined by the
        // Dama2 client (the timeout is presumably in seconds - confirm).
        int type = 6;
        int timeout = 30;
        ReadBalanceResult balanceResult = dama2.getBalance();
        DecodeResult res = dama2.decodeUrlAndGetResult(imgUrl, type, timeout);
        if (res.ret >= 0) {
            rememberDecode(res, balanceResult);
        } else {
            while (res.result == null) {
                String s = "打码失败,重试: ret = " + res.ret + "; desc=" + res.desc;
                System.out.println(threadName() + " " + s);
                // Only an attempt that produced a decode id can be reported; a failed
                // attempt has none, and the remembered id belongs to an earlier captcha.
                if (res.ret >= 0) {
                    dama2.reportError(res.ret);
                }
                res = dama2.decodeUrlAndGetResult(imgUrl, type, timeout);
                if (res.ret >= 0) {
                    rememberDecode(res, balanceResult);
                }
            }
        }
        return res.result;
    }

    /** Stores the decode id for the calling thread and logs the decoded text. */
    private static void rememberDecode(DecodeResult res, ReadBalanceResult balanceResult) {
        LAST_DECODE_ID.set(Integer.valueOf(res.ret));
        String s = "[打码结果=" + res.result + "] [id=" + res.ret + "] " + balanceResult;
        System.out.println(threadName() + " " + s);
    }
}

测试30个账号,平均每个3秒

Java 验证码识别之多线程打码兔的更多相关文章

  1. Java验证码识别解决方案

    建库,去重,切割,识别. package edu.fzu.ir.test; import java.awt.Color; import java.awt.image.BufferedImage; im ...

  2. Java 验证码识别库 Tess4j 学习

    Java 验证码识别库 Tess4j 学习 [在用java的Jsoup做爬虫爬取数据时遇到了验证码识别的问题(基于maven),找了网上挺多的资料,发现Tess4j可以自动识别验证码,在这里简单记录下 ...

  3. JAVA验证码识别:基于jTessBoxEditorFX和Tesseract-OCR训练样本

    JAVA验证识别:基于jTessBoxEditorFX和Tesseract-OCR训练样本 工具准备: jTessBoxEditorFX下载:https://github.com/nguyenq/jT ...

  4. java验证码识别

    首先参考了csdn大佬的文章,但是写的不全ImgUtils类没有给出代码,无法进行了 写不完整就是制造垃圾 不过这个大佬又说这个大佬的文章值得参考于是又查看这篇文章 有案例https://blog.c ...

  5. 【Java】验证码识别解决方案

    对于类似以下简单的验证码的识别方案: 1. 2 3 4. 1.建库:切割验证码为单个字符,人工标记,比如:A. 2.识别:给一个验证码:切割为单个字符,在库中查询识别. /*** * author:c ...

  6. Java 验证码、二维码

    Java 验证码.二维码 资源 需要:   jelly-core-1.7.0.GA.jar网站:   http://lychie.github.io/products.html将下载下来的 jelly ...

  7. uu云验证码识别平台,验证码,验证码识别,全自动验证码识别技术,优优云全自动打码,代答题系统,优优云远程打码平台,uu云打码

    uu云验证码识别平台,验证码,验证码识别,全自动验证码识别技术,优优云全自动打码,代答题系统,优优云远程打码平台,uu云打码 优优云验证码识别答题平台介绍 优优云|UU云(中国公司)是全球唯一领先的智 ...

  8. 基于tensorflow的‘端到端’的字符型验证码识别源码整理(github源码分享)

    基于tensorflow的‘端到端’的字符型验证码识别 1   Abstract 验证码(CAPTCHA)的诞生本身是为了自动区分 自然人 和 机器人 的一套公开方法, 但是近几年的人工智能技术的发展 ...

  9. 基于python语言的tensorflow的‘端到端’的字符型验证码识别源码整理(github源码分享)

    基于python语言的tensorflow的‘端到端’的字符型验证码识别 1   Abstract 验证码(CAPTCHA)的诞生本身是为了自动区分 自然人 和 机器人 的一套公开方法, 但是近几年的 ...

随机推荐

  1. Storm简述及集群安装

    Storm 集群类似于一个 Hadoop 集群.然而你在 Hadoop 的执行"MapReduce job", 在storm 上你执行 "topologies (不好翻译 ...

  2. 【shell】正则表达式

    当一个文件或命令输出中抽取或过滤文本时,可以使用正则表达式(RE),正则表达式是一些特殊或很不特殊的字符串模式的集合. 在Linux中grep.awk.sed均可解释正则 1.基本元字符集及其定义 ^ ...

  3. 获取List对象的泛型类(原创)

    群里一个伙计的需求,最后我提出了这种解决方案,不过他觉得多写俩括号增加了调用方的难度.还是先记下来吧,有时间看看还能不能再改造.   1.直接获取时获取不到的,类型被虚拟机擦除了2.利用子类实现父类的 ...

  4. Python 实现的、带GUI界面的词云生成器

    代码地址如下:http://www.demodashi.com/demo/14233.html 详细说明: "词云"就是数据可视化的一种形式,给出一段文本,根据文本中词语的出现频率 ...

  5. Android开发之动态检索(Filter)联系人

    1. 将所有联系人都转换为数字串,存到列表中. 将联系人姓名转换为数字串.例如,张丽思创->zlsc->9572. 过程解析: 张 – zhang – z – 9 丽 – li – l – ...

  6. JS中函数的 this 各种指向

    this是js的一个关键字,随着函数使用场合不同,this的值会发生变化.但是总有一个原则,那就是this指的是调用函数的那个对象. 情形1:如果一个函数中有this,但是它没有被上一级的对象所调用, ...

  7. 【LeetCode】132. Palindrome Partitioning II

    Palindrome Partitioning II  Given a string s, partition s such that every substring of the partition ...

  8. JQueryMobile开发必须的知道的知识(转)

    移动Web页面的基本组成元素: 页面头部,页面内容,页面底部 <!DOCTYPE html> <html> <head> <title>My Page& ...

  9. 利用C#改写JAVA中的Base64.DecodeBase64以及Inflater解码

    最近正在进行项目服务的移植工作,即将JAVA服务的程序移植到DotNet平台中. 在JAVA程序中,有个HTTP请求数据头中,包含一个BASE64编码的字符串,例如: eJyVjMENgDAMA1fp ...

  10. jQuery on() 方法问题

    <!DOCTYPE html><html><head><script src="https://cdn.bootcss.com/jquery/1.1 ...