Rhino+envjs-1.2.js 在java运行网站js 工具类
java爬虫遇到个页面加密的东西,找了些资料学习学习
做了个java运行js的工具类,希望对大家有用。其中用到的Client(用于获取js)可以自行换成自己的client。主要用到了以下组件:
Rhino就是JavaScript引擎,它的目的就是实现Java与JavaScript的互操作性。rhino-1.7R1.jar
Envjs一个纯js方式在无浏览器环境下模拟浏览器的行为。envjs-1.2.js
一般网站js中都会用到jquery,所以还用了jquery.js
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.io.StringReader;
import java.lang.ref.SoftReference;
import java.net.URI;
import java.nio.charset.Charset;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.Validate;
import org.apache.http.Header;
import org.apache.http.HeaderElement;
import org.apache.http.HttpEntity;
import org.apache.http.ParseException;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.protocol.RequestAcceptEncoding;
import org.apache.http.impl.DefaultConnectionReuseStrategy;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.Args;
import org.apache.http.util.ByteArrayBuffer;
import org.jsoup.Jsoup;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.ContextFactory;
import org.mozilla.javascript.Function;
import org.mozilla.javascript.Scriptable;

import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;

//import net.sourceforge.htmlunit.corejs.javascript.Context;
//import net.sourceforge.htmlunit.corejs.javascript.ContextFactory;
//import net.sourceforge.htmlunit.corejs.javascript.Function;
//import net.sourceforge.htmlunit.corejs.javascript.Scriptable; /**
* 参照http://mybeautiful.iteye.com/blog/1442839
* http://m.oschina.net/blog/121347
* http://blog.csdn.net/dwjmantou/article/details/45276967
* http://lcllcl987.iteye.com/blog/87423
* ***不可使用htmlunit的包******Cannot call method "setOptimizationLevel" of null
* @author 5432
*
*/
public class RhinoScaper {
private Context context;
private Scriptable scriptable;
/**
* 初始化方法
*/
public void init(){
context = ContextFactory.getGlobal().enterContext();
scriptable =context.initStandardObjects(null);
context.setOptimizationLevel(-1);
context.setLanguageVersion(Context.VERSION_1_5);
// 初始化测试用,并定义envjs-1.2.js未定义print
context.evaluateString(scriptable,
"var v='sssaass';"
+ "var print = function(v) {"+
" java.lang.System.out.println(v);return v ;"+
" };function hah(){return v }",
"print",1,null);
// System.out.println("v == " + scriptable.get("v", scriptable) );
Function prf = (Function)scriptable.get("print", scriptable);
Object call = prf.call(Context.getCurrentContext(), scriptable, prf, new Object[]{"test"});
// System.out.println("print == "+call.toString());
Object invokFunction = invokFunction("hah");
// System.out.println(invokFunction.toString()); String[] file = { this.getClass().getResource("/")+"envjs-1.2.js", "./lib/jquery.js" };
for (String f : file) {
evaluateJs(f);
}
}
/**
* 调用函数
* @param functionName
* @param functionArags
* @return
*/
public Object invokFunction(String functionName,Object... functionArags) {
Validate.notNull(context, "context is null");
Validate.notNull(scriptable, "scriptable is null");
Function function = (Function) scriptable.get(functionName, scriptable);
Object call = function.call(Context.getCurrentContext(), scriptable, function, functionArags);
// System.out.println("reslult = "+call.toString());
return call;
} /**
* 加载js文件
* (当没有找到对应文件,
* 且要加载文件名路径包含‘envjs-1.2.js’ 会访问 https://raw.githubusercontent.com/ryan-roemer/envjs-1.2/master/env.rhino.1.2.js
* 文件名路径包含‘jquery.js’ 会访问 http://apps.bdimg.com/libs/jquery/1.6.0/jquery.js
* 加载js文件 )
* @param f 文件名路径
*/
public void evaluateJs(String f) {
Validate.notNull(context, "context is null");
Validate.notNull(scriptable, "scriptable is null");
FileReader in = null;
try {
// FileInputStream fI = new FileInputStream(f);
// String js = IOUtils.toString(fI, "UTF-8");//设置默认js文件编码为utf-8
// context.evaluateString(scriptable, js, f, 1, null);
in = new FileReader(f);
context.evaluateReader(scriptable, in, f, 1, null);
} catch (FileNotFoundException e1) {
// e1.printStackTrace();
if (f.contains("envjs-1.2.js")) {
String envjs ="https://raw.githubusercontent.com/ryan-roemer/envjs-1.2/master/env.rhino.1.2.js";
try {
SoftReference<String> htmlString = Client.getHtmlString(envjs);
String jqueryStr = htmlString==null?"":htmlString.get();
// DefaultClient defaultClient = new DefaultClient();
// String jqueryStr =defaultClient.get(envjs).asHtml();
context.evaluateString(scriptable, jqueryStr, envjs, 1, null);
} catch (Exception e) {
e.printStackTrace();
}
} else if (f.contains("jquery.js")) {
String jquery = "http://apps.bdimg.com/libs/jquery/1.6.0/jquery.js";
Reader bufR =null;
try {
SoftReference<Reader> htmlReader = Client.getHtmlReader(jquery);
bufR = htmlReader==null?new BufferedReader(null):htmlReader.get();
// String js = IOUtils.toString(bufR);
context.evaluateReader(scriptable, bufR , jquery, 1, null);
} catch (IOException e) {
e.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
}finally {
// close(bufR);
IOUtils.closeQuietly(bufR);
}
} else{
throw new RuntimeException("unknown file "+f);
}
} catch (IOException e1) {
e1.printStackTrace();
}finally {
// close(in);
IOUtils.closeQuietly(in);
}
} public static void main(String[] args) {
RhinoScaper rhinoScaper = new RhinoScaper();
rhinoScaper.init();
// rhinoScaper.JSloadString("jsString", "jsname");
// rhinoScaper.evaluateJs("E:/Desktop/loginjs.js");
// rhinoScaper.loadJS("", classpathURI); // 电信登录加密测试
String pwd="111";
StringBuilder ascending = new StringBuilder();
SoftReference<String> htmlString = null;
try {
htmlString = Client.getHtmlString("http://login.189.cn/bundles/jquery?v=h3Pl8XT8zdNkoI1VbV5sEZOBrSqsxRXX0TIQ9S_lAlM1");
} catch (Exception e) {
e.printStackTrace();
}
String jsStr =htmlString==null?"":htmlString.get();
jsStr = jsStr.replaceAll("float:", "floats:").replaceAll("throws", "throwss");
ascending.append(jsStr);
ascending.append(";\n var input=document.createElement(\"input\");input.value='"+pwd+"';;input.id= 'pass';input.type='password';");
ascending.append("\n function getpassword(){ return $(input).valAesEncryptSet()}");
rhinoScaper.JSloadString(ascending.toString(), "jsname");
Object result = rhinoScaper.invokFunction("getpassword");
System.out.println(result);
try {
htmlString = Client.getHtmlString("http://www.youdaili.net/Daili/");
jsStr =htmlString==null?"":htmlString.get();
String runScript = rhinoScaper.runScript(jsStr);
System.out.println(runScript);
} catch (Exception e) {
e.printStackTrace();
} }
/**
* 运行js
* @param html
* @return
*/
private String runScript(String html) {
String function = null;int jsfrom = 0;
Pattern p = Pattern.compile("setTimeout\\(\"(.*)\\((.*)\\)\", 200\\);");
Matcher m = p.matcher(html);
if(m.find()){
function = m.group(1);//函数名
jsfrom = Integer.parseInt(m.group(2));//参数
}
JSloadString(Jsoup.parse(html).select("script").html().replace("eval(\"qo=eval;qo(po);\")", "return po"), "jsname");
Object result = invokFunction(function, jsfrom);
return result.toString();
}
/**
* 加载js文件
* @param sourceName 名称
* @param classpathURI 文件路径
*/
public void loadJS(String sourceName, String classpathURI) {
Validate.notNull(context, "context is null");
Validate.notNull(scriptable, "scriptable is null");
String js = null;
InputStream inputStream = null;
try {
inputStream = getClass().getResourceAsStream(classpathURI);
js = IOUtils.toString(inputStream, "UTF-8");//设置默认js文件编码为utf-8
} catch (IOException e) {
e.printStackTrace();
} finally {
IOUtils.closeQuietly(inputStream);
}
context.evaluateString(scriptable, js, sourceName, 1, null);
}
/**
* 加载js字符串
* @param source js字符串(注意处理js中由于变量名为throws,float类似名称导致的报错)
* @param sourceName 名称
*/
public void JSloadString(String source, String sourceName){
Validate.notNull(context, "context is null");
Validate.notNull(scriptable, "scriptable is null");
context.evaluateString(scriptable, source, sourceName, 1, null);
}
}
/**
 * Small HTTP helper that downloads a URL and returns its body as text,
 * undoing gzip/deflate content codings itself (automatic decompression is
 * switched off on the underlying HttpClient).
 */
class Client {

    /** Size in bytes of the scratch buffer used while decompressing. */
    public static final int BUFFER = 1024;

    /** Decoded response body together with the raw Content-Type header value (null when absent). */
    private static final class Payload {
        final byte[] body;
        final String contentType;

        Payload(byte[] body, String contentType) {
            this.body = body;
            this.contentType = contentType;
        }
    }

    /**
     * Closes a resource, printing rather than propagating any failure.
     *
     * @param close resource to close; null is ignored
     */
    public static void close(AutoCloseable close) {
        if (close != null) {
            try {
                close.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Executes a GET request with a freshly built client.
     *
     * <p>The caller must close the returned response. The client created for
     * the request cannot be closed here without invalidating the response, so
     * prefer {@link #getHtmlString(String)} or {@link #getHtmlReader(String)},
     * which release both.
     *
     * @param url absolute URL to fetch
     * @return the open response
     * @throws IOException if the request fails
     * @throws ClientProtocolException on an HTTP protocol error
     */
    public static CloseableHttpResponse HttpGetResponse(String url) throws IOException, ClientProtocolException {
        HttpGet httpGet = new HttpGet(URI.create(url));
        return newHttpClient().execute(httpGet);
    }

    /** Builds a client with its own cookie store, a browser user agent and manual content decoding. */
    private static CloseableHttpClient newHttpClient() {
        BasicCookieStore cookieStore = new BasicCookieStore();
        HttpClientBuilder builder = HttpClientBuilder.create().disableContentCompression()
                .setConnectionReuseStrategy(new DefaultConnectionReuseStrategy())
                .setUserAgent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36");
        // Still advertise compressed encodings; the body is decoded in decode().
        builder.addInterceptorLast(new RequestAcceptEncoding());
        builder.setDefaultCookieStore(cookieStore);
        return builder.build();
    }

    /** Downloads {@code url}, prints status line and content encoding, and returns the decoded body. */
    private static Payload fetch(String url) throws Exception {
        CloseableHttpClient client = newHttpClient();
        try {
            CloseableHttpResponse response = client.execute(new HttpGet(URI.create(url)));
            try {
                System.out.println(response.getStatusLine().toString());
                HttpEntity entity = response.getEntity();
                if (entity == null) {
                    throw new IOException("response has no body: " + url);
                }
                // May legitimately be null when the server sent no Content-Encoding.
                System.out.println(entity.getContentEncoding());
                byte[] binary = HttpEntityTOByte(entity);
                Args.notNull(binary, "binary");
                Header contentType = entity.getContentType();
                return new Payload(decode(binary, entity), contentType == null ? null : contentType.getValue());
            } finally {
                close(response);
            }
        } finally {
            close(client);
        }
    }

    /**
     * Turns bytes into text using the charset declared in the Content-Type
     * header, falling back to ICU charset detection and finally to UTF-8.
     */
    private static String toText(byte[] bytes, String contentType) throws IOException {
        String declared = getContentCharSet(contentType);
        if (declared != null) {
            try {
                return new String(bytes, Charset.forName(declared));
            } catch (IllegalArgumentException ignored) {
                // Illegal or unsupported charset name: fall through to detection.
            }
        }
        CharsetMatch match = new CharsetDetector().setText(bytes).detect();
        if (match != null) {
            return match.getString();
        }
        return new String(bytes, Charset.forName("UTF-8"));
    }

    /**
     * Downloads a URL and returns its body as text.
     *
     * @param url absolute URL to fetch
     * @return a soft reference to the text; the referent may be cleared by
     *         the garbage collector, so callers must check {@code get()} for null
     * @throws Exception if the request, decompression or decoding fails
     */
    public static SoftReference<String> getHtmlString(String url) throws Exception {
        Payload payload = fetch(url);
        return new SoftReference<String>(toText(payload.body, payload.contentType));
    }

    /**
     * Downloads a URL and returns a reader over its body, decoded the same
     * way as in {@link #getHtmlString(String)}.
     *
     * @param url absolute URL to fetch
     * @return a soft reference to the reader; the referent may be cleared by
     *         the garbage collector, so callers must check {@code get()} for null
     * @throws Exception if the request, decompression or decoding fails
     */
    public static SoftReference<Reader> getHtmlReader(String url) throws Exception {
        Payload payload = fetch(url);
        Reader bufR = new BufferedReader(new StringReader(toText(payload.body, payload.contentType)));
        return new SoftReference<Reader>(bufR);
    }

    /**
     * Extracts the {@code charset} parameter from a Content-Type value.
     *
     * @param contentType header value such as {@code text/html; charset="utf-8"}; may be null
     * @return the charset name without quotes or surrounding blanks, or null if none is declared
     */
    private static String getContentCharSet(String contentType) throws ParseException {
        if (StringUtils.isEmpty(contentType)) {
            return null;
        }
        for (String parameter : contentType.split(";")) {
            String[] pair = parameter.split("=", 2);
            if (pair.length != 2 || !"charset".equalsIgnoreCase(pair[0].trim())) {
                continue;
            }
            String value = pair[1].trim();
            if (value.length() >= 2 && value.startsWith("\"") && value.endsWith("\"")) {
                value = value.substring(1, value.length() - 1).trim();
            }
            if (!value.isEmpty()) {
                return value;
            }
        }
        return null;
    }

    /**
     * Decompresses gzip data.
     *
     * @param data gzip-compressed bytes
     * @return the decompressed bytes
     * @throws Exception if the data is not valid gzip
     * @author http://snowolf.iteye.com/blog/643010
     */
    public static byte[] decompress(byte[] data) throws Exception {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        decompress(new ByteArrayInputStream(data), baos);
        return baos.toByteArray();
    }

    /**
     * Decompresses a gzip stream into an output stream. The input stream is
     * closed once it has been wrapped successfully; the output stream is left open.
     *
     * @param is gzip-compressed input
     * @param os destination for the decompressed bytes
     * @throws Exception if reading, decompressing or writing fails
     */
    public static void decompress(InputStream is, OutputStream os) throws Exception {
        GZIPInputStream gis = null;
        try {
            gis = new GZIPInputStream(is);
            copy(gis, os);
        } finally {
            close(gis);
        }
    }

    /** Copies everything from {@code in} to {@code out} without closing either. */
    private static void copy(InputStream in, OutputStream out) throws IOException {
        byte[] data = new byte[BUFFER];
        int count;
        while ((count = in.read(data, 0, BUFFER)) != -1) {
            out.write(data, 0, count);
        }
    }

    /** Inflates "deflate" content, accepting both zlib-wrapped and raw deflate streams. */
    private static byte[] inflate(byte[] data) throws IOException {
        // A zlib header has compression method 8 and a 16-bit value divisible by 31 (RFC 1950).
        boolean zlibWrapped = data.length >= 2
                && (data[0] & 0x0F) == 8
                && ((((data[0] & 0xFF) << 8) | (data[1] & 0xFF)) % 31) == 0;
        Inflater inflater = new Inflater(!zlibWrapped);
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try {
            copy(new InflaterInputStream(new ByteArrayInputStream(data), inflater), out);
        } finally {
            inflater.end();
        }
        return out.toByteArray();
    }

    /**
     * Undoes the content codings named in the entity's Content-Encoding
     * header. Codings are removed in reverse of the listed order.
     *
     * @param binary raw body bytes
     * @param entity entity the bytes were read from; supplies the header
     * @return the decoded bytes, or {@code binary} itself when nothing needs decoding
     * @throws Exception if a coding is unsupported or the data is corrupt
     */
    public static byte[] decode(byte[] binary, final HttpEntity entity) throws Exception {
        if (entity == null || entity.getContentLength() == 0) {
            return binary;
        }
        final Header ceheader = entity.getContentEncoding();
        if (ceheader == null) {
            return binary;
        }
        final HeaderElement[] codecs = ceheader.getElements();
        byte[] result = binary;
        for (int i = codecs.length - 1; i >= 0; i--) {
            final String codecname = codecs[i].getName().toLowerCase(Locale.US);
            if ("gzip".equals(codecname) || "x-gzip".equals(codecname)) {
                result = decompress(result);
            } else if ("deflate".equals(codecname)) {
                result = inflate(result);
            } else if (!"identity".equals(codecname)) {
                throw new IOException("Unsupported Content-Coding: " + codecname);
            }
            // "identity" needs no transformation.
        }
        return result;
    }

    /**
     * Reads an entity's content fully into memory and closes its stream.
     *
     * @param entity entity to read; may be null
     * @return the content bytes, or null when the entity is null or has no content stream
     * @throws IOException if reading fails
     * @throws IllegalArgumentException if the declared length exceeds {@link Integer#MAX_VALUE}
     * @author EntityUtils.toByteArray(entity)
     */
    public static byte[] HttpEntityTOByte(HttpEntity entity) throws IOException {
        if (entity == null) {
            return null;
        }
        final InputStream instream = entity.getContent();
        if (instream == null) {
            return null;
        }
        try {
            Args.check(entity.getContentLength() <= Integer.MAX_VALUE,
                    "HTTP entity too large to be buffered in memory");
            int i = (int) entity.getContentLength();
            if (i < 0) {
                i = 4096; // length unknown: start with a default capacity
            }
            final ByteArrayBuffer buffer = new ByteArrayBuffer(i);
            final byte[] tmp = new byte[4096];
            int l;
            while ((l = instream.read(tmp)) != -1) {
                buffer.append(tmp, 0, l);
            }
            return buffer.toByteArray();
        } finally {
            instream.close();
        }
    }
}
Rhino+envjs-1.2.js 在java运行网站js 工具类的更多相关文章
- java中常用的工具类(一)
我们java程序员在开发项目的是常常会用到一些工具类.今天我汇总了一下java中常用的工具方法.大家可以在项目中使用.可以收藏!加入IT江湖官方群:383126909 我们一起成长 一.String工 ...
- Android PermissionUtils:运行时权限工具类及申请权限的正确姿势
Android PermissionUtils:运行时权限工具类及申请权限的正确姿势 ifadai 关注 2017.06.16 16:22* 字数 318 阅读 3637评论 1喜欢 6 Permis ...
- Java日期时间实用工具类
Java日期时间实用工具类 1.Date (java.util.Date) Date(); 以当前时间构造一个Date对象 Date(long); 构造函数 ...
- Java并发多线程 - 并发工具类JUC
安全共享对象策略 1.线程限制 : 一个被线程限制的对象,由线程独占,并且只能被占有它的线程修改 2.共享只读 : 一个共享只读的对象,在没有额外同步的情况下,可以被多个线程并发访问, 但是任何线程都 ...
- Java 中的并发工具类
Java 中的并发工具类 CountDownLatch public class JoinCountDownLatchTest { public static void main(String[] a ...
- Java线程的并发工具类
Java线程的并发工具类. 一.fork/join 1. Fork-Join原理 在必要的情况下,将一个大任务,拆分(fork)成若干个小任务,然后再将一个个小任务的结果进行汇总(join). 适用场 ...
- Java学习-041-颜色工具类(RGB,HEX)
在日常的网页开发中,经常需要进行颜色数值获取.转换,例如获取红色,获取蓝色,获取绿色,RGB转十六进制颜色,十六进制颜色转RGB等,因而在学习过程中,写了一个小工具类,仅供各位小主参考! 多不闲言,直 ...
- JAVA中封装JSONUtils工具类及使用
在JAVA中用json-lib-2.3-jdk15.jar包中提供了JSONObject和JSONArray基类,用于JSON的序列化和反序列化的操作.但是我们更习惯将其进一步封装,达到更好的重用. ...
- JAVA自动生成正则表达式工具类
经过很久的努力,终于完成了JAVA自动生成正则表达式工具类.还记得之前需要正则,老是从网上找吗?找了想修改也不会修改.现在不用再为此烦恼了,使用此生成类轻松搞定所有正则表达式.赶快在同事面前炫一下吧. ...
随机推荐
- android studio fetching android sdk component information
解决办法: 1.找到Android Studio安装目录下的idea.properties文件 2.增加disable.android.first.run=true
- 登录phpmyadmin提示: #1045 无法登录 MySQL 服务器
打开phpmyadmin,进行登录,出现以下问题,提示:#1045 无法登录 MySQL 服务器 或许出现以下错误情况:phpmyadmin:#1045 无法登录 MySQL 服务器.Access d ...
- ion-refresher 下拉更新数据
使用指令ion-refresher可以为容器eg:ion-scroll 和 ion-content进行拉动刷新 <ion-scroll> <ion-refresher on-refr ...
- python unicode 和 str 类型的关系
python (2.X)在进行 运行时候字符串运算的时候, 分为两种类型 str, unicode 前者是 二进制的形式进行对字符串的保存, 后者是 以unicode的方式进行保存, 一般的工作方式为 ...
- MediaCodec Name & Type
OMX.google.mp3.decoder support type:audio/mpegOMX.google.amrnb.decoder support type:audio/3gppOMX.go ...
- 【Cocos2d-x 3.x】 精灵帧缓存和纹理缓存
转自泰然网(Cocos2d-x 3.x官方文档):精灵帧缓存:http://www.tairan.com/archives/6378/ 纹理缓存: http://www.tairan.com/ar ...
- certbot+nginx (仅用作个人纪录)
https://certbot.eff.org/#centos6-nginx https://github.com/kshcherban/acme-nginx server { listen 80; ...
- 王爽<<汇编语言>> 实验十四
;以"年/月/日 时:分:秒"的格式, 显示当前的日期, 时间 assume cs:code code segment main: out 70h,al ;告诉CMOS RAM将要 ...
- 结对编程--基于android平台的黄金点游戏(2.0版本)
在昨天上传完博客之后发现一个重大的bug...故在此推出2.0版本. 博文详情见:http://www.cnblogs.com/RayShea/p/5372398.html coding地址:http ...
- DTO对象
在EF中,EF生成的对象都是代理对象,这些对象看上去是实体类对象,但是其实都是EF封装好的代理类对象.所以调用EF查询得到的代理类对象有继承于实体对象,所以可以用实体类对象来接收返回的代理类对象.EF ...