网络版程序:

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List; public class TestIndex { private String rootUrl = "http://localhost/apk/";
private String listUrl = rootUrl + "test-index.htm";
private static List<String> imageUrlList = new ArrayList<String>();
public static void main(String args[]){
TestIndex ti = new TestIndex();
ti.getData();
System.out.println(imageUrlList.size());
for(int i=0; i<imageUrlList.size();i++){
System.out.println(imageUrlList.get(i));
} } private InputStream getNetInputStream(String urlStr)
{
try
{
URL url = new URL(urlStr);
URLConnection conn = url.openConnection();
conn.connect();
InputStream is = conn.getInputStream();
return is;
}
catch (Exception e)
{ }
return null;
}
private void getData() {
try
{
InputStream is = getNetInputStream(listUrl);
InputStreamReader isr = new InputStreamReader(is);
BufferedReader br = new BufferedReader(isr);
String s = null;
String html="";
while ((s = br.readLine()) != null)
{
html+=s;
} is.close();
String startStr = "src=\"https://";
String endStr = " width=";
int start = 0;
int end = 0;
int index =0;
imageUrlList.clear();
while (true)
{
start = html.indexOf(startStr, index);
if (start < 0)
break;
index=start;
end = html.indexOf(endStr, index);
String ss = html.substring(start+5,end-1);
imageUrlList.add(ss);
index +=ss.length();
}
}
catch (Exception e)
{
// TODO: handle exception
}
}
}

本地版程序:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List; public class TestIndex_IO { private String rootUrl = "D:/Hixin/webandroid/";
private String listUrl = rootUrl + "test-index.htm";
private static List<String> imageUrlList = new ArrayList<String>();
public static void main(String args[]){
TestIndex_IO ti = new TestIndex_IO();
ti.getData();
System.out.println(imageUrlList.size());
for(int i=0; i<imageUrlList.size();i++){
System.out.println(imageUrlList.get(i));
} } private InputStream getNetInputStream(String urlStr)
{ InputStream is;
try {
is = new FileInputStream(new File(urlStr));
return is;
} catch (FileNotFoundException e) {
e.printStackTrace();
}
return null; }
private void getData() {
try
{
InputStream is = getNetInputStream(listUrl);
InputStreamReader isr = new InputStreamReader(is);
BufferedReader br = new BufferedReader(isr);
String s = null;
String html="";
while ((s = br.readLine()) != null)
{
html+=s;
} is.close();
String startStr = "src=\"https://";
String endStr = " width=";
int start = 0;
int end = 0;
int index =0;
imageUrlList.clear();
while (true)
{
start = html.indexOf(startStr, index);
if (start < 0)
break;
index=start;
end = html.indexOf(endStr, index);
String ss = html.substring(start+5,end-1);
imageUrlList.add(ss);
index +=ss.length();
}
}
catch (Exception e)
{
// TODO: handle exception
}
}
}

差别仅仅在于private InputStream getNetInputStream(String urlStr)函数。为避免中文乱码,建议InputStreamReader isr = new InputStreamReader(is,"utf-8");

            InputStream is = getNetInputStream(listUrl);
InputStreamReader isr = new InputStreamReader(is);
BufferedReader br = new BufferedReader(isr);
String s = null;
String html="";
while ((s = br.readLine()) != null)
{
html+=s;
}
System.out.println(html.length());
is.close();

输出结果为:77300

            InputStream is = getNetInputStream(listUrl);
InputStreamReader isr = new InputStreamReader(is,"utf-8");
BufferedReader br = new BufferedReader(isr);
String s = null;
String html="";
while ((s = br.readLine()) != null)
{
html+=s;
}
System.out.println(html.length());
is.close();

输出结果为:77135

 private String writeUrl = "D:/newfile/new/new";
private String fileName ="test-index.htm";
File f = new File(writeUrl);
if(!f.exists()) {
f.mkdirs();
}
File f1 = new File(f, fileName);
FileOutputStream fos = new FileOutputStream(f1);
OutputStreamWriter osw = new OutputStreamWriter(fos); osw.write(html,0,html.length());
osw.flush();
osw.close();

解析出htm文件中包含的网址。

结果:

20
https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcRvQgUjsVDBncM3mVIgIyIuE87BnlyJUy2BNsAp8kUoTanrC_css5mVAw
https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcThd8cYjOTmCgYJZxX5ls-xpxaAlH1_yocOSCqI5_7OkL29SNtbCZ7q2Yoj
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTl-FzKmsppxuwzmTITGCv9uDxmrWr1pG0lw8mUD9wkWIloASxQeBEMnVjz
https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcQWbmiZJIXKHV2IoTBp7zSY6kD5g5VPzVtBTLJYYR5nwTtKi2-0_u93qL4e
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcSlrLi_GtVgUehU7coFe1eMdrJxPdvS42iTqXkla0g75s31NBfAq2u1LE4
https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcSkrlyGxSs8Dr_7k3MUvoGq1vE45LgHZ0zEhIEdD9LLZiaoMcE7IAqn8ho
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTu__OUSJ4R4EKBu4jOi2ZAdHohpVQIBy3-SfnI8FYpN8wVC9kJG_aWuk_w
https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcR3Bf7YtsHJ813A5_wWzpxIy4MbEmqz5NLw3qv1nPxOZqVjH7QlY-qYSCg
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcToB4nJPqVwnzn0xeasnXyhxGgOqHXdypE6KZIMTfV9k52eIrE3iYsA6Ixm
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcTkKw0LpqdB2eQMUpwdQdvM9DTeNtq1mrvMNivoQtN37p3m0OPsx4ME9i4O
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSZGzMf_3hmdDktz91yp5ZQi-eGWLCenZ0U446sXT2nqYuwlWRI_V_BVIWi
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTQF-55T5GM3dLdaoafPdlIYK0ESNvM6-Bsb4-B2rQTeyD5gGoCKxokExM-
https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcRoRjo4TFeXmx47zE6VH0ylcO0IQ2HBsOHYIMJCI9MsRyg_PF1WhHbqG76Q
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcRrdegt1koEy51dLWrJAbVMJBlCEZ7fPl2mztYYM6onvxocRCq030Ft1gE
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcTtnQpte0uq9Ue9nsg25GeO1kw_-Hcn69ozTQkiMBHrXKwlANutyhwKD9XM
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRNRdxzmuFKABoGgyv2SC0gMticosL2LB3V1fBMOwNtVBZxHkyMw4IcWBFj
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQr40CEf75nWCj5dg-oeKtb9zK6mhktu7vnfoYAh5ioy34goC3c9ptDkQwP
https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcQUnyHrVEbppqhZnWnQrijhBFP0X34gRf7pKw6PdT4ggepB2k9g-p71sgGh
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcR9Us9qblbTJaw47gULXCI8sHKN4I61gYsT2ijebtZzgsMDI8GmYqQpIIw
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSIrW-IbBZjM9Ztn60r9QE1_FIMjt494qGX12tqsLsibYPLuFVwyVSgz1I

用正则表达式更简单:

 InputStream is = getNetInputStream(listUrl);
InputStreamReader isr = new InputStreamReader(is);
BufferedReader br = new BufferedReader(isr);
String s = ""; while ((s = br.readLine()) != null)
{
Pattern p = Pattern.compile("src=\"https[^\"]+");
Matcher m = p.matcher(s);
while(m.find()) {
System.out.println(m.group());
}
}
 src="https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcRvQgUjsVDBncM3mVIgIyIuE87BnlyJUy2BNsAp8kUoTanrC_css5mVAw
src="https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcThd8cYjOTmCgYJZxX5ls-xpxaAlH1_yocOSCqI5_7OkL29SNtbCZ7q2Yoj
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTl-FzKmsppxuwzmTITGCv9uDxmrWr1pG0lw8mUD9wkWIloASxQeBEMnVjz
src="https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcQWbmiZJIXKHV2IoTBp7zSY6kD5g5VPzVtBTLJYYR5nwTtKi2-0_u93qL4e
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcSlrLi_GtVgUehU7coFe1eMdrJxPdvS42iTqXkla0g75s31NBfAq2u1LE4
src="https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcSkrlyGxSs8Dr_7k3MUvoGq1vE45LgHZ0zEhIEdD9LLZiaoMcE7IAqn8ho
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTu__OUSJ4R4EKBu4jOi2ZAdHohpVQIBy3-SfnI8FYpN8wVC9kJG_aWuk_w
src="https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcR3Bf7YtsHJ813A5_wWzpxIy4MbEmqz5NLw3qv1nPxOZqVjH7QlY-qYSCg
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcToB4nJPqVwnzn0xeasnXyhxGgOqHXdypE6KZIMTfV9k52eIrE3iYsA6Ixm
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcTkKw0LpqdB2eQMUpwdQdvM9DTeNtq1mrvMNivoQtN37p3m0OPsx4ME9i4O
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSZGzMf_3hmdDktz91yp5ZQi-eGWLCenZ0U446sXT2nqYuwlWRI_V_BVIWi
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTQF-55T5GM3dLdaoafPdlIYK0ESNvM6-Bsb4-B2rQTeyD5gGoCKxokExM-
src="https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcRoRjo4TFeXmx47zE6VH0ylcO0IQ2HBsOHYIMJCI9MsRyg_PF1WhHbqG76Q
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcRrdegt1koEy51dLWrJAbVMJBlCEZ7fPl2mztYYM6onvxocRCq030Ft1gE
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcTtnQpte0uq9Ue9nsg25GeO1kw_-Hcn69ozTQkiMBHrXKwlANutyhwKD9XM
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRNRdxzmuFKABoGgyv2SC0gMticosL2LB3V1fBMOwNtVBZxHkyMw4IcWBFj
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQr40CEf75nWCj5dg-oeKtb9zK6mhktu7vnfoYAh5ioy34goC3c9ptDkQwP
src="https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcQUnyHrVEbppqhZnWnQrijhBFP0X34gRf7pKw6PdT4ggepB2k9g-p71sgGh
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcR9Us9qblbTJaw47gULXCI8sHKN4I61gYsT2ijebtZzgsMDI8GmYqQpIIw
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSIrW-IbBZjM9Ztn60r9QE1_FIMjt494qGX12tqsLsibYPLuFVwyVSgz1I

通过网络得到html,并解析出其中网址(JAVA程序)的更多相关文章

  1. Java匹马行天下之 Java国出了个Java——举国欢庆

    Java帝国的崛起 前言: 看庭前花开花落,宠辱不惊, 望天上云卷云舒,去留无意. 闹心的事儿,选择释怀: 纠缠的人儿,试着放下, 生活其实很美. 心若向阳,就无惧悲伤. 愿你明朗坦荡纵情豁达,有得有 ...

  2. Android网络请求与数据解析,使用Gson和GsonFormat解析复杂Json数据

    版权声明:未经博主允许不得转载 一:简介 [达叔有道]软件技术人员,时代作者,从 Android 到全栈之路,我相信你也可以!阅读他的文章,会上瘾!You and me, we are family ...

  3. 初探iOS网络开发,数据解析。

    通过大众点评平台开发来简单了解一下,oc的网络编程和数据解析(json) 首先我们需要到大大众点评开发者平台申请一个key.http://developer.dianping.com/app/tech ...

  4. Flutter网络请求和数据解析

    一:前言 - 什么是反射机制,Flutter为什么禁用反射机制? 在Flutter中它的网络请求和数据解析稍微的比较麻烦一点,因为Flutter不支持反射机制.相信大家都看到这么一条,就是Flutte ...

  5. Android okHttp网络请求之Json解析

    前言: 前面两篇文章介绍了基于okHttp的post.get请求,以及文件的上传下载,今天主要介绍一下如何和Json解析一起使用?如何才能提高开发效率? okHttp相关文章地址: Android o ...

  6. ZeroMQ接口函数之 :zmq_z85_decode – 从一个用Z85算法生成的文本中解析出二进制密码

    ZeroMQ 官方地址 :http://api.zeromq.org/4-0:zmq_z85_decode zmq_z85_decode(3)         ØMQ Manual - ØMQ/4.1 ...

  7. js中解析json对象:JSON.parse()用于从一个字符串中解析出json对象, JSON.stringify()用于从一个对象解析出字符串。

    JSON.parse()用于从一个字符串中解析出json对象. var str = '{"name":"huangxiaojian","age&quo ...

  8. GJM : Unity3D 常用网络框架与实战解析 【笔记】

    Unity常用网络框架与实战解析 1.Http协议          Http协议                  存在TCP 之上 有时候 TLS\SSL 之上 默认端口80 https 默认端口 ...

  9. 网络热恋之XML解析

    XML 可扩展标记语言 用于标记电子文件使其具有结构性的标记语言,可以用来标记数据.定义数据类型,是一种允许用户对自己的标记语言进行定义的源语言 易读性高,编码手写难度小,数据量大 NSXMLPars ...

随机推荐

  1. 性能调优之MYSQL高并发优化

    性能调优之MYSQL高并发优化   一.数据库结构的设计 如果不能设计一个合理的数据库模型,不仅会增加客户端和服务器段程序的编程和维护的难度,而且将会影响系统实际运行的性能.所以,在一个系统开始实施之 ...

  2. Java并发编程:如何创建进程?

    转载自:http://www.cnblogs.com/dolphin0520/p/3913517.html 在前面一篇文章中已经讲述了在进程和线程的由来,今天就来讲一下在Java中如何创建线程,让线程 ...

  3. npm 配置和安装 express4.X 遇到的问题及解决

    前言:懒得看前面两篇介绍的也可以从本节直接参考,但建议最好了解下,因为 4.X 的express 已经把命令行工具分离出来 (链接https://github.com/expressjs/genera ...

  4. .elf格式内容

    arm-linux-ld 可以将程序链接成我们arm平台下的可运行的程序 以之前使用过的led程序为例: 首先: arm-linux-gcc -g -c led.S (-g是表示产生调试信息, -c是 ...

  5. linux_cmd_list_0

    一.文件 touch file # 创建空白文件 rm -rf 目录名 # 不提示删除非空目录(-r:递归删除 -f强制) dos2unix # windows文本转linux文本 unix2dos ...

  6. MySQL关于Duplicate entry '1' for key 'PRIMARY'错误

    今天复习MySQL遇到Duplicate entry '1' for key 'PRIMARY'错误. 原因是主键值为'1'的数据已经存在,主键是唯一的,不可重复.

  7. Swift中枚举的总结以及使用

    枚举定义了一组具有相关性的数据,是开发者可以再带吗中以一个安全的方式来使用这些值,以又助于提供代码的可读性. 在Swift中,枚举可以分成两种:任意类型的枚举和指定类型的枚举,结构如下: //任意类型 ...

  8. Elasticsearch搜索之cross_fields分析

    cross_fields类型采用了一种以词条为中心(Term-centric)的方法,这种方法和best_fields及most_fields采用的以字段为中心(Field-centric)的方法有很 ...

  9. 多个git账号的SSH配置

    一般使用git都只需要维持一个默认的git账户就可以打天下了. 但如果自己确实需要多个git账号的需求的话,就有必要配置多个ssh key了. 首先为生成多个ssh key ssh-keygen -t ...

  10. Java设计模式之(建造者模式)

    建造者模式:是将一个复杂的对象的构建与它的表示分离,使得同样的构建过程可以创建不同的表示. 建造者模式通常包括下面几个角色: 1. builder:抽象建造者,给出一个抽象接口,以规范产品对象的各个组 ...