网络版程序:

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List; public class TestIndex { private String rootUrl = "http://localhost/apk/";
private String listUrl = rootUrl + "test-index.htm";
private static List<String> imageUrlList = new ArrayList<String>();
public static void main(String args[]){
TestIndex ti = new TestIndex();
ti.getData();
System.out.println(imageUrlList.size());
for(int i=0; i<imageUrlList.size();i++){
System.out.println(imageUrlList.get(i));
} } private InputStream getNetInputStream(String urlStr)
{
try
{
URL url = new URL(urlStr);
URLConnection conn = url.openConnection();
conn.connect();
InputStream is = conn.getInputStream();
return is;
}
catch (Exception e)
{ }
return null;
}
private void getData() {
try
{
InputStream is = getNetInputStream(listUrl);
InputStreamReader isr = new InputStreamReader(is);
BufferedReader br = new BufferedReader(isr);
String s = null;
String html="";
while ((s = br.readLine()) != null)
{
html+=s;
} is.close();
String startStr = "src=\"https://";
String endStr = " width=";
int start = 0;
int end = 0;
int index =0;
imageUrlList.clear();
while (true)
{
start = html.indexOf(startStr, index);
if (start < 0)
break;
index=start;
end = html.indexOf(endStr, index);
String ss = html.substring(start+5,end-1);
imageUrlList.add(ss);
index +=ss.length();
}
}
catch (Exception e)
{
// TODO: handle exception
}
}
}

本地版程序:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List; public class TestIndex_IO { private String rootUrl = "D:/Hixin/webandroid/";
private String listUrl = rootUrl + "test-index.htm";
private static List<String> imageUrlList = new ArrayList<String>();
public static void main(String args[]){
TestIndex_IO ti = new TestIndex_IO();
ti.getData();
System.out.println(imageUrlList.size());
for(int i=0; i<imageUrlList.size();i++){
System.out.println(imageUrlList.get(i));
} } private InputStream getNetInputStream(String urlStr)
{ InputStream is;
try {
is = new FileInputStream(new File(urlStr));
return is;
} catch (FileNotFoundException e) {
e.printStackTrace();
}
return null; }
private void getData() {
try
{
InputStream is = getNetInputStream(listUrl);
InputStreamReader isr = new InputStreamReader(is);
BufferedReader br = new BufferedReader(isr);
String s = null;
String html="";
while ((s = br.readLine()) != null)
{
html+=s;
} is.close();
String startStr = "src=\"https://";
String endStr = " width=";
int start = 0;
int end = 0;
int index =0;
imageUrlList.clear();
while (true)
{
start = html.indexOf(startStr, index);
if (start < 0)
break;
index=start;
end = html.indexOf(endStr, index);
String ss = html.substring(start+5,end-1);
imageUrlList.add(ss);
index +=ss.length();
}
}
catch (Exception e)
{
// TODO: handle exception
}
}
}

差别仅仅在于private InputStream getNetInputStream(String urlStr)函数。为避免中文乱码,建议InputStreamReader isr = new InputStreamReader(is,"utf-8");

            InputStream is = getNetInputStream(listUrl);
InputStreamReader isr = new InputStreamReader(is);
BufferedReader br = new BufferedReader(isr);
String s = null;
String html="";
while ((s = br.readLine()) != null)
{
html+=s;
}
System.out.println(html.length());
is.close();

输出结果为:77300

            InputStream is = getNetInputStream(listUrl);
InputStreamReader isr = new InputStreamReader(is,"utf-8");
BufferedReader br = new BufferedReader(isr);
String s = null;
String html="";
while ((s = br.readLine()) != null)
{
html+=s;
}
System.out.println(html.length());
is.close();

输出结果为:77135

 private String writeUrl = "D:/newfile/new/new";
private String fileName ="test-index.htm";
File f = new File(writeUrl);
if(!f.exists()) {
f.mkdirs();
}
File f1 = new File(f, fileName);
FileOutputStream fos = new FileOutputStream(f1);
OutputStreamWriter osw = new OutputStreamWriter(fos); osw.write(html,0,html.length());
osw.flush();
osw.close();

解析出htm文件中包含的网址。

结果:

20
https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcRvQgUjsVDBncM3mVIgIyIuE87BnlyJUy2BNsAp8kUoTanrC_css5mVAw
https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcThd8cYjOTmCgYJZxX5ls-xpxaAlH1_yocOSCqI5_7OkL29SNtbCZ7q2Yoj
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTl-FzKmsppxuwzmTITGCv9uDxmrWr1pG0lw8mUD9wkWIloASxQeBEMnVjz
https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcQWbmiZJIXKHV2IoTBp7zSY6kD5g5VPzVtBTLJYYR5nwTtKi2-0_u93qL4e
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcSlrLi_GtVgUehU7coFe1eMdrJxPdvS42iTqXkla0g75s31NBfAq2u1LE4
https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcSkrlyGxSs8Dr_7k3MUvoGq1vE45LgHZ0zEhIEdD9LLZiaoMcE7IAqn8ho
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTu__OUSJ4R4EKBu4jOi2ZAdHohpVQIBy3-SfnI8FYpN8wVC9kJG_aWuk_w
https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcR3Bf7YtsHJ813A5_wWzpxIy4MbEmqz5NLw3qv1nPxOZqVjH7QlY-qYSCg
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcToB4nJPqVwnzn0xeasnXyhxGgOqHXdypE6KZIMTfV9k52eIrE3iYsA6Ixm
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcTkKw0LpqdB2eQMUpwdQdvM9DTeNtq1mrvMNivoQtN37p3m0OPsx4ME9i4O
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSZGzMf_3hmdDktz91yp5ZQi-eGWLCenZ0U446sXT2nqYuwlWRI_V_BVIWi
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTQF-55T5GM3dLdaoafPdlIYK0ESNvM6-Bsb4-B2rQTeyD5gGoCKxokExM-
https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcRoRjo4TFeXmx47zE6VH0ylcO0IQ2HBsOHYIMJCI9MsRyg_PF1WhHbqG76Q
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcRrdegt1koEy51dLWrJAbVMJBlCEZ7fPl2mztYYM6onvxocRCq030Ft1gE
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcTtnQpte0uq9Ue9nsg25GeO1kw_-Hcn69ozTQkiMBHrXKwlANutyhwKD9XM
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRNRdxzmuFKABoGgyv2SC0gMticosL2LB3V1fBMOwNtVBZxHkyMw4IcWBFj
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQr40CEf75nWCj5dg-oeKtb9zK6mhktu7vnfoYAh5ioy34goC3c9ptDkQwP
https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcQUnyHrVEbppqhZnWnQrijhBFP0X34gRf7pKw6PdT4ggepB2k9g-p71sgGh
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcR9Us9qblbTJaw47gULXCI8sHKN4I61gYsT2ijebtZzgsMDI8GmYqQpIIw
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSIrW-IbBZjM9Ztn60r9QE1_FIMjt494qGX12tqsLsibYPLuFVwyVSgz1I

用正则表达式更简单:

 InputStream is = getNetInputStream(listUrl);
InputStreamReader isr = new InputStreamReader(is);
BufferedReader br = new BufferedReader(isr);
String s = ""; while ((s = br.readLine()) != null)
{
Pattern p = Pattern.compile("src=\"https[^\"]+");
Matcher m = p.matcher(s);
while(m.find()) {
System.out.println(m.group());
}
}
 src="https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcRvQgUjsVDBncM3mVIgIyIuE87BnlyJUy2BNsAp8kUoTanrC_css5mVAw
src="https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcThd8cYjOTmCgYJZxX5ls-xpxaAlH1_yocOSCqI5_7OkL29SNtbCZ7q2Yoj
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTl-FzKmsppxuwzmTITGCv9uDxmrWr1pG0lw8mUD9wkWIloASxQeBEMnVjz
src="https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcQWbmiZJIXKHV2IoTBp7zSY6kD5g5VPzVtBTLJYYR5nwTtKi2-0_u93qL4e
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcSlrLi_GtVgUehU7coFe1eMdrJxPdvS42iTqXkla0g75s31NBfAq2u1LE4
src="https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcSkrlyGxSs8Dr_7k3MUvoGq1vE45LgHZ0zEhIEdD9LLZiaoMcE7IAqn8ho
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTu__OUSJ4R4EKBu4jOi2ZAdHohpVQIBy3-SfnI8FYpN8wVC9kJG_aWuk_w
src="https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcR3Bf7YtsHJ813A5_wWzpxIy4MbEmqz5NLw3qv1nPxOZqVjH7QlY-qYSCg
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcToB4nJPqVwnzn0xeasnXyhxGgOqHXdypE6KZIMTfV9k52eIrE3iYsA6Ixm
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcTkKw0LpqdB2eQMUpwdQdvM9DTeNtq1mrvMNivoQtN37p3m0OPsx4ME9i4O
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSZGzMf_3hmdDktz91yp5ZQi-eGWLCenZ0U446sXT2nqYuwlWRI_V_BVIWi
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTQF-55T5GM3dLdaoafPdlIYK0ESNvM6-Bsb4-B2rQTeyD5gGoCKxokExM-
src="https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcRoRjo4TFeXmx47zE6VH0ylcO0IQ2HBsOHYIMJCI9MsRyg_PF1WhHbqG76Q
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcRrdegt1koEy51dLWrJAbVMJBlCEZ7fPl2mztYYM6onvxocRCq030Ft1gE
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcTtnQpte0uq9Ue9nsg25GeO1kw_-Hcn69ozTQkiMBHrXKwlANutyhwKD9XM
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRNRdxzmuFKABoGgyv2SC0gMticosL2LB3V1fBMOwNtVBZxHkyMw4IcWBFj
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQr40CEf75nWCj5dg-oeKtb9zK6mhktu7vnfoYAh5ioy34goC3c9ptDkQwP
src="https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcQUnyHrVEbppqhZnWnQrijhBFP0X34gRf7pKw6PdT4ggepB2k9g-p71sgGh
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcR9Us9qblbTJaw47gULXCI8sHKN4I61gYsT2ijebtZzgsMDI8GmYqQpIIw
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSIrW-IbBZjM9Ztn60r9QE1_FIMjt494qGX12tqsLsibYPLuFVwyVSgz1I

通过网络得到html,并解析出其中网址(JAVA程序)的更多相关文章

  1. Java匹马行天下之 Java国出了个Java——举国欢庆

    Java帝国的崛起 前言: 看庭前花开花落,宠辱不惊, 望天上云卷云舒,去留无意. 闹心的事儿,选择释怀: 纠缠的人儿,试着放下, 生活其实很美. 心若向阳,就无惧悲伤. 愿你明朗坦荡纵情豁达,有得有 ...

  2. Android网络请求与数据解析,使用Gson和GsonFormat解析复杂Json数据

    版权声明:未经博主允许不得转载 一:简介 [达叔有道]软件技术人员,时代作者,从 Android 到全栈之路,我相信你也可以!阅读他的文章,会上瘾!You and me, we are family ...

  3. 初探iOS网络开发,数据解析。

    通过大众点评平台开发来简单了解一下,oc的网络编程和数据解析(json) 首先我们需要到大大众点评开发者平台申请一个key.http://developer.dianping.com/app/tech ...

  4. Flutter网络请求和数据解析

    一:前言 - 什么是反射机制,Flutter为什么禁用反射机制? 在Flutter中它的网络请求和数据解析稍微的比较麻烦一点,因为Flutter不支持反射机制.相信大家都看到这么一条,就是Flutte ...

  5. Android okHttp网络请求之Json解析

    前言: 前面两篇文章介绍了基于okHttp的post.get请求,以及文件的上传下载,今天主要介绍一下如何和Json解析一起使用?如何才能提高开发效率? okHttp相关文章地址: Android o ...

  6. ZeroMQ接口函数之 :zmq_z85_decode – 从一个用Z85算法生成的文本中解析出二进制密码

    ZeroMQ 官方地址 :http://api.zeromq.org/4-0:zmq_z85_decode zmq_z85_decode(3)         ØMQ Manual - ØMQ/4.1 ...

  7. js中解析json对象:JSON.parse()用于从一个字符串中解析出json对象, JSON.stringify()用于从一个对象解析出字符串。

    JSON.parse()用于从一个字符串中解析出json对象. var str = '{"name":"huangxiaojian","age&quo ...

  8. GJM : Unity3D 常用网络框架与实战解析 【笔记】

    Unity常用网络框架与实战解析 1.Http协议          Http协议                  存在TCP 之上 有时候 TLS\SSL 之上 默认端口80 https 默认端口 ...

  9. 网络热恋之XML解析

    XML 可扩展标记语言 用于标记电子文件使其具有结构性的标记语言,可以用来标记数据.定义数据类型,是一种允许用户对自己的标记语言进行定义的源语言 易读性高,编码手写难度小,数据量大 NSXMLPars ...

随机推荐

  1. 老李谈HTTP1.1的长连接

    老李谈HTTP1.1的长连接   poptest是国内唯一一家培养测试开发工程师的培训机构,以学员能胜任自动化测试,性能测试,测试工具开发等工作为目标.如果对课程感兴趣,请大家咨询qq:9088214 ...

  2. 性能测试培训:批量执行Jmeter脚本之ant调用

    性能测试培训:批量执行Jmeter脚本之ant调用   poptest是国内唯一一家培养测试开发工程师的培训机构,以学员能胜任自动化测试,性能测试,测试工具开发等工作为目标.在poptest的load ...

  3. 菜鸟学IT之IP基础

    IT菜鸟,以后研究的方向是云计算,从基础的开始,这是第一篇博文.有不对的地方希望大家指正.IP是网络知识的基础,今天就开始学习IP. IP地址格式:IP地址就是"网络地址+主机地址" ...

  4. AndroidAnnotations框架简单使用方法

    当我们配置好了框架后,那么久可以来使用了.使用教程网上一大堆,官方也有提供!!!可自行学习深造.下面我简单的贴出几个常用的方法,作为HelloWorld入门: @EActivity(R.layout. ...

  5. python_嵌套列表变成普通列表

    如何把[1, 5, 6, [2, 7, [3, [4, 5, 6]]]]变成[1, 5, 6, 2, 7, 3, 4, 5, 6]? 思考: -- for循环每次都遍历列表一层 -- 把取出的单个值加 ...

  6. 简单 fibonacci 函数

    public static int fibonacci(int n){  if(n<=1) return 1;  else {     return fibonacci(n-1)+fibonac ...

  7. 空a标签 a标签空的情况下 IE6 IE7下点击无效

    最近做了好多网站专题页面,因为专题页面图片较多,个别banner上有1个到多个按钮,一种是用“图解img标签的usemap”的方法做链接,(图解img标签的usemap使用方法)[传送门] 另一种用则 ...

  8. 我的iOS博客旅行开始了,欢迎光临!

    期待您的关注!

  9. android studio 2.3 下载地址

      android studio下载:  Windows+SDK:(1.8GB)| Windows(428 MB) | Linux    idea  win.exe  win.zip 序号 名称 中文 ...

  10. Javascript的内容

    JS简介和变量 {JS的三种方式}            1 HTML中内嵌JS(不提倡使用)            <button onclick="javascript:alert ...