网络版程序:

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.io.IOException;

/**
 * Downloads an HTML page over HTTP and prints every {@code https://} image URL
 * that appears in a {@code src="..."} attribute of that page.
 */
public class TestIndex {

    /** Text that introduces an image URL inside the page markup. */
    private static final String SRC_MARKER = "src=\"https://";

    /** Length of the {@code src="} prefix that precedes the URL itself. */
    private static final int SRC_PREFIX_LENGTH = "src=\"".length();

    private String rootUrl = "http://localhost/apk/";
    private String listUrl = rootUrl + "test-index.htm";

    /** URLs collected by the most recent successful {@link #getData()} call. */
    private static List<String> imageUrlList = new ArrayList<String>();

    /**
     * Fetches the page, then prints the number of URLs found followed by one URL per line.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        TestIndex ti = new TestIndex();
        ti.getData();
        System.out.println(imageUrlList.size());
        for (String imageUrl : imageUrlList) {
            System.out.println(imageUrl);
        }
    }

    /**
     * Opens a connection to {@code urlStr} and returns the response body stream.
     *
     * @param urlStr absolute URL of the page to fetch
     * @return the open response stream; the caller is responsible for closing it
     * @throws IOException if the URL is malformed or the connection fails
     */
    private InputStream getNetInputStream(String urlStr) throws IOException {
        URL url = new URL(urlStr);
        URLConnection conn = url.openConnection();
        conn.connect();
        return conn.getInputStream();
    }

    /**
     * Loads the page at {@code listUrl} and stores the image URLs it contains in
     * {@code imageUrlList}. On an I/O failure the error is reported on stderr and the
     * list is left untouched.
     */
    private void getData() {
        try {
            String html = readAll(getNetInputStream(listUrl));
            List<String> found = extractImageUrls(html);
            imageUrlList.clear();
            imageUrlList.addAll(found);
        } catch (IOException e) {
            // Report instead of swallowing, so a failed download is not mistaken for an empty page.
            System.err.println("Failed to load " + listUrl + ": " + e);
        }
    }

    /**
     * Reads {@code in} to the end as UTF-8 text and closes it.
     *
     * @param in stream to drain; always closed before this method returns
     * @return all lines joined without separators (line terminators are dropped)
     * @throws IOException if reading fails
     */
    private static String readAll(InputStream in) throws IOException {
        try {
            // Decode explicitly as UTF-8 so the result does not depend on the platform charset.
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            StringBuilder html = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                html.append(line);
            }
            return html.toString();
        } finally {
            in.close();
        }
    }

    /**
     * Extracts every {@code https://} URL that appears as the value of a
     * {@code src="..."} attribute, in document order.
     *
     * <p>A URL ends at the attribute's closing double quote, so it is found no matter
     * which attribute (if any) follows it. Scanning stops at a value that has no
     * closing quote; URLs found before that point are still returned.
     *
     * @param html page markup; {@code null} is treated as empty
     * @return the URLs found, possibly empty, never {@code null}
     */
    static List<String> extractImageUrls(String html) {
        List<String> urls = new ArrayList<String>();
        if (html == null) {
            return urls;
        }
        int from = 0;
        while (true) {
            int attr = html.indexOf(SRC_MARKER, from);
            if (attr < 0) {
                break;
            }
            int valueStart = attr + SRC_PREFIX_LENGTH;
            int valueEnd = html.indexOf('"', valueStart);
            if (valueEnd < 0) {
                break;
            }
            urls.add(html.substring(valueStart, valueEnd));
            from = valueEnd + 1;
        }
        return urls;
    }
}

本地版程序:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.io.IOException;

/**
 * Reads an HTML page from the local file system and prints every {@code https://}
 * image URL that appears in a {@code src="..."} attribute of that page.
 */
public class TestIndex_IO {

    /** Text that introduces an image URL inside the page markup. */
    private static final String SRC_MARKER = "src=\"https://";

    /** Length of the {@code src="} prefix that precedes the URL itself. */
    private static final int SRC_PREFIX_LENGTH = "src=\"".length();

    private String rootUrl = "D:/Hixin/webandroid/";
    private String listUrl = rootUrl + "test-index.htm";

    /** URLs collected by the most recent successful {@link #getData()} call. */
    private static List<String> imageUrlList = new ArrayList<String>();

    /**
     * Reads the page, then prints the number of URLs found followed by one URL per line.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        TestIndex_IO ti = new TestIndex_IO();
        ti.getData();
        System.out.println(imageUrlList.size());
        for (String imageUrl : imageUrlList) {
            System.out.println(imageUrl);
        }
    }

    /**
     * Opens the local file at {@code urlStr} for reading.
     *
     * @param urlStr path of the file to open
     * @return the open file stream; the caller is responsible for closing it
     * @throws FileNotFoundException if the file does not exist or cannot be opened
     */
    private InputStream getNetInputStream(String urlStr) throws FileNotFoundException {
        return new FileInputStream(new File(urlStr));
    }

    /**
     * Loads the file at {@code listUrl} and stores the image URLs it contains in
     * {@code imageUrlList}. On an I/O failure the error is reported on stderr and the
     * list is left untouched.
     */
    private void getData() {
        try {
            String html = readAll(getNetInputStream(listUrl));
            List<String> found = extractImageUrls(html);
            imageUrlList.clear();
            imageUrlList.addAll(found);
        } catch (IOException e) {
            // Report instead of swallowing, so a missing file is not mistaken for an empty page.
            System.err.println("Failed to load " + listUrl + ": " + e);
        }
    }

    /**
     * Reads {@code in} to the end as UTF-8 text and closes it.
     *
     * @param in stream to drain; always closed before this method returns
     * @return all lines joined without separators (line terminators are dropped)
     * @throws IOException if reading fails
     */
    private static String readAll(InputStream in) throws IOException {
        try {
            // Decode explicitly as UTF-8 so the result does not depend on the platform charset.
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            StringBuilder html = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                html.append(line);
            }
            return html.toString();
        } finally {
            in.close();
        }
    }

    /**
     * Extracts every {@code https://} URL that appears as the value of a
     * {@code src="..."} attribute, in document order.
     *
     * <p>A URL ends at the attribute's closing double quote, so it is found no matter
     * which attribute (if any) follows it. Scanning stops at a value that has no
     * closing quote; URLs found before that point are still returned.
     *
     * @param html page markup; {@code null} is treated as empty
     * @return the URLs found, possibly empty, never {@code null}
     */
    static List<String> extractImageUrls(String html) {
        List<String> urls = new ArrayList<String>();
        if (html == null) {
            return urls;
        }
        int from = 0;
        while (true) {
            int attr = html.indexOf(SRC_MARKER, from);
            if (attr < 0) {
                break;
            }
            int valueStart = attr + SRC_PREFIX_LENGTH;
            int valueEnd = html.indexOf('"', valueStart);
            if (valueEnd < 0) {
                break;
            }
            urls.add(html.substring(valueStart, valueEnd));
            from = valueEnd + 1;
        }
        return urls;
    }
}

两个版本的差别仅在于 getNetInputStream(String urlStr) 函数的实现。为避免中文乱码,建议在创建 InputStreamReader 时显式指定编码:InputStreamReader isr = new InputStreamReader(is,"utf-8"); 下面两段代码分别使用默认编码和 utf-8 读取同一页面,并输出所得字符串的长度以作对比。

            // Baseline: read the whole page without naming a charset and print its length.
            InputStream is = getNetInputStream(listUrl);
// No charset argument, so the bytes are decoded with the JVM's default charset.
InputStreamReader isr = new InputStreamReader(is);
BufferedReader br = new BufferedReader(isr);
String s = null;
String html="";
// readLine() drops line terminators, so html is the page's lines joined with no separator.
while ((s = br.readLine()) != null)
{
html+=s;
}
// Length in decoded chars, not in bytes; it therefore depends on the charset used above.
System.out.println(html.length());
is.close();

输出结果为:77300

            // Same read as the baseline, but decoding the bytes explicitly as UTF-8.
            InputStream is = getNetInputStream(listUrl);
// The second argument names the charset; this is the only difference from the baseline.
InputStreamReader isr = new InputStreamReader(is,"utf-8");
BufferedReader br = new BufferedReader(isr);
String s = null;
String html="";
// readLine() drops line terminators, so html is the page's lines joined with no separator.
while ((s = br.readLine()) != null)
{
html+=s;
}
// Length in decoded chars, not in bytes; compare with the baseline's value.
System.out.println(html.length());
is.close();

输出结果为:77135

 // Destination directory and file name for the saved copy of the page.
 private String writeUrl = "D:/newfile/new/new";
 private String fileName ="test-index.htm";

 // Create the destination directory, including any missing parents.
 File f = new File(writeUrl);
 if (!f.exists()) {
     f.mkdirs();
 }
 File f1 = new File(f, fileName);
 // Encode explicitly as UTF-8, matching the charset recommended for reading the page,
 // so the saved file does not depend on the platform default charset.
 OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(f1), "utf-8");
 try {
     osw.write(html);
 } finally {
     // close() flushes first; doing it in finally releases the file even if write() fails.
     osw.close();
 }

解析出htm文件中包含的网址。

结果:

20
https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcRvQgUjsVDBncM3mVIgIyIuE87BnlyJUy2BNsAp8kUoTanrC_css5mVAw
https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcThd8cYjOTmCgYJZxX5ls-xpxaAlH1_yocOSCqI5_7OkL29SNtbCZ7q2Yoj
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTl-FzKmsppxuwzmTITGCv9uDxmrWr1pG0lw8mUD9wkWIloASxQeBEMnVjz
https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcQWbmiZJIXKHV2IoTBp7zSY6kD5g5VPzVtBTLJYYR5nwTtKi2-0_u93qL4e
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcSlrLi_GtVgUehU7coFe1eMdrJxPdvS42iTqXkla0g75s31NBfAq2u1LE4
https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcSkrlyGxSs8Dr_7k3MUvoGq1vE45LgHZ0zEhIEdD9LLZiaoMcE7IAqn8ho
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTu__OUSJ4R4EKBu4jOi2ZAdHohpVQIBy3-SfnI8FYpN8wVC9kJG_aWuk_w
https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcR3Bf7YtsHJ813A5_wWzpxIy4MbEmqz5NLw3qv1nPxOZqVjH7QlY-qYSCg
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcToB4nJPqVwnzn0xeasnXyhxGgOqHXdypE6KZIMTfV9k52eIrE3iYsA6Ixm
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcTkKw0LpqdB2eQMUpwdQdvM9DTeNtq1mrvMNivoQtN37p3m0OPsx4ME9i4O
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSZGzMf_3hmdDktz91yp5ZQi-eGWLCenZ0U446sXT2nqYuwlWRI_V_BVIWi
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTQF-55T5GM3dLdaoafPdlIYK0ESNvM6-Bsb4-B2rQTeyD5gGoCKxokExM-
https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcRoRjo4TFeXmx47zE6VH0ylcO0IQ2HBsOHYIMJCI9MsRyg_PF1WhHbqG76Q
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcRrdegt1koEy51dLWrJAbVMJBlCEZ7fPl2mztYYM6onvxocRCq030Ft1gE
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcTtnQpte0uq9Ue9nsg25GeO1kw_-Hcn69ozTQkiMBHrXKwlANutyhwKD9XM
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRNRdxzmuFKABoGgyv2SC0gMticosL2LB3V1fBMOwNtVBZxHkyMw4IcWBFj
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQr40CEf75nWCj5dg-oeKtb9zK6mhktu7vnfoYAh5ioy34goC3c9ptDkQwP
https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcQUnyHrVEbppqhZnWnQrijhBFP0X34gRf7pKw6PdT4ggepB2k9g-p71sgGh
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcR9Us9qblbTJaw47gULXCI8sHKN4I61gYsT2ijebtZzgsMDI8GmYqQpIIw
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSIrW-IbBZjM9Ztn60r9QE1_FIMjt494qGX12tqsLsibYPLuFVwyVSgz1I

用正则表达式更简单:

 // Compile the pattern once: it is identical for every line of the page.
 Pattern p = Pattern.compile("src=\"https[^\"]+");
 InputStream is = getNetInputStream(listUrl);
 // Decode explicitly as UTF-8 rather than with the platform default charset.
 BufferedReader br = new BufferedReader(new InputStreamReader(is, "utf-8"));
 try {
     String s;
     while ((s = br.readLine()) != null) {
         Matcher m = p.matcher(s);
         while (m.find()) {
             // group() is the whole match, so every printed line starts with src="
             System.out.println(m.group());
         }
     }
 } finally {
     // Closing the reader also closes the underlying stream.
     br.close();
 }
 src="https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcRvQgUjsVDBncM3mVIgIyIuE87BnlyJUy2BNsAp8kUoTanrC_css5mVAw
src="https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcThd8cYjOTmCgYJZxX5ls-xpxaAlH1_yocOSCqI5_7OkL29SNtbCZ7q2Yoj
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTl-FzKmsppxuwzmTITGCv9uDxmrWr1pG0lw8mUD9wkWIloASxQeBEMnVjz
src="https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcQWbmiZJIXKHV2IoTBp7zSY6kD5g5VPzVtBTLJYYR5nwTtKi2-0_u93qL4e
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcSlrLi_GtVgUehU7coFe1eMdrJxPdvS42iTqXkla0g75s31NBfAq2u1LE4
src="https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcSkrlyGxSs8Dr_7k3MUvoGq1vE45LgHZ0zEhIEdD9LLZiaoMcE7IAqn8ho
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTu__OUSJ4R4EKBu4jOi2ZAdHohpVQIBy3-SfnI8FYpN8wVC9kJG_aWuk_w
src="https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcR3Bf7YtsHJ813A5_wWzpxIy4MbEmqz5NLw3qv1nPxOZqVjH7QlY-qYSCg
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcToB4nJPqVwnzn0xeasnXyhxGgOqHXdypE6KZIMTfV9k52eIrE3iYsA6Ixm
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcTkKw0LpqdB2eQMUpwdQdvM9DTeNtq1mrvMNivoQtN37p3m0OPsx4ME9i4O
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSZGzMf_3hmdDktz91yp5ZQi-eGWLCenZ0U446sXT2nqYuwlWRI_V_BVIWi
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTQF-55T5GM3dLdaoafPdlIYK0ESNvM6-Bsb4-B2rQTeyD5gGoCKxokExM-
src="https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcRoRjo4TFeXmx47zE6VH0ylcO0IQ2HBsOHYIMJCI9MsRyg_PF1WhHbqG76Q
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcRrdegt1koEy51dLWrJAbVMJBlCEZ7fPl2mztYYM6onvxocRCq030Ft1gE
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcTtnQpte0uq9Ue9nsg25GeO1kw_-Hcn69ozTQkiMBHrXKwlANutyhwKD9XM
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRNRdxzmuFKABoGgyv2SC0gMticosL2LB3V1fBMOwNtVBZxHkyMw4IcWBFj
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQr40CEf75nWCj5dg-oeKtb9zK6mhktu7vnfoYAh5ioy34goC3c9ptDkQwP
src="https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcQUnyHrVEbppqhZnWnQrijhBFP0X34gRf7pKw6PdT4ggepB2k9g-p71sgGh
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcR9Us9qblbTJaw47gULXCI8sHKN4I61gYsT2ijebtZzgsMDI8GmYqQpIIw
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSIrW-IbBZjM9Ztn60r9QE1_FIMjt494qGX12tqsLsibYPLuFVwyVSgz1I

通过网络得到html,并解析出其中网址(JAVA程序)的更多相关文章

  1. Java匹马行天下之 Java国出了个Java——举国欢庆

    Java帝国的崛起 前言: 看庭前花开花落,宠辱不惊, 望天上云卷云舒,去留无意. 闹心的事儿,选择释怀: 纠缠的人儿,试着放下, 生活其实很美. 心若向阳,就无惧悲伤. 愿你明朗坦荡纵情豁达,有得有 ...

  2. Android网络请求与数据解析,使用Gson和GsonFormat解析复杂Json数据

    版权声明:未经博主允许不得转载 一:简介 [达叔有道]软件技术人员,时代作者,从 Android 到全栈之路,我相信你也可以!阅读他的文章,会上瘾!You and me, we are family ...

  3. 初探iOS网络开发,数据解析。

    通过大众点评平台开发来简单了解一下,oc的网络编程和数据解析(json) 首先我们需要到大众点评开发者平台申请一个key.http://developer.dianping.com/app/tech ...

  4. Flutter网络请求和数据解析

    一:前言 - 什么是反射机制,Flutter为什么禁用反射机制? 在Flutter中它的网络请求和数据解析稍微的比较麻烦一点,因为Flutter不支持反射机制.相信大家都看到这么一条,就是Flutte ...

  5. Android okHttp网络请求之Json解析

    前言: 前面两篇文章介绍了基于okHttp的post.get请求,以及文件的上传下载,今天主要介绍一下如何和Json解析一起使用?如何才能提高开发效率? okHttp相关文章地址: Android o ...

  6. ZeroMQ接口函数之 :zmq_z85_decode – 从一个用Z85算法生成的文本中解析出二进制密码

    ZeroMQ 官方地址 :http://api.zeromq.org/4-0:zmq_z85_decode zmq_z85_decode(3)         ØMQ Manual - ØMQ/4.1 ...

  7. js中解析json对象:JSON.parse()用于从一个字符串中解析出json对象, JSON.stringify()用于从一个对象解析出字符串。

    JSON.parse()用于从一个字符串中解析出json对象. var str = '{"name":"huangxiaojian","age" ...

  8. GJM : Unity3D 常用网络框架与实战解析 【笔记】

    Unity常用网络框架与实战解析 1.Http协议          Http协议                  存在TCP 之上 有时候 TLS\SSL 之上 默认端口80 https 默认端口 ...

  9. 网络热恋之XML解析

    XML 可扩展标记语言 用于标记电子文件使其具有结构性的标记语言,可以用来标记数据.定义数据类型,是一种允许用户对自己的标记语言进行定义的源语言 易读性高,编码手写难度小,数据量大 NSXMLPars ...

随机推荐

  1. DCN路由操作

    offset */interface in/out access-list/prefix-list <1-16>                 // 修改路由偏移量   RIP偏移列表 ...

  2. Mybatis基础学习(三)—映射文件

    一.输入映射 1.parameterType     指定输入参数的Java类,可以使用别名或者类的全限定名.它也可以接受基本数据类型.POJO对象.HashMap.   (1)基本数据类型   (2 ...

  3. FunDA:一个开源的函数式数据处理工具库,也是Slick的补充

    如果你是一个Slick用户,或者你是一个数据库编程人员正在尝试进入函数式编程模式,那么FunDA可能会帮到你. 目前市面上FRM(Functional Relational Mapper),即函数式的 ...

  4. jquery template.js前端模板引擎

    作为现代应用,ajax的大量使用,使得前端工程师们日常的开发少不了拼装模板,渲染模板 在刚有web的时候,前端与后端的交互,非常直白,浏览器端发出URL,后端返回一张拼好了的HTML串.浏览器对其进行 ...

  5. 对InvokeRequired的理解

    if (listBox1.InvokeRequired)                            //当有新工作进程访问控件时InvokeRequired为True            ...

  6. 前馈神经网络-反向传播(Back Propagation)公式推导走读

        构造:输入神经元个数等于输入向量维度,输出神经元个数等于输出向量维度.(x1=(1,2,3),则需要三个输入神经元)   一 前向后传播   隐层:

  7. STAR法则的感想

    STAR法则百度百科上被解释为,面试官用于收集面试者信息的工具,而我个人理解,它更像是一个表达技巧,叙述结构,我们先来看看什么是STAR法则: STAR法则,即为Situation Task Acti ...

  8. 一条SQL搞定信息增益的计算

    欢迎大家关注腾讯云技术社区-博客园官方主页,我们将持续在博客园为大家推荐技术精品文章哦~ 周东谕,2011年加入腾讯,现任职于腾讯互娱运营部数据中心,主要从事游戏相关的数据分析和挖掘工作. 信息增益原 ...

  9. Python 一行代码

    Python语法十分便捷,通过几个简单例子了解其趣味 1.Fizz.Buzz问题为: 打印数字1到100, 3的倍数打印"Fizz", 5的倍数打印"Buzz" ...

  10. Swift try try! try?使用和区别

    Swift try try! try?使用和区别 一.异常处理try catch的使用 1. swift异常处理 历史由来 Swift1.0版本 Cocoa Touch 的 NSError ,Swif ...