Java 版模拟浏览器下载百度动漫图片到本地。

 package javaNet.Instance.ImageDownload;

 import java.io.BufferedReader;
 import java.io.Closeable;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 /**
  * Fetches a search-result page, extracts image URLs from it with a regular
  * expression and saves each image to a local directory as {@code <index>.jpg}.
  */
 public class DownloadImgs {

     /**
      * Charset used to decode the fetched page. Decoding is pinned to UTF-8 so
      * the result does not depend on the platform default charset.
      * NOTE(review): assumes the server answers in UTF-8 - confirm.
      */
     private static final Charset PAGE_CHARSET = Charset.forName("UTF-8");

     /** Size in bytes of the copy buffer used while saving an image. */
     private static final int BUFFER_SIZE = 2048;

     /** Address of the page that is fetched by {@link #GetBaiduImgHtml_Stream()}. */
     private String url = null;

     /**
      * @param url address of the page to fetch
      */
     public DownloadImgs(String url) {
         this.url = url;
     }

     //----------------------------------gethtml start-----------------------------

     /**
      * Opens a stream on the page this instance was constructed with.
      * The caller owns the returned stream and must close it.
      *
      * @return stream over the raw page bytes
      * @throws MalformedURLException if the configured URL cannot be parsed
      * @throws IOException if the connection cannot be opened
      */
     public InputStream GetBaiduImgHtml_Stream() throws IOException, MalformedURLException {
         URL imgUrl = new URL(url);
         return imgUrl.openStream();
     }

     /**
      * Reads the whole stream as UTF-8 text, one line at a time, and closes it.
      * Every line is terminated with {@code '\n'} in the result. If reading fails
      * part-way the stack trace is printed and the text read so far is returned.
      *
      * @param inStrm stream to drain; always closed by this method
      * @return the decoded text
      */
     public String InputStreamToString(InputStream inStrm) {
         StringBuilder sb = new StringBuilder();
         BufferedReader reader = new BufferedReader(new InputStreamReader(inStrm, PAGE_CHARSET));
         try {
             String line;
             while ((line = reader.readLine()) != null) {
                 sb.append(line).append('\n');
             }
         } catch (IOException e) {
             e.printStackTrace();
         } finally {
             // Closing the reader also closes the wrapped input stream.
             closeAndReport(reader);
         }
         return sb.toString();
     }

     /**
      * Fetches the configured page and returns it as text.
      *
      * @return the page content
      * @throws MalformedURLException if the configured URL cannot be parsed
      * @throws IOException if the connection cannot be opened
      */
     public String GetBaiduImgHtml_Page() throws MalformedURLException, IOException {
         return this.InputStreamToString(this.GetBaiduImgHtml_Stream());
     }

     /**
      * Prints the page to standard output; useful to check what was fetched.
      *
      * @param page text to print
      */
     public void Display_HtmlPage(String page) {
         System.out.println(page);
     }

     //-------------------------gethtml end----------------

     //-------------------------paretoimgurllist start-----

     /**
      * Collects every match of {@code imgPa} in {@code page}.
      *
      * @param page text to search
      * @param imgPa regular expression; when it has at least one capture group the
      *              first group of each match is collected, otherwise the whole match
      * @return the collected strings in order of appearance (empty if nothing matched)
      */
     public ArrayList<String> ParsePageToImgList(String page, String imgPa) {
         ArrayList<String> imgList = new ArrayList<String>();
         Matcher matcher = Pattern.compile(imgPa).matcher(page);
         // group(1) throws when the pattern has no capture group, so fall back to group(0).
         int group = matcher.groupCount() >= 1 ? 1 : 0;
         while (matcher.find()) {
             imgList.add(matcher.group(group));
         }
         return imgList;
     }

     //------------------------paretoimgurllist end---------

     //------------------------DownloadFile  start----------

     /**
      * Downloads {@code imgUrl} into {@code path} as {@code <index>.jpg}.
      * Progress and failures are reported on standard output; both streams are
      * closed whether or not the copy succeeds.
      *
      * @param imgUrl address of the image
      * @param index number used as the file name
      * @param path directory that receives the file
      * @return {@code true} if the file was written completely, {@code false} otherwise
      */
     public boolean DownloadFile(String imgUrl, int index, String path) {
         // File(parent, child) inserts the platform separator instead of a hard-coded '\'.
         File target = new File(path, index + ".jpg");
         System.out.println("下载：" + imgUrl);
         InputStream ins = null;
         FileOutputStream fout = null;
         try {
             ins = new URL(imgUrl).openStream();
             fout = new FileOutputStream(target);
             byte[] buffer = new byte[BUFFER_SIZE];
             int bytesRead;
             while ((bytesRead = ins.read(buffer)) != -1) {
                 fout.write(buffer, 0, bytesRead);
             }
             fout.flush();
             // Close inside the try so a failure to finish the file counts as a failed download.
             fout.close();
         } catch (Exception e) {
             System.out.println("下载失败！");
             e.printStackTrace();
             return false;
         } finally {
             // Covers the failure path; closing an already-closed stream is harmless.
             closeAndReport(fout);
             closeAndReport(ins);
         }
         System.out.println("下载完成...");
         return true;
     }

     //------------------------DownloadFile  end----------

     //------------------------mkDir  start----------

     /**
      * Creates the directory (and any missing parents) if it does not exist yet.
      * The outcome of the creation is not reported to the caller.
      *
      * @param path the local directory path
      */
     public void MkDir(String path) {
         File dir = new File(path);
         if (!dir.exists()) {
             dir.mkdirs();
         }
     }

     //------------------------mkDir  end------------

     /**
      * Prints every element of the list on its own line.
      *
      * @param list strings to print
      */
     public void Display_ArrayList(ArrayList<String> list) {
         for (String item : list) {
             System.out.println(item);
         }
     }

     /** Closes the resource if it is non-null, printing (not propagating) any failure. */
     private static void closeAndReport(Closeable resource) {
         if (resource == null) {
             return;
         }
         try {
             resource.close();
         } catch (IOException e) {
             e.printStackTrace();
         }
     }

     /**
      * Fetches the hard-coded search page, extracts the {@code objURL} entries and
      * downloads each one into {@code F:\photos}, then prints how many succeeded.
      *
      * @param args unused
      * @throws MalformedURLException if the search URL cannot be parsed
      * @throws IOException if the search page cannot be opened
      */
     public static void main(String[] args) throws MalformedURLException, IOException {
         String imgPa = "\"objURL\":\"(.*?)\"";
         String path = "F:\\photos";

         DownloadImgs downloadimgs = new DownloadImgs("http://image.baidu.com/search/index?"
                 + "tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=index&fr=&sf=1"
                 + "&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0"
                 + "&istype=2&ie=utf-8&word=%E5%8A%A8%E6%BC%AB&oq=%E5%8A%A8%E6%BC%AB&rsp=-1");

         String htmlPage = downloadimgs.GetBaiduImgHtml_Page();
         ArrayList<String> imgList = downloadimgs.ParsePageToImgList(htmlPage, imgPa);

         downloadimgs.MkDir(path);

         int attempted = 0;
         int downloaded = 0;
         for (String imgUrl : imgList) {
             // Files stay numbered 1..n by position; only successes are counted.
             attempted++;
             if (downloadimgs.DownloadFile(imgUrl, attempted, path)) {
                 downloaded++;
             }
         }
         System.out.println("一共下载了" + downloaded + "个图片。");
     }
 }

java版模拟浏览器下载百度动漫图片到本地。的更多相关文章

[JAVA]解决不同浏览器下载附件的中文名乱码问题
附件下载时,遇到中文附件名的兼容性问题,firefox.chrome.ie三个派系不兼容,通过分析整理,总结出处理该问题的办法,记录如下: 1.文件名编码服务器默认使用的是ISO8859-1,而我们 ...
java 实现模拟浏览器访问网站
一般的情况下我们都是使用IE或者Navigator浏览器来访问一个WEB服务器,用来浏览页面查看信息或者提交一些数据等等.所访问的这些页面有的仅仅是一些普通的页面,有的需要用户登录后方可使用,或者需 ...
Java 使用IE浏览器下载文件，文件名乱码问题
使用Servlet实现文件下载功能时,使用IE下载出现文件名乱码; 网上常见的解决办法是通过"user-agent"来判断浏览器: if (req.getHeader("u ...
Java 解决IE浏览器下载文件，文件名出现乱码问题
/** * 区分ie 和其他浏览器的下载文件乱码问题 * @param request * @param fileName * @return */ public String getFileName ...
Java使用IE浏览器下载文件，文件名乱码问题
String userAgent = request.getHeader("user-agent").toLowerCase(); if (userAgent.contains(& ...
浏览器下载img标签Base64图片
https://blog.csdn.net/qq_42076140/article/details/82113622 原文地址 <a href="javascript:downl ...
java读流方式，下载网络上的图片
本工具类支持url的list集合,具体实现如下所示: public static void download(ArrayList<String> listUrl, String downl ...
下载百度上的图片C#——输入名字就可以下载
using System; using System.Collections.Generic; using System.Data; using System.Configuration; using ...
python爬虫:使用Selenium模拟浏览器行为
前几天有位微信读者问我一个爬虫的问题,就是在爬去百度贴吧首页的热门动态下面的图片的时候,爬取的图片总是爬取不完整,比首页看到的少.原因他也大概分析了下,就是后面的图片是动态加载的.他的问题就是这部分动 ...

随机推荐

CAS登录时不仅仅需要用户名来确认身份的情况
最近在帮别人搞CAS,积累点经验问题一:登录需要用户名和部门名称唯一确定一个用户,并将userid作为唯一标示. 在UsernamePasswordCredentials中添加userid 修改Qu ...
Codeforces Round #279 (Div. 2) vector
A. Team Olympiad time limit per test 1 second memory limit per test 256 megabytes input standard inp ...
转：AngularJS的Filter用法详解
Filter简介 Filter是用来格式化数据用的. Filter的基本原型( '|' 类似于Linux中的管道模式): {{ expression | filter }} Filter可以被链式使用 ...
git add 命令
git add xx命令可以将xx文件添加到暂存区,如果有很多改动可以通过 git add -A .来一次添加所有改变的文件. 注意 -A 选项后面还有一个句点. git add -A表示添加所有内容 ...
MongoDB-3.2.6 副本集和主从
yum实例 vim /etc/yum.repos.d/mongodb-org-3.2.repo [mongodb-org-3.2] name=Mongodb baseurl=http://repo.m ...
Oracle EBS - AOL
AOL: (Path: /u43/dev6/interface/aol) 1. Goto system administrator response 2. View -> Request (Sa ...
2016 年 50 个最佳的轻量级 JavaScript 框架和库
作者:IT程序狮链接:https://zhuanlan.zhihu.com/p/24598210来源:知乎著作权归作者所有.商业转载请联系作者获得授权,非商业转载请注明出处. 回顾今年已发布的 JS ...
使用 PDO 方式将 Session 保存到 MySQL 数据中
类: <?php /* 使用数据库保存session */ class DBHandler implements SessionHandlerInterface { protected $dbh ...
swift-UserDefaults控制账号和密码
import UIKit class FiveVC: UIViewController { //MARK:-------- 全局常量设置 let IsFirstLaunch = "IsF ...
/proc/sys/vm/参数
1) /proc/sys/vm/block_dump该文件表示是否打开Block Debug模式,用于记录所有的读写及Dirty Block写回动作.缺省设置:0,禁用Block Debug模式2) ...

java版模拟浏览器下载百度动漫图片到本地。

java版模拟浏览器下载百度动漫图片到本地。的更多相关文章

随机推荐

热门专题