import re
from datetime import timedelta
from tornado import httpclient, gen, ioloop, queues

# Maps an academy code to the number of sequential ids to crawl for it.
# NOTE(review): every key in this literal is the empty string, so Python
# collapses it to {'': 56}.  The real academy codes appear to have been lost
# before this file was saved -- restore them before relying on the crawl.
peoples = {'': 71, '': 66, '': 54, '': 50, '': 66, '': 61,
           '': 103, '': 31, '': 32, '': 41, '': 33, '': 93, '': 50, '': 108, '': 55,
           '': 55, '': 92, '': 56, '': 29, '': 27,
           '': 25, '': 25, '': 50, '': 66, '': 68, '': 52, '': 50, '': 50, '': 52, '': 50,
           '': 133, '': 166, '': 10, '': 8, '': 99, '': 18,
           '': 50, '': 24, '': 19, '': 25, '': 24, '': 24, '': 67, '': 52, '': 67,
           '': 67, '': 8, '': 31, '': 82, '': 62, '': 8, '': 104, '': 52, '': 52, '': 47,
           '': 56, '': 72, '': 57, '': 36, '': 50, '': 120, '': 50,
           '': 56}


class AsySpider(object):
    """Concurrent URL fetcher built on Tornado coroutines and a work queue.

    Subclasses normally override ``fetch`` (to customise the request) and
    ``handle_html`` (to consume the body of a successful response).
    """

    def __init__(self, urls, concurrency=10, results=None, **kwargs):
        """Prepare the crawl state.

        :param urls: URLs to fetch; they are fetched in the given order.
        :param concurrency: number of worker coroutines started by ``_run``.
        :param results: optional list stored on ``self.results``; a new
            empty list is used when omitted.
        :param kwargs: accepted for subclass compatibility and ignored here.
        """
        # Work on a reversed private copy: URLs are taken with ``pop()`` from
        # the end, and the caller's list must not be reversed or drained.
        self.urls = list(reversed(urls))
        self.concurrency = concurrency
        self._q = queues.Queue()
        self._fetching = set()
        self._fetched = set()
        # Always define the attribute, including when a list is supplied.
        self.results = [] if results is None else results

    def fetch(self, url, **kwargs):
        """Start an HTTP request for ``url`` and return its Future.

        ``raise_error=False`` is passed so HTTP error statuses come back as
        response objects instead of raising.  Extra keyword arguments are
        forwarded to ``AsyncHTTPClient.fetch``.
        """
        client = httpclient.AsyncHTTPClient()
        return client.fetch(url, raise_error=False, **kwargs)

    def handle_html(self, url, html):
        """Handle the body of a successfully fetched page (default: print the URL)."""
        print(url)

    def handle_response(self, url, response):
        """Dispatch on the response code; override if different handling is needed.

        A 200 response is passed to ``handle_html``.  A 599 response is
        re-queued for another attempt.  Other codes are ignored.
        """
        if response.code == 200:
            self.handle_html(url, response.body)
        elif response.code == 599:  # retry
            # NOTE(review): there is no retry limit, so a URL that keeps
            # answering 599 is re-queued forever.
            # ``discard`` keeps this safe if the URL was never marked.
            self._fetching.discard(url)
            self._q.put(url)

    @gen.coroutine
    def get_page(self, url):
        """Fetch ``url`` and return the response.

        If the fetch raises, the exception is printed and the exception
        object itself is returned in place of a response.
        """
        try:
            response = yield self.fetch(url)
        except Exception as e:
            print('Exception: %s %s' % (e, url))
            raise gen.Return(e)
        raise gen.Return(response)

    @gen.coroutine
    def _run(self):
        """Run the crawl until the queue drains or the join timeout expires."""

        @gen.coroutine
        def fetch_url():
            """Take one URL from the queue, fetch it, and enqueue more work."""
            current_url = yield self._q.get()
            try:
                if current_url in self._fetching:
                    return
                self._fetching.add(current_url)
                response = yield self.get_page(current_url)
                # get_page hands back the exception object on failure (it has
                # already been printed); such an object has no ``.code``.
                if not isinstance(response, Exception):
                    self.handle_response(current_url, response)
                self._fetched.add(current_url)
                # Feed up to ``concurrency`` further URLs into the queue.
                for _ in range(self.concurrency):
                    if self.urls:
                        yield self._q.put(self.urls.pop())
            finally:
                # Must run on every path so ``join`` can complete.
                self._q.task_done()

        @gen.coroutine
        def worker():
            """Process queue items forever; one failing URL must not stop the loop."""
            while True:
                try:
                    yield fetch_url()
                except Exception as e:
                    # Without this, an error in a handler ends the worker and
                    # the remaining queue items are never processed.
                    print('Exception: %s' % e)

        if not self.urls:
            # Nothing to crawl; popping from an empty list would raise.
            return

        self._q.put(self.urls.pop())  # add first url
        # Start workers, then wait for the work queue to be empty.
        for _ in range(self.concurrency):
            worker()
        yield self._q.join(timeout=timedelta(seconds=300000))

        # Report any URL that was started but not finished (or vice versa).
        # A plain comparison is used because ``assert`` is stripped under -O.
        if self._fetching != self._fetched:
            print(self._fetching - self._fetched)
            print(self._fetched - self._fetching)

    def run(self):
        """Run the crawl to completion on the current IOLoop."""
        io_loop = ioloop.IOLoop.current()
        io_loop.run_sync(self._run)


class MySpider(AsySpider):
    """Spider that downloads user photos and saves each one as a .jpg file."""

    def fetch(self, url, **kwargs):
        """Override the parent fetch to send a spider User-Agent and a session cookie.

        Extra keyword arguments are forwarded to the parent; an explicit
        ``headers`` argument replaces the defaults built here.
        """
        # NOTE(review): a session cookie is hard-coded in source; consider
        # loading it from configuration instead.
        cookies_str = ('JSESSIONID=0000n4jBi_dKg91XbtHHQHDeeDL:1b4e17j2v; iPlanetDire'
                       'ctoryPro=AQIC5wM2LY4Sfcxu%'
                       '2FWPIJWGHttZPiXafd%2B1gowyEoxTmyiY%3D%40AAJTSQACMDE%3D%23')
        headers = {
            'User-Agent': 'mozilla/5.0 (compatible; baiduspider/2.0; +http://www.baidu.com/search/spider.html)',
            'cookie': cookies_str
        }
        kwargs.setdefault('headers', headers)
        return super(MySpider, self).fetch(url, **kwargs)

    def handle_html(self, url, html):
        """Write the response body to ``<save_dir><ownerId>.jpg``.

        The file name is whatever follows ``userPhoto&ownerId=`` in ``url``.
        URLs without that marker are reported and skipped.
        """
        # The appended sentinel lets the greedy group capture everything from
        # ``ownerId=`` to the end of the original URL.
        tagged_url = url + 'qwertyu'
        pattern = re.compile('userPhoto&ownerId=(.*)qwertyu')
        matches = re.findall(pattern, tagged_url)
        if not matches:
            print('Exception: no ownerId in %s' % url)
            return
        filename = matches[0]
        # Change this to the directory where the photos should be stored,
        # for example C:/picture/
        save_dir = '/home/innovation/文档/pic/'
        # The ``with`` block closes the file; no explicit close() is needed.
        with open(save_dir + filename + '.jpg', 'wb') as photo:
            photo.write(html)


def main():
    """Build one photo URL per (academy, sequence number) pair and crawl them."""
    urls = []
    url_pic = 'http://myportal.sxu.edu.cn/attachmentDownload.portal?notUseCache=true&type=userPhoto&ownerId='
    for academy, count in peoples.items():
        for seq in range(1, count + 1):
            # NOTE(review): the '' prefixes below (and the one before
            # ``academy``) are empty, so no padding is applied.  They look
            # like zero-padding / prefix literals whose digits were lost --
            # confirm the intended id format.
            if seq < 10:
                suffix = '' + str(seq)
            elif seq < 100:
                suffix = '' + str(seq)
            else:
                suffix = str(seq)
            urls.append(url_pic + '' + academy + suffix)
    s = MySpider(urls)
    s.run()


if __name__ == '__main__':
    main()

A flash of Joy的更多相关文章

  1. Compiling OpenGL games with the Flash C Compiler (FlasCC)

    Compiling OpenGL games with the Flash C Compiler (FlasCC) In this article I show how to use the Flas ...

  2. 隐私泄露杀手锏 —— Flash 权限反射

    [简版:http://weibo.com/p/1001603881940380956046] 前言 一直以为该风险早已被重视,但最近无意中发现,仍有不少网站存在该缺陷,其中不乏一些常用的邮箱.社交网站 ...

  3. 百度 flash html5自切换 多文件异步上传控件webuploader基本用法

    双核浏览器下在chrome内核中使用uploadify总有302问题,也不知道如何修复,之所以喜欢360浏览器是因为帮客户控制渲染内核: 若页面需默认用极速核,增加标签:<meta name=& ...

  4. 解决“chrome提示adobe flash player 已经过期”的小问题

    这个小问题也确实困扰我许久,后来看到chrome吧里面有人给出了解决方案: 安装install_flash_player_ppapi, 该软件下载地址:http://labs.adobe.com/do ...

  5. 在 Linux 中使用搜狗拼音输入法以及搞定 Flash 和支付宝

    在 Ubuntu 中安装搜狗输入法 在 Ubuntu Kylin 系统中,默认安装搜狗拼音输入法,但是在原生 Ubuntu 系统中则不是.这可以理解,毕竟搜狗输入法的 Linux 版有 Kylin 团 ...

  6. [异常解决] ubuntukylin16.04 LTS中关于flash安装和使用不了的问题解决

    http://www.linuxdiyf.com/linux/25211.html 归纳解决flash插件大法: 启动器中找到 软件更新,启动,点击 其它软件,把Canonical合作伙伴前方框 选上 ...

  7. 基于Adobe Flash平台的3D页游技术剖析

    写在前面 从黑暗之光,佛本是道,大战神的有插件3D页游.再到如今的魔龙之戒. 足以证明,3D无插件正在引领页游技术的潮流. 目前,要做到3D引擎,有以下几个选择. 说到这里,我们发现.这些都不重要. ...

  8. 强大的flash头像上传插件(支持旋转、拖拽、剪裁、生成缩略图等)

    今天介绍的这款flash上传头像功能非常强大,支持php,asp,jsp,asp.net 调用 头像剪裁,预览组件插件. 本组件需要安装Flash Player后才可使用,请从http://dl.pc ...

  9. MDK st-link下载STM32程序出现Internal command error和Error:Flash download failed. Target DLL

    MDK st-link下载STM32程序出现Internal command error和Error:Flash download failed. Target DLL   是因为目标板的芯片处于休眠 ...

随机推荐

  1. 在SQL Server 2005中连接Oracle,完成查询、插入操作

    建立指向Oracle的连接假设Oracle数据库的用户名为test,密码为test,在SQL Server数据库所在服务器上建立的指向Oracle数据库的服务命名为hisorcl.1. 在SQL Se ...

  2. Java 邮件发送

    <dependency> <groupId>javax.mail</groupId> <artifactId>mail</artifactId&g ...

  3. zookeeper,dubbo,dubbo admin

    zookeeper 1. 分布式协调服务:我们的程序运行在不同的机器上,这些机器可能位于同一个机架,同一个机房又或不同的数据中心.在这样的环境中,我们要实现协调该怎么办?那么这就是分布式协调服务要干的 ...

  4. ruby第一次实践 “hello world”

    下载ruby   https://rubyinstaller.org/downloads/ 创建 ruby-version 写自己的版本 写Gemfile source: http://ruby.ta ...

  5. 浅析Java.lang.Runtime类

    一.概述      Runtime类封装了运行时的环境.每个 Java 应用程序都有一个 Runtime 类实例,使应用程序能够与其运行的环境相连接.      一般不能实例化一个Runtime对象, ...

  6. js 判断移动设备、pc端、android、iPhone、是否为微信、微博、qq空间

    var browser = {   versions: function () {      var u = navigator.userAgent, app = navigator.appVersio ...

  7. JavaScript的apply()方法和call()方法

    1 <script type="text/javascript"> 2 /*定义一个人类*/ 3 function Person(name,age) 4 { 5 thi ...

  8. Sprite(精灵)&& 三个特殊的层Layer

    用来作为以后复习使用. 1 #include "ScenceScend.h" CCScene* ScenceScend::scene() { CCScene* s = CCScen ...

  9. Mac > MacBook Pro的移动硬盘方案

    灵感来自:http://cone.cc/2012/12/30/Macbook-Air-HD/ 主要为了解决:如果让移动硬盘,无缝地,安全地,同时应用在苹果电脑系统和微软的系统. 除了储存文件,我还想用 ...

  10. Spark相关

    非常好的spark分析博客,我们team的,哈哈:http://jerryshao.me/ spark programming guide: https://github.com/mesos/spar ...