A flash of Joy
import re
from datetime import timedelta
from tornado import httpclient, gen, ioloop, queues peoples = {'': 71, '': 66, '': 54, '': 50, '': 66, '': 61,
'': 103, '': 31, '': 32, '': 41, '': 33, '': 93, '': 50, '': 108, '': 55,
'': 55, '': 92, '': 56, '': 29, '': 27,
'': 25, '': 25, '': 50, '': 66, '': 68, '': 52, '': 50, '': 50, '': 52, '': 50,
'': 133, '': 166, '': 10, '': 8, '': 99, '': 18,
'': 50, '': 24, '': 19, '': 25, '': 24, '': 24, '': 67, '': 52, '': 67,
'': 67, '': 8, '': 31, '': 82, '': 62, '': 8, '': 104, '': 52, '': 52, '': 47,
'': 56, '': 72, '': 57, '': 36, '': 50, '': 120, '': 50,
'': 56} class AsySpider(object):
def __init__(self, urls, concurrency=10, results=None, **kwargs):
urls.reverse()
self.urls = urls
self.concurrency = concurrency
self._q = queues.Queue()
self._fetching = set()
self._fetched = set()
if results is None:
self.results = [] def fetch(self, url, **kwargs):
fetch = getattr(httpclient.AsyncHTTPClient(), 'fetch')
return fetch(url, raise_error=False, **kwargs) def handle_html(self, url, html):
"""handle html page"""
print(url) def handle_response(self, url, response):
"""inherit and rewrite this method if necessary"""
if response.code == 200:
self.handle_html(url, response.body) elif response.code == 599: # retry
self._fetching.remove(url)
self._q.put(url) @gen.coroutine
def get_page(self, url):
try:
response = yield self.fetch(url)
# print('######fetched %s' % url)
except Exception as e:
print('Exception: %s %s' % (e, url))
raise gen.Return(e)
raise gen.Return(response) @gen.coroutine
def _run(self):
@gen.coroutine
def fetch_url():
current_url = yield self._q.get()
try:
if current_url in self._fetching:
return # print('fetching****** %s' % current_url)
self._fetching.add(current_url) response = yield self.get_page(current_url)
self.handle_response(current_url, response) # handle reponse self._fetched.add(current_url) for i in range(self.concurrency):
if self.urls:
yield self._q.put(self.urls.pop()) finally:
self._q.task_done() @gen.coroutine
def worker():
while True:
yield fetch_url() self._q.put(self.urls.pop()) # add first url # Start workers, then wait for the work queue to be empty.
for _ in range(self.concurrency):
worker() yield self._q.join(timeout=timedelta(seconds=300000))
try:
assert self._fetching == self._fetched
except AssertionError:
print(self._fetching - self._fetched)
print(self._fetched - self._fetching) def run(self):
io_loop = ioloop.IOLoop.current()
io_loop.run_sync(self._run) class MySpider(AsySpider):
def fetch(self, url, **kwargs):
"""重写父类fetch方法"""
cookies_str = 'JSESSIONID=0000n4jBi_dKg91XbtHHQHDeeDL:1b4e17j2v; iPlanetDire' \
'ctoryPro=AQIC5wM2LY4Sfcxu%' \
'2FWPIJWGHttZPiXafd%2B1gowyEoxTmyiY%3D%40AAJTSQACMDE%3D%23'
headers = {
'User-Agent': 'mozilla/5.0 (compatible; baiduspider/2.0; +http://www.baidu.com/search/spider.html)',
'cookie': cookies_str
}
return super(MySpider, self).fetch(
url, headers=headers
) def handle_html(self, url, html):
url += 'qwertyu'
pattern = re.compile('userPhoto&ownerId=(.*)qwertyu')
filename = re.findall(pattern, url)[0]
# 注意把dir修改成你想要存放照片位置.例如C:/picture/
dir = '/home/innovation/文档/pic/'
with open(dir + filename + '.jpg', 'wb') as file:
file.write(html)
file.close() def main():
urls = []
url_pic = 'http://myportal.sxu.edu.cn/attachmentDownload.portal?notUseCache=true&type=userPhoto&ownerId='
for academy in peoples:
for i in range(peoples[academy]):
i += 1
if i < 10:
i = '' + str(i)
elif 100 > i >= 10:
i = '' + str(i)
urls.append(url_pic + '' + academy + str(i))
s = MySpider(urls)
s.run() if __name__ == '__main__':
main()
A flash of Joy的更多相关文章
- Compiling OpenGL games with the Flash C Compiler (FlasCC)
Compiling OpenGL games with the Flash C Compiler (FlasCC) In this article I show how to use the Flas ...
- 隐私泄露杀手锏 —— Flash 权限反射
[简版:http://weibo.com/p/1001603881940380956046] 前言 一直以为该风险早已被重视,但最近无意中发现,仍有不少网站存在该缺陷,其中不乏一些常用的邮箱.社交网站 ...
- 百度 flash html5自切换 多文件异步上传控件webuploader基本用法
双核浏览器下在chrome内核中使用uploadify总有302问题,也不知道如何修复,之所以喜欢360浏览器是因为帮客户控制渲染内核: 若页面需默认用极速核,增加标签:<meta name=& ...
- 解决“chrome提示adobe flash player 已经过期”的小问题
这个小问题也确实困扰我许久,后来看到chrome吧里面有人给出了解决方案: 安装install_flash_player_ppapi, 该软件下载地址:http://labs.adobe.com/do ...
- 在 Linux 中使用搜狗拼音输入法以及搞定 Flash 和支付宝
在 Ubuntu 中安装搜狗输入法 在 Ubuntu Kylin 系统中,默认安装搜狗拼音输入法,但是在原生 Ubuntu 系统中则不是.这可以理解,毕竟搜狗输入法的 Linux 版有 Kylin 团 ...
- [异常解决] ubuntukylin16.04 LTS中关于flash安装和使用不了的问题解决
http://www.linuxdiyf.com/linux/25211.html 归纳解决flash插件大法: 启动器中找到 软件更新,启动,点击 其它软件,把Canonical合作伙伴前方框 选上 ...
- 基于Adobe Flash平台的3D页游技术剖析
写在前面 从黑暗之光,佛本是道,大战神的有插件3D页游.再到如今的魔龙之戒. 足以证明,3D无插件正在引领页游技术的潮流. 目前,要做到3D引擎,有以下几个选择. 说到这里,我们发现.这些都不重要. ...
- 强大的flash头像上传插件(支持旋转、拖拽、剪裁、生成缩略图等)
今天介绍的这款flash上传头像功能非常强大,支持php,asp,jsp,asp.net 调用 头像剪裁,预览组件插件. 本组件需要安装Flash Player后才可使用,请从http://dl.pc ...
- MDK st-link下载STM32程序出现Internal command error和Error:Flash download failed. Target DLL
MDK st-link下载STM32程序出现Internal command error和Error:Flash download failed. Target DLL 是因为目标板的芯片处于休眠 ...
随机推荐
- 在SQL Server 2005中连接Oracle,完成查询、插入操作
建立指向Oracle的连接假设Oracle数据库的用户名为test,密码为test,在SQL Server数据库所在服务器上建立的指向Oracle数据库的服务命名为hisorcl.1. 在SQL Se ...
- Java 邮件发送
<dependency> <groupId>javax.mail</groupId> <artifactId>mail</artifactId&g ...
- zookeeper,dubbo,dubbo admin
zookeeper 1. 分布式协调服务:我们的程序运行在不同的机器上,这些机器可能位于同一个机架,同一个机房又或不同的数据中心.在这样的环境中,我们要实现协调该怎么办?那么这就是分布式协调服务要干的 ...
- ruby第一次实践 ”hello world“
下载ruby https://rubyinstaller.org/downloads/ 创建 ruby-version 写自己的版本 写Gemfile source: http://ruby.ta ...
- 浅析Java.lang.Runtime类
一.概述 Runtime类封装了运行时的环境.每个 Java 应用程序都有一个 Runtime 类实例,使应用程序能够与其运行的环境相连接. 一般不能实例化一个Runtime对象, ...
- js 判断移动设备、pc端、android、iPhone、是否为微信、微博、qq空间
varbrowser = { versions: function () { var u = navigator.userAgent, app = navigator.appVersio ...
- JavaScript的apply()方法和call()方法
1 <script type="text/javascript"> 2 /*定义一个人类*/ 3 function Person(name,age) 4 { 5 thi ...
- Sprite(精灵)&& 三个特殊的层Layer
用来作为以后复习使用. 1 #include "ScenceScend.h" CCScene* ScenceScend::scene() { CCScene* s = CCScen ...
- Mac > MacBook Pro的移动硬盘方案
灵感来自:http://cone.cc/2012/12/30/Macbook-Air-HD/ 主要为了解决:如果让移动硬盘,无缝地,安全地,同时应用在苹果电脑系统和微软的系统. 除了储存文件,我还想用 ...
- Spark相关
非常好的spark分析博客,我们team的,哈哈:http://jerryshao.me/ spark programming guide: https://github.com/mesos/spar ...