单线程爬虫VS多线程爬虫的效率对比
单线程爬虫:
import re
import requests
import time

# Browser-like User-Agent sent with every request.
_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.86 Safari/537.36'

url_EB = 'http://www.amazon.com/gp/search/other/ref=sr_sa_p_4?me=A22XNR713HGDVG&rh=n%3A9063592011%2Ck%3Aprojector&bbn=9063592011&keywords=projector&pickerToList=brandtextbin&ie=UTF8&qid=1461902521'
headers_EB = {'User-Agent': _USER_AGENT}

# Built from adjacent literals so the long URL can be wrapped in the source
# without embedding a newline character in the query string.
url_AML = ('https://www.amazon.com/gp/search/other/ref=sr_sa_p_4?me=A3UJI9WWE6PRP5&rh=i%3Amerchant-items'
           '&pickerToList=brandtextbin&ie=UTF8&qid=1461899728')
headers_AML = {'User-Agent': _USER_AGENT}

url_DL = 'https://www.amazon.com/gp/search/other/ref=sr_sa_p_4?me=AS7ZU4MN0FPOY&rh=i%3Amerchant-items&pickerToList=brandtextbin&ie=UTF8&qid=1461901862'
headers_DL = {'User-Agent': _USER_AGENT}

# Display names used in the progress banners, keyed by an arbitrary letter.
name = {'a': 'ExclusiveBulbs',
        'b': 'Amazing Lamps',
        'c': 'Dynamic Lamps'}

# Unused alternative kept from the original post: extract the counts
# separately and zip them with the names into a dict.
# listing_count = re.findall('<span class="narrowValue">(.*?)</span',data.text)
# f = dict(map(lambda x,y:[x,y],store_name,listing_count))
#
# for k,v in f.items():
#     print(k,v)


def foo_one(url, headers, name):
    """Download one page and print every refinementLink/narrowValue pair.

    Prints a start banner, fetches ``url`` with ``headers``, prints each
    2-tuple captured by the regular expression (in the sample output these
    are a brand name and a parenthesised count), prints a finish banner and
    then sleeps for one second.

    Args:
        url: Address of the page to fetch.
        headers: HTTP headers passed to ``requests.get``.
        name: Label shown in the start banner.

    Raises:
        requests.RequestException: propagated unchanged if the request fails.
    """
    print('--------------------------开始爬去{0}at{1}---------------------------'.format(name,time.ctime()))
    response = requests.get(url, headers=headers)
    # Each match is (text of the refinementLink span, text of the
    # narrowValue span that immediately follows it).
    store_name = re.findall('<span class="refinementLink">(.*?)</span><span class="narrowValue">(.*?)</span',response.text)
    for i in store_name:
        print(i)
    print('--------------------------爬去完毕at{}----------------------------'.format(time.ctime()))
    # Pause between consecutive pages; this second is part of the measured
    # total run time.
    time.sleep(1)


if __name__ == '__main__':
    # Fetch the three pages strictly one after another.
    foo_one(url_EB, headers_EB, name['a'])
    foo_one(url_AML, headers_AML, name['b'])
    foo_one(url_DL, headers_DL, name['c'])
输出:00:25:33开始,00:26:02结束,耗时29秒(其中包含每个页面抓取后 sleep 的 1 秒,共 3 秒)
--------------------------开始爬去ExclusiveBulbsatSat Apr 30 00:25:33 2016---------------------------
('A.Shine', ' (97)')
('AmpacElectronics', ' (1,644)')
('AuraBeam', ' (33,084)')
('AWO', ' (1,206)')
('Battery1inc', ' (694)')
('Comoze Lamps', ' (6,172)')
('Compatible Lamp', ' (317)')
('Corgi Lamps', ' (2,124)')
('CTLAMP', ' (3,499)')
('Dell', ' (191)')
('Diamond Lamps', ' (966)')
('Dynamic', ' (4)')
('Eiki', ' (460)')
('ePharos', ' (2,592)')
('Epson', ' (1,456)')
('EREPLACEMENT', ' (115)')
('eReplacements', ' (814)')
('eWo's', ' (120)')
('eWorldlamp', ' (354)')
('FI Lamps', ' (5,707)')
('FL Projector Lamp For Mitsubishi', ' (1)')
('For Epson', ' (3)')
('Generic', ' (9,769)')
('Good Lamp', ' (819)')
('HCDZ', ' (2,746)')
('Hitachi', ' (935)')
('IET Lamps', ' (2,144)')
('InFocus', ' (44)')
('JVC', ' (326)')
('KCL', ' (3,781)')
('Lampedia', ' (618)')
('Lutema', ' (1,956)')
('Mitsubishi', ' (1,006)')
('Mogobe', ' (1,335)')
('MyProjectorLamps', ' (473)')
('NEC', ' (446)')
('Nec Computers', ' (13)')
('Optoma', ' (956)')
('Osram Sylvania', ' (78)')
('Panasonic', ' (820)')
('Philips', ' (7,502)')
('Powerwarehouse', ' (9,971)')
('Projector Lamps World', ' (112)')
('Pureglare', ' (369)')
('Samsung', ' (1,078)')
('Sharp', ' (426)')
('Shopforbattery', ' (2,510)')
('SMART BOARD', ' (66)')
('Sony', ' (990)')
('TVLampsforless', ' (14)')
('Unknown', ' (722)')
--------------------------爬去完毕atSat Apr 30 00:25:57 2016----------------------------
--------------------------开始爬去Amazing LampsatSat Apr 30 00:25:58 2016---------------------------
('AWO', ' (1)')
('Comoze Lamps', ' (2)')
('DNGO', ' (8)')
('Electrified', ' (9)')
('ELECTRIFIED', ' (10)')
('Electrified Discounters', ' (5)')
('ELECTRIFIED LAMPS', ' (1,177)')
('ELECTRIFIED PRINTHEAD', ' (24)')
('ELECTRIFIED PRINTHEADS', ' (2)')
('FI Lamps', ' (2)')
('Generic', ' (34)')
('GloWatt', ' (1)')
('KCL', ' (1)')
('OEM', ' (1)')
('Powerwarehouse', ' (7)')
('SKU', ' (5)')
('Top Lamp', ' (1)')
('Unknown', ' (1)')
('USOM', ' (3)')
--------------------------爬去完毕atSat Apr 30 00:26:00 2016----------------------------
--------------------------开始爬去Dynamic LampsatSat Apr 30 00:26:01 2016---------------------------
('Battery1inc', ' (85)')
('BenQ', ' (237)')
('Buslink', ' (31)')
('Calumet', ' (2)')
('Comoze Lamps', ' (405)')
('CTLAMP', ' (615)')
('Dell', ' (82)')
('Divine Lighting', ' (36)')
('DNGO', ' (63)')
('Dynamic', ' (4)')
('Eiko', ' (140)')
('Electrified', ' (2)')
('ELECTRIFIED LAMPS', ' (24)')
('Electronix Xpress', ' (418)')
('ePharos', ' (502)')
('Epson', ' (631)')
('eReplacements', ' (119)')
('FI Lamps', ' (505)')
('FL Projector Lamp For Mitsubishi', ' (1)')
('G-lamps', ' (43)')
('GE', ' (248)')
('GE Lighting', ' (152)')
('General Electric', ' (53)')
('Generic', ' (1,671)')
('Genie', ' (101)')
('GLAMPS', ' (2)')
('Impact', ' (7)')
('Industrial Lighting Solutions', ' (9)')
('KCL', ' (280)')
('Kodak', ' (1)')
('Lampedia', ' (63)')
('M-Wave', ' (830)')
('Mitsubishi', ' (406)')
('Mitsubishi DLP TV Bulbs', ' (29)')
('Mocpinc', ' (10)')
('MyProjectorLamps', ' (344)')
('Nec', ' (19)')
('Optoma', ' (161)')
('Osram', ' (1,295)')
('Panasonic', ' (245)')
('Philips', ' (988)')
('Powerwarehouse', ' (239)')
('Projector Lamps World', ' (45)')
('Pureglare', ' (107)')
('Samsung', ' (323)')
('ShopJimmy', ' (3)')
('Sony', ' (141)')
('Sylvania', ' (115)')
('Technical Precision', ' (10)')
('Unknown', ' (167)')
('Welch Allyn Compatible', ' (1)')
--------------------------爬去完毕atSat Apr 30 00:26:02 2016----------------------------
多线程爬虫:00:32:37开始,00:32:39结束,耗时2秒
import re
import requests
import threading
import time
from time import ctime,sleep

# Browser-like User-Agent sent with every request.
_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.86 Safari/537.36'

url_EB = 'http://www.amazon.com/gp/search/other/ref=sr_sa_p_4?me=A22XNR713HGDVG&rh=n%3A9063592011%2Ck%3Aprojector&bbn=9063592011&keywords=projector&pickerToList=brandtextbin&ie=UTF8&qid=1461902521'
headers_EB = {'User-Agent': _USER_AGENT}

# Built from adjacent literals so the long URL can be wrapped in the source
# without embedding a newline character in the query string.
url_AML = ('https://www.amazon.com/gp/search/other/ref=sr_sa_p_4?me=A3UJI9WWE6PRP5&rh=i%3Amerchant-items'
           '&pickerToList=brandtextbin&ie=UTF8&qid=1461899728')
headers_AML = {'User-Agent': _USER_AGENT}

url_DL = 'https://www.amazon.com/gp/search/other/ref=sr_sa_p_4?me=AS7ZU4MN0FPOY&rh=i%3Amerchant-items&pickerToList=brandtextbin&ie=UTF8&qid=1461901862'
headers_DL = {'User-Agent': _USER_AGENT}

# Display names used in the progress banners, keyed by an arbitrary letter.
name = {'a': 'ExclusiveBulbs',
        'b': 'Amazing Lamps',
        'c': 'Dynamic Lamps'}

# Unused alternative kept from the original post: extract the counts
# separately and zip them with the names into a dict.
# listing_count = re.findall('<span class="narrowValue">(.*?)</span',data.text)
# f = dict(map(lambda x,y:[x,y],store_name,listing_count))
#
# for k,v in f.items():
#     print(k,v)


def foo_one(url, headers, name):
    """Download one page and print every refinementLink/narrowValue pair.

    Prints a start banner, fetches ``url`` with ``headers``, prints each
    2-tuple captured by the regular expression (in the sample output these
    are a brand name and a parenthesised count) and prints a finish banner.
    Intended to be used as a thread target; output from concurrently
    running calls may interleave.

    Args:
        url: Address of the page to fetch.
        headers: HTTP headers passed to ``requests.get``.
        name: Label shown in the start and finish banners.

    Raises:
        requests.RequestException: propagated unchanged if the request fails.
    """
    print('--------------------------开始爬去{0}at{1}---------------------------'.format(name,time.ctime()))
    response = requests.get(url, headers=headers)
    # Each match is (text of the refinementLink span, text of the
    # narrowValue span that immediately follows it).
    store_name = re.findall('<span class="refinementLink">(.*?)</span><span class="narrowValue">(.*?)</span',response.text)
    for i in store_name:
        print(i)
    print('--------------------------爬去完毕{0}at{1}----------------------------'.format(name,time.ctime()))


# One worker thread per page; they are only started under __main__ below.
threads = []
t1 = threading.Thread(target=foo_one, args=(url_EB, headers_EB, name['a']))
threads.append(t1)
t2 = threading.Thread(target=foo_one, args=(url_AML, headers_AML, name['b']))
threads.append(t2)
t3 = threading.Thread(target=foo_one, args=(url_DL, headers_DL, name['c']))
threads.append(t3)

if __name__ == '__main__':
    # Start every worker first so that the three downloads overlap.
    for t in threads:
        # Attribute form of the deprecated setDaemon(True).
        t.daemon = True
        t.start()
    # Wait for ALL workers. Joining only the last one lets the main thread
    # exit while earlier daemon threads are still downloading, which kills
    # them and silently drops their results.
    for t in threads:
        t.join()
    print("all over %s" % ctime())
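输出(注意:其中缺少 Amazing Lamps 的抓取结果和对应的“爬去完毕”行——从输出看,这次运行时主线程很可能只等待了最后一个线程结束,尚未完成的守护线程随主线程退出而被终止,因此上面的 2 秒并不包含全部三个页面的抓取时间):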
--------------------------开始爬去ExclusiveBulbsatSat Apr 30 00:32:37 2016---------------------------
--------------------------开始爬去Amazing LampsatSat Apr 30 00:32:37 2016---------------------------
--------------------------开始爬去Dynamic LampsatSat Apr 30 00:32:37 2016---------------------------
('A.Shine', ' (97)')
('AmpacElectronics', ' (1,645)')
('AuraBeam', ' (33,088)')
('AWO', ' (1,209)')
('Battery1inc', ' (694)')
('Comoze Lamps', ' (6,172)')
('Compatible Lamp', ' (317)')
('Corgi Lamps', ' (2,123)')
('CTLAMP', ' (3,501)')
('Dell', ' (191)')
('Diamond Lamps', ' (966)')
('Dynamic', ' (4)')
('Eiki', ' (457)')
('ePharos', ' (2,592)')
('Epson', ' (1,456)')
('EREPLACEMENT', ' (115)')
('eReplacements', ' (813)')
('eWo's', ' (120)')
('eWorldlamp', ' (354)')
('FI Lamps', ' (5,710)')
('FL Projector Lamp For Mitsubishi', ' (1)')
('For Epson', ' (3)')
('Generic', ' (9,771)')
('Good Lamp', ' (819)')
('HCDZ', ' (2,748)')
('Hitachi', ' (935)')
('IET Lamps', ' (2,137)')
('InFocus', ' (44)')
('JVC', ' (326)')
('KCL', ' (3,783)')
('Lampedia', ' (618)')
('Lutema', ' (1,955)')
('Mitsubishi', ' (1,006)')
('Mogobe', ' (1,336)')
('MyProjectorLamps', ' (473)')
('NEC', ' (450)')
('Nec Computers', ' (13)')
('Optoma', ' (956)')
('Osram Sylvania', ' (78)')
('Panasonic', ' (820)')
('Philips', ' (7,502)')
('Powerwarehouse', ' (9,972)')
('Projector Lamps World', ' (112)')
('Pureglare', ' (369)')
('Samsung', ' (1,078)')
('Sharp', ' (426)')
('Shopforbattery', ' (2,511)')
('SMART BOARD', ' (66)')
('Sony', ' (990)')
('TVLampsforless', ' (14)')
('Unknown', ' (722)')
--------------------------爬去完毕ExclusiveBulbsatSat Apr 30 00:32:38 2016----------------------------
('Battery1inc', ' (85)')
('BenQ', ' (237)')
('Buslink', ' (31)')
('Calumet', ' (2)')
('Comoze Lamps', ' (405)')
('CTLAMP', ' (615)')
('Dell', ' (82)')
('Divine Lighting', ' (36)')
('DNGO', ' (63)')
('Dynamic', ' (4)')
('Eiko', ' (140)')
('Electrified', ' (2)')
('ELECTRIFIED LAMPS', ' (24)')
('Electronix Xpress', ' (418)')
('ePharos', ' (502)')
('Epson', ' (631)')
('eReplacements', ' (119)')
('FI Lamps', ' (505)')
('FL Projector Lamp For Mitsubishi', ' (1)')
('G-lamps', ' (43)')
('GE', ' (248)')
('GE Lighting', ' (152)')
('General Electric', ' (53)')
('Generic', ' (1,671)')
('Genie', ' (101)')
('GLAMPS', ' (2)')
('Impact', ' (7)')
('Industrial Lighting Solutions', ' (9)')
('KCL', ' (280)')
('Kodak', ' (1)')
('Lampedia', ' (63)')
('M-Wave', ' (830)')
('Mitsubishi', ' (406)')
('Mitsubishi DLP TV Bulbs', ' (29)')
('Mocpinc', ' (10)')
('MyProjectorLamps', ' (344)')
('Nec', ' (19)')
('Optoma', ' (161)')
('Osram', ' (1,295)')
('Panasonic', ' (245)')
('Philips', ' (988)')
('Powerwarehouse', ' (239)')
('Projector Lamps World', ' (45)')
('Pureglare', ' (107)')
('Samsung', ' (323)')
('ShopJimmy', ' (3)')
('Sony', ' (141)')
('Sylvania', ' (115)')
('Technical Precision', ' (10)')
('Unknown', ' (167)')
('Welch Allyn Compatible', ' (1)')
--------------------------爬去完毕Dynamic LampsatSat Apr 30 00:32:39 2016----------------------------
all over Sat Apr 30 00:32:39 2016
单线程爬虫VS多线程爬虫的效率对比的更多相关文章
- 【Python爬虫实战】多线程爬虫---糗事百科段子爬取
多线程爬虫:即程序中的某些程序段并行执行,合理地设置多线程,可以让爬虫效率更高糗事百科段子普通爬虫和多线程爬虫分析该网址链接得出:https://www.qiushibaike.com/8hr/pag ...
- Java 多线程爬虫及分布式爬虫架构探索
这是 Java 爬虫系列博文的第五篇,在上一篇 Java 爬虫服务器被屏蔽,不要慌,咱们换一台服务器 中,我们简单的聊反爬虫策略和反反爬虫方法,主要针对的是 IP 被封及其对应办法.前面几篇文章我们把 ...
- Java 多线程爬虫及分布式爬虫架构
这是 Java 爬虫系列博文的第五篇,在上一篇 Java 爬虫服务器被屏蔽,不要慌,咱们换一台服务器 中,我们简单的聊反爬虫策略和反反爬虫方法,主要针对的是 IP 被封及其对应办法.前面几篇文章我们把 ...
- python多线程爬虫设计及实现示例
爬虫的基本步骤分为:获取,解析,存储.假设这里获取和存储为io密集型(访问网络和数据存储),解析为cpu密集型.那么在设计多线程爬虫时主要有两种方案:第一种方案是一个线程完成三个步骤,然后运行多个线程 ...
- 抓包分析、多线程爬虫及xpath学习
1.抓包分析 1.1 Fiddler安装及基本操作 由于很多网站采用的是HTTPS协议,而fiddler默认不支持HTTPS,先通过设置使fiddler能抓取HTTPS网站,过程可参考(https:/ ...
- python爬虫之多线程、多进程+代码示例
python爬虫之多线程.多进程 使用多进程.多线程编写爬虫的代码能有效的提高爬虫爬取目标网站的效率. 一.什么是进程和线程 引用廖雪峰的官方网站关于进程和线程的讲解: 进程:对于操作系统来说,一个任 ...
- c#中@标志的作用 C#通过序列化实现深表复制 细说并发编程-TPL 大数据量下DataTable To List效率对比 【转载】C#工具类:实现文件操作File的工具类 异步多线程 Async .net 多线程 Thread ThreadPool Task .Net 反射学习
c#中@标志的作用 参考微软官方文档-特殊字符@,地址 https://docs.microsoft.com/zh-cn/dotnet/csharp/language-reference/toke ...
- 【python3两小时快速入门】入门笔记03:简单爬虫+多线程爬虫
作用:直接将目标网页保存到本地 1.爬虫代码修改自网络,目前运行平稳,博主需要的是精准爬取,数据量并不大,暂未加多线程. 2.分割策略是通过查询条件进行分类,循环启动多条线程. 1.单线程简单爬虫(第 ...
- Python爬虫进阶 | 多线程
一.简介 为了提高爬虫程序效率,由于python解释器GIL,导致同一进程中即使有多个线程,实际上也只会有一个线程在运行,但通过request.get发送请求获取响应时有阻塞,所以采用了多线程依然可以 ...
随机推荐
- avro序列化详细操作
Intellij 15.0.3 Maven avro 1.8.0 Avro是一个数据序列化系统. 它提供以下: 1 丰富的数据结构类型 2 快速可压缩的二进制数据形式 3 存储持久数据的文件容器 4 ...
- 转:Android推送技术研究
Android推送技术研究 字数5208 阅读4026 评论5 喜欢35 前言 最近研究Android推送的实现, 研究了两天一夜, 有了一点收获, 写下来既为了分享, 也为了吐槽. 需要说明的是有些 ...
- [Functional Programming Monad] Map And Evaluate State With A Stateful Monad
We explore our first stateful transaction, by devising a means to echo our state value into the resu ...
- Codeforces Round #307 (Div. 2) D. GukiZ and Binary Operations (矩阵高速幂)
题目地址:http://codeforces.com/contest/551/problem/D 分析下公式能够知道,相当于每一位上放0或者1使得最后成为0或者1.假设最后是0的话,那么全部相邻位一定 ...
- kyeremal-bzoj2038-[2009国家集训队]-小z的袜子(hose)-莫队算法
bzoj2038-[2009国家集训队]-小z的袜子(hose) F.A.Qs Home Discuss ProblemSet Status Ranklist Con ...
- <LeetCode OJ> 328. Odd Even Linked List
328. Odd Even Linked List Total Accepted: 9271 Total Submissions: 24497 Difficulty: Easy Given a sin ...
- 【MyBatis学习03】原始dao开发方法及其弊端
上一篇博文总结了一下mybatis的入门,接下来就要开发dao方法了,这篇博文主要总结一下mybatis中原始dao开发的方法,最后并总结一下原始dao开发方法的弊端.mybatis中dao开发应该使 ...
- 牛散NO.3:MACD放之四海 假作真时真亦假
大宗商品日线“异曲同工夺命勾魂枪” 话说有实战意义的技术在任何资本市场里都能产生出神奇的效果.不能说放之四海皆准,但至少起到触类旁通的“牵强”吧.大宗商品特别是在国际市场交易的大宗 商品由于是来自各方 ...
- HBase - Filter - 过滤器的介绍以及使用 | 那伊抹微笑
博文作者:那伊抹微笑 csdn 博客地址:http://blog.csdn.net/u012185296 itdog8 地址链接 : http://www.itdog8.com/thread-214- ...
- Atitit.变量的定义 获取 储存 物理结构 基本类型简化 隐式转换 类型推导 与底层原理 attilaxDSL
Atitit.变量的定义 获取 储存 物理结构 基本类型简化 隐式转换 类型推导 与底层原理 attilaxDSL 1.1. $ 美元字符, php 黑头1 1.2. 默认变量的范围和声明:1 1.3 ...