scrapy_ip_agent
# File name: rotate_useragent.py
# -*- coding: UTF-8 -*-
import logging
import random
import urllib2

import redis
from scrapy.contrib.downloadermiddleware.useragent import UserAgentMiddleware

from CrawlerTools.ScrapyFileSystem.config import *
class RotateUserAgentMiddleware(UserAgentMiddleware):
    """Downloader middleware that picks a random User-Agent and proxy per request.

    For every outgoing request a User-Agent string is chosen at random from
    ``USER_AGENTS`` and an ``ip:port`` entry is drawn at random from the Redis
    set named by ``PROXY_POOL_KEY``; the entry is written to
    ``request.meta['proxy']`` as an ``http://`` URL.
    """

    # Candidate User-Agent strings. Kept at class level so the collection is
    # built once at import time rather than on every request.
    USER_AGENTS = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 "
        "(KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
        "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 "
        "(KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 "
        "(KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 "
        "(KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 "
        "(KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 "
        "(KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 "
        "(KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 "
        "(KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 "
        "(KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 "
        "(KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 "
        "(KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 "
        "(KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 "
        "(KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 "
        "(KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 "
        "(KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 "
        "(KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 "
        "(KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 "
        "(KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    )

    # Name of the Redis set that holds the "ip:port" proxy entries.
    PROXY_POOL_KEY = "ipPool"

    # Redis client, created on first use and then reused for later requests.
    _redis_client = None

    def __init__(self, user_agent=""):
        """Store the fallback User-Agent string.

        The original spelled this method ``__inti__``, so it was never run as
        the constructor.
        """
        self.user_agent = user_agent

    def _get_redis(self):
        """Return the shared Redis client, creating it on first use."""
        if self._redis_client is None:
            # `config` is expected to come from the wildcard import of
            # CrawlerTools.ScrapyFileSystem.config -- TODO confirm.
            cfg = config("Redis")
            # Positional arguments: host, port, db (fixed to 1), password.
            self._redis_client = redis.Redis(
                cfg["host"], cfg["port"], 1, cfg["pwd"]
            )
        return self._redis_client

    def _pick_proxy(self):
        """Return a random ``http://host:port`` proxy URL, or None.

        None is returned when the pool is empty or the chosen entry does not
        have the ``host:port`` shape.
        """
        members = self._get_redis().srandmember(self.PROXY_POOL_KEY, 1)
        if not members:
            return None
        entry = members[0]
        if not isinstance(entry, str):
            # Some client/interpreter combinations hand back a non-str value.
            entry = entry.decode("utf-8")
        parts = entry.strip().split(":")
        if len(parts) < 2:
            return None
        return "http://" + parts[0] + ":" + parts[1]

    def process_request(self, request, spider):
        """Assign a random User-Agent header and a random proxy to *request*.

        Args:
            request: the outgoing request; its ``headers`` and ``meta`` are
                modified in place.
            spider: the spider issuing the request (unused).

        Returns:
            None.
        """
        # setdefault keeps a User-Agent that was already set on the request.
        request.headers.setdefault(
            "User-Agent", random.choice(self.USER_AGENTS)
        )

        proxy = self._pick_proxy()
        if proxy is None:
            # Previously an empty pool or a malformed entry raised IndexError.
            logging.getLogger(__name__).warning(
                "No usable proxy in Redis set %r; sending %s without a proxy",
                self.PROXY_POOL_KEY,
                request,
            )
            return
        # NOTE: proxies that require authentication are not handled here.
        request.meta["proxy"] = proxy
#Configuration profile
DOWNLOADER_MIDDLEWARES = {
    # Map the stock User-Agent middleware to None.
    "scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware": None,
    # Register the rotating User-Agent/proxy middleware with order value 400.
    "CrawlerTools.rotate_useragent.RotateUserAgentMiddleware": 400,
}
scrapy_ip_agent的更多相关文章
随机推荐
- 第三百零三天 how can I 坚持
今天年会,运气还不错,竟然中了个小奖,一个榨汁机,已经很满足了. 今天加上了她,感觉挺合适,就怕一句话聊不来就带搭不理的了.她很好,懂得知足,不攀比. 弟弟今天把房子首付交了,把贷款办完就算安心了,目 ...
- CoffeeScript学习(3)—— 函数
CoffeeScript函数 如果大家有看我之前关于ES6的箭头函数的话,这一篇也不会很难理解.我们这一次可以说一下,关于两者的一些细微差别. 基本 在CoffeeScript中,任何函数都是用箭头函 ...
- firefox 对相对定位的TD元素渲染错误
<!DOCTYPE html> <html> <head> <title></title> <meta http-equiv=&quo ...
- HDU 5676 ztr loves lucky numbers (模拟)
ztr loves lucky numbers 题目链接: http://acm.hust.edu.cn/vjudge/contest/121332#problem/I Description ztr ...
- jquery easyui将form表单元素的值序列化成对象
function serializeObject(form){ var o={}; $.each(form.serializeArray(),function(index){ if(o[this['n ...
- AutoCAD.NET二次开发:扩展数据之XData
结果缓存——ResultBuffer 结果缓存即 Autodesk.AutoCAD.DatabaseServices.ResultBuffer 类型,使用 ResultBuffer 对象时需要提供一个 ...
- ASP.NET项目中引用全局dll
在ASP.NET项目中,有些dll是全局dll,也就是说,没有放在单个项目的引用中.它们一般存放在如下目录C:\Windows\assembly中 这个时候,我们需要在单个项目中引用他们,应该如何做呢 ...
- POJ 3177 Redundant Paths(强连通分量)
题目链接:http://poj.org/problem?id=3177 题目大意是一个无向图给你n个点m条边,让你求出最少加多少条边 可以让任意两个点相通两条及以上的路线(每条路线点可以重复,但是每条 ...
- ABA problem
多线程及多进程编程同步时可能出现的问题,如果一个值被P1读取两次,两次的值相同,据此判断该值没有被修改过,但该值可能在两次读取之间被P2修改为另外一个value,并在P1再次读取之前修改回了原值.P1 ...
- AngularJS~大话开篇
AngularJS是一款优秀的前端JS框架,已经被用于Google的多款产品当中.AngularJS有着诸多特性,最为核心的是:MVVM.模块化.自动化双向数据绑定.语义化标签.依赖注入.等等. 前端 ...