python 模拟浏览器

想用python模拟浏览器访问web的方法测试些东西，有哪几种方法呢？

一类：单纯的访问web，不解析其js，css等。

1. urllib2

#-*- coding:utf-8 -*

import urllib2

def Furllib2(ip,port,url,timeout):
    """Fetch ``url`` through an HTTP proxy with urllib2 and print the response.

    The proxy opener is installed process-wide with
    ``urllib2.install_opener``, so later ``urllib2.urlopen`` calls also go
    through the same proxy.

    Args:
        ip: Proxy host.
        port: Proxy port.
        url: URL to request.
        timeout: Timeout passed to ``urllib2.urlopen``.

    Returns:
        True when the request succeeded and the body was read, 0 otherwise.
    """
    proxydict = {'http': "http://%s:%s" % (ip, port)}
    print(proxydict)

    opener = urllib2.build_opener(urllib2.ProxyHandler(proxydict))
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
    urllib2.install_opener(opener)

    try:
        response = urllib2.urlopen(url, timeout=timeout)
        try:
            print(response.geturl())
            print(response.getcode())
            print(response.info())
            print(response.read())
        finally:
            # Always release the connection, even if reading the body fails.
            response.close()
        return True
    except Exception as err:
        # ``Exception`` (not a bare ``except``) so Ctrl-C and SystemExit
        # still propagate; the cause is printed instead of being hidden.
        print('some errors occored' + '-'*50)
        print(err)
        return 0

def main():
    """Run the urllib2 proxy demo against a sample site and print its result."""
    proxyip = '14.18.16.69'
    proxyport = '80'
    url = 'http://www.cnblogs.com/'
    timeout = 4

    print(Furllib2(proxyip, proxyport, url, timeout))

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":

    main()

2. mechanize(与网站的自动化交互)

http://wwwsearch.sourceforge.net/mechanize/doc.html

def Fmechanize(url):
    """Open ``url`` with a cookie-aware mechanize opener and print its metadata.

    Args:
        url: URL to request with GET.

    Returns:
        True when the page was opened, 0 on any error.
    """
    cookies = mechanize.CookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))

    try:
        r = opener.open(url)  # GET
        # For a POST, pass the request body as the second argument:
        # r = opener.open("http://example.com/", data)
        try:
            print(r.geturl())
            print(r.info())
        finally:
            # Release the connection whether or not printing succeeded.
            r.close()
        return True
    except Exception as err:
        # Report the cause instead of failing silently; ``Exception`` keeps
        # KeyboardInterrupt/SystemExit propagating.
        print(err)
        return 0

二类：模拟浏览器，使用firefox等的浏览器引擎，支持js，css等。

1. selenium 的firefox或者chrome等驱动，但是由于要打开一个浏览器，所以会比较慢（浏览器驱动可以到selenium官网上下载，也可以到firefox插件处搜索）

def Fselenium_firefox(ip,port,url,timeout):
    """Load ``url`` in Firefox through an HTTP proxy and print its cookies.

    Args:
        ip: Proxy host.
        port: Proxy port. It is converted with ``int`` because
            ``network.proxy.http_port`` is an integer preference and callers
            may pass the port as a string.
        url: Page to open.
        timeout: Passed to ``webdriver.Firefox`` as its ``timeout`` argument.

    Returns:
        1 when the page was loaded and its cookies were read, 0 on any
        failure (the traceback is printed).
    """
    try:
        profile = webdriver.FirefoxProfile()
        # 1 selects manual proxy configuration in Firefox.
        profile.set_preference('network.proxy.type', 1)
        profile.set_preference('network.proxy.http', ip)
        profile.set_preference('network.proxy.http_port', int(port))
        profile.update_preferences()
        driver = webdriver.Firefox(profile, timeout=timeout)
    except Exception:
        traceback.print_exc()
        return 0

    try:
        driver.get(url)
        # Fixed wait before reading cookies; presumably gives page scripts
        # time to run -- confirm whether an explicit wait would be better.
        time.sleep(5)
        cookies = driver.get_cookies()
        print(cookies)
        return 1
    except Exception:
        traceback.print_exc()
        return 0
    finally:
        # Close the browser on both the success and the failure path.
        driver.quit()

2. selenium :headless test使用selenium+ phantomjs驱动，无需打开浏览器，但是支持js的模拟浏览器动作，也就是说和你手工打开是没有区别的。

http://selenium.googlecode.com/git/docs/api/py/api.html

def Fselenium_phantomjs(ip,port,url,timeout):
    """Load ``url`` in headless PhantomJS through an HTTP proxy.

    Args:
        ip: Proxy host.
        port: Proxy port.
        url: Page to open.
        timeout: Page-load timeout passed to ``driver.set_page_load_timeout``.

    Returns:
        True when the driver's current URL could be read after the load
        attempt (even if the load itself raised), 0 otherwise.
    """
    service_args = [
        '--proxy=%s:%s' % (ip, port),
        '--proxy-type=http',
    ]
    print(service_args)

    try:
        driver = webdriver.PhantomJS(service_args=service_args)
        # To use a phantomjs binary at a specific location, pass its path first:
        # driver = webdriver.PhantomJS("/root/phantomjs-1.9.7-linux-x86_64/bin/phantomjs", service_args=service_args)
    except Exception:
        # Without a driver there is nothing left to query or clean up.
        traceback.print_exc()
        return 0

    try:
        try:
            driver.set_page_load_timeout(timeout)
            driver.get(url)
            time.sleep(4)
        except Exception:
            # Best effort: a failed or timed-out load is reported, but the
            # current URL is still read below.
            traceback.print_exc()

        try:
            print(driver.current_url)
            return True
        except Exception:
            traceback.print_exc()
            return 0
    finally:
        # Always stop the PhantomJS process so it does not leak.
        driver.quit()

3. qt，网上找来的代码

http://qt-project.org/wiki/PySide#PySide.QtWebKit.PySide.QtWebKit.QWebView.url

from PyQt4 import QtCore, QtGui, QtWebKit, QtNetwork

class cookieJar(QtNetwork.QNetworkCookieJar):
    """Cookie jar that preloads cookies stored in the parent window's settings.

    The parent is expected to expose a ``settings`` attribute with a
    ``value(key)`` method; it is kept as ``self.mainWindow``.
    """

    def __init__(self, cookiesKey, parent=None):
        """Create the jar and load any cookies saved under ``cookiesKey``.

        Args:
            cookiesKey: Settings key under which raw cookie data is stored.
            parent: Owning window, also used as the Qt parent. When it is
                None there are no settings to read and the jar starts empty.
        """
        super(cookieJar, self).__init__(parent)
        self.mainWindow = parent
        self.cookiesKey = cookiesKey

        if parent is None:
            # The signature allows parent=None; avoid the AttributeError on
            # ``None.settings`` in that case.
            return

        cookiesValue = self.mainWindow.settings.value(self.cookiesKey)
        if cookiesValue:
            cookiesList = QtNetwork.QNetworkCookie.parseCookies(cookiesValue)
            self.setAllCookies(cookiesList)

    # Writing cookies back into the settings is currently disabled:
    # def setCookiesFromUrl(self, cookieList, url):
    #     cookiesValue = self.mainWindow.settings.value(self.cookiesKey)
    #     cookiesArray = cookiesValue if cookiesValue else QtCore.QByteArray()
    #     for cookie in cookieList:
    #         cookiesArray.append(cookie.toRawForm() + "\n")
    #     self.mainWindow.settings.setValue(self.cookiesKey, cookiesArray)
    #     return super(cookieJar, self).setCookiesFromUrl(cookieList, url)

    def deleteCookie(self, cookieList):
        """Query the parent's settings with an empty list as the key.

        NOTE(review): ``cookieList`` is unused and the looked-up value is
        discarded, so this looks unfinished -- confirm the intended behavior
        before relying on it.
        """
        cookie = []
        self.mainWindow.settings.value(cookie)

class webView(QtWebKit.QWebView):
    """Web view whose page uses a ``cookieJar`` for its network requests."""

    def __init__(self, cookiesKey, url, parent=None):
        """Create the view and attach a ``cookieJar`` to its page.

        Args:
            cookiesKey: Forwarded to ``cookieJar``.
            url: Accepted but not used by the constructor.
            parent: Qt parent; also forwarded to ``cookieJar``.
        """
        super(webView, self).__init__(parent)
        jar = cookieJar(cookiesKey, parent)
        self.cookieJar = jar
        network = self.page().networkAccessManager()
        network.setCookieJar(jar)

class myWindow(QtGui.QMainWindow):
    """Small tabbed browser window with a URL field and an "Add Tab" action."""

    def __init__(self, parent=None):
        """Build the tab area and the toolbar, then create the settings store."""
        super(myWindow, self).__init__(parent)
        self.cookiesKey = "cookies"
        self._createTabArea()
        self._createToolBar()
        self.setCentralWidget(self.tabWidget)
        self.settings = QtCore.QSettings()

    def _createTabArea(self):
        """Create the closable tab widget inside a vertically laid-out container."""
        container = QtGui.QWidget(self)
        tabs = QtGui.QTabWidget(container)
        tabs.setTabsClosable(True)
        layout = QtGui.QVBoxLayout(container)
        layout.addWidget(tabs)
        self.centralwidget = container
        self.tabWidget = tabs
        self.verticalLayout = layout

    def _createToolBar(self):
        """Create the top toolbar holding the "Add Tab" action and the URL field."""
        action = QtGui.QAction(self)
        action.setText("Add Tab")
        action.triggered.connect(self.on_actionTabAdd_triggered)
        urlField = QtGui.QLineEdit(self)
        urlField.setText("http://www.example.com")
        bar = QtGui.QToolBar(self)
        bar.addAction(action)
        bar.addWidget(urlField)
        self.addToolBar(QtCore.Qt.ToolBarArea(QtCore.Qt.TopToolBarArea), bar)
        self.actionTabAdd = action
        self.lineEdit = urlField
        self.toolBar = bar

    @QtCore.pyqtSlot()
    def on_actionShowCookies_triggered(self):
        """Print the raw form of every cookie held by the current tab's jar."""
        view = self.tabWidget.currentWidget()
        jar = view.page().networkAccessManager().cookieJar()
        for cookie in jar.allCookies():
            print(cookie.toRawForm())

    @QtCore.pyqtSlot()
    def on_actionTabAdd_triggered(self):
        """Open the URL typed in the line edit, or a blank page if it is empty."""
        url = self.lineEdit.text()
        if not url:
            url = 'about:blank'
        self.addNewTab(url)

    def addNewTab(self, url):
        """Create a ``webView`` for ``url``, add it as a new tab and select it."""
        position = self.tabWidget.count()
        tabName = u"Tab {0}".format(str(position))
        view = webView(self.cookiesKey, url, self)
        view.loadFinished.connect(self.on_tabWidget_loadFinished)
        view.load(QtCore.QUrl(url))
        self.tabWidget.setCurrentIndex(self.tabWidget.addTab(view, tabName))

    @QtCore.pyqtSlot()
    def on_tabWidget_loadFinished(self):
        """Read the stored cookie value after a page load; the value is not used."""
        storedCookies = self.settings.value(self.cookiesKey)

# Script entry point for the tabbed-browser demo: create the Qt application,
# show a myWindow, and run the event loop until it exits.
if __name__ == "__main__":

    import sys

    app = QtGui.QApplication(sys.argv)

    app.setApplicationName('myWindow')

    # NOTE: this binds the module-level name ``main`` to the window object.
    main = myWindow()

    main.resize(666, 333)

    main.show()

    # The event loop's return value becomes the process exit status.
    sys.exit(app.exec_())

4. qt-headless

http://qt-project.org/wiki/PySide#PySide.QtWebKit.PySide.QtWebKit.QWebView.url

import sys

from PyQt4.QtGui import *

from PyQt4.QtCore import *

from PyQt4.QtWebKit import *  

class Render(QWebPage):
    """Headless page loader: loads a URL and blocks until loading finishes.

    After construction, ``self.frame`` holds the page's main frame, from which
    the rendered HTML can be read (for example with ``frame.toHtml()``).
    """

    def __init__(self, url):
        """Load ``url`` and run the Qt event loop until ``loadFinished`` fires.

        Args:
            url: Address of the page to load.
        """
        # Reuse an existing application object if there is one, so that more
        # than one Render can be created in the same process.
        self.app = QApplication.instance() or QApplication(sys.argv)
        QWebPage.__init__(self)
        self.loadFinished.connect(self._loadFinished)
        self.mainFrame().load(QUrl(url))
        # Blocks here until _loadFinished() calls quit().
        self.app.exec_()

    def _loadFinished(self, result):
        """Store the main frame and stop the event loop.

        ``result`` (the signal's argument) is not inspected, so a failed load
        is handled the same way as a successful one.
        """
        self.frame = self.mainFrame()
        self.app.quit()

# Demo: render one page headlessly and dump the resulting HTML.
url = 'http://webscraping.com'

# Constructing Render runs the Qt event loop and returns once the page's
# loadFinished signal has fired.
r = Render(url)

# r.frame is set by Render._loadFinished.
html = r.frame.toHtml()

# NOTE(review): toHtml() presumably returns a Qt string type here; printing
# pages with non-ASCII text may need an explicit encode -- confirm.
print html

5. splinter :打开浏览器，模拟操作，python的

http://splinter.cobrateam.info/docs/tutorial.html

>>> from splinter import Browser

>>> browser = Browser()

>>> url = "http://www.cnblogs.com"

>>> browser.visit(url)

具体用哪个要看你有什么具体的需求了

python 模拟浏览器的更多相关文章

第14.7节 Python模拟浏览器访问实现http报文体压缩传输
一. 引言在<第14.6节 Python模拟浏览器访问网页的实现代码>介绍了使用urllib包的request模块访问网页的方法.但上节特别说明http报文头Accept-Encodin ...
Python模拟浏览器多窗口切换
# 模拟浏览器多窗口切换 # 代码中引入selenium版本为:3.4.3 # 通过Chrom浏览器访问发起请求 # Chrom版本:59 ,chromdriver:2.3 # 需要对应版本的Chro ...
Python模拟浏览器前进后退操作
# 模拟浏览器前进后退操作 # 代码中引入selenium版本为:3.4.3 # 通过Chrom浏览器访问发起请求 # Chrom版本:59 ,chromdriver:2.3 # 需要对应版本的Chr ...
python模拟浏览器保存Cookie进行会话
#! /usr/bin/env python # -*-coding:utf- -*- import urllib import urllib2 import cookielib class NetR ...
用Python模拟浏览器操作
两种思绪三种要领: 用pamie.建议不要使用,因为pamie为小我私人开发,里面的bug比力多,并且是直接使用win32com体式格局的,如果ie不警惕修改了,后果很严重.另外,pamie3使用的是 ...
python模拟浏览器爬取数据
爬虫新手大坑:爬取数据的时候一定要设置header伪装成浏览器!!!! 在爬取某财经网站数据时由于没有设置Header信息,直接被封掉了ip 后来设置了Accept.Connection.User-A ...
python：爬虫1——实战（下载一张图片、用Python模拟浏览器，通过在线的有道词典来对文本翻译）
一.下载一只猫 import urllib.request response = urllib.request.urlopen("http://cdn.duitang.com/uploads ...
python 模拟浏览器登陆coursera
import requests import random import string def randomString(length): return ''.join(random.choice(s ...
Python模拟浏览器上传文件脚本（Multipart/form-data格式）
http协议本身的原始方法不支持multipart/form-data请求,这个请求由原始方法演变而来的. multipart/form-data的基础方法是post,也就是说是由post方法来组合实 ...

随机推荐

基于php下载文件的详解
本篇文章是对php下载文件进行了详细的分析介绍,需要的朋友参考下 php下载文件,比如txt文件. 出现的效果就是,弹出浏览器自带的下载框,出现另存为操作.有时候会出现内存溢出和超时的现象. 超时的话 ...
PHP操作MongoDB简明教程(转)
转自:http://blog.sina.com.cn/s/blog_6324c2380100ux2m.html MongoDB是最近比较流行的NoSQL数据库,网络上关于PHP操作MongoDB的资料 ...
Webserver issues | PHP manager for IIS
4 down vote accepted In order to successfully install the PHP manager for IIS 8, you need the .NET 3 ...
在Centos7上安装漏洞扫描软件Nessus
本文摘要:简单叙述了在Centos7上安装Nessus扫描器的过程 Nessus 是目前全世界最多人使用的系统漏洞扫描与分析软件,Nessus的用户界面是基于Web界面来访问Nessus漏洞扫描器 ...
IDEA操作GIT说明
公司的代码库从TFS升级到了GIT,我们的自动化测试代码就需要迁移到git上.操作如下: 1.安装GIT 安装完成后,在IDEA中配置git安装路径 2.在本地磁盘新建一个空目录,例如:D:\Wo ...
URI、URL以及URN的区别
首先,URI,是uniform resource identifier,统一资源标识符,用来唯一的标识一个资源.而URL是uniform resource locator,统一资源定位器,它是一种具体 ...
linux 标准io笔记
三种缓冲 1.全缓冲:在缓冲区写满时输出到指定的输出端. 比如对磁盘上的文件进行读写通常是全缓冲的. 2.行缓冲:在遇到'\n'时输出到指定的输出端. 比如标准输入和标准输出就是行缓冲, 回车后就会进 ...
Linux磁盘空间爆满，MySQL无法启动
OS: Cent OS 6.3 DB: 5.5.14 看到一个帖子,在服务器上安装了oracle和mysql数据库,mysql数据库忘记开启innodb_file_per_table,导致插入测试数据 ...
ApacheBench 使用教程
ab压力测试 ab是Apache超文本传输协议(HTTP)的性能测试工具.其设计意图是描绘当前所安装的Apache的执行性能. 主要是显示你安装的Apache每秒可以处理多少个请求. 格式: ./ab ...
关于Oracle表空间数据文件自增长的一些默认选项
昨天,一个同事请教了一些关于Oracle表空间数据文件自增长的问题,解答过程中顺便整理起来,以后其他同事有同样的疑问时可以直接查阅. 实验内容: 创建MYTEST表空间,默认不开启自增长. 给MYTE ...

python 模拟浏览器

python 模拟浏览器的更多相关文章

随机推荐

热门专题