python2.x urllib2和urllib的使用

1.最简单用法

　　urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,...)

 import urllib2

 import urllib

 # Simplest usage: open the URL and keep the returned response object.
 response = urllib2.urlopen("http://www.baidu.com")

 # Status code of the response (200 in the sample output below).
 print 'getcode():',response.getcode()

 # URL of the response, once through the method and once through the attribute.
 print 'geturl():',response.geturl()

 print 'url:',response.url

 # All response headers, printed on the lines following the label.
 print 'headers:\n',response.headers

 # Status message of the response ("OK" in the sample output below).
 print 'msg:',response.msg

 #-------------------------------------out--------------------------------------

 getcode(): 200

 geturl(): http://www.baidu.com

 url: http://www.baidu.com

 headers:

 Date: Thu, 29 Dec 2016 06:28:36 GMT

 Content-Type: text/html; charset=utf-8

 Transfer-Encoding: chunked

 Connection: Close

 Vary: Accept-Encoding

 Set-Cookie: BAIDUID=9A1E663B4C3AB33D11266F0D865A1F59:FG=1; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com

 Set-Cookie: BIDUPSID=9A1E663B4C3AB33D11266F0D865A1F59; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com

 Set-Cookie: PSTM=1482992916; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com

 Set-Cookie: BDSVRTM=0; path=/

 Set-Cookie: BD_HOME=0; path=/

 Set-Cookie: H_PS_PSSID=21858_1464_21112_17001_21553_20930; path=/; domain=.baidu.com

 P3P: CP=" OTI DSP COR IVA OUR IND COM "

 Cache-Control: private

 Cxy_all: baidu+0ba0b09e0fa305471b5e3b42c352570f

 Expires: Thu, 29 Dec 2016 06:27:54 GMT

 X-Powered-By: HPHP

 Server: BWS/1.1

 X-UA-Compatible: IE=Edge,chrome=1

 BDPAGETYPE: 1

 BDQID: 0x889c1bcd00004be7

 BDUSERID: 0

 msg: OK

获取html内容

 # NOTE(review): these look like three alternative ways to consume the body of
 # the same response; presumably once read() has consumed it, the later calls
 # have nothing left to return -- confirm before running them back to back.
 print response.read()     # returns the whole page as a single str

 print response.readline() # returns one line per call

 print response.readlines() # returns the content as a list

2. 构造Request 设置headers

 def set_headers():

     #构造Request,设置headers

     #__init__(self, url, data=None, headers={},origin_req_host=None, unverifiable=False)

     import urllib2

     headers = {'User-Agent':'liubi-Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}

     request = urllib2.Request("http://localhost:5000/urllib2testget",headers=headers)

     response = urllib2.urlopen(request)

     print request.headers

     #追加一个header

     request.add_header("addheader","nice")

     response = urllib2.urlopen(request)

     print request.headers

 set_headers()

 #--------------------------------输出:

 {'User-agent': 'liubi-Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}

 {"a": "", "": ""}

 ------------------------------------------------

 {'Addheader': 'nice', 'User-agent': 'liubi-Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}

 {"a": "", "": ""}

3.发送get请求,发送post请求

 def get_post():

     #get方式

     import urllib2

     import urllib

     headers = {'User-Agent':'liu bi'}

     values = {"username":"diaosir_get","password":"diao123_get"}

     data = urllib.urlencode(values)

     print '---------------------get:'

     url = "http://localhost:5000/urllib2testget"

     get_url=url+"?"+data

     request = urllib2.Request(get_url,headers=headers)

     response = urllib2.urlopen(request)

     print json.loads(response.read())

     print '---------------------post:'

     url = "http://localhost:5000/urllib2testpost"

     request = urllib2.Request(url,data,headers=headers)

     response = urllib2.urlopen(request)

     print json.loads(response.read())

 get_post()

 #---------------------------------------------------------输出:

 ---------------------get:

 {u'username': u'diaosir_get', u'password': u'diao123_get'}

 ---------------------post:

 {u'username': u'diaosir_get', u'password': u'diao123_get'}

post&get

4.代理模式设置

def set_proxies():

    #1.proxy_handler

    #2.创建operner

    #3.安装opener[非必须]

    #4.拿operner去请求url

    enable_proxy = True

    proxy_handler = urllib2.ProxyHandler({"http":'http://120.24.73.165:3128'})

    null_proxy_handler = urllib2.ProxyHandler({})

    if enable_proxy:

        opener = urllib2.build_opener(proxy_handler)#挂载opener

    else:

        opener = urllib2.build_opener(null_proxy_handler)

    request = urllib2.Request('http://www.baidu.com')

    print '---------------------不使用代理'

    response = urllib2.urlopen(request)

    print response.getcode(),request.host

    print '---------------------使用代理'

    response = opener.open(request)

    print response.getcode(),request.host

#----------------------------------------------------------输出

---------------------不使用代理

200 www.baidu.com

---------------------使用代理

200 120.24.73.165:3128

5.debug模式, 代码中urllib2.build_opener中的httpsHandler需要去掉，

 def debug_set():

     #代理，调试

     import  urllib2,urllib

     proxy_handler = urllib2.ProxyHandler({"http":'http://192.168.1.108:89'})

     #debuglog的使用

     httpHandler = urllib2.HTTPHandler(debuglevel=1)

     opener = urllib2.build_opener(httpHandler, httpsHandler,)

     urllib2.install_opener(opener)

     request = urllib2.Request('http://127.0.0.1:5000/urllib2testget?a=2&b=3',headers={'User-Agent':'liubi00'})

     response = opener.open(request)

     print response.getcode(),response.read()

 #-------------------------------------------输出:

 send: 'GET /urllib2testget?a=2&b=3 HTTP/1.1\r\nAccept-Encoding: identity\r\nHost: 127.0.0.1:5000\r\nConnection: close\r\nUser-Agent: liubi00\r\n\r\n'

 reply: 'HTTP/1.0 200 OK\r\n'

 header: Content-Type: text/html; charset=utf-8

 header: Content-Length: 20

 header: Server: Werkzeug/0.11.11 Python/2.7.12

 header: Date: Fri, 30 Dec 2016 15:12:40 GMT

 200 {"a": "", "b": ""}

6.获取cookie存到cookie.txt

import cookielib

import  urllib2

def get_cookie():

    filename = 'cookie.txt'

    #声明一个MozillaCookieJar对象实例来保存cookie，之后写入文件

    cookie = cookielib.MozillaCookieJar(filename)

    #利用urllib2库的HTTPCookieProcessor对象来创建cookie处理器

    handler = urllib2.HTTPCookieProcessor(cookie)

    #通过handler来构建opener

    opener = urllib2.build_opener(handler,)

    request = urllib2.Request('http://www.baidu.com')

    request.add_header('User-Agent','fuckyou')

    response = opener.open(request)

    #保存cookie到文件

    cookie.save(ignore_discard=True, ignore_expires=True)

    print response.getcode()

get_cookie()

#----------------------------------------------输出:

200

7.通过cookie请求，更多查看http://www.cnblogs.com/sysu-blackbear/p/3629770.html

 import cookielib

 import urllib2

 def use_cookie():

     #cookie--从cookies.txt读取cookies,携带cookies请求

     cookie_file = 'cookie.txt'

     #创建MozillaCookieJar实例对象

     cookie = cookielib.MozillaCookieJar(cookie_file)

     #从文件中读取cookie内容到变量

     cookie.load( ignore_discard=True, ignore_expires=True)

     #创建请求的request

     req = urllib2.Request("http://www.baidu.com")

     #利用urllib2的build_opener方法创建一个opener

     opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))

     response = opener.open(req)

     print response.read()

8.异常处理

 def deal_errors():

     #异常处理

     import urllib2

     #HTTPError

     req = urllib2.Request('http://blog.csdn.net/cqcre')

     try:

         urllib2.urlopen(req)

     except urllib2.HTTPError, e:

         print e.code

         print e.reason

     #URLError

     requset = urllib2.Request('http://www.xxxxx.com')

     try:

         urllib2.urlopen(requset)

     except urllib2.URLError, e:

         print e.reason

     #HTTPERROR&URLERROR

     req = urllib2.Request('http://blog.csdn.net/cqcre')

     try:

         urllib2.urlopen(req)

     except urllib2.URLError, e:

         if hasattr(e,"code"):

             print e.code

         if hasattr(e,"reason"):

             print e.reason

     else:

         print "OK"

python2.x urllib2和urllib的使用的更多相关文章

Python2/3中的urllib库
urllib库对照速查表 Python2.X Python3.X urllib urllib.request, urllib.error, urllib.parse urllib2 urllib.re ...
Python2和Python3中urllib库中urlencode的使用注意事项
前言在Python中,我们通常使用urllib中的urlencode方法将字典编码,用于提交数据给url等操作,但是在Python2和Python3中urllib模块中所提供的urlencode的包 ...
python2核心类库：urllib、urllib2的区别和使用
urllib/urllib2都是接受URL请求的相关模块区别:1.urllib2可以接受一个Request类的实例来设置URL请求的headers,urllib仅可以接受URL.这意味着,你不可以伪装 ...
urllib与urllib2的学习总结(python2.7.X): python urllib与urllib2
https://www.cnblogs.com/wly923/archive/2013/05/07/3057122.html
python urllib2与urllib
1.urllib2可以接受一个Request对象,并以此可以来设置一个URL的headers,但是urllib只接收一个URL. 2.urllib模块可以提供进行urlencode的方法,该方法用于G ...
Python把json格式的string对象转变成dict对象操作、Python3不能使用urllib2、urllib.parse.urlencode(params).encode(encoding='UTF8')
json格式的string对象转变成dict对象操作 content=eval(content)#json字典转化 Python3不能使用urllib2 直接使用urllib.request替换urll ...
Python2 基于urllib2 的HTTP请求类
一个利用urllib2模块编写的下载器,虽然有了requests模块,但是毕竟标准库 import urllib2,random class strong_down(): def __init__(s ...
python2中urllib2模块带cookies使用方法
#!/usr/bin/python # coding=utf-8 #############方式1######################### import urllib2 cookie = & ...
python2.7 urllib2 爬虫
# _*_ coding:utf-8 _*_ import urllib2import cookielibimport randomimport refrom bs4 import Beautiful ...

随机推荐

cocos2d的-X- luaproject的LUA脚本加密
2014/1/26 更新近期又发现了一个非常easy的方法,事实上coco2dx已经给我们提供设置loader的方法. 注意:有个局限性,在非android平台下调用pEngine->exec ...
jsonp总结
由于“同源策略”的限制,ajax不能做跨域请求,jsonp是当下解决跨域请求最流行的方案,来个例子(index.html): <!doctype html> <html lang=& ...
jquery调用wcf案例
----------根据其他网友总结 1.在契约接口上添加:[WebInvoke(RequestFormat=WebMessageFormat.Json,ResponseFormat=WebMessa ...
hdu 1239 Calling Extraterrestrial Intelligence Again (暴力枚举)
Calling Extraterrestrial Intelligence Again Time Limit: 2000/1000 MS (Java/Others) Memory Limit: ...
C#控件怎样获取，和失去焦点的处理
publicForm1() { InitializeComponent(); textBox1.Enter+=newEventHandler(textBox1_Enter);//获得焦点事件 text ...
c#二进制、十进制、16进制之间的转换
//十进制转二进制 Console.WriteLine(Convert.ToString(69, 2)); //十进制转八进制 Console.WriteLine(Convert.ToString(6 ...
golang切片slice
切片slice是引用类型 len()函数获取元素的个数 cap()获取数组的容量 1.申明方式 (1)var a []int 与数组不同的是他不申明长度(2)s2 := make([]int, 3, ...
Hive中Bucket的应用
网友南京-李先森给了他收集的一些资料,如下: Buckets 对指定列计算 hash,根据 hash 值切分数据,目的是为了并行,每一个 Bucket 对应一个文件.如将 user 列分散至 32 个 ...
C# list distinct操作
使用代理实现对C# list distinct操作范型在c#编程中经常使用,而经常用list 去存放实体集,因此会设计到对list的各种操作,比较常见的有对list进行排序,查找,比较,去重复. ...
ios学习笔记之2天来总结
学了2天,小结下. ios的基本代码执行流程: 与java的基本异同: 异: 1.基类:java中Object是所有类的父类,而objective-c的根类为NSObject 2.默认访问类型:jav ...

python2.x urllib2和urllib的使用

python2.x urllib2和urllib的使用的更多相关文章

随机推荐

热门专题