【10】Python urllib、编码解码、requests、多线程、多进程、unittest初探、__file__、jsonpath

1 urllib

urllib是一个标准模块，直接import就可以使用

1.1get请求

 from urllib.request import urlopen

 url='http://www.nnzhp.cn/archives/423'
 # Send a GET request; read() returns the response body as bytes.
 # The context manager closes the connection even if read() raises.
 with urlopen(url) as resp:
     res = resp.read()
 # Decode the bytes into a str (UTF-8 by default) and save the page.
 # `with` guarantees the file is closed even if write() raises.
 with open('a.html','w',encoding='utf-8') as f:
     f.write(res.decode())

1.2post请求

 from urllib.request import urlopen
 from urllib.parse import urlencode
 import json

 url='http://api.nnzhp.cn/api/user/login'
 data = {"username":"niuhanyang","passwd":'aA123456'}  # request parameters as a dict
 data = urlencode(data)  # form parameters must be URL-encoded
 # Supplying a bytes body makes urlopen send a POST; `with` closes the connection.
 with urlopen(url,data.encode()) as resp:
     res = resp.read()
 text = res.decode()  # the response body is bytes: decode it once and reuse the str
 print(text)
 d = json.loads(text)  # parse the JSON text into a dict
 # Guard against a missing or null 'login_info' so .get is never called on None.
 print((d.get('login_info') or {}).get('sign'))

2 编码解码

Python3中的字符串由str类型表示，内部是Unicode文本；源文件和encode()/decode()的默认编码是UTF-8（UTF-8是Unicode的一种编码实现方式，而不是它的扩展集）
二进制数据使用bytes类型表示

所以Python3不会将str和bytes混在一起。在实际应用中我们经常需要将两者进行互转：

字符串通过编码转换为字节码，字节码通过解码转换为字符串

str--->(encode)--->bytes
bytes--->(decode)--->str

3 requests

3.1get请求

 import requests

 # Send a GET request. Every keyword argument below is optional:
 #   params  - query-string parameters appended to the URL
 #   cookies - cookies to send with the request
 #   headers - request headers
 #   verify  - False skips TLS certificate verification for https URLs; it avoids
 #             certificate errors but is insecure, so use it only against trusted test hosts
 #   timeout - seconds to wait before requests raises a Timeout exception
 req = requests.get('http://www.nnzhp.cn',params={'username':'xxx'},cookies={'k':'v'},
                    headers={'User-Agent':'Chrome'},verify=False,timeout=3)

下载MP3实例

 import requests

 MP3_url='http://qiniuuwmp3.changba.com/1113525663.mp3'
 res = requests.get(MP3_url)
 res.raise_for_status()  # fail loudly on an HTTP error instead of saving an error page as sample.mp3
 mp3 = res.content  # the raw response body as bytes
 # Binary mode because the payload is bytes; `with` closes the file even if write() raises.
 with open('sample.mp3','wb') as f:
     f.write(mp3)

3.2post请求

 import requests

 # Send a POST request. Every keyword argument below is optional:
 #   data    - form fields sent in the request body
 #   cookies - cookies to send with the request
 #   headers - request headers
 #   files   - files to upload; opened in binary mode as the requests docs recommend
 #   timeout - seconds to wait before requests raises a Timeout exception
 # (verify=False may also be passed to skip TLS certificate checks on https URLs.)
 # The `with` block closes a.txt once the upload has finished.
 with open('a.txt','rb') as upload:
     req2 = requests.post('http://www.nnzhp.cn',data={'username':'xxx'},cookies={'k':'v'},
                          headers={'User-Agent':'Chrome'},files={'file':upload},timeout=3)

发送请求，data为字典

import requests

url='http://api.nnzhp.cn/api/user/login'
# A dict passed as data= is sent form-encoded in the POST body.
res = requests.post(url,data={"username":"niuhanyang",
                          "passwd":"aA123456"})
print(res.json())  # the JSON response body parsed into a dict
print(res.text)  # the response body as a str
print(res)  # the Response object, e.g. <Response [200]>

{'error_code': 0, 'login_info': {'login_time': '20181119211507', 'sign': '52c62ca2a17ed581a6eb4888bf574f43', 'userId': 9786}}
{
        "error_code": 0,
        "login_info": {
                "login_time": "20181119211507",
                "sign": "52c62ca2a17ed581a6eb4888bf574f43",
                "userId": 9786
        }
}
<Response [200]>

发送请求，data为json

 import requests

 url='http://api.nnzhp.cn/api/user/add_stu'

 # NOTE(review): the original literal was garbled ({","grade":...) and did not parse;
 # a leading field appears to have been lost - confirm the fields this API requires.
 data={"grade":"金牛座","name":"郑重"}
 res = requests.post(url,json=data)  # json= serializes the dict and sends it as a JSON body
 print(res.json())

上传文件实例

 import requests

 url='http://api.nnzhp.cn/api/file/file_upload'
 # Open in binary mode and let `with` close the file once the upload is done.
 with open('g.mp3','rb') as mp3_file:
     res = requests.post(url,files={'file':mp3_file})
 print(res.json())

3.3返回值

res　　#返回如<Response [200]>
res.status_code 　　#返回状态码，如200
res.json()　　#返回字典。不需要手动decode()转码。如果res结果是json格式，可以使用json()将json串转成字典格式。如果res结果不是json的话，不能使用json()
res.text　　#返回字符串，响应的源码。不能用于下载文件等。
res.content　　#返回二进制。主要用于流媒体文件、图片文件的下载。
res.headers　　#返回响应的所有headers
res.cookies　　#返回响应的所有cookies

4 多线程

进程与线程：

进程就是一组资源的集合。
线程是在进程里面具体干活的。
一个进程里面至少一个线程，这个线程就是主线程。

线程本质：

你电脑的CPU是几核，就只能真正同时运行几个任务（线程）。
Python（CPython）里面的多线程，由于GIL的存在，同一时刻只有一个线程在执行Python字节码，所以利用不了多核CPU。

4.1 主线程等待子线程方法一

 import threading
 import time

 def run():
     """Simulate a slow task: block for five seconds, then report completion."""
     time.sleep(5)
     print('over')

 start_time = time.time()
 # Create the four workers, then start them all so their sleeps overlap.
 ths = [threading.Thread(target=run) for _ in range(4)]
 for worker in ths:
     worker.start()
 # join() blocks until that worker ends; once the loop is done, every worker has finished.
 for worker in ths:
     worker.join()
 end_time = time.time()
 print('耗时：（秒）', end_time-start_time)

over
over
over
over
耗时：（秒） 5.001286029815674

4.2主线程等待子线程方法二

 import threading
 import time

 def run():
     """Simulate a slow task: sleep for five seconds, then report completion."""
     time.sleep(5)
     print('over...')

 # Wait for the workers with a polling while-loop instead of join()
 start_time = time.time()

 for i in range(20):
     t = threading.Thread(target=run)  # create a worker thread
     t.start()  # start it

 # active_count() is the current spelling; activeCount() is a deprecated alias.
 print('之前的线程数：',threading.active_count())

 # Keep looping while any thread besides the main one is still alive.
 # Sleeping briefly between checks keeps the poll from pegging a CPU core.
 while threading.active_count() != 1:
     time.sleep(0.01)

 print('现在线程数：', threading.active_count())

 end_time = time.time()
 print('耗时：（秒）', end_time-start_time)

之前的线程数： 21
over...
over...
over...
over...
over...
over...
over...
over...
over...
over...
over...
over...
over...
over...
over...
over...
over...
over...
over...
over...
现在线程数： 1
耗时：（秒） 5.174295902252197

4.3使用继承来启动多线程

 import threading
 import time

 class MyThread(threading.Thread):
     """Thread subclass whose work is defined by overriding run()."""

     def run(self):
         # start() invokes run() in the new thread, so the method must be named exactly `run`.
         time.sleep(5)
         print('run..')

 # Start five independent workers.
 for _ in range(5):
     MyThread().start()

4.4获取多线程运行函数的返回值

 import requests
 import threading

 all_res = []  # shared list that collects each worker's result
 def get_name(name):
     """Query the stu_info API for `name` and append the parsed JSON response to all_res.

     A thread target's return value is discarded, so the result is handed
     back through the shared list (it could equally be stored in a database).
     """
     r = requests.get('http://api.nnzhp.cn/api/user/stu_info',
                      params={'stu_name':name})
     res = r.json()
     all_res.append(res)

 threads = []
 for i in range(10):
     t = threading.Thread(target=get_name,args=(i,))  # args must be a tuple, hence the trailing comma
     t.start()
     threads.append(t)

 # join() blocks until each worker finishes, without spinning the CPU
 # the way an empty `while ...: pass` polling loop does.
 for t in threads:
     t.join()

 print(all_res)

4.5守护线程

 import threading
 import time

 def hhh():
     """Sleep for five seconds, then print a message."""
     time.sleep(5)
     print('hhhh')

 for i in range(10):
     # daemon=True marks the worker as a daemon thread; it replaces the deprecated setDaemon(True).
     t = threading.Thread(target=hhh, daemon=True)
     t.start()

 # The interpreter exits once only daemon threads remain, so the workers
 # are abandoned mid-sleep when the script ends right after this print.
 print('秦始皇死了')

秦始皇死了

主线程启动完子线程后，继续往下走，打印完“秦始皇死了”后，主线程结束，同时还没运行完的守护子线程也会随之停止。

何时使用？

4.6 GIL全局解释器锁

4.7锁

方法一

 #coding=utf-8
 # The encoding declaration above is only needed on Python 2
 import threading
 num = 0
 lock = threading.Lock()  # one lock shared by every worker
 def xiaojun():
     """Increment the shared counter `num` by one while holding `lock`."""
     global num
     lock.acquire()
     try:
         num+=1
     finally:
         # Release in `finally` so an exception can never leave the lock held.
         lock.release()

 threads = []
 for i in range(1000):
     t = threading.Thread(target=xiaojun)
     t.start()
     threads.append(t)
 # join() waits for each worker without burning CPU in an empty polling loop.
 for t in threads:
     t.join()
 print(num)
 # Always take a lock when several threads modify the same data.
 #多个线程同时操作同一个数据的时候一定要加锁

方法二

import threading
num = 0
lock = threading.Lock()  # one lock shared by every worker
def xiaojun():
    """Increment the shared counter `num` by one while holding `lock`."""
    global num
    with lock:  # acquires on entry and always releases on exit, like `with open(...)`
        num += 1

threads = []
for i in range(1000):
    t = threading.Thread(target=xiaojun)
    t.start()
    threads.append(t)
# join() waits for each worker without burning CPU in an empty polling loop.
for t in threads:
    t.join()
print(num)

注意：num += 1 不是原子操作（读取、加一、写回是多个步骤），GIL并不能保证它线程安全；不加锁时结果可能碰巧正确，但多个线程修改同一数据时仍然必须加锁。

5 多进程

多进程可以利用多核cpu。

CPU密集型任务多进程，CPU干活
IO密集型任务多线程，磁盘IO、网络IO(input,output)

 from multiprocessing import Process
 import time
 import threading
 def run_thread():
     """Worker-thread body: wait five seconds, then print which thread is running."""
     time.sleep(5)
     print('%s在运行'%threading.current_thread())

 def run():
     """Process entry point: start ten threads that each execute run_thread."""
     workers = [threading.Thread(target=run_thread) for _ in range(10)]
     for worker in workers:
         worker.start()

 if __name__ == '__main__':  # this guard is needed on Windows
     for _ in range(10):
         proc = Process(target=run)  # one child process per iteration
         proc.start()
         print(proc.pid)

6 unittest初探

unittest是python自带的单元测试工具，和junit，phpunit类似

 import unittest

 def calc(a,b):
     """Return the sum of a and b."""
     return  a+b

 class MyTest(unittest.TestCase):
     """Example test case for calc."""
     def testa(self):  # unittest only collects methods whose names start with "test"
         res = calc(1,2)
         self.assertEqual(3,res,msg='预期结果和实际结果不一致')
     def testb(self):
         # Expects 2 although calc(0,1) is 1, so this case fails and msg is reported.
         res = calc(0,1)
         self.assertEqual(2,res,msg='预期结果和实际结果不一致')

 # Run the tests only when executed as a script, so importing this module
 # does not start the runner (unittest.main() also exits the process).
 if __name__ == '__main__':
     unittest.main()

.F
======================================================================
FAIL: testb (__main__.MyTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "D:/我的文档/day12/单元测试.py", line 18, in testb
    self.assertEqual(2,res,msg='预期结果和实际结果不一致')
AssertionError: 2 != 1 : 预期结果和实际结果不一致

----------------------------------------------------------------------
Ran 2 tests in 0.002s

FAILED (failures=1)

7 __file__变量

变量__file__表示文件本身，输出的是一个文件名(但在pycharm中会自动输出绝对路径)

使用时需要将其转换成绝对路径

 # Example: put the directory two levels above this file on the module search path
 import os
 import sys
 _this_file = os.path.abspath(__file__)  # __file__ may be relative, so make it absolute first
 _this_dir = os.path.dirname(_this_file)
 BAE_PATH = os.path.dirname(_this_dir)
 sys.path.insert(0, BAE_PATH)  # index 0 gives this directory priority when importing

8 jsonpath

可以用来处理字典

 import jsonpath
 dic =  {
     "error_code": 0,
     "login_info": {
       "userId": 1,
       "sign": "d58e3582afa99040e27b92b13c8f2280",
       # NOTE(review): a stray unterminated quote stood here and broke the literal;
       # a field appears to have been lost in extraction.
     }
   }
 s1 = jsonpath.jsonpath(dic, '$.login_info.userId')  # returns a list; $ is the root object
 s2 = jsonpath.jsonpath(dic, '$..userId')  # two dots search recursively at any depth
 s3 = jsonpath.jsonpath(dic, '$.login_info..userId')
 s4 = jsonpath.jsonpath(dic, '$..TTTTTTTTTTT')  # a path with no match returns False
 print(s1)
 print(s2)
 print(s3)
 print(s4)

[1]
[1]
[1]
False

【10】Python urllib、编码解码、requests、多线程、多进程、unittest初探、file、jsonpath的更多相关文章

python GIL全局解释器锁,多线程多进程效率比较,进程池,协程,TCP服务端实现协程
GIL全局解释器锁 ''' python解释器: - Cpython C语言 - Jpython java ... 1.GIL: 全局解释器锁 - 翻译: 在同一个进程下开启的多线程,同一时刻只能有一 ...
python Unicode 编码解码
1 #将Unicode转换成普通的Python字符串:"编码(encode)" 2 unicodestring = u"Hello world" 3 utf8s ...
10 python 初学（Python 的编码解码）
Python 2 : ASCII Python 3 :Unicode
day06 python is == 编码解码
day06 python 一. is 和 == 的区别 == :比较, 判断, 比较的是值: 可以比较数字, 字符串, 列表, 元组, 字典,等 is :是比较, 比较的是内存地 ...
opencv python图片编码解码
cv2.imdecode()函数从指定的内存缓存中读取数据,并把数据转换(解码)成图像格式;主要用于从网络传输数据中恢复出图像.cv2.imencode()函数是将图片格式转换(编码)成流数据,赋值到 ...
Python Base64编码解码
import base64 str = '12345678'.encode('utf8') print(base64.b64encode(str).decode('utf8')) # 编码接收的参数 ...
python 字符串编码解码和格式化问题
转自:https://www.liaoxuefeng.com/wiki/001374738125095c955c1e6d8bb493182103fac9270762a000/0013868191962 ...
Python多线程多进程那些事儿看这篇就够了~~
自己以前也写过多线程,发现都是零零碎碎,这篇写写详细点,填一下GIL和Python多线程多进程的坑~ 总结下GIL的坑和python多线程多进程分别应用场景(IO密集.计算密集)以及具体实现的代码模块 ...
Python学习之路14☞多线程与多进程
一进程与线程的概念 1.1 进程进程定义: 进程就是一个程序在一个数据集上的一次动态执行过程.进程一般由程序.数据集.进程控制块三部分组成.我们编写的程序用来描述进程要完成哪些功能以及如何完成:数 ...

随机推荐

【内部】Fiddler设置代理请求的方式
1.2 打开Fiiddler,设置如图步骤: 3.添加规则: 4.这里选择第三个选项: 5.选中^开始,空格结束的如图内容.复制你要代理的地址.如:http://wap.cmread.com/nap/ ...
python中用*和**解析数据
在python中可以用*解析tuple,list,set数据给函数传参,用**解析dict类型数据,这样可使代码更加简洁. 示例代码: def func(a,b,c): print('a:{0},b: ...
Vue-cli 鼠标监听事件之滚动条
<template> <div class="scroll"> <div class="scroll-div-outer&quo ...
【VS开发】文件夹和文件选择EditBrowe控件使用
让EditBrowse控件既能浏览文件,又能浏览文件夹... 下图是在http://www.codeproject.com/Articles/35722/MFC-Feature-Pack-CMFCEd ...
Angular5 tslint错误：The selector of the component “XXXComponent” should be used as element
错误描述在项目中自己封装了一个 select 组件 @Component({ selector: '[app-choosen-select]', templateUrl: './selectcomm ...
java.math包简介
java.math包提供了java中的数学类包括基本的浮点库.复杂运算以及任意精度的数据运算 '可以看得到,主要包括三个类一个枚举 BigDecimal和BigInteger接下来会详细介绍先 ...
ASM下添加磁盘
linux下asm磁盘扩容,此次扩容添加4块480G磁盘第一步:multipath -ll : 查看多路径映射磁盘(两节点都做) 配置 /etc/multipath.conf文件,配置新加磁盘的al ...
Educational Codeforces Round 64 -C（二分）
题目链接:https://codeforces.com/contest/1156/problem/C 题意:给出n个数和整形数z,定义一对数为差>=z的数,且每个数最多和一个数组成对,求最多有多 ...
【转帖】大话Spring Cloud
springcloud(一):大话Spring Cloud 2017/05/01 http://www.ityouknow.com/springcloud/2017/05/01/simple-sp ...
DIY兼容机装苹果系统
遇到问题: 无法用变色龙引导:删除原WIN系统前隐藏分区变色龙引导画面无法进安装界面:a,wowpc.iso版本低,换新版;b,复制EXTRA进MAC安装盘 MAC OS安装完成后重新启动卡在苹果图 ...

【10】Python urllib、编码解码、requests、多线程、多进程、unittest初探、__file__、jsonpath