吴裕雄 实战python编程(2)
from urllib.parse import urlparse
url = 'http://www.pm25x.com/city/beijing.htm'
o = urlparse(url)
print(o)
print("scheme={}".format(o.scheme)) # http
print("netloc={}".format(o.netloc)) # www.pm25x.com
print("port={}".format(o.port)) # None
print("path={}".format(o.path)) # /city/beijing.htm
print("query={}".format(o.query)) # 空

import requests
url = 'http://www.wsbookshow.com/'
html = requests.get(url)
html.encoding="GBK"
print(html.text)

import requests
import numpy as np
url = 'http://www.wsbookshow.com/'
html = requests.get(url)
html.encoding="gbk"
htmllist = html.text.splitlines()
print(type(htmllist))
print(np.shape(htmllist))
for row in htmllist:
print(row)

import requests
url = 'http://www.wsbookshow.com/'
html = requests.get(url)
html.encoding="gbk"
htmllist = html.text.splitlines()
n=0
for row in htmllist:
if "新概念" in row:
n+=1
print("找到 {} 次!".format(n))

import re
pat = re.compile('[a-z]+')
m = pat.match('tem12po')
print(m)
if not m==None:
print(m.group())
print(m.start())
print(m.end())
print(m.span())

import re
m = re.match(r'[a-z]+','tem12po')
print(m)
if not m==None:
print(m.group())
print(m.start())
print(m.end())
print(m.span())

import re
pat = re.compile('[a-z]+')
m = pat.search('3tem12po')
print(m) # <_sre.SRE_Match object; span=(1, 4), match='tem'>
if not m==None:
print(m.group()) # tem
print(m.start()) # 1
print(m.end()) # 4
print(m.span()) # (1,4)

import re
pat = re.compile('[a-z]+')
m = pat.findall('tem12po')
print(m) # ['tem', 'po']

import requests,re
regex = re.compile('[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+')
url = 'http://www.wsbookshow.com/'
html = requests.get(url)
emails = regex.findall(html.text)
for email in emails:
print(email)

吴裕雄 实战python编程(2)的更多相关文章
- 吴裕雄 实战PYTHON编程(10)
import cv2 cv2.namedWindow("frame")cap = cv2.VideoCapture(0)while(cap.isOpened()): ret, im ...
- 吴裕雄 实战PYTHON编程(9)
import cv2 cv2.namedWindow("ShowImage1")cv2.namedWindow("ShowImage2")image1 = cv ...
- 吴裕雄 实战PYTHON编程(8)
import pandas as pd df = pd.DataFrame( {"林大明":[65,92,78,83,70], "陈聪明":[90,72,76, ...
- 吴裕雄 实战PYTHON编程(7)
import os from win32com import client word = client.gencache.EnsureDispatch('Word.Application')word. ...
- 吴裕雄 实战PYTHON编程(6)
import matplotlib.pyplot as plt plt.rcParams['font.sans-serif']=['Simhei']plt.rcParams['axes.unicode ...
- 吴裕雄 实战PYTHON编程(5)
text = '中华'print(type(text))#<class 'str'>text1 = text.encode('gbk')print(type(text1))#<cla ...
- 吴裕雄 实战PYTHON编程(4)
import hashlib md5 = hashlib.md5()md5.update(b'Test String')print(md5.hexdigest()) import hashlib md ...
- 吴裕雄 实战python编程(3)
import requests from bs4 import BeautifulSoup url = 'http://www.baidu.com'html = requests.get(url)sp ...
- 吴裕雄 实战python编程(1)
import sqlite3 conn = sqlite3.connect('E:\\test.sqlite') # 建立数据库联接cursor = conn.cursor() # 建立 cursor ...
随机推荐
- arcgis license manager 10.2服务无法启动
(步骤)1. 用cmd切换到 license manager 安装目录,如 C:\Program Files (x86)\ArcGIS\License10.2\bin,输入: Lmgrd -z -c ...
- Linux中CPU亲和性(go)
http://www.cnblogs.com/LubinLew/p/cpu_affinity.html
- SEO之H1,H2,H3,H4....STRONG使用方法
作为一个SEO从业人员,我们不仅仅是要懂得如何通过网站内容和外链等SEO手段,其实一个优秀的SEOER在从事一个SEO案例时,首先着手的是如何从网站程序本身来打好网站SEO基础. 在平时和很多朋友的交 ...
- ThinkJava-多态
8.2.1 方法调用绑定 解决的办法就是后期绑定,它的含义就是在运行时根据对象的类型进行绑定.后期绑定也 叫做动态绑定或运行时绑定.如果一种语言想实现后期绑定,就必须具有某种机制,以便在运 行时能判断 ...
- 挂载本地ISO
http://www.linuxidc.com/Linux/2017-03/142087.htm 挂载本地ISO mount -o loop /home/iso/RHEL-server-7.0-x86 ...
- fiddler基础功能介绍
- js将UTC时间转化为当地时区时间
js将UTC时间转化为当地时区时间(UTC转GMT) 我们在进行网站开发的时候有可能会涉及到国外的用户或者用户身在国外,这时就会存在时差问题,比如说我们在中国的时间是08:00,但是此时韩国的时间 ...
- Asynchronous programming with Tornado
Asynchronous programming can be tricky for beginners, therefore I think it’s useful to iron some bas ...
- 【Linux_Unix系统编程】chapter7 内存分配
Chapter7 内存分配本章将用于在堆或者栈上分配内存的函数.7.1 在堆上分配内存 通常将堆的当前的内存边界称为"program break" 7.1.1 调整program ...
- 3D Render
记录最近遇到的问题: 1:崩溃问题 由于高频率获取DC异常导致. void D3D11Texture2D::Copy2Window(void* srcdc, uint32_t left, uint32 ...