import requests from lxml import etree url='http://desk.zol.com.cn/meinv/' add1='.html' urls=[] i = 0 for i in range(1,100): urls.append(url+str(i)+add1) for url in urls: print("正在爬取"+url) html=requests.get(url) html.encoding='gb2312'#从网页源代码可知网页…
Python 爬取所有51VOA网站的Learn a words文本及mp3音频 #!/usr/bin/env python # -*- coding: utf-8 -*- #Python 爬取所有51VOA网站的Learn a words文本及mp3音频 import os import sys import time import urllib as req from threading import Thread import urllib2 import urllib from thre…