#!/usr/bin/env python # encoding: utf-8 import urllib.request from collections import defaultdict response=urllib.request.urlopen("https://twitter.com/login") headerstr=response.getheaders() d = defaultdict(list) for k, v in headerstr: if "
import re, urllib.request, urllib.parse, urllib.error page = input('Enter - ') fhand = urllib.request.urlopen(page) for line in fhand: str = line.decode().strip() hrefs = re.findall('href="([^\"]*)"', str) for href in hrefs: print(href)
摘抄自:https://www.liaoxuefeng.com/wiki/0014316089557264a6b348958f449949df42a6d3a2e542c000/001432688314740a0aed473a39f47b09c8c7274c9ab6aee000 Get urllib的request模块可以非常方便地抓取URL内容,也就是发送一个GET请求到指定的页面,然后返回HTTP的响应: 例如,对豆瓣的一个URLhttps://api.douban.com/v2/book/2