python爬取股票信息

 import requests

 from bs4 import BeautifulSoup

 import traceback

 import re

 def getHTMLText(url, timeout=30):
     """Download ``url`` and return the response body as text.

     The text is decoded with the encoding that requests guesses from the
     content (``apparent_encoding``) rather than the one announced in the
     HTTP headers.

     Args:
         url: Address of the page to download.
         timeout: Seconds to wait for the server before giving up. Without a
             timeout a stalled connection would block the whole crawl.

     Returns:
         The decoded page text, or ``""`` when the request fails (connection
         error, timeout, invalid URL, or a non-2xx status code).
     """
     try:
         r = requests.get(url, timeout=timeout)
         r.raise_for_status()
         r.encoding = r.apparent_encoding
         return r.text
     except requests.RequestException:
         # Only request-level failures mean "no page"; programming errors and
         # KeyboardInterrupt are allowed to propagate.
         return ""

 def getStockList(lst, stockURL):
     """Collect stock codes from the links on the page at ``stockURL``.

     Every ``<a>`` element that has an ``href`` is inspected; the first
     substring of the form ``sh``/``sz`` followed by six digits found in the
     link target is appended to ``lst``. Links without such a substring are
     ignored.

     Args:
         lst: List that receives the codes (modified in place).
         stockURL: Address of the page listing the stocks.

     Returns:
         None. Results are delivered through ``lst``.
     """
     html = getHTMLText(stockURL)
     soup = BeautifulSoup(html, 'html.parser')
     codePattern = re.compile(r"[s][hz]\d{6}")
     # href=True limits the search to anchors that actually carry an href, so
     # no exception handling is needed for anchors without one.
     for anchor in soup.find_all('a', href=True):
         match = codePattern.search(anchor['href'])
         if match:
             lst.append(match.group(0))

 def getStockInfo(lst, stockURL, fpath):
     """Fetch the detail page of each stock in ``lst`` and append its data to ``fpath``.

     For every code the page ``stockURL + code + ".html"`` is downloaded and
     the ``div`` with class ``stock-bets`` is parsed: the first
     whitespace-separated token of the ``bets-name`` element is stored under
     ``'股票名称'`` and each ``<dt>``/``<dd>`` pair is stored as key/value.
     The resulting dict is appended to ``fpath`` as one ``str(dict)`` line,
     encoded as UTF-8.

     Stocks whose page could not be downloaded, or whose page lacks the
     expected elements, are skipped. Any other failure is reported with a
     traceback and processing continues with the next stock.

     Args:
         lst: Iterable of stock codes.
         stockURL: URL prefix of the per-stock detail pages.
         fpath: Path of the output file (opened in append mode).

     Returns:
         None.
     """
     for stock in lst:
         url = stockURL + stock + ".html"
         html = getHTMLText(url)
         if html == "":
             continue
         try:
             soup = BeautifulSoup(html, 'html.parser')
             stockInfo = soup.find('div', attrs={'class': 'stock-bets'})
             if stockInfo is None:
                 # The page has no quote panel, so there is nothing to record.
                 continue
             name = stockInfo.find(attrs={'class': 'bets-name'})
             nameParts = name.text.split() if name is not None else []
             if not nameParts:
                 continue

             infoDict = {'股票名称': nameParts[0]}
             # zip() stops at the shorter list, so a page with fewer <dd> than
             # <dt> elements no longer raises IndexError part-way through.
             for key, val in zip(stockInfo.find_all('dt'),
                                 stockInfo.find_all('dd')):
                 infoDict[key.text] = val.text

             with open(fpath, 'a', encoding='utf-8') as f:
                 f.write(str(infoDict) + '\n')
         except Exception:
             # Best-effort crawl: report the problem and move on, but let
             # KeyboardInterrupt/SystemExit stop the run.
             traceback.print_exc()

 def main():
     """Run the crawl: gather the stock codes, then save each stock's details.

     The codes are read from the list page and the per-stock data is appended
     to the output file; all three locations are fixed inside this function.
     """
     output_file = 'D:/BaiduStockInfo.txt'
     stock_info_url = 'http://gupiao.baidu.com/stock/'
     stock_list_url = 'http://quote.eastmoney.com/stocklist.html'

     # getStockList fills this list in place.
     codes = []
     getStockList(codes, stock_list_url)
     getStockInfo(codes, stock_info_url, output_file)

 # Script entry point: the crawl starts as soon as this file is executed.
 # There is no __main__ guard, so importing the file starts it as well.
 main()

这是慕课上的源代码，直接粘贴的，不知道为什么运行一直报错。以下是错误。如果有人解决了，麻烦说一声，谢谢啦。

下面是慕课中修改的代码，也是源代码，直接粘贴的，但是是可以运行出来的。

 import requests

 from bs4 import BeautifulSoup

 import traceback

 import re

 def getHTMLText(url, code="utf-8", timeout=30):
     """Download ``url`` and return the response body decoded with ``code``.

     The encoding is set explicitly instead of being guessed from the
     content.

     Args:
         url: Address of the page to download.
         code: Name of the encoding used to decode the response body.
         timeout: Seconds to wait for the server before giving up. Without a
             timeout a stalled connection would block the whole crawl.

     Returns:
         The decoded page text, or ``""`` when the request fails (connection
         error, timeout, invalid URL, or a non-2xx status code).
     """
     try:
         r = requests.get(url, timeout=timeout)
         r.raise_for_status()
         r.encoding = code
         return r.text
     except requests.RequestException:
         # Only request-level failures mean "no page"; programming errors and
         # KeyboardInterrupt are allowed to propagate.
         return ""

 def getStockList(lst, stockURL):
     """Collect stock codes from the links on the page at ``stockURL``.

     The page is decoded as GB2312. Every ``<a>`` element that has an
     ``href`` is inspected; the first substring of the form ``sh``/``sz``
     followed by six digits found in the link target is appended to ``lst``.
     Links without such a substring are ignored.

     Args:
         lst: List that receives the codes (modified in place).
         stockURL: Address of the page listing the stocks.

     Returns:
         None. Results are delivered through ``lst``.
     """
     html = getHTMLText(stockURL, "GB2312")
     soup = BeautifulSoup(html, 'html.parser')
     codePattern = re.compile(r"[s][hz]\d{6}")
     # href=True limits the search to anchors that actually carry an href, so
     # no exception handling is needed for anchors without one.
     for anchor in soup.find_all('a', href=True):
         match = codePattern.search(anchor['href'])
         if match:
             lst.append(match.group(0))

 def getStockInfo(lst, stockURL, fpath):
     """Fetch each stock's detail page, append its data to ``fpath``, show progress.

     For every code the page ``stockURL + code + ".html"`` is downloaded and
     the ``div`` with class ``stock-bets`` is parsed: the first
     whitespace-separated token of the ``bets-name`` element is stored under
     ``'股票名称'`` and each ``<dt>``/``<dd>`` pair is stored as key/value.
     The resulting dict is appended to ``fpath`` as one ``str(dict)`` line,
     encoded as UTF-8.

     After every stock, whether it was saved, skipped or failed, the
     percentage of processed stocks is rewritten on the current console line.

     Args:
         lst: List of stock codes (its length is the 100% mark).
         stockURL: URL prefix of the per-stock detail pages.
         fpath: Path of the output file (opened in append mode).

     Returns:
         None.
     """
     total = len(lst)
     for count, stock in enumerate(lst, start=1):
         url = stockURL + stock + ".html"
         html = getHTMLText(url)
         try:
             if html == "":
                 continue
             soup = BeautifulSoup(html, 'html.parser')
             stockInfo = soup.find('div', attrs={'class': 'stock-bets'})
             if stockInfo is None:
                 # The page has no quote panel, so there is nothing to record.
                 continue
             name = stockInfo.find(attrs={'class': 'bets-name'})
             nameParts = name.text.split() if name is not None else []
             if not nameParts:
                 continue

             infoDict = {'股票名称': nameParts[0]}
             # zip() stops at the shorter list, so a page with fewer <dd> than
             # <dt> elements no longer raises IndexError part-way through.
             for key, val in zip(stockInfo.find_all('dt'),
                                 stockInfo.find_all('dd')):
                 infoDict[key.text] = val.text

             with open(fpath, 'a', encoding='utf-8') as f:
                 f.write(str(infoDict) + '\n')
         except Exception:
             # Best-effort crawl: a page that cannot be parsed or saved is
             # skipped quietly so the single-line progress display stays
             # intact. KeyboardInterrupt/SystemExit still stop the run.
             pass
         finally:
             # Runs on every path, including the early ``continue``s above, so
             # the percentage always reaches 100% at the end of the list.
             print("\r当前进度: {:.2f}%".format(count*100/total),end="")

 def main():
     """Run the crawl: gather the stock codes, then save each stock's details.

     The codes are read from the list page and the per-stock data is appended
     to the output file; all three locations are fixed inside this function.
     """
     output_file = 'D:/BaiduStockInfo.txt'
     stock_info_url = 'http://gupiao.baidu.com/stock/'
     stock_list_url = 'http://quote.eastmoney.com/stocklist.html'

     # getStockList fills this list in place.
     codes = []
     getStockList(codes, stock_list_url)
     getStockInfo(codes, stock_info_url, output_file)

 # Script entry point: the crawl starts as soon as this file is executed.
 # There is no __main__ guard, so importing the file starts it as well.
 main()

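至于为什么，本人也不是特别清楚，代码主体部分是一样的，具体原因不清楚。我把编码方式删去了，和原先一样，但是程序依然可以运行。我不知道为什么，加了进度条，程序就可以运行了。（对比两段代码可以看到：第一版在 except 中调用了 traceback.print_exc()，每当某只股票的页面解析失败就会打印一次错误堆栈；第二版把这一行换成了进度条输出，异常被静默跳过。所以“报错”很可能只是第一版打印出来的堆栈信息，而不是程序真的崩溃了。）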

这是别人的代码分析，个人觉得还是很好的，很仔细：https://segmentfault.com/a/1190000010520835

python爬取股票信息的更多相关文章

Python爬取股票信息，并实现可视化数据
前言截止2019年年底我国股票投资者数量为15975.24万户, 如此多的股民热衷于炒股,首先抛开炒股技术不说, 那么多股票数据是不是非常难找, 找到之后是不是看着密密麻麻的数据是不是头都大了? 今 ...
Python 爬取所有51VOA网站的Learn a words文本及mp3音频
Python 爬取所有51VOA网站的Learn a words文本及mp3音频 #!/usr/bin/env python # -*- coding: utf-8 -*- #Python 爬取所有5 ...
python爬取网站数据
开学前接了一个任务,内容是从网上爬取特定属性的数据.正好之前学了python,练练手. 编码问题因为涉及到中文,所以必然地涉及到了编码的问题,这一次借这个机会算是彻底搞清楚了. 问题要从文字的编码讲 ...
python爬取某个网页的图片-如百度贴吧
python爬取某个网页的图片-如百度贴吧作者:vpoet mail:vpoet_sir@163.com 注:随意copy,不用告诉我 #coding:utf-8 import urllib imp ...
Python:爬取乌云厂商列表，使用BeautifulSoup解析
在SSS论坛看到有人写的Python爬取乌云厂商,想练一下手,就照着重新写了一遍原帖:http://bbs.sssie.com/thread-965-1-1.html #coding:utf- im ...
使用python爬取MedSci上的期刊信息
使用python爬取medsci上的期刊信息,通过设定条件,然后获取相应的期刊的的影响因子排名,期刊名称,英文全称和影响因子.主要过程如下: 首先,通过分析网站http://www.medsci.cn ...
python爬取免费优质IP归属地查询接口
python爬取免费优质IP归属地查询接口具体不表,我今天要做的工作就是: 需要将数据库中大量ip查询出起归属地刚开始感觉好简单啊,毕竟只需要从百度找个免费接口然后来个python脚本跑一晚上就o ...
Python爬取豆瓣指定书籍的短评
Python爬取豆瓣指定书籍的短评 #!/usr/bin/python # coding=utf-8 import re import sys import time import random im ...
python爬取网页的通用代码框架
python爬取网页的通用代码框架: def getHTMLText(url):#参数code缺省值为‘utf-8’(编码方式) try: r=requests.get(url,timeout=30) ...

随机推荐

Html盛放媒体/视频标签
播放视频: <video id="video1" width="420" style="margin-top:15px;" contr ...
suricata 关键字补充说明
基本关键字 1.msg(对匹配到的规则的说明,第一部分约定用大写字母表示,msg始终是签名的第一个关键字) 注意:msg中必须转义以下字符: ; \ " msg :“ATTACK-RESPO ...
mongodb的聚合aggregate|group|match|project|sort|limit|skip|unwind
聚合 aggregate 聚合(aggregate)主要用于计算数据,类似sql中的sum().avg() 语法 db.集合名称.aggregate([{管道:{表达式}}]) 管道管道在Unix和 ...
Excel2010隔行变色的实现方法 [也可套用格式即可]
这样excel隔行变色的效果,excel会自动隔行填充不同颜色. 公式说明: =MOD(ROW(),2)=0,实现的效果是偶数行自动填充底纹颜色 =MOD(ROW(),2)=1,实现的效果是奇数行自动 ...
【转】R语言 RStudio快捷键
链接地址 http://blog.sina.com.cn/s/blog_403aa80a0101ar8q.html 控制台功能 Windows & Linux Mac 移动鼠标到控制台 C ...
JS获取QueryString(Jquery)
QueryString = { data: {}, Initial: function() { var aPairs, aTmp; var qu ...
ASPxCallback组件(珍藏版)
<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default3.aspx.cs ...
linux配置sphinx
1. 配置索引 cd /usr/local/sphinx/etc/ cp sphinx.conf.dist sphinx.conf //备份配置文件,防止改错 vim sphinx.conf 配置文件 ...
centos7 redis配置
https://www.cnblogs.com/web424/p/6796993.html
JavaScript：几种常用循环
##循环数组的方法 1.for循环 for(let i = 0;i < ary.length;i++){ console.log(ary[i]); } 2.forEach ary.forEach ...

python爬取股票信息

python爬取股票信息的更多相关文章

随机推荐

热门专题