python 爬取王者荣耀英雄皮肤代码

import os, time, requests, json, re, sys

from retrying import retry

from urllib import parse

"""

文章描述：爬取王者荣耀英雄壁纸+封面

使用说明：直接在文件最底下设置图片保存目录（下载根目录），然后运行

作者：Felix(2020/7/30 14:42)

最新修改时间：2021-4-5

公众号：【全面资源集】

博客：https://blog.csdn.net/weixin_49012647

说明：没有使用线程或进程，面向对象加面向过程，使用控制台输出显示进度；目标网站没有反爬机制，也不识别UA；此代码调试了两天才趋近完美

"""

class HonorOfKings:
    """Sequential downloader for Honor of Kings hero covers and skin wallpapers.

    Covers are written to ``<save_path>/英雄封面/<hero>.png`` and each skin
    wallpaper to ``<save_path>/<hero>/<skin>.jpg``. Files that already exist
    are skipped, so an interrupted run can simply be started again.
    """

    # Characters removed from hero / skin names before they are used as
    # directory or file names.
    _NAME_JUNK = re.compile(r'[【】:.<>|·@#$%^&() ]')

    # Hero list endpoint (captured from app traffic by the original author).
    _HERO_LIST_URL = 'http://gamehelper.gm825.com/wzry/hero/list?channel_id=90009a&app_id=h9044j&game_id=7622&game_name=%E7%8E%8B%E8%80%85%E8%8D%A3%E8%80%80&vcode=12.0.3&version_code=1203&cuid=2654CC14D2D3894DBF5808264AE2DAD7&ovr=6.0.1&device=Xiaomi_MI+5&net_type=1&client_id=1Yfyt44QSqu7PcVdDduBYQ%3D%3D&info_ms=fBzJ%2BCu4ZDAtl4CyHuZ%2FJQ%3D%3D&info_ma=XshbgIgi0V1HxXTqixI%2BKbgXtNtOP0%2Fn1WZtMWRWj5o%3D&mno=0&info_la=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&info_ci=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&mcc=0&clientversion=&bssid=VY%2BeiuZRJ%2FwaXmoLLVUrMODX1ZTf%2F2dzsWn2AOEM0I4%3D&os_level=23&os_id=dc451556fc0eeadb&resolution=1080_1920&dpi=480&client_ip=192.168.0.198&pdunid=a83d20d8'

    def __init__(self, save_path='./heros'):
        """Remember the download root and build the paged wallpaper-list URL.

        :param save_path: root directory for all downloaded images.
        """
        self.save_path = save_path
        self.time = str(time.time()).split('.')
        # ``{}`` is filled with the page number later; ``%s`` receives the
        # current Unix time (whole seconds) as the ``_`` query parameter.
        self.url = (
            'https://apps.game.qq.com/cgi-bin/ams/module/ishow/V1.0/query/workList_inc.cgi?activityId=2735&sVerifyCode=ABCD&sDataType=JSON&iListNum=20&totalpage=0&page={}&iOrder=0&iSortNumClose=1&iAMSActivityId=51991&_everyRead=true&iTypeId=2&iFlowId=267733&iActId=2735&iModuleId=2735&_=%s'
            % self.time[0]
        )

    def hello(self):
        """Print the banner and return ``self`` so calls can be chained."""
        print("*" * 50)
        print(' ' * 18 + '王者荣耀壁纸下载')
        print(' ' * 5 + '公众号：【全面资源集】')
        print("*" * 50)
        return self

    @staticmethod
    def _parse_size(text):
        """Return the chosen resolution index (1-8); anything else yields 6."""
        try:
            choice = int(text)
        except (TypeError, ValueError):
            # Empty or non-numeric input falls back to the default.
            return 6
        return choice if 1 <= choice <= 8 else 6

    @classmethod
    def _split_product_name(cls, product_name, hero_names):
        """Split a decoded ``sProdName`` into ``(hero directory, file name)``.

        The part before the first ``-`` names the hero and the part after it
        names the skin. When there is no ``-`` the hero name doubles as the
        file name.
        """
        parts = product_name.split('-')
        hero_name = cls._NAME_JUNK.sub('', parts[0])
        # Collapse variants such as "张良·幽兰居士" onto the plain hero name so
        # that all skins of one hero share a directory.
        for known in hero_names:
            if known in hero_name:
                hero_name = known
        if len(parts) > 1:
            pic_name = cls._NAME_JUNK.sub('', parts[1]) + '.jpg'
        else:
            pic_name = hero_name + '.jpg'
        return hero_name, pic_name

    def _save(self, url, path):
        """Download ``url`` to ``path``.

        The request is made before the file is opened, so a failed download
        does not leave an empty file behind that later runs would skip.
        """
        content = self.request(url).content
        with open(path, 'wb') as f:
            f.write(content)

    def _download_covers(self):
        """Download every hero cover and return the list of hero names."""
        heroes = self.request(self._HERO_LIST_URL).json()['list']
        cover_dir = os.path.join(self.save_path, '英雄封面')
        os.makedirs(cover_dir, exist_ok=True)  # tolerate re-runs
        total = len(heroes)
        hero_names = []
        for done, hero in enumerate(heroes, start=1):
            hero_names.append(hero['name'])
            cover_path = os.path.join(cover_dir, hero['name'] + '.png')
            if not os.path.exists(cover_path):
                self._save(hero['cover'], cover_path)
            sys.stdout.write('\r→ → → →正在爬取封面....爬取进度：%s|%s张' % (done, total))
            sys.stdout.flush()
        if heroes:
            print()  # finish the in-place progress line
        return hero_names

    def run(self):
        """Ask for a resolution, then download all covers and skin wallpapers."""
        print('↓' * 20 + ' 格式选择: ' + '↓' * 20)
        print('1.缩略图 2.1024x768 3.1280x720 4.1280x1024 5.1440x900 6.1920x1080 7.1920x1200 8.1920x1440')
        size = self._parse_size(input('请输入您想下载的格式序号，默认6：'))
        print()

        hero_names = self._download_covers()

        # Page 0 supplies the page count and is also the first page of images.
        first_page = self.request(self.url.format(0)).json()
        total_page = int(first_page['iTotalPages'])
        print('→ → → →开始爬取皮肤...（总共 {} 页）'.format(total_page))

        # Pages are numbered from 0, so the valid pages are 0..total_page-1.
        for page in range(total_page):
            result = first_page if page == 0 else self.request(self.url.format(page)).json()
            for now, item in enumerate(result["List"], start=1):
                hero_name, pic_name = self._split_product_name(
                    parse.unquote(item['sProdName']), hero_names)
                # Image URLs arrive percent-encoded.
                hero_url = parse.unquote(item['sProdImgNo_{}'.format(size)])
                hero_dir = os.path.join(self.save_path, hero_name)
                os.makedirs(hero_dir, exist_ok=True)
                save_name = os.path.join(hero_dir, pic_name)
                if not os.path.exists(save_name):
                    # The listed URL ends in "/200"; the download uses "/0".
                    self._save(hero_url.replace("/200", "/0"), save_name)
                    sys.stdout.write('\r第%s页 %s|第%s张' % (page, '▋' * 2 * now, now))
                    sys.stdout.flush()
        print('\n下载完成！')

    @retry(stop_max_attempt_number=3)
    def request(self, url):
        """GET ``url`` with a 10 second timeout, trying up to three times.

        :param url: address to fetch.
        :return: the ``requests`` response object.
        :raises requests.HTTPError: if the final attempt does not return
            status 200. Network errors from ``requests`` propagate as well.
        """
        response = requests.get(url, timeout=10)
        # Raise explicitly: an ``assert`` is stripped when Python runs with -O.
        if response.status_code != 200:
            raise requests.HTTPError(
                'unexpected status %s for %s' % (response.status_code, url),
                response=response)
        return response

if __name__ == "__main__":
    # Root directory that all images are downloaded into; adjust as needed.
    download_root = r'E:\win10\Pictures\电脑图片\王者荣耀壁纸'
    downloader = HonorOfKings(download_root)
    downloader.hello()
    downloader.run()

加线程代码

import os, time, requests, json, re, sys

import threadpool

from retrying import retry

from urllib import parse

from tqdm import tqdm

"""

文章描述：爬取王者荣耀英雄壁纸+封面

使用说明：直接在文件最底下设置图片保存目录（下载根目录），然后运行

作者：Felix(2020/7/30 14:42)

最新修改时间：2021-4-4

公众号：【全面资源集】

博客：https://blog.csdn.net/weixin_49012647

说明：（1）使用线程爬取，但是感觉没有快多少，网站图片加载速度不是很快，而且服务器有时会没有响应。

     （2）使用tqdm显示进度，但是该模块也会出问题，比如单位，img/s,结果变成s/img，而且加重程序负担

     （3）因为是二次更改，在函数里嵌套函数，非常不专业，所以尽量少用

"""

class HonorOfKings:
    """Threaded downloader for Honor of Kings hero covers and skin wallpapers.

    Covers are written to ``<save_path>/英雄封面/<hero>.png`` and each skin
    wallpaper to ``<save_path>/<hero>/<skin>.jpg``. Downloads run on a
    20-worker thread pool; skin progress is shown with ``tqdm``. Files that
    already exist are skipped, so an interrupted run can be started again.
    """

    # Characters removed from hero / skin names before they are used as
    # directory or file names.
    _NAME_JUNK = re.compile(r'[【】:.<>|·@#$%^&() ]')

    # Hero list endpoint (captured from app traffic by the original author).
    _HERO_LIST_URL = 'http://gamehelper.gm825.com/wzry/hero/list?channel_id=90009a&app_id=h9044j&game_id=7622&game_name=%E7%8E%8B%E8%80%85%E8%8D%A3%E8%80%80&vcode=12.0.3&version_code=1203&cuid=2654CC14D2D3894DBF5808264AE2DAD7&ovr=6.0.1&device=Xiaomi_MI+5&net_type=1&client_id=1Yfyt44QSqu7PcVdDduBYQ%3D%3D&info_ms=fBzJ%2BCu4ZDAtl4CyHuZ%2FJQ%3D%3D&info_ma=XshbgIgi0V1HxXTqixI%2BKbgXtNtOP0%2Fn1WZtMWRWj5o%3D&mno=0&info_la=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&info_ci=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&mcc=0&clientversion=&bssid=VY%2BeiuZRJ%2FwaXmoLLVUrMODX1ZTf%2F2dzsWn2AOEM0I4%3D&os_level=23&os_id=dc451556fc0eeadb&resolution=1080_1920&dpi=480&client_ip=192.168.0.198&pdunid=a83d20d8'

    def __init__(self, save_path='./heros'):
        """Remember the download root and build the paged wallpaper-list URL.

        :param save_path: root directory for all downloaded images.
        """
        self.save_path = save_path
        self.time = str(time.time()).split('.')
        # ``{}`` is filled with the page number later; ``%s`` receives the
        # current Unix time (whole seconds) as the ``_`` query parameter.
        self.url = (
            'https://apps.game.qq.com/cgi-bin/ams/module/ishow/V1.0/query/workList_inc.cgi?activityId=2735&sVerifyCode=ABCD&sDataType=JSON&iListNum=20&totalpage=0&page={}&iOrder=0&iSortNumClose=1&iAMSActivityId=51991&_everyRead=true&iTypeId=2&iFlowId=267733&iActId=2735&iModuleId=2735&_=%s'
            % self.time[0]
        )

    def hello(self):
        """Print the welcome banner.

        :return: self, so calls can be chained.
        """
        print("*" * 50)
        print(' ' * 18 + '王者荣耀壁纸下载')
        print(' ' * 5 + '公众号：【全面资源集】')
        print("*" * 50)
        return self

    def pool(self, function, arg):
        """Call ``function(item)`` for every item of ``arg`` on a thread pool.

        Blocks until every request has been processed.

        :param function: callable taking one item.
        :param arg: list of items, one per call.
        """
        num_workers = 20
        workers = threadpool.ThreadPool(num_workers)
        for req in threadpool.makeRequests(function, arg):
            workers.putRequest(req)
        try:
            workers.wait()
        finally:
            # A new pool is created on every call; without dismissing them the
            # idle worker threads of each pool would pile up.
            workers.dismissWorkers(num_workers)

    @staticmethod
    def _parse_size(text):
        """Return the chosen resolution index (1-8); anything else yields 6."""
        try:
            choice = int(text)
        except (TypeError, ValueError):
            # Empty or non-numeric input falls back to the default.
            return 6
        return choice if 1 <= choice <= 8 else 6

    @classmethod
    def _split_product_name(cls, product_name, hero_names):
        """Split a decoded ``sProdName`` into ``(hero directory, file name)``.

        The part before the first ``-`` names the hero and the part after it
        names the skin. When there is no ``-`` the hero name doubles as the
        file name.
        """
        parts = product_name.split('-')
        hero_name = cls._NAME_JUNK.sub('', parts[0])
        # Collapse variants of a hero name onto the plain name so that all
        # skins of one hero share a directory.
        for known in hero_names:
            if known in hero_name:
                hero_name = known
        if len(parts) > 1:
            pic_name = cls._NAME_JUNK.sub('', parts[1]) + '.jpg'
        else:
            pic_name = hero_name + '.jpg'
        return hero_name, pic_name

    def _save(self, url, path):
        """Download ``url`` to ``path``.

        The request is made before the file is opened, so a failed download
        does not leave an empty file behind that later runs would skip.
        """
        content = self.request(url).content
        with open(path, 'wb') as f:
            f.write(content)

    def _download_skin(self, task, hero_names):
        """Download one wallpaper described by ``{'name': ..., 'url': ...}``."""
        hero_name, pic_name = self._split_product_name(task['name'], hero_names)
        hero_dir = os.path.join(self.save_path, hero_name)
        # The name may not match any listed hero, so create the directory here.
        os.makedirs(hero_dir, exist_ok=True)
        save_name = os.path.join(hero_dir, pic_name)
        if not os.path.exists(save_name):
            # The listed URL ends in "/200"; the download uses "/0".
            self._save(task['url'].replace("/200", "/0"), save_name)

    def run(self):
        """The program entry: ask for a resolution, then download everything."""
        import threading  # local import: only needed for the progress lock

        print('↓' * 20 + ' 格式选择: ' + '↓' * 20)
        print('1.缩略图 2.1024x768 3.1280x720 4.1280x1024 5.1440x900 6.1920x1080 7.1920x1200 8.1920x1440')
        size = self._parse_size(input('请输入您想下载的格式序号，默认6：'))
        print()

        heroes = self.request(self._HERO_LIST_URL).json()['list']
        hero_names = [hero['name'] for hero in heroes]
        cover_tasks = [{'name': hero['name'], 'cover': hero['cover']} for hero in heroes]

        cover_dir = os.path.join(self.save_path, '英雄封面')
        os.makedirs(cover_dir, exist_ok=True)  # tolerate re-runs
        for name in hero_names:
            os.makedirs(os.path.join(self.save_path, name), exist_ok=True)

        total = len(cover_tasks)
        done = 0
        progress_lock = threading.Lock()

        def down_cover(task):
            """Download one cover and update the shared progress line."""
            nonlocal done
            cover_path = os.path.join(cover_dir, task['name'] + '.png')
            if not os.path.exists(cover_path):
                self._save(task['cover'], cover_path)
            # The counter and the console are shared between worker threads.
            with progress_lock:
                done += 1
                sys.stdout.write('\r→ → → →正在爬取封面....爬取进度：%s|%s张' % (done, total))
                sys.stdout.flush()

        self.pool(down_cover, cover_tasks)
        if cover_tasks:
            print()  # finish the in-place progress line

        # Page 0 supplies the page count and is also the first page of images.
        first_page = self.request(self.url.format(0)).json()
        total_page = int(first_page['iTotalPages'])
        print('→ → → →开始爬取皮肤（总共 {} 页）...'.format(total_page))

        # Pages are numbered from 0, so the valid pages are 0..total_page-1.
        for page in range(total_page):
            result = first_page if page == 0 else self.request(self.url.format(page)).json()
            # Names and image URLs arrive percent-encoded.
            tasks = [
                {
                    'name': parse.unquote(item['sProdName']),
                    'url': parse.unquote(item['sProdImgNo_{}'.format(size)]),
                }
                for item in result["List"]
            ]
            with tqdm(total=len(tasks), leave=False, unit='img', ncols=100) as tq:
                tq.set_description('第%s页' % page)

                def down(task, bar=tq):
                    """Download one wallpaper, then advance the page's bar."""
                    try:
                        self._download_skin(task, hero_names)
                    finally:
                        bar.update(1)
                        time.sleep(0.4)  # brief pause per worker between downloads

                self.pool(down, tasks)
        print('下载完成！')

    @retry(stop_max_attempt_number=3)
    def request(self, url):
        """Send a GET request with a 10 second timeout, trying up to three times.

        :param url: the url of request
        :return: the ``requests`` response object
        :raises requests.HTTPError: if the final attempt does not return
            status 200. Network errors from ``requests`` propagate as well.
        """
        response = requests.get(url, timeout=10)
        # Raise explicitly: an ``assert`` is stripped when Python runs with -O.
        if response.status_code != 200:
            raise requests.HTTPError(
                'unexpected status %s for %s' % (response.status_code, url),
                response=response)
        return response

if __name__ == "__main__":
    # Root directory that all images are downloaded into; adjust as needed.
    download_root = r'E:\win10\Pictures\电脑图片\王者荣耀壁纸'
    downloader = HonorOfKings(save_path=download_root)
    downloader.hello()
    downloader.run()

更多资源请关注：【全面资源集】

python 爬取王者荣耀英雄皮肤代码的更多相关文章

Python爬取 | 王者荣耀英雄皮肤海报
这里只展示代码,具体介绍请点击下方链接. Python爬取 | 王者荣耀英雄皮肤海报 import requests import re import os import time import wi ...
利用python爬取王者荣耀英雄皮肤图片
前两天看到同学用python爬下来LOL的皮肤图片,感觉挺有趣的,我也想试试,于是决定来爬一爬王者荣耀的英雄和皮肤图片. 首先,我们找到王者的官网http://pvp.qq.com/web201605 ...
python学习--第二天爬取王者荣耀英雄皮肤
今天目的是爬取所有英雄皮肤在爬取所有之前,先完成一张皮肤的爬取打开anacond调出编译器Jupyter Notebook 打开王者荣耀官网下拉找到位于网页右边的英雄/皮肤点击[+更多] 进入 ...
Python 爬取 "王者荣耀.英雄壁纸" 过程中的矛和盾
1. 前言学习爬虫,最好的方式就是自己编写爬虫程序. 爬取目标网站上的数据,理论上讲是简单的,无非就是分析页面中的资源链接.然后下载.最后保存. 但是在实施过程却会遇到一些阻碍. 很多网站为了阻止爬 ...
用Python爬取"王者农药"英雄皮肤
0.引言作为一款现象级游戏,王者荣耀,想必大家都玩过或听过,游戏里中各式各样的英雄,每款皮肤都非常精美,用做电脑壁纸再合适不过了.本篇就来教大家如何使用Python来爬取这些精美的英雄皮肤. 1.环 ...
用Python爬取"王者农药"英雄皮肤原
padding: 10px; border-bottom: 1px solid #d3d3d3; background-color: #2e8b57; } .second-menu-item { pa ...
python爬取王者荣耀全英雄皮肤
import os import requests url = 'https://pvp.qq.com/web201605/js/herolist.json' herolist = requests. ...
python爬虫---爬取王者荣耀全部皮肤图片
代码: import requests json_headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win ...
python 爬取王者荣耀高清壁纸
代码地址如下:http://www.demodashi.com/demo/13104.html 一.前言打过王者的童鞋一般都会喜欢里边设计出来的英雄吧,特别想把王者荣耀的英雄的高清图片当成电脑桌面 ...

随机推荐

在不使用外延层的同轴半绝缘衬底材料上制作4H-SIC横向双重注入金属氧化物半导体场效应晶体管
在不使用外延层的同轴半绝缘衬底材料上制作4H-SIC横向双重注入金属氧化物半导体场效应晶体管杂志:日本应用物理杂志在不使用外延层在同轴的半绝缘SIC衬底上制作4H-SIC横向双重注入金属氧化物 ...
PAT-1136（A Delayed Palindrome）字符串处理+字符串和数字间的转换
A Delayed Palindrome PAT-1136 我这里将数字转换为字符串使用的是stringstream字符串流扩充:将字符串转换为数字可以使用stoi函数,函数头为cstdlib #i ...
redis基础:redis下载安装与配置,redis数据类型使用,redis常用指令,jedis使用,RDB和AOF持久化
知识点梳理课堂讲义课程计划 1. REDIS 入门 (了解) (操作) 2. 数据类型 (重点) (操作) (理解) 3. 常用指令 (操作) 4. Jedis (重点) (操作) ...
CSDN博客转MD格式
基于大神作品修改原文,使用了一下发现有一些小问题,爬取的博客标题如果含有字符是Windows不支持的命名格式,会卡在界面,进行了一下优化,加了一些字符过滤处理,但是tomd模块对html的处理还是不是 ...
SpringCloud-服务与注册
SpringCloud- Eureka服务注册与发现 1.概述 springcloud是一个非常优秀的微服务框架,要管理众多的服务,就需要对这些服务进行治理,管理每个服务与每个服务之间的依赖关系,可以 ...
MySQL全面瓦解24：构建高性能索引（策略篇）
学习如果构建高性能的索引之前,我们先来了解下之前的知识,以下两篇是基础原理,了解之后,对面后续索引构建的原则和优化方法会有更清晰的理解: MySQL全面瓦解22:索引的介绍和原理分析 MySQL全面瓦 ...
给出镜像FreeBSD 基本要求
硬盘 ports 500G update 500G portsnap 500G pkg arm64 amd64 i386 11-12-13 4TB 网络流量一个月专线大概2w RMB CPU 内存其 ...
WPF 基础 - 在模板中找元素
1. 在 ControlTemplate 中寻找元素 <Window.Resources> <ControlTemplate x:Key="cTmp"> & ...
利用jmeter对WebRTC应用进行压力测试（java）
利用jmeter对WebRTC应用进行压力测试(java) 说明:WebRTC是一款开源的多人即时视频API,与一般的http请求不同,webrtc应用实际压力主要是码流最近负责了一个WebRTC的 ...
LZZY高级语言程序设计之169页**5.17
import java.util.Scanner;public class MQ3 { public static void main(String[] args) { Scanner sc = ne ...

python 爬取王者荣耀英雄皮肤代码

python 爬取王者荣耀英雄皮肤代码的更多相关文章

随机推荐

热门专题