方案说明

功能要求:实现网页加载后将页面截取成长图片
涉及模块:PyQt5、PIL(Pillow)
逻辑说明:

1:完成窗口设置,利用PyQt5的QWebEngineView加载网页地址,待网页加载完成后,调用check_page;
class MainWindow(QMainWindow):
    """Frameless main window that hosts the web view to be captured."""

    def __init__(self, parent=None):
        """Set the window title and window flags.

        Args:
            parent: Optional parent widget handed to QMainWindow.
        """
        super().__init__(parent)
        self.setWindowTitle('易哈佛')
        self.temp_height = 0
        self.setWindowFlag(Qt.WindowMinMaxButtonsHint, False)  # disable maximise / minimise
        # self.setWindowFlag(Qt.WindowStaysOnTopHint, True)  # keep the window on top
        self.setWindowFlag(Qt.FramelessWindowHint, True)  # frameless window

    def urlScreenShot(self, url):
        """Load *url* in a QWebEngineView and call check_page when loading ends.

        Args:
            url: Address of the page to capture.
        """
        self.browser = QWebEngineView()
        # Connect the handler before starting the load so it is in place
        # by the time loadFinished is emitted.
        self.browser.loadFinished.connect(self.check_page)
        self.browser.load(QUrl(url))
        self.setGeometry(self.chose_screen())
        self.setCentralWidget(self.browser)

    def get_page_size(self):
        """Return the page content size as ``(width, height)``.

        The two values are also stored on ``set_width`` and ``set_height``.
        """
        size = self.browser.page().contentsSize()
        self.set_width, self.set_height = size.width(), size.height()
        return self.set_width, self.set_height

    def chose_screen(self):
        """Return the 750x1370 window rectangle, placed on a screen that fits it.

        The first screen whose available area is strictly larger than the
        window in both directions is used; otherwise the rectangle is placed
        at the origin.
        """
        width, height = 750, 1370
        # QApplication.screens() replaces the deprecated QDesktopWidget API.
        for screen in QApplication.screens():
            rect = screen.availableGeometry()
            if rect.width() > width and rect.height() > height:
                return QRect(rect.left(), rect.top(), width, height)
        return QRect(0, 0, width, height)


if __name__ == '__main__':
    app = QApplication(sys.argv)
    win = MainWindow()
    win.show()
    # sys.exit() passes the event-loop result on as the process exit code;
    # calling app.exit() after the loop has already returned does not.
    sys.exit(app.exec_())

2:收集页面高度,并计算分次截屏的次数和余量高度;实例化图片合并工具,设置定时器,超时信号发出后,执行exe_command;

    def check_page(self):
        """Plan the capture once the page has loaded and start the timer.

        Splits the page height into ``page`` full-window screenshots plus a
        remainder of ``over_flow_size`` pixels, creates the ScreenShotMerge
        helper and starts a 400 ms timer that drives exe_command.
        """
        _, p_height = self.get_page_size()
        # get_page_size() returns the values of contentsSize(), which are
        # floats; do the arithmetic on whole pixels.
        self.page, self.over_flow_size = divmod(int(p_height), self.height())
        if self.page == 0:
            # The page is shorter than the window: one screenshot already
            # shows all of it. Clear the remainder as well, otherwise a
            # second, cropped copy of the same content would be appended.
            self.page = 1
            self.over_flow_size = 0
        self.ssm = ScreenShotMerge(self.page, self.over_flow_size)
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.exe_command)
        self.timer.setInterval(400)
        self.timer.start()

3:exe_command用来控制截图次数,并在每次截图完成后控制网页向下滚动一屏的高度;当所有页面都已截取后,完成图片合并。

    def exe_command(self):
        """Perform one timer tick of the capture sequence.

        ``page`` counts down by one on every tick:

        * ``page > 0``: take a full-window screenshot, then scroll down.
        * ``page == 0``: take the remainder screenshot if ``over_flow_size``
          is positive.
        * ``page < 0``: stop the timer, merge the images and close the window.
        """
        if self.page > 0:
            self.screen_shot()
            self.run_js()
        elif self.page < 0:
            self.timer.stop()
            self.ssm.image_merge()
            self.close()
        elif self.over_flow_size > 0:
            self.screen_shot()
        self.page -= 1

    def run_js(self):
        """Scroll the page down by one window height.

        ``window.scrollBy`` is relative to the current offset and is clamped
        by the browser at the end of the document, so no manual bookkeeping
        of ``scrollTop`` / ``scrollHeight`` is needed. The previous script
        jumped back to ``clientHeight`` when it overshot, and it read
        ``documentElement.scrollTop``, which stays 0 on quirks-mode pages.
        """
        command = 'window.scrollBy(0, {})'.format(self.height())
        self.browser.page().runJavaScript(command)

4:screen_shot在每次截图完成后将图片保存,并将图片对象交由图片合并工具保存到列表中。

   def screen_shot(self):
       """Grab the web view, save it as ``temp.png`` and queue it for merging."""
       target = '{}/temp.png'.format(self.ssm.root_path)
       window_id = int(self.browser.winId())
       # The same temp file is rewritten on every call; add_im() reads it
       # back and stores its own numbered copy.
       QApplication.primaryScreen().grabWindow(window_id).save(target)
       self.ssm.add_im(target)

5:截图合并工具,在每次截图完成后保存图片对象,并完成余量截图的裁剪和所有截图的合并。

class ScreenShotMerge():
    """Collect window screenshots and stitch them into one tall image.

    Args:
        page: Number of full-window screenshots that precede the remainder
            screenshot.
        over_flow_size: Height in pixels of the remainder at the bottom of
            the page (0 when there is none).
    """

    # Pixels cut from the right edge of a multi-part merge
    # (presumably the vertical scrollbar strip -- confirm for your platform).
    SCROLLBAR_WIDTH = 15
    # Explicit white. A bare integer such as 255 is read by Pillow as a
    # packed colour value for RGB images, not as white.
    BACKGROUND = (255, 255, 255)

    def __init__(self, page, over_flow_size):
        self.im_list = []
        # Values produced by divmod() on a float page height are floats;
        # normalise them so counting and cropping work on whole pixels.
        self.page = int(page)
        self.over_flow_size = int(over_flow_size)
        self.get_path()

    def get_path(self):
        """Create the ``temp`` folder beside this file and set the output path."""
        self.root_path = Path(__file__).parent.joinpath('temp')
        # exist_ok avoids the race of a separate exists() check.
        self.root_path.mkdir(parents=True, exist_ok=True)
        self.save_path = self.root_path.joinpath('merge.png')

    def add_im(self, path):
        """Read the screenshot at *path*, store a numbered copy and queue it.

        Once ``page`` images are queued, the next one is treated as the
        remainder screenshot and cropped by reedit_image().

        Args:
            path: Location of the screenshot file to read.
        """
        if len(self.im_list) == self.page:
            im = self.reedit_image(path)
        else:
            # Copy the pixels and close the file right away: the caller
            # overwrites the same file with the next screenshot.
            with Image.open(path) as src:
                im = src.copy()
        im.save(self.root_path.joinpath('{}.png'.format(len(self.im_list) + 1)))
        self.im_list.append(im)

    def get_new_size(self):
        """Return ``(widest width, summed height)`` of the queued images."""
        sizes = [img.size for img in self.im_list]
        max_width = max((width for width, _ in sizes), default=0)
        total_height = sum(height for _, height in sizes)
        return max_width, total_height

    def image_merge(self):
        """Stack the queued images top to bottom and save the result.

        Raises:
            ValueError: If no image has been queued.
        """
        if not self.im_list:
            raise ValueError('no screenshots to merge')
        if len(self.im_list) > 1:
            max_width, total_height = self.get_new_size()
            canvas_size = (max_width - self.SCROLLBAR_WIDTH, total_height)
            new_img = Image.new('RGB', canvas_size, self.BACKGROUND)
            y = 0
            for img in self.im_list:
                new_img.paste(img, (0, y))
                y += img.size[1]
        else:
            # A single screenshot keeps its full width; it is only
            # flattened onto an RGB canvas.
            only = self.im_list[0]
            new_img = Image.new('RGB', only.size, self.BACKGROUND)
            new_img.paste(only, (0, 0))
        new_img.save(self.save_path)
        print('截图成功:', self.save_path)

    def reedit_image(self, path):
        """Return the bottom ``over_flow_size`` pixels of the image at *path*.

        Args:
            path: Location of the remainder screenshot.
        """
        with Image.open(path) as obj:
            width, height = obj.size
            return obj.crop((0, height - self.over_flow_size, width, height))

截图功能完整代码

#!/usr/bin/env python
# -*- coding:UTF-8 -*-
# Author:Leslie-x
"""Capture a full-length screenshot of a web page with PyQt5 and Pillow.

The page is loaded in a QWebEngineView, photographed one window height at a
time while it is scrolled, and the pieces are stitched vertically into
``temp/merge.png`` beside this script.
"""
import sys
from pathlib import Path

from PIL import Image
from PyQt5.QtCore import QRect, Qt, QTimer, QUrl
from PyQt5.QtWebEngineWidgets import QWebEngineView
from PyQt5.QtWidgets import QApplication, QMainWindow


class ScreenShotMerge():
    """Collect window screenshots and stitch them into one tall image.

    Args:
        page: Number of full-window screenshots that precede the remainder
            screenshot.
        over_flow_size: Height in pixels of the remainder at the bottom of
            the page (0 when there is none).
    """

    # Pixels cut from the right edge of a multi-part merge
    # (presumably the vertical scrollbar strip -- confirm for your platform).
    SCROLLBAR_WIDTH = 15
    # Explicit white. A bare integer such as 255 is read by Pillow as a
    # packed colour value for RGB images, not as white.
    BACKGROUND = (255, 255, 255)

    def __init__(self, page, over_flow_size):
        self.im_list = []
        # Values produced by divmod() on a float page height are floats;
        # normalise them so counting and cropping work on whole pixels.
        self.page = int(page)
        self.over_flow_size = int(over_flow_size)
        self.get_path()

    def get_path(self):
        """Create the ``temp`` folder beside this file and set the output path."""
        self.root_path = Path(__file__).parent.joinpath('temp')
        # exist_ok avoids the race of a separate exists() check.
        self.root_path.mkdir(parents=True, exist_ok=True)
        self.save_path = self.root_path.joinpath('merge.png')

    def add_im(self, path):
        """Read the screenshot at *path*, store a numbered copy and queue it.

        Once ``page`` images are queued, the next one is treated as the
        remainder screenshot and cropped by reedit_image().

        Args:
            path: Location of the screenshot file to read.
        """
        if len(self.im_list) == self.page:
            im = self.reedit_image(path)
        else:
            # Copy the pixels and close the file right away: screen_shot()
            # overwrites the same temp file on the next timer tick.
            with Image.open(path) as src:
                im = src.copy()
        im.save(self.root_path.joinpath('{}.png'.format(len(self.im_list) + 1)))
        self.im_list.append(im)

    def get_new_size(self):
        """Return ``(widest width, summed height)`` of the queued images."""
        sizes = [img.size for img in self.im_list]
        max_width = max((width for width, _ in sizes), default=0)
        total_height = sum(height for _, height in sizes)
        return max_width, total_height

    def image_merge(self):
        """Stack the queued images top to bottom and save the result.

        Raises:
            ValueError: If no image has been queued.
        """
        if not self.im_list:
            raise ValueError('no screenshots to merge')
        if len(self.im_list) > 1:
            max_width, total_height = self.get_new_size()
            canvas_size = (max_width - self.SCROLLBAR_WIDTH, total_height)
            new_img = Image.new('RGB', canvas_size, self.BACKGROUND)
            y = 0
            for img in self.im_list:
                new_img.paste(img, (0, y))
                y += img.size[1]
        else:
            # A single screenshot keeps its full width; it is only
            # flattened onto an RGB canvas.
            only = self.im_list[0]
            new_img = Image.new('RGB', only.size, self.BACKGROUND)
            new_img.paste(only, (0, 0))
        new_img.save(self.save_path)
        print('截图成功:', self.save_path)

    def reedit_image(self, path):
        """Return the bottom ``over_flow_size`` pixels of the image at *path*.

        Args:
            path: Location of the remainder screenshot.
        """
        with Image.open(path) as obj:
            width, height = obj.size
            return obj.crop((0, height - self.over_flow_size, width, height))


class MainWindow(QMainWindow):
    """Frameless window that loads a page and captures it piece by piece."""

    def __init__(self, parent=None):
        """Set the window title and window flags.

        Args:
            parent: Optional parent widget handed to QMainWindow.
        """
        super().__init__(parent)
        self.setWindowTitle('易哈佛')
        self.temp_height = 0
        self.setWindowFlag(Qt.WindowMinMaxButtonsHint, False)  # disable maximise / minimise
        # self.setWindowFlag(Qt.WindowStaysOnTopHint, True)  # keep the window on top
        self.setWindowFlag(Qt.FramelessWindowHint, True)  # frameless window

    def urlScreenShot(self, url):
        """Load *url* in a QWebEngineView and call check_page when loading ends.

        Args:
            url: Address of the page to capture.
        """
        self.browser = QWebEngineView()
        # Connect the handler before starting the load so it is in place
        # by the time loadFinished is emitted.
        self.browser.loadFinished.connect(self.check_page)
        self.browser.load(QUrl(url))
        self.setGeometry(self.chose_screen())
        self.setCentralWidget(self.browser)

    def get_page_size(self):
        """Return the page content size as ``(width, height)``.

        The two values are also stored on ``set_width`` and ``set_height``.
        """
        size = self.browser.page().contentsSize()
        self.set_width, self.set_height = size.width(), size.height()
        return self.set_width, self.set_height

    def chose_screen(self):
        """Return the 750x1370 window rectangle, placed on a screen that fits it.

        The first screen whose available area is strictly larger than the
        window in both directions is used; otherwise the rectangle is placed
        at the origin.
        """
        width, height = 750, 1370
        # QApplication.screens() replaces the deprecated QDesktopWidget API.
        for screen in QApplication.screens():
            rect = screen.availableGeometry()
            if rect.width() > width and rect.height() > height:
                return QRect(rect.left(), rect.top(), width, height)
        return QRect(0, 0, width, height)

    def check_page(self):
        """Plan the capture once the page has loaded and start the timer.

        Splits the page height into ``page`` full-window screenshots plus a
        remainder of ``over_flow_size`` pixels, creates the ScreenShotMerge
        helper and starts a 400 ms timer that drives exe_command.
        """
        _, p_height = self.get_page_size()
        # contentsSize() reports floats; do the arithmetic on whole pixels.
        self.page, self.over_flow_size = divmod(int(p_height), self.height())
        if self.page == 0:
            # The page is shorter than the window: one screenshot already
            # shows all of it. Clear the remainder as well, otherwise a
            # second, cropped copy of the same content would be appended.
            self.page = 1
            self.over_flow_size = 0
        self.ssm = ScreenShotMerge(self.page, self.over_flow_size)
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.exe_command)
        self.timer.setInterval(400)
        self.timer.start()

    def exe_command(self):
        """Perform one timer tick of the capture sequence.

        ``page`` counts down by one on every tick:

        * ``page > 0``: take a full-window screenshot, then scroll down.
        * ``page == 0``: take the remainder screenshot if ``over_flow_size``
          is positive.
        * ``page < 0``: stop the timer, merge the images and close the window.
        """
        if self.page > 0:
            self.screen_shot()
            self.run_js()
        elif self.page < 0:
            self.timer.stop()
            self.ssm.image_merge()
            self.close()
        elif self.over_flow_size > 0:
            self.screen_shot()
        self.page -= 1

    def run_js(self):
        """Scroll the page down by one window height.

        ``window.scrollBy`` is relative to the current offset and is clamped
        by the browser at the end of the document, so no manual bookkeeping
        of ``scrollTop`` / ``scrollHeight`` is needed. The previous script
        jumped back to ``clientHeight`` when it overshot, and it read
        ``documentElement.scrollTop``, which stays 0 on quirks-mode pages.
        """
        command = 'window.scrollBy(0, {})'.format(self.height())
        self.browser.page().runJavaScript(command)

    def screen_shot(self):
        """Grab the web view, save it as ``temp.png`` and queue it for merging."""
        screen = QApplication.primaryScreen()
        winid = self.browser.winId()
        pix = screen.grabWindow(int(winid))
        # The same temp file is rewritten on every call; add_im() stores its
        # own numbered copy.
        name = '{}/temp.png'.format(self.ssm.root_path)
        pix.save(name)
        self.ssm.add_im(name)


if __name__ == '__main__':
    url = 'http://blog.sina.com.cn/lm/rank/focusbang//'
    app = QApplication(sys.argv)
    win = MainWindow()
    win.urlScreenShot(url)
    win.show()
    # sys.exit() passes the event-loop result on as the process exit code;
    # calling app.exit() after the loop has already returned does not.
    sys.exit(app.exec_())

Python实现网页截图(PyQT5)的更多相关文章

  1. Python中使用 Selenium 实现网页截图实例

    Selenium 是一个可以让浏览器自动化地执行一系列任务的工具,常用于自动化测试.不过,也可以用来给网页截图.目前,它支持 Java.C#.Ruby 以及 Python 四种客户端语言.如果你使用 ...

  2. Python各种花式截图工具,截到你手软

    前言: 最近,项目中遇到了一个关于实现通过给定URL,实现对网页屏幕进行截图的一个功能,前面代码中已经用python的第三方库实现了截图功能,但在上线以后出现了一些bug,所以就改bug的任务就落在了 ...

  3. Python下载网页的几种方法

    get和post方式总结 get方式:以URL字串本身传递数据参数,在服务器端可以从'QUERY_STRING'这个变量中直接读取,效率较高,但缺乏安全性,也无法来处理复杂的数据(只能是字符串,比如在 ...

  4. 使用PhantomJS实现网页截图服务

    这是上半年遇到的一个小需求,想实现网页的抓取,并保存为图片.研究了不少工具,效果都不理想,不是显示太差了(Canvas.Html2Image.Cobra),就是性能不怎么样(如SWT的Brower). ...

  5. html2canvas 网页截图 下载 上传

    利用html2canvas插件 对网页截图 并下载和上传图片. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//E ...

  6. Python编写网页爬虫爬取oj上的代码信息

    OJ升级,代码可能会丢失. 所以要事先备份. 一開始傻傻的复制粘贴, 后来实在不能忍, 得益于大潇的启示和聪神的原始代码, 网页爬虫走起! 已经有段时间没看Python, 这次网页爬虫的原始代码是 p ...

  7. 爬虫学习笔记(1)-- 利用Python从网页抓取数据

    最近想从一个网站上下载资源,懒得一个个的点击下载了,想写一个爬虫把程序全部下载下来,在这里做一个简单的记录 Python的基础语法在这里就不多做叙述了,黑马程序员上有一个基础的视频教学,可以跟着学习一 ...

  8. iPhone 收藏网址[添加到书签] 和 [添加到主屏幕] 显示自定义图标,而不是网页截图

    iPhone 收藏网址[添加到书签] 和 [添加到主屏幕] 显示自定义图标,而不是网页截图: <!-- Safari浏览器[添加到书签] --> <link rel="sh ...

  9. chrome也可以整张网页截图,保存完整网页为图片

    转自:http://www.webkaka.com/blog/archives/chrome-save-a-webpage.html 关于浏览器截图,一直以为Chrome无能为力,最近发现,原来Chr ...

随机推荐

  1. 四则运算 SPEC 20160911

    本文档随时可能修改,并且没有另行通知. 请确保每一次在开始修改你的代码前,读标题中的日期,如果晚于你上次阅读, 请重读一次. 教师节你去探望初中数学老师,她感叹你当年真是个优秀学生啊,从来不报怨作 业 ...

  2. Daily Scrumming* 2015.12.10 今天集体请假一天

    今天由于所有成员均在进行编译原理实验的相关工作,全体请假一天.......

  3. Python 四种数值类型(int,long,float,complex)区别及转换

    Python支持四种不同的数值类型,包括int(整数)long(长整数)float(浮点实际值)complex (复数), 数字数据类型存储数值.他们是不可改变的数据类型,这意味着改变数字数据类型的结 ...

  4. final发布48小时用户调查报告

    小组名称:飞天小女警 项目名称:礼物挑选小工具 小组成员:沈柏杉(组长).程媛媛.杨钰宁.谭力铭 调查问卷标题:用户调查报告 调查目的:在final版本发布后的用户调查报告 调查问卷的数量:11 问卷 ...

  5. PPT 遥控器

    1. 下载 最新版本: 百度袋鼠输入: http://daishu.baidu.com/?from=pptweb 百度PPT遥控器:http://ppt.baidu.com/ 2. 安装过程忽略 3. ...

  6. 一条sql语句搞定基于mysql的sql执行顺序的基本理解

    对数据库基本操作是每个程序员基本功,如何理解并快速记住sql执行的顺序呢,其实一条复杂的sql就能搞定: SELECT DISTINCT <select_list> FROM <le ...

  7. maven基础知识汇总

    maven的dependency中scope=compile和provided的区别 对于scope=compile的情况(默认scope),也就是说这个项目在编译,测试,运行阶段都需要这个artif ...

  8. hihoCoder 1632 Secret Poems(ACM-ICPC北京赛区2017网络同步赛)

    时间限制:1000ms 单点时限:1000ms 内存限制:256MB 描述 The Yongzheng Emperor (13 December 1678 – 8 October 1735), was ...

  9. ubuntu中报错:无法分配内存 (errno=12)

    今天碰到一个大坑,差点要了老命! 之前装了ubuntu双系统,后来崩溃,想在就想装VMware虚拟机,再装ubuntu,一切进展顺利,直到在虚拟机的ubuntu中安装IDEA时出现了问题. 安装过程并 ...

  10. python_面向对象小试题

    打印啥? class Animal(object): hobby = "eat" def run(self): print(self.hobby) return self.hobb ...