2018-02-16 GetSameTypeQuestion
'目前存在的BUG
'图片补丁存在多个URL
Private Declare Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long
Private Declare Function DeleteUrlCacheEntry Lib "wininet" Alias "DeleteUrlCacheEntryA" (ByVal lpszUrlName As String) As Long
Sub DownloadImageName(ByVal ImageURL As String, ByVal ImagePath As String)
Dim lngRetVal As Long
lngRetVal = URLDownloadToFile(0, ImageURL, ImagePath, 0, 0)
If lngRetVal = 0 Then
DeleteUrlCacheEntry ImageURL '清除缓存
'MsgBox "成功"
Else
'MsgBox "失败"
End If
End Sub
Sub LoopGetSubject()
Dim StartTime As Variant
Dim UsedTime As Variant
StartTime = VBA.Timer
Dim Sht As Worksheet
Set Sht = ThisWorkbook.ActiveSheet
With Sht
EndRow = .Cells(.Cells.Rows.Count, 1).End(xlUp).Row
For i = 2 To EndRow
SetFontRed .Cells(i, 1).Resize(1, 3)
FindText = Mid(.Cells(i, 3).Text, 4, Len(.Cells(i, 3).Text) - 8)
ExamUrl = .Cells(i, 2).Text
Call GetExamTextByUrl(ExamUrl, FindText)
Next i
End With
Set Sht = Nothing
UsedTime = VBA.Timer - StartTime
Debug.Print "UsedTime :" & Format(UsedTime, "#0.0000 Seconds")
MsgBox "UsedTime :" & Format(UsedTime, "#0.0000 Seconds")
End Sub
Sub GetSubject()
SetFontRed Application.ActiveCell
FindText = Mid(Application.ActiveCell.Text, 4, Len(Application.ActiveCell.Text) - 8)
ExamUrl = Application.ActiveCell.Offset(0, -1).Text
Call GetExamTextByUrl(ExamUrl, FindText)
End Sub
Sub GetExamTextByUrl(ByVal ExamUrl As String, ByVal FindText As String)
Dim Subject As String
Dim Question As String
Dim ImageURL As String
Dim Answer As String
Dim HasGetContent As Boolean
Dim docName As String
Dim docPath As String
Dim Independent As Boolean
Dim IsQuestion As Boolean
Dim IsAnswer As Boolean
Dim oneP As Object
Dim nextTag As Object 'send request
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", ExamUrl, False
.Send
WebText = .responsetext
'Debug.Print WebText
End With
With CreateObject("htmlfile")
.write WebText
Set examdiv = .getElementById("sina_keyword_ad_area2")
'获取试卷文本内容
ExamText = examdiv.innerText
'判断试卷是否含有独立答案
Independent = ExamText Like "*参考答案*"
'Debug.Print " Independent "; Independent
'设定搜集题目Word文档名称和路径
docName = Application.ActiveSheet.Name & "_题目搜集.doc"
docPath = ThisWorkbook.Path & "\" & docName
'判断某个段落是否为题目/答案的开始
IsQuestion = False
IsAnswer = False
'判断是否已经提取到内容
HasGetContent = False
'循环所有段落
For Each oneP In .getElementsByTagName("p")
If HasGetContent = False Then
'判断某段内容是否为题号行
If oneP.innerText Like "##.*" Or oneP.innerText Like "##.*" Then
Subject = ""
Question = ""
ImageURL = ""
Answer = ""
'开始记录题干内容
Subject = oneP.innerText
'Debug.Print OneP.innerText
Else
If InStr(oneP.innerText, FindText) = 0 Then
'过滤不相干的问题,仅保留符合条件的问题
If Not RegTest(oneP.innerText, "([\((]\d[\))]).*") Then
'继续记录问题内容
Subject = Subject & oneP.innerText
End If
End If
End If
'提取题目图片的地址
Set nextTag = oneP.NextSibling
If Not nextTag Is Nothing Then
If UCase(nextTag.tagName) = "A" Then
If nextTag.HasChildNodes Then
If nextTag.href Like "http://photo.blog.sina.com.cn/showpic.html*" Then
ImageURL = ImageURL & "|" & nextTag.FirstChild.getAttribute("real_src")
'Debug.Print ImageURL
End If
End If
End If
End If '提取题目的序号和问题的序号
If InStr(oneP.innerText, FindText) > 0 Then
SubjectIndex = RegGet(Subject, "(\d{1,2})[..].*")
Question = oneP.innerText
questionIndex = RegGet(Question, "[\((](\d)[\))].*")
'Debug.Print "题序:"; SubjectIndex; " 问序: "; questionIndex
HasGetContent = True
End If Else
'提取内容后 开始找答案
'试卷不含独立答案,答案就附在每道题后面
If Independent = False Then If IsAnswer = False Then
If RegTest(oneP.innerText, "[\((](" & questionIndex & ")[\))].*") Then
Answer = oneP.innerText
IsAnswer = True
'Exit For
End If
Else
Debug.Print oneP.innerText
If RegTest(oneP.innerText, "[\((](\d)[\))].*") Or RegTest(oneP.innerText, "(\d{1,2})[..].*") Then
Exit For
Else
Answer = Answer & oneP.innerText
End If
End If Else
'试卷还有独立参考答案
'判断某段内容的题号是否符合条件
If RegTest(oneP.innerText, "(" & SubjectIndex & ")[\..].*") Then
IsQuestion = True
'Debug.Print isQuestion
End If
If IsQuestion = True Then
'判断某段内容的问题序号是否符合条件
If IsAnswer = False Then
If RegTest(oneP.innerText, "([\((]" & questionIndex & "[\))]).*") Then
'记录问题答案
Answer = oneP.innerText
IsAnswer = True
'Exit For
End If
Else
Debug.Print oneP.innerText
If RegTest(oneP.innerText, "[\((](\d)[\))].*") Or RegTest(oneP.innerText, "(\d{1,2})[..].*") Then
Exit For
Else
Answer = Answer & oneP.innerText
End If
End If
End If
End If
End If
Next oneP
'图片地址处理
ImageURL = Mid(ImageURL, 2)
'测试
Debug.Print Subject
Debug.Print ImageURL
Debug.Print Question
Debug.Print Answer
End With '【补丁,有待改进】2017年下半年部分图片提取不到的问题修正
If Len(ImageURL) = 0 Then
hasimagetext = Split(WebText, FindText)(0)
hasimagetext = Split(hasimagetext, "real_src")(UBound(Split(hasimagetext, "real_src")))
ImageURL = Split(hasimagetext, """")(1)
End If '输出题目内容到Word文档
Dim wdApp As Object
Dim Doc As Object On Error Resume Next
Set wdApp = GetObject(, "Word.Application")
On Error GoTo 0
If Not wdApp Is Nothing Then
wdApp.Visible = True
On Error Resume Next
Set Doc = wdApp.documents(docName)
On Error GoTo 0
If Doc Is Nothing Then
Set Doc = wdApp.documents.Add()
Doc.SaveAs docPath
End If
Else
Set wdApp = CreateObject("Word.Application")
wdApp.Visible = True
Set Doc = wdApp.documents.Add()
Doc.SaveAs docPath
End If Doc.Activate
wdApp.Selection.EndKey 6
wdApp.Selection.TypeParagraph
wdApp.Selection.InsertBreak 7
'输出题干内容
wdApp.Selection.TypeText Text:=Subject
wdApp.Selection.TypeParagraph '下载图片并插入WORD文档
If ImageURL <> "" Then
If InStr(ImageURL, "|") = 0 Then
ImagePath = ThisWorkbook.Path & Application.PathSeparator & "tmp.jpg"
DownloadImageName ImageURL, ImagePath
wdApp.Selection.InlineShapes.AddPicture Filename:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
wdApp.Selection.TypeParagraph
Kill ImagePath
'Stop
Else
ImageURLs = Split(ImageURL, "|")
For n = LBound(ImageURLs) To UBound(ImageURLs) Step 1
ImagePath = ThisWorkbook.Path & Application.PathSeparator & "tmp.jpg"
DownloadImageName ImageURL, ImagePath
wdApp.Selection.InlineShapes.AddPicture Filename:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
wdApp.Selection.TypeParagraph
Kill ImagePath
Next n
End If
End If
'输出问题内容
wdApp.Selection.TypeText Text:=Question
wdApp.Selection.TypeParagraph
'输出答案内容
wdApp.Selection.TypeText Text:="【答案】" & Answer
wdApp.Selection.TypeParagraph
Set wdApp = Nothing
Set Doc = Nothing
Set oneP = Nothing
End Sub
Private Function RegTest(ByVal OrgText As String, ByVal Pattern As String) As Boolean
'传递参数 :原字符串, 匹配模式
Dim Regex As Object
Set Regex = CreateObject("VBScript.RegExp")
With Regex
.Global = True
.Pattern = Pattern
End With
RegTest = Regex.test(OrgText)
Set Regex = Nothing
End Function
Public Function RegGet(ByVal OrgText As String, ByVal Pattern As String) As String
'传递参数 :原字符串, 匹配模式
Dim Regex As Object
Dim Mh As Object
Set Regex = CreateObject("VBScript.RegExp")
With Regex
.Global = True
.Pattern = Pattern
End With
If Regex.test(OrgText) Then
Set Mh = Regex.Execute(OrgText)
RegGet = Mh.Item(0).submatches(0)
Else
RegGet = ""
End If
Set Regex = Nothing
End Function
Sub SetFontRed(ByVal Rng As Range)
With Rng.Font
.Color = -16776961
.TintAndShade = 0
End With
End Sub
2018-02-16 GetSameTypeQuestion的更多相关文章
- 5820. 【NOIP提高A组模拟2018.8.16】 非法输入(模拟,字符串)
5820. [NOIP提高A组模拟2018.8.16] 非法输入 (File IO): input:aplusb.in output:aplusb.out Time Limits: 1000 ms ...
- 2018.11.16 浪在ACM 集训队第五次测试赛
2018.11.16 浪在ACM 集训队第五次测试赛 整理人:李继朋 Problem A : 参考博客:[1]朱远迪 Problem B : 参考博客: Problem C : 参考博客:[1]马鸿儒 ...
- 读书笔记-《Maven实战》-2018/4/16
第一章:Maven简介 1:Maven:Maven原本的单词意思为"知识的积累",谷歌翻译为"行家",而作为Apache的开源项目,Maven是一个主要服务于基 ...
- 2018.6.16 PHP小实验
PHP实验 实验一 <?php /** * Created by PhpStorm. * User: qichunlin * Date: 2018/5/17 * Time: 下午5:35 */ ...
- AtCoder Beginner Contest 100 2018/06/16
A - Happy Birthday! Time limit : 2sec / Memory limit : 1000MB Score: 100 points Problem Statement E8 ...
- 【开发工具】- Idea.2018.02注册码激活
1.从下面地址下载一个jar包,名称是 JetbrainsCrack-3.1-release-enc.jar 下载地址: 链接: https://pan.baidu.com/s/1VZjklI3qh ...
- 本周ASP.NET英文技术文章推荐[02/03 - 02/16]:MVC、Visual Studio 2008、安全性、性能、LINQ to JavaScript、jQuery...
摘要 继续坚持,继续推荐.本期共有9篇文章: 最新的ASP.NET MVC框架开发计划 Visual Studio 2008 Web开发相关的Hotfix发布 ASP.NET安全性教程系列 ASP.N ...
- h5视频和音频 -2018/04/16
HTML5 规定了一种通过 video 元素来包含视频的标准方法. 当前video元素支持的三种视频格式: (1)Ogg 带有Theora视频编码和Vorbis音频编码的ogg文件 (2)MPEG4带 ...
- 2018.02.12 noip模拟赛T2
二兵的赌注 Description游戏中,二兵要进入了一家奇怪的赌场.赌场中有n个庄家,每个庄家都可以猜大猜小,猜一次一元钱.每一次开彩前,你都可以到任意个庄家那里下赌注.如果开彩结果是大,你就可以得 ...
- 【资料下载区】【iCore4相关代码、资料下载地址】更新日期2018/02/24
[iCore4相关文档][更新中...] iCore4原理图(PDF)下载iCore4引脚注释(PDF)下载iCore4机械尺寸(PDF)下载 [iCore4相关例程代码][ARM] DEMO测试程序 ...
随机推荐
- 【python39--面向对象组合】
一.组合 定义:当几个对象是水平方向的时候,就应该考虑组合,当对象是纵向的时候用继承,组合就是用一个类把2个平级层次的类放在一起,然后实例化就可以了 #现在定义一个类,叫水池,水池里面有鱼和乌龟cla ...
- EGIT
https://jingyan.baidu.com/article/64d05a0262f013de55f73bcc.html
- 几道cf水题
题意:给你包含n个元素的数组和k种元素,要求k种元素要用完,并且每种颜色至少用一次,n个元素,如果某几个元素的值相同,这些个元素也不能染成同一种元素. 思路:如果元素个数n小于k或者值相同的元素的个数 ...
- Python3基础 list list()生成空列表
Python : 3.7.0 OS : Ubuntu 18.04.1 LTS IDE : PyCharm 2018.2.4 Conda ...
- Vue学习【第四篇】:Vue 之webpack打包工具的使用
什么是webpack webpack是一个模块打包工具.用vue项目来举例:浏览器它是只认识js,不认识vue的.而我们写的代码后缀大多是.vue的,在每个.vue文件中都可能html.js.css甚 ...
- 集训DAYn——组合数学(1)
组合 又到了我们信息老师讲数学课了,吼吼吼 然后数学老师中途探望了一下,哇塞塞,然后他看到黑板上的题,微妙的笑了. 排列: 从n个数中有序的选出m个数的方案数是多少?第一个数有n种取法,第二个数有n- ...
- sql 指定数据库中的信息操作
查是否有该表名 SELECT * FROM sys.objects WHERE name='表名'查表字段的信息select * from syscolumns where id=Object_Id( ...
- P4238 【模板】多项式求逆
思路 多项式求逆就是对于一个多项式\(A(x)\),求一个多项式\(B(x)\),使得\(A(x)B(x) \equiv 1 \ (mod x^n)\) 假设现在多项式只有一项,显然\(B(x)\)的 ...
- 论文笔记:ReNet: A Recurrent Neural Network Based Alternative to Convolutional Networks
ReNet: A Recurrent Neural Network Based Alternative to Convolutional Networks2018-03-05 11:13:05 ...
- (zhuan) How to Train Neural Networks With Backpropagation
this blog from: http://blog.demofox.org/2017/03/09/how-to-train-neural-networks-with-backpropagation ...