'目前存在的BUG
'图片补丁存在多个URL
Private Declare Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long
Private Declare Function DeleteUrlCacheEntry Lib "wininet" Alias "DeleteUrlCacheEntryA" (ByVal lpszUrlName As String) As Long
Sub DownloadImageName(ByVal ImageURL As String, ByVal ImagePath As String)
Dim lngRetVal As Long
lngRetVal = URLDownloadToFile(0, ImageURL, ImagePath, 0, 0)
If lngRetVal = 0 Then
DeleteUrlCacheEntry ImageURL '清除缓存
'MsgBox "成功"
Else
'MsgBox "失败"
End If
End Sub
Sub LoopGetSubject()
Dim StartTime As Variant
Dim UsedTime As Variant
StartTime = VBA.Timer
Dim Sht As Worksheet
Set Sht = ThisWorkbook.ActiveSheet
With Sht
EndRow = .Cells(.Cells.Rows.Count, 1).End(xlUp).Row
For i = 2 To EndRow
SetFontRed .Cells(i, 1).Resize(1, 3)
FindText = Mid(.Cells(i, 3).Text, 4, Len(.Cells(i, 3).Text) - 8)
ExamUrl = .Cells(i, 2).Text
Call GetExamTextByUrl(ExamUrl, FindText)
Next i
End With
Set Sht = Nothing
UsedTime = VBA.Timer - StartTime
Debug.Print "UsedTime :" & Format(UsedTime, "#0.0000 Seconds")
MsgBox "UsedTime :" & Format(UsedTime, "#0.0000 Seconds")
End Sub
Sub GetSubject()
SetFontRed Application.ActiveCell
FindText = Mid(Application.ActiveCell.Text, 4, Len(Application.ActiveCell.Text) - 8)
ExamUrl = Application.ActiveCell.Offset(0, -1).Text
Call GetExamTextByUrl(ExamUrl, FindText)
End Sub
Sub GetExamTextByUrl(ByVal ExamUrl As String, ByVal FindText As String)
Dim Subject As String
Dim Question As String
Dim ImageURL As String
Dim Answer As String
Dim HasGetContent As Boolean
Dim docName As String
Dim docPath As String
Dim Independent As Boolean
Dim IsQuestion As Boolean
Dim IsAnswer As Boolean
Dim oneP As Object
Dim nextTag As Object 'send request
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", ExamUrl, False
.Send
WebText = .responsetext
'Debug.Print WebText
End With
With CreateObject("htmlfile")
.write WebText
Set examdiv = .getElementById("sina_keyword_ad_area2")
'获取试卷文本内容
ExamText = examdiv.innerText
'判断试卷是否含有独立答案
Independent = ExamText Like "*参考答案*"
'Debug.Print " Independent "; Independent
'设定搜集题目Word文档名称和路径
docName = Application.ActiveSheet.Name & "_题目搜集.doc"
docPath = ThisWorkbook.Path & "\" & docName
'判断某个段落是否为题目/答案的开始
IsQuestion = False
IsAnswer = False
'判断是否已经提取到内容
HasGetContent = False
'循环所有段落
For Each oneP In .getElementsByTagName("p")
If HasGetContent = False Then
'判断某段内容是否为题号行
If oneP.innerText Like "##.*" Or oneP.innerText Like "##.*" Then
Subject = ""
Question = ""
ImageURL = ""
Answer = ""
'开始记录题干内容
Subject = oneP.innerText
'Debug.Print OneP.innerText
Else
If InStr(oneP.innerText, FindText) = 0 Then
'过滤不相干的问题,仅保留符合条件的问题
If Not RegTest(oneP.innerText, "([\((]\d[\))]).*") Then
'继续记录问题内容
Subject = Subject & oneP.innerText
End If
End If
End If
'提取题目图片的地址
Set nextTag = oneP.NextSibling
If Not nextTag Is Nothing Then
If UCase(nextTag.tagName) = "A" Then
If nextTag.HasChildNodes Then
If nextTag.href Like "http://photo.blog.sina.com.cn/showpic.html*" Then
ImageURL = ImageURL & "|" & nextTag.FirstChild.getAttribute("real_src")
'Debug.Print ImageURL
End If
End If
End If
End If '提取题目的序号和问题的序号
If InStr(oneP.innerText, FindText) > 0 Then
SubjectIndex = RegGet(Subject, "(\d{1,2})[..].*")
Question = oneP.innerText
questionIndex = RegGet(Question, "[\((](\d)[\))].*")
'Debug.Print "题序:"; SubjectIndex; " 问序: "; questionIndex
HasGetContent = True
End If Else
'提取内容后 开始找答案
'试卷不含独立答案,答案就附在每道题后面
If Independent = False Then If IsAnswer = False Then
If RegTest(oneP.innerText, "[\((](" & questionIndex & ")[\))].*") Then
Answer = oneP.innerText
IsAnswer = True
'Exit For
End If
Else
Debug.Print oneP.innerText
If RegTest(oneP.innerText, "[\((](\d)[\))].*") Or RegTest(oneP.innerText, "(\d{1,2})[..].*") Then
Exit For
Else
Answer = Answer & oneP.innerText
End If
End If Else
'试卷还有独立参考答案
'判断某段内容的题号是否符合条件
If RegTest(oneP.innerText, "(" & SubjectIndex & ")[\..].*") Then
IsQuestion = True
'Debug.Print isQuestion
End If
If IsQuestion = True Then
'判断某段内容的问题序号是否符合条件
If IsAnswer = False Then
If RegTest(oneP.innerText, "([\((]" & questionIndex & "[\))]).*") Then
'记录问题答案
Answer = oneP.innerText
IsAnswer = True
'Exit For
End If
Else
Debug.Print oneP.innerText
If RegTest(oneP.innerText, "[\((](\d)[\))].*") Or RegTest(oneP.innerText, "(\d{1,2})[..].*") Then
Exit For
Else
Answer = Answer & oneP.innerText
End If
End If
End If
End If
End If
Next oneP
'图片地址处理
ImageURL = Mid(ImageURL, 2)
'测试
Debug.Print Subject
Debug.Print ImageURL
Debug.Print Question
Debug.Print Answer
End With '【补丁,有待改进】2017年下半年部分图片提取不到的问题修正
If Len(ImageURL) = 0 Then
hasimagetext = Split(WebText, FindText)(0)
hasimagetext = Split(hasimagetext, "real_src")(UBound(Split(hasimagetext, "real_src")))
ImageURL = Split(hasimagetext, """")(1)
End If '输出题目内容到Word文档
Dim wdApp As Object
Dim Doc As Object On Error Resume Next
Set wdApp = GetObject(, "Word.Application")
On Error GoTo 0
If Not wdApp Is Nothing Then
wdApp.Visible = True
On Error Resume Next
Set Doc = wdApp.documents(docName)
On Error GoTo 0
If Doc Is Nothing Then
Set Doc = wdApp.documents.Add()
Doc.SaveAs docPath
End If
Else
Set wdApp = CreateObject("Word.Application")
wdApp.Visible = True
Set Doc = wdApp.documents.Add()
Doc.SaveAs docPath
End If Doc.Activate
wdApp.Selection.EndKey 6
wdApp.Selection.TypeParagraph
wdApp.Selection.InsertBreak 7
'输出题干内容
wdApp.Selection.TypeText Text:=Subject
wdApp.Selection.TypeParagraph '下载图片并插入WORD文档
If ImageURL <> "" Then
If InStr(ImageURL, "|") = 0 Then
ImagePath = ThisWorkbook.Path & Application.PathSeparator & "tmp.jpg"
DownloadImageName ImageURL, ImagePath
wdApp.Selection.InlineShapes.AddPicture Filename:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
wdApp.Selection.TypeParagraph
Kill ImagePath
'Stop
Else
ImageURLs = Split(ImageURL, "|")
For n = LBound(ImageURLs) To UBound(ImageURLs) Step 1
ImagePath = ThisWorkbook.Path & Application.PathSeparator & "tmp.jpg"
DownloadImageName ImageURL, ImagePath
wdApp.Selection.InlineShapes.AddPicture Filename:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
wdApp.Selection.TypeParagraph
Kill ImagePath
Next n
End If
End If
'输出问题内容
wdApp.Selection.TypeText Text:=Question
wdApp.Selection.TypeParagraph
'输出答案内容
wdApp.Selection.TypeText Text:="【答案】" & Answer
wdApp.Selection.TypeParagraph
Set wdApp = Nothing
Set Doc = Nothing
Set oneP = Nothing
End Sub
Private Function RegTest(ByVal OrgText As String, ByVal Pattern As String) As Boolean
'传递参数 :原字符串, 匹配模式
Dim Regex As Object
Set Regex = CreateObject("VBScript.RegExp")
With Regex
.Global = True
.Pattern = Pattern
End With
RegTest = Regex.test(OrgText)
Set Regex = Nothing
End Function
Public Function RegGet(ByVal OrgText As String, ByVal Pattern As String) As String
'传递参数 :原字符串, 匹配模式
Dim Regex As Object
Dim Mh As Object
Set Regex = CreateObject("VBScript.RegExp")
With Regex
.Global = True
.Pattern = Pattern
End With
If Regex.test(OrgText) Then
Set Mh = Regex.Execute(OrgText)
RegGet = Mh.Item(0).submatches(0)
Else
RegGet = ""
End If
Set Regex = Nothing
End Function
Sub SetFontRed(ByVal Rng As Range)
With Rng.Font
.Color = -16776961
.TintAndShade = 0
End With
End Sub

  

2018-02-16 GetSameTypeQuestion的更多相关文章

  1. 5820. 【NOIP提高A组模拟2018.8.16】 非法输入(模拟,字符串)

    5820. [NOIP提高A组模拟2018.8.16] 非法输入 (File IO): input:aplusb.in output:aplusb.out Time Limits: 1000 ms   ...

  2. 2018.11.16 浪在ACM 集训队第五次测试赛

    2018.11.16 浪在ACM 集训队第五次测试赛 整理人:李继朋 Problem A : 参考博客:[1]朱远迪 Problem B : 参考博客: Problem C : 参考博客:[1]马鸿儒 ...

  3. 读书笔记-《Maven实战》-2018/4/16

    第一章:Maven简介 1:Maven:Maven原本的单词意思为"知识的积累",谷歌翻译为"行家",而作为Apache的开源项目,Maven是一个主要服务于基 ...

  4. 2018.6.16 PHP小实验

    PHP实验 实验一 <?php /** * Created by PhpStorm. * User: qichunlin * Date: 2018/5/17 * Time: 下午5:35 */ ...

  5. AtCoder Beginner Contest 100 2018/06/16

    A - Happy Birthday! Time limit : 2sec / Memory limit : 1000MB Score: 100 points Problem Statement E8 ...

  6. 【开发工具】- Idea.2018.02注册码激活

    1.从下面地址下载一个jar包,名称是  JetbrainsCrack-3.1-release-enc.jar 下载地址: 链接: https://pan.baidu.com/s/1VZjklI3qh ...

  7. 本周ASP.NET英文技术文章推荐[02/03 - 02/16]:MVC、Visual Studio 2008、安全性、性能、LINQ to JavaScript、jQuery...

    摘要 继续坚持,继续推荐.本期共有9篇文章: 最新的ASP.NET MVC框架开发计划 Visual Studio 2008 Web开发相关的Hotfix发布 ASP.NET安全性教程系列 ASP.N ...

  8. h5视频和音频 -2018/04/16

    HTML5 规定了一种通过 video 元素来包含视频的标准方法. 当前video元素支持的三种视频格式: (1)Ogg 带有Theora视频编码和Vorbis音频编码的ogg文件 (2)MPEG4带 ...

  9. 2018.02.12 noip模拟赛T2

    二兵的赌注 Description游戏中,二兵要进入了一家奇怪的赌场.赌场中有n个庄家,每个庄家都可以猜大猜小,猜一次一元钱.每一次开彩前,你都可以到任意个庄家那里下赌注.如果开彩结果是大,你就可以得 ...

  10. 【资料下载区】【iCore4相关代码、资料下载地址】更新日期2018/02/24

    [iCore4相关文档][更新中...] iCore4原理图(PDF)下载iCore4引脚注释(PDF)下载iCore4机械尺寸(PDF)下载 [iCore4相关例程代码][ARM] DEMO测试程序 ...

随机推荐

  1. 牛客网数据库SQL实战(16-20)

    16.统计出当前各个title类型对应的员工当前薪水对应的平均工资.结果给出title以及平均工资avg.CREATE TABLE `salaries` (`emp_no` int(11) NOT N ...

  2. Android灯光系统_编写HAL_lights.c【转】

    本文转载自:https://blog.csdn.net/qq_33443989/article/details/77074411 1>. 编写灯光系统的HAL层 之 HAL_light.c1&l ...

  3. 三星固态硬盘ssd产品线收集

    目录 三星ssd按时间展示: 三星ssd后缀带a与不带a的区别,举其中一例: 下面是从三星中国的官网截图的产品线: 产品线列表: 总结: 参考: 最近在淘宝看到了很多拆机ssd,三星作为世界上唯一一家 ...

  4. 马虎的算式|2013年蓝桥杯B组题解析第二题-fishers

    小明是个急性子,上小学的时候经常把老师写在黑板上的题目抄错了. 有一次,老师出的题目是:36 x 495 = ? 他却给抄成了:396 x 45 = ? 但结果却很戏剧性,他的答案竟然是对的!! 假设 ...

  5. (转)Jenkins持续集成

    (二期)14.持续集成工具jenkins [课程14]持续集...概念.xmind0.6MB [课程14]持续集成...kins.xmind43.3KB [课程14预习]持续...kins.xmind ...

  6. methods 方法选项

    最简单的使用方法,一个数字,每点击一下按钮加1 html <div id="app"> <span v-text="number">&l ...

  7. js点击显示隐藏

    这个栗子…… 可以不吃,先预设一个变量表示div的状态,例子中0是显示的,一开始是隐藏的.当点击时判断状态是显示0的还是隐藏1的:如果是显示的就把div隐藏,再把变量改变为1.再次点击时把会判断到变量 ...

  8. 小程序学习一 .json 文件配置

    微信小程序——配置 以下就是小编对小程序配置的资料进行的系统的整理,希望能对开发者有帮助. 我们使用app.json文件来对微信小程序进行全局配置,决定页面文件的路径.窗口表现.设置网络超时时间.设置 ...

  9. 用tsMuxeR GUI给ts视频添加音轨

    收藏比赛的都应该知道,高清的直播流录制了后一般是ts或者mkv封装,前者用tsMuxeR GUI可以对视频音频轨进行操作,后者用mkvtoolnix,两者都是无损操作. 至于其他格式就不考虑了,随便用 ...

  10. Spring boot2.0 设置文件上传大小限制

    今天把Spring boot版本升级到了2.0后,发现原来的文件上传大小限制设置不起作用了,原来的application.properties设置如下: spring.http.multipart.m ...