'目前存在的BUG
'图片补丁存在多个URL
Private Declare Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long
Private Declare Function DeleteUrlCacheEntry Lib "wininet" Alias "DeleteUrlCacheEntryA" (ByVal lpszUrlName As String) As Long
Sub DownloadImageName(ByVal ImageURL As String, ByVal ImagePath As String)
Dim lngRetVal As Long
lngRetVal = URLDownloadToFile(0, ImageURL, ImagePath, 0, 0)
If lngRetVal = 0 Then
DeleteUrlCacheEntry ImageURL '清除缓存
'MsgBox "成功"
Else
'MsgBox "失败"
End If
End Sub
Sub LoopGetSubject()
Dim StartTime As Variant
Dim UsedTime As Variant
StartTime = VBA.Timer
Dim Sht As Worksheet
Set Sht = ThisWorkbook.ActiveSheet
With Sht
EndRow = .Cells(.Cells.Rows.Count, 1).End(xlUp).Row
For i = 2 To EndRow
SetFontRed .Cells(i, 1).Resize(1, 3)
FindText = Mid(.Cells(i, 3).Text, 4, Len(.Cells(i, 3).Text) - 8)
ExamUrl = .Cells(i, 2).Text
Call GetExamTextByUrl(ExamUrl, FindText)
Next i
End With
Set Sht = Nothing
UsedTime = VBA.Timer - StartTime
Debug.Print "UsedTime :" & Format(UsedTime, "#0.0000 Seconds")
MsgBox "UsedTime :" & Format(UsedTime, "#0.0000 Seconds")
End Sub
Sub GetSubject()
SetFontRed Application.ActiveCell
FindText = Mid(Application.ActiveCell.Text, 4, Len(Application.ActiveCell.Text) - 8)
ExamUrl = Application.ActiveCell.Offset(0, -1).Text
Call GetExamTextByUrl(ExamUrl, FindText)
End Sub
Sub GetExamTextByUrl(ByVal ExamUrl As String, ByVal FindText As String)
Dim Subject As String
Dim Question As String
Dim ImageURL As String
Dim Answer As String
Dim HasGetContent As Boolean
Dim docName As String
Dim docPath As String
Dim Independent As Boolean
Dim IsQuestion As Boolean
Dim IsAnswer As Boolean
Dim oneP As Object
Dim nextTag As Object 'send request
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", ExamUrl, False
.Send
WebText = .responsetext
'Debug.Print WebText
End With
With CreateObject("htmlfile")
.write WebText
Set examdiv = .getElementById("sina_keyword_ad_area2")
'获取试卷文本内容
ExamText = examdiv.innerText
'判断试卷是否含有独立答案
Independent = ExamText Like "*参考答案*"
'Debug.Print " Independent "; Independent
'设定搜集题目Word文档名称和路径
docName = Application.ActiveSheet.Name & "_题目搜集.doc"
docPath = ThisWorkbook.Path & "\" & docName
'判断某个段落是否为题目/答案的开始
IsQuestion = False
IsAnswer = False
'判断是否已经提取到内容
HasGetContent = False
'循环所有段落
For Each oneP In .getElementsByTagName("p")
If HasGetContent = False Then
'判断某段内容是否为题号行
If oneP.innerText Like "##.*" Or oneP.innerText Like "##.*" Then
Subject = ""
Question = ""
ImageURL = ""
Answer = ""
'开始记录题干内容
Subject = oneP.innerText
'Debug.Print OneP.innerText
Else
If InStr(oneP.innerText, FindText) = 0 Then
'过滤不相干的问题,仅保留符合条件的问题
If Not RegTest(oneP.innerText, "([\((]\d[\))]).*") Then
'继续记录问题内容
Subject = Subject & oneP.innerText
End If
End If
End If
'提取题目图片的地址
Set nextTag = oneP.NextSibling
If Not nextTag Is Nothing Then
If UCase(nextTag.tagName) = "A" Then
If nextTag.HasChildNodes Then
If nextTag.href Like "http://photo.blog.sina.com.cn/showpic.html*" Then
ImageURL = ImageURL & "|" & nextTag.FirstChild.getAttribute("real_src")
'Debug.Print ImageURL
End If
End If
End If
End If '提取题目的序号和问题的序号
If InStr(oneP.innerText, FindText) > 0 Then
SubjectIndex = RegGet(Subject, "(\d{1,2})[..].*")
Question = oneP.innerText
questionIndex = RegGet(Question, "[\((](\d)[\))].*")
'Debug.Print "题序:"; SubjectIndex; " 问序: "; questionIndex
HasGetContent = True
End If Else
'提取内容后 开始找答案
'试卷不含独立答案,答案就附在每道题后面
If Independent = False Then If IsAnswer = False Then
If RegTest(oneP.innerText, "[\((](" & questionIndex & ")[\))].*") Then
Answer = oneP.innerText
IsAnswer = True
'Exit For
End If
Else
Debug.Print oneP.innerText
If RegTest(oneP.innerText, "[\((](\d)[\))].*") Or RegTest(oneP.innerText, "(\d{1,2})[..].*") Then
Exit For
Else
Answer = Answer & oneP.innerText
End If
End If Else
'试卷还有独立参考答案
'判断某段内容的题号是否符合条件
If RegTest(oneP.innerText, "(" & SubjectIndex & ")[\..].*") Then
IsQuestion = True
'Debug.Print isQuestion
End If
If IsQuestion = True Then
'判断某段内容的问题序号是否符合条件
If IsAnswer = False Then
If RegTest(oneP.innerText, "([\((]" & questionIndex & "[\))]).*") Then
'记录问题答案
Answer = oneP.innerText
IsAnswer = True
'Exit For
End If
Else
Debug.Print oneP.innerText
If RegTest(oneP.innerText, "[\((](\d)[\))].*") Or RegTest(oneP.innerText, "(\d{1,2})[..].*") Then
Exit For
Else
Answer = Answer & oneP.innerText
End If
End If
End If
End If
End If
Next oneP
'图片地址处理
ImageURL = Mid(ImageURL, 2)
'测试
Debug.Print Subject
Debug.Print ImageURL
Debug.Print Question
Debug.Print Answer
End With '【补丁,有待改进】2017年下半年部分图片提取不到的问题修正
If Len(ImageURL) = 0 Then
hasimagetext = Split(WebText, FindText)(0)
hasimagetext = Split(hasimagetext, "real_src")(UBound(Split(hasimagetext, "real_src")))
ImageURL = Split(hasimagetext, """")(1)
End If '输出题目内容到Word文档
Dim wdApp As Object
Dim Doc As Object On Error Resume Next
Set wdApp = GetObject(, "Word.Application")
On Error GoTo 0
If Not wdApp Is Nothing Then
wdApp.Visible = True
On Error Resume Next
Set Doc = wdApp.documents(docName)
On Error GoTo 0
If Doc Is Nothing Then
Set Doc = wdApp.documents.Add()
Doc.SaveAs docPath
End If
Else
Set wdApp = CreateObject("Word.Application")
wdApp.Visible = True
Set Doc = wdApp.documents.Add()
Doc.SaveAs docPath
End If Doc.Activate
wdApp.Selection.EndKey 6
wdApp.Selection.TypeParagraph
wdApp.Selection.InsertBreak 7
'输出题干内容
wdApp.Selection.TypeText Text:=Subject
wdApp.Selection.TypeParagraph '下载图片并插入WORD文档
If ImageURL <> "" Then
If InStr(ImageURL, "|") = 0 Then
ImagePath = ThisWorkbook.Path & Application.PathSeparator & "tmp.jpg"
DownloadImageName ImageURL, ImagePath
wdApp.Selection.InlineShapes.AddPicture Filename:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
wdApp.Selection.TypeParagraph
Kill ImagePath
'Stop
Else
ImageURLs = Split(ImageURL, "|")
For n = LBound(ImageURLs) To UBound(ImageURLs) Step 1
ImagePath = ThisWorkbook.Path & Application.PathSeparator & "tmp.jpg"
DownloadImageName ImageURL, ImagePath
wdApp.Selection.InlineShapes.AddPicture Filename:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
wdApp.Selection.TypeParagraph
Kill ImagePath
Next n
End If
End If
'输出问题内容
wdApp.Selection.TypeText Text:=Question
wdApp.Selection.TypeParagraph
'输出答案内容
wdApp.Selection.TypeText Text:="【答案】" & Answer
wdApp.Selection.TypeParagraph
Set wdApp = Nothing
Set Doc = Nothing
Set oneP = Nothing
End Sub
Private Function RegTest(ByVal OrgText As String, ByVal Pattern As String) As Boolean
'传递参数 :原字符串, 匹配模式
Dim Regex As Object
Set Regex = CreateObject("VBScript.RegExp")
With Regex
.Global = True
.Pattern = Pattern
End With
RegTest = Regex.test(OrgText)
Set Regex = Nothing
End Function
Public Function RegGet(ByVal OrgText As String, ByVal Pattern As String) As String
'传递参数 :原字符串, 匹配模式
Dim Regex As Object
Dim Mh As Object
Set Regex = CreateObject("VBScript.RegExp")
With Regex
.Global = True
.Pattern = Pattern
End With
If Regex.test(OrgText) Then
Set Mh = Regex.Execute(OrgText)
RegGet = Mh.Item(0).submatches(0)
Else
RegGet = ""
End If
Set Regex = Nothing
End Function
Sub SetFontRed(ByVal Rng As Range)
With Rng.Font
.Color = -16776961
.TintAndShade = 0
End With
End Sub

  

2018-02-16 GetSameTypeQuestion的更多相关文章

  1. 5820. 【NOIP提高A组模拟2018.8.16】 非法输入(模拟,字符串)

    5820. [NOIP提高A组模拟2018.8.16] 非法输入 (File IO): input:aplusb.in output:aplusb.out Time Limits: 1000 ms   ...

  2. 2018.11.16 浪在ACM 集训队第五次测试赛

    2018.11.16 浪在ACM 集训队第五次测试赛 整理人:李继朋 Problem A : 参考博客:[1]朱远迪 Problem B : 参考博客: Problem C : 参考博客:[1]马鸿儒 ...

  3. 读书笔记-《Maven实战》-2018/4/16

    第一章:Maven简介 1:Maven:Maven原本的单词意思为"知识的积累",谷歌翻译为"行家",而作为Apache的开源项目,Maven是一个主要服务于基 ...

  4. 2018.6.16 PHP小实验

    PHP实验 实验一 <?php /** * Created by PhpStorm. * User: qichunlin * Date: 2018/5/17 * Time: 下午5:35 */ ...

  5. AtCoder Beginner Contest 100 2018/06/16

    A - Happy Birthday! Time limit : 2sec / Memory limit : 1000MB Score: 100 points Problem Statement E8 ...

  6. 【开发工具】- Idea.2018.02注册码激活

    1.从下面地址下载一个jar包,名称是  JetbrainsCrack-3.1-release-enc.jar 下载地址: 链接: https://pan.baidu.com/s/1VZjklI3qh ...

  7. 本周ASP.NET英文技术文章推荐[02/03 - 02/16]:MVC、Visual Studio 2008、安全性、性能、LINQ to JavaScript、jQuery...

    摘要 继续坚持,继续推荐.本期共有9篇文章: 最新的ASP.NET MVC框架开发计划 Visual Studio 2008 Web开发相关的Hotfix发布 ASP.NET安全性教程系列 ASP.N ...

  8. h5视频和音频 -2018/04/16

    HTML5 规定了一种通过 video 元素来包含视频的标准方法. 当前video元素支持的三种视频格式: (1)Ogg 带有Theora视频编码和Vorbis音频编码的ogg文件 (2)MPEG4带 ...

  9. 2018.02.12 noip模拟赛T2

    二兵的赌注 Description游戏中,二兵要进入了一家奇怪的赌场.赌场中有n个庄家,每个庄家都可以猜大猜小,猜一次一元钱.每一次开彩前,你都可以到任意个庄家那里下赌注.如果开彩结果是大,你就可以得 ...

  10. 【资料下载区】【iCore4相关代码、资料下载地址】更新日期2018/02/24

    [iCore4相关文档][更新中...] iCore4原理图(PDF)下载iCore4引脚注释(PDF)下载iCore4机械尺寸(PDF)下载 [iCore4相关例程代码][ARM] DEMO测试程序 ...

随机推荐

  1. if语句题目练习

    #print("您好") # # count=1 #count 表示计数 # while count<=8: # print("你是小学僧吗?") # p ...

  2. ODAC(V9.5.15) 学习笔记(四)TOraQuery (1)

    TOraQuery是ODAC中常用的一个组件,其继承关系如下: TDataSet ---TMemDataSet ---TCustomDADataSet ---TOraDataSet ---TCusto ...

  3. Bootstrap3基础 form-group 输入框之间出现间隔

      内容 参数   OS   Windows 10 x64   browser   Firefox 65.0.2   framework     Bootstrap 3.3.7   editor    ...

  4. 集训总结DAY.1(18.5.22)——KMP

    DAY 1——5.22 in the morning 依稀记得我们有一场contest. at night chf大佬讲KMP,先膜一波~~~ luoguP3375KMP模板题 KMP算法,又称模式匹 ...

  5. 题解——CF Manthan, Codefest 18 (rated, Div. 1 + Div. 2) T4(模拟)

    随便模拟下就过了qwq 然后忘了特判WA了QwQ #include <cstdio> #include <algorithm> #include <cstring> ...

  6. mybatis结合mysql批量操作及查询sql

    MySQL数据库 批量操作主要使用的是Mybatis的foreach,遍历参数列表执行相应的操作,所以批量插入/更新/删除的写法是类似的,只是SQL略有区别而已.MySql批量操作需要数据库连接配置a ...

  7. php的Allowed memory size of 134217728 bytes exhausted问题

    提示Allowed memory size of 134217728 bytes exhausted,出现这种错误的情况常见的有三种: 0:查询的数据量大. 1:数据量不大,但是php.ini配置的内 ...

  8. Maven Web项目解决跨域问题

    跨域问题目前笔者所用到的方案大致有三种:jsonp,SpringMVC 4以上注解方式和cros三方过滤器. Jsonp JSONP(JSON with Padding)是一个非官方的协议,它允许在服 ...

  9. MySql查询功能梳理

    CREATE DATABASE CristinMysql Create table employee( eId int(9) not null auto_increment, eName varcha ...

  10. Unicode转字符串

    /// <summary> /// Unicode转字符串 /// </summary> /// <returns>The to string.</retur ...