COM-IE-(2)
# -*- coding:UTF-8 -*-
import sys
from time import sleep
import win32com.client
from win32com.client import DispatchEx

# The standard streams are captured first because, on Python 2, reload(sys)
# re-initialises the sys module (including its stream bindings); they are put
# back right after the default encoding has been switched.
stdin, stdout, stderr = sys.stdin, sys.stdout, sys.stderr
if sys.version_info[0] == 2:
    # Python 2 only: sys.setdefaultencoding is removed at interpreter start
    # and only becomes available again after reload(sys).  UTF-8 as default
    # codec lets str() be applied to non-ASCII DOM values further down.
    reload(sys)
    sys.setdefaultencoding("utf-8")
    sys.stdin, sys.stdout, sys.stderr = stdin, stdout, stderr

# CLSID of the shell's "ShellWindows" collection (the open shell/IE windows).
_SHELL_WINDOWS_CLSID = '{9BA05972-F6A8-11CF-A442-00A0C90A8F39}'

# Value of InternetExplorer.ReadyState once the document is fully loaded.
_READYSTATE_COMPLETE = 4

# Attributes collected by NodesByAttr when the caller names none.
_DEFAULT_NODE_ATTRS = ("id", "nodeName", "nodeType", "nodeValue", "className",
                       "innerHTML", "innerText", "href", "name", "title",
                       "type", "value")


class COM_IE:
    """Small helper around an Internet Explorer COM automation object.

    On construction the instance attaches to an already open window that
    shows ``url`` or, when there is none, starts a new browser and navigates
    to ``url``.

    Attributes:
        url: the address passed to the constructor.
        Visible: visibility flag handed to a newly created browser (1 = shown).
        ie: the underlying COM browser object.
        document: ``body.innerHTML`` captured by the last ``WaitIE`` call.
        text: ``body.innerText`` captured by the last ``WaitIE`` call.
        charset: document charset captured by the last ``WaitIE`` call.
    """

    def __init__(self, url=None):
        self.url = url
        self.Visible = 1
        self.document = ""
        self.text = ""
        self.charset = None
        self.ie = self.openIE(url)

    def ExistIE(self, url):
        """Return the open window whose LocationURL equals ``url``, else None."""
        for window in DispatchEx(_SHELL_WINDOWS_CLSID):
            # Skip null entries defensively instead of failing on them.
            if window is not None and window.LocationURL == url:
                return window
        return None

    def NewIE(self, url):
        """Start a new Internet Explorer, navigate it to ``url`` and return it."""
        ie = DispatchEx("InternetExplorer.Application")
        ie.Visible = self.Visible
        ie.Navigate(url)
        return ie

    def openIE(self, url):
        """Return an existing window showing ``url`` or a freshly started one."""
        ie = self.ExistIE(url)
        if ie is None:
            ie = self.NewIE(url)
        return ie

    def WaitIE(self, timeout=None, interval=1):
        """Block until the page is loaded, then snapshot its content.

        Polls ``ReadyState`` every ``interval`` seconds.  Once loading is
        complete, ``charset``, ``document`` and ``text`` are refreshed from
        the current document.

        Args:
            timeout: maximum number of seconds to wait; ``None`` (default)
                waits indefinitely, which is the historical behaviour.
            interval: pause between two polls, in seconds.

        Raises:
            RuntimeError: if ``timeout`` is given and elapses first.
        """
        waited = 0
        while self.ie.ReadyState != _READYSTATE_COMPLETE:
            if timeout is not None and waited >= timeout:
                raise RuntimeError(
                    "page not loaded after %s seconds" % timeout)
            sleep(interval)
            waited += interval
        page = self.ie.Document
        self.charset = page.charset
        self.document = page.body.innerHTML
        self.text = page.body.innerText

    def Visible(self):
        """Push the instance's visibility flag to the browser.

        NOTE: ``__init__`` stores an integer under the same name, so on an
        instance this method is shadowed and can only be reached as
        ``COM_IE.Visible(obj)``.  It is kept for compatibility; use
        ``SetVisible`` instead.
        """
        self.ie.Visible = self.Visible

    def SetVisible(self, visible=None):
        """Show or hide the browser window.

        Args:
            visible: new flag value; when ``None`` the currently stored
                ``self.Visible`` value is re-applied.
        """
        if visible is not None:
            self.Visible = visible
        self.ie.Visible = self.Visible

    def GetBody(self):
        """Wait for the page to finish loading and return its ``body`` node."""
        self.WaitIE()
        return self.ie.Document.body

    def GetNodes(self, parentNode, tag):
        """Return all ``tag`` elements below ``parentNode`` as a list.

        Example::

            coldiv = ie.GetNodes(body, "div")
        """
        # Plain iteration only: the COM collection is never asked for len().
        return [child for child in parentNode.getElementsByTagName(tag)]

    def NodeByAttr(self, Nodes, nodeattr, nodeval):
        """Return the first node whose ``nodeattr`` equals ``nodeval``.

        The attribute value is converted with ``str()`` before comparing.
        Returns ``None`` when no node matches.

        Example::

            div_id_editor = ie.NodeByAttr(coldiv, "id", "editor_ifr")
        """
        for node in Nodes:
            if str(node.getAttribute(nodeattr)) == nodeval:
                return node
        return None

    def _FindNode(self, body, node_type, node_attr, node_attr_val):
        """Locate one ``node_type`` element by attribute or raise.

        Raises:
            AttributeError: if no element matches (same exception type the
                accessors produced before, now with a readable message).
        """
        tags = self.GetNodes(body, node_type)
        node = self.NodeByAttr(tags, node_attr, node_attr_val)
        if node is None:
            raise AttributeError("no <%s> node with %s=%r"
                                 % (node_type, node_attr, node_attr_val))
        return node

    def SetNodeHtml(self, body, node_type, node_attr, node_attr_val,
                    node_inner_html):
        """Assign ``node_inner_html`` to the matching node's ``innerHTML``."""
        node = self._FindNode(body, node_type, node_attr, node_attr_val)
        node.innerHTML = node_inner_html

    def SetNodeVal(self, body, node_type, node_attr, node_attr_val,
                   node_value):
        """Assign ``node_value`` to the matching node's ``value``."""
        node = self._FindNode(body, node_type, node_attr, node_attr_val)
        node.value = node_value

    def NodeClick(self, body, node_type, node_attr, node_attr_val):
        """Call ``click()`` on the matching node."""
        self._FindNode(body, node_type, node_attr, node_attr_val).click()

    def GetNodeHtml(self, body, node_type, node_attr, node_attr_val):
        """Return the matching node's ``innerHTML``."""
        return self._FindNode(body, node_type, node_attr,
                              node_attr_val).innerHTML

    def GetNodeVal(self, body, node_type, node_attr, node_attr_val):
        """Return the matching node's ``value``."""
        return self._FindNode(body, node_type, node_attr, node_attr_val).value

    def NodesByAttr(self, Nodes, nodeattr=None, nodeval=None):
        """Collect attribute values from multiple nodes.

        Args:
            Nodes: iterable of DOM nodes.
            nodeattr: attribute to read.  When empty/None, a fixed set of
                common attributes is read from every node.
            nodeval: when given together with ``nodeattr``, only nodes whose
                ``str(attribute)`` equals this value are reported.

        Returns:
            A list with one dict per reported node, mapping attribute name
            to the value returned by ``getAttribute``.

        Example::

            hrefs = ie.NodesByAttr(anchors, "href")
        """
        value_list = []
        for node in Nodes:
            if not nodeattr:
                value_dict = {}
                for attr in _DEFAULT_NODE_ATTRS:
                    value_dict[attr] = node.getAttribute(attr)
                value_list.append(value_dict)
                continue
            attr_value = node.getAttribute(nodeattr)
            if not nodeval or str(attr_value) == nodeval:
                value_list.append({nodeattr: attr_value})
        return value_list

    def GetNodesVal(self, body, node_type, node_attr=None, node_val=None):
        """Run ``NodesByAttr`` over every ``node_type`` element below ``body``."""
        tags = self.GetNodes(body, node_type)
        return self.NodesByAttr(tags, node_attr, node_val)

    def Quit(self):
        """Close the browser."""
        self.ie.Quit()


if __name__ == "__main__":
    # raw_input only exists on Python 2; fall back to input elsewhere.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    url = "http://blog.csdn.net/agoago_2009/"
    IE = COM_IE(url)
    try:
        BODY = IE.GetBody()
        # a_list = IE.GetNodesVal(BODY, "a", "href")
        a_list = IE.GetNodesVal(BODY, "a")
        for a in a_list:
            print("%s %s" % (a.get("innerText"), a.get("href")))

        # Further usage example (disabled in the original script):
        #   IE.SetNodeVal(BODY, "input", "id", "inputSearch", "COM")
        #   IE.NodeClick(BODY, "input", "id", "btnSubmit")
        #   IE.WaitIE()
        #   print(IE.document.strip()[:100])
        #   print(IE.charset)
        #   print(IE.text.strip()[:100])

        read_line('quit')
    finally:
        # Close the browser even when the demo fails half-way.
        IE.Quit()
随机推荐
- ruby 异常处理 begin rescue end
begin 代码1 rescue 代码2 end 如果执行 代码1 发生异常,则转至 代码2;若正常,则执行完跳出
- 一些神奇的JS功效
1: 沉睡排序 var numbers=[1,2,3,4,5,5,99,4,20,11,200]; numbers.forEach((num)=>{ setTimeout(()=>{ co ...
- [置顶]【实用 .NET Core开发系列】- 导航篇
前言 此系列从出发点来看,是 上个系列的续篇, 上个系列因为后面工作的原因,后面几篇没有写完,后来.NET Core出来之后,注意力就转移到了.NET Core上,所以再也就没有继续下去,此是原因之一 ...
- WebService--jax
使用javax.jws编写webservice服务: 服务端: 1,定义webservice接口: package com.jws.serviceInterface; import javax.jws ...
- hash算法与hashmap
参考博客: http://zha-zi.iteye.com/blog/1124484 http://www.cnblogs.com/dolphin0520/p/3681042.html(参考了hash ...
- app.config 配置多项 配置集合 自定义配置
C#程序的配置文件,使用的最多的是appSettings 下的<add key="Interval" value="30"/>,这种配置单项的很方便 ...
- Composer笔记
安装 windows中安装Composer 一般来说,windows下安装composer有两种办法,一种是直接下载并运行Composer-Setup.exe,这种方法在中国似乎很难完成安装.另一种就 ...
- 一位有着工匠精神的博主写的关于IEnumerable接口的详细解析
在此,推荐一位有着工匠精神的博主写的一篇关于IEnumerable接口的深入解析的文章:http://www.cnblogs.com/zhaopei/p/5769782.html#autoid-0-0 ...
- Mysql--单表数据记录查询
1.简单数据记录查询 1.1 简单数据查询 1.1.1 查询所有字段数据 例子: 1.1.2 "*"符号的使用 语法: 例子: 1.1.3 查询指定字段数据 例子: ...
- 外部地址访问xampp
默认情况下xampp只能访问本地服务器的地址,即localhost。如果需要在外部机器访问XAMPP,则需要修改配置:找到xampp的文件夹,找到apache文件夹中的conf->extra-> ...