# -*- coding:UTF-8 -*-
import sys
from time import sleep
import win32com.client
from win32com.client import DispatchEx stdin, stdout, stderr = sys.stdin, sys.stdout, sys.stderr
reload(sys)
sys.setdefaultencoding("utf-8")
sys.stdin, sys.stdout, sys.stderr = stdin, stdout, stderr class COM_IE:
def __init__(self,url=None):
self.url = url
self.Visible = 1
self.ie = self.openIE(url)
self.document = ""
self.text = ""
self.charset = None def ExistIE(self,url):
ShellWindowsCLSID = '{9BA05972-F6A8-11CF-A442-00A0C90A8F39}'
ies=DispatchEx(ShellWindowsCLSID)
if len(ies)==0:
return None
for ie in ies:
if ie.LocationURL==url:
return ie
return None def NewIE(self,url):
ie = DispatchEx("InternetExplorer.Application")
ie.Visible = self.Visible
ie.Navigate(url)
return ie def openIE(self,url):
ie = self.ExistIE(url)
if ie==None:
ie = self.NewIE(url)
return ie def WaitIE(self):
# while self.ie.Busy:
# leep(1)
while 1:
state = self.ie.ReadyState
if state ==4:
# print "load done..."
self.charset = self.ie.Document.charset
self.document = self.ie.Document.body.innerHTML
self.text = self.ie.Document.body.innerText
break
sleep(1) def Visible(self):
self.ie.Visible = self.Visible def GetBody(self):
self.WaitIE()
return self.ie.Document.body def GetNodes(self,parentNode,tag):
"""
>>> coldiv=GetNodes(body,"div")
"""
childNodes=[]
for childNode in parentNode.getElementsByTagName(tag):
childNodes.append(childNode)
return childNodes def NodeByAttr(self,Nodes,nodeattr,nodeval):
"""
>>> div_id_editor=NodeByAttr(coldiv,"id","editor_ifr")
"""
for node in Nodes:
if str(node.getAttribute(nodeattr))==nodeval:
return node
return None def SetNodeHtml(self,body,node_type,node_attr,node_attr_val,node_inner_html):
tags = self.GetNodes(body,node_type)
node = self.NodeByAttr(tags,node_attr,node_attr_val)
node.innerHTML = node_inner_html def SetNodeVal(self,body,node_type,node_attr,node_attr_val,node_value):
tags = self.GetNodes(body,node_type)
node = self.NodeByAttr(tags,node_attr,node_attr_val)
node.value = node_value def NodeClick(self,body,node_type,node_attr,node_attr_val):
tags = self.GetNodes(body,node_type)
node = self.NodeByAttr(tags,node_attr,node_attr_val)
node.click() def GetNodeHtml(self,body,node_type,node_attr,node_attr_val):
tags = self.GetNodes(body,node_type)
node = self.NodeByAttr(tags,node_attr,node_attr_val)
html = node.innerHTML
return html def GetNodeVal(self,body,node_type,node_attr,node_attr_val):
tags = self.GetNodes(body,node_type)
node = self.NodeByAttr(tags,node_attr,node_attr_val)
value = node.value
return value #mutiple nodes
def NodesByAttr(self,Nodes,nodeattr=None,nodeval=None):
"""
>>> div_id_editor=NodeByAttr(coldiv,"id","editor_ifr")
"""
value_list = []
for node in Nodes:
# print node.nodeType,node.nodeName #,node.getAttribute("id"),node.innerText
value_dict = {}
if not nodeattr:
nodeattr_list = ["id","nodeName","nodeType","nodeValue","className",
"innerHTML","innerText","href","name","title","type","value"]
for attr in nodeattr_list:
value_dict[attr] = node.getAttribute(attr)
value_list.append(value_dict)
else:
if not nodeval:
value_dict[nodeattr] = node.getAttribute(nodeattr)
value_list.append(value_dict)
else:
if str(node.getAttribute(nodeattr))==nodeval:
value_dict[nodeattr] = node.getAttribute(nodeattr)
value_list.append(value_dict)
return value_list #mutiple nodes
def GetNodesVal(self,body,node_type,node_attr=None,node_val=None): # print '*'*50
tags = self.GetNodes(body,node_type)
value_list = self.NodesByAttr(tags,node_attr,node_val)
return value_list def Quit(self):
self.ie.Quit() if __name__=="__main__": url = "http://blog.csdn.net/agoago_2009/"
IE = COM_IE(url)
BODY = IE.GetBody() # a_list = IE.GetNodesVal(BODY,"a","href")
a_list = IE.GetNodesVal(BODY,"a")
for a in a_list:
print a.get("innerText"),a.get("href") '''
IE.SetNodeVal(BODY,"input","id","inputSearch","COM")
IE.NodeClick(BODY,"input","id","btnSubmit") IE.WaitIE()
print IE.document.strip()[:100]
print IE.charset
print IE.text.strip()[:100]
''' raw_input('quit')
IE.Quit()

随机推荐

  1. 修改oracle服务器端字符集

    ----设置字符集步骤------- conn /as sysdba; shutdown immediate; startup mount; alter system enable restricte ...

  2. C#截取当前活动窗体的图片

    using System; using System.Collections.Generic; using System.ComponentModel; using System.Data; usin ...

  3. [转载] Java NIO教程

    转载自并发编程网 – ifeve.com http://ifeve.com/java-nio-all/ 关于通道(Channels).缓冲区(Buffers).选择器(Selectors)的故事. 从 ...

  4. 基于MyBatis的数据服务接口

    背景 作为软件系统开发,数据操作是系统开发不可避免的一个重要组成部分.因为其重要性围绕着数据操作也出现了众多框架.成熟框架是为了普适众多数据操作要求的,因此为了更好的实现技术落地,需要对框架进行丰富和 ...

  5. Python 面向对象(一) 基础

    Python 中一切皆对象 什么是面向对象? 面向对象就是将一些事物的共有特征抽象成类,从类来创建实例. 类class 可以理解为模版 比如人类,都具有身高.体重.年龄.性别.籍贯...等属性,但属性 ...

  6. P2915 [USACO08NOV]奶牛混合起来Mixed Up Cows

    题目描述 约翰家有N头奶牛,第i头奶牛的编号是Si,每头奶牛的编号都是唯一的.这些奶牛最近 在闹脾气,为表达不满的情绪,她们在挤奶的时候一定要排成混乱的队伍.在一只混乱的队 伍中,相邻奶牛的编号之差均 ...

  7. ThinkPHP中处理验证码不显示问题

    在调用验证码之前加上 ob_clean(); 不显示验证码的代码: public function verify(){               $Verify = new \Think\Verif ...

  8. Class 与 Style 绑定

    将 v-bind 用于 class 和 style 时,Vue.js 做了专门的增强.表达式结果的类型除了字符串之外,还可以是对象或数组. 绑定 HTML Class 对象语法 <div cla ...

  9. 在.NET Core类库中使用EF Core迁移数据库到SQL Server

    前言 如果大家刚使用EntityFramework Core作为ORM框架的话,想必都会遇到数据库迁移的一些问题. 起初我是在ASP.NET Core的Web项目中进行的,但后来发现放在此处并不是很合 ...

  10. Nginx的知识分享,感兴趣的可以看一下

    我干了五年了,我想把我的经验跟大家分享一下,欢迎大家阅读. 1. Nginx入门简介 WEB服务器也称为WWW(WORLD WIDE WEB)服务器,主要功能是提供网上信息浏览服务. WWW 是 In ...