(未编写完善,但能拿下来数据)

企查查代码数据如下:

 #encoding:utf-8
import requests
from lxml import etree
import random
import re
# Target site: the mobile version of Qichacha.
# base_url1 is the site root used to resolve relative detail-page links;
# base_url is the search endpoint that the company name is appended to.
base_url1 = 'http://m.qichacha.com'
base_url = 'https://m.qichacha.com/search?key='

# Pool of desktop browser User-Agent strings; one is picked at random below.
user_agent = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER) ",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E) ",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0) ",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
]

# Pool of raw Cookie header values; one is picked at random below.
# NOTE(review): these embed a fixed PHPSESSID captured from a browser session;
# presumably they stop working once that session expires -- confirm and refresh.
cookie = [
    'UM_distinctid=16518280d1e3e5-00788a93df88f7-5b193413-1fa400-16518280d1f949; zg_did=%7B%22did%22%3A%20%2216518280dd3b45-0da4e4ab13f793-5b193413-1fa400-16518280dd51f6%22%7D; acw_tc=7b81f49815356947470295339e1fc37a590000ea6190b3cf75ab42853b; PHPSESSID=4l787fmr2v90mh2khj8n6n64l5; CNZZDATA1254842228=226405416-1535690886-null%7C1535690886; Hm_lvt_3456bee468c83cc63fb5147f119f1075=1535595956,1535618789,1535618810,1535694746; zg_de1d1a35bfa24ce29bbf2c7eb17e6c4f=%7B%22sid%22%3A%201535694746927%2C%22updated%22%3A%201535695379242%2C%22info%22%3A%201535595953976%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22m.baidu.com%22%2C%22cuid%22%3A%20%227d775544e16a1cc0d0ab63b42b4b8aef%22%7D; Hm_lpvt_3456bee468c83cc63fb5147f119f1075=1535695379',
    'UM_distinctid=16518280d1e3e5-00788a93df88f7-5b193413-1fa400-16518280d1f949; zg_did=%7B%22did%22%3A%20%2216518280dd3b45-0da4e4ab13f793-5b193413-1fa400-16518280dd51f6%22%7D; acw_tc=7b81f49815356947470295339e1fc37a590000ea6190b3cf75ab42853b; PHPSESSID=4l787fmr2v90mh2khj8n6n64l5; CNZZDATA1254842228=226405416-1535690886-null%7C1535690886; Hm_lvt_3456bee468c83cc63fb5147f119f1075=1535595956,1535618789,1535618810,1535694746; zg_de1d1a35bfa24ce29bbf2c7eb17e6c4f=%7B%22sid%22%3A%201535694746927%2C%22updated%22%3A%201535695791508%2C%22info%22%3A%201535595953976%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22m.baidu.com%22%2C%22cuid%22%3A%20%227d775544e16a1cc0d0ab63b42b4b8aef%22%7D; Hm_lpvt_3456bee468c83cc63fb5147f119f1075=1535695792',
    'UM_distinctid=16518280d1e3e5-00788a93df88f7-5b193413-1fa400-16518280d1f949; zg_did=%7B%22did%22%3A%20%2216518280dd3b45-0da4e4ab13f793-5b193413-1fa400-16518280dd51f6%22%7D; acw_tc=7b81f49815356947470295339e1fc37a590000ea6190b3cf75ab42853b; PHPSESSID=4l787fmr2v90mh2khj8n6n64l5; CNZZDATA1254842228=226405416-1535690886-null%7C1535690886; Hm_lvt_3456bee468c83cc63fb5147f119f1075=1535595956,1535618789,1535618810,1535694746; zg_de1d1a35bfa24ce29bbf2c7eb17e6c4f=%7B%22sid%22%3A%201535694746927%2C%22updated%22%3A%201535695924595%2C%22info%22%3A%201535595953976%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22m.baidu.com%22%2C%22cuid%22%3A%20%227d775544e16a1cc0d0ab63b42b4b8aef%22%7D; Hm_lpvt_3456bee468c83cc63fb5147f119f1075=1535695925',
    'UM_distinctid=16518280d1e3e5-00788a93df88f7-5b193413-1fa400-16518280d1f949; zg_did=%7B%22did%22%3A%20%2216518280dd3b45-0da4e4ab13f793-5b193413-1fa400-16518280dd51f6%22%7D; acw_tc=7b81f49815356947470295339e1fc37a590000ea6190b3cf75ab42853b; PHPSESSID=4l787fmr2v90mh2khj8n6n64l5; CNZZDATA1254842228=226405416-1535690886-null%7C1535690886; Hm_lvt_3456bee468c83cc63fb5147f119f1075=1535595956,1535618789,1535618810,1535694746; zg_de1d1a35bfa24ce29bbf2c7eb17e6c4f=%7B%22sid%22%3A%201535694746927%2C%22updated%22%3A%201535696003819%2C%22info%22%3A%201535595953976%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22m.baidu.com%22%2C%22cuid%22%3A%20%227d775544e16a1cc0d0ab63b42b4b8aef%22%7D; Hm_lpvt_3456bee468c83cc63fb5147f119f1075=1535696005',
]

# Request headers: chosen once at import time and reused for every request.
headers = {
    'User-agent': random.choice(user_agent),
    'cookie': random.choice(cookie),
}

# Company names to look up.
name_list = ['成都创信广告有限公司']

# Seconds to wait for the server before giving up, so a stalled connection
# cannot hang the script forever.
REQUEST_TIMEOUT = 10

# Pattern that captures the href of the first result card on the search page.
SEARCH_RESULT_PATTERN = r'</div> <a href="(.*?)" class="a-decoration"> <div class="list-item"> <div class="list-item-top">'

# (result key, output label, pattern) for every field read from a detail page.
DETAIL_FIELDS = (
    ('company_name', '公司名称:', r'<div class="company-name">(.*?)<'),
    ('legal_person', '法人:', r'<a class="oper" href=".*?">(.*?)</a>'),
    ('telephone', '电话:', r'<a href="tel:.*?" class="phone a-decoration">(.*?)</a>'),
)
# TODO: address and registration number are not extracted yet. The untested
# candidate patterns left by the original author were:
#   address:             '</div> <div class="address">(.*?)</div> </div>'
#   registration number: '</div><div class="basic-item-right">(.*?)</div>'


def first_match(pattern, text):
    """Return the first capture group of *pattern* in *text*, or None.

    Equivalent to ``re.findall(pattern, text)[0]`` for single-group patterns,
    except that a missing match yields None instead of raising IndexError.
    """
    found = re.search(pattern, text)
    return found.group(1) if found else None


def find_detail_path(search_html):
    """Return the relative link of the first search result, or None if absent."""
    return first_match(SEARCH_RESULT_PATTERN, search_html)


def parse_company_details(detail_html):
    """Extract company name, legal person and telephone from a detail page.

    Returns a dict keyed by 'company_name', 'legal_person' and 'telephone';
    a field whose pattern does not match is mapped to None.
    """
    return {
        key: first_match(pattern, detail_html)
        for key, _label, pattern in DETAIL_FIELDS
    }


def fetch_company(name):
    """Search for *name*, open the first result and print its basic details.

    Returns the dict produced by parse_company_details, or None when the
    search page contains no result link. Network errors raised by requests
    propagate to the caller.
    """
    start_url = base_url + str(name)
    print(start_url)
    search_html = requests.get(
        start_url, headers=headers, timeout=REQUEST_TIMEOUT
    ).text

    detail_path = find_detail_path(search_html)
    if detail_path is None:
        # Layout change, empty result set or an anti-bot page: skip this name
        # instead of crashing on an empty match list.
        print('No search result found for: ' + str(name))
        return None

    detail_html = requests.get(
        base_url1 + detail_path, headers=headers, timeout=REQUEST_TIMEOUT
    ).text
    details = parse_company_details(detail_html)
    for key, label, _pattern in DETAIL_FIELDS:
        # A missing field is printed as an empty value rather than raising.
        print(label + (details[key] or ''))
    return details


def main():
    """Look up every company in name_list, continuing past request failures."""
    for name in name_list:
        try:
            fetch_company(name)
        except requests.RequestException as exc:
            print('Request failed for ' + str(name) + ': ' + str(exc))


if __name__ == '__main__':
    main()
执行结果如下图:

6.requests编写企查查爬虫的更多相关文章

  1. Python爬虫爬企查查数据

    因为制作B2b网站需要,需要入库企业信息数据.所以目光锁定企查查数据,废话不多说,开干! #-*- coding-8 -*- import requests import lxml import sy ...

  2. 如何用python无账号无限制获取企查查信息

    前言 本文的文字及图片来源于网络,仅供学习.交流使用,不具有任何商业用途,版权归原作者所有,如有问题请及时联系我们以作处理. PS:如有需要Python学习资料的小伙伴可以加点击下方链接自行获取http ...

  3. 12.通过微信小程序端访问企查查(采集工商信息)

    需要注意的问题: 一.1.微信端访问企查查小程序需要登录.2.访问抓包获取的url是有时效性的过一段时间就不能用了. http://xcx.qichacha.com/wxa/v1/base/getEn ...

  4. 11.采集手机端app企查查上市公司数据(未成功)

    ---恢复内容开始--- 采集企查查手机端app数据: 1.首先手机端安装app并usb连接电脑端,fiddler监控手机请求数据对数据进行分析抓取. 手机端界面与fiddler界面参照: 2.对获取 ...

  5. 企查查app新增企业数据抓取

    企查查每日新增企业数据抓取尚未完成的工作: 需要自行抓包获取设备id,appid,sign等等 sign和时间戳保持一致即可 把所有的数据库.redis配置 无法自动登录,账号需要独立 redis数据 ...

  6. XPath2Doc,一个半自动采集网页生成Word Docx文件的工具,带企查查和天眼查模板

    原始出处:https://www.cnblogs.com/Charltsing/p/XPath2Doc.html 很多人需要从网站采集一些数据填写Word模板,手工操作费时费力还容易出错,所以我给朋友 ...

  7. Nebula Graph 在企查查的应用

    本文首发于 Nebula Graph Community 公众号 背景 企查查是企查查科技有限公司旗下的一款企业信用查询工具,旨在为用户提供快速查询企业工商信息.法院判决信息.关联企业信息.法律诉讼. ...

  8. Python 利用Python编写简单网络爬虫实例3

    利用Python编写简单网络爬虫实例3 by:授客 QQ:1033553122 实验环境 python版本:3.3.5(2.7下报错 实验目的 获取目标网站“http://bbs.51testing. ...

  9. Python 利用Python编写简单网络爬虫实例2

    利用Python编写简单网络爬虫实例2 by:授客 QQ:1033553122 实验环境 python版本:3.3.5(2.7下报错 实验目的 获取目标网站“http://www.51testing. ...

随机推荐

  1. HDU 3342:Legal or Not(拓扑排序)

    Legal or Not Time Limit: 2000/1000 MS (Java/Others)    Memory Limit: 32768/32768 K (Java/Others) Tot ...

  2. HDU 1232:畅通工程(并查集模板)

    畅通工程 Time Limit: 4000/2000 MS (Java/Others)    Memory Limit: 65536/32768 K (Java/Others)Total Submis ...

  3. @ModelAttribute的用法

  4. AJAX异步实现简单的瀑布流

    传统瀑布流布局ul-li,需要先设定显示几列,每列是一个li,需要左浮动并指定宽度,li里面的布局也要先布局好,主要是要定宽,高度自动:然后通过ajax异步,从数据库中得到数据,遍历后将数据插入最矮的 ...

  5. LG2731 骑马修栅栏 Riding the Fences

    题意 John是一个与其他农民一样懒的人.他讨厌骑马,因此从来不两次经过一个栅栏.你必须编一个程序,读入栅栏网络的描述,并计算出一条修栅栏的路径,使每个栅栏都恰好被经过一次.John能从任何一个顶点( ...

  6. smarty中调用php内置函数

    http://blog.csdn.net/clevercode/article/details/50373633

  7. FastAdmin 将会员模块升级为基础模块的升级指导

    说明 FastAdmin 于 2018-01-19 将会员模块升级为基础模块. 因为有数据库改动,所以需要对旧的数据库进行升级,不然没有办法使用和显示. 升级流程 git 合并代码 略 导入数据表 D ...

  8. 数独求解程序 php版

    数独求解程序 php版 <?php class Sudoku { var $matrix; function __construct($arr = null) { if ($arr == nul ...

  9. 注意字符串的strlen与sizeof的差别

    unsigned char AT_RESET[]="r\r\n"; printf("strlen=%d sizeof=%d\n",strlen(AT_RESET ...

  10. 洛谷4059找爸爸(Code+第一次月赛)

    题目:https://www.luogu.org/problemnew/show/P4059 dp. 1.看出-A-B(k-1)可以理解成连续空格的第一个 -A,其余 -B: 2.把会干扰的“上一步右 ...