Python爬虫视频教程零基础小白到scrapy爬虫高手-轻松入门

https://item.taobao.com/item.htm?spm=a1z38n.10677092.0.0.482434a6EmUbbW&id=564564604865

#coding=utf-8
import requests
from bs4 import BeautifulSoup
import urllib.request as ur
import urllib.parse as par
import re #cookie 自动获取
#cookie_start = "entrezSort=pubmed:; "
#cookie_end = "; _ga=GA1.2.82560226.1459065437; _gat=1; ncbi_prevPHID=3971636F6DCF48491459222804198SID; prevselfurl=http%3A//www.ncbi.nlm.nih.gov/; unloadnext=jsevent%3Dunloadnext%26ncbi_pingaction%3Dunload%26eventid%3D1%26jsperf_dns%3D3923%26jsperf_connect%3D252%26jsperf_ttfb%3D310%26jsperf_basePage%3D9%26jsperf_frontEnd%3D4645%26jsperf_navType%3D0%26jsperf_redirectCount%3D0%26maxScroll_x%3D0%26maxScroll_y%3D0%26currScroll_x%3D0%26currScroll_y%3D0%26hasScrolled%3Dfalse%26ncbi_phid%3D3971636F6DCF48491459222804198SID%26sgSource%3Dnative"
pubmed_url = r'http://www.ncbi.nlm.nih.gov/pubmed' pubmed_r = requests.get(pubmed_url)
print(pubmed_r.cookies)
pubmed_cookie_dict = requests.utils.dict_from_cookiejar(pubmed_r.cookies)
print(pubmed_cookie_dict)
cookie = ";".join(["%s=%s" % (k, v) for k, v in pubmed_cookie_dict.items()])
print(cookie) #heads
heads={
"Host":"www.ncbi.nlm.nih.gov",
"Origin":"http://www.ncbi.nlm.nih.gov",
"Referer":"http://www.ncbi.nlm.nih.gov/pubmed",
"User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36",
"Cookie": cookie
} kw_url="http://www.ncbi.nlm.nih.gov/pubmed/?term=bone+fracture+&EntrezSystem2.PEntrez.PubMed.Pubmed_PageController.PreviousPageName=results&EntrezSystem2.PEntrez.PubMed.Pubmed_PageController.SpecialPageName=&EntrezSystem2.PEntrez.PubMed.Pubmed_Facets.FacetsUrlFrag=filters%3D&EntrezSystem2.PEntrez.PubMed.Pubmed_Facets.FacetSubmitted=false&EntrezSystem2.PEntrez.PubMed.Pubmed_Facets.BMFacets=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.sPresentation=docsum&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.sPageSize=200&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.sSort=none&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.FFormat=docsum&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.FSort=&email_format=docsum&email_sort=&email_count=20&email_start=1&email_address=&email_subj=bone+fracture+-+PubMed&email_add_text=&EmailCheck1=&EmailCheck2=&coll_start=1&citman_count=20&citman_start=1&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.FileFormat=docsum&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.LastPresentation=docsum&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.Presentation=docsum&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.PageSize=200&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.LastPageSize=200&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.Sort=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.LastSort=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.FileSort=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.Format=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.LastFormat=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.PrevPageSize=200&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.PrevPresentation=docsum&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.PrevSort=&CollectionStartIndex=1&CitationManagerStartIndex=1&CitationManagerCustomRange=false&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_ResultsController.ResultCount=188348&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_ResultsController.RunLastQuery=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Entrez_Pager.cPage=1&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Entrez_Pager.cPage=1&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.sPresentation2=docsum&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.sPageSize2=20&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.sSort2=none&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.FFormat2=docsum&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.FSort2=&email_format2=docsum&email_sort2=&email_count2=20&email_start2=1&email_address2=&email_subj2=bone+fracture+-+PubMed&email_add_text2=&EmailCheck1=&EmailCheck2=&coll_start2=1&citman_count2=20&citman_start2=1&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.EmailReport=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.EmailFormat=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.EmailCount=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.EmailStart=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.EmailSort=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.Email=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.EmailSubject=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.EmailText=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.EmailQueryKey=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.EmailHID=1JUaNZm7_muMenLr1UCg7qnKsykz3SEoWOFpZKTJpg1OPsm8DTZY4SFI2IPzWw7ap95YjxnHe5u1jGZfR_IvM4TVVA2azjhpkrl&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.QueryDescription=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.Key=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.Answer=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.Holding=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.HoldingFft=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.HoldingNdiSet=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.OToolValue=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.SubjectList=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.TimelineAdPlaceHolder.CurrTimelineYear=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.TimelineAdPlaceHolder.BlobID=NCID_1_330998348_130.14.18.34_9001_1459126857_1489724809_0MetA0_S_MegaStore_F_1&EntrezSystem2.PEntrez.DbConnector.Db=pubmed&EntrezSystem2.PEntrez.DbConnector.LastDb=pubmed&EntrezSystem2.PEntrez.DbConnector.Term=bone+fracture&EntrezSystem2.PEntrez.DbConnector.LastTabCmd=&EntrezSystem2.PEntrez.DbConnector.LastQueryKey=1&EntrezSystem2.PEntrez.DbConnector.IdsFromResult=&EntrezSystem2.PEntrez.DbConnector.LastIdsFromResult=&EntrezSystem2.PEntrez.DbConnector.LinkName=&EntrezSystem2.PEntrez.DbConnector.LinkReadableName=&EntrezSystem2.PEntrez.DbConnector.LinkSrcDb=&EntrezSystem2.PEntrez.DbConnector.Cmd=PageChanged&EntrezSystem2.PEntrez.DbConnector.TabCmd=&EntrezSystem2.PEntrez.DbConnector.QueryKey=&p%24a=EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Entrez_Pager.Page&p%24l=EntrezSystem2&p%24st=pubmed&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Entrez_Pager.CurrPage=1"
print(kw_url) r = requests.post(kw_url, headers=heads)
soup = BeautifulSoup(r.content.decode('gbk', 'ignore'))
#print(soup)
pagitation = soup.find('input', id="pageno")['last']
print('共有页面数:' + pagitation)

  

pubmed_cookie 自动获取的更多相关文章

  1. 转载-centos网络配置(手动设置,自动获取)的2种方法

    转载地址:http://blog.51yip.com/linux/1120.html 重新启动网络配置 # service network restart 或 # /etc/init.d/networ ...

  2. 分享一个快速设置背景的js 自动获取背景图的长宽

    我来分享一个快速设置背景的js (需要jq支持!) 快速切图铺页面用---就是不需要手动输入背景图的长宽 自动获取背景图的长宽 : <div class="wrap"> ...

  3. paip.enhes efis 自动获取文件的中文编码

    paip.enhes efis 自动获取文件的中文编码 ##为什么需要自动获取文件的中文编码 提高开发效率,自动获取文件的中文编码  .不需要手动设置编码...轻松的.. ##cpdetector 可 ...

  4. 关于启明星系统移除apppath配置,让系统自动获取路径来设置cookie的解决方法

    启明星系统底层使用统一接口,特别是用户,用户登录后,都会建立一个 userinfo 的cookie.请看下面2个网址: http://120.24.86.232/book http://120.24. ...

  5. ThinkPHP自动获取关键词(调用第三方插件)

    ThinkPHP自动获取关键词调用在线discuz词库 先按照下图路径放好插件 方法如下 /** * 自动获取关键词(调用第三方插件) * @return [type] [description] * ...

  6. 基于jquery的表格动态创建,自动绑定,自动获取值

    最近刚加入GUT项目,学习了很多其他同事写的代码,感觉受益匪浅. 在GUT项目中,经常会碰到这样一个问题:动态生成表格,包括从数据库中读取数据,并绑定在表格中,以及从在页面上通过jQuery新增删除表 ...

  7. I.MX6 Linux 自动获取AR1020 event input节点

    /*********************************************************************** * I.MX6 Linux 自动获取AR1020 ev ...

  8. ARM-Linux配置DHCP自动获取IP地址

    备注:内核版本:2.6.30.9busybox版本:1.15.2 PC Linux和开发板Linux的工作用户:root 1. 配置内核:[*] Networking support --->N ...

  9. Oracle VM Virtual 下CentOS不能自动获取IP地址

    在CentOS配置网卡开机自动获取IP地址: vi /etc/sysconfig/network-scripts/ifcfg-eth0 将 ONBOOT="no" 改为 ONBOO ...

随机推荐

  1. Daily Scrum- 12/31

    Meeting Minutes 更新了统计单词背诵精度的统计数字计算方法: 确定了词反转的效果的动画: Burndown     Progress   part 组员 今日工作 Time (h) 明日 ...

  2. Genymotion安装总结

    周末的时候为了测试论文中的Almond虚拟助手软件,所以要去Google Play上去下载. 但是我的两个安卓模拟器:夜神和海马玩模拟器的安卓版本太低了,导致无法使用 谷歌服务,所以连商店都进不去. ...

  3. docker网络调试过程

    #1: 添加永久网桥 vi /etc/sysconfig/network-scripts/ifcfg-br0 DEVICE=br0 TYPE=Bridge BOOTROTO=static IPADDR ...

  4. [cnbeta]华为值多少钱,全世界非上市公司中估值最高的巨头

    华为值多少钱,全世界非上市公司中估值最高的巨头 https://www.cnbeta.com/articles/tech/808203.htm   小米.美团都曾表达过不想.不急于上市,但没人信,所以 ...

  5. [日常工作] Inspur 服务器安装ESXi的简单过程

    1. 公司里面使用虚拟化来进行功能测试 性能测试, 现阶段和之前主要是用虚拟机来搞. 前期用过hyperV 但是感觉 没有SystemCenter的VMM的授权比较难搞一些. 所以还是用ESXi的多了 ...

  6. K3CLOUD安装教程

    1.安装SQLSERVER2008 2.安装K3CLOUD安装包,此处各种安装iis,tomcat,ftp等环境,有过it经验的应该都能自己搞定,不详细赘述 3.进入管理中心,进行设置,默认为127. ...

  7. spring学习总结(一)_Ioc基础(下)

    本篇文章继续上篇文章讲解Ioc基础,这篇文章主要介绍零配置实现ioc,现在相信大家项目中也基本都是没有了xml配置文件.废话不多说.一起学习 代码示例 BookDao.java package com ...

  8. Guava的RateLimiter在单机限流中的正确用法

    错误使用 在实现限流时,网上的各种文章基本都会提到Guava的RateLimiter,用于实现单机的限流,并给出类似的代码: public void method() { RateLimiter ra ...

  9. LODOP弹出对话框获取保存文件的路径

    通常一般不会让用户自己在文本框里填上路径,因为路径要输入字母字符等比较麻烦,而且用户硬盘里文件很多,也不知道要保存在哪里,LODOP可以弹出一个选择保存路径的弹窗,然后把返回选择的路径值.这样用户就可 ...

  10. Bootstrap媒体对象

    前面的话 在Web页面或者说移动页面制作中,常常看到图文混排效果,图片居左(或居右),内容居右(或居左)排列.常常把这样的效果称为媒体对象.可以说它是一种抽象的样式,可以用来构建不同类型的组件.本文将 ...