httpwebrequest 模拟登录 获取cookies 以前的代码,记录备忘!
2个类,一个基类,一个构建头信息调用类
关于如何获取到post中的内容,你只需要用http抓包工具把你与目标网站的请求信息抓下来,打开分析一下,再按照抓下来的包中的数据进行构建就行了
- using System;
- using System.Collections.Generic;
- using System.Text;
- using System.Net;
- using System.IO;
- namespace bot
- {
/// <summary>
/// Small helper around <see cref="HttpWebRequest"/> for cookie-based sessions:
/// one method posts a login form and returns the cookies the server set, the
/// others replay those cookies to fetch further pages.
/// All three public methods are best-effort: a failed request yields an empty
/// result instead of an exception.
/// </summary>
public class Html
{
    /// <summary>
    /// The subset of <see cref="HttpWebRequest"/> header properties that callers
    /// configure per request.
    /// </summary>
    public struct RequestPPT
    {
        private string strAccept;
        private string strContentType;
        private string strUserAgent;
        private string strMethod;

        /// <summary>
        /// Value copied to <see cref="HttpWebRequest.Accept"/> (accepted media types).
        /// </summary>
        public string Accept
        {
            get { return strAccept; }
            set { strAccept = value; }
        }

        /// <summary>
        /// Value copied to <see cref="HttpWebRequest.ContentType"/> (media type of the request body).
        /// </summary>
        public string ContentType
        {
            get { return strContentType; }
            set { strContentType = value; }
        }

        /// <summary>
        /// Value copied to <see cref="HttpWebRequest.UserAgent"/> (client identification).
        /// </summary>
        public string UserAgent
        {
            get { return strUserAgent; }
            set { strUserAgent = value; }
        }

        /// <summary>
        /// Value copied to <see cref="HttpWebRequest.Method"/>. Any HTTP 1.1 verb is allowed
        /// (GET, HEAD, POST, PUT, DELETE, TRACE, OPTIONS); when a body is sent the verb
        /// must be one that uploads data.
        /// </summary>
        public string Method
        {
            get { return strMethod; }
            set { strMethod = value; }
        }
    }

    /// <summary>
    /// Sends <paramref name="post"/> to the login address and returns the cookies
    /// of the response together with the response body.
    /// </summary>
    /// <param name="url">Address of the login page.</param>
    /// <param name="post">Raw request body (the encoded form fields).</param>
    /// <param name="strHtml">Receives the response body, or "" when the request failed.</param>
    /// <param name="rppt">Header settings applied to the request.</param>
    /// <param name="server">Not used by this method; kept so existing callers keep compiling.</param>
    /// <returns>The cookies of the response; an empty collection when the request failed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="post"/> is null.</exception>
    public CookieCollection funGetCookie(string url, byte[] post, out string strHtml, RequestPPT rppt, string server)
    {
        if (post == null)
        {
            throw new ArgumentNullException("post");
        }

        HttpWebRequest hwRequest = funCreateRequest(url, new CookieContainer(), rppt, post.Length);
        CookieCollection ckclReturn;
        funExecute(hwRequest, post, out strHtml, out ckclReturn);
        return ckclReturn;
    }

    /// <summary>
    /// Fetches a page without a request body, sending the given cookies.
    /// </summary>
    /// <param name="strUri">Address of the page.</param>
    /// <param name="ccl">Cookies obtained earlier (for example from <c>funGetCookie</c>).</param>
    /// <param name="rppt">Header settings applied to the request.</param>
    /// <returns>The response body, or "" when the request failed.</returns>
    public string funGetHtmlByCookies(string strUri, CookieCollection ccl, RequestPPT rppt)
    {
        CookieContainer cc = new CookieContainer();
        cc.Add(ccl);

        // Content-Length is set to 0 explicitly, exactly as the previous implementation did.
        HttpWebRequest hwRequest = funCreateRequest(strUri, cc, rppt, 0);
        string strHtml;
        CookieCollection ignored;
        funExecute(hwRequest, null, out strHtml, out ignored);
        return strHtml;
    }

    /// <summary>
    /// Sends <paramref name="post"/> to a page, together with the given cookies.
    /// </summary>
    /// <param name="strUri">Address of the page.</param>
    /// <param name="post">Raw request body (the encoded form fields).</param>
    /// <param name="ccl">Cookies obtained earlier (for example from <c>funGetCookie</c>).</param>
    /// <param name="rppt">Header settings applied to the request.</param>
    /// <returns>The response body, or "" when the request failed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="post"/> is null.</exception>
    public string funGetHtmlByCookies(string strUri, byte[] post, CookieCollection ccl, RequestPPT rppt)
    {
        if (post == null)
        {
            throw new ArgumentNullException("post");
        }

        CookieContainer cc = new CookieContainer();
        cc.Add(ccl);

        HttpWebRequest hwRequest = funCreateRequest(strUri, cc, rppt, post.Length);
        string strHtml;
        CookieCollection ignored;
        funExecute(hwRequest, post, out strHtml, out ignored);
        return strHtml;
    }

    /// <summary>
    /// Creates the request object and copies the cookie container, the header
    /// settings and the content length onto it. Nothing is sent yet.
    /// </summary>
    private static HttpWebRequest funCreateRequest(string url, CookieContainer cc, RequestPPT rppt, long contentLength)
    {
        HttpWebRequest hwRequest = (HttpWebRequest)HttpWebRequest.Create(new Uri(url));
        hwRequest.CookieContainer = cc;
        hwRequest.Accept = rppt.Accept;
        hwRequest.ContentType = rppt.ContentType;
        hwRequest.UserAgent = rppt.UserAgent;
        hwRequest.Method = rppt.Method;
        hwRequest.ContentLength = contentLength;
        return hwRequest;
    }

    /// <summary>
    /// Writes the optional body, sends the request and reads the whole response.
    /// Every stream is released even when reading fails half-way.
    /// </summary>
    /// <param name="hwRequest">Fully configured request.</param>
    /// <param name="post">Body to upload, or null to send no body.</param>
    /// <param name="strHtml">Receives the response body, or "" on failure.</param>
    /// <param name="cookies">Receives the response cookies, or an empty collection on failure.</param>
    /// <returns>true when the response was read completely; false otherwise.</returns>
    private static bool funExecute(HttpWebRequest hwRequest, byte[] post, out string strHtml, out CookieCollection cookies)
    {
        strHtml = "";
        cookies = new CookieCollection();

        try
        {
            if (post != null)
            {
                // Opening the request stream is what actually connects to the server,
                // so it has to sit inside the same failure handling as GetResponse.
                using (Stream body = hwRequest.GetRequestStream())
                {
                    body.Write(post, 0, post.Length);
                }
            }

            using (HttpWebResponse hwResponse = (HttpWebResponse)hwRequest.GetResponse())
            using (Stream stream = hwResponse.GetResponseStream())
            // NOTE(review): the body is decoded with the machine's default encoding,
            // as before; the charset announced by the server is not consulted.
            using (StreamReader sReader = new StreamReader(stream, Encoding.Default))
            {
                string text = sReader.ReadToEnd();

                // Read the cookies while the response is still open.
                cookies = hwResponse.Cookies;
                strHtml = text;
                return true;
            }
        }
        catch (WebException)
        {
            return false;
        }
        catch (IOException)
        {
            return false;
        }
        catch (InvalidOperationException)
        {
            // Includes ProtocolViolationException.
            return false;
        }
        catch (NotSupportedException)
        {
            return false;
        }
    }
}
- }
- 第二个
- <pre class="">using System;
- using System.IO;
- using System.Collections.Generic;
- using System.Text;
- using System.Net;
- using System.Data;
- using System.Xml;
- using System.Text.RegularExpressions;
- namespace bot
- {
/// <summary>
/// Forum client built on <see cref="Html"/>: logs in, fetches pages with the
/// login cookies, submits a poll vote and extracts links from list pages.
/// URLs are built as "http://" + <see cref="Host"/> + "/bbs/" + relative link.
/// </summary>
public class SisHtml : Html
{
    private const string strFormContentType = "application/x-www-form-urlencoded";
    private const string strLoginAccept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";
    private const string strHtmlAccept = "text/html";
    private const string strBrowserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; InfoPath.1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";

    // Used only when the poll form does not expose its own formhash field.
    private const string strFallbackFormHash = "77b49df4";

    // Thread links on a list page: "viewthread.php", any non-blank run, then "highlight=".
    private static readonly Regex rgThreadLink = new Regex(@"viewthread\.php\S+?highlight=");

    // Hidden form field carrying the form hash.
    private static readonly Regex rgFormHash = new Regex(@"name=""formhash""\s+value=""([^""]+)""");

    private string strHost;

    public SisHtml()
    {
    }

    /// <summary>
    /// Host name or IP address of the forum; inserted into every URL this class builds.
    /// </summary>
    public string Host
    {
        get { return strHost; }
        set { strHost = value; }
    }

    /// <summary>
    /// Posts the login form and returns the cookies set by the server.
    /// </summary>
    /// <param name="url">Address the login form is posted to.</param>
    /// <param name="dir">Form fields (name/value pairs) that make up the request body.</param>
    /// <param name="strHtml">Receives the page returned after the login, or "" on failure.</param>
    /// <returns>The cookies of the login response; empty when the request failed.</returns>
    public CookieCollection funGetCookie(string url, Dictionary<string, string> dir, out string strHtml)
    {
        byte[] post = Encoding.Default.GetBytes(funMakePost(dir));
        string server = "http://" + new Uri(url).Host;
        return base.funGetCookie(url, post, out strHtml, funMakeHeaders(strLoginAccept), server);
    }

    /// <summary>
    /// Fetches a page using cookies obtained earlier.
    /// </summary>
    /// <param name="strUri">Address of the page.</param>
    /// <param name="ccl">Cookies obtained earlier.</param>
    /// <returns>The page content, or "" when the request failed.</returns>
    public string funGetHtmlByCookies(string strUri, CookieCollection ccl)
    {
        return base.funGetHtmlByCookies(strUri, ccl, funMakeHeaders(strHtmlAccept));
    }

    /// <summary>
    /// Submits a vote for the first poll option found in a thread page.
    /// </summary>
    /// <param name="strHtml">HTML of the thread page containing the poll.</param>
    /// <param name="ccl">Valid login cookies.</param>
    /// <returns>
    /// The HTML returned after voting; the extracted form markup when the page offers
    /// no poll options (already voted); "" when the page contains no form or the
    /// poll form cannot be parsed.
    /// </returns>
    public string funVote(string strHtml, CookieCollection ccl)
    {
        if (strHtml == null)
        {
            return "";
        }

        // Cut the page down to the span from the first "<form" to the last "</form>".
        const string strFormClose = "</form>";
        int formStart = strHtml.IndexOf("<form", StringComparison.Ordinal);
        int formEnd = strHtml.LastIndexOf(strFormClose, StringComparison.Ordinal);
        if (formStart < 0 || formEnd < formStart)
        {
            return "";
        }
        strHtml = strHtml.Substring(formStart, formEnd - formStart + strFormClose.Length);

        // The option inputs are only present while the user has not voted yet.
        const string strCheck = @"name=""pollanswers[]""";
        if (strHtml.IndexOf(strCheck, StringComparison.Ordinal) < 0)
        {
            return strHtml;
        }

        // First quoted value after the marker: the option id and the form's action link.
        string strPollanswers = funGetQuotedAfter(strHtml, strCheck, 20);
        string strAction = funGetQuotedAfter(strHtml, @"method=""post""", 100);
        if (strPollanswers == null || strAction == null)
        {
            return "";
        }

        // Prefer the form hash embedded in this very form over the fixed fallback.
        Match hashMatch = rgFormHash.Match(strHtml);
        string strFormHash = hashMatch.Success ? hashMatch.Groups[1].Value : strFallbackFormHash;

        Dictionary<string, string> dir = new Dictionary<string, string>();
        dir.Add("formhash", strFormHash);
        dir.Add("pollanswers[]", strPollanswers);
        dir.Add("pollsubmit", "提交");
        byte[] post = Encoding.Default.GetBytes(funMakePost(dir));

        // The action link is HTML-escaped in the page ("&amp;"); dropping "amp;" restores "&".
        string strUrl = "http://" + Host + "/bbs/" + strAction.Replace("amp;", "");
        return base.funGetHtmlByCookies(strUrl, post, ccl, funMakeHeaders(strHtmlAccept));
    }

    /// <summary>
    /// Joins name/value pairs into a "k1=v1&amp;k2=v2" request body.
    /// An empty value is written as two single quotes.
    /// </summary>
    /// <param name="dir">Name/value pairs.</param>
    /// <returns>The joined string; "" when <paramref name="dir"/> is null or empty.</returns>
    private string funMakePost(Dictionary<string, string> dir)
    {
        if (dir == null || dir.Count == 0)
        {
            return "";
        }

        // NOTE(review): names and values are not URL-encoded; a value containing
        // '&' or '=' would corrupt the body. Left as is because the bytes are
        // produced with Encoding.Default and the server's expectations are unknown here.
        StringBuilder sbPost = new StringBuilder();
        foreach (KeyValuePair<string, string> kvp in dir)
        {
            if (sbPost.Length > 0)
            {
                sbPost.Append('&');
            }
            sbPost.Append(kvp.Key).Append('=');
            sbPost.Append(kvp.Value == "" ? "''" : kvp.Value);
        }
        return sbPost.ToString();
    }

    /// <summary>
    /// Finds the address of the next list page.
    /// </summary>
    /// <param name="strHtml">HTML of the current page.</param>
    /// <returns>
    /// The absolute address of the next page; "" when the page contains a form
    /// (treated as not being a list page) or no next-page link is found.
    /// </returns>
    public string funGetNextUrl(string strHtml)
    {
        if (strHtml == null || strHtml.IndexOf("<form", StringComparison.Ordinal) != -1)
        {
            return "";
        }

        int keyAt = strHtml.IndexOf(@"class=""next""", StringComparison.Ordinal);
        if (keyAt < 0)
        {
            return "";
        }

        // Look at up to 100 characters in front of the marker and take the first
        // quoted value in that window.
        // NOTE(review): if the window also contains the tail of an earlier attribute,
        // the first quoted value may not be the link of the "next" anchor.
        int windowStart = Math.Max(0, keyAt - 100);
        string strLink = funFirstQuoted(strHtml.Substring(windowStart, keyAt - windowStart));
        if (strLink == null)
        {
            return "";
        }
        return "http://" + Host + "/bbs/" + strLink.Replace("amp;", "");
    }

    /// <summary>
    /// Collects the thread links of a list page.
    /// </summary>
    /// <param name="strHtml">HTML of the list page.</param>
    /// <returns>A table with a single "Url" column holding one absolute address per link found.</returns>
    public DataTable funGetListTable(string strHtml)
    {
        DataTable dt = new DataTable();
        dt.Columns.Add(new DataColumn("Url"));

        foreach (Match ms in rgThreadLink.Matches(strHtml))
        {
            DataRow dr = dt.NewRow();
            dr[0] = "http://" + Host + "/bbs/" + ms.Value.Replace("amp;", "");
            dt.Rows.Add(dr);
        }
        return dt;
    }

    /// <summary>
    /// Builds the header settings shared by all requests of this class:
    /// form-encoded POST with a fixed browser user agent.
    /// </summary>
    private static RequestPPT funMakeHeaders(string strAccept)
    {
        RequestPPT rppt = new RequestPPT();
        rppt.Accept = strAccept;
        rppt.ContentType = strFormContentType;
        rppt.Method = "POST";
        rppt.UserAgent = strBrowserAgent;
        return rppt;
    }

    /// <summary>
    /// Returns the first double-quoted value within <paramref name="window"/> characters
    /// after <paramref name="marker"/>, or null when the marker or a quote is missing.
    /// </summary>
    private static string funGetQuotedAfter(string text, string marker, int window)
    {
        int markerAt = text.IndexOf(marker, StringComparison.Ordinal);
        if (markerAt < 0)
        {
            return null;
        }

        int start = markerAt + marker.Length;
        int length = Math.Min(window, text.Length - start);
        return funFirstQuoted(text.Substring(start, length));
    }

    /// <summary>
    /// Returns the text between the first and the second double quote of
    /// <paramref name="text"/> (or up to the end when there is no second quote),
    /// or null when <paramref name="text"/> contains no double quote at all.
    /// </summary>
    private static string funFirstQuoted(string text)
    {
        string[] parts = text.Split('"');
        return parts.Length > 1 ? parts[1] : null;
    }
}
- }
- </pre>
httpwebrequest 模拟登录 获取cookies 以前的代码,记录备忘!的更多相关文章
- 记一次HTTPClient模拟登录获取Cookie的开发历程
记一次HTTPClient模拟登录获取Cookie的开发历程 环境: springboot : 2.7 jdk: 1.8 httpClient : 4.5.13 设计方案 通过新建一个 ...
- HttpWebRequest 模拟登录响应点击事件(分享自己用的HttpHelper类)
平时也经常采集网站数据,也做模拟登录,但一般都是html控件POST到页面登录:还没有遇到用户服务器控件button按钮点击事件登录的,今天像往常一样POST传递参数,但怎么都能登录不了:最后发现还有 ...
- POST信息模拟登录获取页面内容
最近项目里有一个是要模拟登录后,访问固定页面获取内容的要求,一开始用JQ AJAX好像不支持跨域请求.后使用.net中HttpWebRequest对象来获取.一开始访问总是无法在第二个页面正常访问,好 ...
- ph模拟登录获取信息
cURL 是一个功能强大的PHP库,使用PHP的cURL库可以简单和有效地抓取网页并采集内容,设置cookie完成模拟登录网页,curl提供了丰富的函数,开发者可以从PHP手册中获取更多关于cURL信 ...
- 微博验证码的识别并登录获取cookies
记得以前微博是用的宫格验证码,现在不一样了,用的是滑块验证码和 点触验证码,每天登陆的第一次基本用的是滑块,继续登录就都用的是点触验证码.所以滑块验证码不写,感兴趣的可以补上. 代码: 这里用的超级鹰 ...
- Python入门小练习 003 利用cookielib模拟登录获取账户信息
为了方便, 使用chinaunix的账户获取账户主题. 有些网站可能需要验证码, 找一些不用验证码的网站 下面 ****** 很多个星号的均为私密信息, 所以用星号代替 #!/usr/bin/pyt ...
- C#常用代码片段备忘
以下是从visual studio中整理出来的常用代码片段,以作备忘 快捷键: eh 用途: 类中事件实现函数模板 private void MyMethod(object sender, Event ...
- Python爬虫 —— 知乎之selenium模拟登陆获取cookies+requests.Session()访问+session序列化
代码如下: # coding:utf-8 from selenium import webdriver import requests import sys import time from lxml ...
- 终极利器!利用appium和mitmproxy登录获取cookies
环境搭建 参考我之前写的https://www.cnblogs.com/c-x-a/p/9163221.html appium 代码start_appium.py # -*- coding: utf- ...
随机推荐
- applicationContext.xml 配置(扫描)
<?xml version="1.0" encoding="UTF-8"?> <beans xmlns="http://www.sp ...
- HDU 3452 Bonsai
可以转化成最小割的求解,题目其实就是要求把点分成两个集合,增加一个超级汇点,一部分的点在根节点所在集合内,一部分节点在超级汇点所在的集合内,这两就分开了,又要求费用最小,那么就是最小割. #inclu ...
- 常用的linux系统监控命令整理
找到最耗CPU的java线程ps命令 命令:ps -mp pid -o THREAD,tid,time 或者 ps -Lfp pid 结果展示: 这个命令的作用,主要是可以获取到对应一个进程下的线程的 ...
- MFC中为菜单或按钮添加快捷键功能
1.新建一快捷键资源,ACCELERATOR,关联相应的ID号,下图所示中,其中,第一个ID为自定义快捷键ID,按CTRL+R,此时响应该ID以应的消息响应函数, 第二个ID为菜单ID,此时按CTRL ...
- 嵌套表SHAPE
SQL语法 SHAPE {<master query>} APPEND ({ <child table query> } RELATE <master column> ...
- IDEA类文件不编译问题
用IDEA的人遇到过类文件上有个小叉吗? 1.在 .gitignore 里面把这个文件去掉 2.setting->builder->compiler->子目录 去掉不编译的文件
- JavaScript在智能手机上的应用-使用手机GPS定位用户所在城市
---------------------------- <script type="text/javascript" language="javascript&q ...
- Gridview导出成Excel
在aspx里面加上 <%@ Page Language="C#" AutoEventWireup="true" CodeFile="AAAAAA ...
- Entity Framework 学习中级篇2—存储过程(上)
目前,EF对存储过程的支持并不完善.存在以下问题: l EF不支持存储过程返回多表联合查询的结果集. l EF仅支持返回返回某个表的全部字段,以便转换成对应的实体.无法 ...
- vs找不到svn源代码管理插件之我见
使用svn要安装两个文件,一个客户端:TortoiseSVN-1.8.msi,一个插件:AnkhSvn-2.5.msi:两个都安装好之后,在vs的tool(工具)选项卡中,选择自定义,然后选择sour ...