1、有些网站访问速度慢(比如全球内衣,另外对于女生各种什么内衣不懂的也可以上去查看了解哈),而且到这个网站的连接因为没有及时关闭,连接数不断增加,造成抓取页面数据的时候超时也很严重。

解决:及时调用相应的 HttpWebResponse.Close() 和 HttpWebRequest.Abort(),并设置 HttpWebRequest.KeepAlive=false,还有把超时时间设置长一点,之后连接超时的几率就降低了。还可以直接在C盘的hosts文件里把域名指向某个IP,减少去DNS服务器查找的时间。

2、抓中国供应商的时候开了多线程,跑得太快,抓了几十条就出现拉动类的验证码。

解决:使用代理,或者用移动的宽带重新拨号(示例如下)

// Usage sketch: hang up the current RAS connection and dial the phone-book
// entry named "ADSL" again.
RASDisplay ras = new RASDisplay();
ras.Disconnect();// hang up the current connection
ras.Connect("ADSL");// dial again

// Dialing does not complete immediately, so give it some time
Thread.Sleep(5000);

下面是封装请求的类库

/// <summary>
/// Downloads <paramref name="url"/> with an HTTP GET and returns the response
/// body as text.
/// </summary>
/// <param name="url">Absolute HTTP/HTTPS address to fetch.</param>
/// <param name="charset">
/// Name of the encoding used to decode the response body (also sent in the
/// custom "content" request header). Defaults to "utf-8".
/// </param>
/// <returns>
/// The decoded response body, or an empty string when the request could not be
/// created, failed, timed out, or the charset name is unknown. This method is
/// best-effort and does not throw.
/// </returns>
public static string getRequest(string url, string charset = "utf-8")
{
    string result = "";
    HttpWebRequest myreq = null;
    try
    {
        myreq = (HttpWebRequest)WebRequest.Create(url);
        myreq.Timeout = 20000;
        myreq.Method = "GET";
        // Do not reuse the connection: lingering keep-alive connections to slow
        // sites are what caused the timeouts this helper is meant to avoid.
        // (The original set this to false and then back to true before sending.)
        myreq.KeepAlive = false;
        myreq.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
        // NOTE(review): "content" is not a standard HTTP header name; kept as-is
        // because servers may have been tested against it.
        myreq.Headers.Add("content", "text/html; charset=" + charset);
        myreq.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36";

        // The using blocks close reader, stream and response even when reading fails.
        using (HttpWebResponse myres = (HttpWebResponse)myreq.GetResponse())
        using (Stream stream = myres.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, System.Text.Encoding.GetEncoding(charset)))
        {
            result = reader.ReadToEnd();
        }
    }
    catch (WebException ex)
    {
        // An error status (4xx/5xx) still carries a response that holds a connection.
        if (ex.Response != null)
        {
            ex.Response.Close();
        }
        System.Diagnostics.Trace.TraceWarning("getRequest failed for {0}: {1}", url, ex.Message);
    }
    catch (Exception ex)
    {
        // Best-effort contract: callers expect "" instead of an exception
        // (bad URL, unknown charset, I/O error while reading, ...).
        System.Diagnostics.Trace.TraceWarning("getRequest failed for {0}: {1}", url, ex.Message);
    }
    finally
    {
        if (myreq != null)
        {
            myreq.Abort();
        }
    }

    return result;
}

#region 自动拨号
/* Auto-dial setup
* 1. Right-click "Network Neighborhood" -> Properties;
2. Select "Broadband Connection", right-click "Properties" -> "Options";
3. Untick "Prompt for name and password, certificate, etc.", then click "OK" to exit;

4. The build platform must be set to x86
*/
// Managed mirror of one active-connection record, filled in by
// RAS.RasEnumConnections (bound to the ANSI entry point RasEnumConnectionsA).
// NOTE(review): there is no [StructLayout] attribute here, unlike the sibling
// structs, so the runtime's default layout and string marshalling apply -
// confirm they match what RasEnumConnectionsA expects.
public struct RASCONN
{
// Size of this structure in bytes; the caller sets it before the call.
public int dwSize;
// Handle of the connection; later passed to RasHangUp / RasGetConnectionStatistics.
public IntPtr hrasconn;
// Phone-book entry name of the connection (inline buffer of 257 characters).
[MarshalAs(UnmanagedType.ByValTStr, SizeConst = 257)]
public string szEntryName;
// Device type string (inline buffer of 17 characters).
[MarshalAs(UnmanagedType.ByValTStr, SizeConst = 17)]
public string szDeviceType;
// Device name string (inline buffer of 129 characters).
[MarshalAs(UnmanagedType.ByValTStr, SizeConst = 129)]
public string szDeviceName;
}

/// <summary>
/// Managed mirror of the native RAS_STATS record that
/// <c>RasGetConnectionStatistics</c> fills in.
/// </summary>
/// <remarks>
/// Declared as a class, not a struct: the P/Invoke declaration passes it as a
/// plain <c>[In, Out]</c> parameter, and only a reference type with sequential
/// layout is marshalled as a pointer the native side can write through. As a
/// by-value struct the caller's copy was never updated. <see cref="dwSize"/>
/// initialises itself because the native API requires it to hold the structure
/// size before the call.
/// </remarks>
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Auto)]
public class RasStats
{
    /// <summary>Size of this structure in bytes; must be set before the native call.</summary>
    public int dwSize = Marshal.SizeOf(typeof(RasStats));
    /// <summary>Bytes transmitted.</summary>
    public int dwBytesXmited;
    /// <summary>Bytes received.</summary>
    public int dwBytesRcved;
    /// <summary>Frames transmitted.</summary>
    public int dwFramesXmited;
    /// <summary>Frames received.</summary>
    public int dwFramesRcved;
    /// <summary>CRC error count.</summary>
    public int dwCrcErr;
    /// <summary>Timeout error count.</summary>
    public int dwTimeoutErr;
    /// <summary>Alignment error count.</summary>
    public int dwAlignmentErr;
    /// <summary>Hardware overrun error count.</summary>
    public int dwHardwareOverrunErr;
    /// <summary>Framing error count.</summary>
    public int dwFramingErr;
    /// <summary>Buffer overrun error count.</summary>
    public int dwBufferOverrunErr;
    /// <summary>Compression ratio for incoming data.</summary>
    public int dwCompressionRatioIn;
    /// <summary>Compression ratio for outgoing data.</summary>
    public int dwCompressionRatioOut;
    /// <summary>Link speed in bits per second.</summary>
    public int dwBps;
    /// <summary>Connection duration in milliseconds.</summary>
    public int dwConnectionDuration;
}

/// <summary>
/// Managed mirror of one phone-book entry record (native RASENTRYNAME) as
/// returned by <c>RasEnumEntries</c>.
/// </summary>
/// <remarks>
/// The entry name is an inline character buffer in the native record, so it
/// must be marshalled with <c>ByValTStr</c>. Without the attribute the field is
/// marshalled as a pointer, which makes <c>Marshal.SizeOf</c> (and therefore
/// <see cref="dwSize"/>) wrong and lets the native side write characters where
/// the marshaller expects a pointer.
/// Newer native layouts append <c>dwFlags</c> and a 261-character
/// <c>szPhonebookPath</c>; they are intentionally omitted here, as in the
/// original declaration.
/// </remarks>
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Auto)]
public struct RasEntryName
{
    /// <summary>Size of this structure in bytes; the caller sets it before the call.</summary>
    public int dwSize;

    /// <summary>Phone-book entry name (RAS_MaxEntryName + 1 = 257 characters inline).</summary>
    [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 257)]
    public string szEntryName;
}
// P/Invoke declarations for the native functions in rasapi32.dll and
// wininet.dll that RASDisplay uses to enumerate, dial and hang up connections.
public class RAS
{
// Enumerates active RAS connections into a caller-supplied buffer.
// Bound explicitly to the ANSI entry point. The buffer is a single RASCONN,
// so at most one connection record can be received per call.
[DllImport("Rasapi32.dll", EntryPoint = "RasEnumConnectionsA",
SetLastError = true)]

internal static extern int RasEnumConnections
(
ref RASCONN lprasconn, // buffer to receive connections data
ref int lpcb, // size in bytes of buffer
ref int lpcConnections // number of connections written to buffer
);

// Retrieves traffic statistics for one connection.
// NOTE(review): the native function writes through a pointer to the
// statistics record; with this plain [In, Out] parameter that only works
// when RasStats is marshalled by reference - confirm against its declaration.
[DllImport("rasapi32.dll", CharSet = CharSet.Auto)]
internal static extern uint RasGetConnectionStatistics(
IntPtr hRasConn, // handle to the connection
[In, Out]RasStats lpStatistics // buffer to receive statistics
);
// Hangs up the connection identified by the given handle.
[DllImport("rasapi32.dll", CharSet = CharSet.Auto)]
public extern static uint RasHangUp(
IntPtr hrasconn // handle to the RAS connection to hang up
);

// Enumerates the entry names of a phone book into a caller-supplied array.
[DllImport("rasapi32.dll", CharSet = CharSet.Auto)]
public extern static uint RasEnumEntries(
string reserved, // reserved, must be NULL
string lpszPhonebook, // pointer to full path and
// file name of phone-book file
[In, Out]RasEntryName[] lprasentryname, // buffer to receive
// phone-book entries
ref int lpcb, // size in bytes of buffer
out int lpcEntries // number of entries written
// to buffer
);

// Dials the named connection ("connectoid") through WinINet.
// lpdwConnection receives a connection identifier from the native call.
[DllImport("wininet.dll", CharSet = CharSet.Auto)]
public extern static int InternetDial(
IntPtr hwnd,
[In]string lpszConnectoid,
uint dwFlags,
ref int lpdwConnection,
uint dwReserved
);

// Empty constructor; every member of this class is static.
public RAS()
{
}
}
public enum DEL_CACHE_TYPE // kind of cached item to delete
{
File,// temporary internet files
Cookie // cookies
}

/// <summary>
/// Snapshot of the machine's RAS (dial-up / broadband) state taken when the
/// object is constructed, plus helpers to dial and hang up a connection.
/// </summary>
/// <remarks>
/// The connection name, duration and byte counters describe the first active
/// connection found at construction time and are not refreshed afterwards.
/// </remarks>
public class RASDisplay
{
    // NOTE(review): the native wininet DeleteUrlCacheEntry is documented as
    // taking a URL string, not an enum - confirm this declaration before use.
    [DllImport("wininet.dll", CharSet = CharSet.Auto)]
    public static extern bool DeleteUrlCacheEntry(
        DEL_CACHE_TYPE type
    );

    // InternetDial flag value used by Connect (dial without user interaction).
    private const uint InternetAutodialForceUnattended = 2;

    private string m_duration;
    private string m_ConnectionName;
    private string[] m_ConnectionNames = new string[0];
    private double m_TX;
    private double m_RX;
    private bool m_connected;
    private IntPtr m_ConnectedRasHandle;

    /// <summary>
    /// Captures the currently active connection (if any) and the names of all
    /// phone-book entries.
    /// </summary>
    /// <remarks>
    /// The phone-book entries are read even when the active-connection query
    /// fails, so <see cref="Connections"/> is never null.
    /// </remarks>
    public RASDisplay()
    {
        m_connected = ReadActiveConnection();
        m_ConnectionNames = ReadPhonebookEntryNames();
    }

    /// <summary>
    /// Queries the first active RAS connection and stores its handle, name,
    /// duration and byte counters.
    /// </summary>
    /// <returns>true when an active connection was found; otherwise false.</returns>
    private bool ReadActiveConnection()
    {
        RASCONN conn = new RASCONN();
        conn.dwSize = Marshal.SizeOf(typeof(RASCONN));
        conn.hrasconn = IntPtr.Zero;

        // The buffer holds exactly one record, so its byte size equals dwSize.
        int bufferSize = conn.dwSize;
        int connectionCount = 0;
        int rc = RAS.RasEnumConnections(ref conn, ref bufferSize, ref connectionCount);
        if (rc != 0 || connectionCount <= 0)
        {
            return false;
        }

        m_ConnectedRasHandle = conn.hrasconn;
        m_ConnectionName = conn.szEntryName;

        RasStats stats = new RasStats();
        // The native API requires the structure size to be filled in by the caller.
        stats.dwSize = Marshal.SizeOf(typeof(RasStats));
        // NOTE(review): stats only receives data when RasStats is marshalled by
        // reference - confirm against the RasStats / P/Invoke declarations.
        RAS.RasGetConnectionStatistics(conn.hrasconn, stats);

        // The native counters are unsigned 32-bit values; reading them as signed
        // would turn large values negative.
        long totalSeconds = unchecked((uint)stats.dwConnectionDuration) / 1000L;
        long hours = totalSeconds / 3600;
        long minutes = (totalSeconds / 60) % 60;
        long seconds = totalSeconds % 60;

        m_duration = hours + " hours " + minutes + " minutes " + seconds + " secs";
        m_TX = unchecked((uint)stats.dwBytesXmited);
        m_RX = unchecked((uint)stats.dwBytesRcved);
        return true;
    }

    /// <summary>
    /// Reads the names of all entries in the default phone book.
    /// </summary>
    /// <returns>The entry names; an empty array when there are none or the query fails.</returns>
    private static string[] ReadPhonebookEntryNames()
    {
        int entrySize = Marshal.SizeOf(typeof(RasEntryName));
        int entryCount = 1;
        int bufferSize = entryCount * entrySize;
        RasEntryName[] entries = CreateEntryBuffer(entryCount, entrySize);

        // First attempt with room for one entry; the call reports how many exist.
        uint rc = RAS.RasEnumEntries(null, null, entries, ref bufferSize, out entryCount);

        // More than one entry: retry with a buffer that is large enough.
        if (entryCount > 1)
        {
            entries = CreateEntryBuffer(entryCount, entrySize);
            bufferSize = entryCount * entrySize;
            rc = RAS.RasEnumEntries(null, null, entries, ref bufferSize, out entryCount);
        }

        if (rc != 0 || entryCount <= 0)
        {
            return new string[0];
        }

        // Size the result by the number of entries actually written, never
        // beyond the buffer that was handed to the native call.
        int count = Math.Min(entryCount, entries.Length);
        string[] names = new string[count];
        for (int i = 0; i < count; i++)
        {
            names[i] = entries[i].szEntryName;
        }
        return names;
    }

    /// <summary>Allocates an entry buffer with dwSize pre-set on every element.</summary>
    private static RasEntryName[] CreateEntryBuffer(int count, int entrySize)
    {
        RasEntryName[] buffer = new RasEntryName[count];
        for (int i = 0; i < buffer.Length; i++)
        {
            buffer[i].dwSize = entrySize;
        }
        return buffer;
    }

    /// <summary>Connection time formatted as "H hours M minutes S secs"; empty when not connected.</summary>
    public string Duration
    {
        get { return m_connected ? m_duration : ""; }
    }

    /// <summary>Names of the phone-book entries found at construction time.</summary>
    public string[] Connections
    {
        get { return m_ConnectionNames; }
    }

    /// <summary>Bytes sent over the active connection; 0 when not connected.</summary>
    public double BytesTransmitted
    {
        get { return m_connected ? m_TX : 0; }
    }

    /// <summary>Bytes received over the active connection; 0 when not connected.</summary>
    public double BytesReceived
    {
        get { return m_connected ? m_RX : 0; }
    }

    /// <summary>Phone-book entry name of the active connection; empty when not connected.</summary>
    public string ConnectionName
    {
        get { return m_connected ? m_ConnectionName : ""; }
    }

    /// <summary>Whether an active connection was found (and has not been hung up through this object).</summary>
    public bool IsConnected
    {
        get { return m_connected; }
    }

    /// <summary>
    /// Dials the phone-book entry named <paramref name="Connection"/> without
    /// user interaction.
    /// </summary>
    /// <param name="Connection">Name of the phone-book entry to dial.</param>
    /// <returns>The value returned by the native InternetDial call.</returns>
    /// <remarks>
    /// This does not refresh the snapshot held by this object; create a new
    /// instance to inspect (or hang up) the newly dialed connection.
    /// </remarks>
    public int Connect(string Connection)
    {
        int connectionId = 0;
        return RAS.InternetDial(IntPtr.Zero, Connection, InternetAutodialForceUnattended, ref connectionId, 0);
    }

    /// <summary>
    /// Hangs up the connection captured at construction time. Does nothing when
    /// no connection was captured or it has already been hung up.
    /// </summary>
    public void Disconnect()
    {
        if (m_ConnectedRasHandle == IntPtr.Zero)
        {
            return;
        }

        uint rc = RAS.RasHangUp(m_ConnectedRasHandle);
        if (rc == 0)
        {
            // The handle is no longer valid once the hang-up succeeded.
            m_ConnectedRasHandle = IntPtr.Zero;
            m_connected = false;
        }
    }
}

#endregion 自动拨号

HttpWebRequest抓数据遇到的问题的更多相关文章

  1. C# HttpWebRequest提交数据方式浅析

    C# HttpWebRequest提交数据方式学习之前我们先来看看什么是HttpWebRequest,它是 .net 基类库中的一个类,在命名空间 System.Net 下面,用来使用户通过HTTP协 ...

  2. 【转】C# HttpWebRequest提交数据方式

    [转]C# HttpWebRequest提交数据方式 HttpWebRequest和HttpWebResponse类是用于发送和接收HTTP数据的最好选择.它们支持一系列有用的属性.这两个类位 于Sy ...

  3. HttpWebRequest抓取网页数据返回异常:远程服务器返回错误: (503) 服务器不可用

      解决方法:   HttpWebRequest request = (HttpWebRequest)WebRequest.Create(webURL);                //声明一个H ...

  4. 在使用 HttpWebRequest Post数据时候返回 400错误

    笔者有一个项目中用到了上传zip并解压的功能.开始觉得很简单,因为之前曾经做过之类的上传文件的功能,所以并不为意,于是使用copy大法.正如你所料,如果一切很正常的能运行的话就不会有这篇笔记了. 整个 ...

  5. HttpWebRequest 抓取页面异常处理办法

    抓取页面异常处理办法 public static string GetHtmlTest(string URI) { string fullhtml = null; while (true) { try ...

  6. HttpWebRequest抓取网页内容与直接输入URL得到的内容不一致!球大神帮忙!!

    一.前言 我在做一个百度收录情况查询的软件,就是通过软件来批量查询文章链接是否被百度收录,主要是用来查询某个网址的收录次数还有网站的排行数,思路是借鉴别人的. 二.问题描述 首先需要考虑的是能够支持哪 ...

  7. 花瓶使用笔记 (抓数据时,记得添加host,不然抓不了包的)

    情况一: 有时候抓不了app的数据,那么把app的host 添加一下就可以了 proxy > SSL Proxying Settings 情况二: 开了 翻 墙 是抓不了包的! (掉了一次坑)

  8. 使用HttpWebRequest post数据时要注意UrlEncode

    今天在用HttpWebResponse类向一个远程页面post数据时,遇到了一个怪问题:通过对比自己post的参数和服务器接收到的值,发现参数中的一个+号被替换成了空格. 造成这个错误的原因在于+号在 ...

  9. C#使用HttpWebRequest发送数据和使用HttpWebResponse接收数据的一个简单示例

    新建一个.NET Core控制台项目,代码如下所示: using System; using System.Text; using System.Net; using System.Collectio ...

随机推荐

  1. Checbox的操作含已选、未选及判断代码

    Checbox的操作包括已选.未选.判断等等,下面有个不错的示例,使用jquery完成,感兴趣的朋友可以参考下 $("#chk1").attr("checked" ...

  2. C#XML创建与节点对象引用

    我们在创建xml过程中会遇到不同的级别有相同节点的情况.如下面的xml: <?xml version="1.0" encoding="GBK"> & ...

  3. PHP全局变量

    1.global 关键字 2.$GLOBALS 3.使用静态变量

  4. web2py相关-------------------------------(一)初遇

    在第一次换工作的时候,希望转行 做编程,就面试了很多家,很多人问我 懂不懂html 书写,我非常诚实的告诉他们不会. 当然那时我很天真的认为,只要自己愿意学习这些都不是问题,事实证明人的学习能力是很快 ...

  5. bzoj 3053 HDU 4347 : The Closest M Points kd树

    bzoj 3053 HDU 4347 : The Closest M Points  kd树 题目大意:求k维空间内某点的前k近的点. 就是一般的kd树,根据实测发现,kd树的两种建树方式,即按照方差 ...

  6. bzoj 1200: [HNOI2005]木梳 DP

    1200: [HNOI2005]木梳 Time Limit: 10 Sec  Memory Limit: 162 MBSubmit: 266  Solved: 125[Submit][Status] ...

  7. uva 11437 - Triangle Fun

    计算几何: 直线交点: #include<cstdio> using namespace std; struct node { double x,y; node(,):x(x),y(y){ ...

  8. 【POJ 1984】Navigation Nightmare(带权并查集)

    Navigation Nightmare Description Farmer John's pastoral neighborhood has N farms (2 <= N <= 40 ...

  9. 李洪强iOS开发之-环信02.1_环信 SDK 2.x到3.0升级文档

    李洪强iOS开发之-环信02.1_环信 SDK 2.x到3.0升级文档 SDK 2.x 至 3.0 升级指南 环信 SDK 3.0 升级文档 3.0 中的核心类为 EMClient 类,通过 EMCl ...

  10. Android开源项目发现---ListView篇(持续更新)

    资料转载地址:https://github.com/Trinea/android-open-project 1. android-pulltorefresh 一个强大的拉动刷新开源项目,支持各种控件下 ...