1、有些网站访问速度慢,而且这个网站的连接数(比如全球内衣,另外对于女生各种什么内衣不懂的也可以上去查看了解哈),因为没有即时的关闭,造成抓取页面数据的时候超时也严重。

解决:把相应的HttpWebResponse.Close(),   HttpWebRequest.Abort();  以及HttpWebRequest.KeepAlive=false,还有吧超时时间设置长一点, 之后连接超时的几率就贬低了。还有直接c盘的host文件的域名直接指向某个IP,减少去dns服务器查找的时间

2、抓中国供应商的时候开了多线程跑的太快,几十条就出现拉动类的验证码。

解决:使用代理或者移动的宽带,去拨号

RASDisplay ras = new RASDisplay();
ras.Disconnect();//断开连接
ras.Connect("ADSL");//重新拨号

//因为拨号不会马上连接成功,需要时间
Thread.Sleep(5000);

下面是封装请求的类库

public static string getRequest(string url, string charset = "utf-8")
{
HttpWebRequest myreq = null;
HttpWebResponse myres = null;
StreamReader reader = null;
Stream stream = null;
string result = "";
string code = charset; //charset.ToLower()
//code = "utf-8";
try
{
myreq = (HttpWebRequest)WebRequest.Create(url);
myreq.Timeout = 20000;
myreq.Method = "GET";
myreq.KeepAlive = false;
myreq.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
//myreq.UserAgent = "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)";
myreq.Headers.Add("content", "text/html; charset=" + code);
//myreq.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
myreq.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36";

myreq.KeepAlive = true;
myres = (HttpWebResponse)myreq.GetResponse();
stream = myres.GetResponseStream();
reader = new StreamReader(stream, System.Text.Encoding.GetEncoding(code));
result = reader.ReadToEnd();

reader.Close();
reader.Dispose();

stream.Close();
stream.Dispose();
}
catch
{ }
finally
{
if (myreq != null)
{
myres.Close();
}
if (myreq != null)
{
myreq.Abort();
}

}

return result;
}

#region 自动拨号
/* 自动拨号
* 1、右击“网上邻居”--属性;
2、选择“宽带连接”,右击“属性”- >“选项”;
3、把“提示名称、密码和证书等”前面的对号去掉,点“确定”退出;

4、生成模式务必改成x86
*/
public struct RASCONN
{
public int dwSize;
public IntPtr hrasconn;
[MarshalAs(UnmanagedType.ByValTStr, SizeConst = 257)]
public string szEntryName;
[MarshalAs(UnmanagedType.ByValTStr, SizeConst = 17)]
public string szDeviceType;
[MarshalAs(UnmanagedType.ByValTStr, SizeConst = 129)]
public string szDeviceName;
}

[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Auto)]
public struct RasStats
{
public int dwSize;
public int dwBytesXmited;
public int dwBytesRcved;
public int dwFramesXmited;
public int dwFramesRcved;
public int dwCrcErr;
public int dwTimeoutErr;
public int dwAlignmentErr;
public int dwHardwareOverrunErr;
public int dwFramingErr;
public int dwBufferOverrunErr;
public int dwCompressionRatioIn;
public int dwCompressionRatioOut;
public int dwBps;
public int dwConnectionDuration;
}

[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Auto)]
public struct RasEntryName
{
public int dwSize;
//[MarshalAs(UnmanagedType.ByValTStr,SizeConst=(int)RasFieldSizeConstants.RAS_MaxEntryName + 1)]
public string szEntryName;
//#if WINVER5
// public int dwFlags;
// [MarshalAs(UnmanagedType.ByValTStr,SizeConst=260+1)]
// public string szPhonebookPath;
//#endif
}
public class RAS
{
[DllImport("Rasapi32.dll", EntryPoint = "RasEnumConnectionsA",
SetLastError = true)]

internal static extern int RasEnumConnections
(
ref RASCONN lprasconn, // buffer to receive connections data
ref int lpcb, // size in bytes of buffer
ref int lpcConnections // number of connections written to buffer
);

[DllImport("rasapi32.dll", CharSet = CharSet.Auto)]
internal static extern uint RasGetConnectionStatistics(
IntPtr hRasConn, // handle to the connection
[In, Out]RasStats lpStatistics // buffer to receive statistics
);
[DllImport("rasapi32.dll", CharSet = CharSet.Auto)]
public extern static uint RasHangUp(
IntPtr hrasconn // handle to the RAS connection to hang up
);

[DllImport("rasapi32.dll", CharSet = CharSet.Auto)]
public extern static uint RasEnumEntries(
string reserved, // reserved, must be NULL
string lpszPhonebook, // pointer to full path and
// file name of phone-book file
[In, Out]RasEntryName[] lprasentryname, // buffer to receive
// phone-book entries
ref int lpcb, // size in bytes of buffer
out int lpcEntries // number of entries written
// to buffer
);

[DllImport("wininet.dll", CharSet = CharSet.Auto)]
public extern static int InternetDial(
IntPtr hwnd,
[In]string lpszConnectoid,
uint dwFlags,
ref int lpdwConnection,
uint dwReserved
);

public RAS()
{
}
}
public enum DEL_CACHE_TYPE //要删除的类型。
{
File,//表示internet临时文件
Cookie //表示Cookie
}

public class RASDisplay
{
[DllImport("wininet.dll", CharSet = CharSet.Auto)]
public static extern bool DeleteUrlCacheEntry(
DEL_CACHE_TYPE type
);
private string m_duration;
private string m_ConnectionName;
private string[] m_ConnectionNames;
private double m_TX;
private double m_RX;
private bool m_connected;
private IntPtr m_ConnectedRasHandle;

RasStats status = new RasStats();
public RASDisplay()
{
m_connected = true;

RAS lpras = new RAS();
RASCONN lprasConn = new RASCONN();

lprasConn.dwSize = Marshal.SizeOf(typeof(RASCONN));
lprasConn.hrasconn = IntPtr.Zero;

int lpcb = 0;
int lpcConnections = 0;
int nRet = 0;
lpcb = Marshal.SizeOf(typeof(RASCONN));

nRet = RAS.RasEnumConnections(ref lprasConn, ref lpcb, ref
lpcConnections);

if (nRet != 0)
{
m_connected = false;
return;

}

if (lpcConnections > 0)
{
//for (int i = 0; i < lpcConnections; i++)

//{
RasStats stats = new RasStats();

m_ConnectedRasHandle = lprasConn.hrasconn;
RAS.RasGetConnectionStatistics(lprasConn.hrasconn, stats);

m_ConnectionName = lprasConn.szEntryName;

int Hours = 0;
int Minutes = 0;
int Seconds = 0;

Hours = ((stats.dwConnectionDuration / 1000) / 3600);
Minutes = ((stats.dwConnectionDuration / 1000) / 60) - (Hours * 60);
Seconds = ((stats.dwConnectionDuration / 1000)) - (Minutes * 60) - (Hours * 3600);

m_duration = Hours + " hours " + Minutes + " minutes " + Seconds + " secs";
m_TX = stats.dwBytesXmited;
m_RX = stats.dwBytesRcved;
//}
}
else
{
m_connected = false;
}

int lpNames = 1;
int entryNameSize = 0;
int lpSize = 0;
RasEntryName[] names = null;

entryNameSize = Marshal.SizeOf(typeof(RasEntryName));
lpSize = lpNames * entryNameSize;

names = new RasEntryName[lpNames];
names[0].dwSize = entryNameSize;

uint retval = RAS.RasEnumEntries(null, null, names, ref lpSize, out lpNames);

//if we have more than one connection, we need to do it again
if (lpNames > 1)
{
names = new RasEntryName[lpNames];
for (int i = 0; i < names.Length; i++)
{
names[i].dwSize = entryNameSize;
}

retval = RAS.RasEnumEntries(null, null, names, ref lpSize, out lpNames);

}
m_ConnectionNames = new string[names.Length];

if (lpNames > 0)
{
for (int i = 0; i < names.Length; i++)
{
m_ConnectionNames[i] = names[i].szEntryName;
}
}
}

public string Duration
{
get
{
return m_connected ? m_duration : "";
}
}

public string[] Connections
{
get
{
return m_ConnectionNames;
}
}

public double BytesTransmitted
{
get
{
return m_connected ? m_TX : 0;
}
}
public double BytesReceived
{
get
{
return m_connected ? m_RX : 0;

}
}
public string ConnectionName
{
get
{
return m_connected ? m_ConnectionName : "";
}
}
public bool IsConnected
{
get
{
return m_connected;
}
}

public int Connect(string Connection)
{
int temp = 0;
uint INTERNET_AUTO_DIAL_UNATTENDED = 2;
int retVal = RAS.InternetDial(IntPtr.Zero, Connection, INTERNET_AUTO_DIAL_UNATTENDED, ref temp, 0);
return retVal;
}
public void Disconnect()
{
RAS.RasHangUp(m_ConnectedRasHandle);
}
}

endregion 自动拨号

HttpWebRequest抓数据遇到的问题的更多相关文章

  1. C# HttpWebRequest提交数据方式浅析

    C# HttpWebRequest提交数据方式学习之前我们先来看看什么是HttpWebRequest,它是 .net 基类库中的一个类,在命名空间 System.Net 下面,用来使用户通过HTTP协 ...

  2. 【转】C# HttpWebRequest提交数据方式

    [转]C# HttpWebRequest提交数据方式 HttpWebRequest和HttpWebResponse类是用于发送和接收HTTP数据的最好选择.它们支持一系列有用的属性.这两个类位 于Sy ...

  3. HttpWebRequest抓取网页数据返回异常:远程服务器返回错误: (503) 服务器不可用

      解决方法:   HttpWebRequest request = (HttpWebRequest)WebRequest.Create(webURL);                //声明一个H ...

  4. 在使用 HttpWebRequest Post数据时候返回 400错误

    笔者有一个项目中用到了上传zip并解压的功能.开始觉得很简单,因为之前曾经做过之类的上传文件的功能,所以并不为意,于是使用copy大法.正如你所料,如果一切很正常的能运行的话就不会有这篇笔记了. 整个 ...

  5. HttpWebRequest 抓取页面异常处理办法

    抓取页面异常处理办法 public static string GetHtmlTest(string URI) { string fullhtml = null; while (true) { try ...

  6. HttpWebRequest抓取网页内容与直接输入URL得到的内容不一致!球大神帮忙!!

    一.前言 我在做一个百度收录情况查询的软件,就是通过软件来批量查询文章链接是否被百度收录,主要是用来查询某个网址的收录次数还有网站的排行数,思路是借鉴别人的. 二.问题描述 首先需要考虑的是能够支持哪 ...

  7. 花瓶使用笔记 (抓数据时,记得添加host,不然抓不了包的)

    情况一: 有时候抓不了app的数据,那么把app的host 添加一下就可以了 proxy > SSL Proxying Settings 情况二: 开了 翻 墙 是抓不了包的! (掉了一次坑)

  8. 使用HttpWebRequest post数据时要注意UrlEncode

    今天在用HttpWebResponse类向一个远程页面post数据时,遇到了一个怪问题:通过对比自己post的参数和服务器接收到的值,发现参数中的一个+号被替换成了空格. 造成这个错误的原因在于+号在 ...

  9. C#使用HttpWebRequest发送数据和使用HttpWebResponse接收数据的一个简单示例

    新建一个.NET Core控制台项目,代码如下所示: using System; using System.Text; using System.Net; using System.Collectio ...

随机推荐

  1. js与uri中location关系

    //获取域名host = window.location.host;host2=document.domain; //获取页面完整地址url = window.location.href; docum ...

  2. C++ 11 笔记 (五) : std::thread

    这真是一个巨大的话题.我猜记录完善绝B需要一本书的容量. 所以..我只是略有了解,等以后用的深入了再慢慢补充吧. C++写多线程真是一个痛苦的事情,当初用过C语言的CreateThread,见过boo ...

  3. IEngineEditor与IWorkspaceEdit,以及相关的事件监听

    转自原文 IEngineEditor与IWorkspaceEdit,以及相关的事件监听 IEngineEditor适用于直接在图层上的编辑,例如使用"要素编辑"工具菜单上的&quo ...

  4. Java集合类操作优化总结

    清单 1.集合类之间关系 Collection├List│├LinkedList│├ArrayList│└Vector│ └Stack└SetMap├Hashtable├HashMap└WeakHas ...

  5. Android学习之Image操作及时间日期选择器

    一.基础学习 1.ImageView是图片容器,就相当于RadioGroup是RadioButton的容器一样,是View的直接子类. 1: <ImageView 2: android:id=& ...

  6. Python---十年语言之首

    这个图表的数据非常的有意思,没有大起大浮并不是件坏事,这表明不断的有群体(来自Java和PHP——一个大部落)希望学习这种语言.Python是唯一一个在这个图表上表现的与众不同的语言. 我们都知道,P ...

  7. 【CF】196 Div.2 D. Book of Evil

    显然这个图是一课树,看着题目首先联想到LCA(肯定是可以解的).但是看了一下数据大小,应该会TLE.然后,忽然想到一个前面做过的题目,大概是在一定条件下树中某结点旋转成为根后查询最长路径.结果灵感就来 ...

  8. 根据指定的commit查找对应的log

    find commit by hash sha in git 问题: I need to find a commit in Git by given hash SHA. For example, if ...

  9. linux使用man命令后退出

    linux使用man命令后 使用q退出

  10. bzoj1232

    由题意知,最后要保留的边肯定都要被走过 来回一条边所花费的时间=2*边长+安慰边两端的牛所要花的时间和 总时间就等于所保留边来回的时间和+根节点时间: 不难想到做一下最小生成树即可 贪心可知,根一定选 ...