抓取小视频的 URL 地址，然后将地址信息拷贝到迅雷里批量下载即可。

主程序代码

            // Category slug -> classid pairs used by the site. `classid` below must be the
// number that belongs to the slug assigned to `type`:
//   yazhouqingseAV     35
//   zhifusiwaAV        29
//   zipaishipin        30
//   oumeiqingseAV      28
//   katongdongman      31
//   tongxingAV         32
//   sanjidianying      33
//   fengkuangqunjiao   34
//
// NOTE(review): the published listing lost its numeric literals and had the
// `client` declaration swallowed by the last comment line. The values below
// (classid 34, paging starting at 1, first page without a suffix) are restored
// by inference from the table above and the URL scheme - confirm before relying on them.
var client = new WinHttpHelper();
var type = "fengkuangqunjiao";
var classid = 34;

// Walk the listing pages one by one: index.html, index_2.html, index_3.html, ...
for (int i = 1; ; i++)
{
    Console.WriteLine(i);

    // Presumably the first listing page has no numeric suffix - TODO confirm.
    string index = i == 1 ? "" : "_" + i;
    string pageUrl = "http://www.lang34.com/se/" + type + "/index" + index + ".html";
    string linkPattern = "" + type + "/(.*?).html";

    var trs = RegexHelper.GetMathList(client.GET(pageUrl, Encoding.UTF8), linkPattern);

    // Stop when a page yields no detail links (past the last page, or the request
    // failed); the original loop had no exit condition and would spin forever.
    if (trs == null || trs.Count == 0)
    {
        break;
    }

    foreach (var item in trs)
    {
        // Pull the numeric/slug id out of the matched ".../<id>.html" link.
        string temp = "";
        if (!RegexHelper.GetMatchStr(item.ToString(), linkPattern, true, out temp))
        {
            continue;
        }

        string url = "http://www.lang34.com/e/DownSys/play/?classid=" + classid + "&id=" + temp + "&pathid=0";
        string htmltext = client.GET(url, Encoding.UTF8);

        // The player page embeds the media address as  f:'<address>',
        string mp4 = "";
        if (!RegexHelper.GetMatchStr(htmltext, "f:'(.*?)',", true, out mp4))
        {
            continue;
        }

        // The title is optional: when it cannot be found it stays empty.
        string titile = "";
        RegexHelper.GetMatchStr(htmltext, " <title>(.*?)</title>", true, out titile);

        string output = mp4 + "?title" + titile + "\r\n";
        Console.WriteLine(output);
        File.AppendAllText("D://" + type + ".txt", output);
    }
}

网络请求类

using System;
using System.Collections.Generic;
using System.Text; namespace MyHelper4Web
{
/// <summary>
/// Thin wrapper around the WinHTTP COM request object for simple GET/POST calls.
/// One underlying request object is created per helper instance and reused for
/// every call. All request methods are best-effort: they never throw, and on
/// failure they return the exception message and source in place of the
/// response body.
/// </summary>
public class WinHttpHelper
{
    // Underlying WinHTTP COM request object, shared by every call on this instance.
    private readonly WinHttp.WinHttpRequest request;

    /// <summary>Value sent in the Accept header.</summary>
    public string Accept = "*/*";

    /// <summary>Value sent in the User-Agent header.</summary>
    public string UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; InfoPath.2; .NET4.0E)";

    /// <summary>Value sent in the Content-Type header of POST requests.</summary>
    public string ContentType = "application/json";// "application/x-www-form-urlencoded";

    /// <summary>
    /// Time to wait for a response, in seconds (passed to WaitForResponse).
    /// NOTE(review): the original default was lost from the published listing;
    /// 30 is a placeholder - confirm the intended value.
    /// </summary>
    public int SetTimeOut = 30;

    /// <summary>Whether the request follows redirects automatically.</summary>
    public bool AllowAutoRedirect = true;

    /// <summary>Whether a redirect from https to http is allowed.</summary>
    public bool AllowHttpstoHttp = false;

    /// <summary>
    /// Creates a helper that uses the default header values.
    /// </summary>
    public WinHttpHelper()
    {
        request = new WinHttp.WinHttpRequest();
    }

    /// <summary>
    /// Creates a helper with custom request headers.
    /// </summary>
    /// <param name="Accept">Accept header value.</param>
    /// <param name="UserAgent">User-Agent header value.</param>
    /// <param name="ContentType">Content-Type header value (used by POST).</param>
    public WinHttpHelper(string Accept, string UserAgent, string ContentType)
        : this() // the request object must exist for every constructor, not only the default one
    {
        this.Accept = Accept;
        this.UserAgent = UserAgent;
        this.ContentType = ContentType;
    }

    /// <summary>
    /// Creates a helper with custom request headers and a custom timeout.
    /// </summary>
    /// <param name="Accept">Accept header value.</param>
    /// <param name="UserAgent">User-Agent header value.</param>
    /// <param name="ContentType">Content-Type header value (used by POST).</param>
    /// <param name="SetTimeOut">Response timeout in seconds.</param>
    public WinHttpHelper(string Accept, string UserAgent, string ContentType, int SetTimeOut)
        : this(Accept, UserAgent, ContentType)
    {
        this.SetTimeOut = SetTimeOut;
    }

    /// <summary>
    /// Requests a page with GET.
    /// </summary>
    /// <param name="Url">URL to request.</param>
    /// <param name="refer">Referer header value; omitted when null or empty.</param>
    /// <returns>
    /// The response body as bytes (empty when there is no body). On failure, the
    /// exception message and source encoded with <see cref="Encoding.Default"/>.
    /// </returns>
    public byte[] GET(string Url, string refer)
    {
        return Execute("GET", Url, "", refer, false);
    }

    /// <summary>
    /// Requests a page with GET and decodes the response.
    /// </summary>
    /// <param name="Url">URL to request.</param>
    /// <param name="Encode">Encoding used to turn the response bytes into text.</param>
    /// <returns>The response text, or the exception message and source on failure.</returns>
    public string GET(string Url, Encoding Encode)
    {
        return GET(Url, "", Encode);
    }

    /// <summary>
    /// Requests a page with GET, sending a Referer header, and decodes the response.
    /// </summary>
    /// <param name="Url">URL to request.</param>
    /// <param name="refer">Referer header value; omitted when null or empty.</param>
    /// <param name="Encode">Encoding used to turn the response bytes into text.</param>
    /// <returns>The response text, or the exception message and source on failure.</returns>
    public string GET(string Url, string refer, Encoding Encode)
    {
        return Decode(GET(Url, refer), Encode);
    }

    /// <summary>
    /// Requests a page with POST.
    /// </summary>
    /// <param name="Url">URL to request.</param>
    /// <param name="PostData">Request body.</param>
    /// <param name="Refer">Referer header value; omitted when null or empty.</param>
    /// <returns>
    /// The response body as bytes (empty when there is no body). On failure, the
    /// exception message and source encoded with <see cref="Encoding.Default"/>.
    /// </returns>
    public byte[] POST(string Url, string PostData, string Refer)
    {
        return Execute("POST", Url, PostData, Refer, true);
    }

    /// <summary>
    /// Requests a page with POST, without a Referer header.
    /// </summary>
    /// <param name="Url">URL to request.</param>
    /// <param name="PostData">Request body.</param>
    /// <returns>The response body as bytes; see <see cref="POST(string, string, string)"/>.</returns>
    public byte[] POST(string Url, string PostData)
    {
        return POST(Url, PostData, "");
    }

    /// <summary>
    /// Requests a page with POST and decodes the response.
    /// </summary>
    /// <param name="Url">URL to request.</param>
    /// <param name="PostData">Request body.</param>
    /// <param name="Refer">Referer header value; omitted when null or empty.</param>
    /// <param name="Encode">Encoding used to turn the response bytes into text.</param>
    /// <returns>The response text, or the exception message and source on failure.</returns>
    public string POST(string Url, string PostData, string Refer, Encoding Encode)
    {
        return Decode(POST(Url, PostData, Refer), Encode);
    }

    /// <summary>
    /// Requests a page with POST, without a Referer header, and decodes the response.
    /// </summary>
    /// <param name="Url">URL to request.</param>
    /// <param name="PostData">Request body.</param>
    /// <param name="Encode">Encoding used to turn the response bytes into text.</param>
    /// <returns>The response text, or the exception message and source on failure.</returns>
    public string POST(string Url, string PostData, Encoding Encode)
    {
        return POST(Url, PostData, "", Encode);
    }

    /// <summary>
    /// Returns all response headers of the most recent request as one string.
    /// Despite the name, the result is the full header block, not only cookies.
    /// </summary>
    /// <returns>The raw response headers, or an empty string when they cannot be read.</returns>
    public string GetAllCookis()
    {
        try
        {
            return request.GetAllResponseHeaders();
        }
        catch (Exception)
        {
            // Best effort: e.g. no request has been sent yet.
            return "";
        }
    }

    // Sends one request and returns the raw response bytes; shared by GET and POST.
    private byte[] Execute(string method, string url, string body, string referer, bool sendContentType)
    {
        try
        {
            // The request object is reused between calls, so both options are written on
            // every call; setting them only in one direction would make an earlier
            // setting stick after the public flags are changed back.
            request.set_Option(WinHttp.WinHttpRequestOption.WinHttpRequestOption_EnableRedirects, AllowAutoRedirect);
            request.set_Option(WinHttp.WinHttpRequestOption.WinHttpRequestOption_EnableHttpsToHttpRedirects, AllowHttpstoHttp);

            // Opened asynchronously so that WaitForResponse can enforce the timeout.
            request.Open(method, url, true);
            request.SetRequestHeader("Accept", Accept);
            request.SetRequestHeader("User-Agent", UserAgent);
            if (sendContentType)
            {
                request.SetRequestHeader("Content-Type", ContentType);
            }
            if (!string.IsNullOrEmpty(referer))
            {
                request.SetRequestHeader("Referer", referer);
            }

            request.Send(body);
            request.WaitForResponse(SetTimeOut);

            // A response without a body would otherwise surface as null and break decoding.
            byte[] payload = (byte[])request.ResponseBody;
            return payload ?? new byte[0];
        }
        catch (Exception ex)
        {
            // Deliberate best effort: the error text is handed back as the "response".
            return Encoding.Default.GetBytes(ex.Message + ex.Source);
        }
    }

    // Decodes response bytes; returns the error text instead of throwing (e.g. null encoding).
    private static string Decode(byte[] body, Encoding encode)
    {
        try
        {
            return encode.GetString(body);
        }
        catch (Exception ex)
        {
            return ex.Message + ex.Source;
        }
    }
}
}

正则表达式类

using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections; namespace MyHelper4Web
{
/// <summary>
/// Small regex helpers for pulling text out of HTML.
/// A pattern may contain the literal placeholder <c>(.*?)</c>; the text matched
/// by the first placeholder is the "cut" value, and the text around it acts as
/// delimiters. All matching is single-line and case-insensitive.
/// </summary>
public class RegexHelper
{
    // Literal marker callers embed in a pattern to mark the text they want extracted.
    private const string CapturePlaceholder = "(.*?)";

    // Name of the capture group the first placeholder is rewritten to.
    private const string CutGroup = "__regexHelperCut";

    private const RegexOptions Options = RegexOptions.Singleline | RegexOptions.IgnoreCase;

    /// <summary>
    /// Finds the first match of <paramref name="pattern"/> and returns it either
    /// with or without the delimiters surrounding the placeholder.
    /// </summary>
    /// <param name="htmltext">Text to search.</param>
    /// <param name="pattern">Regex pattern, optionally containing <c>(.*?)</c>.</param>
    /// <param name="isCut">
    /// True to return only the placeholder text; false to return the whole matched text.
    /// </param>
    /// <param name="result">The extracted text, or an empty string when nothing matched.</param>
    /// <returns>True when a match was found and the placeholder text is not empty.</returns>
    public static bool GetMatchStr(string htmltext, string pattern, bool isCut, out string result)
    {
        string whole;
        string inner;
        if (!TryMatch(htmltext, pattern, out whole, out inner))
        {
            result = "";
            return false;
        }

        result = isCut ? inner : whole;
        return !string.IsNullOrEmpty(inner);
    }

    /// <summary>
    /// Same extraction as <see cref="GetMatchStr(string, string, bool, out string)"/>,
    /// returning the text directly.
    /// </summary>
    /// <param name="htmltext">Text to search.</param>
    /// <param name="pattern">Regex pattern, optionally containing <c>(.*?)</c>.</param>
    /// <param name="isCut">
    /// True to return only the placeholder text; false to return the whole matched text.
    /// </param>
    /// <returns>The extracted text, or an empty string when nothing matched.</returns>
    public static string GetMatchString(string htmltext, string pattern, bool isCut)
    {
        // Delegates so that isCut means the same thing in both overloads.
        string result;
        GetMatchStr(htmltext, pattern, isCut, out result);
        return result;
    }

    /// <summary>
    /// Finds the first match and returns the text matched by the placeholder.
    /// </summary>
    /// <param name="htmltext">Text to search.</param>
    /// <param name="pattern">Regex pattern, optionally containing <c>(.*?)</c>.</param>
    /// <param name="result">
    /// The placeholder text; the whole match when the pattern has no placeholder;
    /// an empty string when nothing matched.
    /// </param>
    /// <returns>True when the returned text is not empty.</returns>
    public static bool GetMatchStr(string htmltext, string pattern, out string result)
    {
        string whole;
        if (!TryMatch(htmltext, pattern, out whole, out result))
        {
            result = "";
            return false;
        }
        return !string.IsNullOrEmpty(result);
    }

    /// <summary>
    /// Finds every match of <paramref name="pattern"/>.
    /// </summary>
    /// <param name="htmltext">Text to search.</param>
    /// <param name="pattern">Regex pattern.</param>
    /// <returns>
    /// The whole text of each match, in order, as strings. Never null: the list is
    /// empty when nothing matches, an argument is null, or the pattern is invalid.
    /// </returns>
    public static ArrayList GetMathList(string htmltext, string pattern)
    {
        ArrayList list = new ArrayList();
        if (htmltext == null || pattern == null)
        {
            return list;
        }

        try
        {
            foreach (Match match in Regex.Matches(htmltext, pattern, Options))
            {
                list.Add(match.Value);
            }
        }
        catch (ArgumentException)
        {
            // Malformed pattern: report "no matches" rather than handing callers a null.
            list.Clear();
        }
        return list;
    }

    // Runs the first-match search. `whole` is the full matched text; `inner` is the text
    // matched by the first placeholder, or the full match when there is no placeholder.
    private static bool TryMatch(string htmltext, string pattern, out string whole, out string inner)
    {
        whole = "";
        inner = "";
        if (htmltext == null || pattern == null)
        {
            return false;
        }

        // Turn the first placeholder into a named group so its text can be read from the
        // match itself. Stripping the delimiters with String.Replace breaks as soon as
        // they contain regex syntax, differ in case, or recur inside the captured text.
        int at = pattern.IndexOf(CapturePlaceholder, StringComparison.Ordinal);
        string effective = pattern;
        if (at >= 0)
        {
            effective = pattern.Substring(0, at)
                + "(?<" + CutGroup + ">.*?)"
                + pattern.Substring(at + CapturePlaceholder.Length);
        }

        Match match;
        try
        {
            match = Regex.Match(htmltext, effective, Options);
        }
        catch (ArgumentException)
        {
            return false; // malformed pattern
        }

        if (!match.Success)
        {
            return false;
        }

        whole = match.Value;
        inner = at >= 0 ? match.Groups[CutGroup].Value : whole;
        return true;
    }
}
}

【C#爬虫】抓取XX网站mp4资源地址的更多相关文章

  1. python爬虫 抓取一个网站的所有网址链接

    sklearn实战-乳腺癌细胞数据挖掘 https://study.163.com/course/introduction.htm?courseId=1005269003&utm_campai ...

  2. python爬虫--爬取某网站电影下载地址

    前言:因为自己还是python世界的一名小学生,还有很多路要走,所以本文以目的为向导,达到目的即可,对于那些我自己都没弄懂的原理,不做去做过多解释,以免误人子弟,大家可以网上搜索. 友情提示:本代码用 ...

  3. 一个简单的scrapy爬虫抓取豆瓣刘亦菲的图片地址

    一.第一步是创建一个scrapy项目 sh-3.2# scrapy startproject liuyifeiImage sh-3.2# chmod -R 777 liuyifeiImage/ 二.分 ...

  4. 爬虫抓取页面数据原理(php爬虫框架有很多 )

    爬虫抓取页面数据原理(php爬虫框架有很多 ) 一.总结 1.php爬虫框架有很多,包括很多傻瓜式的软件 2.照以前写过java爬虫的例子来看,真的非常简单,就是一个获取网页数据的类或者方法(这里的话 ...

  5. python 爬虫抓取心得

    quanwei9958 转自 python 爬虫抓取心得分享 urllib.quote('要编码的字符串') 如果你要在url请求里面放入中文,对相应的中文进行编码的话,可以用: urllib.quo ...

  6. C# 爬虫 抓取小说

    心血来潮,想研究下爬虫,爬点小说. 通过百度选择了个小说网站,随便找了一本小书http://www.23us.so/files/article/html/13/13655/index.html. 1. ...

  7. Java 实现 HttpClients+jsoup,Jsoup,htmlunit,Headless Chrome 爬虫抓取数据

    最近整理一下手头上搞过的一些爬虫,有HttpClients+jsoup,Jsoup,htmlunit,HeadlessChrome 一,HttpClients+jsoup,这是第一代比较low,很快就 ...

  8. PID控制器的应用:控制网络爬虫抓取速度

    一.初识PID控制器 冬天乡下人喜欢烤火取暖,常见的情形就是四人围着麻将桌,桌底放一盆碳火.有人觉得火不够大,那加点木炭吧,还不够,再加点.片刻之后,又觉得火太大,脚都快被烤熟了,那就取出一些木碳…… ...

  9. Python爬虫抓取东方财富网股票数据并实现MySQL数据库存储

    Python爬虫可以说是好玩又好用了.现想利用Python爬取网页股票数据保存到本地csv数据文件中,同时想把股票数据保存到MySQL数据库中.需求有了,剩下的就是实现了. 在开始之前,保证已经安装好 ...

随机推荐

  1. spring通过注解依赖注入和获取xml配置混合的方式

    spring的xml配置文件中某个<bean></bean>中的property的用法是什么样的? /spring-beans/src/test/java/org/spring ...

  2. 解决ajax请求cors跨域问题

    ”已阻止跨源请求:同源策略禁止读取位于 ***** 的远程资源.(原因:CORS 头缺少 'Access-Control-Allow-Origin').“ ”已阻止跨源请求:同源策略禁止读取位于 ** ...

  3. 浅谈PHP代码设计结构

    浅谈PHP代码设计结构 您的评价:       还行  收藏该经验       coding多年,各种代码日夜相伴,如何跟代码友好的相处,不光成为职业生涯的一种回应,也是编写者功力的直接显露. 如何看 ...

  4. 利用FTP将Linux文件备份到Windows

    windows:Windows Server 2008 linux: CentOS release 5.5 (Final)       首先在windows上安装好FTP,本人使用的是Windows ...

  5. C#垃圾回收

    析构方法: 我们知道引用类型都有构造方法(constructor),相对应的也有一个析构方法(destructor).顾名思义,构造方法,就是在创建这个对象时,要执行的方法.例如,我们可以通过构造方法 ...

  6. zepto源码研究 - zepto.js (zepto.init)

    简要:当我们用$()时,便会直接调用zepto.init 生成zepto对象,那zepto.init是如何根据不同类型的参数来生产指定对象呢? zepto.init = function(select ...

  7. Apache 2.x+jboss6.1反向代理session共享问题设置

    2016年8月4日,第一次开笔写博客园,今天在公司解决了一个问题. apache+jboss做负载均衡的问题一直困扰了很久.问题描述如下,使用apche做反向代理转发给3台jboss 的app,app ...

  8. 检查DOM能力的函数

    var a=document.implementation.hasFeature("Core","2.0"); var b=document.implement ...

  9. C++中getline函数的使用

    代码: #include <iostream> #include <cstdio> using namespace std; int main(){ char* s; s = ...

  10. C++拾遗(十三)友元和嵌套类

    友元类 使用友元的场合: 1.两个类既不是is-a关系也不是has-a关系,但是两个类之间又需要有联系,且一个类能访问另一个类的私有成员和保护成员. 2.一个类需要用到另外多个类的私有成员. C++p ...