using System;
using System.Collections.Generic;
using System.ComponentModel.Design;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using CDPWIB.DAL;
using CDPWIB.Data;
using CommonUtility;
using HtmlAgilityPack;
using MongoDB.Driver;
using MongoDB.Driver.Builders;
using MongoDB.Driver.Linq;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using WebKit;

namespace CDPWIB.WebCollection
{
    /// <summary>
    /// Collector for the QiDian site: scrapes categories, ranked novels, volume/chapter
    /// catalogues and click counts, and stores them through the project's Mongo/SQL helpers.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the copy of this file that was reviewed had lost every bare integer
    /// literal (e.g. <c>Groups[].Value</c>, <c>for (int i = ; i &lt; ; i++)</c>) and had
    /// several declarations and closing braces swallowed into <c>//</c> comments.
    /// The values below were reconstructed. Regex group numbers, the CategoryArr layout,
    /// the page/row ranges and the XPath positions follow from the surrounding code and
    /// its comments; every other restored value is marked NOTE(review) and must be
    /// confirmed against the authoritative source before this is relied on.
    /// </remarks>
    internal class QiDianCol : INovalCollect
    {
        private const RegexOptions ScriptRegexOptions =
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.Singleline;

        // CategoryArr entries look like ["玄幻", "21"], i.e. [name, id].
        private const int CategoryNameIndex = 0;
        private const int CategoryIdIndex = 1;

        // NOTE(review): the layout of a SubCategoryArr entry is not visible anywhere in this
        // file; [parentId, id, name] is an assumption - confirm against BookStore.js.
        private const int SubCategoryParentIndex = 0;
        private const int SubCategoryIdIndex = 1;
        private const int SubCategoryNameIndex = 2;

        // The original comments say "pages 1 to 10" and "50 rows per page";
        // XPath positions are 1-based.
        private const int FirstRankPage = 1;
        private const int LastRankPage = 10;
        private const int RowsPerRankPage = 50;

        // NOTE(review): initial Sort value for volumes/chapters was lost; 0 is assumed.
        private const int FirstSortValue = 0;

        // NOTE(review): volume id used when a volume has no link (VIP volumes); 0 is assumed.
        private const int NoVolumeId = 0;

        // NOTE(review): which ';'-separated segment of the volume caption holds the name
        // was lost; the first segment is assumed.
        private const int VolumeNameSegment = 0;

        private readonly int Source = Convert.ToInt32(NovalSource.QiDian);

        private readonly MongoCollection<NovalTempBase> Novalcol =
            MongoConnectionFactory.GetMongoCollction<NovalTempBase>("Noval", typeof (NovalTempBase));

        /// <summary>
        /// Downloads the category and sub-category scripts, parses their JSON arrays and
        /// replaces this source's rows in the NovalTypeTemp and NovalSubType collections.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// A script does not contain the expected array. Thrown before any stored data is removed.
        /// </exception>
        public void GetNovalTypeTemp()
        {
            var typecol = MongoConnectionFactory.GetMongoCollction<NovalTypeTemp>("Noval", typeof (NovalTypeTemp));
            var subcol = MongoConnectionFactory.GetMongoCollction<NovalSubType>("Noval", typeof (NovalSubType));

            // Top-level categories: http://www.qidian.com/Javascript/qidian.bookstore.js?t=20130917
            string typeshtml = StripScriptNoise(
                NetHelper.HttpGet("http://www.qidian.com/Javascript/qidian.bookstore.js?t=20130917")
                    .Replace("/", ""));

            // NOTE(review): the trailing space inside this URL is present in the original literal.
            string subtypes = StripScriptNoise(
                NetHelper.HttpGet("http://script.cmfu.com/script/BookStore.js "));

            Match mtype = Regex.Match(typeshtml, "CategoryArr:(.*?)]]", ScriptRegexOptions);
            if (!mtype.Success)
            {
                throw new InvalidOperationException("CategoryArr was not found in the QiDian category script.");
            }

            // The lazy capture stops before the closing "]]", so it is appended again.
            string typesstring = mtype.Groups[1].Value + "]]";
            JArray typearr = (JArray) JsonConvert.DeserializeObject(typesstring);

            Match msubtype = Regex.Match(subtypes, "SubCategoryArr=(.*?);", ScriptRegexOptions);
            if (!msubtype.Success)
            {
                throw new InvalidOperationException("SubCategoryArr was not found in the QiDian sub-category script.");
            }

            string subtypesstring = msubtype.Groups[1].Value;
            JArray subarr = (JArray) JsonConvert.DeserializeObject(subtypesstring);

            // Sample: CategoryArr: [["全部", "-1"], ["玄幻", "21"], ["奇幻", "1"], ["武侠", "2"], ...]
            List<NovalTypeTemp> lstypes = new List<NovalTypeTemp>();
            for (int i = 0; i < typearr.Count; i++)
            {
                string id = typearr[i][CategoryIdIndex].ToString();
                if (id == "-1")
                {
                    // "-1" is the "all" pseudo-category in the sample above.
                    continue;
                }

                lstypes.Add(new NovalTypeTemp()
                {
                    WebNum = id.ToInt(),
                    Name = typearr[i][CategoryNameIndex].ToString(),
                    Source = Source
                });
            }

            List<NovalSubType> subtypels = new List<NovalSubType>();
            foreach (var parentType in lstypes)
            {
                string parentId = parentType.WebNum.ToString();
                for (int i = 0; i < subarr.Count; i++)
                {
                    var obj = subarr[i];
                    if (obj[SubCategoryParentIndex].ToString() != parentId)
                    {
                        continue;
                    }

                    subtypels.Add(new NovalSubType()
                    {
                        Name = obj[SubCategoryNameIndex].ToString(),
                        ParentWebNum = parentType.WebNum,
                        WebNum = obj[SubCategoryIdIndex].ToString().ToInt(),
                        Source = Source
                    });
                }
            }

            // Everything is parsed before the stored rows are touched, so a failed scrape
            // cannot leave the collections emptied.
            typecol.Remove(Query<NovalTypeTemp>.EQ(p => p.Source, Source));
            if (lstypes.Count > 0)
            {
                // Empty batches are skipped; some driver versions reject them.
                typecol.InsertBatch(lstypes);
            }

            subcol.Remove(Query<NovalSubType>.EQ(p => p.Source, Source));
            if (subtypels.Count > 0)
            {
                subcol.InsertBatch(subtypels);
            }
        }

        /// <summary>
        /// Reads the click-ranking pages and hands the de-duplicated list of novels to
        /// <c>PublicMethod.InsertAndUpdateNovalTmp</c>.
        /// </summary>
        public void GetNovals()
        {
            HtmlDocument htmldocc = new HtmlDocument();
            List<NovalTempBase> qdls = new List<NovalTempBase>();
            // Book numbers already collected; replaces a linear List.Exists scan per row.
            HashSet<int> seenBooks = new HashSet<int>();

            for (int page = FirstRankPage; page <= LastRankPage; page++)
            {
                string sourcehtml =
                    NetHelper.HttpGet("http://top.qidian.com/Book/TopDetail.aspx?TopType&Time=2&PageIndex=" + page);
                htmldocc.LoadHtml(sourcehtml);

                var doc = htmldocc.GetElementbyId("textlist");
                if (doc == null)
                {
                    // Page without a ranking table: skip it instead of failing the whole run.
                    continue;
                }

                for (int row = 1; row <= RowsPerRankPage; row++)
                {
                    var trdoc = doc.SelectSingleNode("tr[" + row + "]");
                    if (trdoc == null)
                    {
                        continue;
                    }

                    // XPath positions are 1-based.
                    var tdtype = trdoc.SelectSingleNode("td[2]/a");
                    var tdbook = trdoc.SelectSingleNode("td[3]/a[1]");
                    var tdclick = trdoc.SelectSingleNode("td[4]");
                    var tdauth = trdoc.SelectSingleNode("td[5]/a");
                    if (tdtype == null || tdbook == null || tdclick == null || tdauth == null)
                    {
                        // Header or malformed row.
                        continue;
                    }

                    Match typematch = Regex.Match(tdtype.OuterHtml, "ChannelId=(\\d*?)&SubCategoryId=(\\d*?)'");
                    Match bookmatck = Regex.Match(tdbook.OuterHtml, "Book/(\\d*?).aspx");
                    Match authmatch = Regex.Match(tdauth.OuterHtml, "id=(\\d*?)\"");

                    int booknum = bookmatck.Groups[1].Value.ToInt();
                    if (!seenBooks.Add(booknum))
                    {
                        // Same book already collected from an earlier row or page.
                        continue;
                    }

                    qdls.Add(new NovalTempBase()
                    {
                        AuthName = tdauth.InnerText.Trim(),
                        AuthId = authmatch.Groups[1].Value.ToInt(),
                        SubType = typematch.Groups[2].Value.ToInt(),
                        // Cover pattern, e.g. http://image.cmfu.com/books/3127618/3127618.jpg
                        TitleImg = "http://image.cmfu.com/books/" + booknum + "/" + booknum + ".jpg",
                        Title = tdbook.InnerText.Trim(),
                        TotalClick = tdclick.InnerText.ToInt(),
                        // NOTE(review): the original literal was lost; 0 is assumed.
                        TotalComment = 0,
                        Type = typematch.Groups[1].Value.ToInt(),
                        SourceWebNum = booknum,
                        Source = Source
                    });
                }
            }

            PublicMethod.InsertAndUpdateNovalTmp(qdls, Source);
        }

        //public void GetNovalsByType()
        //{
        //}

        /// <summary>
        /// Collects the chapter catalogue of every stored novel that belongs to this source.
        /// </summary>
        public void GetNovalChapers()
        {
            var novalcol = MongoConnectionFactory.GetMongoCollction<NovalTempBase>("Noval", typeof (NovalTempBase));
            var books = novalcol.AsQueryable().Where(p => p.Source == Source).ToList();
            foreach (var infoQidian in books)
            {
                GetSingleNovalChapers(infoQidian.SourceWebNum);
            }
        }

        /// <summary>
        /// Scrapes the catalogue page of one novel. Volumes replace the stored
        /// NovalVolumeTemp rows for that novel; chapters are passed to
        /// <c>PublicMethod.InsertChapterTempToSQL</c>.
        /// </summary>
        /// <param name="novalwebnum">The novel's number on the source site.</param>
        /// <remarks>
        /// Failures are traced and swallowed so that one bad book does not stop
        /// <see cref="GetNovalChapers"/> from processing the rest.
        /// </remarks>
        public void GetSingleNovalChapers(int novalwebnum)
        {
            IMongoQuery q2 = Query<NovalVolumeTemp>.EQ(p => p.Source, Source);
            IMongoQuery q1 = Query<NovalVolumeTemp>.EQ(p => p.NovalWebNum, novalwebnum);
            IMongoQuery query = Query.And(new IMongoQuery[] { q1, q2 });

            // NOTE(review): this handle is never used below (chapters go to SQL); the call is
            // kept only in case the factory has side effects.
            var chaptercol = MongoConnectionFactory.GetMongoCollction<NovalChapterTemp>("Noval", typeof(NovalChapterTemp));
            var volumecol = MongoConnectionFactory.GetMongoCollction<NovalVolumeTemp>("Noval", typeof (NovalVolumeTemp));

            List<NovalChapterTemp> lschapters = new List<NovalChapterTemp>();
            List<NovalVolumeTemp> lsvolumes = new List<NovalVolumeTemp>();
            int chapterorder = FirstSortValue;
            int volumeorder = FirstSortValue;
            HtmlDocument htmldocc = new HtmlDocument();

            // e.g. http://read.qidian.com/BookReader/3127618.aspx
            string url = "http://read.qidian.com/BookReader/" + novalwebnum + ".aspx";
            try
            {
                string sourcehtml = NetHelper.HttpGet(url);
                htmldocc.LoadHtml(sourcehtml);

                var doc = htmldocc.GetElementbyId("content");
                if (doc == null)
                {
                    System.Diagnostics.Trace.TraceWarning(
                        "QiDianCol: no catalogue container found for novel {0} ({1}).", novalwebnum, url);
                    return;
                }

                // The catalogue is read as pairs of sibling divs: a volume caption at
                // position i, followed by that volume's chapter list at position i + 1.
                int i = 1;
                var topdoc = doc.SelectSingleNode("div[" + i + "]");
                while (topdoc != null)
                {
                    // VIP volumes have no anchor in their caption.
                    var topa = topdoc.SelectSingleNode("div/a");
                    int topnum;
                    if (topa != null)
                    {
                        // href="http://www.qidian.com/BookReader/vol,107580,486625.aspx"
                        Match m = Regex.Match(topa.OuterHtml, ",(\\d*?).aspx");
                        topnum = m.Groups[1].Value.ToInt();

                        var topaname = topdoc.SelectSingleNode("div/b");
                        string topname = topaname == null
                            ? string.Empty
                            : topaname.InnerText.Trim().Replace("&nbsp", "").Split(';')[VolumeNameSegment];

                        lsvolumes.Add(new NovalVolumeTemp()
                        {
                            Sort = volumeorder,
                            WebNum = topnum,
                            Name = topname,
                            NovalWebNum = novalwebnum,
                            Source = Source
                        });
                        volumeorder++;
                    }
                    else
                    {
                        topnum = NoVolumeId;
                    }

                    var contextdoc = doc.SelectSingleNode("div[" + (i + 1) + "]");
                    // HtmlAgilityPack's SelectNodes returns null, not an empty list, when nothing matches.
                    var chaperas = contextdoc == null ? null : contextdoc.SelectNodes("div/ul/li/a");
                    if (chaperas != null)
                    {
                        // <a itemprop='url' href="http://read.qidian.com/BookReader/107580,20901221.aspx" ...>
                        for (int x = 0; x < chaperas.Count; x++)
                        {
                            var chapera = chaperas[x];
                            Match chapmatchwebnum = Regex.Match(chapera.OuterHtml, ",(\\d*?).aspx");

                            lschapters.Add(new NovalChapterTemp()
                            {
                                Name = chapera.InnerText.Trim(),
                                Sort = chapterorder,
                                WebNum = chapmatchwebnum.Groups[1].Value.ToInt(),
                                VolumeId = topnum,
                                NovalWebNum = novalwebnum,
                                Source = Source
                            });
                            chapterorder++;
                        }
                    }

                    i += 2;
                    topdoc = doc.SelectSingleNode("div[" + i + "]");
                }

                volumecol.Remove(query);
                if (lsvolumes.Count > 0)
                {
                    // An empty batch is skipped so that a book with no linked volumes still
                    // gets its chapters stored below.
                    volumecol.InsertBatch(lsvolumes);
                }

                PublicMethod.InsertChapterTempToSQL(lschapters, Source, novalwebnum);
            }
            catch (Exception ex)
            {
                // Best effort per book, as in the original, but no longer silent.
                System.Diagnostics.Trace.TraceWarning(
                    "QiDianCol: failed to collect chapters for novel {0}: {1}", novalwebnum, ex);
            }
        }

        /// <summary>
        /// Refreshes <c>TotalClick</c> of every stored novel of this source from its book page.
        /// Books whose page cannot be parsed are traced and skipped.
        /// </summary>
        public void GetNovalCilckComment()
        {
            var novalcol = MongoConnectionFactory.GetMongoCollction<NovalTempBase>("Noval", typeof(NovalTempBase));
            var books = novalcol.AsQueryable().Where(p => p.Source == Source).ToList();
            HtmlDocument htmldocc = new HtmlDocument();

            foreach (var novalTempBase in books)
            {
                // e.g. http://www.qidian.com/Book/3106580.aspx
                string url = "http://www.qidian.com/Book/" + novalTempBase.SourceWebNum + ".aspx";
                string sourcehtml = NetHelper.HttpGet(url);
                htmldocc.LoadHtml(sourcehtml);

                var cliclickdiv = htmldocc.GetElementbyId("contentdiv");
                var clickcell = cliclickdiv == null
                    ? null
                    : cliclickdiv.SelectSingleNode("div/div[1]/table/tr/td[1]");
                if (clickcell == null)
                {
                    System.Diagnostics.Trace.TraceWarning("QiDianCol: click cell not found at {0}.", url);
                    continue;
                }

                string clickcount = clickcell.InnerText.Replace("总点击", "")
                    .Replace(":", "").Trim();

                int click;
                if (!int.TryParse(clickcount, out click))
                {
                    System.Diagnostics.Trace.TraceWarning(
                        "QiDianCol: unparsable click count '{0}' at {1}.", clickcount, url);
                    continue;
                }

                // Comment-count scraping was left disabled in the original; candidate pages were
                // http://forum.qidian.com/NewForum/List.aspx?BookId=<id> and
                // http://c.pingba.qidian.com/BookComment.aspx?BookId=<id>.
                novalTempBase.TotalClick = click;
                novalcol.Save(novalTempBase);
            }
        }

        /// <summary>
        /// Removes "&amp;nbsp" tokens, CR/LF, tabs, pipes and spaces from a downloaded script
        /// so that the array regexes can match on a single compact line.
        /// </summary>
        private static string StripScriptNoise(string script)
        {
            return script
                .Replace("&nbsp", "")
                .Replace("\r", "")
                .Replace("\n", "")
                .Replace("\t", "")
                .Replace("|", "")
                .Replace(" ", "");
        }
    }
}

MongoDB CRUD 操作、采集部分代码的更多相关文章

  1. SELECT控件操作的JS代码示例

    SELECT控件操作的JS代码示例 1 检测是否有选中 if(objSelect.selectedIndex > -1) { //说明选中 } else { //说明没有选中 } 2.动态创建s ...

  2. PHP禁止同一IP频繁访问以防止网站被防攻击或采集的代码

    PHP禁止同一IP频繁访问以防止网站被防攻击或采集的代码 <?php /* *通过禁止IP频繁访问防止网站被防攻击代码*design by www.scutephp.com*/header('C ...

  3. C#开发中使用Npoi操作excel实例代码

    C#开发中使用Npoi操作excel实例代码 出处:西西整理 作者:西西 日期:2012/11/16 9:35:50 [大 中 小] 评论: 0 | 我要发表看法 Npoi 是什么? 1.整个Exce ...

  4. 30个php操作redis常用方法代码例子

    From: http://www.jb51.net/article/51884.htm 这篇文章主要介绍了30个php操作redis常用方法代码例子,本文其实不止30个方法,可以操作string类型. ...

  5. php foreach 操作数组的代码

    php foreach 操作数组的代码.   foreach()有两种用法:  foreach(array_name as $value)  {  statement;  }  这里的array_na ...

  6. 30 个 php 操作 redis 常用方法代码例子

    这篇文章主要介绍了 30 个 php 操作 redis 常用方法代码例子 , 本文其实不止 30 个方法 , 可以操作 string 类型、list 类型和 set 类型的数据 , 需要的朋友可以参 ...

  7. Redis:安装、配置、操作和简单代码实例(C语言Client端)

    Redis:安装、配置、操作和简单代码实例(C语言Client端) - hj19870806的专栏 - 博客频道 - CSDN.NET Redis:安装、配置、操作和简单代码实例(C语言Client端 ...

  8. Installshield停止操作系统进程的代码--IS5版本适用

    原文:Installshield停止操作系统进程的代码--IS5版本适用 出处:http://www.installsite.org/pages/en/isp_ext.htm这个地址上有不少好东西,有 ...

  9. Installshield停止操作系统进程的代码 --IS6及以上版本适用

    原文:Installshield停止操作系统进程的代码 --IS6及以上版本适用 setup.rul的代码 Code;end;///////////////////////////////////// ...

  10. C# FTP操作类的代码

    如下代码是关于C# FTP操作类的代码.using System;using System.Collections.Generic;using System.Text;using System.Net ...

随机推荐

  1. PAT Advanced 1038 Recover the Smallest Number (30) [贪心算法]

    题目 Given a collection of number segments, you are supposed to recover the smallest number from them. ...

  2. oracle 学习(四)游标

    显式游标 隐式游标:如果在PL/SQL程序段中使用SELECT语句进行操作,PL/SQL 会隐含的处理游标定义,即为隐式游标.这种游标不需要像显式那样声明,也不必打开关闭. CREATE OR REP ...

  3. 洛谷 P1258 小车问题

    题目传送门 解题思路: 首先,每个人都要做一次车,而且两个人要同时到达,这样才能使总时间最短. 那么,我们设起点为A,终点为B,小车先带甲开到C点后甲下车走到B点,同时小车掉头与已经走到D点的乙相向而 ...

  4. Django框架的安装与使用

    Django框架的安装与使用 在使用Django框架开发web应用程序时,开发阶段同样依赖wsgiref模块来实现Server的功能,我们使用Django框架是为了快速地开发application, ...

  5. 计量经济与时间序列_关于Box-Jenkins的ARMA模型的经济学意义(重要思路)

    1 很多人已经了解到AR(1)这种最简单的时间序列模型,ARMA模型包括AR模型和MA模型两个部分,这里要详细介绍Box-Jenkins模型的观念(有些资料中把ARMA模型叫做Box-Jenkins模 ...

  6. Django路由层与视图层

    表与表之间建关系 图书管理系统为例 书籍表 出版社表 作者表 三个表之间的关系: 考虑表之间的关系:换位思考 1.书籍和出版社是一对多,外键字段建立在书籍表中 2.书籍和作者是多对多, 需要建立第三方 ...

  7. Java基础的坑

    仍会出现NPE 需要改成

  8. 添加新硬盘,扩展Centos7根分区

    ##背景介绍,系统安装时,分配的硬盘容量太小,根分区空间不够用,现添加一个新硬盘,通过以下步骤来扩展centos7根分区 [root@t201 ~]# df -h 文件系统 容量 已用 可用 已用% ...

  9. 迅为IMX6Q开发板提供原理图_底板PCB_驱动程序源码_芯片和LCD数据手册_开发板环境_使用手册

      迅为IMX6开发板: Android4.4/6.0系统  Linux + Qt5.7系统  Ubuntu12.04系统 部分案例:HMI:3D打印机:医疗设备:工控机:触控一体机:车载终端 核心板 ...

  10. [Algo] 66. All Valid Permutations Of Parentheses I

    Given N pairs of parentheses “()”, return a list with all the valid permutations. Assumptions N > ...