Dooioo Deal
using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks; namespace AnfleCrawler.DataAnalyzer
{
/// <summary>
/// Analyzer for Dooioo pages. Depending on the depth of the landed page it queues
/// links found under "#hlist", stores an estate record built from "#building-info",
/// or stores the rows of the "#history-list" table.
/// </summary>
// NOTE(review): this copy of the source has lost its numeric literals (the first
// `case` label, every `spans[]` index, the `Count() -` offset and the `pageIndex`
// fallback are all empty), so it does not compile as shown. Restore them from the
// original file; the values cannot be recovered from what is visible here.
internal class Dooioo : AnalyzerBase
{
/// <summary>
/// Processes one landed page, dispatching on <c>current.Depth</c>.
/// </summary>
/// <param name="current">The landed page; its Url, Depth and State are read below.</param>
protected override void AnalyzeInternal(PageLandEntity current)
{
var lander = Crawler.Lander;
var pHandler = CreateContentHandler(current);
switch (current.Depth)
{
// NOTE(review): the case label has lost its value — presumably the entry/listing depth; confirm.
case :
{
var dom = lander.GetDocument(pHandler);
// Passes the ".pagination a:last-child" selector to DoPerPaging, then queues the
// href of every "#hlist a" node (resolved against current.Url) at the Houses depth.
DoPerPaging(current, dom.DocumentNode, ".pagination a:last-child"); foreach (var node in QueryNodes(dom.DocumentNode, "#hlist a"))
{
var url = GetHref(node, current.Url);
Crawler.PushUrl(url, DataDepth.Houses);
}
}
break;
case DataDepth.Houses:
{
var dom = lander.GetDocument(pHandler);
// Each "#building-info li" becomes one "{0}:{1}" string built from two of its
// <span> texts. NOTE(review): both span indices are missing in this copy.
var attrs = new AttributeFiller(); var Nset = QueryNodes(dom.DocumentNode, "#building-info li").Select(p =>
{
var spans = QueryTexts(p, "span").ToArray();
return string.Format("{0}:{1}", spans[], spans[]);
});
// The record is looked up by a key generated from the page URL string.
attrs.Append(Nset); Guid hashKey = GenHashKey(current.Url.OriginalString);
var bo = Crawler.Repository.LoadHouses(hashKey);
bo.SiteID = current.Url.GetDomain();
bo.PageUrl = current.Url.OriginalString;
bo.CityName = Crawler.Config.CityName;
// Presumably maps the label scraped from the page (key) to the entity member name
// (value) — e.g. the value "小区名称" is read back as bo.小区名称 below; confirm
// against AttributeFiller.FillEntity.
attrs.FillEntity(bo, new Dictionary<string, string>()
{
{"小区名", "小区名称"},
{"板块", "所属区域"},
{"建造年代", "竣工时间"},
{"地址", "小区地址"},
{"物业类型", "物业类别"},
});
MapMark(bo);
Repository.Save(bo);
// Pagination links are queried with a third argument of false; its meaning is
// defined by QueryNodes, which is not visible here.
Crawler.OutWrite("保存楼盘 {0}", bo.小区名称); var Pset = QueryNodes(dom.DocumentNode, ".pagination a", false);
if (Pset.Any())
{
// Takes the inner text of one pagination link, counted back from the end, as the
// page count. NOTE(review): the offset subtracted from Count() is missing.
string pageCount = Pset.Skip(Pset.Count() - ).First().InnerText;
// Queues a "[2-N]" ranged URL at the Deal depth, passing bo.RowID as the third
// argument (the Deal case reads a Guid from current.State).
// NOTE(review): the URL hard-codes "s117862" instead of deriving it from
// current.Url — looks like it targets a single estate; confirm this is intended.
Crawler.PushUrl(new Uri(string.Format("http://www.dooioo.com/ershoufang/s117862?p=[2-{0}]", pageCount)), DataDepth.Deal, bo.RowID);
}
// The page already loaded here is also passed to SaveHouselisting.
SaveHouselisting(bo.RowID, current, dom);
}
break;
case DataDepth.Deal:
{
// State is cast directly to Guid; a non-Guid or null State will throw here.
Guid housesID = (Guid)current.State;
// Callback assigned to CrossLoad: it clicks the pagination link whose text equals
// the page index taken from the request URL.
pHandler.CrossLoad = (arg, xDom) =>
{
string pName = "p";
// When IsRepost is already set, clear it and return without clicking again.
if (arg.IsRepost)
{
arg.IsRepost = false;
return;
}
var query = System.Web.HttpUtility.ParseQueryString(arg.RequestUrl.Query);
int pageIndex;
if (!int.TryParse(query[pName], out pageIndex))
{
// NOTE(review): the fallback page index literal is missing in this copy.
pageIndex = ;
// Finds the first <ul> whose class attribute contains "pagination".
} var input = xDom.GetElementsByTagName("ul").Cast<System.Windows.Forms.HtmlElement>()
.Where(p => p.GetAttribute("class").Contains("pagination")).FirstOrDefault();
if (input == null)
{
// No pagination list found: log the body HTML and give up on this load.
App.LogInfo("CrossLoad xPaing:{0} {1}", this.GetType().Name, xDom.Body.InnerHtml);
return;
}
// First() throws InvalidOperationException when no link text equals the page index.
var btn = input.GetElementsByTagName("a").Cast<System.Windows.Forms.HtmlElement>()
.Where(p => p.InnerText == pageIndex.ToString()).First();
btn.InvokeMember("click");
arg.IsRepost = true;
};
var dom = lander.GetDocument(pHandler);
SaveHouselisting(housesID, current, dom);
}
break;
}
/// <summary>
/// Saves one <c>HouselistingEntity</c> per "#history-list tr" row of the document.
/// </summary>
/// <param name="housesID">Identifier written to each record as "HousesID".</param>
/// <param name="current">The landed page; not read inside this method.</param>
/// <param name="dom">Parsed document whose "#history-list" rows are read.</param>
} private void SaveHouselisting(Guid housesID, PageLandEntity current, HtmlAgilityPack.HtmlDocument dom)
{
// NOTE(review): a single AttributeFiller is shared by all rows; whether Append
// overwrites or accumulates earlier values is not visible here — confirm.
var attrs = new AttributeFiller();
foreach (var node in QueryNodes(dom.DocumentNode, "#history-list tr"))
{
// NOTE(review): every spans[] index below is missing in this copy, so the mapping
// from <td> columns to fields cannot be determined from here.
var spans = QueryTexts(node, "td").ToArray();
attrs.Append("HousesID:{0}", housesID); DateTime dump;
// TransactionDate is appended only when the cell text parses as a DateTime.
if (DateTime.TryParse(spans[], out dump))
{
attrs.Append("TransactionDate:{0}", dump);
} attrs.Append("SoldPriceOrRent:{0}", spans[]);
attrs.Append("UnitPriceOrLease:{0}", spans[]);
attrs.Append("Apartment:{0}", spans[]);
attrs.Append("ServiceBroker:{0}", spans[]);
attrs.Append("Area:{0}", spans[]); var bo = new HouselistingEntity();
attrs.FillEntity(bo);
Repository.SaveHouselisting(bo);
Crawler.OutWrite("保存小区出售记录 {0}", housesID);
}
}
}
}
Dooioo Deal的更多相关文章
- zlhome.com Deal
using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using ...
- XML节点名称中有小数点处理(deal with dot)导致使用xpath时报错解决方法
<?xml version="1.0"?> <ModifyFiles> <_Layout.cshtml>123456</_Layout.c ...
- whu 1464 deal with numbers
WHU 1464 deal with numbers 题意: 给你一串数字,对这串数字有三项操作: Minus a,b,c:对区间[a,b]中的每个数都减c. Division a,b,c:对区间[ ...
- OK335xS canutils deal with compile error
/************************************************************************************** * OK335xS ca ...
- 能让你聪明的工作DEAL四法则,来自《每周工作四小时》书籍
来自书籍<每周工作四小时>,作者蒂莫西·费里斯(Tim Ferriss,昵称:蒂姆) 能让你聪明的工作DEAL四法则: 第一步:D——定位(Definition) 第二步:E——精简( ...
- how to deal with EINTR fault
[how to deal with EINTR fault] EINTR:interrupted error.是指一个调用被信号给中断,对于同步的耗时调用来说,这个操作常见,譬如select.read. ...
- Spoken English Practice( Believe it or not, I don't need to make believe its a big deal. (believe,deal, You don't say))
音标复习 绿色:连读:红色:略读:蓝色:浊化:橙色:弱读 口语蜕变(2017/6/25) Sorry, t ...
- If you want the rainbow, you have to deal with the rain.
If you want the rainbow, you have to deal with the rain.想要彩虹,就先忍受雨水.
- Using SMOTEBoost(过采样) and RUSBoost(使用聚类+集成学习) to deal with class imbalance
Using SMOTEBoost and RUSBoost to deal with class imbalance from:https://aitopics.org/doc/news:1B9F7A ...
随机推荐
- android中ContentProvider获取联系人 总结
35.内容提供者:ContentResolver 用内容提供者来获取联系人信息 35-1:权限 <!-- 对联系人的读.写权限 --> <uses-permission androi ...
- 回传数据startActivityForResult()
1.调用者Activity01开启新的界面选用startActivityForResult(intent,requestCode);在Activity01中Intent intent=new Inte ...
- 20160330001 调用及触发Office Outlook 约会
using System;using System.Collections.Generic;using System.ComponentModel;using System.Data;using Sy ...
- 用一个案例详细讲解UITextField
一. 登陆界面的搭建 首先涉及到登录界面状态栏颜色的问题,我们需要将状态栏颜色改为白色,可以在控制器内实现方法更改 - (UIStatusBarStyle)preferredStatusBarStyl ...
- Android官方数据绑定框架DataBinding
数据绑定框架给我们带来了更大的方便性,以前我们可能需要在Activity里写很多的findViewById,烦人的代码也增加了我们代码的耦合性,现在我们马上就可以抛弃那么多的findViewById. ...
- 理解GRUB2工作原理及配置选项与方法
GRUB2是借鉴GRUB改写到更加安全强大到多系统引导程序,现在大部分较新的Linux发行版都是使用GRUB2作为引导程序的.GRUB2采用了模块化设计,使得GRUB2核心更加精炼,使用更加灵活,同时 ...
- 第 十一 天 Fragment 和动画
1.Fragment 的使用,生命周期. 传递数据. 2. 基本动画的使用. 3. 对话框的使用. 4.样式和主题.
- linux文件上传,给文件或目录添加apache权限
系统环境:ubuntu11.10/apache2/php5.3.6 在LAMP环境中,测试一个简单的php文件上传功能时,发现/var/log/apache2/error.log中出现如下php警告: ...
- 深入理解CSS网页布局-理论篇
在CSS网页开发布局中,需要对浮动和定位有深刻的理解才能在开发中游刃有余. 基于此,在博客园中做了本篇总结,这些总结来自实践经验和阅读一些书籍后的理解总结,主要内容为浮动,清除浮动,定位. (可点击屏 ...
- c++ 指针常量,常量指针
当const遇到指针 一般来说,const修饰指针可以分为下面的几种情况. 描述 例子 含义 备注 const在*的左边 const int *b=&a; int const *b=& ...