using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks; namespace AnfleCrawler.DataAnalyzer
{
// NOTE(review): this listing appears to have lost its numeric literals during extraction
// (see `case :`, `spans[]`, `pageIndex = ;` and `Pset.Count() - ` below). The code does not
// compile as shown; restore the literals from the original source before building.
/// <summary>
/// Analyzer for dooioo.com pages. Depending on the depth of the landed page it queues
/// building URLs, saves a building record together with its sale-history rows, or saves
/// additional pages of sale-history rows for an already saved building.
/// </summary>
internal class Dooioo : AnalyzerBase
{
/// <summary>
/// Processes one landed page, branching on <c>current.Depth</c>.
/// </summary>
/// <param name="current">
/// The landed page. Its <c>Depth</c> selects the branch, its <c>Url</c> is used to resolve
/// links and build the hash key, and (for <c>DataDepth.Deal</c>) its <c>State</c> is cast to
/// the building's <see cref="Guid"/>.
/// </param>
protected override void AnalyzeInternal(PageLandEntity current)
{
var lander = Crawler.Lander;
var pHandler = CreateContentHandler(current);
switch (current.Depth)
{
// NOTE(review): the case label value is missing; presumably this is the listing/entry
// depth whose pages link to individual buildings - confirm against the original source.
case :
{
var dom = lander.GetDocument(pHandler);
// DoPerPaging receives the selector of the last pagination link (presumably to queue the
// remaining listing pages - verify in AnalyzerBase). Every "#hlist a" link is then
// resolved against the current URL and pushed at DataDepth.Houses.
DoPerPaging(current, dom.DocumentNode, ".pagination a:last-child"); foreach (var node in QueryNodes(dom.DocumentNode, "#hlist a"))
{
var url = GetHref(node, current.Url);
Crawler.PushUrl(url, DataDepth.Houses);
}
}
break;
case DataDepth.Houses:
{
var dom = lander.GetDocument(pHandler);
// Each "#building-info li" item is turned into a "name:value" string built from the texts
// of its <span> children.
// NOTE(review): both span indexes are missing in the Format call below.
var attrs = new AttributeFiller(); var Nset = QueryNodes(dom.DocumentNode, "#building-info li").Select(p =>
{
var spans = QueryTexts(p, "span").ToArray();
return string.Format("{0}:{1}", spans[], spans[]);
});
// The hash key is derived from the page URL and used to load the building entity from the
// repository.
attrs.Append(Nset); Guid hashKey = GenHashKey(current.Url.OriginalString);
var bo = Crawler.Repository.LoadHouses(hashKey);
bo.SiteID = current.Url.GetDomain();
bo.PageUrl = current.Url.OriginalString;
bo.CityName = Crawler.Config.CityName;
// The dictionary presumably maps attribute names scraped from the page (keys) to entity
// member names (values) - verify against AttributeFiller.FillEntity.
attrs.FillEntity(bo, new Dictionary<string, string>()
{
{"小区名", "小区名称"},
{"板块", "所属区域"},
{"建造年代", "竣工时间"},
{"地址", "小区地址"},
{"物业类型", "物业类别"},
});
MapMark(bo);
Repository.Save(bo);
// After saving the building, look at the pagination links of the sale-history list.
Crawler.OutWrite("保存楼盘 {0}", bo.小区名称); var Pset = QueryNodes(dom.DocumentNode, ".pagination a", false);
if (Pset.Any())
{
// The page count is read from the text of one pagination link counted from the end.
// NOTE(review): the offset subtracted from Pset.Count() is missing.
string pageCount = Pset.Skip(Pset.Count() - ).First().InnerText;
// Pages 2..pageCount are queued at DataDepth.Deal with the building's RowID as state.
// NOTE(review): the URL hard-codes "s117862" instead of deriving it from the current
// page - looks like it targets one specific building; confirm whether that is intended.
Crawler.PushUrl(new Uri(string.Format("http://www.dooioo.com/ershoufang/s117862?p=[2-{0}]", pageCount)), DataDepth.Deal, bo.RowID);
}
// The sale-history rows present on this (first) page are saved immediately.
SaveHouselisting(bo.RowID, current, dom);
}
break;
case DataDepth.Deal:
{
// State carries the building RowID that was passed to PushUrl in the Houses branch.
Guid housesID = (Guid)current.State;
// CrossLoad callback: works on the browser document (xDom) and clicks the pagination link
// for the requested page, then flags the request as a repost.
pHandler.CrossLoad = (arg, xDom) =>
{
string pName = "p";
// Second invocation after the click: clear the flag and do nothing else.
if (arg.IsRepost)
{
arg.IsRepost = false;
return;
}
var query = System.Web.HttpUtility.ParseQueryString(arg.RequestUrl.Query);
int pageIndex;
if (!int.TryParse(query[pName], out pageIndex))
{
// NOTE(review): the fallback page index literal is missing here.
pageIndex = ;
} var input = xDom.GetElementsByTagName("ul").Cast<System.Windows.Forms.HtmlElement>()
.Where(p => p.GetAttribute("class").Contains("pagination")).FirstOrDefault();
// No <ul> whose class contains "pagination": log the page body and give up.
if (input == null)
{
App.LogInfo("CrossLoad xPaing:{0} {1}", this.GetType().Name, xDom.Body.InnerHtml);
return;
}
// Find the link whose text equals the page index and click it.
// NOTE(review): First() throws InvalidOperationException when no link matches.
var btn = input.GetElementsByTagName("a").Cast<System.Windows.Forms.HtmlElement>()
.Where(p => p.InnerText == pageIndex.ToString()).First();
btn.InvokeMember("click");
arg.IsRepost = true;
};
var dom = lander.GetDocument(pHandler);
SaveHouselisting(housesID, current, dom);
}
break;
}
// The next line closes AnalyzeInternal and opens SaveHouselisting, which saves one
// HouselistingEntity per "#history-list tr" row of dom, tagged with housesID.
// The parameter `current` is not read inside the method body.
} private void SaveHouselisting(Guid housesID, PageLandEntity current, HtmlAgilityPack.HtmlDocument dom)
{
// NOTE(review): a single AttributeFiller is created outside the loop and reused for every
// row; whether Append overwrites or accumulates earlier values is not visible here.
var attrs = new AttributeFiller();
foreach (var node in QueryNodes(dom.DocumentNode, "#history-list tr"))
{
var spans = QueryTexts(node, "td").ToArray();
// NOTE(review): every spans[] index below is missing, so the column-to-field mapping
// cannot be determined from this listing.
attrs.Append("HousesID:{0}", housesID); DateTime dump;
// TransactionDate is only appended when the cell text parses as a date.
if (DateTime.TryParse(spans[], out dump))
{
attrs.Append("TransactionDate:{0}", dump);
} attrs.Append("SoldPriceOrRent:{0}", spans[]);
attrs.Append("UnitPriceOrLease:{0}", spans[]);
attrs.Append("Apartment:{0}", spans[]);
attrs.Append("ServiceBroker:{0}", spans[]);
// A fresh entity is created per row, filled from attrs and saved.
attrs.Append("Area:{0}", spans[]); var bo = new HouselistingEntity();
attrs.FillEntity(bo);
Repository.SaveHouselisting(bo);
Crawler.OutWrite("保存小区出售记录 {0}", housesID);
}
}
}
}

Dooioo Deal的更多相关文章

  1. zlhome.com Deal

    using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using ...

  2. XML节点名称中有小数点处理(deal with dot)导致使用xpath时报错解决方法

    <?xml version="1.0"?> <ModifyFiles> <_Layout.cshtml>123456</_Layout.c ...

  3. whu 1464 deal with numbers

    WHU 1464  deal with numbers 题意: 给你一串数字,对这串数字有三项操作: Minus a,b,c:对区间[a,b]中的每个数都减c. Division a,b,c:对区间[ ...

  4. OK335xS canutils deal with compile error

    /************************************************************************************** * OK335xS ca ...

  5. 能让你聪明的工作DEAL四法则,来自《每周工作四小时》书籍

    来自书籍<每周工作四小时>,作者蒂莫西·费里斯(Tim Ferriss,昵称:蒂姆)   能让你聪明的工作DEAL四法则: 第一步:D——定位(Definition) 第二步:E——精简( ...

  6. how to deal with EINTR fault

    [how to deal with EINTR fault] EINTR:interupted error.是指一个调用被信号给中断,对于同步的耗时调用来说,这个操作常见,譬如select.read. ...

  7. Spoken English Practice( Believe it or not, I don't need to make believe its a big deal. (believe,deal, You don't say))

    音标复习                                                绿色:连读:红色:略读:蓝色:浊化:橙色:弱读 口语蜕变(2017/6/25) Sorry, t ...

  8. If you want the rainbow, you have to deal with the rain.

    If you want the rainbow, you have to deal with the rain.想要彩虹,就先忍受雨水.

  9. Using SMOTEBoost(过采样) and RUSBoost(使用聚类+集成学习) to deal with class imbalance

    Using SMOTEBoost and RUSBoost to deal with class imbalance from:https://aitopics.org/doc/news:1B9F7A ...

随机推荐

  1. linux下tftp使用(转一些命令)

    转载于:http://cache.baiducontent.com/c?m=9d78d513d99d1af31fa7837e7c5083205b4380122ba6d1020ba5843990732c ...

  2. lua table 排序--满足多条件排序

    前提 假设 一个小怪 有三种属性,等级(level).品质(quality).id(pid) 我们需要对他们进行排序,两种排序情况,第一是单一属性排序,比如按照等级进行排序,或者多种属性进行优先级排序 ...

  3. 【原创】js中利用cookie实现记住密码功能

    在登录界面添加记住密码功能,我首先想到的是在java后台中调用cookie存放账号密码,大致如下: HttpServletRequest request HttpServletResponse res ...

  4. SourceTree 免登录跳过初始设置

    SourceTree 安装之后需要使用账号登陆以授权,以前是可以不登陆的,但是现在是强制登陆. 虽然是免费授权,但是碰上不可抗力因素,登陆不是很方便,这里记录一下跳过这个初始化的步骤. 安装之后,转到 ...

  5. jQuery 效果 —— 滑动

    jQuery 效果 -- 滑动 1.向下滑动元素 (1)使用slideDown()方法可以用于向下滑动元素 $("button").click(function(){ $(&quo ...

  6. mysql 增删改查基本语句

    增: insert insert into 表名(字段1,字段2,字段3......字段N) values(值1,值2,值3): 如果不声明插入哪些字段,则默认所有字段. 在插入时注意,往哪个表增加, ...

  7. Beaglebone Black – 连接 GY-91 MPU9250+BMP280 九轴传感器(2)

    这次用 SPI.BBB 有两套 SPI 接口可用,两套都是默认 disable,需要用 overlay 方式启用,即: echo BB-SPIDEV0 > /sys/devices/bone_c ...

  8. log4j2的使用

    预备知识 日志级别:log4j默认六个级别,即trace、debug、info、warn、error、fatal,对应意味着该消息为追踪、调试、普通信息、警告、错误、严重错误。可以根据需要自定义其他 ...

  9. viewport和media query

    viewport: 你可以定义viewport的宽度.如果你不使用width=device-width,在移动端上你的页面延伸会超过视窗布局的宽度(width=980px),如果你使用了width=d ...

  10. Android Studio 简单设置

    转自:http://ask.android-studio.org/?/article/14 界面设置 默认的 Android Studio 为灰色界面,可以选择使用炫酷的黑色界面.Settings - ...