using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks; namespace AnfleCrawler.DataAnalyzer
{
/// <summary>
/// Analyzer for zlhome.com. Depending on the depth of the page being processed it
/// either walks a listing page, stores one housing estate, or stores the deal
/// records listed for an estate.
/// </summary>
internal class Zlhome : AnalyzerBase
{
    /// <summary>
    /// Number of <c>td</c> cells a deal-table row must contain before it is read.
    /// NOTE(review): the column indices used below (0..7) were reconstructed from the
    /// order of the property assignments because the numeric literals were missing
    /// from the source; confirm them against the live page layout.
    /// </summary>
    private const int DealColumnCount = 8;

    /// <summary>
    /// Processes a single landed page according to <c>current.Depth</c>.
    /// </summary>
    /// <param name="current">The page to analyze; its Url, Depth and State are read.</param>
    protected override void AnalyzeInternal(PageLandEntity current)
    {
        var lander = Crawler.Lander;
        var pHandler = CreateContentHandler(current);
        switch (current.Depth)
        {
            // The label's literal was lost in the source. 0 is the only integer constant
            // that converts implicitly to an enum type, so it is the only value that
            // could have compiled next to the DataDepth labels below.
            case 0:
                {
                    var dom = lander.GetDocument(pHandler);
                    DoPerPaging(current, dom.DocumentNode, ".page:first-child a:last-child");

                    // Queue every estate link found on the listing page.
                    foreach (var node in QueryNodes(dom.DocumentNode, ".xqlistBox .l_img a"))
                    {
                        var url = GetHref(node, current.Url);
                        Crawler.PushUrl(url, DataDepth.Houses);
                    }
                }
                break;
            case DataDepth.Houses:
                {
                    var dom = lander.GetDocument(pHandler);

                    // Collect the estate name (with the "关注" text removed) plus the
                    // texts of the ".c:last-child li" items.
                    var attrs = new AttributeFiller();
                    attrs.Append("小区名称:{0}", QueryTexts(dom.DocumentNode, ".sc a").First().Replace("关注", string.Empty));
                    attrs.Append(QueryTexts(dom.DocumentNode, ".c:last-child li"));

                    // The entity is looked up by a key derived from the page URL.
                    Guid hashKey = GenHashKey(current.Url.OriginalString);
                    var bo = Crawler.Repository.LoadHouses(hashKey);
                    bo.SiteID = "Zlhome.com";
                    bo.PageUrl = current.Url.OriginalString;
                    bo.CityName = Crawler.Config.CityName;

                    // Name mapping handed to FillEntity; which side is the scraped label
                    // and which the entity field is decided by AttributeFiller (not shown here).
                    attrs.FillEntity(bo, new Dictionary<string, string>()
                    {
                        {"地址", "小区地址"},
                        {"所属片区", "所属区域"},
                        {"物业类型", "物业类别"},
                        {"骏工日期", "竣工时间"},
                    });
                    MapMark(bo);
                    Repository.Save(bo);
                    Crawler.OutWrite("保存楼盘 {0}", bo.小区名称);

                    // NOTE(review): the Skip count was missing from the source; 1 (i.e. the
                    // second ".xqinfo" node) is an assumption - confirm against the page.
                    var pNode = QueryNodes(dom.DocumentNode, ".xqinfo").Skip(1).First();
                    var dealNode = QueryNode(pNode, "a");
                    var url = GetHref(dealNode, current.Url);

                    // The estate's RowID travels as state so the deal page can reference it.
                    Crawler.PushUrl(url, DataDepth.Deal, bo.RowID);
                }
                break;
            case DataDepth.Deal:
                {
                    Guid housesID = (Guid)current.State;
                    var dom = lander.GetDocument(pHandler);

                    // Rows of the first ".cjxxtable" are stored with IsRent = false,
                    // rows of every following table with IsRent = true.
                    bool isRent = false;
                    foreach (var table in QueryNodes(dom.DocumentNode, ".cjxxtable"))
                    {
                        foreach (var node in QueryNodes(table, "tr"))
                        {
                            var spans = QueryTexts(node, "td").ToArray();

                            // Rows without enough td cells cannot be mapped and would
                            // otherwise raise IndexOutOfRangeException; skip them.
                            if (spans.Length < DealColumnCount)
                            {
                                continue;
                            }

                            // The date stays null when the first cell is not a parsable date.
                            DateTime? transactionDate = null;
                            DateTime parsed;
                            if (DateTime.TryParse(spans[0], out parsed))
                            {
                                transactionDate = parsed;
                            }

                            Repository.SaveHouselisting(new HouselistingEntity()
                            {
                                HousesID = housesID,
                                TransactionDate = transactionDate,
                                Area = spans[1],
                                Apartment = spans[2],
                                Orientation = spans[3],
                                Floor = spans[4],
                                UnitPriceOrLease = spans[5],
                                SoldPriceOrRent = spans[6],
                                ServiceBroker = spans[7],
                                IsRent = isRent
                            });
                            Crawler.OutWrite("保存小区{1}记录 {0}", housesID, isRent ? "出租" : "出售");
                        }
                        isRent = true;
                    }
                }
                break;
        }
    }
}
}

zlhome.com Deal的更多相关文章

  1. Dooioo Deal

    using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using ...

  2. XML节点名称中有小数点处理(deal with dot)导致使用xpath时报错解决方法

    <?xml version="1.0"?> <ModifyFiles> <_Layout.cshtml>123456</_Layout.c ...

  3. whu 1464 deal with numbers

    WHU 1464  deal with numbers 题意: 给你一串数字,对这串数字有三项操作: Minus a,b,c:对区间[a,b]中的每个数都减c. Division a,b,c:对区间[ ...

  4. OK335xS canutils deal with compile error

    /************************************************************************************** * OK335xS ca ...

  5. 能让你聪明的工作DEAL四法则,来自《每周工作四小时》书籍

    来自书籍<每周工作四小时>,作者蒂莫西·费里斯(Tim Ferriss,昵称:蒂姆)   能让你聪明的工作DEAL四法则: 第一步:D——定位(Definition) 第二步:E——精简( ...

  6. how to deal with EINTR fault

    [how to deal with EINTR fault] EINTR:interrupted error.是指一个调用被信号给中断,对于同步的耗时调用来说,这个操作常见,譬如select.read. ...

  7. Spoken English Practice( Believe it or not, I don't need to make believe its a big deal. (believe,deal, You don't say))

    音标复习                                                绿色:连读:红色:略读:蓝色:浊化:橙色:弱读 口语蜕变(2017/6/25) Sorry, t ...

  8. If you want the rainbow, you have to deal with the rain.

    If you want the rainbow, you have to deal with the rain.想要彩虹,就先忍受雨水.

  9. Using SMOTEBoost(过采样) and RUSBoost(使用聚类+集成学习) to deal with class imbalance

    Using SMOTEBoost and RUSBoost to deal with class imbalance from:https://aitopics.org/doc/news:1B9F7A ...

随机推荐

  1. python实现指定目录下JAVA文件单词计数的多进程版本

    要说明的是, 串行版本足够快了, 在我的酷睿双核 debian7.6 下运行只要 0.2s , 简直是难以超越. 多进程版本难以避免大量的进程创建和数据同步与传输开销, 性能反而不如串行版本, 只能作 ...

  2. winform 传值,构造函数等

    窗体转换 制作一个登陆窗体,实现点击按钮关闭此窗体并打开另一个窗体 直接在按钮点击事件中,实例化一个想要打开的窗体 使用show方法打开,并把登陆窗体的visible属性改为false Form1 f ...

  3. mysql5.6中 order by 多个字段排序问题

    今天用order by排序 后面跟了多个字段,如sql语句: SELECT a.id,a.loginname,a.address,u.id,u.`name`,u.address FROM admin_ ...

  4. CSS3的chapter2

    CSS的选择符有很多,大致分为八种: 通配选择符 元素选择符 群组选择符 关系选择符 id及class类选择符 伪类选择符 属性选择符 伪对象选择符 1.通配选择符: 可以使用模糊指定的方式来对对象进 ...

  5. iOS 键盘类型定制归纳

    一.键盘风格 支持8种风格键盘. typedef enum { UIKeyboardTypeDefault, // 默认键盘:支持所有字符 UIKeyboardTypeASCIICapable, // ...

  6. 【Unity3D游戏开发】NGUI之多分辨率下完美分布式协同开发 (五)

    NGUI多分辨率下完美分布式协同开发:不同分辨率下相对于屏幕坐标的Perfab数据不再丢失 NGUI多分辨率下完美分布式协同开发不同分辨率下相对于屏幕坐标的Perfab数据不再丢失 开发问题 原因分析 ...

  7. Oracle Form Data Entry Sample

    I shared a data entry example form here in this post for Oracle Forms beginner developers, so that t ...

  8. file_get_contents微信头像等待时间过长的原因

    UPDATE 2016/05/13 stackoverflow上的解决方法:http://stackoverflow.com/questions/3629504/php-file-get-conten ...

  9. C# 调用 Outlook发送邮件实例

    添加引用:Microsoft.Office.Interop.Outlook using System; using System.Collections.Generic; using System.L ...

  10. python成长之路【第六篇】:python模块--time和datetime

    1.时间表现形式 时间戳  (1970年1月1日之后的秒,即:time.time())格式化的时间字符串   (2014-11-11 11:11,    即:time.strftime('%Y-%m- ...