using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks; namespace AnfleCrawler.DataAnalyzer
{
/// <summary>
/// Analyzer for the site recorded below as "Mytophome.com". For each listing row on a page it
/// makes sure the referenced housing estate is stored, then saves a listing record for that row.
/// </summary>
/// <remarks>
/// NOTE(review): several literals are absent in this copy of the file (the <c>case</c> label,
/// the array indexes and the length comparison), so the class does not compile as shown.
/// Their values cannot be derived from this file and must be restored from the original source.
/// </remarks>
internal class Mytophome : AnalyzerBase
{
/// <summary>
/// Processes one landed page according to its <c>Depth</c>: registers paging, then walks the
/// rows matched by ".deD_ctt li" and saves a <c>HouselistingEntity</c> for each of them.
/// </summary>
/// <param name="current">The page being analyzed; its Url is used to resolve row links.</param>
protected override void AnalyzeInternal(PageLandEntity current)
{
var lander = Crawler.Lander;
var pHandler = CreateContentHandler(current);
switch (current.Depth)
{
// NOTE(review): the case label has no value in this copy — restore it from the original source.
case :
{
var dom = lander.GetDocument(pHandler);
// Give the parent of the "nobr" node the PagingHack id so it can be addressed
// with an id selector in the DoPerPaging call below.
var nextNode = QueryNode(dom.DocumentNode, "nobr").ParentNode;
nextNode.SetAttributeValue("id", PagingHack);
// Hand paging over to DoPerPaging, then iterate the listing rows of this page.
DoPerPaging(current, dom.DocumentNode, string.Format("#{0}", PagingHack)); foreach (var node in QueryNodes(dom.DocumentNode, ".deD_ctt li"))
{
// The row's <span> elements; their texts are read as the row's column values further down.
var Nset = QueryNodes(node, "span").ToArray();
// NOTE(review): the index into Nset is missing here — restore it from the original source.
var hUrl = GetHref(QueryNode(Nset[], "a"), current.Url);
var query = System.Web.HttpUtility.ParseQueryString(hUrl.Query);
string shid = query["estateId"];
// Rebuild the estate detail URL from the link's host and its estateId query value.
hUrl = new Uri(string.Format("http://{0}/wiki/{1}/detail.html", hUrl.Authority, shid));
Guid housesID;
try
{
CheckHouses(hUrl, out housesID);
}
catch (HtmlNodeMissingException ex)
{
// The estate page could not be parsed: log it and skip this row.
App.LogError(ex, "OrgUrl={0} HousesUrl={1}", shid, hUrl);
continue;
// Trimmed text of every span in the row.
} var vals = Nset.Select(p => p.InnerText.HtmlTrim()).ToArray();
// The last value is tried as the transaction date; it stays null when it does not parse.
DateTime? transactionDate = null;
DateTime dump;
if (DateTime.TryParse(vals.Last(), out dump))
{
transactionDate = dump;
}
// NOTE(review): the expected length and every vals index below are missing in this copy.
// The first branch also fills BuildingName; the else branch leaves it unset.
if (vals.Length == )
{
Repository.SaveHouselisting(new HouselistingEntity()
{
HousesID = housesID,
TransactionDate = transactionDate,
BuildingName = vals[],
Area = string.Format("{0}平方", vals[]),
SoldPriceOrRent = string.Format("{0}万", vals[]),
UnitPriceOrLease = string.Format("{0}元/平方", vals[]),
});
}
else
{
Repository.SaveHouselisting(new HouselistingEntity()
{
HousesID = housesID,
TransactionDate = transactionDate,
Area = string.Format("{0}平方", vals[]),
SoldPriceOrRent = string.Format("{0}万", vals[]),
UnitPriceOrLease = string.Format("{0}元/平方", vals[]),
});
}
// Report the saved listing on the crawler output.
Crawler.OutWrite("保存小区出售记录 {0}", housesID);
}
}
break;
}
} private void CheckHouses(Uri housesUrl, out Guid housesID)
{
// Ensures the estate at housesUrl is stored and returns its key through housesID.
// housesID is GenHashKey of the URL's original string. When the loaded entity already
// has a SiteID the method returns without saving; otherwise it fills the entity from
// the page's attribute list and saves it.
// NOTE(review): the page is downloaded before the SiteID check, so the request is made
// even when nothing ends up being saved.
var pHandler = CreateContentHandler(new PageLandEntity()
{
Url = housesUrl,
Depth = DataDepth.Houses
});
// HACK is not defined in this class — presumably a member of AnalyzerBase; confirm.
pHandler.AjaxBlocks.Add(HACK);
var dom = Crawler.Lander.GetDocument(pHandler);
// Collect the texts of ".xxjs_rbar_ct li" for FillEntity, then derive the estate key from the URL.
var attrs = new AttributeFiller(); attrs.Append(QueryTexts(dom.DocumentNode, ".xxjs_rbar_ct li")); housesID = GenHashKey(housesUrl.OriginalString);
var bo = Crawler.Repository.LoadHouses(housesID);
// A non-empty SiteID is treated as "already populated" — nothing more to do.
if (!string.IsNullOrEmpty(bo.SiteID))
{
return;
}
bo.SiteID = "Mytophome.com";
bo.PageUrl = housesUrl.OriginalString;
bo.CityName = Crawler.Config.CityName;
// Presumably maps the labels shown on the page (left) to entity property names (right);
// verify against AttributeFiller.FillEntity.
attrs.FillEntity(bo, new Dictionary<string, string>()
{
{"楼盘名称", "小区名称"},
{"楼盘地址", "小区地址"},
{"发展商", "开发商"},
{"物管公司", "物业公司"},
{"物管电话", "物业办公电话"},
});
MapMark(bo);
Crawler.Repository.Save(bo);
Crawler.OutWrite("保存楼盘 {0}", bo.小区名称);
}
}
}

Mytophome Deal的更多相关文章

  1. zlhome.com Deal

    using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using ...

  2. Dooioo Deal

    using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using ...

  3. XML节点名称中有小数点处理(deal with dot)导致使用xpath时报错解决方法

    <?xml version="1.0"?> <ModifyFiles> <_Layout.cshtml>123456</_Layout.c ...

  4. whu 1464 deal with numbers

    WHU 1464  deal with numbers 题意: 给你一串数字,对这串数字有三项操作: Minus a,b,c:对区间[a,b]中的每个数都减c. Division a,b,c:对区间[ ...

  5. OK335xS canutils deal with compile error

    /************************************************************************************** * OK335xS ca ...

  6. 能让你聪明的工作DEAL四法则,来自《每周工作四小时》书籍

    来自书籍<每周工作四小时>,作者蒂莫西·费里斯(Tim Ferriss,昵称:蒂姆)   能让你聪明的工作DEAL四法则: 第一步:D——定位(Definition) 第二步:E——精简( ...

  7. how to deal with EINTR fault

    [how to deal with EINTR fault] EINTR:interrupted error.是指一个调用被信号给中断,对于同步的耗时调用来说,这个操作常见,譬如select.read. ...

  8. Spoken English Practice( Believe it or not, I don't need to make believe its a big deal. (believe,deal, You don't say))

    音标复习                                                绿色:连读:红色:略读:蓝色:浊化:橙色:弱读 口语蜕变(2017/6/25) Sorry, t ...

  9. If you want the rainbow, you have to deal with the rain.

    If you want the rainbow, you have to deal with the rain.想要彩虹,就先忍受雨水.

随机推荐

  1. JS逗号运算符的用法详解

    逗号运算符的用法详解 注意: 一.由于目前正在攻读JavaScript技术,所以这里拿JavaScript为例.你可以自己在PHP中试试. 二.JavaScript语法比较复杂,因此拿JavaScri ...

  2. 马哥教育视频笔记:01(Linux常用命令)

    1.查看缓存中使用的命令和命令路径 [wskwskwsk@localhost /]$ hash 命中 命令 /usr/bin/printenv /usr/bin/ls /usr/bin/clear 2 ...

  3. 异常问题解决Error:Execution failed for task ':app:processDebugManifest'

    Error:Execution failed for task ':app:processDebugManifest' www.MyException.Cn  网友分享于:2015-12-28  浏览 ...

  4. Android知识散点

    1.所有活动都需要在AndroidManifest.xml中注册后才能生效. <activity android:name=".MainActivity" android:l ...

  5. UVa 12299 RMQ with Shifts(移位RMQ)

    p.MsoNormal { margin: 0pt; margin-bottom: .0001pt; text-align: justify; font-family: "Times New ...

  6. git学习笔记总结

    git是免费的开源的分布式版本控制系统,github是一个用git做版本控制的项目托管平台.说白了git就是帮忙你管理你开发的代码,代码每次修改的历史,多人更好的一起开发项目. 分布式版本控制系统,每 ...

  7. [poj2155]Matrix(二维树状数组)

    Matrix Time Limit: 3000MS   Memory Limit: 65536K Total Submissions: 25004   Accepted: 9261 Descripti ...

  8. Spring + Mybatis 使用 PageHelper 插件分页

    原文:http://www.cnblogs.com/yucongblog/p/5330886.html 先增加maven依赖: <dependency> <groupId>co ...

  9. 【转载】jQuery1.5之后的deferred对象详解

    原文:http://www.ruanyifeng.com/blog/2011/08/a_detailed_explanation_of_jquery_deferred_object.html 原文作者 ...

  10. dll文件是什么

    dll实际上是动态链接库的缩写,从windows1.0开始,动态链接库就是整个操作系统的基础,那么这有什么作用呢?在dos时代,程序员是通过编写程序来达到预期的目的的,每实现一个目的就需要编写一个程序 ...