using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks; namespace AnfleCrawler.DataAnalyzer
{
/// <summary>
/// Analyzer for Mytophome listing pages. For every row of a paged listing it makes sure the
/// matching estate ("houses") record is stored, then saves one house-listing record for the row.
/// </summary>
internal class Mytophome : AnalyzerBase
{
/// <summary>
/// Handles one landed page, branching on <c>current.Depth</c>.
/// </summary>
/// <param name="current">Page being analyzed; its <c>Url</c> is passed to <c>GetHref</c> when reading row links.</param>
protected override void AnalyzeInternal(PageLandEntity current)
{
var lander = Crawler.Lander;
var pHandler = CreateContentHandler(current);
switch (current.Depth)
{
// NOTE(review): this case label has no constant, so the switch does not compile as written.
// The value looks like it was lost when this copy was made - restore it from the original source.
case :
{
var dom = lander.GetDocument(pHandler);
// Tag the parent of the "nobr" node with the PagingHack id so that it can be addressed
// through the "#id" selector handed to DoPerPaging on the next statement.
var nextNode = QueryNode(dom.DocumentNode, "nobr").ParentNode;
nextNode.SetAttributeValue("id", PagingHack);
// Pass the paging container to DoPerPaging, then walk every ".deD_ctt li" row of this page.
DoPerPaging(current, dom.DocumentNode, string.Format("#{0}", PagingHack)); foreach (var node in QueryNodes(dom.DocumentNode, ".deD_ctt li"))
{
// All <span> cells of the current row.
var Nset = QueryNodes(node, "span").ToArray();
// NOTE(review): the index into Nset is missing (same loss as the case label) - TODO restore.
var hUrl = GetHref(QueryNode(Nset[], "a"), current.Url);
// Read estateId from the link's query string and build the estate detail URL on the same authority.
var query = System.Web.HttpUtility.ParseQueryString(hUrl.Query);
string shid = query["estateId"];
hUrl = new Uri(string.Format("http://{0}/wiki/{1}/detail.html", hUrl.Authority, shid));
Guid housesID;
try
{
CheckHouses(hUrl, out housesID);
}
catch (HtmlNodeMissingException ex)
{
// The estate page could not be processed: log it and skip this row without saving a listing.
App.LogError(ex, "OrgUrl={0} HousesUrl={1}", shid, hUrl);
continue;
// The next line closes the catch block and then collects the trimmed text of every span cell.
} var vals = Nset.Select(p => p.InnerText.HtmlTrim()).ToArray();
// The last cell is treated as the transaction date; it stays null when it does not parse.
DateTime? transactionDate = null;
DateTime dump;
if (DateTime.TryParse(vals.Last(), out dump))
{
transactionDate = dump;
}
// Two row layouts are handled: this branch also stores BuildingName, the else branch does not.
// NOTE(review): the expected cell count and every vals[] index below are missing - TODO restore.
if (vals.Length == )
{
Repository.SaveHouselisting(new HouselistingEntity()
{
HousesID = housesID,
TransactionDate = transactionDate,
BuildingName = vals[],
Area = string.Format("{0}平方", vals[]),
SoldPriceOrRent = string.Format("{0}万", vals[]),
UnitPriceOrLease = string.Format("{0}元/平方", vals[]),
});
}
else
{
Repository.SaveHouselisting(new HouselistingEntity()
{
HousesID = housesID,
TransactionDate = transactionDate,
Area = string.Format("{0}平方", vals[]),
SoldPriceOrRent = string.Format("{0}万", vals[]),
UnitPriceOrLease = string.Format("{0}元/平方", vals[]),
});
}
Crawler.OutWrite("保存小区出售记录 {0}", housesID);
}
}
break;
}
// The next line first closes AnalyzeInternal and then declares CheckHouses.
//
// CheckHouses: derives the estate id from housesUrl and stores the estate record if needed.
//   housesUrl - estate detail page; its OriginalString is hashed into the id and saved as PageUrl.
//   housesID  - (out) GenHashKey(housesUrl.OriginalString).
// The caller catches HtmlNodeMissingException around this call; which statement here raises it
// is not visible in this file - presumably the document query helpers (TODO confirm).
} private void CheckHouses(Uri housesUrl, out Guid housesID)
{
var pHandler = CreateContentHandler(new PageLandEntity()
{
Url = housesUrl,
Depth = DataDepth.Houses
});
pHandler.AjaxBlocks.Add(HACK);
var dom = Crawler.Lander.GetDocument(pHandler);
// Collect the texts of the ".xxjs_rbar_ct li" items into the attribute filler, then compute the id.
// NOTE(review): the page is fetched and queried before the "already stored" check below;
// whether that ordering is intentional cannot be told from here.
var attrs = new AttributeFiller(); attrs.Append(QueryTexts(dom.DocumentNode, ".xxjs_rbar_ct li")); housesID = GenHashKey(housesUrl.OriginalString);
var bo = Crawler.Repository.LoadHouses(housesID);
// A non-empty SiteID is taken to mean the record was already filled in: nothing more to do.
if (!string.IsNullOrEmpty(bo.SiteID))
{
return;
}
bo.SiteID = "Mytophome.com";
bo.PageUrl = housesUrl.OriginalString;
bo.CityName = Crawler.Config.CityName;
// Presumably maps labels found on the page (left) to entity member names (right) - TODO confirm
// against AttributeFiller.FillEntity.
attrs.FillEntity(bo, new Dictionary<string, string>()
{
{"楼盘名称", "小区名称"},
{"楼盘地址", "小区地址"},
{"发展商", "开发商"},
{"物管公司", "物业公司"},
{"物管电话", "物业办公电话"},
});
MapMark(bo);
Crawler.Repository.Save(bo);
Crawler.OutWrite("保存楼盘 {0}", bo.小区名称);
}
}
}

Mytophome Deal的更多相关文章

  1. zlhome.com Deal

    using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using ...

  2. Dooioo Deal

    using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using ...

  3. XML节点名称中有小数点处理(deal with dot)导致使用xpath时报错解决方法

    <?xml version="1.0"?> <ModifyFiles> <_Layout.cshtml>123456</_Layout.c ...

  4. whu 1464 deal with numbers

    WHU 1464  deal with numbers 题意: 给你一串数字,对这串数字有三项操作: Minus a,b,c:对区间[a,b]中的每个数都减c. Division a,b,c:对区间[ ...

  5. OK335xS canutils deal with compile error

    /************************************************************************************** * OK335xS ca ...

  6. 能让你聪明的工作DEAL四法则,来自《每周工作四小时》书籍

    来自书籍<每周工作四小时>,作者蒂莫西·费里斯(Tim Ferriss,昵称:蒂姆)   能让你聪明的工作DEAL四法则: 第一步:D——定位(Definition) 第二步:E——精简( ...

  7. how to deal with EINTR fault

    [how to deal with EINTR fault] EINTR:interrupted error.是指一个调用被信号给中断,对于同步的耗时调用来说,这个操作常见,譬如select.read. ...

  8. Spoken English Practice( Believe it or not, I don't need to make believe its a big deal. (believe,deal, You don't say))

    音标复习                                                绿色:连读:红色:略读:蓝色:浊化:橙色:弱读 口语蜕变(2017/6/25) Sorry, t ...

  9. If you want the rainbow, you have to deal with the rain.

    If you want the rainbow, you have to deal with the rain.想要彩虹,就先忍受雨水.

随机推荐

  1. Qt报表控件NCReport教程:报表创建示例

    NCReport是 一款10多年时间的老牌报表控件,最初是在2002年时作为qt3的应用程序的一个联合项目,后来就成为了一个独立的GPL项目.现在的NCReport 是一款轻量级.快速.多平台.简单易 ...

  2. JavaScript原型理解

    这东西我还不是很理解,但是把自己实践的过程记录下来,希望积累到一定程度,能自然而然的理解了.很多东西我是这样慢慢理解的,明白为啥是那样子,真的很神奇哦.少说废话,开始吧. 可以先阅读这篇文章 fun ...

  3. 赤红血OL

    包含海量的PSD文档!!全PSD源文档-446M.你值得拥有! <ignore_js_op> <ignore_js_op> <ignore_js_op> <i ...

  4. 17.KVM安装之虚拟磁盘,安装脚本

    1.创建磁盘 vm.list 指定虚拟磁盘名称和主机名 create_vm.sh    #创建vm.list中的虚拟磁盘,并指定大小100G #!/bin/bash VM_DIR="/opt ...

  5. 试用VSCode

    VSCode是代码编辑器,不是IDE. 优点: 1.内置ES6代码高亮和提示,语法验证 2.除了支持到变量定义处Go to definition,还支持弹框显示变量定义处peek definition ...

  6. storm-kafka组件中KafkaOffsetMetric相关统计指标说明

    storm-kafka组件中KafkaOffsetMetric相关统计指标说明 storm-kafka是storm提供的一个读取kafka的组件,用于从kafka队列中消费数据.KafkaOffset ...

  7. C#浅拷贝与深拷贝区别

    也许会有人这样解释C# 中浅拷贝与深拷贝区别: 浅拷贝是对引用类型拷贝地址,对值类型直接进行拷贝. 不能说它完全错误,但至少还不够严谨.比如:string 类型咋说? 其实,我们可以通过实践来寻找答案 ...

  8. js函数、变量提升(hoisting)

    其实我只是想复习下变量提升的,然后看到了函数提升,然后再看到了函数声明.函数表达式. 有必要怀着敬仰之心提及园子里的TOM大叔的解密命名函数表达式,不愧是大叔,好好地脑补了下基础知识. 在ECMASc ...

  9. php 判断是手机版还是电脑端

    function isMobile() { // 如果有HTTP_X_WAP_PROFILE则一定是移动设备 if (isset ($_SERVER['HTTP_X_WAP_PROFILE'])) { ...

  10. List怎么遍历删除元素

    public static void main(String[] args) {  List<String> list = new ArrayList<String>();   ...