Mytophome Deal
using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks; namespace AnfleCrawler.DataAnalyzer
{
/// <summary>
/// Analyzer for Mytophome pages. For the handled page depth it walks the ".deD_ctt li" nodes,
/// makes sure the related estate record exists (see CheckHouses), and saves one
/// HouselistingEntity per node.
/// </summary>
/// <remarks>
/// NOTE(review): several numeric literals are missing from this copy of the file (the empty
/// "case :" label, the empty "[]" indexers, and "vals.Length == " with no right-hand side).
/// The code does not compile as shown; restore the values from the original source.
/// </remarks>
internal class Mytophome : AnalyzerBase
{
/// <summary>
/// Processes a landed page according to its depth: sets up paging and stores one listing
/// record for every ".deD_ctt li" node found in the document.
/// </summary>
/// <param name="current">The page being analyzed; its Depth selects the branch and its Url is passed to GetHref when resolving links.</param>
protected override void AnalyzeInternal(PageLandEntity current)
{
var lander = Crawler.Lander;
var pHandler = CreateContentHandler(current);
switch (current.Depth)
{
// NOTE(review): the case label value is missing in this copy — confirm which depth this branch handles.
case :
{
var dom = lander.GetDocument(pHandler);
// Give the parent of the "nobr" node the PagingHack id so that the "#id" selector
// passed to DoPerPaging below can address it.
var nextNode = QueryNode(dom.DocumentNode, "nobr").ParentNode;
nextNode.SetAttributeValue("id", PagingHack);
// Run paging first, then iterate over every ".deD_ctt li" node of this page.
DoPerPaging(current, dom.DocumentNode, string.Format("#{0}", PagingHack)); foreach (var node in QueryNodes(dom.DocumentNode, ".deD_ctt li"))
{
// The node's "span" children; their trimmed texts become the field values further down.
var Nset = QueryNodes(node, "span").ToArray();
// NOTE(review): the indexer value is missing — which span carries the link is not visible here.
var hUrl = GetHref(QueryNode(Nset[], "a"), current.Url);
// Rebuild the estate detail URL from the link's "estateId" query parameter.
var query = System.Web.HttpUtility.ParseQueryString(hUrl.Query);
string shid = query["estateId"];
hUrl = new Uri(string.Format("http://{0}/wiki/{1}/detail.html", hUrl.Authority, shid));
Guid housesID;
// When CheckHouses raises HtmlNodeMissingException the error is logged and this node is skipped.
try
{
CheckHouses(hUrl, out housesID);
}
catch (HtmlNodeMissingException ex)
{
App.LogError(ex, "OrgUrl={0} HousesUrl={1}", shid, hUrl);
continue;
} var vals = Nset.Select(p => p.InnerText.HtmlTrim()).ToArray();
// The last value is used as the transaction date when it parses; otherwise the date stays null.
DateTime? transactionDate = null;
DateTime dump;
if (DateTime.TryParse(vals.Last(), out dump))
{
transactionDate = dump;
}
// NOTE(review): the compared length and all vals[] indices below are missing in this copy.
// The first branch additionally fills BuildingName; the else branch leaves it unset.
if (vals.Length == )
{
Repository.SaveHouselisting(new HouselistingEntity()
{
HousesID = housesID,
TransactionDate = transactionDate,
BuildingName = vals[],
Area = string.Format("{0}平方", vals[]),
SoldPriceOrRent = string.Format("{0}万", vals[]),
UnitPriceOrLease = string.Format("{0}元/平方", vals[]),
});
}
else
{
Repository.SaveHouselisting(new HouselistingEntity()
{
HousesID = housesID,
TransactionDate = transactionDate,
Area = string.Format("{0}平方", vals[]),
SoldPriceOrRent = string.Format("{0}万", vals[]),
UnitPriceOrLease = string.Format("{0}元/平方", vals[]),
});
}
// Progress message; the text reads "saved estate sale record {0}".
Crawler.OutWrite("保存小区出售记录 {0}", housesID);
}
}
break;
}
} private void CheckHouses(Uri housesUrl, out Guid housesID)
{
// Purpose: derive the estate key from housesUrl, fetch the estate detail page, and save the
// estate record when the stored record has no SiteID yet.
//   housesUrl - estate detail page URL; its OriginalString is hashed into the key and stored as PageUrl.
//   housesID  - receives the key returned by GenHashKey for the URL.
// The caller catches HtmlNodeMissingException around this method; which call below raises it
// is not visible in this file.
var pHandler = CreateContentHandler(new PageLandEntity()
{
Url = housesUrl,
Depth = DataDepth.Houses
});
// NOTE(review): HACK is defined outside this view; its effect on the fetch cannot be confirmed here.
pHandler.AjaxBlocks.Add(HACK);
var dom = Crawler.Lander.GetDocument(pHandler);
// The page is fetched and its ".xxjs_rbar_ct li" texts collected before the existence check
// below, so the request is made even when the estate record turns out to be stored already.
var attrs = new AttributeFiller(); attrs.Append(QueryTexts(dom.DocumentNode, ".xxjs_rbar_ct li")); housesID = GenHashKey(housesUrl.OriginalString);
var bo = Crawler.Repository.LoadHouses(housesID);
// Nothing to save when the loaded record already carries a SiteID.
if (!string.IsNullOrEmpty(bo.SiteID))
{
return;
}
bo.SiteID = "Mytophome.com";
bo.PageUrl = housesUrl.OriginalString;
bo.CityName = Crawler.Config.CityName;
// Presumably keys are the labels scraped from the page and values are the entity member names
// they map to — confirm against AttributeFiller.FillEntity.
attrs.FillEntity(bo, new Dictionary<string, string>()
{
{"楼盘名称", "小区名称"},
{"楼盘地址", "小区地址"},
{"发展商", "开发商"},
{"物管公司", "物业公司"},
{"物管电话", "物业办公电话"},
});
MapMark(bo);
Crawler.Repository.Save(bo);
// Progress message; the text reads "saved estate {0}".
Crawler.OutWrite("保存楼盘 {0}", bo.小区名称);
}
}
}
Mytophome Deal的更多相关文章
- zlhome.com Deal
using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using ...
- Dooioo Deal
using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using ...
- XML节点名称中有小数点处理(deal with dot)导致使用xpath时报错解决方法
<?xml version="1.0"?> <ModifyFiles> <_Layout.cshtml>123456</_Layout.c ...
- whu 1464 deal with numbers
WHU 1464 deal with numbers 题意: 给你一串数字,对这串数字有三项操作: Minus a,b,c:对区间[a,b]中的每个数都减c. Division a,b,c:对区间[ ...
- OK335xS canutils deal with compile error
/************************************************************************************** * OK335xS ca ...
- 能让你聪明的工作DEAL四法则,来自《每周工作四小时》书籍
来自书籍<每周工作四小时>,作者蒂莫西·费里斯(Tim Ferriss,昵称:蒂姆) 能让你聪明的工作DEAL四法则: 第一步:D——定位(Definition) 第二步:E——精简( ...
- how to deal with EINTR fault
[how to deal with EINTR fault] EINTR:interrupted error.是指一个调用被信号给中断,对于同步的耗时调用来说,这个操作常见,譬如select.read. ...
- Spoken English Practice( Believe it or not, I don't need to make believe its a big deal. (believe,deal, You don't say))
音标复习 绿色:连读:红色:略读:蓝色:浊化:橙色:弱读 口语蜕变(2017/6/25) Sorry, t ...
- If you want the rainbow, you have to deal with the rain.
If you want the rainbow, you have to deal with the rain.想要彩虹,就先忍受雨水.
随机推荐
- Qt报表控件NCReport教程:报表创建示例
NCReport是 一款10多年时间的老牌报表控件,最初是在2002年时作为qt3的应用程序的一个联合项目,后来就成为了一个独立的GPL项目.现在的NCReport 是一款轻量级.快速.多平台.简单易 ...
- JavaScript原型理解
这东西我还不是很理解,但是把自己实践的过程记录下来,希望积累到一定程度,能自然而然的理解了.很多东西我是这样慢慢理解的,明白为啥是那样子,真的很神奇哦.少说废话,开始吧. 可以先阅读这篇文章 fun ...
- 赤红血OL
包含海量的PSD文档!!全PSD源文档-446M.你值得拥有! <ignore_js_op> <ignore_js_op> <ignore_js_op> <i ...
- 17.KVM安装之虚拟磁盘,安装脚本
1.创建磁盘 vm.list 指定虚拟磁盘名称和主机名 create_vm.sh #创建vm.list中的虚拟磁盘,并指定大小100G #!/bin/bash VM_DIR="/opt ...
- 试用VSCode
VSCode是代码编辑器,不是IDE. 优点: 1.内置ES6代码高亮和提示,语法验证 2.除了支持到变量定义处Go to definition,还支持弹框显示变量定义处peek definition ...
- storm-kafka组件中KafkaOffsetMetric相关统计指标说明
storm-kafka组件中KafkaOffsetMetric相关统计指标说明 storm-kafka是storm提供的一个读取kafka的组件,用于从kafka队列中消费数据.KafkaOffset ...
- C#浅拷贝与深拷贝区别
也许会有人这样解释C# 中浅拷贝与深拷贝区别: 浅拷贝是对引用类型拷贝地址,对值类型直接进行拷贝. 不能说它完全错误,但至少还不够严谨.比如:string 类型咋说? 其实,我们可以通过实践来寻找答案 ...
- js函数、变量提升(hoisting)
其实我只是想复习下变量提升的,然后看到了函数提升,然后再看到了函数声明.函数表达式. 有必要怀着敬仰之心提及园子里的TOM大叔的解密命名函数表达式,不愧是大叔,好好地脑补了下基础知识. 在ECMASc ...
- php 判断是手机版还是电脑端
function isMobile() { // 如果有HTTP_X_WAP_PROFILE则一定是移动设备 if (isset ($_SERVER['HTTP_X_WAP_PROFILE'])) { ...
- List怎么遍历删除元素
public static void main(String[] args) { List<String> list = new ArrayList<String>(); ...