zlhome.com Deal
using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks; namespace AnfleCrawler.DataAnalyzer
{
/// <summary>
/// Analyzer for pages of zlhome.com. Depending on the depth of the landed page it
/// either enqueues community detail pages, saves a community (houses) record, or
/// saves the transaction rows listed for a community.
/// </summary>
/// <remarks>
/// NOTE(review): this listing appears to have lost its numeric literals during
/// extraction (empty <c>case</c> label, <c>Skip()</c> without a count, <c>spans[]</c>
/// without an index). Those values must be restored from the original project
/// before the class can compile.
/// </remarks>
internal class Zlhome : AnalyzerBase
{
/// <summary>
/// Dispatches on <c>current.Depth</c> and processes the landed page accordingly.
/// </summary>
/// <param name="current">The landed page; its Depth, Url and State are read here.</param>
protected override void AnalyzeInternal(PageLandEntity current)
{
var lander = Crawler.Lander;
var pHandler = CreateContentHandler(current);
switch (current.Depth)
{
// NOTE(review): the case label value is missing here; presumably this is the
// entry/listing depth, since the branch handles paging and pushes Houses URLs.
// TODO confirm against the original source.
case :
{
var dom = lander.GetDocument(pHandler);
// Hands the pager selector to DoPerPaging, then pushes every link found under
// ".xqlistBox .l_img a" to the crawler at the Houses depth.
DoPerPaging(current, dom.DocumentNode, ".page:first-child a:last-child"); foreach (var node in QueryNodes(dom.DocumentNode, ".xqlistBox .l_img a"))
{
var url = GetHref(node, current.Url);
Crawler.PushUrl(url, DataDepth.Houses);
}
}
break;
case DataDepth.Houses:
{
var dom = lander.GetDocument(pHandler);
// Collects attributes: the community name comes from the first ".sc a" text with
// the "关注" text removed; the remaining attributes come from the ".c:last-child li"
// texts. The hash key used to load the entity is generated from the page URL.
// First() throws if ".sc a" yields no text.
var attrs = new AttributeFiller(); attrs.Append("小区名称:{0}", QueryTexts(dom.DocumentNode, ".sc a").First().Replace("关注", string.Empty)); attrs.Append(QueryTexts(dom.DocumentNode, ".c:last-child li")); Guid hashKey = GenHashKey(current.Url.OriginalString);
var bo = Crawler.Repository.LoadHouses(hashKey);
bo.SiteID = "Zlhome.com";
bo.PageUrl = current.Url.OriginalString;
bo.CityName = Crawler.Config.CityName;
// The dictionary is passed to FillEntity as a label mapping; presumably it maps
// the labels used on the page to the entity's field names — verify in AttributeFiller.
attrs.FillEntity(bo, new Dictionary<string, string>()
{
{"地址", "小区地址"},
{"所属片区", "所属区域"},
{"物业类型", "物业类别"},
// NOTE(review): the key is spelled "骏工" rather than "竣工"; presumably it mirrors
// the label as it appears on the page — confirm before changing.
{"骏工日期", "竣工时间"},
});
MapMark(bo);
Repository.Save(bo);
// After logging the save, picks one ".xqinfo" node and follows its first "a" link
// as the deal page, passing bo.RowID along as the pushed page's state.
// NOTE(review): the Skip() count is missing; restore it from the original source.
Crawler.OutWrite("保存楼盘 {0}", bo.小区名称); var pNode = QueryNodes(dom.DocumentNode, ".xqinfo").Skip().First();
var dealNode = QueryNode(pNode, "a");
var url = GetHref(dealNode, current.Url);
Crawler.PushUrl(url, DataDepth.Deal, bo.RowID);
}
break;
case DataDepth.Deal:
{
// The state is the value pushed with the deal URL in the Houses branch (bo.RowID).
Guid housesID = (Guid)current.State;
// isRent starts false and is set to true after the first ".cjxxtable" has been
// processed, so rows of the first table are saved with IsRent = false and rows of
// every later table with IsRent = true.
var dom = lander.GetDocument(pHandler); bool isRent = false;
foreach (var table in QueryNodes(dom.DocumentNode, ".cjxxtable"))
{
foreach (var node in QueryNodes(table, "tr"))
{
// One text per "td" cell of the row.
var spans = QueryTexts(node, "td").ToArray();
// The transaction date stays null when the cell text does not parse as a date.
DateTime? transactionDate = null;
DateTime dump;
// NOTE(review): the index into spans is missing on this line and on every
// spans[] below; the column order cannot be determined from this listing.
if (DateTime.TryParse(spans[], out dump))
{
transactionDate = dump;
}
Repository.SaveHouselisting(new HouselistingEntity()
{
HousesID = housesID,
TransactionDate = transactionDate,
Area = spans[],
Apartment = spans[],
Orientation = spans[],
Floor = spans[],
UnitPriceOrLease = spans[],
SoldPriceOrRent = spans[],
ServiceBroker = spans[],
IsRent = isRent
});
// Logs the houses ID together with "出租" when isRent is true, otherwise "出售".
Crawler.OutWrite("保存小区{1}记录 {0}", housesID, isRent ? "出租" : "出售");
}
isRent = true;
}
}
break;
}
}
}
}
zlhome.com Deal的更多相关文章
- Dooioo Deal
using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using ...
- XML节点名称中有小数点处理(deal with dot)导致使用xpath时报错解决方法
<?xml version="1.0"?> <ModifyFiles> <_Layout.cshtml>123456</_Layout.c ...
- whu 1464 deal with numbers
WHU 1464 deal with numbers 题意: 给你一串数字,对这串数字有三项操作: Minus a,b,c:对区间[a,b]中的每个数都减c. Division a,b,c:对区间[ ...
- OK335xS canutils deal with compile error
/************************************************************************************** * OK335xS ca ...
- 能让你聪明的工作DEAL四法则,来自《每周工作四小时》书籍
来自书籍<每周工作四小时>,作者蒂莫西·费里斯(Tim Ferriss,昵称:蒂姆) 能让你聪明的工作DEAL四法则: 第一步:D——定位(Definition) 第二步:E——精简( ...
- how to deal with EINTR fault
[how to deal with EINTR fault] EINTR:interrupted error.是指一个调用被信号给中断,对于同步的耗时调用来说,这个操作常见,譬如select.read. ...
- Spoken English Practice( Believe it or not, I don't need to make believe its a big deal. (believe,deal, You don't say))
音标复习 绿色:连读:红色:略读:蓝色:浊化:橙色:弱读 口语蜕变(2017/6/25) Sorry, t ...
- If you want the rainbow, you have to deal with the rain.
If you want the rainbow, you have to deal with the rain.想要彩虹,就先忍受雨水.
- Using SMOTEBoost(过采样) and RUSBoost(使用聚类+集成学习) to deal with class imbalance
Using SMOTEBoost and RUSBoost to deal with class imbalance from:https://aitopics.org/doc/news:1B9F7A ...
随机推荐
- VS2010中qDebug输出乱码的问题
1.开发环境:安装Qt5.3.2(离线安装包安装):VS版本为:2010 SP1Rel:源代码默认保存格式为GB2312. 2.输出乱码的代码 #include <QtCore/QCoreApp ...
- 关于SSIS中解密数据库字符串的方法
此文章适合于SSIS新手,我是个小白,在反复查阅资料后仍无果到最后解决问题,走了很多弯路,现在将其中一些关于SSIS的理解写出来,供大家参考,在正文之前,我就我自己的理解,阐明一些概念. 什么是SSI ...
- URL、URI和URN三者之间的区别
URI 统一资源标识符 Uniform Resource Identifier URL 统一资源定位符 Uniform Resource Locator URN 统一资源 ...
- node.js报错总结
1. Error: EISDIR, read 这个报错是目标不应该是文件夹,而是其他类型,本错误出现在使用fs.createReadStream()里参数传递了个文件夹,但是应该传递个文件. 参考地址 ...
- SC.UI
IController using Microsoft.Practices.Prism.Events; using Microsoft.Practices.Prism.Regions; using M ...
- JavaScript中的prototype使用说明
参考 http://abruzzi.iteye.com/blog/1026125 http://www.jb51.net/article/23052.htm
- [问题2015S01] 复旦高等代数 II(14级)每周一题(第二教学周)
[问题2015S01] 设 \(M_n(\mathbb{R})\) 是 \(n\) 阶实方阵全体构成的实线性空间, \(\varphi\) 是 \(M_n(\mathbb{R})\) 上的线性变换, ...
- [问题2014A02] 解答一(两次升阶法,由张钧瑞同学、董麒麟同学提供)
[问题2014A02] 解答一(两次升阶法,由张钧瑞同学.董麒麟同学提供) 将原行列式 \(|A|\) 升阶,考虑如下 \(n+1\) 阶行列式: \[|B|=\begin{vmatrix} 1 &a ...
- Log4Net 配置SQL2008数据库 并传入自定义业务对象
最近根据业务需要,俺们老大要求我们了解一个c#的组件——Log4Net 这玩意儿从来没弄过,感觉挺深奥的,结果经过2天的研究,还算小有所成吧,基本思路已经清晰明了了,不过过程中遇到一些很奇葩的问题,和 ...
- CSS边距---盒子模型
CSS盒子模型 盒子模型主要是有margin(外边距).border(边框).padding(内边距).content(内容)组成,这些属性我们可以把它转移到我们日常生活中的盒子上来理解,日常生活中所 ...