Mytophome Deal
using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks; namespace AnfleCrawler.DataAnalyzer
{
/// <summary>
/// Analyzer that reads sold-listing rows from a crawled page and stores them as
/// <c>HouselistingEntity</c> records, creating the related estate ("houses") record
/// tagged with SiteID "Mytophome.com" when it has not been saved yet.
/// </summary>
/// <remarks>
/// NOTE(review): in this copy of the source every numeric literal appears to be missing
/// (the <c>case</c> label, the array indexes and the length comparison are empty).
/// The original values must be restored before this compiles.
/// </remarks>
internal class Mytophome : AnalyzerBase
{
/// <summary>
/// Processes one landed page: sets up paging, then walks every ".deD_ctt li" row and
/// saves one listing per row.
/// </summary>
/// <param name="current">The page being analyzed; its Depth selects the branch and its Url is used to resolve row links.</param>
protected override void AnalyzeInternal(PageLandEntity current)
{
var lander = Crawler.Lander;
var pHandler = CreateContentHandler(current);
switch (current.Depth)
{
// NOTE(review): the case label value is missing in this copy of the source — confirm which depth this branch handles.
case :
{
var dom = lander.GetDocument(pHandler);
// The pager container has no id of its own, so the parent of the "nobr" node is
// tagged with the PagingHack id and then addressed through an "#id" selector.
var nextNode = QueryNode(dom.DocumentNode, "nobr").ParentNode;
nextNode.SetAttributeValue("id", PagingHack);
// The line below holds two statements: the paging call, then the start of the per-row loop.
DoPerPaging(current, dom.DocumentNode, string.Format("#{0}", PagingHack)); foreach (var node in QueryNodes(dom.DocumentNode, ".deD_ctt li"))
{
// Each row's <span> elements are its cells.
var Nset = QueryNodes(node, "span").ToArray();
// NOTE(review): the span index is missing here — confirm which cell contains the estate link.
var hUrl = GetHref(QueryNode(Nset[], "a"), current.Url);
var query = System.Web.HttpUtility.ParseQueryString(hUrl.Query);
// The estate id comes from the link's "estateId" query parameter and is used to
// build the estate detail URL on the same host.
string shid = query["estateId"];
hUrl = new Uri(string.Format("http://{0}/wiki/{1}/detail.html", hUrl.Authority, shid));
Guid housesID;
try
{
CheckHouses(hUrl, out housesID);
}
catch (HtmlNodeMissingException ex)
{
// The estate page lacked an expected node: log it and skip this row instead of aborting the whole page.
App.LogError(ex, "OrgUrl={0} HousesUrl={1}", shid, hUrl);
continue;
// The line below closes the catch block and then collects the trimmed text of every cell.
} var vals = Nset.Select(p => p.InnerText.HtmlTrim()).ToArray();
// The last cell is parsed as the transaction date; it stays null when parsing fails.
DateTime? transactionDate = null;
DateTime dump;
if (DateTime.TryParse(vals.Last(), out dump))
{
transactionDate = dump;
}
// NOTE(review): the expected cell count is missing. The two branches differ only in
// whether BuildingName is set, so this presumably separates rows with and without a
// building-name cell — confirm. All vals[] indexes below are missing as well.
if (vals.Length == )
{
Repository.SaveHouselisting(new HouselistingEntity()
{
HousesID = housesID,
TransactionDate = transactionDate,
BuildingName = vals[],
Area = string.Format("{0}平方", vals[]),
SoldPriceOrRent = string.Format("{0}万", vals[]),
UnitPriceOrLease = string.Format("{0}元/平方", vals[]),
});
}
else
{
Repository.SaveHouselisting(new HouselistingEntity()
{
HousesID = housesID,
TransactionDate = transactionDate,
Area = string.Format("{0}平方", vals[]),
SoldPriceOrRent = string.Format("{0}万", vals[]),
UnitPriceOrLease = string.Format("{0}元/平方", vals[]),
});
}
Crawler.OutWrite("保存小区出售记录 {0}", housesID);
}
}
break;
}
// The line below closes AnalyzeInternal and then declares CheckHouses.
} private void CheckHouses(Uri housesUrl, out Guid housesID)
{
// Purpose: derive the estate id from housesUrl (returned through housesID) and, if the
// stored estate record has no SiteID yet, fill it from the estate detail page and save it.
// An HtmlNodeMissingException from this method is caught by the caller in AnalyzeInternal.
var pHandler = CreateContentHandler(new PageLandEntity()
{
Url = housesUrl,
Depth = DataDepth.Houses
});
pHandler.AjaxBlocks.Add(HACK);
// NOTE(review): the document is loaded and its attribute list parsed before the
// "already saved" check below, so this work also runs for estates that are already stored.
var dom = Crawler.Lander.GetDocument(pHandler);
// The line below holds three statements: create the filler, feed it the
// ".xxjs_rbar_ct li" texts, and compute the id as a hash key of the original URL string.
var attrs = new AttributeFiller(); attrs.Append(QueryTexts(dom.DocumentNode, ".xxjs_rbar_ct li")); housesID = GenHashKey(housesUrl.OriginalString);
var bo = Crawler.Repository.LoadHouses(housesID);
// A non-empty SiteID is treated as "already saved": nothing more to do.
if (!string.IsNullOrEmpty(bo.SiteID))
{
return;
}
bo.SiteID = "Mytophome.com";
bo.PageUrl = housesUrl.OriginalString;
bo.CityName = Crawler.Config.CityName;
// Presumably maps the attribute labels found on the page (keys) to entity member
// names (values) — confirm against AttributeFiller.FillEntity.
attrs.FillEntity(bo, new Dictionary<string, string>()
{
{"楼盘名称", "小区名称"},
{"楼盘地址", "小区地址"},
{"发展商", "开发商"},
{"物管公司", "物业公司"},
{"物管电话", "物业办公电话"},
});
MapMark(bo);
Crawler.Repository.Save(bo);
Crawler.OutWrite("保存楼盘 {0}", bo.小区名称);
}
}
}
Mytophome Deal的更多相关文章
- zlhome.com Deal
using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using ...
- Dooioo Deal
using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using ...
- XML节点名称中有小数点处理(deal with dot)导致使用xpath时报错解决方法
<?xml version="1.0"?> <ModifyFiles> <_Layout.cshtml>123456</_Layout.c ...
- whu 1464 deal with numbers
WHU 1464 deal with numbers 题意: 给你一串数字,对这串数字有三项操作: Minus a,b,c:对区间[a,b]中的每个数都减c. Division a,b,c:对区间[ ...
- OK335xS canutils deal with compile error
/************************************************************************************** * OK335xS ca ...
- 能让你聪明的工作DEAL四法则,来自《每周工作四小时》书籍
来自书籍<每周工作四小时>,作者蒂莫西·费里斯(Tim Ferriss,昵称:蒂姆) 能让你聪明的工作DEAL四法则: 第一步:D——定位(Definition) 第二步:E——精简( ...
- how to deal with EINTR fault
[how to deal with EINTR fault] EINTR:interrupted error.是指一个调用被信号给中断,对于同步的耗时调用来说,这个操作常见,譬如select.read. ...
- Spoken English Practice( Believe it or not, I don't need to make believe its a big deal. (believe,deal, You don't say))
音标复习 绿色:连读:红色:略读:蓝色:浊化:橙色:弱读 口语蜕变(2017/6/25) Sorry, t ...
- If you want the rainbow, you have to deal with the rain.
If you want the rainbow, you have to deal with the rain.想要彩虹,就先忍受雨水.
随机推荐
- IsBackground的理解
1.当在主线程中创建了一个线程,那么该线程的IsBackground默认是设置为FALSE的. 2.当主线程退出的时候,IsBackground=FALSE的线程还会继续执行下去,直到线程执行结束. ...
- python : HTML+CSS (左侧菜单)
左侧菜单 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3 ...
- nwjs如何打包文件为exe文件并修改exe图标
1.下载nw.js,如果是SDK版的可以调试页面,打包后可不可以调试还没有试,不是SDK的话没有调试选项,试了一下,打包后的文件也一样调试不了. 2.把要打包的文件和package.json都放在nw ...
- OpenCV 绘制图像直方图
OpenCV绘制图像直方图,版本2.4.11 直方图可展示图像中的像素分布,是用以表示数字图像中亮度分布的直方图,标绘了图像中每个亮度值的像素数.可以借助观察该直方图了解需要如何调整亮度分布.这种直方 ...
- Windows server 2012 AD DS 搭建步骤
服务器版本:Windows server 2012 1. 配置网络,由于本机会搭建DNS服务器,因此首选DNS服务器设置为127.0.0.1 2. 打开服务器管理器 3. 点击添加角色和功能,下 ...
- ASP.NET MVC view引入命名空间
两种方式:1,在cshtml中引入@using Admin.Models 2,在 Views 文件夹中的 Web.config 文件中添加引用如: <pages pageBaseType=&qu ...
- URL的格式
URL RFC: http://www.ietf.org/rfc/rfc1738.txt URI RFC: http://www.ietf.org/rfc/rfc2396.txt 转自: http ...
- jq 移除包含某个字符串的类名js
el.removeClass (function (index, css) { return (css.match (/(^|\s)star\S+/g) || []).join(' ');//移除以“ ...
- linux mysql5.5安装与配置(转帖,在网上收集,自用)
MySQL是一个关系型数据库管理系统 ,由瑞典MySQL AB公司开发,目前属于Oracle 公司.MySQL分为社区版和商业版,由于其体积小.速度快.总体拥有成本低,尤其是开放源码 这一特点,一般中 ...
- PHP下拉框内容随单选框内容变化
这久在修改一个项目的小东西,要求把下拉框改为单选框,由于代码封闭,修改不了获取函数,所以想了个办法让下拉框的内容随单选框的内容变化,下面把代码分享给大家: <!DOCTYPE html PUBL ...