zlhome.com Deal
using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks; namespace AnfleCrawler.DataAnalyzer
{
internal class Zlhome : AnalyzerBase
{
protected override void AnalyzeInternal(PageLandEntity current)
{
var lander = Crawler.Lander;
var pHandler = CreateContentHandler(current);
switch (current.Depth)
{
case :
{
var dom = lander.GetDocument(pHandler);
DoPerPaging(current, dom.DocumentNode, ".page:first-child a:last-child"); foreach (var node in QueryNodes(dom.DocumentNode, ".xqlistBox .l_img a"))
{
var url = GetHref(node, current.Url);
Crawler.PushUrl(url, DataDepth.Houses);
}
}
break;
case DataDepth.Houses:
{
var dom = lander.GetDocument(pHandler);
var attrs = new AttributeFiller(); attrs.Append("小区名称:{0}", QueryTexts(dom.DocumentNode, ".sc a").First().Replace("关注", string.Empty)); attrs.Append(QueryTexts(dom.DocumentNode, ".c:last-child li")); Guid hashKey = GenHashKey(current.Url.OriginalString);
var bo = Crawler.Repository.LoadHouses(hashKey);
bo.SiteID = "Zlhome.com";
bo.PageUrl = current.Url.OriginalString;
bo.CityName = Crawler.Config.CityName;
attrs.FillEntity(bo, new Dictionary<string, string>()
{
{"地址", "小区地址"},
{"所属片区", "所属区域"},
{"物业类型", "物业类别"},
{"骏工日期", "竣工时间"},
});
MapMark(bo);
Repository.Save(bo);
Crawler.OutWrite("保存楼盘 {0}", bo.小区名称); var pNode = QueryNodes(dom.DocumentNode, ".xqinfo").Skip().First();
var dealNode = QueryNode(pNode, "a");
var url = GetHref(dealNode, current.Url);
Crawler.PushUrl(url, DataDepth.Deal, bo.RowID);
}
break;
case DataDepth.Deal:
{
Guid housesID = (Guid)current.State;
var dom = lander.GetDocument(pHandler); bool isRent = false;
foreach (var table in QueryNodes(dom.DocumentNode, ".cjxxtable"))
{
foreach (var node in QueryNodes(table, "tr"))
{
var spans = QueryTexts(node, "td").ToArray();
DateTime? transactionDate = null;
DateTime dump;
if (DateTime.TryParse(spans[], out dump))
{
transactionDate = dump;
}
Repository.SaveHouselisting(new HouselistingEntity()
{
HousesID = housesID,
TransactionDate = transactionDate,
Area = spans[],
Apartment = spans[],
Orientation = spans[],
Floor = spans[],
UnitPriceOrLease = spans[],
SoldPriceOrRent = spans[],
ServiceBroker = spans[],
IsRent = isRent
});
Crawler.OutWrite("保存小区{1}记录 {0}", housesID, isRent ? "出租" : "出售");
}
isRent = true;
}
}
break;
}
}
}
}
zlhome.com Deal的更多相关文章
- Dooioo Deal
using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using ...
- XML节点名称中有小数点处理(deal with dot)导致使用xpath时报错解决方法
<?xml version="1.0"?> <ModifyFiles> <_Layout.cshtml>123456</_Layout.c ...
- whu 1464 deal with numbers
WHU 1464 deal with numbers 题意: 给你一串数字,对着串数字有三项操作: Minus a,b,c:对区间[a,b]总的每个数都减c. Division a,b,c:对区间[ ...
- OK335xS canutils deal with compile error
/************************************************************************************** * OK335xS ca ...
- 能让你聪明的工作DEAL四法则,来自《每周工作四小时》书籍
来自书籍<每周工作四小时>,作者蒂莫西·费里斯(Tim Ferriss,昵称:蒂姆) 能让你聪明的工作DEAL四法则: 第一步:D——定位(Definition) 第二步:E——精简( ...
- how to deal with EINTR fault
[how to deal with EINTR fault] EINTR:interupted error.是指一个调用被信号给中断,对于同步的耗时调用来说,这个操作常见,譬如select.read. ...
- Spoken English Practice( Believe it or not, I don't need to make believe its a big deal. (believe,deal, You don't say))
音标复习 绿色:连读:红色:略读:蓝色:浊化:橙色:弱读 口语蜕变(2017/6/25) Sorry, t ...
- If you want the rainbow, you have to deal with the rain.
If you want the rainbow, you have to deal with the rain.想要彩虹,就先忍受雨水.
- Using SMOTEBoost(过采样) and RUSBoost(使用聚类+集成学习) to deal with class imbalance
Using SMOTEBoost and RUSBoost to deal with class imbalance from:https://aitopics.org/doc/news:1B9F7A ...
随机推荐
- PHP中怎样创建一个空对象?
如果没有声明一个对象然后就对其属性赋值会出现警告.那么我们给它创建一个空对象然后赋值就好了.PHP中创建一个空对象代码如下: 第一种方式: $empty_object = new stdClass() ...
- Unity中通过类名字符串取组件类型的方法(Types.GetType用法)
正常调用Type.GetType取不到组件,因为会先创建实例在获取,而Unity组件无法通过new来创建. 第二种创建方式是通过程序集,具体如下 Assembly.GetExecutingAssemb ...
- Android first---文件读取(登录案例编写为主)
以android登录案例来介绍文件的读取与androidAPI给予的方法 第一步:绘制界面 绘制方法:在线性布局下面设置相对布局 代码部分: <LinearLayout xmlns:androi ...
- Exchange Server 2013就地存档
9.1就地存档 就地存档有助于重新获得对组织邮件数据的控制,而无需个人存储 (.pst) 文件,并且允许用户在可通过 Microsoft Outlook 2010及更高版本和 Microsoft Of ...
- Bootstrap_进度条
<!--基本进度条--> <div class="progress"> <div %</div> </div> <!-- ...
- qq2440启动linux后出现错误提示request_module: runaway loop modprobe binfmt-464c
1.情景: 编译busybox时加了make CROSS_COMPILE=arm-linux-,但是还是出现了此情况! 2.解决方案如下: 配置busybox时,在配置中发现busybox setti ...
- 一个java的DES加密解密类转换成C#
一个java的des加密解密代码如下: //package com.visionsky.util; import java.security.*; //import java.util.regex.P ...
- mysql 日期格式化
SELECT plc.id, plc.policy_no, plc.out_date, og.organ_name, ir.insurer_name, pd.product_name, plc.pol ...
- hdu2896病毒侵袭(ac自动机)
链接 ac自动机的模板题 说2个注意的地方 一是题目说明包含所有ASCII字符,可以开到0-127 包含空格 题目会输入多个源串,在加完当前的val值时,不应清0,可以开个标记数组. #include ...
- 20160928_CentOS6.4x64_Oracle11gR2x64
1.“Oracle Database 11g Release 2” 下载地址: http://www.oracle.com/technetwork/database/enterprise-editio ...