zlhome.com Deal
using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks; namespace AnfleCrawler.DataAnalyzer
{
/// <summary>
/// Analyzer for zlhome.com pages. Dispatches on <c>current.Depth</c>: the first branch queues
/// community ("Houses") detail URLs, the Houses branch saves the community record and queues its
/// deal page, and the Deal branch saves one listing record per table row.
/// </summary>
/// <remarks>
/// NOTE(review): this copy of the file appears to have lost its numeric literals (see the empty
/// <c>case :</c> label, <c>Skip()</c> and every <c>spans[]</c> below), so it does not compile as
/// shown. Restore them from the original source before building.
/// </remarks>
internal class Zlhome : AnalyzerBase
{
/// <summary>
/// Processes one landed page according to its crawl depth.
/// </summary>
/// <param name="current">The page being analyzed; its Depth, Url and State are read here.</param>
protected override void AnalyzeInternal(PageLandEntity current)
{
var lander = Crawler.Lander;
var pHandler = CreateContentHandler(current);
switch (current.Depth)
{
// NOTE(review): the case label value is missing here — presumably the entry/listing depth; confirm.
case :
{
var dom = lander.GetDocument(pHandler);
// Pass the pager selector to DoPerPaging (presumably queues further listing pages — verify),
// then push every link matched by ".xqlistBox .l_img a" at Houses depth.
DoPerPaging(current, dom.DocumentNode, ".page:first-child a:last-child"); foreach (var node in QueryNodes(dom.DocumentNode, ".xqlistBox .l_img a"))
{
var url = GetHref(node, current.Url);
Crawler.PushUrl(url, DataDepth.Houses);
}
}
break;
case DataDepth.Houses:
{
var dom = lander.GetDocument(pHandler);
// Collect attributes: the first ".sc a" text with "关注" removed is appended under the
// "小区名称" label, followed by the ".c:last-child li" texts. The hash key is derived from the page URL.
// First() throws if ".sc a" matches nothing.
var attrs = new AttributeFiller(); attrs.Append("小区名称:{0}", QueryTexts(dom.DocumentNode, ".sc a").First().Replace("关注", string.Empty)); attrs.Append(QueryTexts(dom.DocumentNode, ".c:last-child li")); Guid hashKey = GenHashKey(current.Url.OriginalString);
var bo = Crawler.Repository.LoadHouses(hashKey);
bo.SiteID = "Zlhome.com";
bo.PageUrl = current.Url.OriginalString;
bo.CityName = Crawler.Config.CityName;
// The dictionary is a name-to-name mapping handed to FillEntity — presumably page label -> entity
// field name; verify the direction against AttributeFiller.FillEntity.
attrs.FillEntity(bo, new Dictionary<string, string>()
{
{"地址", "小区地址"},
{"所属片区", "所属区域"},
{"物业类型", "物业类别"},
{"骏工日期", "竣工时间"},
});
// NOTE(review): MapMark is defined elsewhere; presumably it resolves map coordinates for the entity — confirm.
MapMark(bo);
Repository.Save(bo);
// NOTE(review): Skip() has no argument — the count literal is missing from this copy.
// The link inside the selected ".xqinfo" node is pushed at Deal depth with the saved row id as state.
Crawler.OutWrite("保存楼盘 {0}", bo.小区名称); var pNode = QueryNodes(dom.DocumentNode, ".xqinfo").Skip().First();
var dealNode = QueryNode(pNode, "a");
var url = GetHref(dealNode, current.Url);
Crawler.PushUrl(url, DataDepth.Deal, bo.RowID);
}
break;
case DataDepth.Deal:
{
// State is cast to Guid; presumably it is the bo.RowID pushed by the Houses branch above.
Guid housesID = (Guid)current.State;
// isRent is false for the first ".cjxxtable" table and true for every later one
// (it is flipped after the first table's rows have been saved).
var dom = lander.GetDocument(pHandler); bool isRent = false;
foreach (var table in QueryNodes(dom.DocumentNode, ".cjxxtable"))
{
foreach (var node in QueryNodes(table, "tr"))
{
// One array of "td" cell texts per row.
// NOTE(review): there is no length check before indexing — confirm rows without "td" cells
// (e.g. header rows) cannot reach this point.
var spans = QueryTexts(node, "td").ToArray();
// Stays null when the date cell cannot be parsed.
DateTime? transactionDate = null;
DateTime dump;
// NOTE(review): every spans[] index below is missing its literal; the column order
// cannot be recovered from this copy.
if (DateTime.TryParse(spans[], out dump))
{
transactionDate = dump;
}
Repository.SaveHouselisting(new HouselistingEntity()
{
HousesID = housesID,
TransactionDate = transactionDate,
Area = spans[],
Apartment = spans[],
Orientation = spans[],
Floor = spans[],
UnitPriceOrLease = spans[],
SoldPriceOrRent = spans[],
ServiceBroker = spans[],
IsRent = isRent
});
// Log message uses "出租" (rent) when isRent is true, otherwise "出售" (sale).
Crawler.OutWrite("保存小区{1}记录 {0}", housesID, isRent ? "出租" : "出售");
}
isRent = true;
}
}
break;
}
}
}
}
zlhome.com Deal的更多相关文章
- Dooioo Deal
using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using ...
- XML节点名称中有小数点处理(deal with dot)导致使用xpath时报错解决方法
<?xml version="1.0"?> <ModifyFiles> <_Layout.cshtml>123456</_Layout.c ...
- whu 1464 deal with numbers
WHU 1464 deal with numbers 题意: 给你一串数字,对这串数字有三项操作: Minus a,b,c:对区间[a,b]中的每个数都减c. Division a,b,c:对区间[ ...
- OK335xS canutils deal with compile error
/************************************************************************************** * OK335xS ca ...
- 能让你聪明的工作DEAL四法则,来自《每周工作四小时》书籍
来自书籍<每周工作四小时>,作者蒂莫西·费里斯(Tim Ferriss,昵称:蒂姆) 能让你聪明的工作DEAL四法则: 第一步:D——定位(Definition) 第二步:E——精简( ...
- how to deal with EINTR fault
[how to deal with EINTR fault] EINTR:interrupted error.是指一个调用被信号给中断,对于同步的耗时调用来说,这个操作常见,譬如select.read. ...
- Spoken English Practice( Believe it or not, I don't need to make believe its a big deal. (believe,deal, You don't say))
音标复习 绿色:连读:红色:略读:蓝色:浊化:橙色:弱读 口语蜕变(2017/6/25) Sorry, t ...
- If you want the rainbow, you have to deal with the rain.
If you want the rainbow, you have to deal with the rain.想要彩虹,就先忍受雨水.
- Using SMOTEBoost(过采样) and RUSBoost(使用聚类+集成学习) to deal with class imbalance
Using SMOTEBoost and RUSBoost to deal with class imbalance from:https://aitopics.org/doc/news:1B9F7A ...
随机推荐
- 批处理命令——choice
[1]choice命令简介 使用此命令可以提示用户输入一个选择项,根据用户输入的选择项再决定执行具体的过程. 使用时应该加/c:参数,c: 后应写提示可输入的字符或数字,之间无空格.冒号是可选项. 使 ...
- Locality Sensitive Hash 局部敏感哈希
Locality Sensitive Hash是一种常见的用于处理高维向量的索引办法.与其它基于Tree的数据结构,诸如KD-Tree.SR-Tree相比,它较好地克服了Curse of Dimens ...
- Java线程锁一个简单Lock
/** * @author * * Lock 是java.util.concurrent.locks下提供的java线程锁,作用跟synchronized类似, * 但是比它更加面向对象,两个线程执行 ...
- oracle物化视图
物化视图是一种特殊的物理表,“物化”(Materialized)视图是相对普通视图而言的.普通视图是虚拟表,应用的局限性大,任何对视图的查询,Oracle都实际上转换为视图SQL语句的查询. 这样对整 ...
- 1106 c程序的推导过程
- JavaScript为input/textarea自定义hover,focus效果
<title>JavaScript为input/textarea自定义hover,focus效果</title> <script type="text/java ...
- (五)AOS编程
一.LOG AOS_LOG(index) //断言,会打印出断言传进来的值 AOS_ASSERT(0); //只会打印断言位置 return AOS_FAIL; //返回错误,函数 ...
- 【Todo】【读书笔记】机器学习-周志华
书籍位置: /Users/baidu/Documents/Data/Interview/机器学习-数据挖掘/<机器学习_周志华.pdf> 一共442页.能不能这个周末先囫囵吞枣看完呢.哈哈 ...
- cordova环境搭建,搭建项目,以及拍照功能的实现
一.配置环境 1.配置java环境: 下载对应系统环境的jdk:http://www.oracle.com/technetwork/java/javase/downloads/index-jsp-13 ...
- Deep Learning 16:用自编码器对数据进行降维_读论文“Reducing the Dimensionality of Data with Neural Networks”的笔记
前言 论文“Reducing the Dimensionality of Data with Neural Networks”是深度学习鼻祖hinton于2006年发表于<SCIENCE > ...