using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks; namespace AnfleCrawler.DataAnalyzer
{
/// <summary>
/// Analyzer for deal (transaction) listing pages of the site recorded as "Mytophome.com".
/// For every listing row it resolves the estate ("houses") detail page, makes sure the
/// estate is stored, and then saves the listing itself.
/// </summary>
/// <remarks>
/// NOTE(review): several numeric literals are missing from this copy of the file
/// (the <c>case</c> label, the <c>Nset[...]</c> / <c>vals[...]</c> indexers and the
/// <c>vals.Length ==</c> comparison). The code does not compile as shown; restore the
/// values from the original source before building.
/// </remarks>
internal class Mytophome : AnalyzerBase
{
/// <summary>
/// Processes one landed page: registers paging, then walks every listing row and
/// saves a <c>HouselistingEntity</c> for it.
/// </summary>
/// <param name="current">The page being analyzed; its <c>Depth</c> selects the branch and its <c>Url</c> is the base for resolving links.</param>
protected override void AnalyzeInternal(PageLandEntity current)
{
var lander = Crawler.Lander;
var pHandler = CreateContentHandler(current);
switch (current.Depth)
{
// NOTE(review): the case label value is missing here — confirm which depth this branch handles.
case :
{
var dom = lander.GetDocument(pHandler);
// The parent of the "nobr" element is tagged with an id so that it can be
// addressed by the "#id" selector handed to DoPerPaging below.
var nextNode = QueryNode(dom.DocumentNode, "nobr").ParentNode;
nextNode.SetAttributeValue("id", PagingHack);
// Paging is registered first; afterwards each ".deD_ctt li" row is handled as one listing.
DoPerPaging(current, dom.DocumentNode, string.Format("#{0}", PagingHack)); foreach (var node in QueryNodes(dom.DocumentNode, ".deD_ctt li"))
{
var Nset = QueryNodes(node, "span").ToArray();
// NOTE(review): the span index is missing — the link is read from an <a> inside one of the row's spans.
var hUrl = GetHref(QueryNode(Nset[], "a"), current.Url);
var query = System.Web.HttpUtility.ParseQueryString(hUrl.Query);
string shid = query["estateId"];
// Rebuild the link as the estate detail URL on the same authority, keyed by the estateId query value.
hUrl = new Uri(string.Format("http://{0}/wiki/{1}/detail.html", hUrl.Authority, shid));
Guid housesID;
try
{
CheckHouses(hUrl, out housesID);
}
catch (HtmlNodeMissingException ex)
{
// The estate page lacked an expected node: log it and skip this listing row.
App.LogError(ex, "OrgUrl={0} HousesUrl={1}", shid, hUrl);
continue;
// Trimmed text of every span in the row, in document order.
} var vals = Nset.Select(p => p.InnerText.HtmlTrim()).ToArray();
// The last span is parsed as the transaction date; it stays null when parsing fails.
DateTime? transactionDate = null;
DateTime dump;
if (DateTime.TryParse(vals.Last(), out dump))
{
transactionDate = dump;
}
// NOTE(review): the expected span count and all vals[...] indexes below are missing.
// The first branch additionally stores BuildingName; the else branch does not.
if (vals.Length == )
{
Repository.SaveHouselisting(new HouselistingEntity()
{
HousesID = housesID,
TransactionDate = transactionDate,
BuildingName = vals[],
Area = string.Format("{0}平方", vals[]),
SoldPriceOrRent = string.Format("{0}万", vals[]),
UnitPriceOrLease = string.Format("{0}元/平方", vals[]),
});
}
else
{
Repository.SaveHouselisting(new HouselistingEntity()
{
HousesID = housesID,
TransactionDate = transactionDate,
Area = string.Format("{0}平方", vals[]),
SoldPriceOrRent = string.Format("{0}万", vals[]),
UnitPriceOrLease = string.Format("{0}元/平方", vals[]),
});
}
Crawler.OutWrite("保存小区出售记录 {0}", housesID);
}
}
break;
}
/// <summary>
/// Fetches the estate detail page, derives the estate id from the URL, and saves the
/// estate unless the loaded entity already carries a <c>SiteID</c>.
/// </summary>
/// <param name="housesUrl">Estate detail page URL; its <c>OriginalString</c> is hashed into the id and stored as <c>PageUrl</c>.</param>
/// <param name="housesID">Receives <c>GenHashKey(housesUrl.OriginalString)</c>.</param>
/// <remarks>
/// The page is downloaded and parsed before the repository lookup, so the fetch happens
/// even when the estate is skipped by the early return.
/// </remarks>
} private void CheckHouses(Uri housesUrl, out Guid housesID)
{
var pHandler = CreateContentHandler(new PageLandEntity()
{
Url = housesUrl,
Depth = DataDepth.Houses
});
pHandler.AjaxBlocks.Add(HACK);
var dom = Crawler.Lander.GetDocument(pHandler);
// Collect the texts of the ".xxjs_rbar_ct li" items as attributes, then compute the id from the URL string.
var attrs = new AttributeFiller(); attrs.Append(QueryTexts(dom.DocumentNode, ".xxjs_rbar_ct li")); housesID = GenHashKey(housesUrl.OriginalString);
var bo = Crawler.Repository.LoadHouses(housesID);
// A non-empty SiteID ends the method early — presumably the estate was stored on an earlier run (confirm against LoadHouses).
if (!string.IsNullOrEmpty(bo.SiteID))
{
return;
}
bo.SiteID = "Mytophome.com";
bo.PageUrl = housesUrl.OriginalString;
bo.CityName = Crawler.Config.CityName;
// Label pairs passed to FillEntity — presumably page label -> entity field name (verify against AttributeFiller).
attrs.FillEntity(bo, new Dictionary<string, string>()
{
{"楼盘名称", "小区名称"},
{"楼盘地址", "小区地址"},
{"发展商", "开发商"},
{"物管公司", "物业公司"},
{"物管电话", "物业办公电话"},
});
MapMark(bo);
Crawler.Repository.Save(bo);
Crawler.OutWrite("保存楼盘 {0}", bo.小区名称);
}
}
}

Mytophome Deal的更多相关文章

  1. zlhome.com Deal

    using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using ...

  2. Dooioo Deal

    using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using ...

  3. XML节点名称中有小数点处理(deal with dot)导致使用xpath时报错解决方法

    <?xml version="1.0"?> <ModifyFiles> <_Layout.cshtml>123456</_Layout.c ...

  4. whu 1464 deal with numbers

    WHU 1464  deal with numbers 题意: 给你一串数字,对这串数字有三项操作: Minus a,b,c:对区间[a,b]中的每个数都减c. Division a,b,c:对区间[ ...

  5. OK335xS canutils deal with compile error

    /************************************************************************************** * OK335xS ca ...

  6. 能让你聪明的工作DEAL四法则,来自《每周工作四小时》书籍

    来自书籍<每周工作四小时>,作者蒂莫西·费里斯(Tim Ferriss,昵称:蒂姆)   能让你聪明的工作DEAL四法则: 第一步:D——定位(Definition) 第二步:E——精简( ...

  7. how to deal with EINTR fault

    [how to deal with EINTR fault] EINTR:interrupted error.是指一个调用被信号给中断,对于同步的耗时调用来说,这个操作常见,譬如select.read. ...

  8. Spoken English Practice( Believe it or not, I don't need to make believe its a big deal. (believe,deal, You don't say))

    音标复习                                                绿色:连读:红色:略读:蓝色:浊化:橙色:弱读 口语蜕变(2017/6/25) Sorry, t ...

  9. If you want the rainbow, you have to deal with the rain.

    If you want the rainbow, you have to deal with the rain.想要彩虹,就先忍受雨水.

随机推荐

  1. Mvc请求管道中的19个事件

    下面是请求管道中的19个事件. (1)BeginRequest: 开始处理请求 (2)AuthenticateRequest授权验证请求,获取用户授权信息 (3):PostAuthenticateRe ...

  2. JAVA下的Thread.sleep方法一定要try

    try { Thread.sleep(1000); } catch (InterruptedException e) { e.printStackTrace(); } 不同于C#,JAVA里的Thre ...

  3. MySQL如何关联查询

    总的来说,mysql认为任何一个查询都是一次关联,并不仅仅是一个查询需要用到两个表匹配才叫关联,所以,在mysql中,每一个查询,每一个片段(包括子查询,甚至单表select)都可能是关联.所以,理解 ...

  4. javascript 设计模式2----策略模式

    1.定义:定义一系列的算法,把它们一个个封装起来,并且使它们可以相互替换 2.解释:就是把算法和一个规则单独分封,在使用时单独调用. 简单例子: var strategies = { "S& ...

  5. 视频 之自定义VideoView

    package com.lvshandian.partylive.view;import android.content.Context;import android.util.AttributeSe ...

  6. C#:类和结构的区别

    第一.引用类型和值类型 类属于引用类型,而结构属于值类型. 结构在赋值时进行复制. 将结构赋值给新变量时,将复制所有数据,并且对新副本所做的任何修改不会更改原始副本的数据. 第二.继承性 类可以继承类 ...

  7. java编写一个可以上下移动的小球:运行后,可以通过上下左右键进行移动

    /* * 功能:加深对事件处理机制的理解 * 1.通过控制上下左右键,来控制一个小球的位置 */package com.test1;import java.awt.*;import javax.swi ...

  8. 关于分布式事务、两阶段提交、一阶段提交、Best Efforts 1PC模式和事务补偿机制的研究 转载

    1.XA XA是由X/Open组织提出的分布式事务的规范.XA规范主要定义了(全局)事务管理器(Transaction Manager)和(局部)资源管理器(Resource Manager)之间的接 ...

  9. 配置org.springframework.scheduling.quartz.CronTriggerBean (转载)

    在项目中又用到了定时器,对于定时器的应用总是模模糊糊的,今天结合网上找到的资料与自己在项目中写的简单地在此写一下,以备需要时查阅. 一个Quartz的CronTrigger表达式分为七项子表达式,其中 ...

  10. day 2

    三目运算符 A?B:C 等价于 if(A) B; else C; 实例: int i: i=(3>2?5:1)  //如果3>2为真,i的值为5,否则为1 printf(“%d”,i): ...