using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Text.RegularExpressions;
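using System.Configuration;
/// <summary>
/// Constant definitions (argument names, column indexes, user agents and per-engine XPath selectors).
/// </summary>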
public static class SearchConst
{
    // Shared constants: request argument names, result column indexes, format strings,
    // user agents, configuration keys, and one nested class of XPath selectors per
    // search engine.
    // NOTE(review): most selector arrays hold two entries; they look like alternative
    // page layouts for the same data - confirm against the code that consumes them.

    public static readonly string ARG_CLIENT = "client";
    public static readonly string ARG_WORD = "word";

    // The initializers below were empty ("= ;") in this copy, which does not compile.
    // Values restored from the duplicate SearchConst definition later in this file.
    // NOTE(review): confirm these are the intended values.
    public static readonly int DataColumnCount = 4;
    public static readonly int ColumnOfUrl = 0;
    public static readonly int ColumnOfTitle = 1;
    public static readonly int ColumnOfInfo = 2;
    public static readonly int ColumnOfAdUrl = 3;

    public static readonly string FMT_Date = "yyyy/MM/dd";
    public static readonly string FMT_TIME = "HH:mm:ss";

    public static readonly string UserAgentPC = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:11.0) Gecko/20100101 Firefox/11.0";
    public static readonly string UserAgentMobile = "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A403 Safari/8536.25";

    public static readonly string SearchKeyWordPlace = "#{q}";
    public static readonly string DefaultEncode = "UTF-8";
    public static readonly string AttributeHref = "href";
    public static readonly string FILEEXT_ZIP = ".zip";
    public static readonly string FILE_TXT = "source.txt";
    public static readonly string FILE_KEY = "SavePath";
    public static readonly string BATCH_PARALLES_KEY = "BatchParalles";

    // NOTE(review): the empty flag value, "fff" message and empty BAITAI_ID values in
    // this copy look like placeholders - confirm before relying on them.
    public static readonly string FLG_ENABLED = "";
    public static readonly string CLIENT_MONITOR = "BJMOR";
    public static readonly string MSG_E_PAGE_STYLE_CHANGE = "fff";

    /// <summary>Selectors for Google; UserAgent is the shared PC user agent.</summary>
    public static class Google
    {
        public static readonly string UserAgent = UserAgentPC;

        public static readonly string[] XPATH_ROOT = { "mbEnd", "mbEnd" };
        // Gets the URL.
        public static readonly string[] XPATH_CITE = { "//div[@id='mbEnd']//ol/li//cite", "//div[@id='mbEnd']//ol/li//cite" };
        // Gets the title.
        public static readonly string[] XPATH_H3 = { "//div[@id='mbEnd']//ol/li//h3", "//div[@id='mbEnd']//ol/li/h3" };
        public static readonly string[] XPATH_ADURL = { "//div[@id='mbEnd']//ol/li//h3//a[1]", "//div[@id='mbEnd']//ol/li/h3//a[1]" };
        public static readonly string[] XPATH_INFO = { "//div[@id='mbEnd']//ol/li//div[@class='ac ads-creative']", "//div[@id='mbEnd']//ol/li//div[@class='ads-creative']" };

        // Top info.
        public static readonly string[] XPATH_ROOT_TOP = { "taw", "taw" };
        public static readonly string[] XPATH_CITE_TOP = { "//div[@id='tads']//ol/li//cite", "//div[@id='tads']//ol/li//cite" };
        public static readonly string[] XPATH_H3_TOP = { "//div[@id='tads']//ol/li//h3", "//div[@id='tads']//ol/li/h3" };
        public static readonly string[] XPATH_ADURL_TOP = { "//div[@id='tads']//ol/li//h3//a[1]", "//div[@id='tads']//ol/li/h3//a[1]" };
        public static readonly string[] XPATH_INFO_TOP = { "//div[@id='tads']//ol/li//div[@class='ac ads-creative']", "//div[@id='tads']//ol/li//div[@class='ads-creative']" };

        // Group 1 captures the "http..." text that follows "adurl=" up to the end of the input.
        public static readonly Regex RegexAdUrl = new Regex(@"adurl=(http[\S]*$)");

        public static readonly string BAITAI_ID = "";
    }

    /// <summary>Selectors for Google mobile; UserAgent is the shared mobile user agent.</summary>
    public static class GoogleM
    {
        public static readonly string UserAgent = UserAgentMobile;

        // Info.
        public static readonly string[] XPATH_ROOT = { "bottomads", "bottomads" };
        public static readonly string[] XPATH_CITE = { "//div[@id='tadsb']/ol/li//cite", "//div[@id='tadsb']/ol/li//cite" };
        public static readonly string[] XPATH_H3 = { "//div[@id='tadsb']/ol/li//h3", "//div[@id='tadsb']/ol/li//h3" };
        public static readonly string[] XPATH_ADURL = { "//div[@id='tadsb']/ol/li//h3//a", "//div[@id='tadsb']/ol/li//h3//a" };
        public static readonly string[] XPATH_INFO = { "//div[@id='tadsb']/ol/li//div[@class='ac ads-creative']", "//div[@id='tadsb']/ol/li//div[@class='ads-creative']" };

        // Top info.
        public static readonly string[] XPATH_ROOT_TOP = { "tads", "tads" };
        public static readonly string[] XPATH_CITE_TOP = { "//div[@id='tads']/ol/li//cite", "//div[@id='tads']/ol/li//cite" };
        public static readonly string[] XPATH_H3_TOP = { "//div[@id='tads']/ol/li//h3", "//div[@id='tads']/ol/li//h3" };
        public static readonly string[] XPATH_ADURL_TOP = { "//div[@id='tads']/ol/li//h3//a", "//div[@id='tads']/ol/li//h3//a" };
        public static readonly string[] XPATH_INFO_TOP = { "//div[@id='tads']/ol/li//div[@class='ac ads-creative']", "//div[@id='tads']/ol/li//div[@class='ads-creative']" };

        // Group 1 captures the "http..." text that follows "adurl=" up to the end of the input.
        public static readonly Regex RegexAdUrl = new Regex(@"adurl=(http[\S]*$)");

        public static readonly string BAITAI_ID = "";
    }

    /// <summary>Selectors for MSN; UserAgent is the shared PC user agent.</summary>
    public static class MSN
    {
        public static readonly string UserAgent = UserAgentPC;

        // b_context/b_ad
        public static readonly string[] XPATH_ROOT = { "sidebar", "b_context" };
        public static readonly string[] XPATH_CITE = { "//div[@class='sb_adsNv2']//li//cite", "//ol[@id='b_context']//li[@class='b_ad']//li//cite" };
        public static readonly string[] XPATH_H3 = { "//div[@class='sb_adsNv2']//li//h3", "//ol[@id='b_context']//li[@class='b_ad']//li//h2" };
        public static readonly string[] XPATH_ADURL = { "//div[@class='sb_adsNv2']//li//a", "//ol[@id='b_context']//li[@class='b_ad']//li//a" };
        public static readonly string[] XPATH_INFO = { "//div[@class='sb_adsNv2']//li//p", "//ol[@id='b_context']//li[@class='b_ad']//li//p" };

        // b_results/b_ad
        public static readonly string[] XPATH_ROOT_TOP = { "results_container", "b_results" };
        public static readonly string[] XPATH_CITE_TOP = { "//div[@class='sb_adsWv2']//li//cite", "//ol[@id='b_results']//li[@class='b_ad']//li//cite" };
        public static readonly string[] XPATH_H3_TOP = { "//div[@class='sb_adsWv2']//li//h3", "//ol[@id='b_results']//li[@class='b_ad']//li//h2" };
        public static readonly string[] XPATH_ADURL_TOP = { "//div[@class='sb_adsWv2']//li//a", "//ol[@id='b_results']//li[@class='b_ad']//li//a" };
        public static readonly string[] XPATH_INFO_TOP = { "//div[@class='sb_adsWv2']//li//p", "//ol[@id='b_results']//li[@class='b_ad']//li//p" };

        // Group 1 captures the "http..." text that follows a literal "**" up to the end of the input.
        public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");

        public static readonly string BAITAI_ID = "";
    }

    /// <summary>Selectors for Yahoo; UserAgent is the shared PC user agent.</summary>
    public static class Yahoo
    {
        public static readonly string UserAgent = UserAgentPC;

        public static readonly string XPATH_ROOT = "sIn";
        public static readonly string XPATH_CITE1 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/div[@class='a cf']";
        public static readonly string XPATH_H31 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/h3";
        public static readonly string XPATH_ADURL1 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/h3/a";
        public static readonly string XPATH_INFO1 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/p";

        public static readonly string XPATH_ROOT_TOP = "So1";
        public static readonly string XPATH_CITE_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/div[@class='a cf']";
        public static readonly string XPATH_H3_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/h3";
        public static readonly string XPATH_ADURL_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/h3/a";
        public static readonly string XPATH_INFO_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/p";

        // Group 1 captures the "http..." text that follows a literal "**" up to the end of the input.
        public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");
        public static readonly string NullUrl = "&gt;";

        public static readonly string BAITAI_ID = "";
    }

    /// <summary>Selectors for the second Yahoo variant (top section only); UserAgent is the shared PC user agent.</summary>
    public static class Yahoo2
    {
        public static readonly string UserAgent = UserAgentPC;

        public static readonly string XPATH_ROOT_TOP = "contents";
        public static readonly string XPATH_CITE_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/cite";
        public static readonly string XPATH_H3_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/h2/a";
        public static readonly string XPATH_ADURL_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/h2/a";
        public static readonly string XPATH_INFO_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/p[@class='smr']";

        // Group 1 captures the "http..." text that follows a literal "**" up to the end of the input.
        public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");
        public static readonly string NullUrl = "&gt;";

        public static readonly string BAITAI_ID = "";
    }

    /// <summary>Selectors for Yahoo mobile; UserAgent is the shared mobile user agent.</summary>
    public static class YahooM
    {
        public static readonly string UserAgent = UserAgentMobile;

        public static readonly string XPATH_ROOT = "contentsInner";
        public static readonly string XPATH_CITE = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/cite";
        public static readonly string XPATH_H3 = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/h3";
        public static readonly string XPATH_ADURL = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/h3/a";
        public static readonly string XPATH_INFO = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/p[@class='dtl']";

        public static readonly string XPATH_ROOT_TOP = "contentsInner";
        public static readonly string XPATH_CITE_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/cite";
        public static readonly string XPATH_H3_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/h3";
        public static readonly string XPATH_ADURL_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/h3/a";
        public static readonly string XPATH_INFO_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/p[@class='dtl']";

        // Group 1 captures the "http..." text that follows a literal "**" up to the end of the input.
        public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");
        public static readonly string NullUrl = "&gt;";

        public static readonly string BAITAI_ID = "";
    }

    /// <summary>Selectors for Baidu; uses its own user agent string.</summary>
    public static class BaiDu
    {
        public static readonly string UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko";

        // First case.
        public static readonly string[] XPATH_ROOT = { "ec_im_container", "ec_im_container" };
        // First case.
        public static readonly string[] XPATH_CITE = { "//a/font[@size='-1' and @class]", "//a/font[@size='-1' and @class]" };
        // First case.
        public static readonly string[] XPATH_H3 = { "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]", "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]" };
        public static readonly string[] XPATH_ADURL = { "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]", "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]" };
        public static readonly string[] XPATH_INFO = { "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-click]/font[1]", "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-click]/font[1]" };

        // Top info. Original note on the entries below: first entry = bottom, second entry = top.
        public static readonly string[] XPATH_ROOT_TOP = { "content_left", "content_left" };
        public static readonly string[] XPATH_CITE_TOP = { "//table[@data-click]/tbody/tr/td//a[not(@data-is-main-url) and not(contains(@href,'tool'))]/span", "//div[@class and @style]/div/div/a/span[1]|//div/table/tbody/tr/td[2]/div//a/span[1]" };
        public static readonly string[] XPATH_H3_TOP = { "//table/tbody/tr/td/a[ @data-is-main-url]", "//div[@class and @style]/div/div/h3" };
        public static readonly string[] XPATH_ADURL_TOP = { "//table/tbody/tr/td/a[ @data-is-main-url]", "//table/tbody/tr/td/a[ @data-is-main-url]" };
        public static readonly string[] XPATH_INFO_TOP = { "//table[@data-click and @class]/tbody/tr[3]/td/a[not(./span)]|//table[@data-click and @class]/tbody/tr/td/table/tbody/tr/td/div/font/a", "//div[@class and @style]/div/div[not(./span)]/a|//div/table/tbody/tr/td/div/font/a[not(./span)]" };

        //public static readonly Regex RegexAdUrl = new Regex(@"http[\S]*$");

        public static readonly string BAITAI_ID = "";
    }

    /// <summary>Selectors for cn.bing; UserAgent is the shared PC user agent.</summary>
    public static class CnBing
    {
        public static readonly string UserAgent = UserAgentPC;

        public static readonly string[] XPATH_ROOT = { "b_context", "b_context" };
        public static readonly string[] XPATH_CITE = { "//div[@class='sb_add sb_adTA']//cite", "//div[@class='sb_add sb_adTA']//cite" };
        // First case.
        public static readonly string[] XPATH_H3 = { "//div[@class='sb_add sb_adTA']//h2/a", "//div[@class='sb_add sb_adTA']//h2/a" };
        public static readonly string[] XPATH_ADURL = { "//div[@class='sb_add sb_adTA']//h2/a", "//div[@class='sb_add sb_adTA']//h2/a" };
        public static readonly string[] XPATH_INFO = { "//div[@class='sb_add sb_adTA']//div[@class='b_caption']/p", "//div[@class='sb_add sb_adTA']//div[@class='b_caption']/p" };

        // Top info: the selectors are empty strings here.
        // Original note: first entry = bottom, second entry = top.
        public static readonly string[] XPATH_ROOT_TOP = { "gg", "gg" };
        public static readonly string[] XPATH_CITE_TOP = { "", "" };
        public static readonly string[] XPATH_H3_TOP = { "", "" };
        public static readonly string[] XPATH_ADURL_TOP = { "", "" };
        public static readonly string[] XPATH_INFO_TOP = { "", "" };

        // Group 1 captures the "http..." text that follows "rturl=" up to the end of the input.
        public static readonly Regex RegexAdUrl = new Regex(@"rturl=(http[\S]*$)");

        public static readonly string BAITAI_ID = "";
    }

    /// <summary>Selectors for HaoSou; UserAgent is the shared PC user agent.</summary>
    public static class HaoSou
    {
        public static readonly string UserAgent = UserAgentPC;

        // Ads on the right-hand side.
        // Search scope.
        public static readonly string[] XPATH_ROOT = { "side", "side" };
        // First case.
        public static readonly string[] XPATH_CITE = { "//ul[@id='rightbox']/li/p/cite[not(contains(text(),' http://e.360.cn'))]|//div[@id='m-spread-left']//cite", "//ul[@id='rightbox']/li/p/cite[not(contains(text(),' http://e.360.cn'))]|//div[@id='m-spread-left']//cite" };
        // First case.
        public static readonly string[] XPATH_H3 = { "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'ss'))]|//div[@id='m-spread-left']//h3/a", "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'ss'))]|//div[@id='m-spread-left']//h3/a" };
        public static readonly string[] XPATH_ADURL = { "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'ss'))]|//div[@id='m-spread-left']//h3/a", "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'ss'))]|//div[@id='m-spread-left']//h3/a" };
        public static readonly string[] XPATH_INFO = { "//ul[@id='e_idea_pp']/li//p|//ul[@id='rightbox']/li/p[not(contains(text(),'4000-360-360'))]", "//ul[@id='e_idea_pp']/li//p|//ul[@id='rightbox']/li/p[not(contains(text(),'4000-360-360'))]" };

        // Top info: the selectors are empty strings here.
        // Original note: first entry = bottom, second entry = top.
        public static readonly string[] XPATH_ROOT_TOP = { "ss", "sss" };
        public static readonly string[] XPATH_CITE_TOP = { "", "" };
        public static readonly string[] XPATH_H3_TOP = { "", "" };
        public static readonly string[] XPATH_ADURL_TOP = { "", "" };
        public static readonly string[] XPATH_INFO_TOP = { "", "" };

        //public static readonly Regex RegexAdUrl = new Regex(@"http[\S]*$");

        public static readonly string BAITAI_ID = "";
    }

    /// <summary>Selectors for Sogou (single-entry arrays); UserAgent is the shared PC user agent.</summary>
    public static class Sogou
    {
        public static readonly string UserAgent = UserAgentPC;

        // Right section.
        public static readonly string[] XPATH_ROOT = { "right" };
        // The green URL.
        public static readonly string[] XPATH_CITE = { "//div[@class='bizr_fb']" };
        // CSS equivalent noted by the author: #ad_leftresult_0 > h3:nth-child(1)
        public static readonly string[] XPATH_H3 = { "//h3[@class='bizr_title']" };
        // URL of the h3.
        public static readonly string[] XPATH_ADURL = { "//h3[@class='bizr_title']/a" };
        public static readonly string[] XPATH_INFO = { "//div[@class='bizr_ft']" };

        // Top section.
        // Author's reference path: *[@id="promotion_adv_container"]/div/div
        public static readonly string[] XPATH_ROOT_TOP = { "promotion_adv_container" };
        public static readonly string[] XPATH_CITE_TOP = { "//div[contains(@class,'biz_rb')and @id]/div//cite" };
        public static readonly string[] XPATH_H3_TOP = { "//h3[@class='biz_title']" };
        public static readonly string[] XPATH_ADURL_TOP = { "//h3[@class='biz_title']/a" };
        public static readonly string[] XPATH_INFO_TOP = { "//div[@class='crown_info_box' or @class='biz_ft']|//div[contains(@id,'box_id')]/table" };

        //public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");

        public static readonly string BAITAI_ID = "";
        public static readonly string NullUrl = "&gt;";
    }
}

using System;using System.Collections.Generic;using System.Linq;using System.Web;using System.Text.RegularExpressions;using System.Configuration;
/// <summary>
/// Constants for the search helper: request argument names, result column indexes,
/// format strings, user agents, configuration keys, and one nested class of XPath
/// selectors per search engine.
/// </summary>
/// <remarks>
/// In the pasted source the line breaks were collapsed, so "//" comments swallowed the
/// declarations (and closing braces) that followed them on the same line. Every
/// declaration is restored to its own line here.
/// NOTE(review): most selector arrays hold two entries; they look like alternative page
/// layouts for the same data - confirm against the code that consumes them.
/// </remarks>
public static class SearchConst
{
    public static readonly string ARG_CLIENT = "client";
    public static readonly string ARG_WORD = "word";

    public static readonly int DataColumnCount = 4;
    public static readonly int ColumnOfUrl = 0;
    public static readonly int ColumnOfTitle = 1;
    public static readonly int ColumnOfInfo = 2;
    public static readonly int ColumnOfAdUrl = 3;

    public static readonly string FMT_Date = "yyyy/MM/dd";
    public static readonly string FMT_TIME = "HH:mm:ss";

    public static readonly string UserAgentPC = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:11.0) Gecko/20100101 Firefox/11.0";
    public static readonly string UserAgentMobile = "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A403 Safari/8536.25";

    public static readonly string SearchKeyWordPlace = "#{q}";
    public static readonly string DefaultEncode = "UTF-8";
    public static readonly string AttributeHref = "href";
    public static readonly string FILEEXT_ZIP = ".zip";
    public static readonly string FILE_TXT = "source.txt";
    public static readonly string FILE_KEY = "SavePath";
    public static readonly string BATCH_PARALLES_KEY = "BatchParalles";
    public static readonly string FLG_ENABLED = "1";
    public static readonly string CLIENT_MONITOR = "BJMOR";
    public static readonly string MSG_E_PAGE_STYLE_CHANGE = "スポンサーチェックの検索媒体レイアウト変更";

    /// <summary>Selectors for Google; UserAgent is the shared PC user agent.</summary>
    public static class Google
    {
        public static readonly string UserAgent = UserAgentPC;

        public static readonly string[] XPATH_ROOT = { "mbEnd", "mbEnd" };
        // Gets the URL.
        public static readonly string[] XPATH_CITE = { "//div[@id='mbEnd']//ol/li//cite", "//div[@id='mbEnd']//ol/li//cite" };
        // Gets the title.
        public static readonly string[] XPATH_H3 = { "//div[@id='mbEnd']//ol/li//h3", "//div[@id='mbEnd']//ol/li/h3" };
        public static readonly string[] XPATH_ADURL = { "//div[@id='mbEnd']//ol/li//h3//a[1]", "//div[@id='mbEnd']//ol/li/h3//a[1]" };
        public static readonly string[] XPATH_INFO = { "//div[@id='mbEnd']//ol/li//div[@class='ac ads-creative']", "//div[@id='mbEnd']//ol/li//div[@class='ads-creative']" };

        // Top info.
        public static readonly string[] XPATH_ROOT_TOP = { "taw", "taw" };
        public static readonly string[] XPATH_CITE_TOP = { "//div[@id='tads']//ol/li//cite", "//div[@id='tads']//ol/li//cite" };
        public static readonly string[] XPATH_H3_TOP = { "//div[@id='tads']//ol/li//h3", "//div[@id='tads']//ol/li/h3" };
        public static readonly string[] XPATH_ADURL_TOP = { "//div[@id='tads']//ol/li//h3//a[1]", "//div[@id='tads']//ol/li/h3//a[1]" };
        public static readonly string[] XPATH_INFO_TOP = { "//div[@id='tads']//ol/li//div[@class='ac ads-creative']", "//div[@id='tads']//ol/li//div[@class='ads-creative']" };

        // Group 1 captures the "http..." text that follows "adurl=" up to the end of the input.
        public static readonly Regex RegexAdUrl = new Regex(@"adurl=(http[\S]*$)");

        public static readonly string BAITAI_ID = "001";
    }

    /// <summary>Selectors for Google mobile; UserAgent is the shared mobile user agent.</summary>
    public static class GoogleM
    {
        public static readonly string UserAgent = UserAgentMobile;

        // Info.
        public static readonly string[] XPATH_ROOT = { "bottomads", "bottomads" };
        public static readonly string[] XPATH_CITE = { "//div[@id='tadsb']/ol/li//cite", "//div[@id='tadsb']/ol/li//cite" };
        public static readonly string[] XPATH_H3 = { "//div[@id='tadsb']/ol/li//h3", "//div[@id='tadsb']/ol/li//h3" };
        public static readonly string[] XPATH_ADURL = { "//div[@id='tadsb']/ol/li//h3//a", "//div[@id='tadsb']/ol/li//h3//a" };
        public static readonly string[] XPATH_INFO = { "//div[@id='tadsb']/ol/li//div[@class='ac ads-creative']", "//div[@id='tadsb']/ol/li//div[@class='ads-creative']" };

        // Top info.
        public static readonly string[] XPATH_ROOT_TOP = { "tads", "tads" };
        public static readonly string[] XPATH_CITE_TOP = { "//div[@id='tads']/ol/li//cite", "//div[@id='tads']/ol/li//cite" };
        public static readonly string[] XPATH_H3_TOP = { "//div[@id='tads']/ol/li//h3", "//div[@id='tads']/ol/li//h3" };
        public static readonly string[] XPATH_ADURL_TOP = { "//div[@id='tads']/ol/li//h3//a", "//div[@id='tads']/ol/li//h3//a" };
        public static readonly string[] XPATH_INFO_TOP = { "//div[@id='tads']/ol/li//div[@class='ac ads-creative']", "//div[@id='tads']/ol/li//div[@class='ads-creative']" };

        // Group 1 captures the "http..." text that follows "adurl=" up to the end of the input.
        public static readonly Regex RegexAdUrl = new Regex(@"adurl=(http[\S]*$)");

        public static readonly string BAITAI_ID = "005";
    }

    /// <summary>Selectors for MSN; UserAgent is the shared PC user agent.</summary>
    public static class MSN
    {
        public static readonly string UserAgent = UserAgentPC;

        // b_context/b_ad
        public static readonly string[] XPATH_ROOT = { "sidebar", "b_context" };
        public static readonly string[] XPATH_CITE = { "//div[@class='sb_adsNv2']//li//cite", "//ol[@id='b_context']//li[@class='b_ad']//li//cite" };
        public static readonly string[] XPATH_H3 = { "//div[@class='sb_adsNv2']//li//h3", "//ol[@id='b_context']//li[@class='b_ad']//li//h2" };
        public static readonly string[] XPATH_ADURL = { "//div[@class='sb_adsNv2']//li//a", "//ol[@id='b_context']//li[@class='b_ad']//li//a" };
        public static readonly string[] XPATH_INFO = { "//div[@class='sb_adsNv2']//li//p", "//ol[@id='b_context']//li[@class='b_ad']//li//p" };

        // b_results/b_ad
        public static readonly string[] XPATH_ROOT_TOP = { "results_container", "b_results" };
        public static readonly string[] XPATH_CITE_TOP = { "//div[@class='sb_adsWv2']//li//cite", "//ol[@id='b_results']//li[@class='b_ad']//li//cite" };
        public static readonly string[] XPATH_H3_TOP = { "//div[@class='sb_adsWv2']//li//h3", "//ol[@id='b_results']//li[@class='b_ad']//li//h2" };
        public static readonly string[] XPATH_ADURL_TOP = { "//div[@class='sb_adsWv2']//li//a", "//ol[@id='b_results']//li[@class='b_ad']//li//a" };
        public static readonly string[] XPATH_INFO_TOP = { "//div[@class='sb_adsWv2']//li//p", "//ol[@id='b_results']//li[@class='b_ad']//li//p" };

        // Group 1 captures the "http..." text that follows a literal "**" up to the end of the input.
        public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");

        public static readonly string BAITAI_ID = "003";
    }

    /// <summary>Selectors for Yahoo; UserAgent is the shared PC user agent.</summary>
    public static class Yahoo
    {
        public static readonly string UserAgent = UserAgentPC;

        public static readonly string XPATH_ROOT = "sIn";
        public static readonly string XPATH_CITE1 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/div[@class='a cf']";
        public static readonly string XPATH_H31 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/h3";
        public static readonly string XPATH_ADURL1 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/h3/a";
        public static readonly string XPATH_INFO1 = "//div[@id='So3']/div[@class='bd']/div[@class='w']/p";

        public static readonly string XPATH_ROOT_TOP = "So1";
        public static readonly string XPATH_CITE_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/div[@class='a cf']";
        public static readonly string XPATH_H3_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/h3";
        public static readonly string XPATH_ADURL_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/h3/a";
        public static readonly string XPATH_INFO_TOP = "//div[@id='So1']/div[@class='bd']/div[@class='w']/p";

        // Group 1 captures the "http..." text that follows a literal "**" up to the end of the input.
        public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");
        public static readonly string NullUrl = "&gt;";

        public static readonly string BAITAI_ID = "002";
    }

    /// <summary>Selectors for the second Yahoo variant (top section only); UserAgent is the shared PC user agent.</summary>
    public static class Yahoo2
    {
        public static readonly string UserAgent = UserAgentPC;

        public static readonly string XPATH_ROOT_TOP = "contents";
        public static readonly string XPATH_CITE_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/cite";
        public static readonly string XPATH_H3_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/h2/a";
        public static readonly string XPATH_ADURL_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/h2/a";
        public static readonly string XPATH_INFO_TOP = "//div[@id='contents']/div[@class='cWrap']/div[@class='listWrap cf']/ul/li/p[@class='smr']";

        // Group 1 captures the "http..." text that follows a literal "**" up to the end of the input.
        public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");
        public static readonly string NullUrl = "&gt;";

        public static readonly string BAITAI_ID = "004";
    }

    /// <summary>Selectors for Yahoo mobile; UserAgent is the shared mobile user agent.</summary>
    public static class YahooM
    {
        public static readonly string UserAgent = UserAgentMobile;

        public static readonly string XPATH_ROOT = "contentsInner";
        public static readonly string XPATH_CITE = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/cite";
        public static readonly string XPATH_H3 = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/h3";
        public static readonly string XPATH_ADURL = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/h3/a";
        public static readonly string XPATH_INFO = "//div[@id='contentsInner']//aside[@class='So']/div[@class='bd']/ul/li/p[@class='dtl']";

        public static readonly string XPATH_ROOT_TOP = "contentsInner";
        public static readonly string XPATH_CITE_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/cite";
        public static readonly string XPATH_H3_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/h3";
        public static readonly string XPATH_ADURL_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/h3/a";
        public static readonly string XPATH_INFO_TOP = "//div[@id='contentsInner']/aside[@class='So next-cmm']/div[@class='bd']/ul/li/p[@class='dtl']";

        // Group 1 captures the "http..." text that follows a literal "**" up to the end of the input.
        public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");
        public static readonly string NullUrl = "&gt;";

        public static readonly string BAITAI_ID = "006";
    }

    /// <summary>Selectors for Baidu; uses its own user agent string.</summary>
    public static class BaiDu
    {
        public static readonly string UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko";

        // First case; there seems to be only one case.
        public static readonly string[] XPATH_ROOT = { "ec_im_container", "ec_im_container" };
        // First case.
        public static readonly string[] XPATH_CITE = { "//a/font[@size='-1' and @class]", "//a/font[@size='-1' and @class]" };
        // First case.
        public static readonly string[] XPATH_H3 = { "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]", "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]" };
        public static readonly string[] XPATH_ADURL = { "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]", "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-is-main-url]" };
        public static readonly string[] XPATH_INFO = { "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-click]/font[1]", "//a[contains(@class,'EC_BL')and contains(@id,'dfs')and @data-click]/font[1]" };

        // Top info. Original note on the entries below: first entry = bottom (partial ads), second entry = top.
        public static readonly string[] XPATH_ROOT_TOP = { "content_left", "content_left" };
        public static readonly string[] XPATH_CITE_TOP = { "//table[@data-click]/tbody/tr/td//a[not(@data-is-main-url) and not(contains(@href,'tool'))]/span", "//div[@class and @style]/div/div/a/span[1]|//div/table/tbody/tr/td[2]/div//a/span[1]" };
        public static readonly string[] XPATH_H3_TOP = { "//table/tbody/tr/td/a[ @data-is-main-url]", "//div[@class and @style]/div/div/h3" };
        public static readonly string[] XPATH_ADURL_TOP = { "//table/tbody/tr/td/a[ @data-is-main-url]", "//table/tbody/tr/td/a[ @data-is-main-url]" };
        public static readonly string[] XPATH_INFO_TOP = { "//table[@data-click and @class]/tbody/tr[3]/td/a[not(./span)]|//table[@data-click and @class]/tbody/tr/td/table/tbody/tr/td/div/font/a", "//div[@class and @style]/div/div[not(./span)]/a|//div/table/tbody/tr/td/div/font/a[not(./span)]" };

        //public static readonly Regex RegexAdUrl = new Regex(@"http[\S]*$");

        public static readonly string BAITAI_ID = "007";
    }

    /// <summary>Selectors for cn.bing; UserAgent is the shared PC user agent.</summary>
    public static class CnBing
    {
        public static readonly string UserAgent = UserAgentPC;

        public static readonly string[] XPATH_ROOT = { "b_context", "b_context" };
        public static readonly string[] XPATH_CITE = { "//div[@class='sb_add sb_adTA']//cite", "//div[@class='sb_add sb_adTA']//cite" };
        // First case.
        public static readonly string[] XPATH_H3 = { "//div[@class='sb_add sb_adTA']//h2/a", "//div[@class='sb_add sb_adTA']//h2/a" };
        public static readonly string[] XPATH_ADURL = { "//div[@class='sb_add sb_adTA']//h2/a", "//div[@class='sb_add sb_adTA']//h2/a" };
        public static readonly string[] XPATH_INFO = { "//div[@class='sb_add sb_adTA']//div[@class='b_caption']/p", "//div[@class='sb_add sb_adTA']//div[@class='b_caption']/p" };

        // Top info: the root value is the Japanese word for "none" and the selectors are empty strings.
        // Original note: first entry = bottom, second entry = top.
        public static readonly string[] XPATH_ROOT_TOP = { "なし", "なし" };
        public static readonly string[] XPATH_CITE_TOP = { "", "" };
        public static readonly string[] XPATH_H3_TOP = { "", "" };
        public static readonly string[] XPATH_ADURL_TOP = { "", "" };
        public static readonly string[] XPATH_INFO_TOP = { "", "" };

        // Group 1 captures the "http..." text that follows "rturl=" up to the end of the input.
        public static readonly Regex RegexAdUrl = new Regex(@"rturl=(http[\S]*$)");

        public static readonly string BAITAI_ID = "008";
    }

    /// <summary>Selectors for HaoSou; UserAgent is the shared PC user agent.</summary>
    public static class HaoSou
    {
        public static readonly string UserAgent = UserAgentPC;

        // Ads on the right-hand side.
        // Search scope.
        public static readonly string[] XPATH_ROOT = { "side", "side" };
        // First case.
        public static readonly string[] XPATH_CITE = { "//ul[@id='rightbox']/li/p/cite[not(contains(text(),' http://e.360.cn'))]|//div[@id='m-spread-left']//cite", "//ul[@id='rightbox']/li/p/cite[not(contains(text(),' http://e.360.cn'))]|//div[@id='m-spread-left']//cite" };
        // First case.
        public static readonly string[] XPATH_H3 = { "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'好搜推广'))]|//div[@id='m-spread-left']//h3/a", "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'好搜推广'))]|//div[@id='m-spread-left']//h3/a" };
        public static readonly string[] XPATH_ADURL = { "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'好搜推广'))]|//div[@id='m-spread-left']//h3/a", "//ul[@id='rightbox']/li/h3/a[not(contains(text(),'好搜推广'))]|//div[@id='m-spread-left']//h3/a" };
        public static readonly string[] XPATH_INFO = { "//ul[@id='e_idea_pp']/li//p|//ul[@id='rightbox']/li/p[not(contains(text(),'4000-360-360'))]", "//ul[@id='e_idea_pp']/li//p|//ul[@id='rightbox']/li/p[not(contains(text(),'4000-360-360'))]" };

        // Top info: the root value is the Japanese word for "none" and the selectors are empty strings.
        // Original note: first entry = bottom, second entry = top.
        public static readonly string[] XPATH_ROOT_TOP = { "なし", "なし" };
        public static readonly string[] XPATH_CITE_TOP = { "", "" };
        public static readonly string[] XPATH_H3_TOP = { "", "" };
        public static readonly string[] XPATH_ADURL_TOP = { "", "" };
        public static readonly string[] XPATH_INFO_TOP = { "", "" };

        //public static readonly Regex RegexAdUrl = new Regex(@"http[\S]*$");

        public static readonly string BAITAI_ID = "009";
    }

    /// <summary>Selectors for Sogou (single-entry arrays); UserAgent is the shared PC user agent.</summary>
    public static class Sogou
    {
        public static readonly string UserAgent = UserAgentPC;

        // Right section.
        public static readonly string[] XPATH_ROOT = { "right" };
        // The green URL.
        public static readonly string[] XPATH_CITE = { "//div[@class='bizr_fb']" };
        // CSS equivalent noted by the author: #ad_leftresult_0 > h3:nth-child(1)
        public static readonly string[] XPATH_H3 = { "//h3[@class='bizr_title']" };
        // URL of the h3.
        public static readonly string[] XPATH_ADURL = { "//h3[@class='bizr_title']/a" };
        public static readonly string[] XPATH_INFO = { "//div[@class='bizr_ft']" };

        // Top section.
        // Author's reference path: *[@id="promotion_adv_container"]/div/div
        public static readonly string[] XPATH_ROOT_TOP = { "promotion_adv_container" };
        public static readonly string[] XPATH_CITE_TOP = { "//div[contains(@class,'biz_rb')and @id]/div//cite" };
        public static readonly string[] XPATH_H3_TOP = { "//h3[@class='biz_title']" };
        public static readonly string[] XPATH_ADURL_TOP = { "//h3[@class='biz_title']/a" };
        public static readonly string[] XPATH_INFO_TOP = { "//div[@class='crown_info_box' or @class='biz_ft']|//div[contains(@id,'box_id')]/table" };

        //public static readonly Regex RegexAdUrl = new Regex(@"\*\*(http[\S]*$)");

        public static readonly string BAITAI_ID = "010";
        public static readonly string NullUrl = "&gt;";
    }
}

xpath 参考的更多相关文章

  1. 【转】XPath 示例

    XPath 示例   其他版本   本主题回顾整个 XPath 参考中出现的语法示例. 所有示例均基于 XPath 语法的示例 XML 文件 (inventory.xml). 有关在测试文件中使用 X ...

  2. XPATH 带命名空间数据的读取

    在XML中,很多情况下有命名空间,如果直接使用XPATH 读取是会读到空节点. 解决办法如下: InputStream is=loader.getResourceAsStream("com/ ...

  3. HtmlCleanner结合xpath用法(转载)

    HtmlCleaner cleaner = new HtmlCleaner(); TagNode node = cleaner.clean(new URL("http://finance.s ...

  4. scrapy2_初窥Scrapy

    递归知识:oop,xpath,jsp,items,pipline等专业网络知识,初级水平并不是很scrapy,可以从简单模块自己写. 初窥Scrapy Scrapy是一个为了爬取网站数据,提取结构性数 ...

  5. 较全的IT方面帮助文档

    http://www.shouce.ren/post/d/id/108632 XSLT参考手册-新.CHMhttp://www.shouce.ren/post/d/id/108633 XSL-FO参考 ...

  6. selenium java 浏览器操作

    环境搭建 selenium 2.53 selenium-java-2.53.0.jar selenium-java-2.53.0-srcs.jar 源代码包 拷贝到工程lib下,做build path ...

  7. JDOM 操作XML

    http://www.cnblogs.com/hoojo/archive/2011/08/11/2134638.html 可扩展标记语言——eXtensible Markup Language 用户可 ...

  8. python爬虫 scrapy2_初窥Scrapy

    sklearn实战-乳腺癌细胞数据挖掘 https://study.163.com/course/introduction.htm?courseId=1005269003&utm_campai ...

  9. [开发笔记]-Linq to xml学习笔记

    最近需要用到操作xml文档的方法,学习了一下linq to xml,特此记录. 测试代码: class Program { //参考: LINQ to XML 编程基础 - luckdv - 博客园 ...

随机推荐

  1. Mac下载安装Android Studio教程

    今天把公司闲置的一台Mac-mini重装了下系统感觉用着速度还不错,平时上班用的机器USB有些问题,所以打算用这台Mac.以往开发用Intellij Idea就够用,但是这次项目引用的jar包太多,遭 ...

  2. JAVA 8 默认方法-Default Methods

    什么是默认方法-Default Methods 简单的说,就是可以在接口中定义一个已实现方法,且该接口的实现类不需要实现该方法: 如下示例: interface GreetingService { v ...

  3. BIEE11G常用函数及使用说明

    BIEE常用函数使用手册 1.AGGREGATE AT 此函数根据指定的级别聚合列.使用AGGREGATE AT 可确保始终在关键字AT 之后指定的级别执行度量聚合,而无论WHERE 子句如何. 语法 ...

  4. 如何分析解读systemstat dump产生的trc文件

    ORACLE数据库的systemstat dump生成trace文件虽然比较简单,但是怎么从trace文件中浩如烟海的信息中提炼有用信息,并作出分析诊断是一件技术活,下面收集.整理如何分析解读syst ...

  5. 利用phpmyadmin修改mysql的root密码及如何进入修改密码后的phpmyadmin

    1.利用phpmyadmin修改mysql的root密码 很多人利用phpmyadmin或者命令行来修改了mysql的root密码,重启后发现mysql登录错误,这是为什么呢?修改mysql的root ...

  6. MySQL 优化之 index merge(索引合并)

    深入理解 index merge 是使用索引进行优化的重要基础之一.理解了 index merge 技术,我们才知道应该如何在表上建立索引. 1. 为什么会有index merge 我们的 where ...

  7. Mac上打开拷贝到移动硬盘里的文件提示“已经被osx使用不能打开”解决办法

    在终端里面粘贴xattr -d com.apple.FinderInfo(此处按一下空格),然后把文件拖进终端窗口,按一下回车就好了

  8. MySQL基础学习(一) 命令行命令

    1. 命令行登录 mysql -uroot -p 按照提示输入密码 常用登录选项 -u 指定用户 -p 密码 -h 数据库所在主机 -P 端口 -D 指定数据库 2.命令行退出 exit quit \ ...

  9. Altium Designer 警告 adding hidden net

    这是因为 一些元件 隐藏了 vcc GND 或者没有使用vcc GND ,用不着它也报警告了. 这里可以将 vcc GND删掉这个管脚.

  10. SCCM 客户端的修复

    1. Stopping the SMS Agent Host service (net stop ccmexec) 2. Stopping the WMI service (net stop winm ...