C# 分析搜索引擎url 得到搜索关键字
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Web; namespace HbTui.Portal
{
public class SearchKeyWord
{
private string[][] _Enginers = new string[][]
{
new string[]{"google","utf8","q"},
new string[]{"baidu","gb2312","wd"},
new string[]{"yahoo","utf8","p"},
new string[]{"yisou","utf8","search"},
new string[]{"live","utf8","q"},
new string[]{"tom","gb2312","word"},
new string[]{"","gb2312","q"},
new string[]{"iask","gb2312","k"},
new string[]{"soso","gb2312","w"},
new string[]{"sogou","gb2312","query"},
new string[]{"zhongsou","gb2312","w"},
new string[]{"","gb2312","p"},
new string[]{"openfind","utf8","q"},
new string[]{"alltheweb","utf8","q"},
new string[]{"lycos","utf8","query"},
new string[]{"youdao","utf8","q"},
new string[]{"onseek","utf8","q"},
new string[]{"jike","utf8","q"}
}; //搜索引擎名称
private string _EngineName = string.Empty;
public string EngineName
{
get
{
return _EngineName;
}
} //搜索引擎编码
private string _Coding = "utf8";
public string Coding
{
get
{
return _Coding;
}
}
//搜索引擎关键字查询参数名称
private string _RegexWord = "";
public string RegexWord
{
get
{
return _RegexWord;
}
} private string _Regex = @"("; //搜索引擎关键字
//建立搜索关键字正则表达式
public void EngineRegEx(string myString)
{
for (int i = , j = _Enginers.Length; i < j; i++)
{
if (myString.Contains(_Enginers[i][]))
{
_EngineName = _Enginers[i][];
_Coding = _Enginers[i][];
_RegexWord = _Enginers[i][];
_Regex += _EngineName + @".+.*[?/&]" + _RegexWord + @"[=:])(?<key>[^&]*)";
break;
}
}
} //得到搜索引擎关键字
public string SearchKey(string myString)
{
EngineRegEx(myString.ToLower());
if (_EngineName != "")
{
Regex myReg = new Regex(_Regex, RegexOptions.IgnoreCase);
Match matche = myReg.Match(myString);
myString = matche.Groups["key"].Value;
//去处表示为空格的+
myString = myString.Replace("+", " ");
if (_Coding == "gb2312")
{
myString = GetUTF8String(myString);
}
else
{
myString = Uri.UnescapeDataString(myString);
}
}
return myString;
} //整句转码
public string GetUTF8String(string myString)
{
Regex myReg = new Regex("(?<key>%..%..)", RegexOptions.IgnoreCase);
MatchCollection matches = myReg.Matches(myString);
string myWord;
for (int i = , j = matches.Count; i < j; i++)
{
myWord = matches[i].Groups["key"].Value.ToString();
myString = myString.Replace(myWord, GB2312ToUTF8(myWord));
}
return myString;
}
//单字GB2312转UTF8 URL编码
public string GB2312ToUTF8(string myString)
{
string[] myWord = myString.Split('%');
byte[] myByte = new byte[] { Convert.ToByte(myWord[], ), Convert.ToByte(myWord[], ) };
Encoding GB = Encoding.GetEncoding("GB2312");
Encoding U8 = Encoding.UTF8;
myByte = Encoding.Convert(GB, U8, myByte);
char[] Chars = new char[U8.GetCharCount(myByte, , myByte.Length)];
U8.GetChars(myByte, , myByte.Length, Chars, );
return new string(Chars);
} //判断否为搜索引擎爬虫,并返回其类型
public string isCrawler(string SystemInfo)
{
string[] BotList = new string[] { "Google", "Baidu", "yisou", "MSN", "Yahoo", "live", "tom", "", "TMCrawler", "iask", "Sogou", "soso", "youdao", "zhongsou", "", "openfind", "alltheweb", "lycos", "bing", "" };
foreach (string Bot in BotList)
{
if (SystemInfo.ToLower().Contains(Bot.ToLower()))
{
return Bot;
}
}
return "null";
}
public bool IsSearchEnginesGet(string str)
{
string[] strArray = new string[] { "Google", "Baidu", "yisou", "MSN", "Yahoo", "live", "tom", "", "TMCrawler", "iask",
"Sogou", "soso", "youdao", "zhongsou", "", "openfind","alltheweb", "lycos", "bing", ""};
str = str.ToLower();
for (int i = ; i < strArray.Length; i++)
{
if (str.IndexOf(strArray[i].ToLower()) >= )
{
return true;
}
}
return false;
}
}
}
C# 分析搜索引擎url 得到搜索关键字的更多相关文章
- C#分析搜索引擎URL得到搜索关键字,并判断页面停留时间以及来源页面
前台代码: var start; var end; var state; var lasturl = document.referrer; start = new Date($.ajax({ asyn ...
- 百度搜索URL参数 搜索关键字
http://www.baidu.com/s?wd=关键字 wd(Keyword):查询的关键词: http://www.baidu.com/s?wd=关键字&cl=3 cl(Class):搜 ...
- 搜索引擎case︱从搜索序列文本看高端商务车︱统计之都
朱雪宁(北京大学光华管理学院) 王汉生(北京大学光华管理学院) 摘要:本文对100万搜索引擎用户的13亿搜索序列文本进行探索分析,对高端车用户以及商学院人群做了描述对比,并 ...
- google 搜索关键字技巧
google 搜索关键字技巧 来源 https://www.cnblogs.com/qiudabai/articles/9143328.html inurl: 用于搜索网页上包含的URL. 这个语法 ...
- Django项目:CRM(客户关系管理系统)--26--18PerfectCRM实现King_admin搜索关键字
search_fields = ('name','qq',) 登陆密码设置参考 http://www.cnblogs.com/ujq3/p/8553784.html search_fields = ( ...
- 在 Angular 中实现搜索关键字高亮
在 Angular 中,我们不应该试图直接修改 DOM 的内容,当需要更新 DOM 内容的时候,应该修改的其实是我们的数据模型,也就是 $scope 中的数据,Angular 会帮助我们将修改之后的数 ...
- 仿百度壁纸客户端(五)——实现搜索动画GestureDetector手势识别,动态更新搜索关键字
仿百度壁纸客户端(五)--实现搜索动画GestureDetector手势识别,动态更新搜索关键字 百度壁纸系列 仿百度壁纸客户端(一)--主框架搭建,自定义Tab + ViewPager + Frag ...
- Elasticsearch 基于 URL 的搜索请求
背景 Elasticsearch 不像关系型数据库,没有简易的 SQL 用来查询数据,只能通过调用 RESTful API 实现查询.大体上查询分为两种,基于 URL 的和基于请求主体的.基于 URL ...
- 帝国cms搜索关键字调用标签(showsearch)怎么用
前面ytkah介绍了如何让帝国CMS7.2搜索模板支持动态标签调用,现在我们来说说怎么调用帝国cms搜索关键字调用标签(showsearch).在帝国cms后台那边的使用方法:[showsearch] ...
随机推荐
- Ubuntu14.04安装配置SVN及Trac
还是个实习生的时候,项目管理十分欠缺,会出现很多问题,痛定思痛,决定要改变现状,养成良好的项目管理习惯,看网上工具很多,在这里尝试使用SVN作代码版本控制,使用trac作为项目管理追踪.本文采用的操作 ...
- humble number(hd1058)
Problem Description A number whose only prime factors are 2,3,5 or 7 is called a humble number. The ...
- Oracle EBS-SQL (PO-14):检查供应商信息sql
select pvs.org_id, pvs.vendor_id, pvs.vendor_site_id, hou.name 经营组织, ...
- try/catch异常捕捉
StringWriter sw = new StringWriter(); e.printStackTrace(new PrintWriter(sw)); System.out.println(sw. ...
- haproxy hdr和path
path : string This extracts the request's URL path, which starts at the first slash and ends before ...
- C语言的本质(23)——C标准库之输入与输出(上)
1..文件的基本概念 所谓"文件"是指一组相关数据的有序集合.这个数据集有一个名称,叫做文件名.实际上在前面的各章中我们已经多次使用了文件,例如源程序文件.目标文件.可执行文件.库 ...
- cf466B Wonder Room
B. Wonder Room time limit per test 1 second memory limit per test 256 megabytes input standard input ...
- PHP的优点
1.语法简单 2.学习成本低 3.开发效率高 4.跨平台 5.开发部署方便 6.开源框架非常丰富(如:ThinkPHP) 7.开源CMS系统非常丰富(如:Joomla,Wordpress) 8.开源网 ...
- E6全部刷机包
此版本号基于R533_G_11.11.10P_GSZMCAUT679DA01B_LP064DA_T679DA_S005_E001_P002_R001_G004_1FF.sbf制作耳机接听或挂机正常内置 ...
- oracle em命令行配置及界面按钮乱码问题解决方法
一.配置EM dbconsole db [oracle@rusky ~]$ lsnrctl start [oracle@rusky ~]$ emctl start dbconsoleTZ set to ...