C#分析搜索引擎URL得到搜索关键字,并判断页面停留时间以及来源页面
前台代码:
// Page-stay tracker: records when the visitor arrived, and on unload posts the
// stay duration, referrer and page identity to /ashx/SiteStats.ashx.
var start;   // arrival time (server clock)
var end;     // departure time (server clock)
var state;   // stay duration in SECONDS (millisecond difference divided by 1000)
var lasturl = document.referrer;

// Both timestamps come from the "Date" response header of a synchronous request
// rather than from the local clock, so a wrong clock on the visitor's machine
// cannot skew the measurement.
start = new Date($.ajax({ async: false }).getResponseHeader("Date"));

$(document).ready(function () {
    // [path keyword, action label] pairs, checked in order. A keyword matches
    // when it occurs anywhere after the first character of the pathname.
    var sectionActions = [
        ["GeRenKuanDai", "家庭宽带信息资费查看"],
        ["boxdetail", "查看大卖系列"],
        ["cesu", "测速"],
        ["government", "企业宽带查看"],
        ["GwbnNewApply", "宽带新开户页面"],
        ["wodechangkuan", "网上营业厅-用户登录"],
        ["xinwen", "新闻浏览"]
    ];

    // Maps a pathname to the action label stored with the visit; "" when the
    // page belongs to none of the tracked sections.
    function resolveAction(path) {
        if (path == "/default.htm") {
            return "首页访问";
        }
        for (var idx = 0; idx < sectionActions.length; idx++) {
            if (path.indexOf(sectionActions[idx][0]) > 0) {
                return sectionActions[idx][1];
            }
        }
        return "";
    }

    // Fires when the tab is closed, a link is followed, or the page is refreshed.
    $(window).unload(function () {
        end = new Date($.ajax({ async: false }).getResponseHeader("Date"));
        state = (end.getTime() - start.getTime()) / 1000;

        // NOTE(review): an asynchronous POST issued during unload may be dropped
        // by the browser before it is sent - confirm this is acceptable.
        $.post("/ashx/SiteStats.ashx", {
            start_time: start.Format("yyyy-MM-dd hh:mm:ss.S"),
            state_time: state,
            action: resolveAction(window.location.pathname),
            state_url: window.location.href,
            last_url: lasturl,
            state_title: $("title").html()
        });
    });
});
/**
 * Formats this date according to a token pattern.
 *
 * Tokens: y (year), M (month), d (day), h (hour, 0-23), m (minute),
 * s (second), q (quarter), S (milliseconds). Only the first run of each token
 * is replaced. A single-letter token yields the raw number; a longer run
 * yields the number left-padded with zeros to two digits. A run of y keeps
 * that many trailing digits of the year.
 *
 * @param {string} fmt pattern, e.g. "yyyy-MM-dd hh:mm:ss.S"
 * @returns {string} the pattern with its tokens substituted
 */
Date.prototype.Format = function (fmt) {
    // Ordered [token regex source, value] pairs; the order matters because
    // each substitution works on the result of the previous one.
    var parts = [
        ["M+", this.getMonth() + 1],
        ["d+", this.getDate()],
        ["h+", this.getHours()],
        ["m+", this.getMinutes()],
        ["s+", this.getSeconds()],
        ["q+", Math.floor((this.getMonth() + 3) / 3)],
        ["S", this.getMilliseconds()]
    ];

    var yearRun = /(y+)/.exec(fmt);
    if (yearRun) {
        fmt = fmt.replace(yearRun[1], (this.getFullYear() + "").substr(4 - yearRun[1].length));
    }

    for (var n = 0; n < parts.length; n++) {
        var found = new RegExp("(" + parts[n][0] + ")").exec(fmt);
        if (!found) {
            continue;
        }
        var token = found[1];
        var value = parts[n][1];
        var digits = "" + value;
        fmt = fmt.replace(token, token.length == 1 ? value : ("00" + value).substr(digits.length));
    }
    return fmt;
}
后台ashx页面:
<%@ WebHandler Language="C#" Class="SiteStats" %> using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System.Web.Script.Serialization;
using System.Net;
using System.IO;
/// <summary>
/// HTTP handler that stores one page-visit record per request: stay duration,
/// visited page, referrer (with search engine and keyword when the referrer is
/// a search result page), client IP, a coarse location for that IP, and whether
/// the visitor has been seen before.
/// </summary>
/// <remarks>
/// Several numeric literals were missing from the source this class was
/// recovered from. Values that could not be read off the surrounding code are
/// marked NOTE(review) and must be confirmed.
/// </remarks>
public class SiteStats : IHttpHandler
{
    // Lifetime of the "unid" visitor cookie, in years.
    // NOTE(review): the argument of AddYears was missing in the recovered source; 1 is assumed - confirm.
    private const int VisitorCookieYears = 1;

    // Value of the "code" field that the IP lookup service reports on success.
    // NOTE(review): the literal was blank in the recovered source; "0" is assumed - confirm.
    private const string GeoSuccessCode = "0";

    private const string NewVisitor = "新访客";
    private const string ReturningVisitor = "老访客";
    private const string TitleOpenTag = "<title>";
    private const string TitleCloseTag = "</title>";

    private static readonly Regex UnicodeEscapePattern = new Regex(@"(?i)\\u([0-9a-f]{4})");
    private static readonly Regex Gb2312PairPattern = new Regex("(?<key>%[0-9a-f]{2}%[0-9a-f]{2})", RegexOptions.IgnoreCase);

    // Substrings whose presence marks a referrer or user-agent as search-engine related.
    // NOTE(review): the entries "163" and "118114" were blank in the recovered source
    // (digit-only tokens were stripped); a blank entry matches every input. Confirm the names.
    // NOTE(review): matching is by plain substring, so short markers such as "so" or "tom"
    // also match unrelated URLs.
    private static readonly string[] EngineMarkers = new string[]
    {
        "Google", "Baidu", "yisou", "MSN", "Yahoo", "live", "tom", "163", "TMCrawler", "iask",
        "Sogou", "soso", "youdao", "zhongsou", "so", "openfind", "alltheweb", "lycos", "bing", "118114"
    };

    string Keyword = String.Empty;
    string Engine = String.Empty;
    Maticsoft.BLL.tb_SiteStats tbSiteStatesBLL = new Maticsoft.BLL.tb_SiteStats();
    Maticsoft.Model.tb_SiteStats tbSiteStatesModle = new Maticsoft.Model.tb_SiteStats();

    /// <summary>
    /// Builds a visit record from the posted form fields and stores it.
    /// </summary>
    /// <param name="context">Current request; reads last_url, state_time, state_url,
    /// state_title and action, and the "unid" cookie.</param>
    public void ProcessRequest(HttpContext context)
    {
        context.Response.ContentType = "text/plain";

        // Referrer as reported by the page script; null or empty for direct visits.
        string url = context.Request["last_url"];

        if (IsSearchEnginesGet(url))
        {
            string keyword = SearchKey(url);
            // SearchKey echoes its input when no engine row matched; storing that
            // would put the whole referrer URL into the keyword column.
            if (EngineName != "")
            {
                Keyword = keyword;
                Engine = EngineName;
            }
        }

        string stateTime = context.Request["state_time"];
        string ip = ReadIP();

        tbSiteStatesModle.ID = Guid.NewGuid().ToString();
        tbSiteStatesModle.State_time = stateTime;
        // The page posts on unload, so arrival time = now - stay duration.
        tbSiteStatesModle.VisitTime = DateTime.Now.AddSeconds(-ParseStaySeconds(stateTime));
        tbSiteStatesModle.State_url = context.Request["state_url"];
        tbSiteStatesModle.State_title = context.Request["state_title"];
        tbSiteStatesModle.UserAction = context.Request["action"];
        tbSiteStatesModle.IpAddress = ip;
        tbSiteStatesModle.Last_title = string.IsNullOrEmpty(url) ? "" : TryGetTitle(url);

        // Location is auxiliary: ExtStr1 is only set when the lookup succeeded.
        string area = LookupArea(ip);
        if (area != null)
        {
            tbSiteStatesModle.ExtStr1 = area;
        }

        HttpCookie cookie = context.Request.Cookies["unid"];
        if (cookie == null)
        {
            // No cookie: fall back to the IP history to tell new from returning
            // visitors, then issue a cookie so later visits are recognised directly.
            bool knownIp = tbSiteStatesBLL.ExistsByIP(ip) == true;
            tbSiteStatesModle.IsNewUser = knownIp ? ReturningVisitor : NewVisitor;

            cookie = new HttpCookie("unid", Guid.NewGuid().ToString());
            cookie.Expires = DateTime.Now.AddYears(VisitorCookieYears);
            context.Response.Cookies.Add(cookie);
        }
        else
        {
            tbSiteStatesModle.IsNewUser = ReturningVisitor;
        }

        tbSiteStatesModle.Last_url = url;
        tbSiteStatesModle.ExtStr2 = cookie.Value;
        tbSiteStatesModle.VisitWay = Engine;
        tbSiteStatesModle.KeyWords = Keyword;
        tbSiteStatesBLL.Add(tbSiteStatesModle);
    }

    // Parses the posted stay duration (seconds, '.' as decimal separator);
    // returns 0 when the field is missing or not a finite number.
    private static double ParseStaySeconds(string raw)
    {
        double seconds;
        bool parsed = double.TryParse(
            raw,
            System.Globalization.NumberStyles.Float,
            System.Globalization.CultureInfo.InvariantCulture,
            out seconds);
        if (!parsed || double.IsNaN(seconds) || double.IsInfinity(seconds))
        {
            return 0;
        }
        return seconds;
    }

    // Fetches the referrer title, returning "" when the page cannot be retrieved
    // so that a dead referrer does not prevent the visit from being stored.
    private string TryGetTitle(string url)
    {
        try
        {
            return GetTitle(url);
        }
        catch (WebException)
        {
            return "";
        }
        catch (IOException)
        {
            return "";
        }
    }

    // Asks ip.taobao.com for the location of an IP and returns
    // region + city + county + isp, or null when the lookup fails, the response
    // is not the expected JSON object, or the service does not report success.
    private static string LookupArea(string ip)
    {
        string jsonText;
        try
        {
            using (WebClient wc = new WebClient())
            {
                byte[] bytes = wc.DownloadData("http://ip.taobao.com/service/getIpInfo.php?ip=" + Uri.EscapeDataString(ip));
                jsonText = Encoding.UTF8.GetString(bytes);
            }
        }
        catch (WebException)
        {
            return null;
        }

        JObject jo;
        try
        {
            jo = JsonConvert.DeserializeObject(jsonText) as JObject;
        }
        catch (JsonException)
        {
            return null;
        }
        if (jo == null)
        {
            return null;
        }

        JToken code = jo["code"];
        if (code == null || UnicodeToStr(code.ToString()) != GeoSuccessCode)
        {
            return null;
        }

        JToken data = jo["data"];
        if (data == null || data.Type != JTokenType.Object)
        {
            return null;
        }

        return GeoField(data, "region") + GeoField(data, "city") + GeoField(data, "county") + GeoField(data, "isp");
    }

    // Reads one string field of the lookup result; "" when the field is absent.
    private static string GeoField(JToken data, string name)
    {
        JToken value = data[name];
        return value == null ? "" : UnicodeToStr(value.ToString());
    }

    /// <summary>
    /// Reads the client IP address of the current request.
    /// </summary>
    /// <returns>The first address in X-Forwarded-For when that header is present,
    /// otherwise REMOTE_ADDR, otherwise UserHostAddress, otherwise "0.0.0.0".</returns>
    /// <remarks>X-Forwarded-For is supplied by the client or by proxies and can be forged.</remarks>
    public static string ReadIP()
    {
        HttpRequest request = HttpContext.Current.Request;

        string IP = null;
        string forwarded = request.ServerVariables["HTTP_X_FORWARDED_FOR"];
        if (!string.IsNullOrEmpty(forwarded))
        {
            // The header may hold a comma-separated chain "client, proxy1, proxy2".
            IP = forwarded.Split(',')[0].Trim();
        }
        if (string.IsNullOrEmpty(IP))
        {
            IP = request.ServerVariables["REMOTE_ADDR"];
        }
        if (string.IsNullOrEmpty(IP))
        {
            IP = request.UserHostAddress;
        }
        if (string.IsNullOrEmpty(IP))
        {
            IP = "0.0.0.0";
        }
        return IP;
    }

    /// <summary>
    /// Downloads a page and extracts the text of its first title element.
    /// </summary>
    /// <param name="Url">Absolute http or https address of the page.</param>
    /// <returns>The text between &lt;title&gt; and the following &lt;/title&gt;; "" when
    /// <paramref name="Url"/> is not an absolute http(s) address or the page has no title.</returns>
    /// <exception cref="WebException">The page could not be retrieved.</exception>
    public string GetTitle(string Url)
    {
        // The address comes from the client, so only web schemes are accepted;
        // WebRequest.Create would otherwise also open file:// addresses.
        // NOTE(review): internal hosts are still reachable through this call - consider an allow-list.
        Uri target;
        if (!Uri.TryCreate(Url, UriKind.Absolute, out target)
            || (target.Scheme != Uri.UriSchemeHttp && target.Scheme != Uri.UriSchemeHttps))
        {
            return "";
        }

        string strHtml;
        WebRequest request = WebRequest.Create(target);
        using (WebResponse response = request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader sr = new StreamReader(body, Encoding.GetEncoding("utf-8")))
        {
            strHtml = sr.ReadToEnd();
        }

        int open = strHtml.IndexOf(TitleOpenTag, StringComparison.OrdinalIgnoreCase);
        if (open < 0)
        {
            return "";
        }
        int start = open + TitleOpenTag.Length;
        int end = strHtml.IndexOf(TitleCloseTag, start, StringComparison.OrdinalIgnoreCase);
        if (end < 0)
        {
            return "";
        }
        return strHtml.Substring(start, end - start);
    }

    /// <summary>
    /// Replaces every \uXXXX escape in a string with the character it denotes;
    /// text without such escapes is returned unchanged.
    /// </summary>
    /// <param name="str">Text that may contain \uXXXX escapes.</param>
    /// <returns>The decoded text; "" when <paramref name="str"/> is null.</returns>
    public static string UnicodeToStr(string str)
    {
        if (str == null)
        {
            return "";
        }
        return UnicodeEscapePattern.Replace(str, delegate (Match m1)
        {
            return ((char)Convert.ToInt32(m1.Groups[1].Value, 16)).ToString();
        });
    }

    /// <summary>
    /// Converts a GMT time string to local time.
    /// </summary>
    /// <param name="gmt">Time such as "Tue, 15 Nov 1994 08:12:31 GMT" or with a "+0..." offset.</param>
    /// <returns>The local time, or <see cref="DateTime.MinValue"/> when the text is
    /// null, empty, or cannot be parsed.</returns>
    public static DateTime GmtToLocal(string gmt)
    {
        if (string.IsNullOrEmpty(gmt))
        {
            return DateTime.MinValue;
        }

        try
        {
            string pattern = "";
            if (gmt.IndexOf("+0", StringComparison.Ordinal) != -1)
            {
                // Offset form: drop the GMT marker and let "zzz" read the offset.
                gmt = gmt.Replace("GMT", "");
                pattern = "ddd, dd MMM yyyy HH':'mm':'ss zzz";
            }
            if (gmt.IndexOf("GMT", StringComparison.OrdinalIgnoreCase) != -1)
            {
                pattern = "ddd, dd MMM yyyy HH':'mm':'ss 'GMT'";
            }

            if (pattern != "")
            {
                DateTime parsed = DateTime.ParseExact(gmt, pattern, System.Globalization.CultureInfo.InvariantCulture, System.Globalization.DateTimeStyles.AdjustToUniversal);
                return parsed.ToLocalTime();
            }
            return Convert.ToDateTime(gmt);
        }
        catch (FormatException)
        {
            // Unparseable input is reported through the MinValue sentinel.
            return DateTime.MinValue;
        }
    }

    #region -搜索引擎-
    // Search engine table: { name as it appears in the URL, keyword encoding, query parameter }.
    // Rows are tried in order and the first name contained in the URL wins.
    // NOTE(review): the names "163" and "118114" were blank in the recovered source - confirm.
    private static readonly string[][] _Enginers = new string[][]
    {
        new string[]{"google","utf8","q"},
        new string[]{"baidu", "gb2312", "wd"},
        new string[]{"yahoo","utf8","p"},
        new string[]{"yisou","utf8","search"},
        new string[]{"live","utf8","q"},
        new string[]{"tom","gb2312","word"},
        new string[]{"163","gb2312","q"},
        new string[]{"iask","gb2312","k"},
        new string[]{"soso","gb2312","w"},
        new string[]{"sogou","gb2312","query"},
        new string[]{"zhongsou","gb2312","w"},
        new string[]{"so", "utf8", "q"},
        new string[]{"openfind","utf8","q"},
        new string[]{"alltheweb","utf8","q"},
        new string[]{"lycos","utf8","query"},
        new string[]{"onseek","utf8","q"},
        new string[]{"youdao","utf8","q"},
        new string[]{"bing","utf8","q"},
        new string[]{"118114","gb2312","kw"}
    };

    private string _EngineName = "";
    /// <summary>Name of the engine matched by the last <see cref="EngineRegEx"/> call; "" when none matched.</summary>
    public string EngineName
    {
        get
        {
            return _EngineName;
        }
    }

    private string _Coding = "utf8";
    /// <summary>Keyword encoding listed for the matched engine; "utf8" when none matched.</summary>
    public string Coding
    {
        get
        {
            return _Coding;
        }
    }

    private string _RegexWord = "";
    /// <summary>Query-string parameter that carries the keyword for the matched engine.</summary>
    public string RegexWord
    {
        get
        {
            return _RegexWord;
        }
    }

    // Pattern that captures the keyword into the group "key"; rebuilt by EngineRegEx.
    private string _Regex = @"(";

    /// <summary>
    /// Identifies the search engine a URL belongs to and prepares the keyword pattern.
    /// Sets <see cref="EngineName"/>, <see cref="Coding"/> and <see cref="RegexWord"/>.
    /// </summary>
    /// <param name="myString">Lower-cased referrer URL.</param>
    public void EngineRegEx(string myString)
    {
        // Start from a clean state so a second call cannot inherit the previous match.
        _EngineName = "";
        _Coding = "utf8";
        _RegexWord = "";
        _Regex = @"(";

        if (string.IsNullOrEmpty(myString))
        {
            return;
        }

        for (int i = 0, j = _Enginers.Length; i < j; i++)
        {
            if (myString.Contains(_Enginers[i][0]))
            {
                _EngineName = _Enginers[i][0];
                _Coding = _Enginers[i][1];
                _RegexWord = _Enginers[i][2];
                _Regex = @"(" + _EngineName + @".+.*[?/ &]" + _RegexWord + @"[=:])(?<key>[^&]*)";
                break;
            }
        }
    }

    /// <summary>
    /// Extracts the decoded search keyword from a search-result URL.
    /// </summary>
    /// <param name="myString">Referrer URL.</param>
    /// <returns>The keyword ("" when the URL carries none); the input itself when no
    /// known engine is recognised; "" when the input is null or empty.</returns>
    public string SearchKey(string myString)
    {
        if (string.IsNullOrEmpty(myString))
        {
            EngineRegEx(String.Empty);
            return String.Empty;
        }

        EngineRegEx(myString.ToLowerInvariant());
        if (_EngineName != "")
        {
            Regex myReg = new Regex(_Regex, RegexOptions.IgnoreCase);
            Match matche = myReg.Match(myString);
            myString = matche.Groups["key"].Value;
            // '+' stands for a space in query strings.
            myString = myString.Replace("+", " ");
            if (_Coding == "gb2312")
            {
                // NOTE(review): UrlDecode without an encoding argument decodes as UTF-8,
                // not GB2312 - confirm which encoding these engines actually send.
                myString = System.Web.HttpUtility.UrlDecode(myString);
            }
            else
            {
                myString = Uri.UnescapeDataString(myString);
            }
        }
        return myString;
    }

    /// <summary>
    /// Decodes every %XX%XX pair in a string as one GB2312 double-byte character.
    /// </summary>
    /// <param name="myString">Text containing percent-encoded GB2312 byte pairs.</param>
    /// <returns>The text with each pair replaced by its character.</returns>
    /// <remarks>Pairs are taken left to right; a single-byte escape such as %20 in
    /// front of a pair shifts the pairing.</remarks>
    public string GetUTF8String(string myString)
    {
        if (string.IsNullOrEmpty(myString))
        {
            return myString;
        }
        return Gb2312PairPattern.Replace(myString, delegate (Match pair)
        {
            return GB2312ToUTF8(pair.Groups["key"].Value);
        });
    }

    /// <summary>
    /// Decodes one percent-encoded GB2312 byte pair.
    /// </summary>
    /// <param name="myString">Text of the form "%XX%XX".</param>
    /// <returns>The decoded character(s).</returns>
    /// <exception cref="FormatException">The text is not two percent-encoded hex bytes.</exception>
    public string GB2312ToUTF8(string myString)
    {
        // "%B0%A1".Split('%') yields { "", "B0", "A1" }.
        string[] myWord = myString == null ? new string[0] : myString.Split('%');
        if (myWord.Length < 3)
        {
            throw new FormatException("Expected two percent-encoded bytes, e.g. \"%B0%A1\".");
        }
        byte[] myByte = new byte[] { Convert.ToByte(myWord[1], 16), Convert.ToByte(myWord[2], 16) };
        return Encoding.GetEncoding("GB2312").GetString(myByte);
    }

    /// <summary>
    /// Tells which search engine crawler, if any, a user-agent string belongs to.
    /// </summary>
    /// <param name="SystemInfo">User-agent text.</param>
    /// <returns>The first marker contained in the text (case-insensitive), or the
    /// literal string "null" when none is.</returns>
    public string isCrawler(string SystemInfo)
    {
        if (!string.IsNullOrEmpty(SystemInfo))
        {
            foreach (string Bot in EngineMarkers)
            {
                if (SystemInfo.IndexOf(Bot, StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    return Bot;
                }
            }
        }
        return "null";
    }

    /// <summary>
    /// Tells whether a URL looks like it comes from a search engine.
    /// </summary>
    /// <param name="str">Referrer URL; may be null or empty.</param>
    /// <returns>true when the URL contains any known engine marker (case-insensitive).</returns>
    public bool IsSearchEnginesGet(string str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return false;
        }
        for (int i = 0; i < EngineMarkers.Length; i++)
        {
            if (str.IndexOf(EngineMarkers[i], StringComparison.OrdinalIgnoreCase) >= 0)
            {
                return true;
            }
        }
        return false;
    }
    #endregion -搜索引擎-

    /// <summary>
    /// false: the handler keeps per-request state in instance fields, so an
    /// instance must not be shared between requests.
    /// </summary>
    public bool IsReusable
    {
        get
        {
            return false;
        }
    }
}
C#分析搜索引擎URL得到搜索关键字,并判断页面停留时间以及来源页面的更多相关文章
- C# 分析搜索引擎url 得到搜索关键字
using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.T ...
- 百度搜索URL参数 搜索关键字
http://www.baidu.com/s?wd=关键字 wd(Keyword):查询的关键词: http://www.baidu.com/s?wd=关键字&cl=3 cl(Class):搜 ...
- 搜索引擎case︱从搜索序列文本看高端商务车︱统计之都
朱雪宁(北京大学光华管理学院) 王汉生(北京大学光华管理学院) 摘要:本文对100万搜索引擎用户的13亿搜索序列文本进行探索分析,对高端车用户以及商学院人群做了描述对比,并 ...
- google 搜索关键字技巧
google 搜索关键字技巧 来源 https://www.cnblogs.com/qiudabai/articles/9143328.html inurl: 用于搜索网页上包含的URL. 这个语法 ...
- Django项目:CRM(客户关系管理系统)--26--18PerfectCRM实现King_admin搜索关键字
search_fields = ('name','qq',) 登陆密码设置参考 http://www.cnblogs.com/ujq3/p/8553784.html search_fields = ( ...
- 在 Angular 中实现搜索关键字高亮
在 Angular 中,我们不应该试图直接修改 DOM 的内容,当需要更新 DOM 内容的时候,应该修改的其实是我们的数据模型,也就是 $scope 中的数据,Angular 会帮助我们将修改之后的数 ...
- 仿百度壁纸客户端(五)——实现搜索动画GestureDetector手势识别,动态更新搜索关键字
仿百度壁纸客户端(五)--实现搜索动画GestureDetector手势识别,动态更新搜索关键字 百度壁纸系列 仿百度壁纸客户端(一)--主框架搭建,自定义Tab + ViewPager + Frag ...
- Elasticsearch 基于 URL 的搜索请求
背景 Elasticsearch 不像关系型数据库,没有简易的 SQL 用来查询数据,只能通过调用 RESTful API 实现查询.大体上查询分为两种,基于 URL 的和基于请求主体的.基于 URL ...
- 帝国cms搜索关键字调用标签(showsearch)怎么用
前面ytkah介绍了如何让帝国CMS7.2搜索模板支持动态标签调用,现在我们来说说怎么调用帝国cms搜索关键字调用标签(showsearch).在帝国cms后台那边的使用方法:[showsearch] ...
随机推荐
- USB协议-USB的包结构及包的分类
USB是串行总线,所以数据是一位一位地在数据线上传送的.既然是一位一位地传送,就存在着一个数据位先后的问题.USB使用的是LSB在前的方式,即先出来的是最低位数据,接下来是次低位,最后是最高位(MSB ...
- 【转】How-To-Ask-Questions-The-Smart-Way
提问的智慧 How To Ask Questions The Smart Way Copyright © 2001,2006,2014 Eric S. Raymond, Rick Moen 本指南英文 ...
- VIM正则表达式。
看了那个30分钟正则表达式入门.但是是在VIM使用过程中偶尔会出现达不得想要效果的情况.后面百度了下,确然,VIM的正则还是有点区别的. 转自:http://www.cppblog.com/kefen ...
- 349. Intersection of Two Arrays
Given two arrays, write a function to compute their intersection. Example:Given nums1 = [1, 2, 2, 1] ...
- 让Windows Server 2008 + IIS 7+ ASP.NET 支持10万并发请求(转载)
转自:http://www.cnblogs.com/dudu/archive/2009/11/10/1600062.html 今天下午17点左右,博客园博客站点出现这样的错误信息: Error Sum ...
- error_log() 范例
<?php// 如果无法连接到数据库,发送通知到服务器日志if (!Ora_Logon($username, $password)) { error_log("Oracle da ...
- phpwind将服务器数据同步到本地之后网站不显示或者排版错误
在将phpwind的数据同步到本地服务器之后 如果访问本地服务器的首页不能显示的话 首先要查看global.php文件中的D_P变量,官方默认 的此变量应该指向和R_P变量是同一个文件夹即网站的根目录 ...
- *** 安全沙箱冲突 *** 到 127.0.0.1:9999 的连接已停止 - 不允许从 file:///E:/flash/Flash/Vod/tag/Letvcloud__MainVodNew/bin-debug/Player.swf 进行连接
http://bbs.9ria.com/thread-69309-1-1.html C:\Windows\System32\Macromed\Flash\NPSWF64_21_0_0_242.dll ...
- 【转】非常适用的Sourceinsight插件,提高效率事半功倍
原文网址:http://www.cnblogs.com/wangqiguo/p/3713211.html 一直使用sourceinsight编辑C/C++代码,sourceinsight是一个非常好用 ...
- FileReader/FileWriter复制文件
public class Test{ /*FileReader/FileWriter读写乱码,待处理*/ public static void main(String[] args) throws I ...