// Wrapper class

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using Lucene.Net.Analysis;
using Lucene.Net.Index;
using Lucene.Net.Documents;
using System.Reflection;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
namespace SearchTest
{
        /// <summary>
        /// Helper class for using the PanGu analyzer with Lucene.Net.
        /// Access it through PanGuLuceneHelper.instance.
        /// </summary>
    public class PanGuLuceneHelper
    {
        private PanGuLuceneHelper() { }

        #region Singleton instance
        private static PanGuLuceneHelper _instance = null;
        /// <summary>
        /// Singleton instance
        /// </summary>
        public static PanGuLuceneHelper instance
        {
            get
            {
                if (_instance == null) _instance = new PanGuLuceneHelper();
                return _instance;
            }
        }
        #endregion

        #region Tokenization test
        /// <summary>
        /// Tokenizes a keyword and returns the terms joined by "|".
        /// </summary>
        /// <param name="keyword">Text to tokenize.</param>
        /// <returns>The tokens separated by "|".</returns>
        public string Token(string keyword)
        {
            string ret = "";
            System.IO.StringReader reader = new System.IO.StringReader(keyword);
            // The first argument of TokenStream is a field name; it is not used by the PanGu analyzer here.
            Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(keyword, reader);
            Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita;
            bool hasNext = ts.IncrementToken();
            while (hasNext)
            {
                ita = ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
                ret += ita.Term + "|";
                hasNext = ts.IncrementToken();
            }
            ts.Dispose();
            reader.Close();
            return ret;
        }
        #endregion

        #region Create index
        /// <summary>
        /// Creates (or appends to) the index from a list of search units.
        /// </summary>
        /// <param name="datalist">Items to index.</param>
        /// <returns>True when indexing completes.</returns>
        public bool CreateIndex(List<MySearchUnit> datalist)
        {
            IndexWriter writer = null;
            try
            {
                writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);//false = append to the existing index (true would recreate it from scratch)
            }
            catch
            {
                writer = new IndexWriter(directory_luce, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);//no index exists yet, so create a new one
            }
            foreach (MySearchUnit data in datalist)
            {
                CreateIndex(writer, data);
            }
            writer.Optimize();
            writer.Dispose();
            return true;
        }

        public bool CreateIndex(IndexWriter writer, MySearchUnit data)
        {
            try
            {
                if (data == null) return false;
                Document doc = new Document();
                Type type = data.GetType();
                //read all public properties via reflection and add each one as a field
                PropertyInfo[] properties = type.GetProperties();
                for (int i = 0; i < properties.Length; i++)
                {
                    PropertyInfo pi = properties[i];
                    string name = pi.Name;
                    object objval = pi.GetValue(data, null);
                    string value = objval == null ? "" : objval.ToString();
                    if (name == "id" || name == "flag")//id must not be analyzed when indexed, otherwise searching and deleting by id become unreliable
                    {
                        doc.Add(new Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED));//id is not analyzed
                    }
                    else
                    {
                        doc.Add(new Field(name, value, Field.Store.YES, Field.Index.ANALYZED));
                    }
                }
                writer.AddDocument(doc);
            }
            catch (System.IO.FileNotFoundException)
            {
                throw;
            }
            return true;
        }
        #endregion

        #region Search on the title and content fields
        /// <summary>
        /// Searches the title and content fields.
        /// </summary>
        /// <param name="keyword">Query keyword.</param>
        /// <returns>Matching items, or null when nothing is found.</returns>
        public List<MySearchUnit> Search(string keyword)
        {
            string[] fields = { "title", "content" };//fields to search
            QueryParser parser = new MultiFieldQueryParser(version, fields, analyzer);//multi-field query
            Query query = parser.Parse(keyword);
            int n = 1000;
            IndexSearcher searcher = new IndexSearcher(directory_luce, true);//true = read-only
            TopDocs docs = searcher.Search(query, (Filter)null, n);
            if (docs == null || docs.TotalHits == 0)
            {
                return null;
            }
            else
            {
                List<MySearchUnit> list = new List<MySearchUnit>();
                foreach (ScoreDoc sd in docs.ScoreDocs)//walk the hits
                {
                    try
                    {
                        Document doc = searcher.Doc(sd.Doc);
                        string id = doc.Get("id");
                        string title = doc.Get("title");
                        string content = doc.Get("content");
                        string flag = doc.Get("flag");
                        string imageurl = doc.Get("imageurl");
                        string updatetime = doc.Get("updatetime");
                        string createdate = doc.Get("createdate");
                        //highlight the keyword in title and content with the PanGu highlighter
                        PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");
                        PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new PanGu.Segment());
                        highlighter.FragmentSize = 50;
                        content = highlighter.GetBestFragment(keyword, content);
                        string titlehighlight = highlighter.GetBestFragment(keyword, title);
                        if (!string.IsNullOrEmpty(titlehighlight)) title = titlehighlight;
                        list.Add(new MySearchUnit(id, title, content, flag, imageurl, updatetime));
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine(ex.Message);
                    }
                }
                return list;
            }
        }
        #endregion

        #region Search on title and content within a category (paged)
        /// <summary>
        /// Searches the title and content fields within a given category, with paging.
        /// </summary>
        /// <param name="_flag">Category; pass an empty string to search all categories.</param>
        /// <param name="keyword">Query keyword.</param>
        /// <param name="PageIndex">1-based page index.</param>
        /// <param name="PageSize">Number of items per page.</param>
        /// <param name="TotalCount">Total number of hits.</param>
        /// <returns>Matching items for the requested page, or null when nothing is found.</returns>
        public List<MySearchUnit> Search(string _flag, string keyword, int PageIndex, int PageSize, out int TotalCount)
        {
            if (PageIndex < 1) PageIndex = 1;
            BooleanQuery bq = new BooleanQuery();
            if (_flag != "")
            {
                QueryParser qpflag = new QueryParser(version, "flag", analyzer);
                Query qflag = qpflag.Parse(_flag);
                bq.Add(qflag, Occur.MUST);//AND
            }
            if (keyword != "")
            {
                string[] fields = { "title", "content" };//fields to search
                QueryParser parser = new MultiFieldQueryParser(version, fields, analyzer);//multi-field query
                Query queryKeyword = parser.Parse(keyword);
                bq.Add(queryKeyword, Occur.MUST);//AND
            }

            TopScoreDocCollector collector = TopScoreDocCollector.Create(PageIndex * PageSize, false);
            IndexSearcher searcher = new IndexSearcher(directory_luce, true);//true = read-only
            searcher.Search(bq, collector);
            if (collector == null || collector.TotalHits == 0)
            {
                TotalCount = 0;
                return null;
            }
            else
            {
                //number of hits to skip and the page size to fetch
                int start = PageSize * (PageIndex - 1);
                int limit = PageSize;
                ScoreDoc[] hits = collector.TopDocs(start, limit).ScoreDocs;
                List<MySearchUnit> list = new List<MySearchUnit>();
                TotalCount = collector.TotalHits;
                foreach (ScoreDoc sd in hits)//walk the hits
                {
                    try
                    {
                        Document doc = searcher.Doc(sd.Doc);
                        string id = doc.Get("id");
                        string title = doc.Get("title");
                        string content = doc.Get("content");
                        string flag = doc.Get("flag");
                        string imageurl = doc.Get("imageurl");
                        string updatetime = doc.Get("updatetime");
                        //highlight the keyword in title and content with the PanGu highlighter
                        PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");
                        PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new PanGu.Segment());
                        highlighter.FragmentSize = 50;
                        content = highlighter.GetBestFragment(keyword, content);
                        string titlehighlight = highlighter.GetBestFragment(keyword, title);
                        if (!string.IsNullOrEmpty(titlehighlight)) title = titlehighlight;
                        list.Add(new MySearchUnit(id, title, content, flag, imageurl, updatetime));
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine(ex.Message);
                    }
                }
                return list;
            }
        }
        #endregion

        #region Delete indexed data by id
        /// <summary>
        /// Deletes indexed documents by id.
        /// </summary>
        /// <param name="id">Id of the document to delete.</param>
        /// <returns>True when the writer reports pending deletions.</returns>
        public bool Delete(string id)
        {
            bool IsSuccess = false;
            Term term = new Term("id", id);
            //deletion could also be done with IndexReader.Open(directory_luce, false) + reader.DeleteDocuments(term);
            //an IndexWriter is used here instead
            IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);
            writer.DeleteDocuments(term); //a Query overload of DeleteDocuments is also available
            writer.Commit();
            //writer.Optimize();
            IsSuccess = writer.HasDeletions();
            writer.Dispose();
            return IsSuccess;
        }
        #endregion

        #region Delete all indexed data
        /// <summary>
        /// Deletes all documents from the index.
        /// </summary>
        /// <returns>True when the deletion succeeds.</returns>
        public bool DeleteAll()
        {
            bool IsSuccess = true;
            try
            {
                IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);
                writer.DeleteAll();
                writer.Commit();
                //writer.Optimize();
                IsSuccess = writer.HasDeletions();
                writer.Dispose();
            }
            catch
            {
                IsSuccess = false;
            }
            return IsSuccess;
        }
        #endregion

        #region directory_luce
        private Lucene.Net.Store.Directory _directory_luce = null;
        /// <summary>
        /// The Lucene.Net store directory that backs the index.
        /// </summary>
        public Lucene.Net.Store.Directory directory_luce
        {
            get
            {
                if (_directory_luce == null) _directory_luce = Lucene.Net.Store.FSDirectory.Open(directory);
                return _directory_luce;
            }
        }
        #endregion

        #region directory
        private System.IO.DirectoryInfo _directory = null;
        /// <summary>
        /// The on-disk directory that holds the index files.
        /// </summary>
        public System.IO.DirectoryInfo directory
        {
            get
            {
                if (_directory == null)
                {
                    string dirPath = AppDomain.CurrentDomain.BaseDirectory + "SearchIndex";
                    if (System.IO.Directory.Exists(dirPath) == false) _directory = System.IO.Directory.CreateDirectory(dirPath);
                    else _directory = new System.IO.DirectoryInfo(dirPath);
                }
                return _directory;
            }
        }
        #endregion

        #region analyzer
        private Analyzer _analyzer = null;
        /// <summary>
        /// The analyzer. A new PanGu analyzer is created on every access.
        /// </summary>
        public Analyzer analyzer
        {
            get
            {
                //if (_analyzer == null)
                {
                    _analyzer = new Lucene.Net.Analysis.PanGu.PanGuAnalyzer();//PanGu analyzer
                    //_analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);//standard analyzer
                }
                return _analyzer;
            }
        }
        #endregion

        #region version
        private static Lucene.Net.Util.Version _version = Lucene.Net.Util.Version.LUCENE_30;
        /// <summary>
        /// The Lucene.Net version enum used by parsers and analyzers.
        /// </summary>
        public Lucene.Net.Util.Version version
        {
            get
            {
                return _version;
            }
        }
        #endregion
    }

    #region Index row unit, equivalent to a row in a database table
    /// <summary>
    /// One row of the index, equivalent to a row in a database table.
    /// </summary>
    public class MySearchUnit
    {
        public MySearchUnit(string _id, string _title, string _content, string _flag, string _imageurl, string _updatetime)
        {
            this.id = _id;
            this.title = _title;
            this.content = _content;
            this.flag = _flag;
            this.imageurl = _imageurl;
            this.updatetime = _updatetime;
        }
        /// <summary>
        /// Unique id.
        /// </summary>
        public string id { get; set; }
        /// <summary>
        /// Title.
        /// </summary>
        public string title { get; set; }
        /// <summary>
        /// Content.
        /// </summary>
        public string content { get; set; }
        /// <summary>
        /// Other information (used here as a category flag).
        /// </summary>
        public string flag { get; set; }
        /// <summary>
        /// Image path.
        /// </summary>
        public string imageurl { get; set; }
        /// <summary>
        /// Update time.
        /// </summary>
        public string updatetime { get; set; }
    }
    #endregion
}
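A minimal usage sketch of the helper above. The class name PanGuLuceneHelperDemo and all ids, titles, image paths, and search terms below are invented sample values, not part of the original post. Note that both Search overloads return null when nothing matches, so callers should check for that:

// Hypothetical usage demo for PanGuLuceneHelper; all sample values are made up.
using System;
using System.Collections.Generic;

namespace SearchTest
{
    public class PanGuLuceneHelperDemo
    {
        public static void Run()
        {
            // Inspect how the PanGu analyzer tokenizes a phrase
            Console.WriteLine(PanGuLuceneHelper.instance.Token("盘古分词测试"));

            // Build (or append to) the index
            List<MySearchUnit> items = new List<MySearchUnit>
            {
                new MySearchUnit("1", "Lucene.Net站内搜索", "用盘古分词实现站内搜索", "news", "/img/1.jpg", "2014-01-01"),
                new MySearchUnit("2", "盘古分词介绍", "一个中英文分词组件", "news", "/img/2.jpg", "2014-01-02")
            };
            PanGuLuceneHelper.instance.CreateIndex(items);

            // Search title + content; returns null when there are no hits
            List<MySearchUnit> hits = PanGuLuceneHelper.instance.Search("搜索");
            Console.WriteLine(hits == null ? 0 : hits.Count);

            // Paged search restricted to the "news" category
            int total;
            List<MySearchUnit> page = PanGuLuceneHelper.instance.Search("news", "搜索", 1, 10, out total);
            Console.WriteLine("total hits: " + total);

            // Delete one document by id, or wipe the whole index
            PanGuLuceneHelper.instance.Delete("1");
            //PanGuLuceneHelper.instance.DeleteAll();
        }
    }
}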

Original article: http://blog.csdn.net/pukuimin1226/article/details/17558247
