Lucene 4.4 索引的增删改查

package com.lucene.test;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;

publicclass IndexUtil {

privatestaticfinal Logger LOGGER = Logger.getLogger(IndexUtil.class);

private Directory directory = null;

private DirectoryReader reader = null;

private IndexWriterConfig config = null;

private IndexWriter writer = null;

publicstaticfinal IndexUtil Instance = new IndexUtil();

private IndexUtil() {

try {

directory = FSDirectory.open(new File("D:/lucene/index"));

config = new IndexWriterConfig(Version.LUCENE_44,

new StandardAnalyzer(Version.LUCENE_44));

} catch (IOException e) {

e.printStackTrace();

}

/**

*添加索引

*@throwsIOException

publicvoid index() throws IOException {

writer = new IndexWriter(directory, config);

File file = new File("D:\\lucene\\example");

Document document = null;

int id = 0;

long start = new Date().getTime();

LOGGER.info("添加索引…………………………");

for (File f : file.listFiles()) {

document = new Document();

document.add(new StringField("name",f.getName(), Store.YES));

document.add(new IntField("id", id++,Store.YES));

document.add(new StringField("path",f.getAbsolutePath(), Store.YES));

document.add(new TextField("context", new FileReader(f)));

writer.addDocument(document);

}

long end = new Date().getTime();

LOGGER.info("添加索引完成，用时：" + (end - start) / 1000.0 + "s…………………………");

writer.close();

}

/**

*查询索引

*@throwsIOException

*@throwsParseException

publicvoid search() throws IOException, ParseException {

reader = DirectoryReader.open(directory);

QueryParser parser = newQueryParser(Version.LUCENE_44, "context",

new StandardAnalyzer(Version.LUCENE_44));

Query query = parser.parse("lucene");

IndexSearcher searcher = new IndexSearcher(reader);

TopDocs docs = searcher.search(query,100);

/**

*reader.maxDoc()包含索引文档的总数包含可用的和已经删除的数量

*reader.numDocs()当前可用的索引文档的数量不包含已经删除的

*reader.numDeletedDocs()删除的索引文档的数量

LOGGER.info("总记录：" + docs.totalHits + " 命中文档数：" + docs.scoreDocs.length

+ " 最大的文档数maxDoc：" + reader.maxDoc() + " 删除文件数numDeletedDocs："

+ reader.numDeletedDocs() + " numDocs" + reader.numDocs());

for (ScoreDoc doc : docs.scoreDocs) {

Document document = reader.document(doc.doc);

LOGGER.info("id:" +document.get("id") + " name:"

+ document.get("name") + " path:" + document.get("path"));

}

reader.close();

}

/**

*更新索引

*@throwsIOException

publicvoid update() throws IOException {

writer = new IndexWriter(directory, config);

Document document = new Document();

document.add(new StringField("name", "新文件", Store.YES));

document.add(new IntField("id", 12, Store.YES));

document.add(new StringField("path", "D:\\lucene\\example\\新文件.txt", Store.YES));

writer.updateDocument(new Term("id", "2"),document);

writer.commit();

writer.close();

}

/**

*删除索引删除的索引会保存到一个新的文件中（以del为结尾的文件相当于删除到回收站）

*@throwsIOException

publicvoid delete() throws IOException {

writer = new IndexWriter(directory, config);

writer.deleteDocuments(new Term("name", "11.txt"));

writer.close();

}

/**

*删除所有的索引删除的索引会保存到一个新的文件中（以del为结尾的文件相当于删除到回收站）

*@throwsIOException

publicvoid deleteAll() throws IOException {

writer = new IndexWriter(directory, config);

writer.deleteAll();

writer.close();

}

/**

*删除已经删除的索引对应上一个删除方法删除回收站的文件

*@throwsIOException

publicvoid forceMergeDeletes() throws IOException {

writer = new IndexWriter(directory, config);

writer.forceMergeDeletes();// 清空回收站

writer.close();

}

/**

*显示所有的索引

*@throwsIOException

publicvoid showIndex() throws IOException {

reader = DirectoryReader.open(directory);

Fields fields = MultiFields.getFields(reader); //获取directory中所有的field

for (String field : fields) {

LOGGER.info(field);

}

//显示 field 中 context的所有的分词

Terms terms = fields.terms("context");

TermsEnum termsEnum = terms.iterator(null);

BytesRef term = null;

while ((term=termsEnum.next()) !=null) {

System.out.print(term.utf8ToString()+"\t");//分词的内容

System.out.print(termsEnum.docFreq()+"\t");//出现该分词的有文档的数量

System.out.print(termsEnum.totalTermFreq()+"\t");//分词的总数

DocsAndPositionsEnumdocsAndPositionsEnum = termsEnum.docsAndPositions(null, null);

//如果要查询的字段没有被分词，docsAndPositionsEnum就会为空继续循环

if(docsAndPositionsEnum==null){

continue;

}

int docId ;

while ((docId = docsAndPositionsEnum.nextDoc())!= DocIdSetIterator.NO_MORE_DOCS) {

Document document = reader.document(docId);//获取document对象

System.out.print(docId+"\t");//分词的总数

System.out.print(document.get("name")+"\t");//可以获取document中field的值

int freq = docsAndPositionsEnum.freq();//该document中该分词出现的次数

for (int i = 0; i < freq; i++) {

System.out.print(docsAndPositionsEnum.nextPosition()+":"); //分词的位置

System.out.print("["+docsAndPositionsEnum.startOffset()+"");//分词起始偏移量的位置

System.out.print(docsAndPositionsEnum.endOffset()+"],");//分词结束偏移量的位置

System.out.print(docsAndPositionsEnum.getPayload()+"\t");

}

System.out.println();

}

reader.close();

}

lucene4.4 索引的增删改查的更多相关文章

elasticsearch索引的增删改查入门
为了方便直观我们使用Head插件提供的接口进行演示,实际上内部调用的RESTful接口. RESTful接口URL的格式: http://localhost:9200/<index>/&l ...
列表(索引切片增删改查嵌套) range 元组的初识
li = ["alex", "WuSir", "ritian", "barry", "wenzhou" ...
lucene_03_索引的增删改查
lucene索引的添加见 http://www.cnblogs.com/getchen/p/8615276.html 入门代码. 公共代码 public <T extends Query> ...
Java solr 索引数据增删改查
具体代码如下: import java.io.IOException; import java.util.*; import org.apache.solr.client.solrj.SolrClie ...
elasticsearch java索引的增删改查
1.创建索引并插入数据 Map<String, Object> json = new HashMap<String, Object>(); json.put("use ...
Elasticsearch 索引文档的增删改查
利用Elasticsearch-head可以在界面上(http://127.0.0.1:9100/)对索引进行增删改查 1.RESTful接口使用方法为了方便直观我们使用Head插件提供的接口进行演 ...
Es图形化软件使用之ElasticSearch-head、Kibana，Elasticsearch之-倒排索引操作、映射管理、文档增删改查
今日内容概要 ElasticSearch之-ElasticSearch-head ElasticSearch之-安装Kibana Elasticsearch之-倒排索引 Elasticsearch之- ...
分布式搜索elasticsearch 索引文档的增删改查入门
1.RESTful接口使用方法为了方便直观我们使用Head插件提供的接口进行演示,实际上内部调用的RESTful接口. RESTful接口URL的格式: http://localhost:9200/ ...
mongoDB 学习笔记纯干货（mongoose、增删改查、聚合、索引、连接、备份与恢复、监控等等）
最后更新时间:2017-07-13 11:10:49 原始文章链接:http://www.lovebxm.com/2017/07/13/mongodb_primer/ MongoDB - 简介官网: ...

随机推荐

Flex中怎么给表格中的滚动栏定位
1.问题背景假设表格中的字段过多,会出现滚动条,在将滚动条滚到一定的位置时,重新刷新表格,滚动条会回到原处,原来查看的字段还得继续滚动,才能查看到. 2.实现实例 <? xml versio ...
设计模式 - 命令模式(command pattern) 宏命令(macro command) 具体解释
命令模式(command pattern) 宏命令(macro command) 具体解释本文地址: http://blog.csdn.net/caroline_wendy 參考: 命名模式(撤销) ...
Eclipse一个打开文件夹目录插件——Open In Explorer
MyEclipse开发的都常用到其中一个"Open In Explorer"的小插件,可以直接在Windows资源管理器中打开选中文件所在的目录,工具虽小,但我们经常需要用到它由 ...
查询PO的预付款剩余金额
FUNCTION zrfc_mm016. *"---------------------------------------------------------------------- * ...
VC 为静态控件添加事件
操作系统:Windows 7软件环境:Visual C++ 2008 SP1本次目的:为静态控件添加事件,如:STATIC.Picture Control等等有时候我们找到一个图片,为对话框背景添加 ...
PHP $_SERVER
$_SERVER 是一个包括了诸如头信息(header).路径(path).以及脚本位置(script locations)等等信息的数组.这个数组中的项目由 Web server创建.不能保证每一个 ...
Oracle_Database_11g_标准版_企业版__下载地址_详细列表
Oracle_Database_11g_标准版_企业版__下载地址_详细列表 Oracle Database 11g Release 2 Standard Edition and Enterprise ...
（1）前言——（10）jquery项目的历史（History of the jQuery project）
This book covers the functionality and syntax of jQuery 1.6.x, the latest version at the time of wri ...
APNS 那些事！
之前在消息推送中间件APush里实现了对APNS的桥接.并利用业余时间阅读了官方指南Local and Push Notification Programming Guide.蛮有心得的.稍作总结.分 ...
c#后台弹出提示
Page.ClientScript.RegisterClientScriptBlock(typeof(string), "", @"<script>alert ...

lucene4.4 索引的增删改查

lucene4.4 索引的增删改查的更多相关文章

随机推荐

热门专题