1.创建一个简单的索引:

package lia.meetlucene;

import java.io.File;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

/**
 * Minimal Lucene 3.0 indexing example: builds an index that contains a single
 * document with two stored, analyzed fields ("title" and "content").
 */
public class BasicIndexer {

    /**
     * Creates the index in a hard-coded directory and adds one document to it.
     *
     * @param args unused
     * @throws java.io.IOException if the index directory cannot be opened or written
     */
    public static void main(String[] args) throws java.io.IOException {
        String indexDir = "C:/Users/Administrator/Desktop/xdj";
        Directory dir = FSDirectory.open(new File(indexDir));

        // #3 Create the Lucene IndexWriter. The boolean "create" flag is true, so an
        // index that already exists in the directory is overwritten.
        IndexWriter writer = new IndexWriter(dir,
                new StandardAnalyzer(Version.LUCENE_30),
                true,
                IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            Document doc = new Document();

            // Field "title": stored and tokenized by the analyzer.
            String title = "i love china";
            doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));

            // Field "content": stored and tokenized by the analyzer.
            String content = "i love you, my mother land! ";
            doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));

            writer.addDocument(doc);
        } finally {
            // Close in finally so the writer is released even when indexing fails,
            // matching the try/finally style used by the Indexer example.
            writer.close();
        }
        System.out.println("Index Created!");
    }
}

2.创建一个复杂点的索引:

package lia.meetlucene;

import java.io.File;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

/**
 * Lucene 3.0 indexing example that writes three documents, each with a
 * "title", a "content" and a "time" field.
 */
public class BasicIndexer {

    /**
     * Creates the index in a hard-coded directory and adds three documents to it.
     *
     * @param args unused
     * @throws java.io.IOException if the index directory cannot be opened or written
     */
    public static void main(String[] args) throws java.io.IOException {
        String indexDir = "C:/Users/Administrator/Desktop/xdj";
        Directory dir = FSDirectory.open(new File(indexDir));

        // #3 Create the Lucene IndexWriter. The boolean "create" flag is true, so an
        // index that already exists in the directory is overwritten.
        IndexWriter writer = new IndexWriter(dir,
                new StandardAnalyzer(Version.LUCENE_30),
                true,
                IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            // Document 1
            addDocument(writer, "i love china", "i love you, my mother land! ", "2007-05-31");
            // Document 2
            addDocument(writer, "i love mom", "i love you, my mother! ", "2007-05-31");
            // Document 3
            addDocument(writer, "i love xiaoyue", "i love you, my wife! ", "2007-05-31");
        } finally {
            // Close in finally so the writer is released even when indexing fails.
            writer.close();
        }
        System.out.println("Index Three Created!");
    }

    /**
     * Builds one document from the given values and adds it to the index.
     *
     * @param writer  open writer that receives the document
     * @param title   value of the "title" field (stored, indexed without analysis)
     * @param content value of the "content" field (stored, indexed without analysis)
     * @param time    value of the "time" field (stored only, not indexed)
     * @throws java.io.IOException if the writer fails to add the document
     */
    private static void addDocument(IndexWriter writer, String title, String content,
            String time) throws java.io.IOException {
        Document doc = new Document();
        doc.add(new Field("title", title, Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("content", content, Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("time", time, Field.Store.YES, Field.Index.NO));
        writer.addDocument(doc);
    }
}

3.为文件创建一个索引

package lia.meetlucene;

import java.io.File;
import java.io.FileReader; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

/**
 * Lucene 3.0 indexing example that indexes two files from disk, one document
 * per file, with "name", "content" and "path" fields.
 */
public class BasicIndexer {

    /**
     * Creates the index in a hard-coded directory and indexes two hard-coded files.
     *
     * @param args unused
     * @throws java.io.IOException if the index directory or an input file cannot be
     *         opened, or the index cannot be written
     */
    public static void main(String[] args) throws java.io.IOException {
        String indexDir = "C:/Users/Administrator/Desktop/xdj";
        Directory dir = FSDirectory.open(new File(indexDir));

        // #3 Create the Lucene IndexWriter. The boolean "create" flag is true, so an
        // index that already exists in the directory is overwritten.
        IndexWriter writer = new IndexWriter(dir,
                new StandardAnalyzer(Version.LUCENE_30),
                true,
                IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            addFileDocument(writer, new File(
                    "E:/xdj/tengxun/a_______________mm/2014-02-19 06.59.53.xml"));
            addFileDocument(writer, new File(
                    "E:/xdj/tengxun/a_______________mm/2014-02-04 11.43.01.xml"));
        } finally {
            // Close in finally so the writer is released even when indexing fails
            // (for example when one of the input files does not exist).
            writer.close();
        }
        System.out.println("File Index Created!");
    }

    /**
     * Builds one document describing the given file and adds it to the index.
     *
     * @param writer open writer that receives the document
     * @param f      file whose name, text content and path are indexed
     * @throws java.io.IOException if the file cannot be opened or the writer fails
     */
    private static void addFileDocument(IndexWriter writer, File f)
            throws java.io.IOException {
        Document doc = new Document();
        // "name": file name, stored and indexed as a single un-analyzed token.
        doc.add(new Field("name", f.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        // "content": text is read from the file through a Reader-valued field.
        // NOTE(review): FileReader decodes with the platform default charset - confirm
        // this matches the encoding of the .xml inputs.
        doc.add(new Field("content", new FileReader(f)));
        // "path": stored for display only, not indexed.
        doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NO));
        writer.addDocument(doc);
    }
}

4.为某个文件夹下的所有文件创建索引

package lia.meetlucene;

/**
* Copyright Manning Publications Co.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version; import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.FileReader; // From chapter 1 /**
* This code was originally written for Erik's Lucene intro java.net article
*/
public class Indexer { public static void main(String[] args) throws Exception {
// args = new String[2];
// args[0] = "E:/xiaodajun/new/lia2e/src/lia/meetlucene";
// args[1] =
// "E:/xiaodajun/new/lia2e/src/lia/meetlucene/data";//"src/lia/meetlucene/data";
// C:/Users/Administrator/Desktop/xdj/data
if (args.length != 2) {
throw new IllegalArgumentException("Usage: java "
+ Indexer.class.getName() + " <index dir> <data dir>");
}
// String indexDir = args[0]; // 1
// String dataDir = args[1]; // String indexDir = "C:/Users/Administrator/Desktop/xdj/suoyin";
String dataDir = "C:/Users/Administrator/Desktop/xdj/data"; // String indexDir =
// "E:/xdj/tengxun";//"C:/Users/Administrator/Desktop/xdj/suoyin";
// String dataDir =
// "E:/xdj/tengxunsuoying";//"C:/Users/Administrator/Desktop/xdj/weibohanzi"; long start = System.currentTimeMillis();
// ///////////////////////////////////////////////////////////////////////////////////////////
Indexer indexer = new Indexer(indexDir);
int numIndexed;
try {
numIndexed = indexer.index(dataDir, new TextFilesFilter());
} finally {
indexer.close();
}
long end = System.currentTimeMillis();
// /////////////////////////////////////////////////////////////////////////////////////////////
System.out.println("Indexing " + numIndexed + " files took "
+ (end - start) + " milliseconds");
} private IndexWriter writer; public Indexer(String indexDir) throws IOException {
Directory dir = FSDirectory.open(new File(indexDir)); /*
* writer = new IndexWriter(dir, //3 创建Lucene Index Writer new
* StandardAnalyzer( //3 Version.LUCENE_30),//3 true, //3
* IndexWriter.MaxFieldLength.UNLIMITED); //3
*/
writer = new IndexWriter(dir, // 3 创建Lucene Index Writer
new SmartChineseAnalyzer(Version.LUCENE_20),//
true, //
IndexWriter.MaxFieldLength.UNLIMITED); //
} public void close() throws IOException {
writer.close(); // 4 关闭Lucene Index Writer
} public int index(String dataDir, FileFilter filter) throws Exception { File[] files = new File(dataDir).listFiles(); for (File f : files) {
if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()
&& (filter == null || filter.accept(f))) {
indexFile(f);
}
} return writer.numDocs(); // 5返沪被索引文档数
} private static class TextFilesFilter implements FileFilter {
public boolean accept(File path) {
return path.getName().toLowerCase() // 6只索引.txt文件,采用FileFilter
.endsWith(".txt"); //
}
} protected Document getDocument(File f) throws Exception {
Document doc = new Document();
doc.add(new Field("contents", new FileReader(f))); // 7索引文件内容
doc.add(new Field("filename", f.getName(), // 8索引文件名
Field.Store.YES, Field.Index.NOT_ANALYZED));//
doc.add(new Field("fullpath", f.getCanonicalPath(), // 9索引文件完整路径
Field.Store.YES, Field.Index.NOT_ANALYZED));//
return doc;
} // Store.是否存储 yes no compress(压缩之后再存)
// Index。是否进行索引 Index.ANALYZED 分词后进行索引,NOT_ANALYZED 不索引,NOT_ANALYZED 不分词直接索引 private void indexFile(File f) throws Exception {
System.out.println("Indexing " + f.getCanonicalPath());
Document doc = getDocument(f);
writer.addDocument(doc); // 10向Lucene索引中添加文档
}
} /*
* #1 Create index in this directory #2 Index *.txt files from this directory #3
* Create Lucene IndexWriter #4 Close IndexWriter #5 Return number of documents
* indexed #6 Index .txt files only, using FileFilter #7 Index file content #8
* Index file name #9 Index file full path #10 Add document to Lucene index
*/

5.《Lucene in Action》第二版索引 demo

package lia.meetlucene;

/**
* Copyright Manning Publications Co.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version; import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.FileReader; // From chapter 1 /**
* This code was originally written for Erik's Lucene intro java.net article
*/
public class Indexer { public static void main(String[] args) throws Exception {
// args = new String[2];
// args[0] = "E:/xiaodajun/new/lia2e/src/lia/meetlucene";
// args[1] =
// "E:/xiaodajun/new/lia2e/src/lia/meetlucene/data";//"src/lia/meetlucene/data";
// C:/Users/Administrator/Desktop/xdj/data
if (args.length != 2) {
throw new IllegalArgumentException("Usage: java "
+ Indexer.class.getName() + " <index dir> <data dir>");
}
// String indexDir = args[0]; // 1
// String dataDir = args[1]; // 2 // String indexDir = "C:/Users/Administrator/Desktop/xdj/suoyin";
// String dataDir = "C:/Users/Administrator/Desktop/xdj/data"; String indexDir = "C:/Users/Administrator/Desktop/xdj/suoyin";
String dataDir = "C:/Users/Administrator/Desktop/xdj/tengxun/A__Vae"; long start = System.currentTimeMillis();
// ///////////////////////////////////////////////////////////////////////////////////////////
Indexer indexer = new Indexer(indexDir);
int numIndexed;
try {
numIndexed = indexer.index(dataDir, new TextFilesFilter());
} finally {
indexer.close();
}
long end = System.currentTimeMillis();
// /////////////////////////////////////////////////////////////////////////////////////////////
System.out.println("Indexing " + numIndexed + " files took "
+ (end - start) + " milliseconds");
} private IndexWriter writer; public Indexer(String indexDir) throws IOException {
Directory dir = FSDirectory.open(new File(indexDir)); /*
* writer = new IndexWriter(dir, //3 创建Lucene Index Writer new
* StandardAnalyzer( //3 Version.LUCENE_30),//3 true, //3
* IndexWriter.MaxFieldLength.UNLIMITED); //3
*/
writer = new IndexWriter(dir, // 3 创建Lucene Index Writer
new SmartChineseAnalyzer(Version.LUCENE_20),// 3
// new StandardAnalyzer(Version.LUCENE_30),
true, //
IndexWriter.MaxFieldLength.UNLIMITED); //
} public void close() throws IOException {
writer.close(); // 4 关闭Lucene Index Writer
} public int index(String dataDir, FileFilter filter) throws Exception { File[] files = new File(dataDir).listFiles(); for (File f : files) {
if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()
&& (filter == null || filter.accept(f))) { indexFile(f);
}
} return writer.numDocs(); // 5返沪被索引文档数
} private static class TextFilesFilter implements FileFilter {
public boolean accept(File path) {
return path.getName().toLowerCase() // 6只索引.txt文件,采用FileFilter
.endsWith(".xml"); //
}
} protected Document getDocument(File f) throws Exception {
Document doc = new Document();
doc.add(new Field("contents", new FileReader(f))); // 7索引文件内容
doc.add(new Field("filename", f.getName(), // 8索引文件名
Field.Store.YES, Field.Index.NOT_ANALYZED));//
doc.add(new Field("fullpath", f.getCanonicalPath(), // 9索引文件完整路径
Field.Store.YES, Field.Index.NOT_ANALYZED));//
return doc;
} // Store.是否存储 yes no compress(压缩之后再存)
// Index。是否进行索引 Index.ANALYZED 分词后进行索引,NOT_ANALYZED 不索引,NOT_ANALYZED 不分词直接索引 private void indexFile(File f) throws Exception {
System.out.println("Indexing " + f.getCanonicalPath());
Document doc = getDocument(f);
writer.addDocument(doc); // 10向Lucene索引中添加文档
}
} /*
* #1 Create index in this directory #2 Index *.txt files from this directory #3
* Create Lucene IndexWriter #4 Close IndexWriter #5 Return number of documents
* indexed #6 Index .txt files only, using FileFilter #7 Index file content #8
* Index file name #9 Index file full path #10 Add document to Lucene index
*/

lucene 建立索引的不同方式的更多相关文章

  1. Lucene建立索引搜索入门实例

                                第一部分:Lucene建立索引 Lucene建立索引主要有以下两步:第一步:建立索引器第二步:添加索引文件准备在f盘建立lucene文件夹,然后 ...

  2. 【转】Lucene不同版本中Field的Keyword、UnIndex,导致lucene 建立索引总是报错 急!!

    lucene 建立索引 总是报错 急!! http://zhidao.baidu.com/link?url=iaVs9JH4DfN6iwaWImt7VMJENWCWGGaWFGPjqhUw_jz7Fs ...

  3. lucene 建立索引的过程

    时间 -- ::  CSDN博客 原文 http://blog.csdn.net/caohaicheng/article/details/ 看lucene主页(http://lucene.apach ...

  4. Lucene4.9学习笔记——Lucene建立索引

    基本上创建索引需要三个步骤: 1.创建索引库IndexWriter对象 2.根据文件创建文档Document 3.向索引库中写入文档内容 这其中主要涉及到了IndexWriter(索引的核心组件,用于 ...

  5. lucene建立索引的过程

    建立索引过程 用户提交数据=>solr建立索引=>调用lucene包建立索引 官方建立索引和查询索引的例子如下: http://lucene.apache.org/core/4_10_3/ ...

  6. html抽取文本信息-java版(适合lucene建立索引)

    import org.htmlparser.NodeFilter; import org.htmlparser.Parser; import org.htmlparser.beans.StringBe ...

  7. 利用Lucene将被索引文件目录中的所有文件建立索引

    1.新建两个文件夹htm和index,其中htm中存放被索引的文件,index文件中存放建立的索引文件. 2.新建解析目录中所有文件的类,用来解析指定目录下的所有文件. import java.io. ...

  8. Lucene实现索引和查询

    0引言 随着万维网的发展和大数据时代的到来,每天都有大量的数字化信息在生产.存储.传递和转化,如何从大量的信息中以一定的方式找到满足自己需求的信息,使之有序化并加以利用成为一大难题.全文检索技术是现如 ...

  9. 和我一起打造个简单搜索之Logstash实时同步建立索引

    用过 Solr 的朋友都知道,Solr 可以直接在配置文件中配置数据库连接从而完成索引的同步创建,但是 ElasticSearch 本身并不具备这样的功能,那如何建立索引呢?方法其实很多,可以使用 J ...

随机推荐

  1. codeforces B. New Year Present 解题报告

    题目链接:http://codeforces.com/contest/379/problem/B 题目意思:给定一个有n个钱包的序列,其中第i个钱包需要投入ai个钱币,需要编写一个程序,使得在对第i个 ...

  2. Codeforces7C 扩展欧几里得

    Line Time Limit: 1000MS   Memory Limit: 262144KB   64bit IO Format: %I64d & %I64u Submit Status ...

  3. [Android Pro] sqlite数据库的char,varchar,text,nchar,nvarchar,ntext的区别

    reference : http://blog.csdn.net/xingfeng0501/article/details/7817121 1.CHAR.CHAR存储定长数据很方便,CHAR字段上的索 ...

  4. Android procrank , showmap 内存分析

    (一)DDMS 的Heap Dump 1) Data Object:java object. 2) Class Object:object of type Class, e.g. what you'd ...

  5. 神经网络(luogu 1038 答案错误,出题人语体教)

    题目背景 人工神经网络(Artificial Neural Network)是一种新兴的具有自我学习能力的计算系统,在模式识别.函数逼近及贷款风险评估等诸多领域有广泛的应用.对神经网络的研究一直是当今 ...

  6. 谈JavaScript代码封装

    前言 也算老生常谈的问题了,再深入搞一搞怎么玩儿封装,如果看到这篇文章的你,正好你也是追求完美的代码洁癖狂者,那么这篇文章相信非常适合你. 举一个例子,编写一个Person类,具有name和birth ...

  7. 说说localStorage

    HTML5的本地存储是大势所趋,如果仅存储在内存中,则是sessionStorage,他们的语法都是一样,仅仅是一个存储在本地文件系统中,另一个存储在内存中(随着浏览器的关闭而消失),其语句如下: l ...

  8. 分布式架构从零开始========》【基于Java自身技术实现消息方式的系统间通信】

    基于Java自身包实现消息方式的系统间通信的方式有:TCP/IP+BIO,TCP/IP+NIO,UDP/IP+BIO,UDP/IP+NIO.下面就这4种类型一一做个详细的介绍: 一.TCP/IP+BI ...

  9. C++ friend

    如果类A希望类B可以访问它的私有成员, 可以把类B设置为友元类. // 类A,希望把私有成员公开给类B class A {     friend class B;// 把B设置为友元类 public: ...

  10. [转]有关USES_CONVERSION

    转自:http://blog.csdn.net/p40614021/article/details/6778100 ATL:转换宏是各种字符编码之间进行转换的一种很方便的方式,在函数调用时,它们显得非 ...