lucene中创建索引库

package com.hope.lucene;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

import java.io.File;

/**
 * @author newcityman
 * @date 2020/1/15 - 0:01
 */
public class LuceneFirst {

    /**
     * Builds the on-disk Lucene index from the files in the source directory.
     *
     * <p>Every regular file directly inside the source directory becomes one document
     * with four stored text fields: {@code name}, {@code path}, {@code content}
     * (read as UTF-8) and {@code size} (byte count rendered as decimal text).
     * Entries that are not regular files (e.g. subdirectories) are skipped.
     *
     * @throws IllegalStateException if the source directory cannot be listed
     * @throws Exception if reading a file or writing to the index fails
     */
    @Test
    public void createIndex() throws  Exception{
        // Locate the files to index first. listFiles() returns null (not an empty
        // array) when the path does not exist or is not a directory, so fail with a
        // clear message instead of a NullPointerException inside the loop.
        File file = new File("G:\\workspace_idea3\\lucene\\temp\\searchsource");
        File[] files = file.listFiles();
        if (files == null) {
            throw new IllegalStateException("Cannot list source directory: " + file);
        }

        // 1. Open a Directory that stores the index on disk.
        // 2. Create an IndexWriter on top of that Directory.
        // try-with-resources closes the writer and then the directory even when
        // indexing one of the files throws.
        try (Directory directory = FSDirectory.open(new File("G:\\workspace_idea3\\lucene\\temp\\index").toPath());
             IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig())) {
            // 3. Create one document per file found on disk.
            for (File f : files) {
                // Only regular files can be read as text; skip subdirectories.
                if (!f.isFile()) {
                    continue;
                }
                // File name
                String fileName = f.getName();
                // File path
                String filePath = f.getPath();
                // File content
                String fileContent = FileUtils.readFileToString(f, "utf-8");
                // File size in bytes
                long fileSize = FileUtils.sizeOf(f);

                // Create the fields
                TextField fieldName = new TextField("name", fileName, Field.Store.YES);
                TextField fieldPath = new TextField("path", filePath, Field.Store.YES);
                TextField fieldContent = new TextField("content", fileContent, Field.Store.YES);
                TextField fieldSize = new TextField("size", fileSize+"", Field.Store.YES);

                // 4. Create the document and add the fields to it.
                Document document = new Document();
                document.add(fieldName);
                document.add(fieldPath);
                document.add(fieldContent);
                document.add(fieldSize);
                // 5. Write the document to the index.
                indexWriter.addDocument(document);
            }
        }
        // 6. The IndexWriter (and Directory) are closed by try-with-resources above.
    }

    /**
     * Searches the on-disk index for documents whose {@code content} field contains
     * the term {@code spring} and prints the name, path and size of up to 10 hits.
     *
     * @throws Exception if the index cannot be opened or read
     */
    @Test
    public void  searchIndex() throws  Exception{
        // 1. Open a Directory pointing at the index location.
        // 2. Open an IndexReader on it.
        // try-with-resources closes the reader and then the directory even when the
        // search or document loading throws.
        try (Directory directory = FSDirectory.open(new File("G:\\workspace_idea3\\lucene\\temp\\index").toPath());
             IndexReader indexReader = DirectoryReader.open(directory)) {
            // 3. Create the IndexSearcher.
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            // 4. Create the query: a TermQuery on the "content" field.
            TermQuery termQuery = new TermQuery(new Term("content", "spring"));
            // 5. Run the query.
            //    Argument 1: the query; argument 2: maximum number of hits to return.
            TopDocs topDocs = indexSearcher.search(termQuery, 10);
            // 6. Print the total number of matching documents.
            System.out.println("查询总记录数："+topDocs.totalHits);
            // 7. Get the list of hits.
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            // 8. Print the stored fields of each hit.
            for (ScoreDoc scoreDoc : scoreDocs) {
                // Document id of the hit
                int docId = scoreDoc.doc;
                // Load the document by id
                Document document = indexSearcher.doc(docId);
                System.out.println(document.get("name"));
                System.out.println(document.get("path"));
                System.out.println(document.get("size"));
                // "content" is stored as well but intentionally not printed here.
                System.out.println("++++++++++++++++++++++++++++++");
            }
        }
        // 9. The IndexReader (and Directory) are closed by try-with-resources above.
    }

/**
 * Prints, one per line, the tokens the standard analyzer produces for a sample
 * English sentence.
 *
 * @throws Exception if analysis fails
 */
@Test
public void testTokenStream() throws Exception {
    // 1. Create the Analyzer (a StandardAnalyzer).
    // 2. Obtain a TokenStream from the analyzer's tokenStream method.
    // try-with-resources closes the stream and then the analyzer, even on failure.
    try (Analyzer analyzer = new StandardAnalyzer();
         TokenStream tokenStream = analyzer.tokenStream("", "Learn how to create a web page with Spring MVC.")) {
        // 3. Register an attribute on the stream; it acts like a cursor that exposes
        //    the text of the current token.
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        // 4. reset() must be called before consuming, otherwise an exception is thrown.
        tokenStream.reset();
        // 5. Iterate over the tokens.
        while (tokenStream.incrementToken()) {
            System.out.println(charTermAttribute.toString());
        }
        // 6. Signal end-of-stream before closing, as the TokenStream consumer
        //    workflow requires (reset -> incrementToken* -> end -> close).
        tokenStream.end();
    }
}

lucene中创建索引库的更多相关文章

lucene简介创建索引和搜索初步
lucene简介创建索引和搜索初步一.什么是Lucene? Lucene最初是由Doug Cutting开发的,2000年3月,发布第一个版本,是一个全文检索引擎的架构,提供了完整的查询引擎和索引 ...
HBase中创建索引
hbasene(https://github.com/akkumar/hbasene)是开源项目,在hbase存储上封装使用Lucene来创建索引,代码API非常简单,熟悉lucene的朋友可以很方便 ...
lucene入门创建索引——（二）
1.程序宏观结构图
在Linux中创建静态库.a和动态库.so
转自:http://www.cnblogs.com/laojie4321/archive/2012/03/28/2421056.html 在Linux中创建静态库.a和动态库.so 我们通常把一些公用 ...
在Linux中创建静态库和动态库
我们通常把一些公用函数制作成函数库,供其它程序使用. 函数库分为静态库和动态库两种. 静态库在程序编译时会被连接到目标代码中,程序运行时将不再需要该静态库. 动态库在程序编译时并不会被连接到目标代码中 ...
在Linux中创建静态库和动态库（转）
我们通常把一些公用函数制作成函数库,供其它程序使用.函数库分为静态库和动态库两种.静态库在程序编译时会被连接到目标代码中,程序运行时将不再需要该静态库.动态库在程序编译时并不会被连接到目标代码中,而 ...
Elasticsearch Kibana中创建索引
在Kibana中创建索引和索引类型语法 PUT clockin{ "mappings": { "time": { } }} 查询索引下的所有数据 GET clo ...
Elasticsearch之curl创建索引库
关于curl的介绍,请移步 Elasticsearch学习概念之curl 启动es,请移步 Elasticsearch的前后台运行与停止(tar包方式) Elasticsearch的前后台运行与停止( ...
Elasticsearch之curl创建索引库和索引时注意事项
前提, Elasticsearch之curl创建索引库 Elasticsearch之curl创建索引注意事项 1.索引库名称必须要全部小写,不能以下划线开头,也不能包含逗号 2.如果没有明确指定索引 ...

随机推荐

SpringCloud升级之路2020.0.x版-31. FeignClient 实现断路器以及线程隔离限流的思路
本系列代码地址:https://github.com/JoJoTec/spring-cloud-parent 在前面一节,我们实现了 FeignClient 粘合 resilience4j 的 Ret ...
Are we ready for learned cardinality estimation?
Are we ready for learned Cardinality Estimation 摘要文章包括三大部分: 对于一个静态的数据库,本文将五种基于学习的基数估计方法与九种传统的基数估计方法 ...
Oracle system 用户无法登录问题
新手刚用Oracle数据库时,可能会遇到system用户无法登录情况. 问题原因:1.可能输入默认密码时输入错误(比较低级,一般不会犯). 2.可能你在安装的时候设置了密码,但是在登录的时候密码不正确 ...
在dotnet6发布之际,FastNat内网穿透,给开发人员送的硬货福利
一.FastNat可为您解决的问题 1.没有公网服务器,但是想发布共享本地的站点或网络程序到公网上,以供他人访问: 此项功能大大方便开发人员进行远程调试,微信小程序等开发工作进行. 2.需要远程到在其 ...
干掉if-else的方法
策略模式+工厂方法消除if else 假设需求为,根据不同勋章类型,处理相对应的勋章服务,优化前有以下代码: String medalType = "guest"; if (&qu ...
salesforce零基础学习（一百零九）Lightning Login启用以及配置
本篇参考:https://help.salesforce.com/s/articleView?id=sf.security_ll_overview.htm&type=5 我们在之前的篇中提到过 ...
虹软SDK基本使用
虹软SDK的简单使用 Java实现人脸识别,但是又不会自己实现算法,找SDK时发现了虹软.虹软SDK具有免费.识别率高等优点,然后到网上搜这个SDK的教程,没搜到,就自己探索,发现它自带的官方文档其实 ...
Python 3 快速入门 2 —— 流程控制与函数
本文假设你已经有一门面向对象编程语言基础,如Java等,且希望快速了解并使用Python语言.本文对重点语法和数据结构以及用法进行详细说明,同时对一些难以理解的点进行了图解,以便大家快速入门.一些较偏 ...
python检验代理ip是否可用、代理ip检验
python检验代理ip是否可用.代理ip检验安装相关模块: pip install requests 验证代理IP是否可用脚本: import random import telnetlib im ...
linux下面升级 Python版本并修改yum属性信息
最近需要在linux下使用python,故需要升级一下python版本,上网查询了一下相关资料,更新了一下linux下面的python环境,记录如下: linux下面升级 Python版本并修改yum ...

lucene中创建索引库

lucene中创建索引库的更多相关文章

随机推荐

热门专题