bleve搜索引擎源码分析之索引——mapping真复杂啊

接下来看看下面index部分的源码实现：

    data := struct {

        Name string

        Des  string

    }{

        Name: "hello world this is bone",

        Des:  "this is a good time",

    }

    // index some data

    index.Index("id", data)

其中，

index.Index("id", data)

实现代码（注意：下面贴出的是 Batch 上的 Index 方法，接收者为 *Batch）：

// Index maps data onto a new document identified by id and records
// that document on the batch through the internal batch's Update.
// It returns ErrorEmptyID when id is empty, or the error reported by
// the index mapping when the data cannot be mapped.
// NOTE: the bleve Index is not updated until the batch is executed.
func (b *Batch) Index(id string, data interface{}) error {
    if len(id) == 0 {
        return ErrorEmptyID
    }

    mapped := document.NewDocument(id)
    if mapErr := b.index.Mapping().MapDocument(mapped, data); mapErr != nil {
        return mapErr
    }

    b.internal.Update(mapped)
    return nil
}

根据mapping来映射文档，

 b.index.Mapping().MapDocument(doc, data)

该代码的实现：

// MapDocument fills doc with fields derived from data. The document
// mapping is chosen from the type determined for data; when that
// mapping is disabled nothing is added. Otherwise data is walked and,
// unless an "_all" mapping exists and is disabled, a composite "_all"
// field is appended that excludes the fields collected in the walk
// context. As written, the method always returns nil.
func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}) error {
    mapping := im.mappingForType(im.determineType(data))
    wctx := im.newWalkContext(doc, mapping)
    if !mapping.Enabled {
        return nil
    }

    mapping.walkDocument(data, []string{}, []uint64{}, wctx)

    // the _all field is added unless its mapping is present and disabled
    if all := mapping.documentMappingForPath("_all"); all == nil || all.Enabled {
        composite := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, wctx.excludedFromAll, document.IndexField|document.IncludeTermVectors)
        doc.AddField(composite)
    }
    return nil
}

func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {

    // allow default "json" tag to be overriden

    structTagKey := dm.StructTagKey

    if structTagKey == "" {

        structTagKey = "json"

    }

    val := reflect.ValueOf(data)

    typ := val.Type()

    switch typ.Kind() {

    case reflect.Map:

        // FIXME can add support for other map keys in the future

        if typ.Key().Kind() == reflect.String {

            for _, key := range val.MapKeys() {

                fieldName := key.String()

                fieldVal := val.MapIndex(key).Interface()

                dm.processProperty(fieldVal, append(path, fieldName), indexes, context)

            }

        }

    case reflect.Struct:

        for i := ; i < val.NumField(); i++ {

            field := typ.Field(i)

            fieldName := field.Name

            // anonymous fields of type struct can elide the type name

            if field.Anonymous && field.Type.Kind() == reflect.Struct {

                fieldName = ""

            }

            // if the field has a name under the specified tag, prefer that

            tag := field.Tag.Get(structTagKey)

            tagFieldName := parseTagName(tag)

            if tagFieldName == "-" {

                continue

            }

            // allow tag to set field name to empty, only if anonymous

            if field.Tag != "" && (tagFieldName != "" || field.Anonymous) {

                fieldName = tagFieldName

            }

            if val.Field(i).CanInterface() {

                fieldVal := val.Field(i).Interface()

                newpath := path

                if fieldName != "" {

                    newpath = append(path, fieldName)

                }

                dm.processProperty(fieldVal, newpath, indexes, context)

            }

        }

    case reflect.Slice, reflect.Array:

        for i := ; i < val.Len(); i++ {

            if val.Index(i).CanInterface() {

                fieldVal := val.Index(i).Interface()

                dm.processProperty(fieldVal, path, append(indexes, uint64(i)), context)

            }

        }

    case reflect.Ptr:

        ptrElem := val.Elem()

        if ptrElem.IsValid() && ptrElem.CanInterface() {

            dm.processProperty(ptrElem.Interface(), path, indexes, context)

        }

    case reflect.String:

        dm.processProperty(val.String(), path, indexes, context)

    case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:

        dm.processProperty(float64(val.Int()), path, indexes, context)

    case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:

        dm.processProperty(float64(val.Uint()), path, indexes, context)

    case reflect.Float32, reflect.Float64:

        dm.processProperty(float64(val.Float()), path, indexes, context)

    case reflect.Bool:

        dm.processProperty(val.Bool(), path, indexes, context)

    }

}

func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {

    pathString := encodePath(path)

    // look to see if there is a mapping for this field

    subDocMapping := dm.documentMappingForPath(pathString)

    closestDocMapping := dm.closestDocMapping(pathString)

    // check to see if we even need to do further processing

    if subDocMapping != nil && !subDocMapping.Enabled {

        return

    }

    propertyValue := reflect.ValueOf(property)

    if !propertyValue.IsValid() {

        // cannot do anything with the zero value

        return

    }

    propertyType := propertyValue.Type()

    switch propertyType.Kind() {

    case reflect.String:

        propertyValueString := propertyValue.String()

        if subDocMapping != nil {

            // index by explicit mapping

            for _, fieldMapping := range subDocMapping.Fields {

                fieldMapping.processString(propertyValueString, pathString, path, indexes, context)

            }

        } else if closestDocMapping.Dynamic {

            // automatic indexing behavior

            // first see if it can be parsed by the default date parser

            dateTimeParser := context.im.DateTimeParserNamed(context.im.DefaultDateTimeParser)

            if dateTimeParser != nil {

                parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)

                if err != nil {

                    // index as text

                    fieldMapping := newTextFieldMappingDynamic(context.im)

                    fieldMapping.processString(propertyValueString, pathString, path, indexes, context)

                } else {

                    // index as datetime

                    fieldMapping := newDateTimeFieldMappingDynamic(context.im)

                    fieldMapping.processTime(parsedDateTime, pathString, path, indexes, context)

                }

            }

    case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:

        dm.processProperty(float64(propertyValue.Int()), path, indexes, context)

        return

    case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:

        dm.processProperty(float64(propertyValue.Uint()), path, indexes, context)

        return

    case reflect.Float64, reflect.Float32:

        propertyValFloat := propertyValue.Float()

        if subDocMapping != nil {

            // index by explicit mapping

            for _, fieldMapping := range subDocMapping.Fields {

                fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)

            }

        } else if closestDocMapping.Dynamic {

            // automatic indexing behavior

            fieldMapping := newNumericFieldMappingDynamic(context.im)

            fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)

        }

    case reflect.Bool:

        propertyValBool := propertyValue.Bool()

        if subDocMapping != nil {

            // index by explicit mapping

            for _, fieldMapping := range subDocMapping.Fields {

                fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)

            }

        } else if closestDocMapping.Dynamic {

            // automatic indexing behavior

            fieldMapping := newBooleanFieldMappingDynamic(context.im)

            fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)

        }

    case reflect.Struct:

        switch property := property.(type) {

        case time.Time:

            // don't descend into the time struct

            if subDocMapping != nil {

                // index by explicit mapping

                for _, fieldMapping := range subDocMapping.Fields {

                    fieldMapping.processTime(property, pathString, path, indexes, context)

                }

            } else if closestDocMapping.Dynamic {

                fieldMapping := newDateTimeFieldMappingDynamic(context.im)

                fieldMapping.processTime(property, pathString, path, indexes, context)

            }

        default:

            dm.walkDocument(property, path, indexes, context)

        }

    default:

        dm.walkDocument(property, path, indexes, context)

    }

}

分词的部分终于来了！

// processString indexes a string value according to fm.Type.
//
// For "text" mappings a text field is built with the analyzer resolved
// for the field and added to the document in context; when the mapping
// is not included in _all, the field name is recorded in
// context.excludedFromAll. For "datetime" mappings the string is parsed
// with the parser named by fm.DateFormat (falling back to the index
// mapping's default parser name) and, on success, forwarded to
// processTime; a missing parser or a parse failure is silently
// ignored. Any other mapping type does nothing.
func (fm *FieldMapping) processString(propertyValueString string, pathString string, path []string, indexes []uint64, context *walkContext) {
    name := getFieldName(pathString, path, fm)
    opts := fm.Options()

    switch fm.Type {
    case "text":
        analyzer := fm.analyzerForField(path, context)
        textField := document.NewTextFieldCustom(name, indexes, []byte(propertyValueString), opts, analyzer)
        context.doc.AddField(textField)

        if !fm.IncludeInAll {
            context.excludedFromAll = append(context.excludedFromAll, name)
        }

    case "datetime":
        parserName := context.im.DefaultDateTimeParser
        if fm.DateFormat != "" {
            parserName = fm.DateFormat
        }

        parser := context.im.DateTimeParserNamed(parserName)
        if parser == nil {
            return
        }
        if parsed, err := parser.ParseDateTime(propertyValueString); err == nil {
            fm.processTime(parsed, pathString, path, indexes, context)
        }
    }
}

// processFloat64 indexes a numeric value when fm.Type is "number": a
// numeric field carrying the mapping's options is added to the document
// in context, and the field name is recorded in context.excludedFromAll
// when the mapping is not included in _all. Other mapping types are
// ignored.
func (fm *FieldMapping) processFloat64(propertyValFloat float64, pathString string, path []string, indexes []uint64, context *walkContext) {
    name := getFieldName(pathString, path, fm)
    if fm.Type != "number" {
        return
    }

    numeric := document.NewNumericFieldWithIndexingOptions(name, indexes, propertyValFloat, fm.Options())
    context.doc.AddField(numeric)

    if fm.IncludeInAll {
        return
    }
    context.excludedFromAll = append(context.excludedFromAll, name)
}

bleve搜索引擎源码分析之索引——mapping真复杂啊的更多相关文章

bleve搜索引擎源码分析之索引——mapping和lucene一样，也有_all
例子: package main import ( "fmt" "github.com/blevesearch/bleve" ) func main() { / ...
Spark源码分析 – 汇总索引
http://jerryshao.me/categories.html#architecture-ref http://blog.csdn.net/pelick/article/details/172 ...
wukong引擎源码分析之索引——part 1 倒排列表本质是有序数组存储
searcher.IndexDocument(0, types.DocumentIndexData{Content: "此次百度收购将成中国互联网最大并购"}) engine.go ...
wukong引擎源码分析之索引——part 3 文档评分无非就是将docid对应的fields信息存储起来，为搜索结果rank评分用
之前的文章分析过,接受索引请求处理的代码在segmenter_worker.go里: func (engine *Engine) segmenterWorker() { for { request : ...
lua源码分析伪索引
Lua 提供了一个注册表, 这是一个预定义出来的表, 可以用来保存任何 C 代码想保存的 Lua 值. 这个表可以用有效伪索引 LUA_REGISTRYINDEX 来定位. 任何 C 库都可以在这张 ...
wukong引擎源码分析之索引——part 2 持久化直接set（key，docID数组）在kv存储里
前面说过,接收indexerRequest的代码在index_worker.go里: func (engine *Engine) indexerAddDocumentWorker(shard int) ...
4 weekend110的textinputformat对切片规划的源码分析 + 倒排索引的mr实现 + 多个job在同一个main方法中提交
好的,现在,来weekend110的textinputformat对切片规划的源码分析, Inputformat默认是textinputformat,一通百通. 这就是今天,weekend110的te ...
【异常及源码分析】org.mybatis.spring.MyBatisSystemException: nested exception is org.apache.ibatis.type.TypeException: Could not set parameters for mapping: ParameterMapping
一.异常出现的场景 1)异常出现的SQL @Select("SELECT\n" + " id,discount_type ,min_charge, ${cardFee} ...
Solr4.8.0源码分析(14)之SolrCloud索引深入(1)
Solr4.8.0源码分析(14) 之 SolrCloud索引深入(1) 上一章节<Solr In Action 笔记(4) 之 SolrCloud分布式索引基础>简要学习了SolrClo ...

随机推荐

Leetcode 274.H指数
H指数给定一位研究者论文被引用次数的数组(被引用次数是非负整数).编写一个方法,计算出研究者的 h 指数. h 指数的定义: "一位有 h 指数的学者,代表他(她)的 N 篇论文中至多有 ...
使用jemalloc优化nginx和mysql内存管理
预先安装autoconf 和 make yum -y install autoconf make jemalloc的安装jiemalloc 开源项目网站 http://www.canonware.co ...
用 Gearman 分发 PHP 应用程序的工作负载【转载】
通过本文,了解工作分发系统 Gearman 并分发用 PHP.C.Ruby 及其他受支持语言编写的应用程序的工作负载. 尽管一个 Web 应用程序的大部分内容都与表示有关,但它的价值与竞争优势却可能体 ...
idea2019设置智能提示忽略大小写
2019的设置和2018的不太一样,话不多说,直接上干货.setting --> Editor --> General --> Code Completion 直接把这个选项前面的勾 ...
jackon - com.fasterxml.jackson.databind.exc.InvalidDefinitionException && UnrecognizedPropertyException: Unrecognized field 异常
在用jackson解析json数据是碰到的问题 1.首先是InvalidDefinitionException 测试发现可能是目标类中无无参数构造方法导致异常. 添加无参构造方法后发现前一个异常解决但 ...
Spring Data Redis与Jedis的选择（转）
说明:内容可能有点旧,需要在业务上做权衡. Redis的客户端有两种实现方式,一是可以直接调用Jedis来实现,二是可以使用Spring Data Redis,通过Spring的封装来调用.应该使用哪 ...
无限级分类Asp.net Mvc实现
无限级分类Asp.net Mvc实现无限级分类涉及到异步加载子类.加载当前类和匹配问题,现在做一个通用的实现. (一) 效果如下: (二)设计.实现及使用 (1)数据库 (a)表设计db ...
解决pycharm下安装reportLab报错的问题
在利用pycharm中自带的第三方安装工具安装reportLab时提示安装失败.失败的原因是缺失第三方扩展包.经过查阅查阅资料了解到一些python的第三方扩展包是需要python-dev支持的.我装 ...
深度学习笔记之使用Faster-Rcnn进行目标检测（原理篇）
不多说,直接上干货! Object Detection发展介绍 Faster rcnn是用来解决计算机视觉(CV)领域中Object Detection的问题的.经典的解决方案是使用: SS(sele ...
深度学习主机环境配置: Ubuntu16.04+Nvidia GTX 1080+CUDA8.0
不多说,直接上干货! 深度学习主机环境配置: Ubuntu16.04+Nvidia GTX 1080+CUDA8.0

bleve搜索引擎源码分析之索引——mapping真复杂啊

bleve搜索引擎源码分析之索引——mapping真复杂啊的更多相关文章

随机推荐

热门专题