bleve搜索引擎源码分析之索引——mapping真复杂啊
接下来看看下面index部分的源码实现:
data := struct {
Name string
Des string
}{
Name: "hello world this is bone",
Des: "this is a good time",
}
// index some data
index.Index("id", data)
其中,
index.Index("id", data)
实现代码:
// Index adds the specified index operation to the
// batch. NOTE: the bleve Index is not updated
// until the batch is executed.
func (b *Batch) Index(id string, data interface{}) error {
if id == "" {
return ErrorEmptyID
}
doc := document.NewDocument(id)
err := b.index.Mapping().MapDocument(doc, data)
if err != nil {
return err
}
b.internal.Update(doc)
return nil
}
根据mapping来映射文档,
b.index.Mapping().MapDocument(doc, data)
该代码的实现:
func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}) error {
docType := im.determineType(data)
docMapping := im.mappingForType(docType)
walkContext := im.newWalkContext(doc, docMapping)
if docMapping.Enabled {
docMapping.walkDocument(data, []string{}, []uint64{}, walkContext)
// see if the _all field was disabled
allMapping := docMapping.documentMappingForPath("_all")
if allMapping == nil || (allMapping.Enabled != false) {
field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, walkContext.excludedFromAll, document.IndexField|document.IncludeTermVectors)
doc.AddField(field)
}
}
return nil
}
func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
// allow default "json" tag to be overriden
structTagKey := dm.StructTagKey
if structTagKey == "" {
structTagKey = "json"
}
val := reflect.ValueOf(data)
typ := val.Type()
switch typ.Kind() {
case reflect.Map:
// FIXME can add support for other map keys in the future
if typ.Key().Kind() == reflect.String {
for _, key := range val.MapKeys() {
fieldName := key.String()
fieldVal := val.MapIndex(key).Interface()
dm.processProperty(fieldVal, append(path, fieldName), indexes, context)
}
}
case reflect.Struct:
for i := ; i < val.NumField(); i++ {
field := typ.Field(i)
fieldName := field.Name
// anonymous fields of type struct can elide the type name
if field.Anonymous && field.Type.Kind() == reflect.Struct {
fieldName = ""
}
// if the field has a name under the specified tag, prefer that
tag := field.Tag.Get(structTagKey)
tagFieldName := parseTagName(tag)
if tagFieldName == "-" {
continue
}
// allow tag to set field name to empty, only if anonymous
if field.Tag != "" && (tagFieldName != "" || field.Anonymous) {
fieldName = tagFieldName
}
if val.Field(i).CanInterface() {
fieldVal := val.Field(i).Interface()
newpath := path
if fieldName != "" {
newpath = append(path, fieldName)
}
dm.processProperty(fieldVal, newpath, indexes, context)
}
}
case reflect.Slice, reflect.Array:
for i := ; i < val.Len(); i++ {
if val.Index(i).CanInterface() {
fieldVal := val.Index(i).Interface()
dm.processProperty(fieldVal, path, append(indexes, uint64(i)), context)
}
}
case reflect.Ptr:
ptrElem := val.Elem()
if ptrElem.IsValid() && ptrElem.CanInterface() {
dm.processProperty(ptrElem.Interface(), path, indexes, context)
}
case reflect.String:
dm.processProperty(val.String(), path, indexes, context)
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
dm.processProperty(float64(val.Int()), path, indexes, context)
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
dm.processProperty(float64(val.Uint()), path, indexes, context)
case reflect.Float32, reflect.Float64:
dm.processProperty(float64(val.Float()), path, indexes, context)
case reflect.Bool:
dm.processProperty(val.Bool(), path, indexes, context)
}
}
func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
pathString := encodePath(path)
// look to see if there is a mapping for this field
subDocMapping := dm.documentMappingForPath(pathString)
closestDocMapping := dm.closestDocMapping(pathString)
// check to see if we even need to do further processing
if subDocMapping != nil && !subDocMapping.Enabled {
return
}
propertyValue := reflect.ValueOf(property)
if !propertyValue.IsValid() {
// cannot do anything with the zero value
return
}
propertyType := propertyValue.Type()
switch propertyType.Kind() {
case reflect.String:
propertyValueString := propertyValue.String()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
// first see if it can be parsed by the default date parser
dateTimeParser := context.im.DateTimeParserNamed(context.im.DefaultDateTimeParser)
if dateTimeParser != nil {
parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
if err != nil {
// index as text
fieldMapping := newTextFieldMappingDynamic(context.im)
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
} else {
// index as datetime
fieldMapping := newDateTimeFieldMappingDynamic(context.im)
fieldMapping.processTime(parsedDateTime, pathString, path, indexes, context)
}
}
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
dm.processProperty(float64(propertyValue.Int()), path, indexes, context)
return
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
dm.processProperty(float64(propertyValue.Uint()), path, indexes, context)
return
case reflect.Float64, reflect.Float32:
propertyValFloat := propertyValue.Float()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
fieldMapping := newNumericFieldMappingDynamic(context.im)
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
}
case reflect.Bool:
propertyValBool := propertyValue.Bool()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
fieldMapping := newBooleanFieldMappingDynamic(context.im)
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
}
case reflect.Struct:
switch property := property.(type) {
case time.Time:
// don't descend into the time struct
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processTime(property, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
fieldMapping := newDateTimeFieldMappingDynamic(context.im)
fieldMapping.processTime(property, pathString, path, indexes, context)
}
default:
dm.walkDocument(property, path, indexes, context)
}
default:
dm.walkDocument(property, path, indexes, context)
}
}
分词的部分终于来了!
func (fm *FieldMapping) processString(propertyValueString string, pathString string, path []string, indexes []uint64, context *walkContext) {
fieldName := getFieldName(pathString, path, fm)
options := fm.Options()
if fm.Type == "text" {
analyzer := fm.analyzerForField(path, context)
field := document.NewTextFieldCustom(fieldName, indexes, []byte(propertyValueString), options, analyzer)
context.doc.AddField(field)
if !fm.IncludeInAll {
context.excludedFromAll = append(context.excludedFromAll, fieldName)
}
} else if fm.Type == "datetime" {
dateTimeFormat := context.im.DefaultDateTimeParser
if fm.DateFormat != "" {
dateTimeFormat = fm.DateFormat
}
dateTimeParser := context.im.DateTimeParserNamed(dateTimeFormat)
if dateTimeParser != nil {
parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
if err == nil {
fm.processTime(parsedDateTime, pathString, path, indexes, context)
}
}
}
}
func (fm *FieldMapping) processFloat64(propertyValFloat float64, pathString string, path []string, indexes []uint64, context *walkContext) {
fieldName := getFieldName(pathString, path, fm)
if fm.Type == "number" {
options := fm.Options()
field := document.NewNumericFieldWithIndexingOptions(fieldName, indexes, propertyValFloat, options)
context.doc.AddField(field)
if !fm.IncludeInAll {
context.excludedFromAll = append(context.excludedFromAll, fieldName)
}
}
}
bleve搜索引擎源码分析之索引——mapping真复杂啊的更多相关文章
- bleve搜索引擎源码分析之索引——mapping和lucene一样,也有_all
例子: package main import ( "fmt" "github.com/blevesearch/bleve" ) func main() { / ...
- Spark源码分析 – 汇总索引
http://jerryshao.me/categories.html#architecture-ref http://blog.csdn.net/pelick/article/details/172 ...
- wukong引擎源码分析之索引——part 1 倒排列表本质是有序数组存储
searcher.IndexDocument(0, types.DocumentIndexData{Content: "此次百度收购将成中国互联网最大并购"}) engine.go ...
- wukong引擎源码分析之索引——part 3 文档评分 无非就是将docid对应的fields信息存储起来,为搜索结果rank评分用
之前的文章分析过,接受索引请求处理的代码在segmenter_worker.go里: func (engine *Engine) segmenterWorker() { for { request : ...
- lua源码分析 伪索引
Lua 提供了一个 注册表, 这是一个预定义出来的表, 可以用来保存任何 C 代码想保存的 Lua 值. 这个表可以用有效伪索引 LUA_REGISTRYINDEX 来定位. 任何 C 库都可以在这张 ...
- wukong引擎源码分析之索引——part 2 持久化 直接set(key,docID数组)在kv存储里
前面说过,接收indexerRequest的代码在index_worker.go里: func (engine *Engine) indexerAddDocumentWorker(shard int) ...
- 4 weekend110的textinputformat对切片规划的源码分析 + 倒排索引的mr实现 + 多个job在同一个main方法中提交
好的,现在,来weekend110的textinputformat对切片规划的源码分析, Inputformat默认是textinputformat,一通百通. 这就是今天,weekend110的te ...
- 【异常及源码分析】org.mybatis.spring.MyBatisSystemException: nested exception is org.apache.ibatis.type.TypeException: Could not set parameters for mapping: ParameterMapping
一.异常出现的场景 1)异常出现的SQL @Select("SELECT\n" + " id,discount_type ,min_charge, ${cardFee} ...
- Solr4.8.0源码分析(14)之SolrCloud索引深入(1)
Solr4.8.0源码分析(14) 之 SolrCloud索引深入(1) 上一章节<Solr In Action 笔记(4) 之 SolrCloud分布式索引基础>简要学习了SolrClo ...
随机推荐
- maven中的groupId和artifactId到底指的是什么?
具体回答如下: groupid和artifactId被统称为“坐标”是为了保证项目唯一性而提出的,如果你要把你项目弄到maven本地仓库去,你想要找到你的项目就必须根据这两个id去查找. groupI ...
- BestCoder Round #90 A+B题解!
BestCoder Round #90 A Kblack loves flag 题意有点迷不造思路很简单但不造怎么求随机数,纠结了一会后直接粘上题目所给的代码稍加修改A了. const int _K ...
- PTA 01-复杂度2 Maximum Subsequence Sum (25分)
题目地址 https://pta.patest.cn/pta/test/16/exam/4/question/663 5-1 Maximum Subsequence Sum (25分) Given ...
- uva12558 Egyptian Fractions (HARD version)(迭代深搜)
Egyptian Fractions (HARD version) 题解:迭代深搜模板题,因为最小个数,以此为乐观估价函数来迭代深搜,就可以了. #include<cstdio> #inc ...
- [NOIP1998] 普及组
三连击 题目描述 将1,2,…,9共9个数分成三组,分别组成三个三位数,且使这三个三位数构成1:2:3的比例,试求出所有满足条件的三个三位数. 输入输出格式 输入格式: 木有输入 输出格式: 若干行, ...
- 50个必备的实用jQuery代码段(转载)
本文会给你们展示50个jquery代码片段,这些代码能够给你的javascript项目提供帮助.其中的一些代码段是从jQuery1.4.2才开始支持的做法,另一些则是真正有用的函数或方法,他们能够帮助 ...
- Max Sum Plus Plus-HDU1024(dp)
Problem Description Now I think you have got an AC in Ignatius.L's "Max Sum" problem. To b ...
- css三大布局
标准流: 从左到右,从上到下块级元素独占一行,行内元素碰到父盒子边缘换行 浮动: 特点 1 元素浮动之后不占据原来的位置(脱标),变成立体,下面可以有东西,只影响下面的 2 浮动的盒子在一行上显示 3 ...
- 10-JS的函数学习
<html> <head> <title>js的函数学习</title> <meta charset="UTF-8"/> ...
- java课堂测试—根据模板完成一个简单的技术需求征集系统
课堂上老师发布了一个页面模板要求让我们实现一个系统的功能,模仿以后后端的简单工作情况. 然后在这个模板的基础上,提供了一个注册的网页模板,接着点击注册的按钮,发现register里面调用了zhu/zh ...