bleve搜索引擎源码分析之索引——mapping真复杂啊
接下来看看下面index部分的源码实现:
data := struct {
Name string
Des string
}{
Name: "hello world this is bone",
Des: "this is a good time",
} // index some data
index.Index("id", data)
其中,
index.Index("id", data)
实现代码:
// Index adds the specified index operation to the
// batch. NOTE: the bleve Index is not updated
// until the batch is executed.
func (b *Batch) Index(id string, data interface{}) error {
if id == "" {
return ErrorEmptyID
}
doc := document.NewDocument(id)
err := b.index.Mapping().MapDocument(doc, data)
if err != nil {
return err
}
b.internal.Update(doc)
return nil
}
根据mapping来映射文档,
b.index.Mapping().MapDocument(doc, data)
该代码的实现:
func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}) error {
docType := im.determineType(data)
docMapping := im.mappingForType(docType)
walkContext := im.newWalkContext(doc, docMapping)
if docMapping.Enabled {
docMapping.walkDocument(data, []string{}, []uint64{}, walkContext) // see if the _all field was disabled
allMapping := docMapping.documentMappingForPath("_all")
if allMapping == nil || (allMapping.Enabled != false) {
field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, walkContext.excludedFromAll, document.IndexField|document.IncludeTermVectors)
doc.AddField(field)
}
} return nil
}
func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
// allow default "json" tag to be overriden
structTagKey := dm.StructTagKey
if structTagKey == "" {
structTagKey = "json"
} val := reflect.ValueOf(data)
typ := val.Type()
switch typ.Kind() {
case reflect.Map:
// FIXME can add support for other map keys in the future
if typ.Key().Kind() == reflect.String {
for _, key := range val.MapKeys() {
fieldName := key.String()
fieldVal := val.MapIndex(key).Interface()
dm.processProperty(fieldVal, append(path, fieldName), indexes, context)
}
}
case reflect.Struct:
for i := ; i < val.NumField(); i++ {
field := typ.Field(i)
fieldName := field.Name
// anonymous fields of type struct can elide the type name
if field.Anonymous && field.Type.Kind() == reflect.Struct {
fieldName = ""
} // if the field has a name under the specified tag, prefer that
tag := field.Tag.Get(structTagKey)
tagFieldName := parseTagName(tag)
if tagFieldName == "-" {
continue
}
// allow tag to set field name to empty, only if anonymous
if field.Tag != "" && (tagFieldName != "" || field.Anonymous) {
fieldName = tagFieldName
} if val.Field(i).CanInterface() {
fieldVal := val.Field(i).Interface()
newpath := path
if fieldName != "" {
newpath = append(path, fieldName)
}
dm.processProperty(fieldVal, newpath, indexes, context)
}
}
case reflect.Slice, reflect.Array:
for i := ; i < val.Len(); i++ {
if val.Index(i).CanInterface() {
fieldVal := val.Index(i).Interface()
dm.processProperty(fieldVal, path, append(indexes, uint64(i)), context)
}
}
case reflect.Ptr:
ptrElem := val.Elem()
if ptrElem.IsValid() && ptrElem.CanInterface() {
dm.processProperty(ptrElem.Interface(), path, indexes, context)
}
case reflect.String:
dm.processProperty(val.String(), path, indexes, context)
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
dm.processProperty(float64(val.Int()), path, indexes, context)
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
dm.processProperty(float64(val.Uint()), path, indexes, context)
case reflect.Float32, reflect.Float64:
dm.processProperty(float64(val.Float()), path, indexes, context)
case reflect.Bool:
dm.processProperty(val.Bool(), path, indexes, context)
} }
func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
pathString := encodePath(path)
// look to see if there is a mapping for this field
subDocMapping := dm.documentMappingForPath(pathString)
closestDocMapping := dm.closestDocMapping(pathString) // check to see if we even need to do further processing
if subDocMapping != nil && !subDocMapping.Enabled {
return
} propertyValue := reflect.ValueOf(property)
if !propertyValue.IsValid() {
// cannot do anything with the zero value
return
}
propertyType := propertyValue.Type()
switch propertyType.Kind() {
case reflect.String:
propertyValueString := propertyValue.String()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior // first see if it can be parsed by the default date parser
dateTimeParser := context.im.DateTimeParserNamed(context.im.DefaultDateTimeParser)
if dateTimeParser != nil {
parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
if err != nil {
// index as text
fieldMapping := newTextFieldMappingDynamic(context.im)
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
} else {
// index as datetime
fieldMapping := newDateTimeFieldMappingDynamic(context.im)
fieldMapping.processTime(parsedDateTime, pathString, path, indexes, context)
}
}
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
dm.processProperty(float64(propertyValue.Int()), path, indexes, context)
return
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
dm.processProperty(float64(propertyValue.Uint()), path, indexes, context)
return
case reflect.Float64, reflect.Float32:
propertyValFloat := propertyValue.Float()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
fieldMapping := newNumericFieldMappingDynamic(context.im)
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
}
case reflect.Bool:
propertyValBool := propertyValue.Bool()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
fieldMapping := newBooleanFieldMappingDynamic(context.im)
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
}
case reflect.Struct:
switch property := property.(type) {
case time.Time:
// don't descend into the time struct
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processTime(property, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
fieldMapping := newDateTimeFieldMappingDynamic(context.im)
fieldMapping.processTime(property, pathString, path, indexes, context)
}
default:
dm.walkDocument(property, path, indexes, context)
}
default:
dm.walkDocument(property, path, indexes, context)
}
}
分词的部分终于来了!
func (fm *FieldMapping) processString(propertyValueString string, pathString string, path []string, indexes []uint64, context *walkContext) {
fieldName := getFieldName(pathString, path, fm)
options := fm.Options()
if fm.Type == "text" {
analyzer := fm.analyzerForField(path, context)
field := document.NewTextFieldCustom(fieldName, indexes, []byte(propertyValueString), options, analyzer)
context.doc.AddField(field) if !fm.IncludeInAll {
context.excludedFromAll = append(context.excludedFromAll, fieldName)
}
} else if fm.Type == "datetime" {
dateTimeFormat := context.im.DefaultDateTimeParser
if fm.DateFormat != "" {
dateTimeFormat = fm.DateFormat
}
dateTimeParser := context.im.DateTimeParserNamed(dateTimeFormat)
if dateTimeParser != nil {
parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
if err == nil {
fm.processTime(parsedDateTime, pathString, path, indexes, context)
}
}
}
} func (fm *FieldMapping) processFloat64(propertyValFloat float64, pathString string, path []string, indexes []uint64, context *walkContext) {
fieldName := getFieldName(pathString, path, fm)
if fm.Type == "number" {
options := fm.Options()
field := document.NewNumericFieldWithIndexingOptions(fieldName, indexes, propertyValFloat, options)
context.doc.AddField(field) if !fm.IncludeInAll {
context.excludedFromAll = append(context.excludedFromAll, fieldName)
}
}
}
bleve搜索引擎源码分析之索引——mapping真复杂啊的更多相关文章
- bleve搜索引擎源码分析之索引——mapping和lucene一样,也有_all
例子: package main import ( "fmt" "github.com/blevesearch/bleve" ) func main() { / ...
- Spark源码分析 – 汇总索引
http://jerryshao.me/categories.html#architecture-ref http://blog.csdn.net/pelick/article/details/172 ...
- wukong引擎源码分析之索引——part 1 倒排列表本质是有序数组存储
searcher.IndexDocument(0, types.DocumentIndexData{Content: "此次百度收购将成中国互联网最大并购"}) engine.go ...
- wukong引擎源码分析之索引——part 3 文档评分 无非就是将docid对应的fields信息存储起来,为搜索结果rank评分用
之前的文章分析过,接受索引请求处理的代码在segmenter_worker.go里: func (engine *Engine) segmenterWorker() { for { request : ...
- lua源码分析 伪索引
Lua 提供了一个 注册表, 这是一个预定义出来的表, 可以用来保存任何 C 代码想保存的 Lua 值. 这个表可以用有效伪索引 LUA_REGISTRYINDEX 来定位. 任何 C 库都可以在这张 ...
- wukong引擎源码分析之索引——part 2 持久化 直接set(key,docID数组)在kv存储里
前面说过,接收indexerRequest的代码在index_worker.go里: func (engine *Engine) indexerAddDocumentWorker(shard int) ...
- 4 weekend110的textinputformat对切片规划的源码分析 + 倒排索引的mr实现 + 多个job在同一个main方法中提交
好的,现在,来weekend110的textinputformat对切片规划的源码分析, Inputformat默认是textinputformat,一通百通. 这就是今天,weekend110的te ...
- 【异常及源码分析】org.mybatis.spring.MyBatisSystemException: nested exception is org.apache.ibatis.type.TypeException: Could not set parameters for mapping: ParameterMapping
一.异常出现的场景 1)异常出现的SQL @Select("SELECT\n" + " id,discount_type ,min_charge, ${cardFee} ...
- Solr4.8.0源码分析(14)之SolrCloud索引深入(1)
Solr4.8.0源码分析(14) 之 SolrCloud索引深入(1) 上一章节<Solr In Action 笔记(4) 之 SolrCloud分布式索引基础>简要学习了SolrClo ...
随机推荐
- 软件测试人员遇到发现的bug不能重现怎么办?
软件测试人员遇到发现的bug不能重现怎么办? 刚刚进入测试的童鞋们,想必都遇到过提出的bug,开发要求重现之后,但是在系统上已经重现不了的情况吧. 那么碰到这样的情况,不管开发还是测试都很纠结,开 ...
- C#窗体学生成绩管理系统
c#学生成绩管理系统 实现用户登录.注册 所有成绩查询.个人成绩查询 管理员审核.添加.删除用户 项目源码GIT:https://github.com/soulsjie/StuScoreMa.git
- ERP类系统设计学习
文章:分布式.服务化的ERP系统架构设计 文章的方法是对系统进行拆分,拆分成多个子系统.
- BestCoder Round #90 A+B题解!
BestCoder Round #90 A Kblack loves flag 题意有点迷不造思路很简单但不造怎么求随机数,纠结了一会后直接粘上题目所给的代码稍加修改A了. const int _K ...
- C# CreateDataTable
public DataTable CreateDataTable() { DataTable dataTable = new DataTable(); ...
- [luoguP1773] 符文之语_NOI导刊2010提高(02)(DP)
传送门 f[i][j]表示前i个数余数为j的最优解 sum[i][j]表示字符串i~j所构成的数 #include <cstdio> #include <cstring> #d ...
- 运动员最佳匹配问题(km算法)
洛谷传送门 带权二分图最大权完美匹配. 裸的km算法. 注意开long long. #include <cstdio> #include <cstring> #include ...
- [HNOI2015]实验比较 树形dp+组合数学
在合并的时候有可以加等于,或者继续用小于, 比如siz[x]和siz[y]合并,小于的区间为max(siz[x],siz[y])<=k<=siz[x]+siz[y], 然后就是合并成多少个 ...
- 矩形周长(codevs 2149)
题目描述 Description N(N<5000) 张矩形的海报,照片和其他同样形状的图片贴在墙上.它们的边都是垂直的或水平的.每个矩形可以部分或者全部覆盖其他矩形.所有的矩形组成的集合的轮廓 ...
- gcc,gdb基础学习2
gdb调试: 源文件:test.cc >>g++ -g test.cc -o test.o 这里需要参数-g,因为接下来要使用gdb进行调试,生成目标文件test·o 因为需要将调试信息 ...