ElasticSearch使用实践(文档操作)

可以使用Docker安装ES和Kibana:

使用docker-compose安装ElasticSearch和Kibana：

# Single-node Elasticsearch 7.13.3 plus Kibana 7.13.3 for local experiments.
version: '3.1'

services:
  elasticsearch:
    image: elasticsearch:7.13.3
    container_name: elasticsearch
    privileged: true
    environment:
      - "cluster.name=elasticsearch"
      - "discovery.type=single-node"
      # Keep the JVM heap well below the container memory limit configured
      # under deploy.resources.limits, otherwise the container can be OOM-killed.
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
      # NOTE(review): memory locking usually also needs an unlimited memlock
      # ulimit on the container - confirm for your Docker setup.
      - bootstrap.memory_lock=true
    volumes:
      # The official image keeps plugins, data and logs under
      # /usr/share/elasticsearch. On Linux hosts the ./es/* directories must be
      # writable by uid 1000, the user Elasticsearch runs as in the container.
      - ./es/plugins:/usr/share/elasticsearch/plugins
      - ./es/data:/usr/share/elasticsearch/data:rw
      - ./es/logs:/usr/share/elasticsearch/logs:rw
    ports:
      - 9200:9200
      - 9300:9300
    deploy:
      resources:
        limits:
          cpus: "2"
          memory: 1000M
        reservations:
          memory: 200M

  kibana:
    image: kibana:7.13.3
    container_name: kibana
    depends_on:
      - elasticsearch
    environment:
      # Kibana reaches Elasticsearch through the compose service name.
      ELASTICSEARCH_HOSTS: http://elasticsearch:9200
      I18N_LOCALE: zh-CN
    ports:
      - 5601:5601

执行：

docker-compose up -d

运行完毕后，访问本机的5601端口就可以看到Kibana的界面

数据基本操作

数据层面的基本概念：

索引：一类相似文档的集合，ES将数据存储在一个或多个index中。一个索引有多个分片，索引的数据会以某种方式分散到各个分片上存储
mapping：定义了索引里的文档有哪些字段，以及字段的类型
文档：向ES写入每一条数据都是一个文档，搜索也以文档为单位
字段：每个文档都有一个或多个字段，每个字段都有类型

索引管理

定义如下mapping，并创建索引：

PUT books

{

  "mappings": {

    "properties": {

        "book_id": {

          "type": "keyword"

        },

        "name": {

          "type": "text"

        },

        "author": {

          "type": "keyword"

        },

        "intro": {

          "type": "text"

        }

      }

  },

  "settings": {

    "number_of_shards": 3,

    "number_of_replicas": 1

  }

}

在 Kibana 中运行上述示例，可以创建 books 索引。books 索引包含 book_id(书本 ID)、name(书本名字)、author(作者)、intro(简介)四个字段

"settings": {

    "number_of_shards": 3,

    "number_of_replicas": 1

}

这一段指定了books索引含有3个主分片，每个主分片有1个副本

如果执行DELETE books就可以删除索引

Go语言代码：

package main

// mapping is the JSON request body used to create the "books" index. It
// declares book_id and author as keyword fields, name and intro as text
// fields, and asks for 3 shards with 1 replica.
const mapping = `{

		"mappings": {

			"properties": {

				"book_id": {

					"type": "keyword"

				},

				"name": {

					"type": "text"

				},

				"author": {

					"type": "keyword"

				},

				"intro": {

					"type": "text"

				}

			}

		},

		"settings": {

			"number_of_shards": 3,

			"number_of_replicas": 1

		}

	}`

// main connects to the Elasticsearch node at 127.0.0.1:9200 and creates the
// "books" index using the mapping constant as the request body.
func main() {
	// Node sniffing is disabled, so only the configured URL is used.
	es, connErr := elastic.NewClient(
		elastic.SetURL("http://127.0.0.1:9200"),
		elastic.SetSniff(false),
	)
	if connErr != nil {
		panic(connErr)
	}

	createReq := es.CreateIndex("books").BodyString(mapping)
	result, err := createReq.Do(context.Background())
	switch {
	case err != nil:
		log.Fatalln("Error creating the index:", err)
	case !result.Acknowledged:
		// The request went through but the cluster did not acknowledge it.
		log.Fatalln("Error creating the index:", result.Index)
	}

	fmt.Println("Index created:", result.Index)
}

新建文档

ES提供了两种创建文档的方式，一种是使用Index API索引文档，一种是使用Create API创建文档

使用Index：

PUT books/_doc/1

{

  "book_id": "4ee82462",

  "name": "深入Linux内核架构",

  "author": "Wolfgang Mauerer",

  "intro": "内容全面深入，领略linux内核的无限风光。"

}

Go语言代码：

// Book is a single document of the "books" index. The JSON tags are the field
// names used in the index mapping.
type Book struct {
	BookId string `json:"book_id"` // book identifier
	Name   string `json:"name"`    // book title
	Author string `json:"author"`  // author name
	Intro  string `json:"intro"`   // short introduction
}

// main connects to the Elasticsearch node at 127.0.0.1:9200 and indexes one
// Book into the "books" index under document ID "1".
func main() {
	client, err := elastic.NewClient(
		elastic.SetURL("http://127.0.0.1:9200"),
		elastic.SetSniff(false),
	)
	if err != nil {
		panic(err)
	}

	// Document to be written.
	book := Book{
		BookId: "4ee82462",
		Name:   "深入Linux内核架构",
		Author: "Wolfgang Mauerer",
		Intro:  "内容全面深入，领略linux内核的无限风光。",
	}

	ctx := context.Background()
	put, err := client.Index().Index("books").Id("1").BodyJson(book).Do(ctx)
	if err != nil {
		// Fail loudly: a bare return here would hide the error and exit
		// with status 0 as if the document had been indexed.
		panic(err)
	}

	fmt.Printf("Document id: %s, Index id: %s\n", put.Id, put.Index)
}

使用Create API：

PUT books/_create/2

{

  "book_id": "4ee82463",

  "name": "时间简史",

  "author": "史蒂芬霍金",

  "intro": "探索时间和空间核心秘密的引人入胜的故事。"

}

获取文档

可以通过ES的GET API来获取文档内容。获取文档有两种情况，一种是只获取一个文档的内容，另一种是同时获取多个文档的内容。

通过GET API获取单个文档：

GET books/_doc/1

响应：

{

  "_index" : "books",

  "_type" : "_doc",

  "_id" : "1",

  "_version" : 2,

  "_seq_no" : 1,

  "_primary_term" : 1,

  "found" : true,

  "_source" : {

    "book_id" : "4ee82462",

    "name" : "深入Linux内核架构",

    "author" : "Wolfgang Mauerer",

    "intro" : "内容全面深入，领略linux内核的无限风光。"

  }

}

Go语言代码：

// Book is the Go representation of a document's _source in the "books" index.
// The JSON tags are the field names used in the index mapping.
type Book struct {
	BookId string `json:"book_id"` // book identifier
	Name   string `json:"name"`    // book title
	Author string `json:"author"`  // author name
	Intro  string `json:"intro"`   // short introduction
}

// main connects to the Elasticsearch node at 127.0.0.1:9200, fetches document
// "1" from the "books" index and decodes its _source into a Book.
func main() {
	client, err := elastic.NewClient(
		elastic.SetURL("http://127.0.0.1:9200"),
		elastic.SetSniff(false),
	)
	if err != nil {
		panic(err)
	}

	ctx := context.Background()
	get, err := client.Get().Index("books").Id("1").Do(ctx)
	if err != nil {
		// Fail loudly: a bare return here would hide the error and exit
		// with status 0.
		panic(err)
	}

	// Without a hit there is no _source to decode.
	if !get.Found {
		fmt.Println("document not found")
		return
	}

	// NOTE(review): if get.Version is a pointer in the client version in use,
	// %d prints the pointer value rather than the version number - confirm.
	fmt.Printf("document id=%s version=%d index=%s\n",
		get.Id, get.Version, get.Index)

	data, err := get.Source.MarshalJSON()
	if err != nil {
		panic(err)
	}

	var book Book
	if err := json.Unmarshal(data, &book); err != nil {
		panic(err)
	}

	fmt.Println(book.Name, book.Author)
}

更新文档

POST books/_update/2

{

  "doc": {

    "name":"时间简史（视频版）",

    "intro": "探索时间和空间核心秘密的引人入胜的视频故事。"

  }

}

响应信息：

{

  "_index" : "books",

  "_type" : "_doc",

  "_id" : "2",

  "_version" : 2,

  "result" : "noop",

  "_shards" : {

    "total" : 0,

    "successful" : 0,

    "failed" : 0

  },

  "_seq_no" : 1,

  "_primary_term" : 1

}

Go语言代码：

package main

// BookUpdate mirrors the body of a partial-update request: the fields to
// change are nested under the "doc" key.
type BookUpdate struct {
	// Doc holds the fields to overwrite in the stored document.
	Doc struct {
		Name  string `json:"name"`  // new book title
		Intro string `json:"intro"` // new introduction text
	} `json:"doc"`
}

// main connects to the Elasticsearch node at 127.0.0.1:9200 and partially
// updates document "2" of the "books" index, changing only name and intro.
func main() {
	client, err := elastic.NewClient(
		elastic.SetURL("http://127.0.0.1:9200"),
		elastic.SetSniff(false),
	)
	if err != nil {
		panic(err)
	}

	update := BookUpdate{
		Doc: struct {
			Name  string `json:"name"`
			Intro string `json:"intro"`
		}{
			Name:  "时间简史（视频版）",
			Intro: "探索时间和空间核心秘密的引人入胜的视频故事。",
		},
	}

	// The update service's Doc method places its argument under the "doc" key
	// of the request body itself, so only the inner partial document is
	// passed. Passing the whole BookUpdate would send {"doc":{"doc":{...}}}
	// and leave name and intro unchanged.
	_, err = client.Update().Index("books").
		Id("2").Doc(update.Doc).Do(context.Background())
	if err != nil {
		log.Fatalln("Error updating the document:", err)
	}

	fmt.Printf("Document updated\n")
}

也可以通过索引文档的方式来更新数据，但会先删除数据再写入新的数据，所以无法只更新某些字段

除了用指定ID的方式来更新数据，还可以用update_by_query的方式：

POST books/_update_by_query

{

  "query": {

    "term": {

      "book_id": {

        "value": "4ee82462"

      }

    }

  },

  "script": {

    "source": "ctx._source.name='深入Linux内核架构1'",

    "lang": "painless"

  }

}

响应信息：

{

  "took" : 37,

  "timed_out" : false,

  "total" : 1,

  "updated" : 1,

  "deleted" : 0,

  "batches" : 1,

  "version_conflicts" : 0,

  "noops" : 0,

  "retries" : {

    "bulk" : 0,

    "search" : 0

  },

  "throttled_millis" : 0,

  "requests_per_second" : -1.0,

  "throttled_until_millis" : 0,

  "failures" : [ ]

}

Go语言代码：

package main

// main connects to the Elasticsearch node at 127.0.0.1:9200 and runs an
// update-by-query on the "books" index: every document whose book_id equals
// "4ee82462" gets its name rewritten by a script.
func main() {
	es, connErr := elastic.NewClient(
		elastic.SetURL("http://127.0.0.1:9200"),
		elastic.SetSniff(false),
	)
	if connErr != nil {
		panic(connErr)
	}

	// Which documents to touch, and what to do to each of them.
	matchBook := elastic.NewTermQuery("book_id", "4ee82462")
	rename := elastic.NewScript("ctx._source.name='深入Linux内核架构1'")

	request := es.UpdateByQuery("books").
		Query(matchBook).
		Script(rename).
		ProceedOnVersionConflict()

	outcome, err := request.Do(context.Background())
	if err != nil {
		panic(err)
	}

	fmt.Println("Documents updated:", outcome.Updated)
}

删除文档

DELETE books/_doc/2

Go语言代码：

package main

// main connects to the Elasticsearch node at 127.0.0.1:9200 and deletes
// document "2" from the "books" index, printing the reported result.
func main() {
	es, connErr := elastic.NewClient(
		elastic.SetURL("http://127.0.0.1:9200"),
		elastic.SetSniff(false),
	)
	if connErr != nil {
		panic(connErr)
	}

	deleteReq := es.Delete().Index("books").Id("2")
	outcome, err := deleteReq.Do(context.Background())
	if err != nil {
		panic(err)
	}

	fmt.Println("document deleted:", outcome.Result)
}

批量操作文档

Bulk API 支持在一次调用中操作不同的索引，使用时可以在 Body 中指定索引也可以在 URI 中指定索引。而且还可以同时支持 4 种类型的操作：

Index
Create
Update
Delete

Bulk API 的格式是用换行符分隔 JSON 的结构，第一行指定操作类型和元数据（索引、文档id等），紧接着的一行是这个操作的内容（文档数据，如果有的话。像简单的删除就没有。），其格式如下：

POST _bulk

{ "index" : { "_index" : "books", "_id" : "1" } }

{ "book_id": "4ee82462","name": "深入Linux内核架构","author": "Wolfgang Mauerer","intro": "内容全面深入，领略linux内核的无限风光。" }

{ "delete" : { "_index" : "books", "_id" : "2" } }

{ "create" : { "_index" : "books", "_id" : "3" } }

{ "book_id": "4ee82464","name": "深入Linux内核架构第三版","author": "Wolfgang Mauerer","intro": "内容全面深入，再次领略linux内核的无限风光。" }

{ "update" : {"_index" : "books", "_id" : "4"} }

{ "doc" : {"intro" : "书本的内容非常好，值得一看"} }