golang consistent hash 菜鸟分析
一直在找适用于集群的算法,刚好 golang 上有一个合适的实现。下面作为菜鸟来分析一下。
- // Copyright (C) 2012 Numerotron Inc.
- // Use of this source code is governed by an MIT-style license
- // that can be found in the LICENSE file.
- // Package consistent provides a consistent hashing function.
- //
- // Consistent hashing is often used to distribute requests to a changing set of servers. For example,
- // say you have some cache servers cacheA, cacheB, and cacheC. You want to decide which cache server
- // to use to look up information on a user.
- //
- // You could use a typical hash table and hash the user id
- // to one of cacheA, cacheB, or cacheC. But with a typical hash table, if you add or remove a server,
- // almost all keys will get remapped to different results, which basically could bring your service
- // to a grinding halt while the caches get rebuilt.
- //
- // With a consistent hash, adding or removing a server drastically reduces the number of keys that
- // get remapped.
- //
- // Read more about consistent hashing on wikipedia: http://en.wikipedia.org/wiki/Consistent_hashing
- //
- package main
- import (
- "errors"
- "fmt"
- "hash/crc32"
- "log"
- "sort"
- "strconv"
- "sync"
- )
// uints is a slice of 32-bit hashes that implements sort.Interface.
type uints []uint32

// Len reports how many hashes the slice holds.
func (u uints) Len() int {
	return len(u)
}

// Less reports whether the hash at index i sorts before the hash at index j.
func (u uints) Less(i, j int) bool {
	return u[i] < u[j]
}

// Swap exchanges the hashes stored at indices i and j.
func (u uints) Swap(i, j int) {
	u[j], u[i] = u[i], u[j]
}
var (
	// ErrEmptyCircle is returned by lookups made before any element has been
	// added to the hash.
	ErrEmptyCircle = errors.New("empty circle")
)
- // Consistent holds the information about the members of the consistent hash circle.
- type Consistent struct {
- circle map[uint32]string
- members map[string]bool
- sortedHashes uints // 已经排好序的hashes slice , 主要有力搜索 (存储的内容是全部虚拟hashes值)
- NumberOfReplicas int
- count int64
- scratch [64]byte
- sync.RWMutex
- }
- // New creates a new Consistent object with a default setting of 20 replicas for each entry.
- //
- // To change the number of replicas, set NumberOfReplicas before adding entries.
- func New() *Consistent {
- c := new(Consistent)
- c.NumberOfReplicas = 20
- c.circle = make(map[uint32]string)
- c.members = make(map[string]bool)
- //log.Printf("%p", c)
- return c
- }
- // eltKey generates a string key for an element with an index.
- func (c *Consistent) eltKey(elt string, idx int) string {
- return elt + "|" + strconv.Itoa(idx)
- }
- // Add inserts a string element in the consistent hash.
- func (c *Consistent) Add(elt string) {
- c.Lock()
- defer c.Unlock()
- for i := 0; i < c.NumberOfReplicas; i++ {
- fmt.Println("i:",i,c.hashKey(c.eltKey(elt, i)))
- c.circle[c.hashKey(c.eltKey(elt, i))] = elt
- }
- //log.Fatal(len(c.circle))
- //log.Println(len(c.members), elt)
- c.members[elt] = true
- c.updateSortedHashes()
- c.count++
- }
- // Remove removes an element from the hash.
- func (c *Consistent) Remove(elt string) {
- c.Lock()
- defer c.Unlock()
- for i := 0; i < c.NumberOfReplicas; i++ {
- delete(c.circle, c.hashKey(c.eltKey(elt, i)))
- }
- delete(c.members, elt)
- c.updateSortedHashes()
- c.count--
- }
- // Set sets all the elements in the hash. If there are existing elements not present in elts, they will be removed.
- func (c *Consistent) Set(elts []string) {
- mems := c.Members()
- for _, k := range mems {
- found := false
- for _, v := range elts {
- if k == v {
- found = true
- break
- }
- }
- if !found {
- c.Remove(k)
- }
- }
- for _, v := range elts {
- c.RLock()
- _, exists := c.members[v]
- c.RUnlock()
- if exists {
- continue
- }
- c.Add(v)
- }
- }
- func (c *Consistent) Members() []string {
- c.RLock()
- defer c.RUnlock()
- var m []string
- for k := range c.members {
- m = append(m, k)
- }
- return m
- }
- // Get returns an element close to where name hashes to in the circle.
- func (c *Consistent) Get(name string) (string, error) {
- c.RLock()
- defer c.RUnlock()
- if len(c.circle) == 0 {
- return "", ErrEmptyCircle
- }
- key := c.hashKey(name)
- log.Println("need search --> key:",key,"servername:",name)
- i := c.search(key)
- fmt.Println(c.sortedHashes[i],c.circle[c.sortedHashes[i]])
- return c.circle[c.sortedHashes[i]], nil
- }
- func (c *Consistent) search(key uint32) (i int) {
- f := func(x int) bool {
- log.Println("i",i)
- // 拿不到相等的
- return c.sortedHashes[x] > key
- }
- i = sort.Search(len(c.sortedHashes), f)
- log.Println("I:",i)
- if i >= len(c.sortedHashes) {
- i = 0
- }
- return
- }
- // GetTwo returns the two closest distinct elements to the name input in the circle.
- func (c *Consistent) GetTwo(name string) (string, string, error) {
- c.RLock()
- defer c.RUnlock()
- if len(c.circle) == 0 {
- return "", "", ErrEmptyCircle
- }
- //得到hashesw 值
- key := c.hashKey(name)
- //搜索hashes
- i := c.search(key)
- //获取值
- a := c.circle[c.sortedHashes[i]]
- //如果节点只有一个时,直接返回
- if c.count == 1 {
- return a, "", nil
- }
- start := i
- var b string
- for i = start + 1; i != start; i++ {
- if i >= len(c.sortedHashes) {
- i = 0
- }
- b = c.circle[c.sortedHashes[i]]
- //两个时候否为相同的节点,不是就返回
- if b != a {
- break
- }
- }
- return a, b, nil
- }
- // GetN returns the N closest distinct elements to the name input in the circle.
- func (c *Consistent) GetN(name string, n int) ([]string, error) {
- c.RLock()
- defer c.RUnlock()
- if len(c.circle) == 0 {
- return nil, ErrEmptyCircle
- }
- if c.count < int64(n) {
- n = int(c.count)
- }
- var (
- key = c.hashKey(name)
- i = c.search(key)
- start = i
- res = make([]string, 0, n)
- elem = c.circle[c.sortedHashes[i]]
- )
- res = append(res, elem)
- if len(res) == n {
- return res, nil
- }
- for i = start + 1; i != start; i++ {
- if i >= len(c.sortedHashes) {
- i = 0
- }
- elem = c.circle[c.sortedHashes[i]]
- if !sliceContainsMember(res, elem) {
- res = append(res, elem)
- }
- if len(res) == n {
- break
- }
- }
- return res, nil
- }
- func (c *Consistent) hashKey(key string) uint32 {
- //
- log.Println("key string:",key)
- if len(key) < 64 {
- var scratch [64]byte
- copy(scratch[:], key)
- //log.Fatal(len(key), scratch)
- return crc32.ChecksumIEEE(scratch[:len(key)])
- }
- return crc32.ChecksumIEEE([]byte(key))
- }
- // 对hash 进行排序
- func (c *Consistent) updateSortedHashes() {
- hashes := c.sortedHashes[:0]
- //reallocate if we're holding on to too much (1/4th)
- //log.Fatal("exit test:",cap(c.sortedHashes))
- if cap(c.sortedHashes)/(c.NumberOfReplicas*4) > len(c.circle) {
- hashes = nil
- }
- for k := range c.circle {
- hashes = append(hashes, k)
- log.Println(k)
- }
- sort.Sort(hashes)
- c.sortedHashes = hashes
- log.Println("tem hashes size :",len(hashes),len(c.sortedHashes))
- }
// sliceContainsMember reports whether member occurs anywhere in set.
func sliceContainsMember(set []string, member string) bool {
	for idx := 0; idx < len(set); idx++ {
		if set[idx] == member {
			return true
		}
	}
	return false
}
- func main() {
- c := New()
- //fmt.Printf("%T", D)
- c.Add("redis-1")
- c.Add("redis-2")
- c.Add("redis-3")
- log.Fatal(c.GetN("redis-2",1))
- v, ok := c.Get("redis-one")
- if ok == nil {
- for i, vv := range v {
- fmt.Println(i, vv)
- }
- }
- log.Println("members size:",len(c.members),"\tcircle size :",len(c.circle),"sortHashes:",len(c.sortedHashes),"scratch:",c.scratch)
- log.Println("sortHashes value:",c.sortedHashes)
- //log.Fatal("...")
- }
其中有几点不是很理解:scratch 这个东西好像没用到;还有就是在计算虚拟节点时,它是使用'>'来比较的。假设我们设置一个节点 redis,那么默认会生成 redis|0、redis|1…… 这样的虚拟节点进行分布;如果获取 redis 时使用 redis|1 进行搜索,搜索出来的就不是 redis|1 这个虚拟节点了,可能是其他节点。还有在求最近距离节点时,它是按升序进行搜索的,而不考虑向左右两个方向寻找最近的节点。
- 1 type Consistent struct {
- 2 »···circle map[uint32]string // 用来存储node(string) 和 vnode的对应关系, vnode 是一个hash出来的uint32的整数,也就是最大分区数为4294967296
- 3 »···members map[string]bool // string 为 node, bool表示实体节点是否存活
- 4 »···sortedHashes uints // 已经排好序的hashes slice , 主要用于搜索 (存储的内容是全部vnode hashes值)
- 5 »···NumberOfReplicas int // node 的权重, 也就是node对应的vnode的个数
- 6 »···count int64 // 物理节点
- 7 »···scratch [64]byte
- 8 »···sync.RWMutex
- 9 }
这种一致性hash和 Dynamo算法的一致性hash是有很大区别的,这种hash排序不是全有序的;
测试例子:
- func main() {
- c := New()
- c.Set([]string{"redisA", "redisB"})
- fmt.Println(c.NumberOfReplicas)
- fmt.Println(c.Members())
- for k, v := range c.sortedHashes {
- fmt.Println(k, c.circle[v])
- }
- }
输出:
- ▶ go run consistent.go
- 20
- [redisB redisA]
- 0 redisA
- 1 redisB
- 2 redisA
- 3 redisB
- 4 redisA
- 5 redisB
- 6 redisA
- 7 redisB
- 8 redisA
- 9 redisA
- 10 redisB
- 11 redisA
- 12 redisA
- 13 redisB
- 14 redisA
- 15 redisB
- 16 redisB
- 17 redisA
- 18 redisB
- 19 redisB
- 20 redisA
- 21 redisB
- 22 redisA
- 23 redisB
- 24 redisA
- 25 redisB
- 26 redisA
- 27 redisB
- 28 redisA
- 29 redisB
- 30 redisB
- 31 redisA
- 32 redisB
- 33 redisB
- 34 redisA
- 35 redisA
- 36 redisB
- 37 redisA
- 38 redisA
- 39 redisB
31 A -> 32B -> 33B ,如果是Dynamo,那么应该是31A -> 32B -> 33A这样循环下去,所以如果想使用这种一致性hash算法来做备份容灾,是不行的。
golang consistent hash 菜鸟分析的更多相关文章
- consistent hash(一致性哈希算法)
一.产生背景 今天咱不去长篇大论特别详细地讲解consistent hash,我争取用最轻松的方式告诉你consistent hash算法是什么,如果需要深入,Google一下~. 举个栗子吧: 比如 ...
- 一文了解 Consistent Hash
本文首发于 vivo互联网技术 微信公众号 链接:https://mp.weixin.qq.com/s/LGLqEOlGExKob8xEXXWckQ作者:钱幸川 在分布式环境下面,我们经常会通过一定的 ...
- golang 性能调优分析工具 pprof(下)
golang 性能调优分析工具 pprof(上)篇, 这是下篇. 四.net/http/pprof 4.1 代码例子 1 go version go1.13.9 把上面的程序例子稍微改动下,命名为 d ...
- Nginx的负载均衡 - 一致性哈希 (Consistent Hash)
Nginx版本:1.9.1 我的博客:http://blog.csdn.net/zhangskd 算法介绍 当后端是缓存服务器时,经常使用一致性哈希算法来进行负载均衡. 使用一致性哈希的好处在于,增减 ...
- 一致性Hash算法(Consistent Hash)
分布式算法 在做服务器负载均衡时候可供选择的负载均衡的算法有很多,包括: 轮循算法(Round Robin).哈希算法(HASH).最少连接算法(Least Connection).响应速度算法(Re ...
- golang thrift 源码分析,服务器和客户端究竟是如何工作的
首先编写thrift文件(rpcserver.thrift),运行thrift --gen go rpcserver.thrift,生成代码 namespace go rpc service RpcS ...
- golang (5) http 请求分析
http 分析包分析 fmt.Println("get Content-Type: ", r.Header.Get("Content-Type")) var r ...
- oralce之 10046对Hash Join分析
前两天解决了一个优化SQL的case,SQL语句如下,big_table为150G大小,small_table很小,9000多条记录,不到1M大小,hash_area_size, sort_area_ ...
- 【go】继续go go go,ubuntu环境搭建及golang的依赖关系分析
这次是在ubuntu14.04 amd64上搭建go的编译环境,使用的IDE换成了sublime text,具体步骤参照的是 http://blog.csdn.net/aqiang912/articl ...
随机推荐
- python接口测试(三)——Excell文件读取进行参数化
python进行http请求时,需要对参数进行参数化,此时就可以运用Excel进行,具体如下: 1.梳理出请求中那些参数需要参数化,然后新建一个Excel,如图: 2.读取Excel中的内容,在读取前 ...
- 【转】V8 之旅: 垃圾回收器
垃圾回收器是一把十足的双刃剑.其好处是可以大幅简化程序的内存管理代码,因为内存管理无需程序员来操作,由此也减少了(但没有根除)长时间运转的程序的内存泄漏.对于某些程序员来说,它甚至能够提升代码的性能. ...
- JVM(1)——简介
网上流传着一段挺有意思的话-- 对于从事C或C++的开发人员来说,他们既是内存管理的最高权力的皇帝,也是最基础的劳动人民,担负着每一个对象生命开始到终结的维护工作,有点光杆司令的赶脚. 但对于java ...
- filebeat + logstash + elasticsearch + granfa
filebeat + logstash + elasticsearch + granfa https://www.cnblogs.com/wenchengxiaopenyou/p/9034213.ht ...
- Codeforces Round #390 (Div. 2) E(bitset优化)
题意就是一个给出2个字符矩阵,然后进行匹配,输出每个位置的匹配的结果 (超出的部分循环处理) 一种做法是使用fft,比较难写,所以没有写 这里使用一个暴力的做法,考虑到一共只出现26个字符 所以使用一 ...
- 【题解】HNOI2009无归岛
这题真的是无语了,在哪个岛上根本就没有任何的用处……不过我是画了下图,感受到一定是仙人掌,并不会证.有谁会证的求解…… 如果当做仙人掌来做确实十分的简单.只要像没有上司的舞会一样树形dp就好了,遇到环 ...
- BZOJ 1040: [ZJOI2008]骑士 | 在基环外向树上DP
题目: http://www.lydsy.com/JudgeOnline/problem.php?id=1040 题解: 我AC了 是自己写的 超开心 的 考虑断一条边 这样如果根节点不选答案一定正确 ...
- nowcoder 提高组模拟赛 选择题 解题报告
选择题 链接: https://www.nowcoder.com/acm/contest/178/B 来源:牛客网 题目描述 有一道选择题,有 \(a,b,c,d\) 四个选项. 现在有 \(n\) ...
- CF763B Timofey and Rectangles
题目戳这里. 首先答案肯定是YES,因为一个平面图肯定可以被4种颜色染色,关键是怎么输出方案. 由于4是一个特殊的数字\(4 = 2^2\),而我们还有一个条件就是边长为奇数,而奇数是会改变二进制位的 ...
- C++——设计与演化——读书笔记
<<c++设计与演化>>1.c++的保护模式来自于访问权限许可和转让的概念; 初始化和赋值的区分来自于转让能力的思考; c++的const概念是从读写保护机制中演化出来. 2. ...