Redis源码研究--字典

计划每天花1小时学习Redis 源码。在博客上做个记录。

--------6月18日-----------

redis的字典dict主要涉及几个数据结构，

dictEntry：具体的k-v链表结点

dictht：哈希表

dict：字典

具体关系为

 /* A dictionary: two hash tables plus the state needed to move entries
  * from ht[0] to ht[1] incrementally. */
 typedef struct dict {
     dictType *type;  /* stored by _dictInit(); dictType is declared outside this excerpt */
     void *privdata;  /* opaque pointer stored by _dictInit() */
     dictht ht[2];    /* ht[0] is the table in use; ht[1] is the target while rehashing */
     int rehashidx; /* rehashing not in progress if rehashidx == -1 */
     int iterators; /* number of iterators currently running */
 } dict;

 /* One hash table: an array of bucket heads and its bookkeeping counters. */
 typedef struct dictht {

     dictEntry **table;      /* bucket array; each slot heads a chain of dictEntry */

     unsigned long size;     /* number of buckets allocated for table */

     unsigned long sizemask; /* ANDed with a hash value to obtain a bucket index */

     unsigned long used;     /* number of entries currently linked into the table */

 } dictht;

 /* A single key/value node; entries that land in the same bucket are
  * chained through next. */
 typedef struct dictEntry {

     void *key;

     /* The value: a pointer, an unsigned 64-bit or a signed 64-bit integer. */
     union {

         void *val;

         uint64_t u64;

         int64_t s64;

     } v;

     struct dictEntry *next; /* next entry in the same bucket chain */

 } dictEntry;

一个字典有两个哈希表，冲突后采用了链地址法，很好理解。

一些简单操作采用了宏

/* Field accessors; `entry` is a pointer to a dictEntry. */

/* The key pointer. */
#define dictGetKey(entry) ((entry)->key)

/* The value viewed as a generic pointer. */
#define dictGetVal(entry) ((entry)->v.val)

/* The value viewed as a signed 64-bit integer. */
#define dictGetSignedIntegerVal(entry) ((entry)->v.s64)

/* The value viewed as an unsigned 64-bit integer. */
#define dictGetUnsignedIntegerVal(entry) ((entry)->v.u64)

------------6月19日----------------------

字典具体用到了两种哈希算法，我只看了简单的那一种，没想到代码竟然可以那么少，算法名字为djb2，

 /* And a case insensitive hash function (based on djb hash).
  *
  * Folds the first len bytes of buf, each lower-cased with tolower(), into a
  * hash value that starts from dict_hash_function_seed. */
 unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
     unsigned int hash = (unsigned int)dict_hash_function_seed;

     while (len--)
         hash = ((hash << 5) + hash) + (tolower(*buf++)); /* hash * 33 + c */
     return hash;
 }

dict_hash_function_seed是个全局变量，为5381.
The magic of number 33 (why it works better than many other constants, prime or not) has never been adequately explained.
JDK中采用的哈希算法取的数字是31，一个素数。
创建一个新字典并初始化：

 /* Allocate a new dict and initialise it through _dictInit().
  *
  * Returns the new dict, or NULL when malloc() fails. */
 dict *dictCreate(dictType *type, void *privDataPtr){
     dict *d = malloc(sizeof(*d));

     /* malloc() may fail; _dictInit() would dereference the NULL pointer. */
     if (!d) return NULL;
     _dictInit(d,type,privDataPtr);
     return d;
 }

 /* Initialise an already allocated dict: both hash tables are reset to
  * empty, type and privDataPtr are stored, and the dict is marked as
  * not rehashing with no running iterators.
  *
  * Always returns DICT_OK. */
 int _dictInit(dict *d, dictType *type, void *privDataPtr){
     _dictReset(&d->ht[0]);
     _dictReset(&d->ht[1]);
     d->type = type;
     d->privdata = privDataPtr;
     d->rehashidx = -1; /* -1 means no rehash in progress */
     d->iterators = 0;
     return DICT_OK;
 }

 /* Put a hash table into the empty state: no bucket array and all counters
  * zero. The old table pointer is overwritten, not freed. */
 static void _dictReset(dictht *ht){
     ht->table = NULL;
     ht->size = 0;
     ht->sizemask = 0;
     ht->used = 0;
 }

学了这么多年c语言了，malloc(sizeof(*d))我还是第一次看到。
说到sizeof，我还要提一句：sizeof一般是在编译期求值的；c99加入了变长数组（VLA）这一概念之后，只有对变长数组取sizeof时才会在运行时确定。csdn上的回答是错的。
对字典进行紧缩处理，让“哈希表中的元素个数/哈希表长度”这个比值接近1：

 /* Resize the table to the smallest size that holds all its elements,
  * but never below DICT_HT_INITIAL_SIZE.
  *
  * Returns DICT_ERR when resizing is disabled (dict_can_resize is 0) or a
  * rehash is in progress; otherwise returns whatever dictExpand() returns. */
 int dictResize(dict *d){
     /* Same type as ht[0].used, so a large element count is not truncated. */
     unsigned long minimal;

     if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;
     minimal = d->ht[0].used;
     if (minimal < DICT_HT_INITIAL_SIZE)
         minimal = DICT_HT_INITIAL_SIZE;
     return dictExpand(d, minimal);
 }

 /* True while `d` (a pointer to a dict) is in the middle of a rehash;
  * rehashidx == -1 means no rehash is in progress. */
 #define dictIsRehashing(d) ((d)->rehashidx != -1)

 /* Size used when an empty hash table is expanded for the first time. */
 #define DICT_HT_INITIAL_SIZE     4

当字典正在Rehash的时候不能进行Resize操作，初始时哈希表大小为4，哈希表大小一般都是2的幂次方。
如果minimal是5，经过dictExpand后，哈希表大小变为8.

 /* Return the first value >= size in the sequence DICT_HT_INITIAL_SIZE,
  * 2*DICT_HT_INITIAL_SIZE, 4*DICT_HT_INITIAL_SIZE, ...
  * Sizes of LONG_MAX or more are clamped to LONG_MAX. */
 static unsigned long _dictNextPower(unsigned long size){
     unsigned long i = DICT_HT_INITIAL_SIZE;

     if (size >= LONG_MAX) return LONG_MAX;
     while(1) {
         if (i >= size)
             return i;
         i *= 2;
     }
 }

 /* Create a hash table able to hold at least `size` elements (rounded up by
  * _dictNextPower()) and install it in d.
  *
  * If ht[0] has no bucket array yet, the new table simply becomes ht[0].
  * Otherwise it becomes ht[1] and rehashidx is set to 0, which starts an
  * incremental rehash.
  *
  * Returns DICT_ERR if a rehash is already in progress or size is smaller
  * than the number of stored elements, DICT_OK otherwise. */
 int dictExpand(dict *d, unsigned long size){
     dictht n; /* the new hash table */
     unsigned long realsize = _dictNextPower(size);

     /* the size is invalid if it is smaller than the number of
      * elements already inside the hash table */
     if (dictIsRehashing(d) || d->ht[0].used > size)
         return DICT_ERR;

     /* Allocate the new hash table and initialize all pointers to NULL */
     n.size = realsize;
     n.sizemask = realsize-1;
     n.table = zcalloc(realsize*sizeof(dictEntry*));
     n.used = 0;

     /* Is this the first initialization? If so it's not really a rehashing
      * we just set the first hash table so that it can accept keys. */
     if (d->ht[0].table == NULL) {
         d->ht[0] = n;
         return DICT_OK;
     }

     /* Prepare a second hash table for incremental rehashing */
     d->ht[1] = n;
     d->rehashidx = 0;
     return DICT_OK;
 }

新建了一个哈希表n，size是扩展后的size，ht[0].table 为空说明这是第一次初始化，不是扩展，直接赋值。
ht[0].table 不为空，说明这是一次扩展，把n赋给ht[1]，ReHash标志rehashidx也被设为0.
上边这段不大好理解，先看后面的，一会儿返回来再研究dictExpand函数。
--------------------6月20日--------------------------

向字典中添加元素需要调用dictAdd函数：

 /* Insert `key` with value `val` into the dictionary.
  *
  * Returns DICT_OK once the value has been stored in the entry created by
  * dictAddRaw(), or DICT_ERR when dictAddRaw() returns no entry. */
 int dictAdd(dict *d, void *key, void *val){
     dictEntry *slot = dictAddRaw(d,key);

     if (slot) {
         dictSetVal(d, slot, val);
         return DICT_OK;
     }
     return DICT_ERR;
 }

具体实现需要看dictAddRaw函数：

 /* Low level insert: create an entry for `key`, link it at the head of its
  * bucket chain and set only the key; the caller fills in the value.
  *
  * Returns the new entry, or NULL when _dictKeyIndex() reports -1. */
 dictEntry *dictAddRaw(dict *d, void *key){
     int index;
     dictEntry *entry;
     dictht *ht;

     /* While a rehash is in progress every insert also calls
      * _dictRehashStep() (defined outside this excerpt). */
     if (dictIsRehashing(d)) _dictRehashStep(d);

     /* Get the index of the new element, or -1 if
      * the element already exists. */
     if ((index = _dictKeyIndex(d, key)) == -1)
         return NULL;

     /* Allocate the memory and store the new entry.
      * During a rehash new entries go into ht[1], the new table. */
     ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
     entry = zmalloc(sizeof(*entry));
     entry->next = ht->table[index];
     ht->table[index] = entry;
     ht->used++;

     /* Set the hash entry fields. */
     dictSetKey(d, entry, key);
     return entry;
 }

先判断是不是在进行Rehash，如果在Rehash，执行渐进式Rehash。
找到要插入的key的位置，如果相同的key已经存在了，返回NULL
如果在进行Rehash，ht指向ht[1]表，然后利用链表头插法（这个我熟）将entry插入，更新used。
添加key前需要查找key的位置：

 /* Returns the index of a free slot that can be populated with
  * an hash entry for the given 'key'.
  * If the key already exists, -1 is returned.
  * -1 is also returned when _dictExpandIfNeeded() fails.
  *
  * Note that if we are in the process of rehashing the hash table, the
  * index is always returned in the context of the second (new) hash table. */
 static int _dictKeyIndex(dict *d, const void *key){
     unsigned int h, idx, table;
     dictEntry *he;

     /* Expand the hash table if needed */
     if (_dictExpandIfNeeded(d) == DICT_ERR)
         return -1;

     /* Compute the key hash value */
     h = dictHashKey(d, key);
     for (table = 0; table <= 1; table++) {
         idx = h & d->ht[table].sizemask;
         /* Search if this slot does not already contain the given key */
         he = d->ht[table].table[idx];
         while(he) {
             if (dictCompareKeys(d, key, he->key))
                 return -1;
             he = he->next;
         }
         /* ht[1] only needs to be searched while a rehash is in progress. */
         if (!dictIsRehashing(d)) break;
     }
     return idx;
 }

插入之前，程序会检查一下哈希表空间是否够，需不需要expand。通过某种哈希算法计算key对应的哈希值h，sizemask二进制格式大体是这样的011111111，哈希值跟它一与，相当于只保留了后面几位。算出来的idx就是要插入的索引号。然后需要比较在这个索引上的链表中有没有跟要插入的key一样的，如果重复了，返回-1.

最后判断一下：如果当前没有在进行Rehash，ht[1]表就不用管了。

-----------------------6月21日---------------------

 /* Expand the hash table if needed.
  *
  * Returns DICT_OK when nothing has to be done (including when a rehash is
  * already running); otherwise returns the result of dictExpand(). */
 static int _dictExpandIfNeeded(dict *d){
     /* Incremental rehashing already in progress. Return. */
     if (dictIsRehashing(d)) return DICT_OK;

     /* If the hash table is empty expand it to the initial size. */
     if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);

     /* If we reached the 1:1 ratio, and we are allowed to resize the hash
      * table (global setting) or we should avoid it but the ratio between
      * elements/buckets is over the "safe" threshold, we resize doubling
      * the number of buckets. */
     if (d->ht[0].used >= d->ht[0].size &&
         (dict_can_resize ||
          d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
     {
         return dictExpand(d, d->ht[0].used*2);
     }
     return DICT_OK;
 }

函数名前面带下划线的都表示这是private的。程序第4行又是先判断是否正在进行Rehash，

为什么要说又呢

如果哈希表是空的，那么我们扩展到DICT_HT_INITIAL_SIZE（4）个。

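第13行有点不理解，used什么时候会大于size啊？？？？标记一下，以后再看。（补充：从下面dictEnableResize/dictDisableResize的注释来看，dict_can_resize为0时，即使达到1:1也不会扩容，新元素继续挂到冲突链上，所以used可能超过size；直到used/size超过dict_force_resize_ratio才会强制扩容。）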

dict_can_resize是个全局变量。dict_force_resize_ratio = 5.

/* Using dictEnableResize() / dictDisableResize() we make possible to

* enable/disable resizing of the hash table as needed. This is very important

* for Redis, as we use copy-on-write and don't want to move too much memory

* around when there is a child performing saving operations.

*

* Note that even when dict_can_resize is set to 0, not all resizes are

* prevented: an hash table is still allowed to grow if the ratio between

* the number of elements and the buckets > dict_force_resize_ratio. */

 /* Allow hash tables to be resized again by setting the global
  * dict_can_resize flag to 1. */
 void dictEnableResize(void) {
     dict_can_resize = 1;
 }

 /* Clear the global dict_can_resize flag. With the flag at 0
  * _dictExpandIfNeeded() only grows a table once used/size exceeds
  * dict_force_resize_ratio, and dictResize() returns DICT_ERR. */
 void dictDisableResize(void) {
     dict_can_resize = 0;
 }

字典的 rehash 操作实际上就是执行以下任务：

创建一个比 ht[0]->table 更大的 ht[1]->table ；

将 ht[0]->table 中的所有键值对迁移到 ht[1]->table ；

将原有 ht[0] 的数据清空，并将 ht[1] 替换为新的 ht[0] ；

经过以上步骤之后，程序就在不改变原有键值对数据的基础上，增大了哈希表的大小。

--------------6月22日---------------------------

先上Rehash的代码

 /* Perform up to n steps of incremental rehashing. One step moves every
  * entry of the next non-empty bucket of ht[0] into ht[1].
  *
  * Returns 0 when no rehash is in progress or when ht[0] has been emptied
  * (its bucket array is then freed, ht[1] becomes ht[0] and rehashidx is
  * set back to -1). Returns 1 when n steps were done and entries may still
  * remain in ht[0]. */
 int dictRehash(dict *d, int n) {
     if (!dictIsRehashing(d)) return 0;

     while(n--) {
         dictEntry *de, *nextde;

         /* Check if we already rehashed the whole table... */
         if (d->ht[0].used == 0) {
             zfree(d->ht[0].table);
             d->ht[0] = d->ht[1];
             _dictReset(&d->ht[1]);
             d->rehashidx = -1;
             return 0;
         }

         /* Note that rehashidx can't overflow as we are sure there are more
          * elements because ht[0].used != 0 */
         assert(d->ht[0].size > (unsigned)d->rehashidx);
         while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
         de = d->ht[0].table[d->rehashidx];
         /* Move all the keys in this bucket from the old to the new hash HT */
         while(de) {
             unsigned int h;

             /* Save the successor first: de->next is overwritten below. */
             nextde = de->next;
             /* Get the index in the new hash table */
             h = dictHashKey(d, de->key) & d->ht[1].sizemask;
             de->next = d->ht[1].table[h];
             d->ht[1].table[h] = de;
             d->ht[0].used--;
             d->ht[1].used++;
             de = nextde;
         }
         d->ht[0].table[d->rehashidx] = NULL;
         d->rehashidx++;
     }
     return 1;
 }

n步Rehash，在ht[0]中找到第一个不为空的table[rehashidx]，将这个位置的链表（可能只有一个元素）全部移到ht[1]中，并更新ht[0].used、ht[1].used。

执行过程中，ht[0]中的元素如果都已经转到了ht[1]中，即ht[0].used == 0，停止执行，释放ht[0].table指向的空间，ht[1]变为ht[0]，将rehashidx置为-1。

字典还剩一小部分，大体意思我弄懂了，加上之前看的动态字符串sds、双向链表adlist，加上空格注释统计了下共2248行。

   adlist.c

    adlist.h

   dict.c

   dict.h

   sds.c

    sds.h

  total

主要参考了《Redis 设计与实现》。谢谢90后作者了。

Redis源码研究--字典的更多相关文章

Redis源码研究--启动过程
---------------------6月23日--------------------------- Redis启动入口即main函数在redis.c文件,伪代码如下: int main(int ...
Redis源码研究：哈希表 - 蕫的博客
[http://dongxicheng.org/nosql/redis-code-hashtable/] 1. Redis中的哈希表前面提到Redis是个key/value存储系统,学过数据结构的人 ...
Redis源码研究--redis.h
------------7月3日------------ /* The redisOp structure defines a Redis Operation, that is an instance ...
Redis源码研究--跳表
-------------6月29日-------------------- 简单看了下跳表这一数据结构,理解起来很真实,效率可以和红黑树相比.我就喜欢这样的. typedef struct zski ...
Redis源码研究--字符串
之前看的内容,占个位子,以后补上. ------------8月2日------------- 好久没看了,惭愧,今天抽了点时间重新看了Redis的字符串,一边写博客,一边看. Redis的字符串主要 ...
Redis源码研究—基础知识
1. Redis 是什么 Redis是一个开源的使用ANSI C语言编写的基于内存的key/value存储系统,与memcache类似,但它支持的value类型更多,包括:字符串(string).链表 ...
Redis源码研究--双向链表
之前看的内容,占个位子,以后补上. ----------8月4日--------------- 双向链表这部分看的比较爽,代码写的中规中矩,心里窃喜,跟之前学的<数据结构>这本书中差不多. ...
［Redis源码阅读］dict字典的实现
dict的用途 dict是一种用于保存键值对的抽象数据结构,在redis中使用非常广泛,比如数据库.哈希结构的底层. 当执行下面这个命令: > set msg "hello" ...
redis 5.0.7 源码阅读——字典dict
redis中字典相关的文件为:dict.h与dict.c 与其说是一个字典,道不如说是一个哈希表. 一.数据结构 dictEntry typedef struct dictEntry { void * ...

随机推荐

(easy)LeetCode 205.Isomorphic Strings (*)
Given two strings s and t, determine if they are isomorphic. Two strings are isomorphic if the chara ...
ORACLE错误一览表【转】
http://blog.itpub.net/26892340/viewspace-722178/
chrome 修改标签页
插件名称:New Tab Redirect 标签格式:"file:///home/user/index.html"
增量升级（省流量更新）的Android客户端实现
转载与 zhouhuiah的专栏 http://blog.csdn.net/zhouhuiah/article/details/16939937 本文在以上两篇博客的基础上再增加了异常处理,并将生成的 ...
让执行程序引用特定目录下的Dll
当写一个软件,特别是大型的软件,经常会引用一些第三方的类库,再加上一些自己的项目,如果这些Dll全都放在主目录下的话,会显得比较杂乱.我们希望将项目的类库分类成文件夹存放,这样才显得比较整洁. 解决方 ...
Hibernate和JDBC、EJB比较
参考:http://m.blog.csdn.net/article/details?id=7228061 一.Hibernate是JDBC的轻量级的对象封装,它是一个独立的对象持久层框架,和App S ...
DoTween使用
官网:http://dotween.demigiant.com/ 1.step 这里使用lamda表达式,通过dotween的to方法将其移动到 Vector3(348, 196, 0)的值返回到Ve ...
关于lambda表达式在javascript中的使用
了解过js函数的同学应该都知道js的函数有很多种创建方式. 如: function fun(){}: var fun=function(){}: 但最近的学习中发现了lambda表达式型的创建js的匿 ...
Flash图表控件FusionCharts自定义图表y轴最大/最小值
自定义图表y轴的最大值和最小值用户可以使用FusionCharts图表中<chart>元素的yAxisMaxValue和yAxisMinValue属性设置图表限制. 示例: <ch ...
Hibernate 常见异常
Hibernate 常见异常net.sf.hibernate.MappingException 当出现net.sf.hibernate.MappingException: Error r ...

Redis源码研究--字典

Redis源码研究--字典的更多相关文章

随机推荐

热门专题