Python 2.7的字典实现简化版(C语言)
这是一个能自动调整大小的哈希字典,外部接口实现了下列功能.
1.字典级别:
创建字典 dict_new
归零字典 dict_clear
2.键值级别:
查找 dict_search
强制查找 dict_force_search
更新 dict_update
添加 dict_add
删除 dict_del
所谓强制查找就是假如key不存在,那么它将先在字典中添加这个key,值设置为默认值,再返回这个值的指针.
由于键和值都是以 void 指针(void *)定义的,所以在处理一些简单的值类型时(如 int)显得繁琐了些(比如 _valcmp),但好处是更加灵活,比如稍作修改(valuedup 和 get_default_value)就可以处理值为字符串的情况.
C确实很快,但繁重的内存管理果然名不虚传.这个简单的字典要求:
1.键(me_key)和值(me_value)的指针所指向的堆内存区域能够直接用free释放,如果这些区域还包含另一些堆指针,那么可能会出问题.
2.只需传递缓冲数据(main中的keybuf和valbuf)给键值函数,函数内部会根据情况申请或释放内存,或不做任何处理.
为方便处理,words文本格式要求每行一个词语.
/* Pure C simple version of python 2.7.8 hash table */
/* Sample usage: see main() */
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
/* Smallest table size; tables grow from here by doubling, so every size
 * is a power of two and `size - 1` can be used as a bit mask. */
#define PyDict_MINSIZE 8
/* Number of bits dropped from `perturb` on every probe step. */
#define PERTURB_SHIFT 5
/* True once at least 2/3 of the slots are occupied (active or dummy). */
#define NEED_RESIZE(mp) ((mp)->ma_fill * 3 >= ((mp)->ma_mask + 1) * 2)

/* Keys and values are opaque to the dictionary: it only stores pointers. */
typedef void PyObject;

/*
 * One slot of the hash table.
 *   unused : me_key == NULL
 *   dummy  : me_key == dummy (entry was deleted), me_value == NULL
 *   active : me_key is a real key, me_value != NULL
 */
typedef struct {
    size_t me_hash;      /* hash of me_key, cached to avoid rehashing */
    PyObject *me_key;
    PyObject *me_value;
} PyDictEntry;

typedef struct _dictobject PyDictObject;
struct _dictobject {
    size_t ma_fill; /* # Active + # Dummy */
    size_t ma_used; /* # Active */
    size_t ma_mask; /* table size - 1 */
    PyDictEntry *ma_table;
    /* Per-dictionary callbacks for handling keys and values. */
    size_t (*ma_keyhash)(PyObject *key);
    int (*ma_keycmp)(PyObject *key1, PyObject *key2);
    PyObject *(*ma_keydup)(PyObject *key);
    PyObject *(*ma_valuedup)(PyObject *value);
    PyObject *(*ma_default)(void);
};

/* Object used as dummy key to fill deleted entries */
static PyDictEntry _dummy_struct;
#define dummy (&_dummy_struct)

/*
 * Default key hash: treats the key as a NUL-terminated string and
 * computes hash = hash * 33 + c over its bytes (djb2).
 */
static size_t
keyhash(PyObject *_key)
{
    const unsigned char *key = (const unsigned char *)_key;
    size_t hash = 5381;

    for (; *key; key++)
        hash = ((hash << 5) + hash) + *key; /* hash * 33 + c */
    return hash;
}

/*
 * Default key comparison: treats both keys as NUL-terminated strings.
 * Returns 0 when they are equal, non-zero otherwise.
 */
static int
keycmp(PyObject *_key1, PyObject *_key2)
{
    return strcmp((const char *)_key1, (const char *)_key2);
}

/*
 * Default key copy: heap-allocated duplicate of a NUL-terminated string.
 * Returns NULL when memory is exhausted. The copy is released with free().
 */
static PyObject *
keydup(PyObject *key)
{
    size_t len = strlen((const char *)key) + 1;
    char *copy = malloc(len);

    if (copy != NULL)
        memcpy(copy, key, len);
    return copy;
}

/*
 * Default value copy: heap-allocated duplicate of a size_t.
 * Returns NULL when memory is exhausted.
 */
static PyObject *
valuedup(PyObject *_value)
{
    size_t *value = malloc(sizeof *value);

    if (value != NULL)
        *value = *(size_t *)_value;
    return value;
}

/*
 * Default value factory: heap-allocated size_t initialised to 0.
 * Returns NULL when memory is exhausted.
 */
static PyObject *
get_default_value(void)
{
    size_t *value = malloc(sizeof *value);

    if (value != NULL)
        *value = 0;
    return value;
}

/*
 * Create an empty dictionary.
 *
 * ma_size: requested number of slots; the table gets the smallest power
 *          of two that is >= ma_size and >= PyDict_MINSIZE.
 * The five callbacks customise key/value handling; passing NULL for any
 * of them selects the default defined above (string keys, size_t values).
 *
 * Returns the new dictionary, or NULL when ma_size is too large to be
 * rounded up or when memory is exhausted.
 */
PyDictObject *
dict_new_custom(size_t ma_size,
                size_t (*ma_keyhash)(PyObject *key),
                int (*ma_keycmp)(PyObject *key1, PyObject *key2),
                PyObject *(*ma_keydup)(PyObject *key),
                PyObject *(*ma_valuedup)(PyObject *value),
                PyObject *(*ma_default)(void))
{
    PyDictObject *mp;
    PyDictEntry *newtable;
    size_t newsize;

    for (newsize = PyDict_MINSIZE;
         newsize < ma_size && newsize > 0;
         newsize <<= 1)
        ;
    if (newsize == 0)
        return NULL; /* doubling overflowed size_t */

    mp = malloc(sizeof *mp);
    if (mp == NULL)
        return NULL;
    /* calloc zeroes the table (every slot starts unused) and checks the
     * size multiplication for overflow. */
    newtable = calloc(newsize, sizeof *newtable);
    if (newtable == NULL) {
        free(mp); /* do not leak the header when the table cannot be allocated */
        return NULL;
    }
    mp->ma_table = newtable;
    mp->ma_mask = newsize - 1;
    mp->ma_fill = mp->ma_used = 0;
    mp->ma_keyhash = ma_keyhash ? ma_keyhash : keyhash;
    mp->ma_keycmp = ma_keycmp ? ma_keycmp : keycmp;
    mp->ma_keydup = ma_keydup ? ma_keydup : keydup;
    mp->ma_valuedup = ma_valuedup ? ma_valuedup : valuedup;
    mp->ma_default = ma_default ? ma_default : get_default_value;
    return mp;
}

/* Create an empty dictionary with the minimum size and all default callbacks. */
PyDictObject *
dict_new(void)
{
    return dict_new_custom(0, NULL, NULL, NULL, NULL, NULL);
}

/* internal basic search method, used by other functions */
/*
 * Find the slot for `key` (whose hash is `hash`).
 *
 * Returns the active entry holding an equal key when there is one.
 * Otherwise returns the slot where the key should be inserted: the first
 * dummy slot met on the probe sequence if any, else the unused slot that
 * ended the search. In that case the returned entry has me_value == NULL.
 *
 * The probe loop only stops at a matching key or an unused slot, so the
 * table must always contain at least one unused slot.
 */
static PyDictEntry *
lookdict(PyDictObject *mp, PyObject *key, size_t hash)
{
    size_t i;
    size_t perturb;
    PyDictEntry *freeslot;
    size_t mask = mp->ma_mask;
    PyDictEntry *ep0 = mp->ma_table;
    PyDictEntry *ep;

    i = hash & mask;
    ep = &ep0[i];
    if (ep->me_key == NULL || ep->me_key == key)
        return ep;
    if (ep->me_key == dummy)
        freeslot = ep;
    else if (ep->me_hash == hash
             && mp->ma_keycmp(ep->me_key, key) == 0)
        return ep;
    else
        freeslot = NULL;

    /* Collision: follow the probe sequence i = 5*i + perturb + 1. */
    for (perturb = hash;; perturb >>= PERTURB_SHIFT) {
        i = (i << 2) + i + perturb + 1;
        ep = &ep0[i & mask];
        if (ep->me_key == NULL)
            return freeslot == NULL ? ep : freeslot;
        /* The dummy test comes before ma_keycmp so the comparison callback
         * is never handed the dummy marker as a key. */
        if (ep->me_key == key
            || (ep->me_hash == hash
                && ep->me_key != dummy
                && mp->ma_keycmp(ep->me_key, key) == 0))
            return ep;
        if (ep->me_key == dummy && freeslot == NULL)
            freeslot = ep;
    }
    assert(0); /* NOT REACHED */
    return NULL;
}

/* faster method used when no dummy key exists in table */
/*
 * Variant of lookdict() for a table without deleted (dummy) entries.
 *
 * Returns the active entry holding an equal key, or the first unused slot
 * on the probe sequence when the key is absent. Every non-NULL key with a
 * matching hash is passed to ma_keycmp, so the caller must guarantee that
 * the table contains no dummy entries.
 */
static PyDictEntry *
lookdict_nodummy(PyDictObject *mp, PyObject *key, size_t hash)
{
    size_t i;
    size_t perturb;
    size_t mask = mp->ma_mask;
    PyDictEntry *ep0 = mp->ma_table;
    PyDictEntry *ep;

    i = hash & mask;
    ep = &ep0[i];
    if (ep->me_key == NULL
        || ep->me_key == key
        || (ep->me_hash == hash && mp->ma_keycmp(ep->me_key, key) == 0))
        return ep;

    /* Collision: follow the probe sequence i = 5*i + perturb + 1. */
    for (perturb = hash;; perturb >>= PERTURB_SHIFT) {
        i = (i << 2) + i + perturb + 1;
        ep = &ep0[i & mask];
        if (ep->me_key == NULL
            || ep->me_key == key
            || (ep->me_hash == hash && mp->ma_keycmp(ep->me_key, key) == 0))
            return ep;
    }
    assert(0); /* NOT REACHED */
    return NULL;
}

/* internal fast function to insert item when no dummy key exists in table */
/*
 * Store (key, hash, value) in the first unused slot of the probe sequence.
 *
 * No key comparison is done and the pointers are stored as given (nothing
 * is copied), so the key must not already be present and the table must
 * contain no dummy entries and at least one unused slot.
 */
static void
insertdict_clean(PyDictObject *mp, PyObject *key, size_t hash, PyObject *value)
{
    size_t i;
    size_t perturb;
    size_t mask = mp->ma_mask;
    PyDictEntry *ep0 = mp->ma_table;
    PyDictEntry *ep;

    i = hash & mask;
    ep = &ep0[i];
    for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
        i = (i << 2) + i + perturb + 1;
        ep = &ep0[i & mask];
    }
    mp->ma_fill++;
    mp->ma_used++;
    ep->me_key = key;
    ep->me_hash = hash;
    ep->me_value = value;
}

/*
Restructure the table by allocating a new table and reinserting all
items again. When entries have been deleted, the new table may
actually be smaller than the old one.
*/
/*
 * Move all active entries into a freshly allocated table whose size is the
 * smallest power of two greater than `minused`. Dummy entries are dropped.
 *
 * Returns 0 on success. Returns -1 when the size overflows or memory is
 * exhausted; in that case the dictionary is left untouched.
 */
static int
dict_resize(PyDictObject *mp, size_t minused)
{
    size_t newsize;
    size_t remaining;
    PyDictEntry *oldtable, *newtable, *ep;

    /* Find the smallest table size > minused. */
    for (newsize = PyDict_MINSIZE;
         newsize <= minused && newsize > 0;
         newsize <<= 1)
        ;
    if (newsize == 0)
        return -1; /* doubling overflowed size_t */

    /* Get space for a new, zeroed table. */
    newtable = calloc(newsize, sizeof *newtable);
    if (newtable == NULL)
        return -1;

    oldtable = mp->ma_table;
    remaining = mp->ma_used;
    mp->ma_table = newtable;
    mp->ma_mask = newsize - 1;
    mp->ma_used = 0;
    mp->ma_fill = 0;
    /* Stop as soon as every active entry has been moved. */
    for (ep = oldtable; remaining > 0; ep++) {
        /* only active entry */
        if (ep->me_value != NULL) {
            remaining--;
            insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value);
        }
    }
    free(oldtable);
    return 0;
}

/* Resize to 4x the number of active entries (2x for large dictionaries). */
static int
dict_grow(PyDictObject *mp)
{
    return dict_resize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
}

/*
 * Look up `key`. Returns the stored value pointer (still owned by the
 * dictionary), or NULL when the key is absent.
 */
PyObject *
dict_search(PyDictObject *mp, PyObject *key)
{
    assert(key);
    size_t hash = mp->ma_keyhash(key);
    PyDictEntry *ep = lookdict(mp, key, hash);
    return ep->me_value;
}

/* Returns 1 when `key` is present, 0 otherwise. */
int
dict_contain(PyDictObject *mp, PyObject *key)
{
    return dict_search(mp, key) ? 1 : 0;
}

/*
 * Insert copies of `key` and `value`. The key must not be present yet.
 *
 * Returns 0 on success. Returns -1 when a copy cannot be made (the
 * dictionary is then unchanged) or when the table could not be grown
 * afterwards (the pair has been inserted nevertheless).
 */
int
dict_add(PyDictObject *mp, PyObject *key, PyObject *value)
{
    assert(key);
    assert(value);
    size_t hash = mp->ma_keyhash(key);
    PyDictEntry *ep = lookdict(mp, key, hash);
    /*only for non-existing keys*/
    assert(ep->me_value == NULL);

    /* Make both copies before touching the slot, so that a failure cannot
     * wipe a dummy marker or leave a dangling key pointer in the table. */
    PyObject *new_key = mp->ma_keydup(key);
    if (new_key == NULL)
        return -1;
    PyObject *new_value = mp->ma_valuedup(value);
    if (new_value == NULL) {
        free(new_key);
        return -1;
    }

    if (ep->me_key == NULL)
        mp->ma_fill++; /* a reused dummy slot is already counted in ma_fill */
    mp->ma_used++;
    ep->me_key = new_key;
    ep->me_value = new_value;
    ep->me_hash = hash;
    if (NEED_RESIZE(mp))
        return dict_grow(mp);
    return 0;
}

/*
 * Replace the value of an existing key with a copy of `value`.
 *
 * Returns 0 on success, -1 when the copy cannot be made (the old value is
 * then kept).
 */
int
dict_update(PyDictObject *mp, PyObject *key, PyObject *value)
{
    assert(key);
    assert(value);
    size_t hash = mp->ma_keyhash(key);
    PyDictEntry *ep = lookdict(mp, key, hash);
    /*only for existing keys*/
    assert(ep->me_value != NULL);

    PyObject *new_value = mp->ma_valuedup(value);
    if (new_value == NULL)
        return -1;
    free(ep->me_value);
    ep->me_value = new_value;
    return 0;
}

/*
 * Remove an existing key. Its key and value are freed and the slot becomes
 * a dummy so that probe sequences passing through it stay intact.
 * Always returns 0.
 */
int
dict_del(PyDictObject *mp, PyObject *key)
{
    assert(key);
    size_t hash = mp->ma_keyhash(key);
    PyDictEntry *ep = lookdict(mp, key, hash);
    /*only for existing keys*/
    assert(ep->me_value != NULL);
    free(ep->me_key);
    free(ep->me_value);
    ep->me_key = dummy;
    ep->me_value = NULL;
    mp->ma_used--;
    return 0;
}

/*
 * Look up `key`, inserting it with a value from ma_default() when absent.
 *
 * Returns the stored value pointer (still owned by the dictionary), or
 * NULL when the key was absent and memory for the new entry could not be
 * obtained (the dictionary is then unchanged).
 */
PyObject *
dict_force_search(PyDictObject *mp, PyObject *key)
{
    assert(key);
    size_t hash = mp->ma_keyhash(key);
    PyDictEntry *ep = lookdict(mp, key, hash);

    if (ep->me_value == NULL) {
        /* Allocate first, commit afterwards: see dict_add(). */
        PyObject *new_key = mp->ma_keydup(key);
        if (new_key == NULL)
            return NULL;
        PyObject *new_value = mp->ma_default();
        if (new_value == NULL) {
            free(new_key);
            return NULL;
        }
        if (ep->me_key == NULL)
            mp->ma_fill++;
        mp->ma_used++;
        ep->me_key = new_key;
        ep->me_value = new_value;
        ep->me_hash = hash;
        /* After a successful resize the entry lives in a new table that has
         * no dummies, so it is located again with the fast lookup. If the
         * resize fails the old table is untouched and `ep` is still valid. */
        if (NEED_RESIZE(mp) && dict_grow(mp) == 0)
            ep = lookdict_nodummy(mp, key, hash);
    }
    return ep->me_value;
}

/*
 * Remove every entry, freeing all keys and values. The table keeps its
 * current size and the dictionary stays usable.
 */
void
dict_clear(PyDictObject *mp)
{
    PyDictEntry *table = mp->ma_table;
    assert(table != NULL);
    size_t remaining = mp->ma_used;
    if (mp->ma_fill == 0)
        return;
    PyDictEntry *ep;
    for (ep = table; remaining > 0; ep++) {
        /*only free active entry, this is different from Python 2.7*/
        if (ep->me_value != NULL) {
            remaining--;
            free(ep->me_key);
            free(ep->me_value);
        }
    }
    memset(table, 0, sizeof(PyDictEntry) * (mp->ma_mask + 1));
    /* The counters must match the emptied table: every scan of the form
     * "walk until ma_used active entries were seen" relies on them. */
    mp->ma_used = 0;
    mp->ma_fill = 0;
}

/* Number of key/value pairs currently stored. */
size_t
dict_len(PyDictObject *mp)
{
    return mp->ma_used;
}

/* helper function for sorting a PyDictEntry by its value */
static int
_valcmp(const void *a, const void *b)
{
return *(size_t *)(*(PyDictEntry *)a).me_value > *(size_t *)(*
(PyDictEntry *)b).me_value ? - : ;
} /*print key value pair by value DESC order*/
/*
 * Print every "key<TAB>value" pair to stdout, largest value first.
 * Keys are printed as strings and values as size_t, i.e. this assumes the
 * default key/value callbacks. Prints nothing for an empty dictionary.
 */
static void
print_all_by_value_desc(PyDictObject *mp)
{
    size_t used = mp->ma_used;
    size_t remaining = used;
    size_t i = 0;
    PyDictEntry *ep;
    PyDictEntry *temp_table;

    if (used == 0)
        return;
    temp_table = malloc(sizeof *temp_table * used);
    if (temp_table == NULL) {
        fprintf(stderr, "print_all_by_value_desc: out of memory\n");
        return;
    }
    /* Copy the active entries into a dense array that can be sorted. */
    for (ep = mp->ma_table; remaining > 0; ep++) {
        if (ep->me_value != NULL) {
            remaining--;
            temp_table[i++] = *ep;
        }
    }
    qsort(temp_table, used, sizeof(temp_table[0]), _valcmp);
    for (i = 0; i < used; i++)
        fprintf(stdout, "%s\t%zu\n", (char *)temp_table[i].me_key,
                *(size_t *)temp_table[i].me_value);
    free(temp_table);
}

/*
 * Debug dump in table order: "key<TAB>value<TAB>hash" for active entries
 * and a notice for each dummy entry met before the last active entry.
 */
void printd(PyDictObject *mp)
{
    PyDictEntry *ep;
    size_t remaining = mp->ma_used;

    for (ep = mp->ma_table; remaining > 0; ep++) {
        if (ep->me_value) {
            remaining--;
            fprintf(stdout, "%s\t%zu\t%zu\n", (char *)ep->me_key,
                    *(size_t *)ep->me_value, ep->me_hash);
        } else if (ep->me_key == dummy) {
            fprintf(stdout, "it is a dummy key! it's hash is %zu\n", ep->me_hash);
        }
    }
}

/* scan words from the file "words", print total amount for each word by DESC order */
int main(void)
{
//PyDictObject *mp = dict_new_custom(32, 0, 0, 0, 0, 0);
PyDictObject *mp = dict_new();
FILE *fp;
fp = fopen("words", "r");
char keybuf[];
size_t valuebuf[] = { };
size_t *vp;
/* while (fscanf(stdin, "%s", keybuf) == 1) {
if (dict_contain(mp, keybuf)) {
vp = dict_search(mp, keybuf);
*vp += 1;
} else
dict_add(mp, keybuf, valuebuf);
}*/
while (fscanf(fp, "%s", keybuf) == ) {
vp = dict_force_search(mp, keybuf);
*vp += ;
} print_all_by_value_desc(mp);
//printd(mp);
dict_clear(mp);
fclose(fp);
free(mp);
return ;
}
Python 2.7的字典实现简化版(C语言)的更多相关文章
- 『Python基础-10』字典
# 『Python基础-10』字典 目录: 1.字典基本概念 2.字典键(key)的特性 3.字典的创建 4-7.字典的增删改查 8.遍历字典 1. 字典的基本概念 字典一种key - value 的 ...
- Python 优雅的操作字典【转】
Python 中的字典是Python中一个键值映射的数据结构,下面介绍一下如何优雅的操作字典. 1.1 创建字典 Python有两种方法可以创建字典,第一种是使用花括号,另一种是使用内建 函数dict ...
- 初学Python(三)——字典
初学Python(三)——字典 初学Python,主要整理一些学习到的知识点,这次是字典. #-*- coding:utf-8 -*- d = {1:"name",2:" ...
- python编程基础知识—字典
字典 在python中,字典是一系列键-值对,每个键都与一个值相关联,可使用键来访问相关联的值.与键相关联的值可以是数字.字符串.列表乃至字典,即可将任何python对象用在字典中的值. 在pytho ...
- python调用数据返回字典dict数据的现象2
python调用数据返回字典dict数据的现象2 思考: 话题1连接:https://www.cnblogs.com/zwgbk/p/10248479.html在打印和添加时候加上内存地址id(),可 ...
- python调用数据返回字典dict数据的现象1
python调用数据返回字典dict数据的现象1 思考: 可以看到这两种情况,区别在于构造函数make()里赋值给字典dict的方式不同.使用相同的调用方式,而结果却完全不同.可以看到第二种情况才是我 ...
- python基本数据类型之字典
python基本数据类型之字典 python中的字典是以键(key)值(value)对的形式储存数据,基本形式如下: d = {'Bart': 95, 'Michael': 34, 'Lisa': 5 ...
- Python 优雅的操作字典
Python 中的字典是Python中一个键值映射的数据结构,下面介绍一下如何优雅的操作字典. 来源:https://www.linuxzen.com/python-you-ya-de-cao-zuo ...
- Python数据类型详解——字典
Python数据类型详解--字典 引子 已经学习了列表,现在有个需求--把公司每个员工的姓名.年龄.职务.工资存到列表里,你怎么存? staff_list = [ ["Kwan", ...
随机推荐
- spring data学习
在Spring Data模块中定义依赖: <dependencies> <dependency> <groupId>org.springframework.data ...
- [HNOI 2001]矩阵乘积
Description Input Output Sample Input 1 2 3 4 2 3 1 1 3 1 4 5 2 2 1 3 1 2 1 2 2 2 1 1 3 1 2 3 2 4 1 ...
- hdu 5113(2014北京—搜索+剪枝)
题意:有N*M的棋盘,用K种颜色去染,要求相邻块不能同色.已知每种颜色要染的块数,问能不能染,如果能,输出任一种染法. 最开始dfs失败了- -,优先搜索一行,搜完后进入下一列,超时.本来以为搜索不行 ...
- hdu 5468(莫比乌斯+搜索)
hdu 5468 Puzzled Elena /*快速通道*/ Sample Input 5 1 2 1 3 2 4 2 5 6 2 3 4 5 Sample Output Case #1: ...
- hdu 4609 (FFT求解三角形)
Problem Description King OMeGa catched three men who had been streaking in the street. Looking as id ...
- day5 liaoxuefeng---virtualenv、图形界面、网络编程、电子邮件
一.virtualenv 二.图形界面 三.网络编程 四.电子邮件
- HashMap实现原理和源码解析
哈希表(hash table)也叫散列表,是一种非常重要的数据结构.许多缓存技术(比如memcached)的核心其实就是在内存中维护一张大的哈希表,本文会对java集合框架中的对应实现HashMap的 ...
- Linux学习之CentOS(十)----Linux 的账号与群组
Linux 的账号与群组 管理员的工作中,相当重要的一环就是『管理账号』啦!因为整个系统都是你在管理的, 并且所有一般用户的账号申请,都必须要透过你的协助才行!所以你就必须要了解一下如何管理好一个服务 ...
- Oracle trunc()函数的用法及四舍五入 round函数
--Oracle trunc()函数的用法/**************日期********************/1.select trunc(sysdate) from dual --2011 ...
- JQuery when() done() then()
jQuery.when(deferreds) 参数deferreds,一个或多个延时对象或JS对象,我们初略的认为它就是一个或多个异步请求. 例如:$.when($.ajax("page1. ...