Python 2.7的字典实现简化版(C语言)
这是一个能自动调整大小的哈希字典,外部接口实现了下列功能.
1.字典级别:
创建字典 dict_new
归零字典 dict_clear
2.键值级别:
查找 dict_search
强制查找 dict_force_search
更新 dict_update
添加 dict_add
删除 dict_del
所谓强制查找就是假如key不存在,那么它将先在字典中添加这个key,值设置为默认值,再返回这个值的指针.
由于键和值都是以void指针(void *)定义的,所以在处理一些简单的值类型时(如int),显得繁琐了些(比如_valcmp),但好处是更加灵活了,比如稍作修改(valuedup和get_default_value)就可以处理值为字符串的情况.
C确实很快,但繁重的内存管理果然名不虚传.这个简单的字典要求:
1.键(me_key)和值(me_value)的指针所指向的堆内存区域能够直接用free释放,如果这些区域还包含另一些堆指针,那么可能会出问题.
2.只需传递缓冲数据(main中的keybuf和valbuf)给键值函数,函数内部会根据情况申请或释放内存,或不做任何处理.
为方便处理,words文本格式要求每行一个词语.
/* Pure C simple version of python 2.7.8 hash table */
/* Sample usage: see main() */
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
/* Smallest table size; every table size is a power of two. */
#define PyDict_MINSIZE 8
/* Number of bits the perturbation value is shifted by on each probe. */
#define PERTURB_SHIFT 5
/* Above this many active entries the table grows 2x instead of 4x. */
#define PyDict_LARGE_USED 50000
/* True once at least two thirds of the slots are active or dummy. */
#define NEED_RESIZE(mp) ((mp)->ma_fill * 3 >= ((mp)->ma_mask + 1) * 2)

/* Keys and values are opaque heap pointers owned by the dictionary. */
typedef void PyObject;

/*
 * One slot of the hash table.
 *   unused: me_key == NULL,  me_value == NULL
 *   dummy : me_key == dummy, me_value == NULL  (deleted entry)
 *   active: me_key is a real key, me_value != NULL
 */
typedef struct {
    size_t me_hash;
    PyObject *me_key;
    PyObject *me_value;
} PyDictEntry;

typedef struct _dictobject PyDictObject;
struct _dictobject {
    size_t ma_fill; /* # Active + # Dummy */
    size_t ma_used; /* # Active */
    size_t ma_mask; /* table size - 1 */
    PyDictEntry *ma_table;
    size_t (*ma_keyhash)(PyObject *key);
    int (*ma_keycmp)(PyObject *key1, PyObject *key2);
    PyObject *(*ma_keydup)(PyObject *key);
    PyObject *(*ma_valuedup)(PyObject *value);
    PyObject *(*ma_default)(void);
};

/* Object used as dummy key to fill deleted entries */
static PyDictEntry _dummy_struct;
#define dummy (&_dummy_struct)

/* Default key hash: treats the key as a NUL-terminated string (hash * 33 + c). */
static size_t
keyhash(PyObject *_key)
{
    const char *p = (const char *)_key;
    size_t hash = 5381;

    for (; *p; p++)
        hash = ((hash << 5) + hash) + *p; /* hash * 33 + c */
    return hash;
}

/* Default key comparison: returns 0 when both NUL-terminated strings are equal. */
static int
keycmp(PyObject *_key1, PyObject *_key2)
{
    return strcmp((const char *)_key1, (const char *)_key2);
}

/*
 * Default key duplication: heap copy of a NUL-terminated string.
 * Returns NULL when the allocation fails. The copy is released with free().
 */
static PyObject *
keydup(PyObject *key)
{
    size_t len = strlen((const char *)key) + 1;
    char *copy = malloc(len);

    if (copy != NULL)
        memcpy(copy, key, len);
    return copy;
}

/*
 * Default value duplication: heap copy of the size_t that _value points to.
 * Returns NULL when the allocation fails.
 */
static PyObject *
valuedup(PyObject *_value)
{
    size_t *value = malloc(sizeof *value);

    if (value != NULL)
        *value = *(const size_t *)_value;
    return value;
}

/*
 * Default value factory used by dict_force_search: a heap size_t set to 0.
 * Returns NULL when the allocation fails.
 */
static PyObject *
get_default_value(void)
{
    size_t *value = malloc(sizeof *value);

    if (value != NULL)
        *value = 0;
    return value;
}

/*
 * Create a dictionary whose table has at least ma_size slots (rounded up to
 * a power of two, never below PyDict_MINSIZE). Any callback passed as NULL
 * is replaced by the string-key / size_t-value default defined above.
 * Returns NULL when the size overflows or an allocation fails.
 */
PyDictObject *
dict_new_custom(size_t ma_size,
                size_t (*ma_keyhash)(PyObject *key),
                int (*ma_keycmp)(PyObject *key1, PyObject *key2),
                PyObject *(*ma_keydup)(PyObject *key),
                PyObject *(*ma_valuedup)(PyObject *value),
                PyObject *(*ma_default)(void))
{
    size_t newsize;

    for (newsize = PyDict_MINSIZE;
         newsize < ma_size && newsize > 0;
         newsize <<= 1)
        ;
    if (newsize == 0) /* the shift overflowed */
        return NULL;

    PyDictObject *mp = malloc(sizeof *mp);
    if (mp == NULL)
        return NULL;

    /* calloc zeroes the table (all slots unused) and checks the size product. */
    mp->ma_table = calloc(newsize, sizeof *mp->ma_table);
    if (mp->ma_table == NULL) {
        free(mp);
        return NULL;
    }
    mp->ma_mask = newsize - 1;
    mp->ma_fill = mp->ma_used = 0;
    mp->ma_keyhash = ma_keyhash ? ma_keyhash : keyhash;
    mp->ma_keycmp = ma_keycmp ? ma_keycmp : keycmp;
    mp->ma_keydup = ma_keydup ? ma_keydup : keydup;
    mp->ma_valuedup = ma_valuedup ? ma_valuedup : valuedup;
    mp->ma_default = ma_default ? ma_default : get_default_value;
    return mp;
}

/* Create a dictionary with the minimum table size and all default callbacks. */
PyDictObject *
dict_new(void)
{
    return dict_new_custom(0, NULL, NULL, NULL, NULL, NULL);
}

/*
 * Internal basic search method, used by the other functions.
 * Returns the active entry holding key, or otherwise the slot where key
 * should be inserted: the first dummy slot met on the probe sequence if
 * there was one, else the unused slot that ended the probe.
 * The loop only terminates if the table contains at least one unused slot.
 */
static PyDictEntry *
lookdict(PyDictObject *mp, PyObject *key, size_t hash)
{
    size_t mask = mp->ma_mask;
    PyDictEntry *ep0 = mp->ma_table;
    PyDictEntry *freeslot = NULL;
    size_t i = hash & mask;
    PyDictEntry *ep = &ep0[i];

    if (ep->me_key == NULL || ep->me_key == key)
        return ep;
    if (ep->me_key == dummy)
        freeslot = ep;
    else if (ep->me_hash == hash && mp->ma_keycmp(ep->me_key, key) == 0)
        return ep;

    for (size_t perturb = hash;; perturb >>= PERTURB_SHIFT) {
        i = (i << 2) + i + perturb + 1;
        ep = &ep0[i & mask];
        if (ep->me_key == NULL)
            return freeslot == NULL ? ep : freeslot;
        /* The dummy check keeps ma_keycmp from ever seeing the sentinel. */
        if (ep->me_key == key
            || (ep->me_hash == hash
                && ep->me_key != dummy
                && mp->ma_keycmp(ep->me_key, key) == 0))
            return ep;
        if (ep->me_key == dummy && freeslot == NULL)
            freeslot = ep;
    }
}

/*
 * Faster search for tables that contain no dummy key. Must not be used on
 * a table with deleted entries: it would pass the sentinel to ma_keycmp.
 */
static PyDictEntry *
lookdict_nodummy(PyDictObject *mp, PyObject *key, size_t hash)
{
    size_t mask = mp->ma_mask;
    PyDictEntry *ep0 = mp->ma_table;
    size_t i = hash & mask;
    PyDictEntry *ep = &ep0[i];

    if (ep->me_key == NULL
        || ep->me_key == key
        || (ep->me_hash == hash && mp->ma_keycmp(ep->me_key, key) == 0))
        return ep;

    for (size_t perturb = hash;; perturb >>= PERTURB_SHIFT) {
        i = (i << 2) + i + perturb + 1;
        ep = &ep0[i & mask];
        if (ep->me_key == NULL
            || ep->me_key == key
            || (ep->me_hash == hash && mp->ma_keycmp(ep->me_key, key) == 0))
            return ep;
    }
}

/*
 * Internal fast insert for a table with no dummy keys and no equal key:
 * stores the given pointers (no duplication) in the first unused slot of
 * the probe sequence.
 */
static void
insertdict_clean(PyDictObject *mp, PyObject *key, size_t hash, PyObject *value)
{
    size_t mask = mp->ma_mask;
    PyDictEntry *ep0 = mp->ma_table;
    size_t i = hash & mask;
    PyDictEntry *ep = &ep0[i];

    for (size_t perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
        i = (i << 2) + i + perturb + 1;
        ep = &ep0[i & mask];
    }
    mp->ma_fill++;
    mp->ma_used++;
    ep->me_key = key;
    ep->me_hash = hash;
    ep->me_value = value;
}

/*
 * Restructure the table by allocating a new table and reinserting all
 * items again. When entries have been deleted, the new table may
 * actually be smaller than the old one.
 * Returns 0 on success, -1 on failure; on failure the dictionary is
 * left untouched.
 */
static int
dict_resize(PyDictObject *mp, size_t minused)
{
    size_t newsize;

    /* Find the smallest table size > minused. */
    for (newsize = PyDict_MINSIZE;
         newsize <= minused && newsize > 0;
         newsize <<= 1)
        ;
    if (newsize == 0) /* the shift overflowed */
        return -1;

    /* Get space for a new, zeroed table. */
    PyDictEntry *newtable = calloc(newsize, sizeof *newtable);
    if (newtable == NULL)
        return -1;

    PyDictEntry *oldtable = mp->ma_table;
    size_t remaining = mp->ma_used;

    mp->ma_table = newtable;
    mp->ma_mask = newsize - 1;
    mp->ma_used = 0;
    mp->ma_fill = 0;
    for (PyDictEntry *ep = oldtable; remaining > 0; ep++) {
        /* Only active entries are carried over; dummies are dropped. */
        if (ep->me_value != NULL) {
            remaining--;
            insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value);
        }
    }
    free(oldtable);
    return 0;
}

/* Table size requested after an insert: 4x the active count, 2x once large. */
static size_t
resize_target(size_t used)
{
    return (used > PyDict_LARGE_USED ? 2 : 4) * used;
}

/*
 * Store an already-owned key/value pair in the non-active slot ep returned
 * by lookdict and update the counters.
 */
static void
fill_slot(PyDictObject *mp, PyDictEntry *ep, PyObject *key, size_t hash,
          PyObject *value)
{
    if (ep->me_key == NULL)
        mp->ma_fill++; /* a reused dummy slot is already counted in ma_fill */
    mp->ma_used++;
    ep->me_key = key;
    ep->me_hash = hash;
    ep->me_value = value;
}

/*
 * Return the value stored under key, or NULL when key is absent.
 * The pointer refers to the dictionary's own copy of the value.
 */
PyObject *
dict_search(PyDictObject *mp, PyObject *key)
{
    assert(key);
    return lookdict(mp, key, mp->ma_keyhash(key))->me_value;
}

/* Return 1 when key is present, 0 otherwise. */
int
dict_contain(PyDictObject *mp, PyObject *key)
{
    return dict_search(mp, key) != NULL;
}

/*
 * Insert copies of key and value. The key must not already be present
 * (checked with assert). Returns 0 on success and -1 when duplicating the
 * key or value fails (dictionary unchanged) or when the follow-up resize
 * fails (the entry is still inserted).
 */
int
dict_add(PyDictObject *mp, PyObject *key, PyObject *value)
{
    assert(key);
    assert(value);
    size_t hash = mp->ma_keyhash(key);
    PyDictEntry *ep = lookdict(mp, key, hash);
    /* only for non-existing keys */
    assert(ep->me_value == NULL);

    /* Duplicate into temporaries so a failure never corrupts the slot. */
    PyObject *new_key = mp->ma_keydup(key);
    if (new_key == NULL)
        return -1;
    PyObject *new_value = mp->ma_valuedup(value);
    if (new_value == NULL) {
        free(new_key);
        return -1;
    }
    fill_slot(mp, ep, new_key, hash, new_value);
    if (NEED_RESIZE(mp))
        return dict_resize(mp, resize_target(mp->ma_used));
    return 0;
}

/*
 * Replace the value of an existing key (checked with assert) by a copy of
 * value. Returns 0 on success, -1 when the copy fails; on failure the old
 * value is kept.
 */
int
dict_update(PyDictObject *mp, PyObject *key, PyObject *value)
{
    assert(key);
    assert(value);
    PyDictEntry *ep = lookdict(mp, key, mp->ma_keyhash(key));
    /* only for existing keys */
    assert(ep->me_value != NULL);

    PyObject *new_value = mp->ma_valuedup(value);
    if (new_value == NULL)
        return -1;
    free(ep->me_value);
    ep->me_value = new_value;
    return 0;
}

/*
 * Remove an existing key (checked with assert), freeing its key and value.
 * The slot becomes a dummy so later probe sequences still pass through it.
 * Always returns 0.
 */
int
dict_del(PyDictObject *mp, PyObject *key)
{
    assert(key);
    PyDictEntry *ep = lookdict(mp, key, mp->ma_keyhash(key));
    /* only for existing keys */
    assert(ep->me_value != NULL);

    free(ep->me_key);
    free(ep->me_value);
    ep->me_key = dummy;
    ep->me_value = NULL;
    mp->ma_used--;
    return 0;
}

/*
 * Return the value stored under key; when key is absent it is first added
 * with a value produced by ma_default. Returns NULL only when that
 * insertion fails to allocate (dictionary unchanged).
 */
PyObject *
dict_force_search(PyDictObject *mp, PyObject *key)
{
    assert(key);
    size_t hash = mp->ma_keyhash(key);
    PyDictEntry *ep = lookdict(mp, key, hash);

    if (ep->me_value != NULL)
        return ep->me_value;

    PyObject *new_key = mp->ma_keydup(key);
    if (new_key == NULL)
        return NULL;
    PyObject *new_value = mp->ma_default();
    if (new_value == NULL) {
        free(new_key);
        return NULL;
    }
    fill_slot(mp, ep, new_key, hash, new_value);

    /*
     * A successful resize rebuilds the table without dummies, so the fast
     * lookup is safe. After a failed resize the old table (and ep) is
     * still valid and may contain dummies, so ep is kept as is.
     */
    if (NEED_RESIZE(mp) && dict_resize(mp, resize_target(mp->ma_used)) == 0)
        ep = lookdict_nodummy(mp, key, hash);
    return ep->me_value;
}

/*
 * Remove every entry, freeing all keys and values. The table keeps its
 * current size and the dictionary stays usable.
 */
void
dict_clear(PyDictObject *mp)
{
    PyDictEntry *table = mp->ma_table;
    assert(table != NULL);

    if (mp->ma_fill == 0)
        return;

    size_t remaining = mp->ma_used;
    for (PyDictEntry *ep = table; remaining > 0; ep++) {
        /* only free active entry, this is different from Python 2.7 */
        if (ep->me_value != NULL) {
            remaining--;
            free(ep->me_key);
            free(ep->me_value);
        }
    }
    memset(table, 0, sizeof(PyDictEntry) * (mp->ma_mask + 1));
    /* Reset the counters so they match the now-empty table. */
    mp->ma_used = 0;
    mp->ma_fill = 0;
}

/* Release every entry, the table and the dictionary itself. NULL is a no-op. */
void
dict_free(PyDictObject *mp)
{
    if (mp == NULL)
        return;
    dict_clear(mp);
    free(mp->ma_table);
    free(mp);
}

/* Number of active entries. */
size_t
dict_len(PyDictObject *mp)
{
    return mp->ma_used;
}

/*
 * qsort helper ordering PyDictEntry items by their size_t value, largest
 * first. Returns 0 for equal values so the ordering is consistent.
 */
static int
_valcmp(const void *a, const void *b)
{
    size_t va = *(const size_t *)((const PyDictEntry *)a)->me_value;
    size_t vb = *(const size_t *)((const PyDictEntry *)b)->me_value;

    return (va < vb) - (va > vb);
}

/*
 * Print "key<TAB>value" for every active entry to stdout, by value in
 * descending order. Keys are printed as strings and values as size_t.
 */
static void
print_all_by_value_desc(PyDictObject *mp)
{
    size_t used = mp->ma_used;

    if (used == 0)
        return;

    PyDictEntry *snapshot = malloc(used * sizeof *snapshot);
    if (snapshot == NULL) {
        fprintf(stderr, "print_all_by_value_desc: out of memory\n");
        return;
    }

    /* Copy the active entries; the copies share key/value pointers. */
    size_t n = 0;
    for (PyDictEntry *ep = mp->ma_table; n < used; ep++) {
        if (ep->me_value != NULL)
            snapshot[n++] = *ep;
    }

    qsort(snapshot, used, sizeof snapshot[0], _valcmp);
    for (size_t i = 0; i < used; i++)
        fprintf(stdout, "%s\t%zu\n", (char *)snapshot[i].me_key,
                *(size_t *)snapshot[i].me_value);
    free(snapshot);
}

/*
 * Debug dump in table order: key, value and hash of every active entry,
 * plus a note for each dummy slot met before the last active entry.
 */
void printd(PyDictObject *mp)
{
    size_t remaining = mp->ma_used;

    for (PyDictEntry *ep = mp->ma_table; remaining > 0; ep++) {
        if (ep->me_value) {
            remaining--;
            fprintf(stdout, "%s\t%zu\t%zu\n", (char *)ep->me_key,
                    *(size_t *)ep->me_value, ep->me_hash);
        } else if (ep->me_key == dummy) {
            fprintf(stdout, "it is a dummy key! it's hash is %zu\n", ep->me_hash);
        }
    }
}
int main(void)
{
//PyDictObject *mp = dict_new_custom(32, 0, 0, 0, 0, 0);
PyDictObject *mp = dict_new();
FILE *fp;
fp = fopen("words", "r");
char keybuf[];
size_t valuebuf[] = { };
size_t *vp;
/* while (fscanf(stdin, "%s", keybuf) == 1) {
if (dict_contain(mp, keybuf)) {
vp = dict_search(mp, keybuf);
*vp += 1;
} else
dict_add(mp, keybuf, valuebuf);
}*/
while (fscanf(fp, "%s", keybuf) == ) {
vp = dict_force_search(mp, keybuf);
*vp += ;
} print_all_by_value_desc(mp);
//printd(mp);
dict_clear(mp);
fclose(fp);
free(mp);
return ;
}
Python 2.7的字典实现简化版(C语言)的更多相关文章
- 『Python基础-10』字典
# 『Python基础-10』字典 目录: 1.字典基本概念 2.字典键(key)的特性 3.字典的创建 4-7.字典的增删改查 8.遍历字典 1. 字典的基本概念 字典一种key - value 的 ...
- Python 优雅的操作字典【转】
Python 中的字典是Python中一个键值映射的数据结构,下面介绍一下如何优雅的操作字典. 1.1 创建字典 Python有两种方法可以创建字典,第一种是使用花括号,另一种是使用内建 函数dict ...
- 初学Python(三)——字典
初学Python(三)——字典 初学Python,主要整理一些学习到的知识点,这次是字典. #-*- coding:utf-8 -*- d = {1:"name",2:" ...
- python编程基础知识—字典
字典 在python中,字典是一系列键-值对,每个键都与一个值相关联,可使用键来访问相关联的值.与键相关联的值可以是数字.字符串.列表乃至字典,即可将任何python对象用在字典中的值. 在pytho ...
- python调用数据返回字典dict数据的现象2
python调用数据返回字典dict数据的现象2 思考: 话题1连接:https://www.cnblogs.com/zwgbk/p/10248479.html在打印和添加时候加上内存地址id(),可 ...
- python调用数据返回字典dict数据的现象1
python调用数据返回字典dict数据的现象1 思考: 可以看到这两种情况,区别在于构造函数make()里赋值给字典dict的方式不同.使用相同的调用方式,而结果却完全不同.可以看到第二种情况才是我 ...
- python基本数据类型之字典
python基本数据类型之字典 python中的字典是以键(key)值(value)对的形式储存数据,基本形式如下: d = {'Bart': 95, 'Michael': 34, 'Lisa': 5 ...
- Python 优雅的操作字典
Python 中的字典是Python中一个键值映射的数据结构,下面介绍一下如何优雅的操作字典. 来源:https://www.linuxzen.com/python-you-ya-de-cao-zuo ...
- Python数据类型详解——字典
Python数据类型详解--字典 引子 已经学习了列表,现在有个需求--把公司每个员工的姓名.年龄.职务.工资存到列表里,你怎么存? staff_list = [ ["Kwan", ...
随机推荐
- 网络流入门-POJ1459PowerNetwork-Dinic模板
(我有什么错误或者你有什么意见,欢迎留言或私聊!谢谢!) (Ps:以前听说过网络流,想着以后再学,这次中南多校赛也碰到有关网络流的题目,想着这两天试着学学这个吧~~ 这是本人网络流入门第二题,不知道怎 ...
- win10+ ubuntu12.04双系统安装教程与遇到的问题
1. 准备ISO. 参考:网站http://mirrors.ustc.edu.cn/ubuntu-releases/precise/ 下载 ubuntu-12.04.5-desktop-amd64.i ...
- [AHOI 2016初中组]迷宫
Description 小雪和小可可被困在了一个无限大的迷宫中. 已经知道这个迷宫有 N 堵环状的墙,如果把整个迷宫看作是一个二维平面,那么每一堵墙都是平面上一个圆.任意两个圆不相交,不重合,也不会相 ...
- 计蒜客NOIP模拟赛(2)D1T3 深黑幻想
[问题描述] 凡终于发愤图强,决定专心搞OI,不再玩纸牌和坑钱了!没过多久就飘飘然了,总是陷入自己进了集训队的深黑幻想之中. 样听说了之后,决定考一考凡欧拉回路怎么写.样:“我给你出一道题 ...
- ●BOZJ 3144 [Hnoi2013]切糕
题链: http://www.lydsy.com/JudgeOnline/problem.php?id=3144 题解: "这是一个经典的最小割模型" ---引用自别人的博客 .. ...
- 【USACO12JAN】视频游戏的连击Video Game Combos
题目描述 Bessie is playing a video game! In the game, the three letters 'A', 'B', and 'C' are the only v ...
- hdu 3436 线段树 一顿操作
Queue-jumpers Time Limit: 2000/1000 MS (Java/Others) Memory Limit: 32768/32768 K (Java/Others) To ...
- bzoj2149拆迁队 斜率优化dp+分治
2149: 拆迁队 Time Limit: 10 Sec Memory Limit: 259 MBSubmit: 397 Solved: 177[Submit][Status][Discuss] ...
- (MariaDB/MySQL)之DML(2):数据更新、删除
本文目录:1.update语句2.delete语句 2.1 单表删除 2.2 多表删除3.truncate table 1.update语句 update用于修改表中记录. # 单表更新语法: UPD ...
- 浅谈Java中的equals和==与hashCode
转载:https://www.cnblogs.com/dolphin0520/p/3592500.html 参考:http://blog.csdn.net/yinzhijiezhan/article/ ...