Python 2.7的字典实现简化版(C语言)
这是一个能自动调整大小的哈希字典,外部接口实现了下列功能.
1.字典级别:
创建字典 dict_new
归零字典 dict_clear
2.键值级别:
查找 dict_search
强制查找 dict_force_search
更新 dict_update
添加 dict_add
删除 dict_del
所谓强制查找就是假如key不存在,那么它将先在字典中添加这个key,值设置为默认值,再返回这个值的指针.
由于键和值都是以空指针(void *)定义的,所以在处理一些简单的值类型时(如int),显得繁琐了些(比如排序用的比较函数_valcmp),但好处是更加灵活了,比如稍作修改(valuedup和get_default_value)就可以处理值为字符串的情况.
C确实很快,但繁重的内存管理果然名不虚传.这个简单的字典要求:
1.键(me_key)和值(me_value)的指针所指向的堆内存区域能够直接用free释放,如果这些区域还包含另一些堆指针,那么可能会出问题.
2.只需传递缓冲数据(main中的keybuf和valbuf)给键值函数,函数内部会根据情况申请或释放内存,或不做任何处理.
为方便处理,words文本格式要求每行一个词语.
/* Pure C simple version of python 2.7.8 hash table */
/* Sample usage: see main() */
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#define PyDict_MINSIZE 8
#define PERTURB_SHIFT 5
/* True once active + dummy slots occupy at least two thirds of the table. */
#define NEED_RESIZE(mp) ((mp)->ma_fill * 3 >= ((mp)->ma_mask + 1) * 2)

/* Keys and values are handled as opaque pointers. */
typedef void PyObject;

/*
 * One slot of the open-addressing table.
 *   unused : me_key == NULL,  me_value == NULL
 *   active : me_key is a real key, me_value != NULL
 *   dummy  : me_key == dummy (entry was deleted), me_value == NULL
 */
typedef struct {
    size_t me_hash;
    PyObject *me_key;
    PyObject *me_value;
} PyDictEntry;

typedef struct _dictobject PyDictObject;
struct _dictobject {
    size_t ma_fill; /* # Active + # Dummy */
    size_t ma_used; /* # Active */
    size_t ma_mask; /* table size - 1; the size is always a power of two */
    PyDictEntry *ma_table;
    size_t (*ma_keyhash)(PyObject *key);
    int (*ma_keycmp)(PyObject *key1, PyObject *key2);
    PyObject *(*ma_keydup)(PyObject *key);
    PyObject *(*ma_valuedup)(PyObject *value);
    PyObject *(*ma_default)(void);
};

/* Object used as dummy key to fill deleted entries */
static PyDictEntry _dummy_struct;
#define dummy (&_dummy_struct)

/* Default key hash: djb2 over a NUL-terminated string. */
static size_t
keyhash(PyObject *_key)
{
    /* Read bytes as unsigned so the hash does not depend on char signedness. */
    const unsigned char *key = (const unsigned char *)_key;
    size_t hash = 5381;

    for (; *key; key++)
        hash = ((hash << 5) + hash) + *key; /* hash * 33 + c */
    return hash;
}

/* Default key comparison: 0 when both NUL-terminated strings are equal. */
static int
keycmp(PyObject *_key1, PyObject *_key2)
{
    return strcmp((const char *)_key1, (const char *)_key2);
}

/*
 * Default key duplication: heap copy of a NUL-terminated string, or NULL
 * when out of memory.  Written with malloc/memcpy because strdup is not
 * part of ISO C before C23.
 */
static PyObject *
keydup(PyObject *key)
{
    size_t len = strlen((const char *)key) + 1;
    char *copy = malloc(len);

    if (copy != NULL)
        memcpy(copy, key, len);
    return copy;
}

/* Default value duplication: heap copy of one size_t, or NULL on OOM. */
static PyObject *
valuedup(PyObject *_value)
{
    size_t *value = malloc(sizeof *value);

    if (value != NULL)
        *value = *(const size_t *)_value;
    return value;
}

/* Default value factory: heap-allocated size_t set to 0, or NULL on OOM. */
static PyObject *
get_default_value(void)
{
    size_t *value = malloc(sizeof *value);

    if (value != NULL)
        *value = 0;
    return value;
}

/*
 * Smallest power-of-two table size that is >= want and >= PyDict_MINSIZE.
 * Returns 0 when the size would overflow size_t.
 */
static size_t
table_size_at_least(size_t want)
{
    size_t size = PyDict_MINSIZE;

    while (size < want && size > 0)
        size <<= 1; /* unsigned shift wraps to 0 on overflow */
    return size;
}

/* Resize target used after an insertion. */
static size_t
resize_target(const PyDictObject *mp)
{
    return (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used;
}

/*
 * Create a dictionary with room for at least ma_size slots.
 * Any callback passed as NULL falls back to the built-in default
 * (string keys, size_t values).  Returns NULL on allocation failure.
 */
PyDictObject *
dict_new_custom(size_t ma_size,
                size_t (*ma_keyhash)(PyObject *key),
                int (*ma_keycmp)(PyObject *key1, PyObject *key2),
                PyObject *(*ma_keydup)(PyObject *key),
                PyObject *(*ma_valuedup)(PyObject *value),
                PyObject *(*ma_default)(void))
{
    size_t newsize = table_size_at_least(ma_size);
    if (newsize == 0)
        return NULL;

    PyDictObject *mp = malloc(sizeof *mp);
    if (mp == NULL)
        return NULL;

    /* calloc zero-fills the table and checks the size multiplication. */
    mp->ma_table = calloc(newsize, sizeof *mp->ma_table);
    if (mp->ma_table == NULL) {
        free(mp); /* do not leak the header when the table cannot be allocated */
        return NULL;
    }
    mp->ma_mask = newsize - 1;
    mp->ma_fill = mp->ma_used = 0;
    mp->ma_keyhash = ma_keyhash ? ma_keyhash : keyhash;
    mp->ma_keycmp = ma_keycmp ? ma_keycmp : keycmp;
    mp->ma_keydup = ma_keydup ? ma_keydup : keydup;
    mp->ma_valuedup = ma_valuedup ? ma_valuedup : valuedup;
    mp->ma_default = ma_default ? ma_default : get_default_value;
    return mp;
}

/* Create a dictionary of minimum size using all default callbacks. */
PyDictObject *
dict_new(void)
{
    return dict_new_custom(0, NULL, NULL, NULL, NULL, NULL);
}

/*
 * Internal basic search method, used by the other functions.
 * Returns the active entry holding key, or otherwise the slot where key
 * should be inserted (the first dummy seen on the probe path, else the
 * unused slot that ended the probe).  Never returns NULL.  The probe only
 * terminates if the table has at least one unused slot.
 */
static PyDictEntry *
lookdict(PyDictObject *mp, PyObject *key, size_t hash)
{
    size_t mask = mp->ma_mask;
    PyDictEntry *ep0 = mp->ma_table;
    PyDictEntry *freeslot = NULL;
    size_t i = hash & mask;
    PyDictEntry *ep = &ep0[i];

    if (ep->me_key == NULL || ep->me_key == key)
        return ep;
    if (ep->me_key == dummy)
        freeslot = ep;
    else if (ep->me_hash == hash && mp->ma_keycmp(ep->me_key, key) == 0)
        return ep;

    for (size_t perturb = hash;; perturb >>= PERTURB_SHIFT) {
        i = (i << 2) + i + perturb + 1;
        ep = &ep0[i & mask];
        if (ep->me_key == NULL)
            return freeslot == NULL ? ep : freeslot;
        if (ep->me_key == key
            || (ep->me_hash == hash
                && ep->me_key != dummy
                && mp->ma_keycmp(ep->me_key, key) == 0))
            return ep;
        if (ep->me_key == dummy && freeslot == NULL)
            freeslot = ep;
    }
}

/*
 * Internal fast insert for a table that contains no dummy entries and
 * does not yet contain key.  Takes ownership of key and value as-is.
 */
static void
insertdict_clean(PyDictObject *mp, PyObject *key, size_t hash, PyObject *value)
{
    size_t mask = mp->ma_mask;
    PyDictEntry *ep0 = mp->ma_table;
    size_t i = hash & mask;
    PyDictEntry *ep = &ep0[i];

    for (size_t perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
        i = (i << 2) + i + perturb + 1;
        ep = &ep0[i & mask];
    }
    mp->ma_fill++;
    mp->ma_used++;
    ep->me_key = key;
    ep->me_hash = hash;
    ep->me_value = value;
}

/*
 * Restructure the table by allocating a new table and reinserting all
 * items again. When entries have been deleted, the new table may
 * actually be smaller than the old one.
 * Returns 0 on success, -1 on failure (the dictionary is left unchanged).
 */
static int
dict_resize(PyDictObject *mp, size_t minused)
{
    /* Find the smallest table size > minused. */
    if (minused == (size_t)-1)
        return -1;
    size_t newsize = table_size_at_least(minused + 1);
    if (newsize == 0)
        return -1;

    /* Get space for a new table. */
    PyDictEntry *newtable = calloc(newsize, sizeof *newtable);
    if (newtable == NULL)
        return -1;

    PyDictEntry *oldtable = mp->ma_table;
    size_t oldsize = mp->ma_mask + 1;

    mp->ma_table = newtable;
    mp->ma_mask = newsize - 1;
    mp->ma_used = 0;
    mp->ma_fill = 0;
    for (size_t i = 0; i < oldsize; i++) {
        /* only active entries are carried over; dummies are dropped */
        if (oldtable[i].me_value != NULL)
            insertdict_clean(mp, oldtable[i].me_key, oldtable[i].me_hash,
                             oldtable[i].me_value);
    }
    free(oldtable);
    return 0;
}

/*
 * Turn the slot returned by lookdict into an active entry.
 * Takes ownership of new_key and new_value.
 */
static void
activate_entry(PyDictObject *mp, PyDictEntry *ep, size_t hash,
               PyObject *new_key, PyObject *new_value)
{
    if (ep->me_key == NULL)
        mp->ma_fill++; /* reusing a dummy slot does not change the fill */
    mp->ma_used++;
    ep->me_key = new_key;
    ep->me_value = new_value;
    ep->me_hash = hash;
}

/* Return the stored value pointer for key, or NULL if key is absent. */
PyObject *
dict_search(PyDictObject *mp, PyObject *key)
{
    assert(key);
    size_t hash = mp->ma_keyhash(key);
    PyDictEntry *ep = lookdict(mp, key, hash);
    return ep->me_value;
}

/* Return 1 if key is present, 0 otherwise. */
int
dict_contain(PyDictObject *mp, PyObject *key)
{
    return dict_search(mp, key) ? 1 : 0;
}

/*
 * Add a copy of key/value.  The key must not already be present.
 * Returns 0 on success.  Returns -1 if duplicating the key or value
 * fails (dictionary unchanged) or if the follow-up resize fails (the
 * entry has been added, the table was just not resized).
 */
int
dict_add(PyDictObject *mp, PyObject *key, PyObject *value)
{
    assert(key);
    assert(value);
    size_t hash = mp->ma_keyhash(key);
    PyDictEntry *ep = lookdict(mp, key, hash);
    /*only for non-existing keys*/
    assert(ep->me_value == NULL);

    /* Duplicate first and commit afterwards, so a failed allocation never
       leaves a half-written slot or wipes a dummy out of a probe chain. */
    PyObject *new_key = mp->ma_keydup(key);
    if (new_key == NULL)
        return -1;
    PyObject *new_value = mp->ma_valuedup(value);
    if (new_value == NULL) {
        free(new_key);
        return -1;
    }
    activate_entry(mp, ep, hash, new_key, new_value);
    if (NEED_RESIZE(mp))
        return dict_resize(mp, resize_target(mp));
    return 0;
}

/*
 * Replace the value of an existing key with a copy of value.
 * Returns 0 on success, -1 if the copy fails (old value is kept).
 */
int
dict_update(PyDictObject *mp, PyObject *key, PyObject *value)
{
    assert(key);
    assert(value);
    size_t hash = mp->ma_keyhash(key);
    PyDictEntry *ep = lookdict(mp, key, hash);
    /*only for existing keys*/
    assert(ep->me_value != NULL);

    PyObject *new_value = mp->ma_valuedup(value);
    if (new_value == NULL)
        return -1;
    free(ep->me_value);
    ep->me_value = new_value;
    return 0;
}

/* Remove an existing key, freeing its key and value.  Returns 0. */
int
dict_del(PyDictObject *mp, PyObject *key)
{
    assert(key);
    size_t hash = mp->ma_keyhash(key);
    PyDictEntry *ep = lookdict(mp, key, hash);
    /*only for existing keys*/
    assert(ep->me_value != NULL);
    free(ep->me_key);
    free(ep->me_value);
    /* leave a dummy behind so probe chains running through this slot survive */
    ep->me_key = dummy;
    ep->me_value = NULL;
    mp->ma_used--;
    return 0;
}

/*
 * Return the value pointer for key, first inserting key with the default
 * value when it is absent.  Returns NULL only on allocation failure, in
 * which case the dictionary is unchanged.
 */
PyObject *
dict_force_search(PyDictObject *mp, PyObject *key)
{
    assert(key);
    size_t hash = mp->ma_keyhash(key);
    PyDictEntry *ep = lookdict(mp, key, hash);
    if (ep->me_value != NULL)
        return ep->me_value;

    PyObject *new_key = mp->ma_keydup(key);
    if (new_key == NULL)
        return NULL;
    PyObject *new_value = mp->ma_default();
    if (new_value == NULL) {
        free(new_key);
        return NULL;
    }
    activate_entry(mp, ep, hash, new_key, new_value);
    if (NEED_RESIZE(mp))
        (void)dict_resize(mp, resize_target(mp)); /* on failure the old table stays valid */
    /* A resize moves entries, not the value objects they point to, so the
       pointer stored above is still the one held by the table. */
    return new_value;
}

/* Free every key and value and reset the dictionary to empty; the table keeps its size. */
void
dict_clear(PyDictObject *mp)
{
    PyDictEntry *table = mp->ma_table;
    assert(table != NULL);
    if (mp->ma_fill == 0)
        return;

    size_t size = mp->ma_mask + 1;
    for (size_t i = 0; i < size; i++) {
        /*only free active entry, this is different from Python 2.7*/
        if (table[i].me_value != NULL) {
            free(table[i].me_key);
            free(table[i].me_value);
        }
    }
    memset(table, 0, sizeof *table * size);
    /* The counters must follow the table, otherwise dict_len() and a
       second dict_clear() would still see the old entry count. */
    mp->ma_fill = mp->ma_used = 0;
}

/* Release all entries, the table and the dictionary itself.  NULL is a no-op. */
void
dict_free(PyDictObject *mp)
{
    if (mp == NULL)
        return;
    dict_clear(mp);
    free(mp->ma_table);
    free(mp);
}

/* Number of active entries. */
size_t
dict_len(PyDictObject *mp)
{
    return mp->ma_used;
}

/* qsort helper: order PyDictEntry items by their size_t value, descending. */
static int
_valcmp(const void *a, const void *b)
{
    size_t va = *(const size_t *)((const PyDictEntry *)a)->me_value;
    size_t vb = *(const size_t *)((const PyDictEntry *)b)->me_value;

    if (va > vb)
        return -1;
    if (va < vb)
        return 1;
    return 0; /* equal elements must compare equal for qsort */
}

/*
 * Print "key<TAB>value" lines to stdout in descending value order.
 * Keys are printed as strings and values as size_t.
 */
static void
print_all_by_value_desc(PyDictObject *mp)
{
    size_t used = mp->ma_used;
    if (used == 0)
        return;

    PyDictEntry *temp_table = malloc(sizeof *temp_table * used);
    if (temp_table == NULL)
        return;

    /* Stop as soon as every active entry has been copied. */
    size_t count = 0;
    for (PyDictEntry *ep = mp->ma_table; count < used; ep++) {
        if (ep->me_value != NULL)
            temp_table[count++] = *ep;
    }
    qsort(temp_table, used, sizeof temp_table[0], _valcmp);
    for (size_t i = 0; i < used; i++)
        fprintf(stdout, "%s\t%zu\n", (char *)temp_table[i].me_key,
                *(size_t *)temp_table[i].me_value);
    free(temp_table);
}

/*
 * Debug dump to stdout: walks the table in slot order up to the last
 * active entry, printing key, value and hash for active entries and a
 * note for each dummy met on the way.
 */
void printd(PyDictObject *mp)
{
    size_t used = mp->ma_used;

    for (PyDictEntry *ep = mp->ma_table; used > 0; ep++) {
        if (ep->me_value) {
            used--;
            fprintf(stdout, "%s\t%zu\t%zu\n", (char *)ep->me_key,
                    *(size_t *)ep->me_value, ep->me_hash);
        } else if (ep->me_key == dummy) {
            fprintf(stdout, "it is a dummy key! it's hash is %zu\n", ep->me_hash);
        }
    }
}

/* scan words, print total amount for each word by DESC order */
int main(void)
{
//PyDictObject *mp = dict_new_custom(32, 0, 0, 0, 0, 0);
PyDictObject *mp = dict_new();
FILE *fp;
fp = fopen("words", "r");
char keybuf[];
size_t valuebuf[] = { };
size_t *vp;
/* while (fscanf(stdin, "%s", keybuf) == 1) {
if (dict_contain(mp, keybuf)) {
vp = dict_search(mp, keybuf);
*vp += 1;
} else
dict_add(mp, keybuf, valuebuf);
}*/
while (fscanf(fp, "%s", keybuf) == ) {
vp = dict_force_search(mp, keybuf);
*vp += ;
} print_all_by_value_desc(mp);
//printd(mp);
dict_clear(mp);
fclose(fp);
free(mp);
return ;
}
Python 2.7的字典实现简化版(C语言)的更多相关文章
- 『Python基础-10』字典
# 『Python基础-10』字典 目录: 1.字典基本概念 2.字典键(key)的特性 3.字典的创建 4-7.字典的增删改查 8.遍历字典 1. 字典的基本概念 字典一种key - value 的 ...
- Python 优雅的操作字典【转】
Python 中的字典是Python中一个键值映射的数据结构,下面介绍一下如何优雅的操作字典. 1.1 创建字典 Python有两种方法可以创建字典,第一种是使用花括号,另一种是使用内建 函数dict ...
- 初学Python(三)——字典
初学Python(三)——字典 初学Python,主要整理一些学习到的知识点,这次是字典. #-*- coding:utf-8 -*- d = {1:"name",2:" ...
- python编程基础知识—字典
字典 在python中,字典是一系列键-值对,每个键都与一个值相关联,可使用键来访问相关联的值.与键相关联的值可以是数字.字符串.列表乃至字典,即可将任何python对象用在字典中的值. 在pytho ...
- python调用数据返回字典dict数据的现象2
python调用数据返回字典dict数据的现象2 思考: 话题1连接:https://www.cnblogs.com/zwgbk/p/10248479.html在打印和添加时候加上内存地址id(),可 ...
- python调用数据返回字典dict数据的现象1
python调用数据返回字典dict数据的现象1 思考: 可以看到这两种情况,区别在于构造函数make()里赋值给字典dict的方式不同.使用相同的调用方式,而结果却完全不同.可以看到第二种情况才是我 ...
- python基本数据类型之字典
python基本数据类型之字典 python中的字典是以键(key)值(value)对的形式储存数据,基本形式如下: d = {'Bart': 95, 'Michael': 34, 'Lisa': 5 ...
- Python 优雅的操作字典
Python 中的字典是Python中一个键值映射的数据结构,下面介绍一下如何优雅的操作字典. 来源:https://www.linuxzen.com/python-you-ya-de-cao-zuo ...
- Python数据类型详解——字典
Python数据类型详解--字典 引子 已经学习了列表,现在有个需求--把公司每个员工的姓名.年龄.职务.工资存到列表里,你怎么存? staff_list = [ ["Kwan", ...
随机推荐
- C#在使用Assembly加载程序集时失败
错误现象: 进行插件读取时出现错误:"尝试从一个网络位置加载程序集,在早期版本的 .NET Framework 中,这会导致对该程序集进行沙盒处理.此发行版的 .NET Framework ...
- 一、spring的成长之路——代理设计模式
java常用的设计模式详解: 1.代理模式(JDK的动态代理) [IDept.java] 这是一个简单的就接口,进行数据的更新 package com.itcloud.pattern.proxy; ...
- LeetCode169:Majority Element(Hash表\位操作未懂)
题目来源: Given an array of size n, find the majority element. The majority element is the element that ...
- [LeetCode] Repeated String Match 重复字符串匹配
Given two strings A and B, find the minimum number of times A has to be repeated such that B is a su ...
- MySQL · 引擎特性 · InnoDB 同步机制
前言 现代操作系统以及硬件基本都支持并发程序,而在并发程序设计中,各个进程或者线程需要对公共变量的访问加以制约,此外,不同的进程或者线程需要协同工作以完成特征的任务,这就需要一套完善的同步机制,在Li ...
- pyqt5 动画学习(三) 指定控件的移动轨迹
这一篇来讲解自定义控件的移动轨迹 原理:我们采用QPainterPath先画一个弧线,然后加载一个物体让物体移动,设置100个关键帧,每个关键帧物体的坐标位置就是弧线的坐标位置,这样就能达到按照指定轨 ...
- hdu 5656 CA Loves GCD(n个任选k个的最大公约数和)
CA Loves GCD Accepts: 64 Submissions: 535 Time Limit: 6000/3000 MS (Java/Others) Memory Limit: 2 ...
- [bzoj4883][Lydsy2017年5月月赛]棋盘上的守卫
来自FallDream的博客,未经允许,请勿转载, 谢谢. 在一个n*m的棋盘上要放置若干个守卫.对于n行来说,每行必须恰好放置一个横向守卫:同理对于m列来说,每列 必须恰好放置一个纵向守卫.每个位置 ...
- bzoj1043[HAOI2008]下落的圆盘 计算几何
1043: [HAOI2008]下落的圆盘 Time Limit: 10 Sec Memory Limit: 162 MBSubmit: 1598 Solved: 676[Submit][Stat ...
- QCA4028软件平台启用双WAN指导
1 为何要启用双WAN QCA4028的硬件方案,基板上部署了一个LTE模块插槽,同时又外留了一个USB3.0接口,因此,就可以在此硬件平台上调试基于LTE的双WAN,预期实现: A 链路备份,在任意 ...