eventloop

Start from the main function in server.c.

int main(int argc, char **argv) {
    .......
    aeSetBeforeSleepProc(server.el,beforeSleep);
    aeSetAfterSleepProc(server.el,afterSleep);
    aeMain(server.el);
    aeDeleteEventLoop(server.el);
    return 0;
}

aeMain, in ae.c

// Loops forever, calling aeProcessEvents to handle whatever time events
// and file events are ready. Invoked from main() in server.c.
void aeMain(aeEventLoop *eventLoop) {
    eventLoop->stop = 0;
    while (!eventLoop->stop) {
        if (eventLoop->beforesleep != NULL)
            eventLoop->beforesleep(eventLoop);
        aeProcessEvents(eventLoop, AE_ALL_EVENTS|AE_CALL_AFTER_SLEEP);
    }
}
/* Process every pending time event, then every pending file event
 * (that may be registered by time event callbacks just processed).
 * Without special flags the function sleeps until some file event
 * fires, or when the next time event occurs (if any).
 *
 * If flags is 0, the function does nothing and returns.
 * if flags has AE_ALL_EVENTS set, all the kind of events are processed.
 * if flags has AE_FILE_EVENTS set, file events are processed.
 * if flags has AE_TIME_EVENTS set, time events are processed.
 * if flags has AE_DONT_WAIT set the function returns ASAP until all
 * the events that's possible to process without to wait are processed.
 * if flags has AE_CALL_AFTER_SLEEP set, the aftersleep callback is called.
 *
 * The function returns the number of events processed. */
int aeProcessEvents(aeEventLoop *eventLoop, int flags)
{
    ........
    /* Note that we want call select() even if there are no
     * file events to process as long as we want to process time
     * events, in order to sleep until the next time event is ready
     * to fire. */
    // Time events are looked at first: they decide how long we may block
    if (eventLoop->maxfd != -1 ||
        ((flags & AE_TIME_EVENTS) && !(flags & AE_DONT_WAIT))) {
        int j;
        aeTimeEvent *shortest = NULL;
        struct timeval tv, *tvp;

        if (flags & AE_TIME_EVENTS && !(flags & AE_DONT_WAIT))
            // Find the nearest time event in the time event list
            shortest = aeSearchNearestTimer(eventLoop);
        // Work out how long we have to wait until that time event fires
        if (shortest) {
            long now_sec, now_ms;

            aeGetTime(&now_sec, &now_ms);
            tvp = &tv;

            /* How many milliseconds we need to wait for the next
             * time event to fire? */
            long long ms =
                (shortest->when_sec - now_sec)*1000 +
                shortest->when_ms - now_ms;

            if (ms > 0) {
                tvp->tv_sec = ms/1000;
                tvp->tv_usec = (ms % 1000)*1000;
            } else {
                tvp->tv_sec = 0;
                tvp->tv_usec = 0;
            }
        } else {
            /* If we have to check for events but need to return
             * ASAP because of AE_DONT_WAIT we need to set the timeout
             * to zero */
            if (flags & AE_DONT_WAIT) {
                tv.tv_sec = tv.tv_usec = 0;
                tvp = &tv;
            } else {
                /* Otherwise we can block */
                tvp = NULL; /* wait forever */
            }
        }

        /* Call the multiplexing API, will return only on timeout or when
         * some event fires. */
        // Call the I/O multiplexing layer to find readable/writable file events
        numevents = aeApiPoll(eventLoop, tvp);

        /* After sleep callback. */
        if (eventLoop->aftersleep != NULL && flags & AE_CALL_AFTER_SLEEP)
            eventLoop->aftersleep(eventLoop);

        // Iterate over the fds recorded in the event loop's fired array
        for (j = 0; j < numevents; j++) {
            aeFileEvent *fe = &eventLoop->events[eventLoop->fired[j].fd];
            int mask = eventLoop->fired[j].mask; // the event type: read/write
            int fd = eventLoop->fired[j].fd;     // the event's fd
            int fired = 0; /* Number of events fired for current fd. */

            /* Normally we execute the readable event first, and the writable
             * event laster. This is useful as sometimes we may be able
             * to serve the reply of a query immediately after processing the
             * query.
             *
             * However if AE_BARRIER is set in the mask, our application is
             * asking us to do the reverse: never fire the writable event
             * after the readable. In such a case, we invert the calls.
             * This is useful when, for instance, we want to do things
             * in the beforeSleep() hook, like fsynching a file to disk,
             * before replying to a client. */
            int invert = fe->mask & AE_BARRIER;

            /* Note the "fe->mask & mask & ..." code: maybe an already
             * processed event removed an element that fired and we still
             * didn't processed, so we check if the event is still valid.
             *
             * Fire the readable event if the call sequence is not
             * inverted. */
            if (!invert && fe->mask & mask & AE_READABLE) {
                fe->rfileProc(eventLoop,fd,fe->clientData,mask);
                fired++;
            }

            /* Fire the writable event. */
            if (fe->mask & mask & AE_WRITABLE) {
                if (!fired || fe->wfileProc != fe->rfileProc) {
                    fe->wfileProc(eventLoop,fd,fe->clientData,mask);
                    fired++;
                }
            }

            /* If we have to invert the call, fire the readable event now
             * after the writable one. */
            if (invert && fe->mask & mask & AE_READABLE) {
                if (!fired || fe->wfileProc != fe->rfileProc) {
                    fe->rfileProc(eventLoop,fd,fe->clientData,mask);
                    fired++;
                }
            }

            processed++;
        }
    }
    /* Check time events */
    if (flags & AE_TIME_EVENTS)
        processed += processTimeEvents(eventLoop);

    return processed; /* return the number of processed file/time events */
}

This is a standard event-driven framework: aeProcessEvents is called over and over inside an endless loop.

aeProcessEvents is fairly long. It handles two kinds of events, time events and file events; this post focuses on file events.

aeProcessEvents calls aeApiPoll to find out which of the monitored fds are ready. For each ready fd it looks at the event type and dispatches to either rfileProc or wfileProc. What we care about here is how a command sent by a client gets handled, so the interesting handler is rfileProc; how it gets set is covered later in this post.
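For reference, the handlers mentioned above are plain function pointers stored per fd. The relevant definitions in ae.h look roughly like this (Redis 4.x, lightly trimmed):

// A file event callback: it receives the event loop, the ready fd, the
// registered clientData pointer, and the mask of events that fired.
typedef void aeFileProc(struct aeEventLoop *eventLoop, int fd,
                        void *clientData, int mask);

/* File event structure: one per monitored fd, indexed by fd. */
typedef struct aeFileEvent {
    int mask;               /* AE_READABLE | AE_WRITABLE | AE_BARRIER */
    aeFileProc *rfileProc;  /* called when the fd becomes readable */
    aeFileProc *wfileProc;  /* called when the fd becomes writable */
    void *clientData;       /* opaque pointer handed back to the callbacks */
} aeFileEvent;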

aeApiPoll is implemented in several files; Redis uses conditional compilation to pick one of them, which is a neat trick.

/* Include the best multiplexing layer supported by this system.
* The following should be ordered by performances, descending. */
// Compile-time "overloading" done with the preprocessor — simple and solid
#ifdef HAVE_EVPORT
#include "ae_evport.c"
#else
#ifdef HAVE_EPOLL
#include "ae_epoll.c"
#else
#ifdef HAVE_KQUEUE
#include "ae_kqueue.c"
#else
#include "ae_select.c"
#endif
#endif
#endif

Let's look at the most classic one, epoll:

typedef struct aeApiState {
    int epfd;
    struct epoll_event *events;
} aeApiState;

// Create the epoll instance and the state attached to the event loop
static int aeApiCreate(aeEventLoop *eventLoop) {
    aeApiState *state = zmalloc(sizeof(aeApiState));

    if (!state) return -1;
    state->events = zmalloc(sizeof(struct epoll_event)*eventLoop->setsize);
    if (!state->events) {
        zfree(state);
        return -1;
    }
    state->epfd = epoll_create(1024); /* 1024 is just a hint for the kernel */
    if (state->epfd == -1) {
        zfree(state->events);
        zfree(state);
        return -1;
    }
    eventLoop->apidata = state;
    return 0;
}

static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
    aeApiState *state = eventLoop->apidata;
    struct epoll_event ee = {0}; /* avoid valgrind warning */
    /* If the fd was already monitored for some event, we need a MOD
     * operation. Otherwise we need an ADD operation. */
    // Possible values of epoll_ctl's op argument: EPOLL_CTL_ADD (register),
    // EPOLL_CTL_MOD (modify), EPOLL_CTL_DEL (remove)
    int op = eventLoop->events[fd].mask == AE_NONE ?
            EPOLL_CTL_ADD : EPOLL_CTL_MOD;

    ee.events = 0;
    // Update both the mask stored on the eventLoop's event and the set of
    // events the epoll fd is watching
    mask |= eventLoop->events[fd].mask; /* Merge old events */
    if (mask & AE_READABLE) ee.events |= EPOLLIN;
    if (mask & AE_WRITABLE) ee.events |= EPOLLOUT;
    ee.data.fd = fd;
    if (epoll_ctl(state->epfd,op,fd,&ee) == -1) return -1;
    return 0;
}

// tvp is the epoll timeout; if tvp is NULL, block indefinitely
static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
    aeApiState *state = eventLoop->apidata;
    int retval, numevents = 0;

    retval = epoll_wait(state->epfd,state->events,eventLoop->setsize,
            tvp ? (tvp->tv_sec*1000 + tvp->tv_usec/1000) : -1);
    if (retval > 0) {
        int j;

        numevents = retval;
        // Iterate over the fds that are readable/writable
        for (j = 0; j < numevents; j++) {
            int mask = 0;
            struct epoll_event *e = state->events+j;

            if (e->events & EPOLLIN) mask |= AE_READABLE;
            if (e->events & EPOLLOUT) mask |= AE_WRITABLE;
            if (e->events & EPOLLERR) mask |= AE_WRITABLE;
            if (e->events & EPOLLHUP) mask |= AE_WRITABLE;
            // Fill eventLoop->fired with the ready fds and their masks
            eventLoop->fired[j].fd = e->data.fd;
            eventLoop->fired[j].mask = mask;
        }
    }
    return numevents;
}

The code is not complicated; it is really just a thin wrapper around the system calls:

epoll_ctl is called to register an fd to be monitored.

epoll_wait is called to block until an event occurs on one of the monitored fds.

An interesting detail is that the aeFileEvent struct keeps a mask field recording which events this fd is being monitored for, presumably so that this information can be consulted again later.
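That stored mask is exactly what lets the epoll wrapper choose between a MOD and a DEL when an event is unregistered. The delete path in ae_epoll.c looks roughly like this (Redis 4.x, shown as a sketch):

static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int delmask) {
    aeApiState *state = eventLoop->apidata;
    struct epoll_event ee = {0};
    // Remaining events = previously registered mask minus the one being removed
    int mask = eventLoop->events[fd].mask & (~delmask);

    ee.events = 0;
    if (mask & AE_READABLE) ee.events |= EPOLLIN;
    if (mask & AE_WRITABLE) ee.events |= EPOLLOUT;
    ee.data.fd = fd;
    if (mask != AE_NONE) {
        // Still interested in something on this fd: just modify the registration
        epoll_ctl(state->epfd,EPOLL_CTL_MOD,fd,&ee);
    } else {
        // Nothing left to watch: remove the fd from epoll entirely
        /* Note, Kernel < 2.6.9 requires a non null event pointer even for
         * EPOLL_CTL_DEL. */
        epoll_ctl(state->epfd,EPOLL_CTL_DEL,fd,&ee);
    }
}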

A new client connection

networking.c

client *createClient(int fd) {
    client *c = zmalloc(sizeof(client));

    // fd == -1 means this is a connection-less fake client used to run Lua
    // scripts, which lets us skip the socket setup below
    /* passing -1 as fd it is possible to create a non connected client.
     * This is useful since all the commands needs to be executed
     * in the context of a client. When commands are executed in other
     * contexts (for instance a Lua script) we need a non connected client. */
    if (fd != -1) {
        anetNonBlock(NULL,fd);         // put the fd into non-blocking mode
        anetEnableTcpNoDelay(NULL,fd); // setsockopt: disable Nagle's algorithm so packets go out as fast as possible
        if (server.tcpkeepalive)
            anetKeepAlive(NULL,fd,server.tcpkeepalive);
        // Register the read handler for this client's fd: readQueryFromClient,
        // defined near the end of this file
        if (aeCreateFileEvent(server.el,fd,AE_READABLE,
            readQueryFromClient, c) == AE_ERR)
        {
            close(fd);
            zfree(c);
            return NULL;
        }
    }

aeCreateFileEvent is called to register readQueryFromClient as the read handler for this fd, i.e. to store it in the fd's rfileProc field.

int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask,
        aeFileProc *proc, void *clientData)
{
    if (fd >= eventLoop->setsize) {
        errno = ERANGE;
        return AE_ERR;
    }
    aeFileEvent *fe = &eventLoop->events[fd];

    if (aeApiAddEvent(eventLoop, fd, mask) == -1)
        return AE_ERR;
    fe->mask |= mask;
    if (mask & AE_READABLE) fe->rfileProc = proc;
    if (mask & AE_WRITABLE) fe->wfileProc = proc;
    fe->clientData = clientData;
    if (fd > eventLoop->maxfd)
        eventLoop->maxfd = fd;
    return AE_OK;
}

When the client sends a command, the event loop notices that this fd has become readable and calls readQueryFromClient to handle it.
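For completeness: createClient itself is reached through the read handler of the listening socket. In initServer (server.c) the listening fds are registered with acceptTcpHandler in much the same way, roughly as follows; acceptTcpHandler then accepts the connection and calls createClient for the new fd:

    // In initServer(): register the accept handler on every listening socket
    for (j = 0; j < server.ipfd_count; j++) {
        if (aeCreateFileEvent(server.el, server.ipfd[j], AE_READABLE,
            acceptTcpHandler,NULL) == AE_ERR)
        {
            serverPanic(
                "Unrecoverable error creating server.ipfd file event.");
        }
    }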

Handling the command sent by the client

// Callback: invoked when a read event fires on the client's fd
void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
    .....
    /* Time to process the buffer. If the client is a master we need to
     * compute the difference between the applied offset before and after
     * processing the buffer, to understand how much of the replication stream
     * was actually applied to the master state: this quantity, and its
     * corresponding part of the replication stream, will be propagated to
     * the sub-slaves and to the replication backlog. */
    if (!(c->flags & CLIENT_MASTER)) {
        processInputBuffer(c); // an ordinary client (not our master)
    } else {
        // This connection is our master: besides processing the commands in
        // the buffer, we must also feed the applied part of the replication
        // stream to our own slaves and the backlog
        size_t prev_offset = c->reploff;
        processInputBuffer(c);
        size_t applied = c->reploff - prev_offset;
        if (applied) {
            replicationFeedSlavesFromMasterStream(server.slaves,
                    c->pending_querybuf, applied);
            sdsrange(c->pending_querybuf,applied,-1);
        }
    }
}

When the fd is readable, the event loop fires the readQueryFromClient callback, which in turn calls processInputBuffer.

/* This function is called every time, in the client structure 'c', there is
 * more query buffer to process, because we read more data from the socket
 * or because a client was blocked and later reactivated, so there could be
 * pending query buffer, already representing a full command, to process. */
void processInputBuffer(client *c) {
    .....
        if (c->reqtype == PROTO_REQ_INLINE) {
            if (processInlineBuffer(c) != C_OK) break;
        } else if (c->reqtype == PROTO_REQ_MULTIBULK) {
            if (processMultibulkBuffer(c) != C_OK) break;
        } else {
            serverPanic("Unknown request type");
        }

        /* Multibulk processing could see a <= 0 length. */
        if (c->argc == 0) {
            resetClient(c);
        } else {
            /* Only reset the client when the command was executed. */
            // Finally the command actually gets executed
            if (processCommand(c) == C_OK) {
                if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) {
                    /* Update the applied replication offset of our master. */
                    c->reploff = c->read_reploff - sdslen(c->querybuf);
                }

                /* Don't reset the client structure for clients blocked in a
                 * module blocking command, so that the reply callback will
                 * still be able to access the client argv and argc field.
                 * The client will be reset in unblockClientFromModule(). */
                if (!(c->flags & CLIENT_BLOCKED) || c->btype != BLOCKED_MODULE)
                    resetClient(c);
            }
            /* freeMemoryIfNeeded may flush slave output buffers. This may
             * result into a slave, that may be the active client, to be
             * freed. */
            if (server.current_client == NULL) break;
        }
    }
    server.current_client = NULL;
}
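A quick note on the two request types seen above: an inline request is a bare line such as PING\r\n typed into a raw connection, while a normal client library sends the multibulk (RESP) form, which processMultibulkBuffer parses into the client's argc/argv. For example, SET key value arrives on the socket as:

*3\r\n$3\r\nSET\r\n$3\r\nkey\r\n$5\r\nvalue\r\n

i.e. an array of 3 bulk strings, each prefixed with its byte length.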

processCommand is then called; as the name suggests, it is where the command sent by the client gets handled.

Its implementation lives in server.c.

/* If this function gets called we already read a whole
 * command, arguments are in the client argv/argc fields.
 * processCommand() execute the command or prepare the
 * server for a bulk read from the client.
 *
 * If C_OK is returned the client is still alive and valid and
 * other operations can be performed by the caller. Otherwise
 * if C_ERR is returned the client was destroyed (i.e. after QUIT). */
int processCommand(client *c) {
    ......
    /* Now lookup the command and check ASAP about trivial error conditions
     * such as wrong arity, bad command name and so forth. */
    // Look up the command implementation in the command dict
    c->cmd = c->lastcmd = lookupCommand(c->argv[0]->ptr);
    // Check that the command exists and that the number of arguments is right
    if (!c->cmd) {
        flagTransaction(c);
        addReplyErrorFormat(c,"unknown command '%s'",
            (char*)c->argv[0]->ptr);
        return C_OK;
    } else if ((c->cmd->arity > 0 && c->cmd->arity != c->argc) ||
               (c->argc < -c->cmd->arity)) {
        flagTransaction(c);
        addReplyErrorFormat(c,"wrong number of arguments for '%s' command",
            c->cmd->name);
        return C_OK;
    }
    ..... // everything above checks arguments and handles various error cases

    /* Exec the command */
    // If we are inside a transaction opened with MULTI...
    if (c->flags & CLIENT_MULTI &&
        c->cmd->proc != execCommand && c->cmd->proc != discardCommand &&
        c->cmd->proc != multiCommand && c->cmd->proc != watchCommand)
    {
        // ...queue the command instead of executing it now
        queueMultiCommand(c);
        addReply(c,shared.queued);
    } else {
        // Ordinary, non-transactional command: run it. call() is defined
        // further down in this file, around line 2200.
        call(c,CMD_CALL_FULL);
        c->woff = server.master_repl_offset;
        if (listLength(server.ready_keys))
            handleClientsBlockedOnKeys();
    }
    return C_OK;
}

There are two key points in this function:

1. lookupCommand is called to find the implementation of the command the client submitted (when the Redis server starts, it initializes a dict mapping command names to their implementation functions, so this is just a dict lookup; see the sketch after this list).

2. The command function is executed. Ignoring transactions for now and looking only at the simplest ordinary case, this means calling call().
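lookupCommand itself is tiny; in server.c it is essentially just the following (the server.commands dict is populated from the command table at startup):

struct redisCommand *lookupCommand(sds name) {
    // server.commands maps command names (sds strings) to redisCommand structs
    return dictFetchValue(server.commands, name);
}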

call() is also implemented in server.c.

void call(client *c, int flags) {
    ......
    /* Call the command. */
    dirty = server.dirty;
    start = ustime();
    c->cmd->proc(c);            // run the command's implementation
    duration = ustime()-start;  // measure how long the command took
    dirty = server.dirty-dirty;
    if (dirty < 0) dirty = 0;
    ....
}

The actual work is done through the cmd's proc field, a function pointer.

The cmd, and thus its proc field, was filled in by the lookupCommand step above.
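As a reminder of what a command descriptor carries, the redisCommand struct in server.h is roughly the following (Redis 4.x, comments trimmed):

typedef void redisCommandProc(client *c);
typedef int *redisGetKeysProc(struct redisCommand *cmd, robj **argv,
                              int argc, int *numkeys);

struct redisCommand {
    char *name;               // command name, e.g. "get"
    redisCommandProc *proc;   // the function that implements it
    int arity;                // expected argc; negative means "at least -arity"
    char *sflags;             // flags as a string, e.g. "rF"
    int flags;                // the parsed flags
    redisGetKeysProc *getkeys_proc; // optional helper to locate key arguments
    int firstkey;             // index of the first key argument (0 = no keys)
    int lastkey;              // index of the last key argument
    int keystep;              // step between key arguments
    long long microseconds, calls; // runtime stats
};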

For example, the simplest command, get, maps to the getCommand function; its entry in the command table is:

    {"get",getCommand,,"rF",,NULL,,,,,},

Its actual implementation lives in t_string.c; we won't chase the details here.
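For the curious, the get path in t_string.c is short enough to quote almost verbatim (Redis 4.x; shown here as a sketch):

int getGenericCommand(client *c) {
    robj *o;

    // Look up the key for reading; if it's missing, reply with a null bulk
    if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL)
        return C_OK;

    if (o->type != OBJ_STRING) {
        // GET only works on string values
        addReply(c,shared.wrongtypeerr);
        return C_ERR;
    } else {
        addReplyBulk(c,o);
        return C_OK;
    }
}

void getCommand(client *c) {
    getGenericCommand(c);
}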

At this point we have a rough picture of how a command sent by a client flows through the server.
