epoll oneshot
/* Epoll private bits inside the event mask */
#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE)
主要是看下:惊群源:
1、socket wake_up
2、epoll_wait 中wake_up
目前data ready的时候调用sk_data_ready 唤醒进程,此时唤醒进程选择了 只唤醒一个
/ nr_exclusive是1
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, int wake_flags, void *key)
{
wait_queue_t *curr, *next;
list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
unsigned flags = curr->flags;
if (curr->func(curr, mode, wake_flags, key) &&
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
}
}
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
传进来的nr_exclusive是1, 所以flags & WQ_FLAG_EXCLUSIVE为真的时候,执行一次,就会跳出循环。
Epoll_create()在fork子进程之前
所有进程都共享一个 epfd, 所以data ready 唤醒进程的时候即使加上 nr_exclusive = 1 只唤醒一个进程, 那么唤醒那个一个呢?
也就是当连接到来时,我们需要选择一个进程来accept,这个时候,任何一个accept都是可以的。当连接建立以后,后续的读写事件,却与进程有了关联。一个请求与a进程建立连接后,后续的读写也应该由a进程来做。
当读写事件发生时,应该通知哪个进程呢?Epoll并不知道,因此,事件有可能错误通知另一个进程处理
Epoll_create()在fork子进程之后
每个进程的读写事件,只注册在自己进程的epoll中。所以不会出现竞争
但是accept呢???
目前有的内核版本说是会出现有的不会!!!
这就需要看内核版本实现了 无非就是唤醒的时候加上一些标志。。。当然是用reuseport 一劳永逸!!
如果不是是用reuseport实现只唤醒一个进程,那么wake_up的时候就是唤醒等待队列的头一个。。那怎么做到负载均衡呢???
所以还是reuseport好!!!!
*/
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, int wake_flags, void *key)
{
wait_queue_t *curr, *next; list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
unsigned flags = curr->flags; if (curr->func(curr, mode, wake_flags, key) &&
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
}
} void __wake_up(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, void *key)
{
unsigned long flags; spin_lock_irqsave(&q->lock, flags);
__wake_up_common(q, mode, nr_exclusive, 0, key);
spin_unlock_irqrestore(&q->lock, flags);
} /*
* This is the callback that is passed to the wait queue wakeup
* machanism. It is called by the stored file descriptors when they
* have events to report.
*/
static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
int pwake = 0;
unsigned long flags;
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep; spin_lock_irqsave(&ep->lock, flags); /*
* If the event mask does not contain any poll(2) event, we consider the
* descriptor to be disabled. This condition is likely the effect of the
* EPOLLONESHOT bit that disables the descriptor when an event is received,
* until the next EPOLL_CTL_MOD will be issued.
*/
if (!(epi->event.events & ~EP_PRIVATE_BITS))
goto out_unlock; /*
* Check the events coming with the callback. At this stage, not
* every device reports the events in the "key" parameter of the
* callback. We need to be able to handle both cases here, hence the
* test for "key" != NULL before the event match test.
*/
if (key && !((unsigned long) key & epi->event.events))
goto out_unlock; /*
* If we are trasfering events to userspace, we can hold no locks
* (because we're accessing user memory, and because of linux f_op->poll()
* semantics). All the events that happens during that period of time are
* chained in ep->ovflist and requeued later on.
*/
if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
if (epi->next == EP_UNACTIVE_PTR) {
epi->next = ep->ovflist;
ep->ovflist = epi;
}
goto out_unlock;
} /* If this file is already in the ready list we exit soon */
if (!ep_is_linked(&epi->rdllink))
list_add_tail(&epi->rdllink, &ep->rdllist); /*
* Wake up ( if active ) both the eventpoll wait list and the ->poll()
* wait list.
*/
if (waitqueue_active(&ep->wq))
wake_up_locked(&ep->wq);
if (waitqueue_active(&ep->poll_wait))
pwake++; out_unlock:
spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */
if (pwake)
ep_poll_safewake(&ep->poll_wait); return 1;
} static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
int pwake = 0;
unsigned long flags;
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep;
int ewake = 0; if ((unsigned long)key & POLLFREE) {
ep_pwq_from_wait(wait)->whead = NULL;
/*
* whead = NULL above can race with ep_remove_wait_queue()
* which can do another remove_wait_queue() after us, so we
* can't use __remove_wait_queue(). whead->lock is held by
* the caller.
*/
list_del_init(&wait->task_list);
} spin_lock_irqsave(&ep->lock, flags); /*
* If the event mask does not contain any poll(2) event, we consider the
* descriptor to be disabled. This condition is likely the effect of the
* EPOLLONESHOT bit that disables the descriptor when an event is received,
* until the next EPOLL_CTL_MOD will be issued.
*/
if (!(epi->event.events & ~EP_PRIVATE_BITS))
goto out_unlock; /*
* Check the events coming with the callback. At this stage, not
* every device reports the events in the "key" parameter of the
* callback. We need to be able to handle both cases here, hence the
* test for "key" != NULL before the event match test.
*/
if (key && !((unsigned long) key & epi->event.events))
goto out_unlock; /*
* If we are transferring events to userspace, we can hold no locks
* (because we're accessing user memory, and because of linux f_op->poll()
* semantics). All the events that happen during that period of time are
* chained in ep->ovflist and requeued later on.
*/
if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
if (epi->next == EP_UNACTIVE_PTR) {
epi->next = ep->ovflist;
ep->ovflist = epi;
if (epi->ws) {
/*
* Activate ep->ws since epi->ws may get
* deactivated at any time.
*/
__pm_stay_awake(ep->ws);
} }
goto out_unlock;
} /* If this file is already in the ready list we exit soon */
if (!ep_is_linked(&epi->rdllink)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake_rcu(epi);
} /*
* Wake up ( if active ) both the eventpoll wait list and the ->poll()
* wait list.
*/
if (waitqueue_active(&ep->wq)) {
if ((epi->event.events & EPOLLEXCLUSIVE) &&
!((unsigned long)key & POLLFREE)) {
switch ((unsigned long)key & EPOLLINOUT_BITS) {
case POLLIN:
if (epi->event.events & POLLIN)
ewake = 1;
break;
case POLLOUT:
if (epi->event.events & POLLOUT)
ewake = 1;
break;
case 0:
ewake = 1;
break;
}
}
wake_up_locked(&ep->wq);
}
if (waitqueue_active(&ep->poll_wait))
pwake++; out_unlock:
spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */
if (pwake)
ep_poll_safewake(&ep->poll_wait); if (epi->event.events & EPOLLEXCLUSIVE)
return ewake; return 1;
}
/*
* This is the callback that is used to add our wait queue to the
* target file wakeup lists.
*/
static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
poll_table *pt)
{
struct epitem *epi = ep_item_from_epqueue(pt);
struct eppoll_entry *pwq; if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) {
init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
pwq->whead = whead;
pwq->base = epi;
if (epi->event.events & EPOLLEXCLUSIVE)
add_wait_queue_exclusive(whead, &pwq->wait);
else
add_wait_queue(whead, &pwq->wait);
list_add_tail(&pwq->llink, &epi->pwqlist);
epi->nwait++;
} else {
/* We have to signal that an error occurred */
epi->nwait = -1;
}
}
根据EPOLLEXCLUSIVE 加入到不同的唤醒 队列add_wait_queue_exclusive add_wait_queue
在wake_up的时候 通过nr-exclu 控制 但是 要想break还需要返回 ep_poll_callback返回 true;
对于epoll_oneshot
在epoll_wait后 send fd 到user时,
if (epi->event.events & EPOLLONESHOT) 不会重复添加进去 导致后续链式唤醒
epi->event.events &= EP_PRIVATE_BITS;
else if (!(epi->event.events & EPOLLET)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
}
同时在ep_call_back的时候也会 继续检查 EPOLLONESHOT ,房子 epoll_wait返回时,在处理data中,fd又有数据需要相应,此时多线程 中别的线程可以相应。。。。乱序了!!!
/*
* If the event mask does not contain any poll(2) event, we consider the descriptor to be disabled. This condition is likely the effect of the
* EPOLLONESHOT bit that disables the descriptor when an event is received,* until the next EPOLL_CTL_MOD will be issued.
*/
if (!(epi->event.events & ~EP_PRIVATE_BITS))
goto out_unlock;
epoll oneshot的更多相关文章
- epoll中et+多线程模式中很重要的EPOLL_ONESHOT实验
因为et模式需要循环读取,但是在读取过程中,如果有新的事件到达,很可能触发了其他线程来处理这个socket,那就乱了. EPOLL_ONESHOT就是用来避免这种情况.注意在一个线程处理完一个sock ...
- Linux编程之epoll
现在有这么一个场景:我是一个很忙的大老板,我有100个手机,手机来信息了,我的秘书就会告诉我"老板,你的手机来信息了."我很生气,我的秘书就是这样子,每次手机来信息就只告诉我来信息 ...
- I/O多路复用之epoll实战
概念 IO多路复用是指内核一旦发现进程指定的一个或者多个IO条件准备读取,它就通知该进程 通俗理解(摘自网上一大神) 这些名词比较绕口,理解涵义就好.一个epoll场景:一个酒吧服务员(一个线程),前 ...
- Select、Poll、Epoll、 异步IO 介绍
一.概念相关介绍 同步IO和异步IO,阻塞IO和非阻塞IO分别是什么,到底有什么区别?不同的人在不同的上下文下给出的答案是不同的.所以先限定一下本文的上下文. 本文讨论的背景是Linux环境下的net ...
- epoll的LT和ET使用EPOLLONESHOT
epoll有两种触发的方式即LT(水平触发)和ET(边缘触发)两种,在前者,只要存在着事件就会不断的触发,直到处理完成,而后者只触发一次相同事件或者说只在从非触发到触发两个状态转换的时候儿才触发. 这 ...
- 基于epoll的简单的httpserver
该httpserver已经能够处理并发连接,支持多个client并发訪问,每一个连接能够持续读写数据.当然.这仅仅是一个简单的学习样例.还有非常多bug,发表出来仅仅是希望大家能够互相学习.我也在不断 ...
- python epoll实现异步socket
一.同步和异步: 在程序执行中,同步运行意味着等待调用的函数.线程.子进程等的返回结果后继续处理:异步指不等待当下的返回结果,直接运行主进程下面的程序,等到有返回结果时,通知主进程处理.有点高效. 二 ...
- Linux中epoll+线程池实现高并发
服务器并发模型通常可分为单线程和多线程模型,这里的线程通常是指“I/O线程”,即负责I/O操作,协调分配任务的“管理线程”,而实际的请求和任务通常交由所谓“工作者线程”处理.通常多线程模型下,每个线程 ...
- 第九章 用多线程来读取epoll模型下的client数据
#include <sys/types.h> #include <sys/socket.h> #include <netinet/in.h> #include &l ...
随机推荐
- lua 源码阅读 1.1 -> 2.1
lua 1.1 阅读1. hash.c 中 a) 对建立的 Hash *array 用 listhead 链式结构来管理,新增lua_hashcollector,用来做 Hash 的回收处理. ps: ...
- 多测师讲解htm_L标题标签001_高级讲师 肖sir
<!DOCTYPE html><html> <head> <meta charset="UTF-8"> <title>百 ...
- 【idea&spring mvc】搭建简易的spring mvc项目(基于maven)!
一.创建项目 1.打开idea,file--new--project 2.按照步骤①②③④操作 3.输入包名,并点击下一步 4.选择下载包的maven的setting.xml配置路径和包的存放地,然后 ...
- excel 无效引用 所引用的单元格不能位于256列
无效引用 该文件版本所包含的公式中,所引用的单元格不能位于256列(列IW 或更远)或 65536 行以外的区域. 原因及解决方法: 1.版本问题,把两文件都另存为一致的版本,改为后缀为xlsx. 2 ...
- spring boot:thymeleaf模板中insert/include/replace三种引用fragment方式的区别(spring boot 2.3.3)
一,thymeleaf模板中insert/include/replace三种引用fragment方式的区别 insert: 把整个fragment(包括fragment的节点tag)插入到当前节点内部 ...
- pycharm2018.3.5 下载激活(windows平台)
软件下载: 百度网盘下载 提取码: 73p7 激活操作: 1.下载jar包 JetbrainsCrack-4.2-release-enc.jar 链接:https://pan.baidu.com/s/ ...
- Android adb实现原理
adb定义: adb(Android Debug Bridge) 安卓调试桥,包含adb client.adb server和adbd三部分. adb client:运行在PC上,即DDMS或者在Wi ...
- 【应用程序见解 Application Insights】在Application Insights中通过自定义查询结果定义指标并显示在Dashboard中
问题情形 通过Application Insights收集到指标数据后,如Request,Trace,Exception.但是默认的Insights图表不能满足业务的需求,需要自定义相应的类SQL语句 ...
- 循序渐进VUE+Element 前端应用开发(21)--- 省市区县联动处理的组件使用
在很多应用中,往往都涉及到记录用户所在省份.城市.区县或者街道等信息,一般我们可以通过联动的Select或者类似的界面组件进行展示,或者使用Element中的el-cascader界面组件进行展示,而 ...
- JUC---06线程间通信(二)
二.线程间定制化调用通信 要使多线程之间按顺序调用,实现A->B->C按顺序输出,使用Lock锁实现,通过Lock锁创建三个Condition实例(三把钥匙),通过不同的条件,调用不同钥匙 ...