/* Epoll private bits inside the event mask */
#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE)

主要是看下:惊群源:

1、socket wake_up 

2、epoll_wait 中wake_up 

目前data ready的时候调用sk_data_ready 唤醒进程,此时唤醒进程选择了  只唤醒一个

/ nr_exclusive是1  

static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,  

int nr_exclusive, int wake_flags, void *key)  

{  

wait_queue_t *curr, *next;  

list_for_each_entry_safe(curr, next, &q->task_list, task_list) {  

unsigned flags = curr->flags;  

if (curr->func(curr, mode, wake_flags, key) &&  

(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)  

break;  

}  

}  

(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)

传进来的nr_exclusive是1, 所以flags & WQ_FLAG_EXCLUSIVE为真的时候,执行一次,就会跳出循环。

Epoll_create()在fork子进程之前

所有进程都共享一个 epfd, 所以data ready 唤醒进程的时候即使加上 nr_exclusive = 1 只唤醒一个进程, 那么唤醒那个一个呢?

也就是当连接到来时,我们需要选择一个进程来accept,这个时候,任何一个accept都是可以的。当连接建立以后,后续的读写事件,却与进程有了关联。一个请求与a进程建立连接后,后续的读写也应该由a进程来做。

当读写事件发生时,应该通知哪个进程呢?Epoll并不知道,因此,事件有可能错误通知另一个进程处理

Epoll_create()在fork子进程之后

每个进程的读写事件,只注册在自己进程的epoll中。所以不会出现竞争

但是accept呢???

目前有的内核版本说是会出现有的不会!!!

这就需要看内核版本实现了 无非就是唤醒的时候加上一些标志。。。当然是用reuseport 一劳永逸!!

如果不是是用reuseport实现只唤醒一个进程,那么wake_up的时候就是唤醒等待队列的头一个。。那怎么做到负载均衡呢???

所以还是reuseport好!!!!

  */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, int wake_flags, void *key)
{
wait_queue_t *curr, *next; list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
unsigned flags = curr->flags; if (curr->func(curr, mode, wake_flags, key) &&
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
}
} void __wake_up(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, void *key)
{
unsigned long flags; spin_lock_irqsave(&q->lock, flags);
__wake_up_common(q, mode, nr_exclusive, 0, key);
spin_unlock_irqrestore(&q->lock, flags);
} /*
* This is the callback that is passed to the wait queue wakeup
* machanism. It is called by the stored file descriptors when they
* have events to report.
*/
static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
int pwake = 0;
unsigned long flags;
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep; spin_lock_irqsave(&ep->lock, flags); /*
* If the event mask does not contain any poll(2) event, we consider the
* descriptor to be disabled. This condition is likely the effect of the
* EPOLLONESHOT bit that disables the descriptor when an event is received,
* until the next EPOLL_CTL_MOD will be issued.
*/
if (!(epi->event.events & ~EP_PRIVATE_BITS))
goto out_unlock; /*
* Check the events coming with the callback. At this stage, not
* every device reports the events in the "key" parameter of the
* callback. We need to be able to handle both cases here, hence the
* test for "key" != NULL before the event match test.
*/
if (key && !((unsigned long) key & epi->event.events))
goto out_unlock; /*
* If we are trasfering events to userspace, we can hold no locks
* (because we're accessing user memory, and because of linux f_op->poll()
* semantics). All the events that happens during that period of time are
* chained in ep->ovflist and requeued later on.
*/
if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
if (epi->next == EP_UNACTIVE_PTR) {
epi->next = ep->ovflist;
ep->ovflist = epi;
}
goto out_unlock;
} /* If this file is already in the ready list we exit soon */
if (!ep_is_linked(&epi->rdllink))
list_add_tail(&epi->rdllink, &ep->rdllist); /*
* Wake up ( if active ) both the eventpoll wait list and the ->poll()
* wait list.
*/
if (waitqueue_active(&ep->wq))
wake_up_locked(&ep->wq);
if (waitqueue_active(&ep->poll_wait))
pwake++; out_unlock:
spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */
if (pwake)
ep_poll_safewake(&ep->poll_wait); return 1;
} static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
int pwake = 0;
unsigned long flags;
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep;
int ewake = 0; if ((unsigned long)key & POLLFREE) {
ep_pwq_from_wait(wait)->whead = NULL;
/*
* whead = NULL above can race with ep_remove_wait_queue()
* which can do another remove_wait_queue() after us, so we
* can't use __remove_wait_queue(). whead->lock is held by
* the caller.
*/
list_del_init(&wait->task_list);
} spin_lock_irqsave(&ep->lock, flags); /*
* If the event mask does not contain any poll(2) event, we consider the
* descriptor to be disabled. This condition is likely the effect of the
* EPOLLONESHOT bit that disables the descriptor when an event is received,
* until the next EPOLL_CTL_MOD will be issued.
*/
if (!(epi->event.events & ~EP_PRIVATE_BITS))
goto out_unlock; /*
* Check the events coming with the callback. At this stage, not
* every device reports the events in the "key" parameter of the
* callback. We need to be able to handle both cases here, hence the
* test for "key" != NULL before the event match test.
*/
if (key && !((unsigned long) key & epi->event.events))
goto out_unlock; /*
* If we are transferring events to userspace, we can hold no locks
* (because we're accessing user memory, and because of linux f_op->poll()
* semantics). All the events that happen during that period of time are
* chained in ep->ovflist and requeued later on.
*/
if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
if (epi->next == EP_UNACTIVE_PTR) {
epi->next = ep->ovflist;
ep->ovflist = epi;
if (epi->ws) {
/*
* Activate ep->ws since epi->ws may get
* deactivated at any time.
*/
__pm_stay_awake(ep->ws);
} }
goto out_unlock;
} /* If this file is already in the ready list we exit soon */
if (!ep_is_linked(&epi->rdllink)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake_rcu(epi);
} /*
* Wake up ( if active ) both the eventpoll wait list and the ->poll()
* wait list.
*/
if (waitqueue_active(&ep->wq)) {
if ((epi->event.events & EPOLLEXCLUSIVE) &&
!((unsigned long)key & POLLFREE)) {
switch ((unsigned long)key & EPOLLINOUT_BITS) {
case POLLIN:
if (epi->event.events & POLLIN)
ewake = 1;
break;
case POLLOUT:
if (epi->event.events & POLLOUT)
ewake = 1;
break;
case 0:
ewake = 1;
break;
}
}
wake_up_locked(&ep->wq);
}
if (waitqueue_active(&ep->poll_wait))
pwake++; out_unlock:
spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */
if (pwake)
ep_poll_safewake(&ep->poll_wait); if (epi->event.events & EPOLLEXCLUSIVE)
return ewake; return 1;
}
/*
* This is the callback that is used to add our wait queue to the
* target file wakeup lists.
*/
static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
poll_table *pt)
{
struct epitem *epi = ep_item_from_epqueue(pt);
struct eppoll_entry *pwq; if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) {
init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
pwq->whead = whead;
pwq->base = epi;
if (epi->event.events & EPOLLEXCLUSIVE)
add_wait_queue_exclusive(whead, &pwq->wait);
else
add_wait_queue(whead, &pwq->wait);
list_add_tail(&pwq->llink, &epi->pwqlist);
epi->nwait++;
} else {
/* We have to signal that an error occurred */
epi->nwait = -1;
}
}

根据EPOLLEXCLUSIVE 加入到不同的唤醒 队列add_wait_queue_exclusive  add_wait_queue

在wake_up的时候 通过nr-exclu  控制 但是 要想break还需要返回 ep_poll_callback返回 true;

对于epoll_oneshot

在epoll_wait后  send fd 到user时,

if (epi->event.events & EPOLLONESHOT)  不会重复添加进去 导致后续链式唤醒
                epi->event.events &= EP_PRIVATE_BITS;
  else if (!(epi->event.events & EPOLLET)) {
                list_add_tail(&epi->rdllink, &ep->rdllist);
                ep_pm_stay_awake(epi);
            }

同时在ep_call_back的时候也会 继续检查 EPOLLONESHOT ,房子 epoll_wait返回时,在处理data中,fd又有数据需要相应,此时多线程 中别的线程可以相应。。。。乱序了!!!

/*
     * If the event mask does not contain any poll(2) event, we consider the descriptor to be disabled. This condition is likely the effect of the
     * EPOLLONESHOT bit that disables the descriptor when an event is received,* until the next EPOLL_CTL_MOD will be issued.
     */
    if (!(epi->event.events & ~EP_PRIVATE_BITS))
        goto out_unlock;

epoll oneshot的更多相关文章

  1. epoll中et+多线程模式中很重要的EPOLL_ONESHOT实验

    因为et模式需要循环读取,但是在读取过程中,如果有新的事件到达,很可能触发了其他线程来处理这个socket,那就乱了. EPOLL_ONESHOT就是用来避免这种情况.注意在一个线程处理完一个sock ...

  2. Linux编程之epoll

    现在有这么一个场景:我是一个很忙的大老板,我有100个手机,手机来信息了,我的秘书就会告诉我"老板,你的手机来信息了."我很生气,我的秘书就是这样子,每次手机来信息就只告诉我来信息 ...

  3. I/O多路复用之epoll实战

    概念 IO多路复用是指内核一旦发现进程指定的一个或者多个IO条件准备读取,它就通知该进程 通俗理解(摘自网上一大神) 这些名词比较绕口,理解涵义就好.一个epoll场景:一个酒吧服务员(一个线程),前 ...

  4. Select、Poll、Epoll、 异步IO 介绍

    一.概念相关介绍 同步IO和异步IO,阻塞IO和非阻塞IO分别是什么,到底有什么区别?不同的人在不同的上下文下给出的答案是不同的.所以先限定一下本文的上下文. 本文讨论的背景是Linux环境下的net ...

  5. epoll的LT和ET使用EPOLLONESHOT

    epoll有两种触发的方式即LT(水平触发)和ET(边缘触发)两种,在前者,只要存在着事件就会不断的触发,直到处理完成,而后者只触发一次相同事件或者说只在从非触发到触发两个状态转换的时候儿才触发. 这 ...

  6. 基于epoll的简单的httpserver

    该httpserver已经能够处理并发连接,支持多个client并发訪问,每一个连接能够持续读写数据.当然.这仅仅是一个简单的学习样例.还有非常多bug,发表出来仅仅是希望大家能够互相学习.我也在不断 ...

  7. python epoll实现异步socket

    一.同步和异步: 在程序执行中,同步运行意味着等待调用的函数.线程.子进程等的返回结果后继续处理:异步指不等待当下的返回结果,直接运行主进程下面的程序,等到有返回结果时,通知主进程处理.有点高效. 二 ...

  8. Linux中epoll+线程池实现高并发

    服务器并发模型通常可分为单线程和多线程模型,这里的线程通常是指“I/O线程”,即负责I/O操作,协调分配任务的“管理线程”,而实际的请求和任务通常交由所谓“工作者线程”处理.通常多线程模型下,每个线程 ...

  9. 第九章 用多线程来读取epoll模型下的client数据

    #include <sys/types.h> #include <sys/socket.h> #include <netinet/in.h> #include &l ...

随机推荐

  1. OpenSSL加密系统简介

    加密基本原理 OpenSSL移植到arm开发板参考  http://blog.chinaunix.net/uid-27717694-id-3530600.html 1.公钥和私钥: 公钥和私钥就是俗称 ...

  2. Hash算法简介

    Hash算法性质 Hash算法用于计算消息摘要(Message Digest),可以将任意长的输入信息快速地转换为固定长度的输出.在区块链中主要利用了Hash算法的三种性质: 抗冲突性(Collisi ...

  3. 扫描仪扫描文件处理-Photoshop批处理弹出色阶设置框解决

    为什么我录制动作明明设置的有色阶,最后批处理的时候仍然弹出了色阶设置框?   出现问题原因可能是你在录入设置色阶动作的时候,是彩色图片或者灰阶中的一种,而批处理的时候遇到了另外一种色彩模式.所以动作中 ...

  4. swoft 事件监听和触发 打印sql日志

    需求 打印出swoft的所有sql日志到控制台或者文件 只要打开listener 下面 Dbranlisten.php 里面最后一行注释即可,swoft已经帮我们实现好了 ____ _____ ___ ...

  5. nginx安全:配置ssl证书(https证书)

    一,配置https证书的意义 https协议是由SSL+http协议构建的安全协议,支持加密传输和身份认证, 安全性比http要更好,因为数据的加密传输,更能保证数据的安全性和完整性 例如:不使用ht ...

  6. Python函数递归调用

    函数的递归调用: 是函数嵌套调用的一种特殊形式 具体是指: 在调用一个函数的过程中又直接或间接地调用到了本身 # 直接调用本身 def func(): print('我是func') func() f ...

  7. CentOS 8 关闭防火墙

    SELINUX=disabled vim /etc/selinux/config systemctl disable firewalld.service

  8. 对象部分初始化:原理以及验证代码(双重检查锁与volatile相关)

    对象部分初始化:原理以及验证代码(双重检查锁与volatile相关) 对象部分初始化被称为 Partially initialized objects / Partially constructed ...

  9. ssm整合之springmvc.xml文件

    <?xml version="1.0" encoding="UTF-8"?><beans xmlns="http://www.spr ...

  10. css自定义字体----使用外部字体文件

    css外部自定义字体 给大家分享一个使用的css小技巧!记得收藏呀!相信大家在浏览各种网站会见到各种奇形怪状花里胡哨的文字,还有就是一些浏览器兼容性问题,不会支持一些特殊的字体!给大家分享一个极其简单 ...