当前内容基于 Linux Kernel v5.4.121

io_uring

之前介绍过 io_uring 只增加了三个 Linux 系统调用,分别是 io_uring_setup、io_uring_enter 和 io_uring_register

他们的入口都在 Linux 内核源码的 fs/io_uring.c 文件中,下面将逐个分析

系统调用 io_uring_setup

io_uring_setup 的作用在用户库源码分析中有过介绍,主要是初始化 io_uring 结构体

io_uring_setup

/*
* Sets up an aio uring context, and returns the fd. Applications asks for a
* ring size, we return the actual sq/cq ring sizes (among other things) in the
* params structure passed in.
*
* @entries: ring size requested by the application, forwarded unchanged to
*           io_uring_create().
* @params:  user-space pointer to the parameter block; it is read on entry
*           and written back after io_uring_create() has filled it in.
*
* Returns the non-negative value produced by io_uring_create() on success,
* -EFAULT if either user copy fails, -EINVAL if any p.resv[] word is
* non-zero or p.flags has a bit outside IORING_SETUP_IOPOLL /
* IORING_SETUP_SQPOLL / IORING_SETUP_SQ_AFF, or the negative error returned
* by io_uring_create().
*
* The SYSCALL_DEFINE2(io_uring_setup, ...) definition that follows the
* function body is the system call entry point; it only forwards its two
* arguments to io_uring_setup().
*/
static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
{
struct io_uring_params p;
long ret;
int i; // Next: copy the parameter block from user space into the kernel copy p
if (copy_from_user(&p, params, sizeof(p)))
return -EFAULT;
// Make sure none of the reserved words has been set by the caller
for (i = 0; i < ARRAY_SIZE(p.resv); i++) {
if (p.resv[i])
return -EINVAL;
} // Next: validate the flags argument, rejecting any bit outside the three known ones
if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
IORING_SETUP_SQ_AFF))
return -EINVAL; // Next: allocate memory, create the workqueues, create the fd, etc.
ret = io_uring_create(entries, &p);
if (ret < 0)
return ret; // Next: copy the filled-in parameter block back to user space
// NOTE(review): on the -EFAULT path below io_uring_create() has already
// succeeded; nothing visible here undoes that work before returning.
if (copy_to_user(params, &p, sizeof(p)))
return -EFAULT; return ret;
} SYSCALL_DEFINE2(io_uring_setup, u32, entries,
struct io_uring_params __user *, params)
{
return io_uring_setup(entries, params);
}

可以看到 io_uring_setup 的核心函数是 io_uring_create

io_uring_create

/*
 * Build a new io_uring context for the current task and expose it as a file
 * descriptor.
 *
 * @entries: requested SQ depth; must be non-zero and no larger than
 *           IORING_MAX_ENTRIES.
 * @p:       kernel copy of the setup parameters. On return sq_entries,
 *           cq_entries, sq_off, cq_off and features have been filled in.
 *
 * Steps, in order: size the rings, optionally account the ring pages to the
 * user, allocate the context, record the caller's credentials, allocate the
 * rings, start the submission offload machinery, publish the ring field
 * offsets through @p, and finally obtain the fd.
 *
 * Returns the value of io_uring_get_fd() on success, -EINVAL for a bad
 * @entries, -ENOMEM if the context or credentials cannot be obtained, or
 * the error returned by one of the helpers. Once the context exists, every
 * failure goes through io_ring_ctx_wait_and_kill().
 */
static int io_uring_create(unsigned entries, struct io_uring_params *p)
{
struct user_struct *user = NULL;
struct io_ring_ctx *ctx;
bool account_mem;
int ret; if (!entries || entries > IORING_MAX_ENTRIES)
return -EINVAL; /*
* Use twice as many entries for the CQ ring. It's possible for the
* application to drive a higher depth than the size of the SQ ring,
* since the sqes are only used at submission time. This allows for
* some flexibility in overcommitting a bit.
*/
p->sq_entries = roundup_pow_of_two(entries);
p->cq_entries = 2 * p->sq_entries; user = get_uid(current_user());
// CAP_IPC_LOCK is the capability that permits locking shared memory
// segments; accounting is only needed when the caller does NOT hold it
account_mem = !capable(CAP_IPC_LOCK); if (account_mem) {
// Without that capability the pages needed for the rings are accounted
// to the user first (presumably against a locked-memory limit - the
// helper body is not shown here); on failure drop the uid reference
ret = io_account_mem(user,
ring_pages(p->sq_entries, p->cq_entries));
if (ret) {
free_uid(user);
return ret;
}
} ctx = io_ring_ctx_alloc(p);
// Context allocation failed: undo the accounting and the uid reference by
// hand, because there is no ctx yet for the common error path to clean up
if (!ctx) {
if (account_mem)
io_unaccount_mem(user, ring_pages(p->sq_entries,
p->cq_entries));
free_uid(user);
return -ENOMEM;
}
// From here on the user reference and accounting flag are stored in ctx
ctx->compat = in_compat_syscall();
ctx->account_mem = account_mem;
ctx->user = user; ctx->creds = get_current_cred();
if (!ctx->creds) {
ret = -ENOMEM;
goto err;
} // Next: allocate the io_rings structure and the SQEs
ret = io_allocate_scq_urings(ctx, p);
if (ret)
goto err; // Next: set up the workqueues and, if requested, the SQ polling kernel thread
ret = io_sq_offload_start(ctx, p);
if (ret)
goto err; memset(&p->sq_off, 0, sizeof(p->sq_off));
// Publish the offset of every SQ field inside struct io_rings via p->sq_off
p->sq_off.head = offsetof(struct io_rings, sq.head);
p->sq_off.tail = offsetof(struct io_rings, sq.tail);
p->sq_off.ring_mask = offsetof(struct io_rings, sq_ring_mask);
p->sq_off.ring_entries = offsetof(struct io_rings, sq_ring_entries);
p->sq_off.flags = offsetof(struct io_rings, sq_flags);
p->sq_off.dropped = offsetof(struct io_rings, sq_dropped);
// sq_array has no offsetof() expression; its offset is the byte distance
// between ctx->sq_array and the start of ctx->rings
p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings; memset(&p->cq_off, 0, sizeof(p->cq_off));
// Same for the CQ fields, published via p->cq_off
p->cq_off.head = offsetof(struct io_rings, cq.head);
p->cq_off.tail = offsetof(struct io_rings, cq.tail);
p->cq_off.ring_mask = offsetof(struct io_rings, cq_ring_mask);
p->cq_off.ring_entries = offsetof(struct io_rings, cq_ring_entries);
p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
p->cq_off.cqes = offsetof(struct io_rings, cqes); /*
* Install ring fd as the very last thing, so we don't risk someone
* having closed it before we finish setup
*/
// Create the fd through which user space reaches ctx
ret = io_uring_get_fd(ctx);
if (ret < 0)
goto err; p->features = IORING_FEAT_SINGLE_MMAP;
return ret;
err:
// Common failure path once ctx exists: hand the context to
// io_ring_ctx_wait_and_kill() and report the error stored in ret
io_ring_ctx_wait_and_kill(ctx);
return ret;
}

io_uring_setup 经由 io_uring_create 依次调用以下函数(原文此处为 mermaid 流程图):

io_uring_setup
  → io_ring_ctx_alloc
  → io_allocate_scq_urings
  → io_sq_offload_start
  → io_uring_get_fd

  1. io_ring_ctx_alloc 主要用来申请空间,初始化列表头、互斥锁、自旋锁等结构

  2. io_allocate_scq_urings 用来初始化整个 struct io_rings *rings,包括 SQ、CQ 头尾指针的初始化,以及 SQE、CQE 的初始化

    • 不同的是 SQ、CQ 头尾指针以及 CQE 都在 struct io_rings *rings 结构体中
    • SQE 则是在 struct io_ring_ctx *ctx 结构体中
  3. io_sq_offload_start 会根据用户通过 io_uring_setup 传递的 flags 来配置 io_uring 的运行方式,后续详细展开

  4. io_uring_get_fd 将 struct io_ring_ctx *ctx 暴露给用户态访问
    io_allocate_scq_urings 用来初始化整个 struct io_rings *rings,包括 SQ、CQ 头尾指针的初始化,以及 SQE、CQE 的初始化

io_sq_offload_start

/*
 * Set up the asynchronous submission machinery for a ring: an optional SQ
 * polling kernel thread plus two workqueues.
 *
 * @ctx: ring context being initialised; sqo_mm, sq_thread_idle, sqo_thread
 *       and sqo_wq[0..1] are written here.
 * @p:   setup parameters; flags, sq_thread_idle and sq_thread_cpu are read.
 *
 * Returns 0 on success. Failures: -EPERM when IORING_SETUP_SQPOLL is set
 * but the caller lacks CAP_SYS_ADMIN; -EINVAL when sq_thread_cpu is out of
 * range or offline, or when IORING_SETUP_SQ_AFF is given without SQPOLL;
 * the PTR_ERR() of a failed thread creation; -ENOMEM when a workqueue
 * cannot be allocated. On any failure io_finish_async() is called and the
 * mm reference taken at the top is dropped again.
 */
static int io_sq_offload_start(struct io_ring_ctx *ctx,
struct io_uring_params *p)
{
// Take a reference on the caller's mm and remember it in ctx->sqo_mm
int ret; mmgrab(current->mm);
ctx->sqo_mm = current->mm; if (ctx->flags & IORING_SETUP_SQPOLL) {
// IORING_SETUP_SQPOLL creates a kernel thread that polls the SQ
// ret is pre-loaded with the error for the check that follows it
ret = -EPERM;
if (!capable(CAP_SYS_ADMIN))
goto err; ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
// A zero idle time falls back to HZ jiffies
if (!ctx->sq_thread_idle)
ctx->sq_thread_idle = HZ; if (p->flags & IORING_SETUP_SQ_AFF) {
// SQ_AFF: create the thread on the requested CPU, which must be a valid,
// online CPU number
int cpu = p->sq_thread_cpu; ret = -EINVAL;
if (cpu >= nr_cpu_ids)
goto err;
if (!cpu_online(cpu))
goto err; ctx->sqo_thread = kthread_create_on_cpu(io_sq_thread,
ctx, cpu,
"io_uring-sq");
} else {
ctx->sqo_thread = kthread_create(io_sq_thread, ctx,
"io_uring-sq");
}
// Creation failed: clear the error pointer from ctx before bailing out
if (IS_ERR(ctx->sqo_thread)) {
ret = PTR_ERR(ctx->sqo_thread);
ctx->sqo_thread = NULL;
goto err;
}
wake_up_process(ctx->sqo_thread);
} else if (p->flags & IORING_SETUP_SQ_AFF) {
/* Can't have SQ_AFF without SQPOLL */
ret = -EINVAL;
goto err;
} /* Do QD, or 2 * CPUS, whatever is smallest */
ctx->sqo_wq[0] = alloc_workqueue("io_ring-wq",
WQ_UNBOUND | WQ_FREEZABLE,
min(ctx->sq_entries - 1, 2 * num_online_cpus()));
if (!ctx->sqo_wq[0]) {
ret = -ENOMEM;
goto err;
} /*
* This is for buffered writes, where we want to limit the parallelism
* due to file locking in file systems. As "normal" buffered writes
* should parellelize on writeout quite nicely, limit us to having 2
* pending. This avoids massive contention on the inode when doing
* buffered async writes.
*/
// The buffered-write workqueue is capped at 2, apparently to cut down on
// lock contention (see the comment above)
ctx->sqo_wq[1] = alloc_workqueue("io_ring-write-wq",
WQ_UNBOUND | WQ_FREEZABLE, 2);
if (!ctx->sqo_wq[1]) {
ret = -ENOMEM;
goto err;
} return 0;
err:
// Shared failure path: call io_finish_async() (presumably tearing down
// whatever thread/workqueues already exist - body not shown here), then
// drop the mm reference taken at the top
io_finish_async(ctx);
mmdrop(ctx->sqo_mm);
ctx->sqo_mm = NULL;
return ret;
}

当 flags 中配置了 IORING_SETUP_SQPOLL 时,将启动一个单独的内核线程 io_sq_thread;而当 IORING_SETUP_SQ_AFF 标志也被配置时,将根据 sq_thread_cpu 字段,在指定的 CPU 上启动内核线程 io_sq_thread

同时该函数还会创建两个工作队列,保存在 ctx->sqo_wq[2] 中,分别名为 io_ring-wq 和 io_ring-write-wq

  • io_ring-wq 主要处理读 IO,以及 direct 写 IO
  • io_ring-write-wq 主要是处理 buffer 写 IO

系统调用 io_uring_enter

/*
 * io_uring_enter system call: submit queued SQEs and/or wait for
 * completions on the ring referred to by @fd.
 *
 * @fd:           file descriptor of the ring.
 * @to_submit:    number of SQEs the caller wants submitted.
 * @min_complete: number of completions to wait for when
 *                IORING_ENTER_GETEVENTS is set (clamped to cq_entries).
 * @flags:        IORING_ENTER_GETEVENTS and/or IORING_ENTER_SQ_WAKEUP; any
 *                other bit yields -EINVAL.
 * @sig, @sigsz:  passed through to io_cqring_wait().
 *
 * Returns the submitted count when it is non-zero; otherwise ret, which is
 * 0 or the result of the event wait, or one of -EINVAL (bad flags), -EBADF
 * (no such fd), -EOPNOTSUPP (fd is not an io_uring file) or -ENXIO (the
 * context reference could not be taken).
 */
SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
u32, min_complete, u32, flags, const sigset_t __user *, sig,
size_t, sigsz)
{
struct io_ring_ctx *ctx;
long ret = -EBADF;
int submitted = 0;
struct fd f; if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP))
return -EINVAL; f = fdget(fd);
if (!f.file)
return -EBADF; ret = -EOPNOTSUPP;
// Only files whose f_op is io_uring_fops are accepted
if (f.file->f_op != &io_uring_fops)
goto out_fput; ret = -ENXIO;
ctx = f.file->private_data;
// Hold a reference on the context for the duration of the call
if (!percpu_ref_tryget(&ctx->refs))
goto out_fput; /*
* For SQ polling, the thread will do all submissions and completions.
* Just return the requested submit count, and wake the thread if
* we were asked to.
*/
ret = 0;
if (ctx->flags & IORING_SETUP_SQPOLL) {
if (flags & IORING_ENTER_SQ_WAKEUP)
wake_up(&ctx->sqo_wait);
submitted = to_submit;
} else if (to_submit) {
// Non-SQPOLL: submit inline under uring_lock, at most sq_entries at once.
// If fewer than requested were submitted, skip the event wait below.
to_submit = min(to_submit, ctx->sq_entries); mutex_lock(&ctx->uring_lock);
submitted = io_ring_submit(ctx, to_submit);
mutex_unlock(&ctx->uring_lock); if (submitted != to_submit)
goto out;
}
if (flags & IORING_ENTER_GETEVENTS) {
// IOPOLL rings go through io_iopoll_check(); all other rings go through
// io_cqring_wait()
unsigned nr_events = 0; min_complete = min(min_complete, ctx->cq_entries); if (ctx->flags & IORING_SETUP_IOPOLL) {
ret = io_iopoll_check(ctx, &nr_events, min_complete);
} else {
ret = io_cqring_wait(ctx, min_complete, sig, sigsz);
}
} out:
percpu_ref_put(&ctx->refs);
out_fput:
fdput(f);
// A non-zero submitted count takes precedence over ret
return submitted ? submitted : ret;
}

TODO

系统调用 io_uring_register

/*
 * io_uring_register system call: look up the ring behind @fd and forward
 * the request to __io_uring_register().
 *
 * @fd:      file descriptor of the ring.
 * @opcode:  registration opcode, passed through unchanged.
 * @arg:     user-space argument pointer, passed through unchanged.
 * @nr_args: argument count, passed through unchanged.
 *
 * Returns -EBADF if @fd does not resolve to a file, -EOPNOTSUPP if the file
 * is not an io_uring file, otherwise the result of __io_uring_register().
 */
SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
void __user *, arg, unsigned int, nr_args)
{
struct io_ring_ctx *ctx;
long ret = -EBADF;
struct fd f; f = fdget(fd);
if (!f.file)
return -EBADF; ret = -EOPNOTSUPP;
// Only files whose f_op is io_uring_fops are accepted; the registration
// itself is performed with ctx->uring_lock held
if (f.file->f_op != &io_uring_fops)
goto out_fput; ctx = f.file->private_data; mutex_lock(&ctx->uring_lock);
ret = __io_uring_register(ctx, opcode, arg, nr_args);
mutex_unlock(&ctx->uring_lock);
out_fput:
fdput(f);
return ret;
}

TODO

内核线程 io_sq_thread

TODO

IOPOLL 模式

启用

io_uring_setup 初始化时 flags 配置了 IORING_SETUP_IOPOLL 字段后将开启 IOPOLL 模式

限制

开启此选项必须保证后续只用 O_DIRECT 打开文件并且文件系统的 file_operations 中注册了 iopoll 函数,否则 IO 将下发失败

调用栈

开启后内核将调用注册的 iopoll 函数来主动轮询设备驱动确认 IO 是否完成

下面对 f_op->iopoll 函数的调用关系进行分析

(原文此处为 mermaid 调用关系图,提取后连线信息丢失,仅保留图中涉及的函数节点:)

io_uring_create
io_ring_ctx_wait_and_kill
io_uring_release
io_iopoll_reap_events
io_ring_ctx_free
io_iopoll_getevents
SYSCALL_DEFINE6(io_uring_enter, ……)
io_iopoll_check
io_sq_thread
io_do_iopoll
f_op->iopoll

主要有三条调用路线(所有调用逻辑都会判断是否在初始化时配置了 IORING_SETUP_IOPOLL):

  1. io_uring 销毁时需要调用
  2. 由系统调用 io_uring_enter 触发,用于轮询 IO 完成情况,直到已完成的 IO 数量达到指定的 wait_nr 后才会退出轮询
  3. 若初始化时同时配置了 IORING_SETUP_SQPOLL,则由 io_sq_thread 内核线程在存在未完成的 IO 时调用,用于更新 IO 完成情况(io_do_iopoll 的参数 min = 0,即每次调用无论是否有新完成的 IO 都会退出轮询,不会阻塞线程)

本文作者: ywang_wnlo
本文链接: https://ywang-wnlo.github.io/posts/4f0d345c.html
版权声明: 本博客所有文章除特别声明外,均采用 BY-NC-SA 许可协议。转载请注明出处!

【io_uring】内核源码分析(更新中)的更多相关文章

  1. 鸿蒙内核源码分析(编译脚本篇) | 如何防编译环境中的牛皮癣 | 百篇博客分析OpenHarmony源码 | v58.01

    百篇博客系列篇.本篇为: v58.xx 鸿蒙内核源码分析(环境脚本篇) | 编译鸿蒙原来如此简单 | 51.c.h.o 本篇用两个脚本完成鸿蒙(L1)的编译环境安装/源码下载/编译过程,让编译,调试鸿 ...

  2. Linux内核源码分析--内核启动之(3)Image内核启动(C语言部分)(Linux-3.0 ARMv7)

    http://blog.chinaunix.net/uid-20543672-id-3157283.html Linux内核源码分析--内核启动之(3)Image内核启动(C语言部分)(Linux-3 ...

  3. Linux内核源码分析--内核启动之(6)Image内核启动(do_basic_setup函数)(Linux-3.0 ARMv7)【转】

    原文地址:Linux内核源码分析--内核启动之(6)Image内核启动(do_basic_setup函数)(Linux-3.0 ARMv7) 作者:tekkamanninja 转自:http://bl ...

  4. kernel 3.10内核源码分析--hung task机制

    kernel 3.10内核源码分析--hung task机制 一.相关知识: 长期以来,处于D状态(TASK_UNINTERRUPTIBLE状态)的进程 都是让人比较烦恼的问题,处于D状态的进程不能接 ...

  5. Linux内核源码分析之setup_arch (四)

    前言 Linux内核源码分析之setup_arch (三) 基本上把setup_arch主要的函数都分析了,由于距离上一篇时间比较久了,所以这里重新贴一下大致的流程图,本文主要分析的是bootmem_ ...

  6. LiteOS内核源码分析:任务LOS_Schedule

    摘要:调度,Schedule也称为Dispatch,是操作系统的一个重要模块,它负责选择系统要处理的下一个任务.调度模块需要协调处于就绪状态的任务对资源的竞争,按优先级策略从就绪队列中获取高优先级的任 ...

  7. 鸿蒙内核源码分析(管道文件篇) | 如何降低数据流动成本 | 百篇博客分析OpenHarmony源码 | v70.01

    百篇博客系列篇.本篇为: v70.xx 鸿蒙内核源码分析(管道文件篇) | 如何降低数据流动成本 | 51.c.h.o 文件系统相关篇为: v62.xx 鸿蒙内核源码分析(文件概念篇) | 为什么说一 ...

  8. 鸿蒙内核源码分析(文件句柄篇) | 深挖应用操作文件的细节 | 百篇博客分析OpenHarmony源码 | v69.01

    百篇博客系列篇.本篇为: v69.xx 鸿蒙内核源码分析(文件句柄篇) | 深挖应用操作文件的细节 | 51.c.h.o 文件系统相关篇为: v62.xx 鸿蒙内核源码分析(文件概念篇) | 为什么说 ...

  9. 鸿蒙内核源码分析(VFS篇) | 文件系统和谐共处的基础 | 百篇博客分析OpenHarmony源码 | v68.01

    子曰:"质胜文则野,文胜质则史.文质彬彬,然后君子." <论语>:雍也篇 百篇博客系列篇.本篇为: v68.xx 鸿蒙内核源码分析(VFS篇) | 文件系统和谐共处的基 ...

  10. 鸿蒙内核源码分析(字符设备篇) | 字节为单位读写的设备 | 百篇博客分析OpenHarmony源码 | v67.01

    百篇博客系列篇.本篇为: v67.xx 鸿蒙内核源码分析(字符设备篇) | 字节为单位读写的设备 | 51.c.h.o 文件系统相关篇为: v62.xx 鸿蒙内核源码分析(文件概念篇) | 为什么说一 ...

随机推荐

  1. idea过期解决

    用作用作发现过期了,苦恼,好办直接 搞个code 就行 MNQ043JMTU-eyJsaWNlbnNlSWQiOiJNTlEwNDNKTVRVIiwibGljZW5zZWVOYW1lIjoiR1VPI ...

  2. nginx 反向代理proxy_pass 后加斜杠和不加斜杆的区别

    今日准备使用nginx 将上次使用docker 部署的一个vue项目进行地址代理,让他看起来高达尚一点,原本docker打包的镜像只是向外暴露了一个8191的端口,访问的时候就只能是 http://w ...

  3. 使用Mybatis-Plus问题解答

    我们使用一个新的框架难免会遇到各种问题,当然使用这款国产的优秀的Mybatis-Plus框架也不例外,下面我就给大家列举一下使用Mybatis-Plus可能遇到的一些问题,并做一下一一的解答. 1:如 ...

  4. 没用,随便写的(Dec_8_2022)

    import numpy as np from PIL import Image import pandas as pd import matplotlib.pyplot as plt # 第一个 # ...

  5. Python 九九乘法表的多种实现方式

    简介 九九乘法表是初学者学习编程的必要练手题目之一,因此各种语言都有对应的实现方式,而 Python 也不例外.在 Python 中,我们可以使用多种方式来生成一个简单的九九乘法表. 本文共介绍了七种 ...

  6. 6 种方式读取 Springboot 的配置,老鸟都这么玩(原理+实战)

    大家好,我是小富- 从配置文件中获取属性应该是SpringBoot开发中最为常用的功能之一,但就是这么常用的功能,仍然有很多开发者在这个方面踩坑. 我整理了几种获取配置属性的方式,目的不仅是要让大家学 ...

  7. Apple、AWS 这些科技巨头,已悄然入局隐私计算

    随着数字化时代的到来,数据已经成为企业竞争的重要资源.然而,与此同时,数据隐私泄露的风险也在不断增加,这已经成为了公共安全和个人权利保护的重要问题.为了解决这个问题,科技巨头谷歌.苹果.亚马逊纷纷入局 ...

  8. Python Flask - 快速构建Web应用详解

    本文将详细探讨Python Flask Web服务.我将首先简单介绍Flask,然后将逐步进入Flask中的路由.模板.表单处理以及数据库集成等高级概念,目标是能够让大家了解并掌握使用Flask来创建 ...

  9. 浏览器手动设置Cookie

    浏览器手动设置Cookie js代码: document.cookie="{KEY}={Value}"; 可多次执行.

  10. 基于FFMPEG+SDL的简单的视频播放器分析

    基于FFMPEG+SDL的简单的视频播放器分析 前言 最近看了雷霄骅前辈的博客<最简单的基于FFMPEG+SDL的视频播放器 ver2 (采用SDL2.0)>,参照他的代码,在window ...