linux内核情景分析之匿名管道
管道由pipe()系统调用创建。pipe()所建立的管道,其读端和写端最初都在同一个进程中,所以必须配合fork(),才可以在具有亲缘关系的进程之间通信。
/** sys_pipe() is the normal C calling standard for creating* a pipe. It's not the way Unix traditionally does this, though.*/asmlinkage int sys_pipe(unsigned long * fildes){int fd[2];//fd表示,一个读,一个写int error;error = do_pipe(fd);if (!error) {if (copy_to_user(fildes, fd, 2*sizeof(int)))//从内核态拷贝到用户态error = -EFAULT;}return error;}
/*
 * do_pipe - create an anonymous pipe and install both ends in the
 * current process's descriptor table.
 *
 * @fd: kernel array of two ints; on success fd[0] receives the read
 *      descriptor and fd[1] the write descriptor.
 *
 * Resources are acquired in this order: two file objects, the pipe
 * inode, two descriptor numbers, one dentry.  On failure the labels at
 * the bottom release whatever was already acquired, in reverse order.
 *
 * Returns 0 on success.  On failure returns -ENFILE (no file object or
 * no inode), the negative value from get_unused_fd(), or -ENOMEM (no
 * dentry).
 */
int do_pipe(int *fd)
{
	struct qstr this;
	char name[32];		/* dentry name, formatted as "[<inode number>]" */
	struct dentry *dentry;
	struct inode * inode;	/* the inode that represents the pipe */
	struct file *f1, *f2;	/* f1: read end, f2: write end */
	int error;
	int i,j;		/* descriptor numbers for f1 and f2 */

	error = -ENFILE;
	/* Each end gets its own file object. */
	f1 = get_empty_filp();
	if (!f1)
		goto no_files;

	f2 = get_empty_filp();
	if (!f2)
		goto close_f1;

	/* Inode with pipe buffer and pipe_inode_info already set up. */
	inode = get_pipe_inode();
	if (!inode)
		goto close_f12;

	/* Reserve two descriptor numbers; a negative value is an error code. */
	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode;
	i = error;

	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode_i;
	j = error;

	error = -ENOMEM;
	sprintf(name, "[%lu]", inode->i_ino);
	this.name = name;
	this.len = strlen(name);
	this.hash = inode->i_ino; /* will go */
	/*
	 * A file reaches its inode through a dentry (f_dentry->d_inode),
	 * so allocate one under the root of the pipe mount's superblock.
	 */
	dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
	if (!dentry)
		goto close_f12_inode_i_j;
	dentry->d_op = &pipefs_dentry_operations;
	d_add(dentry, inode);	/* bind the dentry to the pipe inode */
	/*
	 * Both files share the same vfsmount and dentry.  mntget() is
	 * called twice and dget() once here - presumably so that each
	 * file ends up holding its own reference; confirm against the
	 * reference counting in d_alloc()/dget().
	 */
	f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
	f1->f_dentry = f2->f_dentry = dget(dentry);

	/* read file */
	f1->f_pos = f2->f_pos = 0;
	f1->f_flags = O_RDONLY;
	f1->f_op = &read_pipe_fops;
	f1->f_mode = 1;		/* NOTE(review): presumably FMODE_READ - confirm */
	f1->f_version = 0;

	/* write file */
	f2->f_flags = O_WRONLY;
	f2->f_op = &write_pipe_fops;
	f2->f_mode = 2;		/* NOTE(review): presumably FMODE_WRITE - confirm */
	f2->f_version = 0;

	/* Publish the file objects under the reserved descriptor numbers. */
	fd_install(i, f1);
	fd_install(j, f2);
	fd[0] = i;
	fd[1] = j;
	return 0;

	/* Error unwinding: each label undoes one more acquisition step. */
close_f12_inode_i_j:
	put_unused_fd(j);
close_f12_inode_i:
	put_unused_fd(i);
close_f12_inode:
	/* Release the pipe buffer page and pipe_inode_info before the inode. */
	free_page((unsigned long) PIPE_BASE(*inode));
	kfree(inode->i_pipe);
	inode->i_pipe = NULL;
	iput(inode);
close_f12:
	put_filp(f2);
close_f1:
	put_filp(f1);
no_files:
	return error;
}
/*
 * Per-pipe state, reached through inode->i_pipe and accessed with the
 * PIPE_* macros.  The number of bytes currently stored is not kept
 * here; PIPE_LEN() uses the inode's i_size for that.
 */
struct pipe_inode_info {
	wait_queue_head_t wait;		/* wait queue used by both readers and writers */
	char *base;			/* data buffer; pipe_new() points it at one page */
	unsigned int start;		/* offset in the buffer of the next byte to read */
	unsigned int readers;		/* number of open read ends */
	unsigned int writers;		/* number of open write ends */
	unsigned int waiting_readers;	/* readers currently sleeping in pipe_wait() */
	unsigned int waiting_writers;	/* writers currently sleeping in pipe_wait() */
	unsigned int r_counter;		/* NOTE(review): only initialised (to 1) in the code shown; exact meaning to be confirmed */
	unsigned int w_counter;		/* NOTE(review): as r_counter, for the write side - confirm */
};
#define PIPE_SEM(inode) (&(inode).i_sem)//inode信号量#define PIPE_WAIT(inode) (&(inode).i_pipe->wait)//inode的i_pipe的管道等待队列#define PIPE_BASE(inode) ((inode).i_pipe->base)//inode管道的base缓冲区指针#define PIPE_START(inode) ((inode).i_pipe->start)//管道的起始地址#define PIPE_LEN(inode) ((inode).i_size)//管道剩余数据#define PIPE_READERS(inode) ((inode).i_pipe->readers)//管道读的对象个数#define PIPE_WRITERS(inode) ((inode).i_pipe->writers)//管道写的对象个数#define PIPE_WAITING_READERS(inode) ((inode).i_pipe->waiting_readers)//管道等待的对象个数#define PIPE_WAITING_WRITERS(inode) ((inode).i_pipe->waiting_writers)#define PIPE_RCOUNTER(inode) ((inode).i_pipe->r_counter)#define PIPE_WCOUNTER(inode) ((inode).i_pipe->w_counter)#define PIPE_EMPTY(inode) (PIPE_LEN(inode) == 0)//管道是否为空#define PIPE_FULL(inode) (PIPE_LEN(inode) == PIPE_SIZE)//管道是否满了#define PIPE_FREE(inode) (PIPE_SIZE - PIPE_LEN(inode))//管道剩余空间#define PIPE_END(inode) ((PIPE_START(inode) + PIPE_LEN(inode)) & (PIPE_SIZE-1))#define PIPE_MAX_RCHUNK(inode) (PIPE_SIZE - PIPE_START(inode))#define PIPE_MAX_WCHUNK(inode) (PIPE_SIZE - PIPE_END(inode))
static struct inode * get_pipe_inode(void){struct inode *inode = get_empty_inode();//分配一空的inode节点//inode第一个成分i_pipe指向一个pipe_inode_info,只有inode表示为一个管道才使用if (!inode)goto fail_inode;if(!pipe_new(inode))//分配缓冲区,以及i_ipipe结构初始化goto fail_iput;PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;//设置writeinode->i_fop = &rdwr_pipe_fops;//设置管道相关读写操作inode->i_sb = pipe_mnt->mnt_sb;/** Mark the inode dirty from the very beginning,* that way it will never be moved to the dirty* list because "mark_inode_dirty()" will think* that it already _is_ on the dirty list.*/inode->i_state = I_DIRTY;inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;inode->i_uid = current->fsuid;inode->i_gid = current->fsgid;inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;inode->i_blksize = PAGE_SIZE;return inode;fail_iput:iput(inode);fail_inode:return NULL;}
/*
 * File operations for a pipe object opened for both reading and
 * writing; get_pipe_inode() installs this table as the inode's i_fop.
 */
struct file_operations rdwr_pipe_fops = {
	.llseek		= pipe_lseek,
	.read		= pipe_read,
	.write		= pipe_write,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_rdwr_open,
	.release	= pipe_rdwr_release,
};
struct inode* pipe_new(struct inode* inode){unsigned long page;page = __get_free_page(GFP_USER);//获取一页用作管道的缓冲区if (!page)return NULL;//再分配一缓冲区用作pipe_inode_info数据结构inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);if (!inode->i_pipe)goto fail_page;init_waitqueue_head(PIPE_WAIT(*inode));//初始化倒艿廊待队列PIPE_BASE(*inode) = (char*) page;//指向缓冲区PIPE_START(*inode) = PIPE_LEN(*inode) = 0;//长度与起始地址PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;//设置为0PIPE_WAITING_READERS(*inode) = PIPE_WAITING_WRITERS(*inode) = 0;//等待读写数目设置为0PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;//计数设置为1return inode;fail_page:free_page(page);return NULL;}
/*
 * File operations for the read end of a pipe (installed on f1 by
 * do_pipe()).  The write slot is bad_pipe_w rather than pipe_write.
 */
struct file_operations read_pipe_fops = {
	.llseek		= pipe_lseek,
	.read		= pipe_read,
	.write		= bad_pipe_w,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_read_open,
	.release	= pipe_read_release,
};
/*
 * Release hook for the read end of a pipe: drops one reader and no
 * writers.  Returns whatever pipe_release() returns.
 */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	const int drop_readers = 1;
	const int drop_writers = 0;

	return pipe_release(inode, drop_readers, drop_writers);
}
/*
 * Release hook for the write end of a pipe: drops one writer and no
 * readers.  Returns whatever pipe_release() returns.
 */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	const int drop_readers = 0;
	const int drop_writers = 1;

	return pipe_release(inode, drop_readers, drop_writers);
}
static intpipe_release(struct inode *inode, int decr, int decw){down(PIPE_SEM(*inode));PIPE_READERS(*inode) -= decr;//共享计数-decrPIPE_WRITERS(*inode) -= decw;//共享计数-decwif (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {//如果读端跟写端的相关fd都关闭了struct pipe_inode_info *info = inode->i_pipe;inode->i_pipe = NULL;//free_page((unsigned long) info->base);//将页面释放kfree(info);//将inode释放} else {wake_up_interruptible(PIPE_WAIT(*inode));}up(PIPE_SEM(*inode));return 0;}
static ssize_tpipe_read(struct file *filp, char *buf, size_t count, loff_t *ppos){struct inode *inode = filp->f_dentry->d_inode;//获取inodessize_t size, read, ret;//不允许seek操作/* Seeks are not allowed on pipes. */ret = -ESPIPE;read = 0;if (ppos != &filp->f_pos)//ppos必须指向filp->f_posgoto out_nolock;/* Always return 0 on null read. */ret = 0;if (count == 0)goto out_nolock;/* Get the pipe semaphore */ret = -ERESTARTSYS;if (down_interruptible(PIPE_SEM(*inode)))goto out_nolock;if (PIPE_EMPTY(*inode)) {//管道中的字节数如果等于0,表示为空管道do_more_read:ret = 0;if (!PIPE_WRITERS(*inode))//如果管道无人写,那就等于写端关闭,那么客户端也要关闭goto out;ret = -EAGAIN;if (filp->f_flags & O_NONBLOCK)//设置非阻塞直接返回,因为管道为空goto out;for (;;) {PIPE_WAITING_READERS(*inode)++;pipe_wait(inode);//休眠,因为没有数据可读PIPE_WAITING_READERS(*inode)--;ret = -ERESTARTSYS;if (signal_pending(current))//当前进程有信号未处理goto out;ret = 0;if (!PIPE_EMPTY(*inode))//如果管道不为空,跳出这循环break;if (!PIPE_WRITERS(*inode))//没有写端,直接跳出goto out;}}/* Read what data is available. */ret = -EFAULT; //如果读取//count表示剩余数不为0,并且pipe还有数据while (count > 0 && (size = PIPE_LEN(*inode))) {char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode);//起始位置ssize_t chars = PIPE_MAX_RCHUNK(*inode);//start到baseif (chars > count)//如果start到base的数据大于countchars = count;if (chars > size)//chars = size;//有3种情况.(1)读取到要求长度,刚好或者还有剩余,直接返回要求长度,否则返回实际长度if (copy_to_user(buf, pipebuf, chars))goto out;read += chars;//read等于实际读取长度PIPE_START(*inode) += chars;//起始位置更改PIPE_START(*inode) &= (PIPE_SIZE - 1);//对齐PIPE_LEN(*inode) -= chars;//长度更爱count -= chars;//要求长度-chars长度buf += chars;//用户缓冲+chars}/* Cache behaviour optimization */if (!PIPE_LEN(*inode))//如果长度为0,就把start设置到页开头PIPE_START(*inode) = 0;//如果读取的数据不够要求的长度并且还有等待写进程并且未设置阻塞if (count && PIPE_WAITING_WRITERS(*inode) && !(filp->f_flags & O_NONBLOCK)) {/** We know that we are going to sleep: signal* writers synchronously that there is more* room.*/wake_up_interruptible_sync(PIPE_WAIT(*inode));//唤醒if (!PIPE_EMPTY(*inode))//管道必须为空BUG();goto do_more_read;//继续读}/* Signal writers 
asynchronously that there is more room. */wake_up_interruptible(PIPE_WAIT(*inode));ret = read;out:up(PIPE_SEM(*inode));out_nolock:if (read)ret = read;return ret;}
/*
 * pipe_write - write @count bytes from user space into a pipe.
 *
 * @filp:  file object of the write end
 * @buf:   user-space source buffer
 * @count: number of bytes to write
 * @ppos:  must be &filp->f_pos; anything else is treated as a seek
 *
 * Writes of at most PIPE_BUF bytes wait (or, with O_NONBLOCK, fail
 * with -EAGAIN) until the whole request fits, so the data goes in as
 * one piece.  Larger writes only need one free byte to start and are
 * copied in as room becomes available.
 *
 * Returns the number of bytes written if any were copied.  Otherwise:
 *   0            count was 0
 *   -ESPIPE      @ppos is not the file's own position
 *   -EAGAIN      O_NONBLOCK set and not enough free space
 *   -ERESTARTSYS interrupted by a signal while taking the semaphore
 *                or while waiting for room
 *   -EFAULT      copy from user space failed
 *   -EPIPE       no readers and nothing written; SIGPIPE is sent to
 *                the current task as well
 *
 * pipe_wait() is defined elsewhere; presumably it releases the pipe
 * semaphore while sleeping and re-takes it before returning - confirm.
 */
static ssize_t
pipe_write(struct file *filp, const char *buf, size_t count, loff_t *ppos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	ssize_t free, written, ret;

	/* Seeks are not allowed on pipes.  */
	ret = -ESPIPE;
	written = 0;
	if (ppos != &filp->f_pos)
		goto out_nolock;

	/* Null write succeeds.  */
	ret = 0;
	if (count == 0)
		goto out_nolock;

	/* Take the pipe semaphore; give up if a signal interrupts the wait. */
	ret = -ERESTARTSYS;
	if (down_interruptible(PIPE_SEM(*inode)))
		goto out_nolock;

	/* No readers yields SIGPIPE.  */
	if (!PIPE_READERS(*inode))
		goto sigpipe;

	/*
	 * If count <= PIPE_BUF, we have to make it atomic.
	 * "free" is the minimum free space required before copying starts:
	 * the whole request for small writes, a single byte otherwise.
	 */
	free = (count <= PIPE_BUF ? count : 1);

	/* Wait, or check for, available space.  */
	if (filp->f_flags & O_NONBLOCK) {
		/* Non-blocking: fail right away if the minimum is not available. */
		ret = -EAGAIN;
		if (PIPE_FREE(*inode) < free)
			goto out;
	} else {
		/* Blocking: sleep until the pipe has at least "free" bytes of room. */
		while (PIPE_FREE(*inode) < free) {
			PIPE_WAITING_WRITERS(*inode)++;
			pipe_wait(inode);
			PIPE_WAITING_WRITERS(*inode)--;
			ret = -ERESTARTSYS;
			if (signal_pending(current))
				goto out;

			/* The last reader may have gone away while we slept. */
			if (!PIPE_READERS(*inode))
				goto sigpipe;
		}
	}

	/* Copy into available space.  */
	ret = -EFAULT;
	while (count > 0) {
		int space;
		char *pipebuf = PIPE_BASE(*inode) + PIPE_END(*inode);
		/* Contiguous bytes from the write offset to the end of the buffer. */
		ssize_t chars = PIPE_MAX_WCHUNK(*inode);

		if ((space = PIPE_FREE(*inode)) != 0) {
			/*
			 * Copy the smallest of: the contiguous chunk, what
			 * is left of the request, and the free space.
			 */
			if (chars > count)
				chars = count;
			if (chars > space)
				chars = space;

			if (copy_from_user(pipebuf, buf, chars))
				goto out;

			written += chars;
			PIPE_LEN(*inode) += chars;
			count -= chars;
			buf += chars;
			space = PIPE_FREE(*inode);
			continue;
		}

		/*
		 * The pipe is full but data remains (only reachable for
		 * writes larger than the initial minimum).  Non-blocking
		 * writers stop here with a short count.
		 */
		ret = written;
		if (filp->f_flags & O_NONBLOCK)
			break;

		do {
			/*
			 * Synchronous wake-up: it knows that this process
			 * is going to give up this CPU, so it doesnt have
			 * to do idle reschedules.
			 */
			wake_up_interruptible_sync(PIPE_WAIT(*inode));
			PIPE_WAITING_WRITERS(*inode)++;
			pipe_wait(inode);
			PIPE_WAITING_WRITERS(*inode)--;
			/* We may have been woken by a signal rather than by a reader. */
			if (signal_pending(current))
				goto out;
			if (!PIPE_READERS(*inode))
				goto sigpipe;
		} while (!PIPE_FREE(*inode));	/* keep waiting while the pipe is still full */
		ret = -EFAULT;
	}

	/* Signal readers asynchronously that there is more data.  */
	wake_up_interruptible(PIPE_WAIT(*inode));

	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
	mark_inode_dirty(inode);

out:
	up(PIPE_SEM(*inode));
out_nolock:
	/* Data already written takes precedence over any error code. */
	if (written)
		ret = written;
	return ret;

sigpipe:
	/* All readers are gone.  If something was written, report that instead. */
	if (written)
		goto out;
	up(PIPE_SEM(*inode));
	send_sig(SIGPIPE, current, 0);
	return -EPIPE;
}
在阻塞的情况下:
· 如果write的字节数小于等于PIPE_BUF,那么write会阻塞到写入所有数据,并且写入操作是原子的。
· 如果write的字节数大于PIPE_BUF,那么write会阻塞到写入所有数据,但写入操作不是原子的,即write会根据当前缓冲区剩余的大小,写入相应的字节数,然后等待下一次有空余的缓冲区,这中间可能会有其他进程进行write操作。
在非阻塞的情况下:
· 如果write的字节数小于等于PIPE_BUF,且管道或FIFO有足以存放要写入数据大小的空间,那么就写入所有数据;
· 如果write的字节数小于等于PIPE_BUF,且管道或FIFO没有足够存放要写入数据大小的空间,那么就会立即返回EAGAIN错误。
· 如果write的字节数大于PIPE_BUF,且管道或FIFO有至少1B的空间,那么内核就会写入相应的字节数,然后返回已写入的字节数;
· 如果write的字节数大于PIPE_BUF,且管道或FIFO无任何的空间,那么就会立即返回EAGAIN错误。
linux内核情景分析之匿名管道的更多相关文章
- linux内核情景分析之命名管道
管道是一种"无名","无形文件,只可以近亲进程使用,不可以再任意两个进程通信使用,所以只能实现"有名","有形"的文件来实现就可以 ...
- linux内核情景分析之execve()
用来描述用户态的cpu寄存器在内核栈中保存情况.可以获取用户空间的信息 struct pt_regs { long ebx; //可执行文件路径的指针(regs.ebx中 long ecx; //命令 ...
- Linux内核情景分析之消息队列
早期的Unix通信只有管道与信号,管道的缺点: 所载送的信息是无格式的字节流,不知道分界线在哪,也没通信规范,另外缺乏控制手段,比如保温优先级,管道机制的大小只有1页,管道很容易写满而读取没有及时,发 ...
- Linux内核情景分析的alloc_pages
NUMA结构的alloc_pages ==================== mm/numa.c 43 43 ==================== 43 #ifdef CONFIG_DISCON ...
- linux内核情景分析之内核中的互斥操作
信号量机制: struct sempahore是其结构,定义如下 struct semaphore { atomic_t count;//资源数目 int sleepers;//等待进程数目 wait ...
- Linux内核情景分析之异常访问,用户堆栈的扩展
情景假设: 在堆内存中申请了一块内存,然后释放掉该内存,然后再去访问这块内存.也就是所说的野指针访问. 当cpu产生页面错误时,会把失败的线性地址放在cr2寄存器.线性地址缺页异常的4种情况 1.如果 ...
- linux内核情景分析之exit与Wait
//第一层系统调用 asmlinkage long sys_exit(int error_code) { do_exit((error_code&0xff)<<8); } 其主体是 ...
- linux内核情景分析之信号实现
信号在进程间通信是异步的,每个进程的task_struct结构有一个sig指针,指向一个signal_struct结构 定义如下 struct signal_struct { atomic_t cou ...
- linux内核情景分析之强制性调度
从系统调用返回到用户空间是否调度,从ret_with_reschedule可看出,是否真正调度,取决于当前进程的pcb中的need_resched是否设置为1,那如何设置为1取决于以下几种情况: 时间 ...
随机推荐
- JZOJ 5775. 【NOIP2008模拟】农夫约的假期
5775. [NOIP2008模拟]农夫约的假期 (File IO): input:shuru.in output:shuru.out Time Limits: 1000 ms Memory Lim ...
- C++构造函数实例——拷贝构造,赋值
#define _CRT_SECURE_NO_WARNINGS //windows系统 #include <iostream> #include <cstdlib> #incl ...
- CQRS之旅——旅程3(订单和注册限界上下文)
旅程3:订单和注册限界上下文 CQRS之旅的第一站 "寓言家和鳄鱼是一样的,只是名字不同" --约翰·劳森 描述: 订单和注册上下文有一部分职责在会议预订的过程中,在此上下文中,一 ...
- git---gui使用
1.登陆的命令: git config –global user.email "1455971532@qq.com" git config –global user.name &q ...
- git使用问题整理
git访问远端仓库报"fatal: Authentication failed for"错误的,可能原因是账户密码变更,git配置了使用creditial helper,所以需要取 ...
- 使用code::blocks编译windows的dll链接库
因为机子上没有安装Visual Studio,所以找到了一种通过code::blocks编译dll的方式,踩到的坑是code::blocks默认的compiler是32位的,这样编译出的dll也是32 ...
- .net core 项目加载提示项目文件不完整缺少预期导入的解决办法
今天把在远端的仓库的代码在另一台电脑上拷贝下来,电脑上.net core 环境也已经安装了,但是发现有几个项目没有加载成功,然后重新加载项目,vs2017却提示 项目文件不完整,缺少预期导入 查看错误 ...
- Leetcode 523.连续的子数组和
连续的子数组和 给定一个包含非负数的数组和一个目标整数 k,编写一个函数来判断该数组是否含有连续的子数组,其大小至少为 2,总和为 k 的倍数,即总和为 n*k,其中 n 也是一个整数. 示例 1: ...
- [oldboy-django][6其他]rest framwork有关事
官网地址: https://github.com/encode/django-rest-framework 英文教程:http://www.django-rest-framework.org/tuto ...
- 四则运算UI组结对作业报告
Github提交地址: 小组成员:陈兆庭,陈昶金: 一.编程阶段 清明节开始接触和调研关于UI设计的方法.由于两人的各方面知识储备均不足,在各种东西C#.MFC.Qt中进行调查和讨论,最终因为网上说Q ...