linux内核情景分析之exit与Wait
//第一层系统调用asmlinkage long sys_exit(int error_code){do_exit((error_code&0xff)<<8);}
NORET_TYPE void do_exit(long code){struct task_struct *tsk = current;//获取当前进程描述符if (in_interrupt())//禁止中断时调用do_exitpanic("Aiee, killing interrupt handler!");if (!tsk->pid)//空转进程也就是0号进程禁止退出panic("Attempted to kill the idle task!");if (tsk->pid == 1)//1号进程禁止退出panic("Attempted to kill init!");tsk->flags |= PF_EXITING;//退出进程时,设置此标志位/*进程退出时,可能已经设置了实时定时器,real_timer已挂载到内核定时器队列,现在进程要退出,没必要存在了,就把当前进程从定时器队列中脱离出来*/del_timer_sync(&tsk->real_timer);fake_volatile:#ifdef CONFIG_BSD_PROCESS_ACCTacct_process(code);#endif/*如果是指针共享,那就只是减少mm->mm_users,如果有独立的进程空间,那就直接释放页表,mm_struct,vm_struct以及所有的vma*/__exit_mm(tsk);//加锁lock_kernel();//如果调用exit()之前该信号量还没退出,那就把它撤销sem_exit();//如果只是指针共享,那就减少files_struct->count,如果是独享,那就销毁__exit_files(tsk);//以上相同,释放fs->count__exit_fs(tsk);//释放信号处理函数表exit_sighand(tsk);//空函数exit_thread();///表示进程是否为会话主管if (current->leader)disassociate_ctty(1);//删除终端,释放tty//若正在执行的代码是符合iBCS2标准的程序,则减少相对应模块的引用数目put_exec_domain(tsk->exec_domain);/* 若正在执行的代码属于全局执行文件结构格则减少相对应模块的引用数目 */if (tsk->binfmt && tsk->binfmt->module)__MOD_DEC_USE_COUNT(tsk->binfmt->module);tsk->exit_code = code;//将当前进程设置为僵死状态;并给父进程发信号;其当前进程的子进程的父进程设置为init进程或者其他线程exit_notify();schedule();BUG();
static inline void __exit_mm(struct task_struct * tsk){struct mm_struct * mm = tsk->mm;//获取当前进程的内存描述符mm_release();//唤醒睡眠的父进程if (mm) {atomic_inc(&mm->mm_count);if (mm != tsk->active_mm) BUG();//确保mm与active_mm一样/* more a memory barrier than a real lock */task_lock(tsk);tsk->mm = NULL;//设置为NULLtask_unlock(tsk);//刷新tlbenter_lazy_tlb(mm, current, smp_processor_id());mmput(mm);//释放页表等等}}
/** Send signals to all our closest relatives so that they know* to properly mourn us..*/static void exit_notify(void){struct task_struct * p, *t;//其当前进程的子进程的父进程设置为init进程,如果父进程是线程,那就托孤给其他线程forget_original_parent(current);/** Check to see if any process groups have become orphaned* as a result of our exiting, and if they have any stopped* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)** Case i: Our father is in a different pgrp than we are* and we were the only connection outside, so our pgrp* is about to become orphaned.*/t = current->p_pptr;//获取其养父/*如果当前进程与父进程属于相同的会话,又处于不同的组,当前进程挂了,整个组如果成了孤儿组,那就要给这个进程组的所有进程发送一个SIGHUP跟SIGCONT信号*/if ((t->pgrp != current->pgrp) &&//组不同而会话相同(t->session == current->session) &&will_become_orphaned_pgrp(current->pgrp, current) &&//判断是否是孤儿进程组has_stopped_jobs(current->pgrp)) {////如果进程组中有处于TASK_STOP状态的进程kill_pg(current->pgrp,SIGHUP,1);//先发送SIGHUP在发送SIGCONTkill_pg(current->pgrp,SIGCONT,1);}/* Let father know we died** Thread signals are configurable, but you aren't going to use* that to send signals to arbitary processes.* That stops right now.** If the parent exec id doesn't match the exec id we saved* when we started then we know the parent has changed security* domain.** If our self_exec id doesn't match our parent_exec_id then* we have changed execution domain as these two values started* the same after a fork.**/if(current->exit_signal != SIGCHLD &&( current->parent_exec_id != t->self_exec_id ||current->self_exec_id != current->parent_exec_id)&& !capable(CAP_KILL))current->exit_signal = SIGCHLD;//给父进程发的信号是SIGCHLD/** This loop does two things:** A. Make init inherit all the child processes* B. Check to see if any process groups have become orphaned* as a result of our exiting, and if they have any stopped* jobs, send them a SIGHUP and then a SIGCONT. 
(POSIX 3.2.2.2)*/write_lock_irq(&tasklist_lock);current->state = TASK_ZOMBIE;//设置为僵尸进程do_notify_parent(current, current->exit_signal);//由父进程来料理后事//将子进程队列中的每个进程都转移到托孤的父进程的子进程队列中去while (current->p_cptr != NULL) {//p_cptr表示子进程p = current->p_cptr;//p指向子进程current->p_cptr = p->p_osptr;//子进程指向子进程他哥,形成一个队列p->p_ysptr = NULL;//子进程的滴滴设置为0p->ptrace = 0;p->p_pptr = p->p_opptr;//将养父改为亲父p->p_osptr = p->p_pptr->p_cptr;//子进程的哥哥改为子进程的养父的子进程,移到子进程队列if (p->p_osptr)p->p_osptr->p_ysptr = p;p->p_pptr->p_cptr = p;if (p->state == TASK_ZOMBIE)//并且判断每个子进程是否是僵尸状态do_notify_parent(p, p->exit_signal);/** process group orphan check* Case ii: Our child is in a different pgrp* than we are, and it was the only connection* outside, so the child pgrp is now orphaned.孤儿进程组: 一个进程组中的所有进程的父进程要么是该进程组的一个进程,要么不是该进程组所在的会话中的进程。 一个进程组不是孤儿进程组的条件是,该组中有一个进程其父进程在属于同一个会话的另一个组中。*/if ((p->pgrp != current->pgrp) &&(p->session == current->session)) {int pgrp = p->pgrp;write_unlock_irq(&tasklist_lock);//父进程所在的组是否是孤儿进程组,以及是否含有stop进程if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) {kill_pg(pgrp,SIGHUP,1);kill_pg(pgrp,SIGCONT,1);}write_lock_irq(&tasklist_lock);}}write_unlock_irq(&tasklist_lock);}
/** When we die, we re-parent all our children.* Try to give them to another thread in our process* group, and if no such member exists, give it to* the global child reaper process (ie "init")*/static inline void forget_original_parent(struct task_struct * father){struct task_struct * p, *reaper;read_lock(&tasklist_lock);/* 获取当前用户空间的下一线程 */reaper = next_thread(father);if (reaper == father)//如果相等说明是进程,不是线程组,那就只能托孤给init进程reaper = child_reaper;//init进程for_each_task(p) {if (p->p_opptr == father) {//搜索所有task_struct数据结构,发现其进程生父就是要退出的进程/* We dont want people slaying init */p->exit_signal = SIGCHLD;//设置发送信号p->self_exec_id++;p->p_opptr = reaper;//将要死的进程的子进程托孤给reaper(当前线程的其他线程或者init进程?if (p->pdeath_signal)send_sig(p->pdeath_signal, p, 0);//发送信号,告知儿子死了}}read_unlock(&tasklist_lock);}
/** Let a parent know about a status change of a child.让一个父亲知道有关儿子的改变参数为当前要退出进程,以及信号*/void do_notify_parent(struct task_struct *tsk, int sig){struct siginfo info;int why, status;info.si_signo = sig;info.si_errno = 0;info.si_pid = tsk->pid;info.si_uid = tsk->uid;/* FIXME: find out whether or not this is supposed to be c*time. */info.si_utime = tsk->times.tms_utime;info.si_stime = tsk->times.tms_stime;status = tsk->exit_code & 0x7f;why = SI_KERNEL; /* shouldn't happen */switch (tsk->state) {case TASK_STOPPED:/* FIXME -- can we deduce CLD_TRAPPED or CLD_CONTINUED? */if (tsk->ptrace & PT_PTRACED)why = CLD_TRAPPED;elsewhy = CLD_STOPPED;break;default:if (tsk->exit_code & 0x80)why = CLD_DUMPED;else if (tsk->exit_code & 0x7f)why = CLD_KILLED;else {why = CLD_EXITED;status = tsk->exit_code >> 8;}break;}info.si_code = why;info.si_status = status;send_sig_info(sig, &info, tsk->p_pptr);//发送信号wake_up_parent(tsk->p_pptr);//唤醒父进程}
/** This function is typically called only by the session leader, when* it wants to disassociate itself from its controlling tty.** It performs the following functions:* (1) Sends a SIGHUP and SIGCONT to the foreground process group* (2) Clears the tty from being controlling the session* (3) Clears the controlling tty for all processes in the* session group.*当前进程是一个会话的主进程(current->leader非0)那就还要将整个session与中断切断,并释放tty,pcb有个tty指针* The argument on_exit is set to 1 if called when a process is* exiting; it is 0 if called by the ioctl TIOCNOTTY.*/void disassociate_ctty(int on_exit){struct tty_struct *tty = current->tty;//获取当前进程的ttystruct task_struct *p;int tty_pgrp = -1;if (tty) {tty_pgrp = tty->pgrp;//获取进程组的ttyif (on_exit && tty->driver.type != TTY_DRIVER_TYPE_PTY)//统计tty设备打开的次数tty_vhangup(tty);} else {if (current->tty_old_pgrp) {kill_pg(current->tty_old_pgrp, SIGHUP, on_exit);//给当前进程组发送sighup与sigcont信号kill_pg(current->tty_old_pgrp, SIGCONT, on_exit);}return;}if (tty_pgrp > 0) {kill_pg(tty_pgrp, SIGHUP, on_exit);if (!on_exit)kill_pg(tty_pgrp, SIGCONT, on_exit);}current->tty_old_pgrp = 0;//进程控制终端所在的组标识设置为0tty->session = 0;//会话设置为0tty->pgrp = -1;//组设置为-1read_lock(&tasklist_lock);for_each_task(p)//遍历每个进程是否位于同一会话if (p->session == current->session)//当前进程是会话的主进程p->tty = NULL;//切断tty终端read_unlock(&tasklist_lock);}
//等待子进程的pidasmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru){int flag, retval;DECLARE_WAITQUEUE(wait, current);//为当前进程分配一个waitqueue结构struct task_struct *tsk;if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))return -EINVAL;//添加到当前进程的waitchldexit对列中add_wait_queue(¤t->wait_chldexit,&wait);repeat:flag = 0;current->state = TASK_INTERRUPTIBLE;//设置为睡眠,让其他进程先运行,等待子进程挂了read_lock(&tasklist_lock);tsk = current;do {struct task_struct *p;for (p = tsk->p_cptr ; p ; p = p->p_osptr) {//p表示当前进程的子进程if (pid>0) {if (p->pid != pid)//是否等于参数pid,不等于就继续continue;} else if (!pid) {//不是0号进程if (p->pgrp != current->pgrp)continue;} else if (pid != -1) {//不是-1(随便)if (p->pgrp != -pid)continue;}/* Wait for all children (clone and not) if __WALL is set;* otherwise, wait for clone children *only* if __WCLONE is* set; otherwise, wait for non-clone children *only*. (Note:* A "clone" child here is one that reports to its parent* using a signal other than SIGCHLD.) *///判断子进程的信号是否是sigchldif (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))&& !(options & __WALL))continue;flag = 1;//表示是当前进程的子进程switch (p->state) {case TASK_STOPPED://等待子进程被跟踪if (!p->exit_code)//是否设置了退出码continue;if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED))//判断条件是否跟踪continue;read_unlock(&tasklist_lock);retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;if (!retval && stat_addr)retval = put_user((p->exit_code << 8) | 0x7f, stat_addr);if (!retval) {p->exit_code = 0;retval = p->pid;}goto end_wait4;//满足直接跳到end_wait4case TASK_ZOMBIE://僵尸状态current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime;current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime;read_unlock(&tasklist_lock);retval = ru ? 
getrusage(p, RUSAGE_BOTH, ru) : 0;if (!retval && stat_addr)retval = put_user(p->exit_code, stat_addr);//指定位置保存退出码if (retval)goto end_wait4;retval = p->pid;if (p->p_opptr != p->p_pptr) {//生父与养父是否相同write_lock_irq(&tasklist_lock);REMOVE_LINKS(p);//将task_struct从养父队列中脱离出来p->p_pptr = p->p_opptr;//将养父设置为生父SET_LINKS(p);do_notify_parent(p, SIGCHLD);//通知生父进程自己挂了write_unlock_irq(&tasklist_lock);} elserelease_task(p);//释放残留的资源如pcb等等goto end_wait4;//子进程处于僵死状态,goto end_wait4default:continue;}}if (options & __WNOTHREAD)//如果设置了wnothread直接跳出break;tsk = next_thread(tsk);//到同一进程的寻找下一个线程,一线程创建的子进程挂了,其他线程调用wait应该没用吧?} while (tsk != current);read_unlock(&tasklist_lock);if (flag) {//如果pid不是当前进程的子进程,直接到end_wait4retval = 0;if (options & WNOHANG)//设置了wnohanggoto end_wait4;retval = -ERESTARTSYS;if (signal_pending(current))//当前进程是否有信号未处理goto end_wait4;schedule();//被调度.等待被子进程唤醒goto repeat;}retval = -ECHILD;end_wait4:current->state = TASK_RUNNING;//将当前进程改为可运行状态remove_wait_queue(¤t->wait_chldexit,&wait);return retval;}
下列条件之一得到满足时才结束,goto end_wait4:
1、所等待的子进程的状态变成TASK_STOPPED,TASK_ZOMBIE;
2、所等待的子进程存在,但不在上述两个状态,而调用参数options中的WNOHANG标志位为1,或者当前进程接收到了其他的信号;
3、进程号pid的那个进程根本不存在,或者不是当前进程的子进程。
否则,当前进程将其自身的状态设成TASK_INTERRUPTIBLE,并调用schedule()。
static void release_task(struct task_struct * p)//释放子进程留下的资源{if (p != current) {#ifdef CONFIG_SMP/** Wait to make sure the process isn't on the* runqueue (active on some other CPU still)*/for (;;) {task_lock(p);if (!p->has_cpu)break;task_unlock(p);do {barrier();} while (p->has_cpu);}task_unlock(p);#endifatomic_dec(&p->user->processes);//子进程数目减少free_uid(p->user);//是否uidunhash_process(p);//把子进程的pcb从队列摘下来release_thread(p);//检查进程的LDT是否已释放current->cmin_flt += p->min_flt + p->cmin_flt;current->cmaj_flt += p->maj_flt + p->cmaj_flt;current->cnswap += p->nswap + p->cnswap;/** Potentially available timeslices are retrieved* here - this way the parent does not get penalized* for creating too many processes.** (this cannot be used to artificially 'generate'* timeslices, because any timeslice recovered here* was given away by the parent in the first place.)*/current->counter += p->counter;if (current->counter >= MAX_COUNTER)current->counter = MAX_COUNTER;free_task_struct(p);//将2个物理页大小的pcb释放} else {printk("task releasing itself\n");}}
linux内核情景分析之exit与Wait的更多相关文章
- linux内核情景分析之execve()
用来描述用户态的cpu寄存器在内核栈中保存情况.可以获取用户空间的信息 struct pt_regs { long ebx; //可执行文件路径的指针(regs.ebx中 long ecx; //命令 ...
- Linux内核情景分析之消息队列
早期的Unix通信只有管道与信号,管道的缺点: 所载送的信息是无格式的字节流,不知道分界线在哪,也没通信规范,另外缺乏控制手段,比如保温优先级,管道机制的大小只有1页,管道很容易写满而读取没有及时,发 ...
- Linux内核情景分析之异常访问,用户堆栈的扩展
情景假设: 在堆内存中申请了一块内存,然后释放掉该内存,然后再去访问这块内存.也就是所说的野指针访问. 当cpu产生页面错误时,会把失败的线性地址放在cr2寄存器.线性地址缺页异常的4种情况 1.如果 ...
- Linux内核情景分析的alloc_pages
NUMA结构的alloc_pages ==================== mm/numa.c 43 43 ==================== 43 #ifdef CONFIG_DISCON ...
- linux内核情景分析之内核中的互斥操作
信号量机制: struct sempahore是其结构,定义如下 struct semaphore { atomic_t count;//资源数目 int sleepers;//等待进程数目 wait ...
- linux内核情景分析之命名管道
管道是一种"无名","无形文件,只可以近亲进程使用,不可以再任意两个进程通信使用,所以只能实现"有名","有形"的文件来实现就可以 ...
- linux内核情景分析之信号实现
信号在进程间通信是异步的,每个进程的task_struct结构有一个sig指针,指向一个signal_struct结构 定义如下 struct signal_struct { atomic_t cou ...
- linux内核情景分析之强制性调度
从系统调用返回到用户空间是否调度,从ret_with_reschedule可看出,是否真正调度,取决于当前进程的pcb中的need_resched是否设置为1,那如何设置为1取决于以下几种情况: 时间 ...
- linux内核情景分析之匿名管道
管道的机制由pipe()创建,由pipe()所建立的管道两端都在同一进程.所以必须在fork的配合下,才可以在具有亲缘关系的进程通信 /* * sys_pipe() is the normal C c ...
随机推荐
- android stadio mapping文件的使用
mapping文件就是在对代码混淆以后,你用来查看混淆前和混淆后的对比文件. 一般用来查看线上的bug; java.lang.NullPointerException: Attempt to invo ...
- django中间件CsrfViewMiddleware源码分析,探究csrf实现
Django Documentation csrf保护基于以下: 1. 一个CSRF cookie 基于一个随机生成的值,其他网站无法得到.此cookie由CsrfViewMiddleware产生.它 ...
- 直接插入排序&希尔排序
1.直接插入排序 时间复杂度O(n2) 工作原理: 通过构建有序序列,对于未排序数据,在已排序的序列中,从后向前扫描,找到相应的位置并插入. 插入排序在实现上,在从后向前扫描的过程中,需要反复把已排序 ...
- 四大关键步骤掌握CloudOps模型
[TechTarget中国原创] 要让IT运维向云演进,企业必须拥抱自动化,并且改变资源预配的思考方式. 新涌现的术语CloudOps——云运维的简写,指代企业如何运行以及管理基于云的系统.并且,随着 ...
- 【Python】python内置函数、列表生成式、生成器
一.内置函数 1 print(all([1,2,3,4]))#判断可迭代的对象里面的值是否都为真 2 print(any([0,1,2,3,4]))#判断可迭代的对象里面的值是否有一个为真 3 pri ...
- win 7 查看端口被占用
开始---->运行---->cmd,或者是window+R组合键,调出命令窗口 输入命令:netstat -ano,列出所有端口的情况.在列表中我们观察被占用的端口,比如是4915 ...
- glance上传镜像
glance image-create --name "centos68-test" --file centos68.dsk \ --disk-format raw --conta ...
- 课时34:丰富的else语句以及简洁的with语句
目录: 一.丰富的else语句 二.简洁的with语句 三.课时34课后习题及答案 *********************** 一.丰富的else语句 ********************** ...
- PEAR DB 事务相关
1.autoCommit().commit().rollback() function autoCommit($onoff=false) 指定是否自动提交事务.有的后端数据库不支持. function ...
- 电脑显卡4种接口类型:VGA、DVI、HDMI、DP
电脑显卡全称显示接口卡(Video card,Graphics card),又称为显示适配器(Video adapter),显示器配置卡简称为显卡,是个人电脑最基本组成部分之一.对于显卡接口类型,主要 ...