linux内核情景分析之exit与Wait

/*
 * First layer of the exit() system call: encode the caller's status and
 * hand it to do_exit().
 */
asmlinkage long sys_exit(int error_code)
{
	/* Keep only the low 8 bits of the status and move them into bits 8..15. */
	long exit_code = (error_code & 0xff) << 8;

	do_exit(exit_code);
}

其主体是do_exit,接下来我们来看看do_exit的实现

/*
 * Terminate the current task with exit code `code`.
 *
 * Refuses (by panicking) to run from interrupt context or on behalf of
 * pid 0 or pid 1, then drops the task's resources one by one, records the
 * exit code, notifies the relatives through exit_notify() and finally
 * calls schedule().
 */
NORET_TYPE void do_exit(long code)
{
	struct task_struct *tsk = current;	/* descriptor of the exiting task */

	if (in_interrupt())	/* do_exit() must not be called from interrupt context */
		panic("Aiee, killing interrupt handler!");
	if (!tsk->pid)		/* pid 0, the idle task, is never allowed to exit */
		panic("Attempted to kill the idle task!");
	if (tsk->pid == 1)	/* pid 1, init, is never allowed to exit */
		panic("Attempted to kill init!");
	tsk->flags |= PF_EXITING;	/* mark the task as exiting */
	/*
	 * The task may have armed its real-time timer, in which case
	 * real_timer is queued on the kernel timer list.  The task is going
	 * away, so take the timer off the list.
	 */
	del_timer_sync(&tsk->real_timer);

fake_volatile:
#ifdef CONFIG_BSD_PROCESS_ACCT
	acct_process(code);
#endif
	/* Detach from the user address space and drop our reference to it. */
	__exit_mm(tsk);

	lock_kernel();
	/* Undo any semaphore operations still outstanding for this task. */
	sem_exit();
	/*
	 * Drop the open-file table and the fs context.
	 * NOTE(review): presumably each only decrements a use count when the
	 * structure is shared and frees it otherwise -- confirm in the helpers.
	 */
	__exit_files(tsk);
	__exit_fs(tsk);
	/* Release the signal-handler table. */
	exit_sighand(tsk);
	/* NOTE(review): the original note says this is an empty function here. */
	exit_thread();

	/* A session leader also gives up its controlling tty. */
	if (current->leader)
		disassociate_ctty(1);

	/* Drop the references on the execution domain ... */
	put_exec_domain(tsk->exec_domain);
	/* ... and on the module implementing the binary format, if any. */
	if (tsk->binfmt && tsk->binfmt->module)
		__MOD_DEC_USE_COUNT(tsk->binfmt->module);

	tsk->exit_code = code;
	/*
	 * Re-parent our children, become a zombie and signal the parent;
	 * see exit_notify().
	 */
	exit_notify();
	schedule();
	BUG();	/* only reached if schedule() came back */
	/*
	 * Tail of the function restored (the excerpt stopped at BUG() and
	 * left the body unterminated).  The jump back to the fake_volatile
	 * label keeps the compiler from seeing a path on which this
	 * NORET_TYPE function returns; should schedule() ever return, the
	 * teardown is simply attempted again.
	 */
	goto fake_volatile;
}

接着挨个分析释放资源相关函数(信号量就等到进程间通信学完再分析)

/*
 * Detach the exiting task from its memory descriptor and drop the task's
 * reference on it.  Does nothing beyond mm_release() when tsk->mm is NULL.
 */
static inline void __exit_mm(struct task_struct * tsk)
{
	struct mm_struct *old_mm = tsk->mm;	/* descriptor being given up */

	/* NOTE(review): presumably wakes a parent waiting on this task -- confirm in mm_release(). */
	mm_release();
	if (!old_mm)
		return;

	/* Take an extra mm_count reference before clearing tsk->mm. */
	atomic_inc(&old_mm->mm_count);
	/* mm and active_mm are expected to be the same descriptor here. */
	if (old_mm != tsk->active_mm)
		BUG();
	/* more a memory barrier than a real lock */
	task_lock(tsk);
	tsk->mm = NULL;
	task_unlock(tsk);
	/* NOTE(review): presumably switches this CPU to lazy-TLB mode for old_mm -- confirm. */
	enter_lazy_tlb(old_mm, current, smp_processor_id());
	/* Drop the user reference taken on the address space. */
	mmput(old_mm);
}

以上资源释放完后,进程设置为僵尸状态,还保留pcb以及内核栈,自己并不释放而是由父进程负责,将调用exit_notify()通知其父进程

原因:让父进程可以统计信息,接下来看看exit_notify()


/*
 * Send signals to all our closest relatives so that they know
 * to properly mourn us..
 */
static void exit_notify(void)
{
	struct task_struct * p, *t;

	/*
	 * Give our children a new original parent: another thread of our
	 * thread group if there is one, otherwise init.
	 */
	forget_original_parent(current);
	/*
	 * Check to see if any process groups have become orphaned
	 * as a result of our exiting, and if they have any stopped
	 * jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
	 *
	 * Case i: Our father is in a different pgrp than we are
	 * and we were the only connection outside, so our pgrp
	 * is about to become orphaned.
	 */

	t = current->p_pptr;	/* our current parent */

	/*
	 * The parent is in the same session but a different process group:
	 * if our exit orphans our own group and that group contains stopped
	 * tasks, the whole group gets SIGHUP followed by SIGCONT.
	 */
	if ((t->pgrp != current->pgrp) &&
	    (t->session == current->session) &&
	    will_become_orphaned_pgrp(current->pgrp, current) &&
	    has_stopped_jobs(current->pgrp)) {
		kill_pg(current->pgrp,SIGHUP,1);
		kill_pg(current->pgrp,SIGCONT,1);
	}

	/* Let father know we died 
	 *
	 * Thread signals are configurable, but you aren't going to use
	 * that to send signals to arbitary processes. 
	 * That stops right now.
	 *
	 * If the parent exec id doesn't match the exec id we saved
	 * when we started then we know the parent has changed security
	 * domain.
	 *
	 * If our self_exec id doesn't match our parent_exec_id then
	 * we have changed execution domain as these two values started
	 * the same after a fork.
	 *	
	 */

	if(current->exit_signal != SIGCHLD &&
	    ( current->parent_exec_id != t->self_exec_id  ||
	      current->self_exec_id != current->parent_exec_id) 
	    && !capable(CAP_KILL))
		current->exit_signal = SIGCHLD;	/* fall back to plain SIGCHLD for the parent */

	/*
	 * This loop does two things:
	 *
  	 * A.  Make init inherit all the child processes
	 * B.  Check to see if any process groups have become orphaned
	 *	as a result of our exiting, and if they have any stopped
	 *	jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
	 */

	write_lock_irq(&tasklist_lock);
	current->state = TASK_ZOMBIE;	/* from here on we are a zombie */
	do_notify_parent(current, current->exit_signal);	/* the parent cleans up after us */
	/* Move every child from our child list onto its new parent's child list. */
	while (current->p_cptr != NULL) {
		p = current->p_cptr;		/* first child on our list */
		current->p_cptr = p->p_osptr;	/* unlink it: our list now starts at its older sibling */
		p->p_ysptr = NULL;		/* it will be the youngest on the new list */
		p->ptrace = 0;

		p->p_pptr = p->p_opptr;		/* current parent := original parent */
		p->p_osptr = p->p_pptr->p_cptr;	/* insert at the head of that parent's child list */
		if (p->p_osptr)
			p->p_osptr->p_ysptr = p;
		p->p_pptr->p_cptr = p;
		if (p->state == TASK_ZOMBIE)	/* already dead: tell the new parent right away */
			do_notify_parent(p, p->exit_signal);
		/*
		 * process group orphan check
		 * Case ii: Our child is in a different pgrp
		 * than we are, and it was the only connection
		 * outside, so the child pgrp is now orphaned.
		 *
		 * Orphaned process group: for every member, the parent is
		 * either itself a member of the group or is not in the
		 * group's session.  Put the other way round, a group is not
		 * orphaned as long as one member has a parent in a different
		 * group of the same session.
		 */

		if ((p->pgrp != current->pgrp) &&
		    (p->session == current->session)) {
			int pgrp = p->pgrp;

			write_unlock_irq(&tasklist_lock);
			/* Is the child's group now orphaned, and does it hold stopped tasks? */
			if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) {
				kill_pg(pgrp,SIGHUP,1);
				kill_pg(pgrp,SIGCONT,1);
			}
			write_lock_irq(&tasklist_lock);
		}
	}
	write_unlock_irq(&tasklist_lock);
}

如果退出的进程属于一个线程组(即还有其他线程),就把它的子进程托孤给同组的其他线程;否则托孤给init进程


/*
 * When we die, we re-parent all our children.
 * Try to give them to another thread in our process
 * group, and if no such member exists, give it to
 * the global child reaper process (ie "init")
 */
static inline void forget_original_parent(struct task_struct * father)
{
	struct task_struct *child, *new_parent;

	read_lock(&tasklist_lock);

	/*
	 * Prefer the next thread of the dying task; getting `father` itself
	 * back means there is no other thread, so init adopts the children.
	 */
	new_parent = next_thread(father);
	if (new_parent == father)
		new_parent = child_reaper;

	/* Scan every task for those whose original parent is the dying task. */
	for_each_task(child) {
		if (child->p_opptr != father)
			continue;

		/* We dont want people slaying init */
		child->exit_signal = SIGCHLD;
		child->self_exec_id++;
		child->p_opptr = new_parent;	/* hand the child over */
		/* The child asked to be signalled when its parent dies. */
		if (child->pdeath_signal)
			send_sig(child->pdeath_signal, child, 0);
	}
	read_unlock(&tasklist_lock);
}

接下来查看do_notify_parent发送信号给父进程


/*
 * Let a parent know about a status change of a child.
 *
 * tsk is the child whose status changed, sig the signal to deliver to
 * tsk->p_pptr.  The siginfo carries the child's pid, uid and CPU times,
 * plus a code/status pair derived from the child's state and exit_code.
 */

void do_notify_parent(struct task_struct *tsk, int sig)
{
	struct siginfo info;
	int why, status;

	/* Unless the child exited normally, report the low 7 bits of exit_code. */
	status = tsk->exit_code & 0x7f;

	if (tsk->state == TASK_STOPPED) {
		/* FIXME -- can we deduce CLD_TRAPPED or CLD_CONTINUED? */
		why = (tsk->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
	} else if (tsk->exit_code & 0x80) {
		why = CLD_DUMPED;
	} else if (tsk->exit_code & 0x7f) {
		why = CLD_KILLED;
	} else {
		/* Normal exit: report the status stored above the low byte. */
		why = CLD_EXITED;
		status = tsk->exit_code >> 8;
	}

	info.si_signo = sig;
	info.si_errno = 0;
	info.si_code = why;
	info.si_status = status;
	info.si_pid = tsk->pid;
	info.si_uid = tsk->uid;

	/* FIXME: find out whether or not this is supposed to be c*time. */
	info.si_utime = tsk->times.tms_utime;
	info.si_stime = tsk->times.tms_stime;

	/* Deliver the signal, then wake the parent. */
	send_sig_info(sig, &info, tsk->p_pptr);
	wake_up_parent(tsk->p_pptr);
}


/*
 * This function is typically called only by the session leader, when
 * it wants to disassociate itself from its controlling tty.
 *
 * It performs the following functions:
 * 	(1)  Sends a SIGHUP and SIGCONT to the foreground process group
 * 	(2)  Clears the tty from being controlling the session
 * 	(3)  Clears the controlling tty for all processes in the
 * 		session group.
 *当前进程是一个会话的主进程(current->leader非0)那就还要将整个session与中断切断,并释放tty,pcb有个tty指针
 * The argument on_exit is set to 1 if called when a process is
 * exiting; it is 0 if called by the ioctl TIOCNOTTY.
 */
void disassociate_ctty(int on_exit)
{
	struct tty_struct *tty = current->tty;//获取当前进程的tty
	struct task_struct *p;
	int tty_pgrp = -1;

	if (tty) {
		tty_pgrp = tty->pgrp;//获取进程组的tty
		if (on_exit && tty->driver.type != TTY_DRIVER_TYPE_PTY)//统计tty设备打开的次数
			tty_vhangup(tty);
	} else {
		if (current->tty_old_pgrp) {
			kill_pg(current->tty_old_pgrp, SIGHUP, on_exit);//给当前进程组发送sighup与sigcont信号
			kill_pg(current->tty_old_pgrp, SIGCONT, on_exit);
		}
		return;
	}
	if (tty_pgrp > 0) {
		kill_pg(tty_pgrp, SIGHUP, on_exit);
		if (!on_exit)
			kill_pg(tty_pgrp, SIGCONT, on_exit);
	}

	current->tty_old_pgrp = 0;//进程控制终端所在的组标识设置为0
	tty->session = 0;//会话设置为0
	tty->pgrp = -1;//组设置为-1

	read_lock(&tasklist_lock);
	for_each_task(p)//遍历每个进程是否位于同一会话
	  	if (p->session == current->session)//当前进程是会话的主进程
			p->tty = NULL;//切断tty终端
	read_unlock(&tasklist_lock);
}

do_exit流程:

禁止在中断上下文中调用do_exit,也禁止0号进程(空转进程)和1号进程(init)退出

如果有独立空间那就删除独立空间,释放页表,释放信号量,释放文件对象,释放信号处理函数表

如果是会话控制进程,删除终端,释放tty,接下来调用exit_notify()函数

如果当前进程是线程(也就是线程组中还有其他线程,非独享),托孤给其他线程,否则托孤给init进程

判断当前进程退出是否会导致孤儿进程组出现

设置发送信号为SIGCHLD,将当前进程设置为僵尸状态,接着调用do_notify_parent发送信号给父进程,并唤醒父进程

并将僵尸进程的所有子进程的队列移到托孤的队列.最后schedule()

                       //等待子进程的pid
/*
 * wait4(): wait for a status change of a child selected by `pid`.
 *
 * pid > 0   only the child with that pid
 * pid == 0  any child in the caller's process group
 * pid == -1 any child
 * pid < -1  any child in process group -pid
 *
 * Returns the pid of the reported child, 0 when WNOHANG is set and no
 * selected child is ready, -ECHILD when no child matches the selection,
 * -ERESTARTSYS when a signal is pending, -EINVAL for unknown option bits,
 * or the error from getrusage()/put_user().  The status word is stored
 * through stat_addr and resource usage through ru when those are non-NULL.
 */
asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru)
{
	int flag, retval;
	DECLARE_WAITQUEUE(wait, current);/* wait-queue entry for the current task */
	struct task_struct *tsk;

	if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
		return -EINVAL;
    /* Queue ourselves on our own wait_chldexit queue. */
	add_wait_queue(&current->wait_chldexit,&wait);
repeat:
	flag = 0;
	current->state = TASK_INTERRUPTIBLE;/* set the sleep state before scanning the children */
	read_lock(&tasklist_lock);
	tsk = current;
	do {
		struct task_struct *p;
	 	for (p = tsk->p_cptr ; p ; p = p->p_osptr) {/* walk tsk's child list */
			if (pid>0) {
				if (p->pid != pid)/* a specific child was requested; skip all others */
					continue;
			} else if (!pid) {/* pid == 0: only children in the caller's process group */
				if (p->pgrp != current->pgrp)
					continue;
			} else if (pid != -1) {/* pid < -1: only children in process group -pid */
				if (p->pgrp != -pid)
					continue;
			}
			/* Wait for all children (clone and not) if __WALL is set;
			 * otherwise, wait for clone children *only* if __WCLONE is
			 * set; otherwise, wait for non-clone children *only*.  (Note:
			 * A "clone" child here is one that reports to its parent
			 * using a signal other than SIGCHLD.) */
			 /* Filter on whether the child's exit signal is SIGCHLD. */
			if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
			    && !(options & __WALL))
				continue;
			flag = 1;/* at least one child matches the selection */
			switch (p->state) {
			case TASK_STOPPED:/* stopped child */
				if (!p->exit_code)/* nothing to report (exit_code is cleared below once reported) */
					continue;
				if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED))/* report only if WUNTRACED was given or the child is traced */
					continue;
				read_unlock(&tasklist_lock);
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; 
				if (!retval && stat_addr) 
					retval = put_user((p->exit_code << 8) | 0x7f, stat_addr);
				if (!retval) {
					p->exit_code = 0;
					retval = p->pid;
				}
				goto end_wait4;/* done: report this child */
			case TASK_ZOMBIE:/* child has exited */
				/* Fold the child's CPU times into our children's totals. */
				current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime;
				current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime;
				read_unlock(&tasklist_lock);
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				if (!retval && stat_addr)
					retval = put_user(p->exit_code, stat_addr);/* store the exit code for the caller */
				if (retval)
					goto end_wait4; 
				retval = p->pid;
				if (p->p_opptr != p->p_pptr) {/* current parent differs from the original parent */
					write_lock_irq(&tasklist_lock);
					REMOVE_LINKS(p);/* unlink from the current parent's lists */
					p->p_pptr = p->p_opptr;/* give the child back to its original parent */
					SET_LINKS(p);
					do_notify_parent(p, SIGCHLD);/* tell the original parent the child has died */
					write_unlock_irq(&tasklist_lock);
				} else
					release_task(p);/* free what is left of the child (task struct etc.) */
				goto end_wait4;/* done: report this child */
			default:
				continue;
			}
		}
		if (options & __WNOTHREAD)/* __WNOTHREAD: look at the caller's own children only */
			break;
		/* NOTE(review): otherwise the children of the other threads in the caller's thread group are scanned as well. */
		tsk = next_thread(tsk);
	} while (tsk != current);
	read_unlock(&tasklist_lock);
	if (flag) {/* a matching child exists but none is ready to be reported */
		retval = 0;
		if (options & WNOHANG)/* caller does not want to block */
			goto end_wait4;
		retval = -ERESTARTSYS;
		if (signal_pending(current))/* a signal is pending for the caller */
			goto end_wait4;
		schedule();/* sleep until woken, then rescan */
		goto repeat;
	}
	retval = -ECHILD;/* no child matches the selection */
end_wait4:
	current->state = TASK_RUNNING;/* back to runnable */
	remove_wait_queue(&current->wait_chldexit,&wait);
	return retval;
}

下列条件之一得到满足时才结束，goto end_wait4：

1、所等待的子进程的状态变成TASK_STOPPED，TASK_ZOMBIE；

2、所等待的子进程存在，可不在上述两个状态，而调用参数options中的WNOHANG标志位为1，或者当前进程接收到了其他的信号；

3、进程号pid的那个进程根本不存在，或者不是当前进程的子进程。

否则，当前进程将其自身的状态设成TASK_INTERRUPTIBLE，并调用schedule()。

释放残余的子进程资源


/*
 * Free what a dead child left behind: its per-user process accounting,
 * its hash/list linkage and finally the task structure itself.  The
 * child's fault, swap and remaining time-slice counters are folded into
 * the caller's.  A task may not release itself; that case only logs.
 */
static void release_task(struct task_struct * p)
{
	if (p == current) {
		printk("task releasing itself\n");
		return;
	}

#ifdef CONFIG_SMP
	/*
	 * Wait to make sure the process isn't on the
	 * runqueue (active on some other CPU still)
	 */
	task_lock(p);
	while (p->has_cpu) {
		/* Still running elsewhere: spin without the lock, then recheck under it. */
		task_unlock(p);
		do {
			barrier();
		} while (p->has_cpu);
		task_lock(p);
	}
	task_unlock(p);
#endif
	atomic_dec(&p->user->processes);	/* one process fewer for this user */
	free_uid(p->user);			/* drop the reference on the user structure */
	unhash_process(p);			/* take the task off the task lists */

	release_thread(p);	/* NOTE(review): original note says this checks that the LDT was freed */

	/* Fold the child's fault and swap counts into our children's totals. */
	current->cmin_flt += p->min_flt + p->cmin_flt;
	current->cmaj_flt += p->maj_flt + p->cmaj_flt;
	current->cnswap += p->nswap + p->cnswap;
	/*
	 * Potentially available timeslices are retrieved
	 * here - this way the parent does not get penalized
	 * for creating too many processes.
	 *
	 * (this cannot be used to artificially 'generate'
	 * timeslices, because any timeslice recovered here
	 * was given away by the parent in the first place.)
	 */
	current->counter += p->counter;
	if (current->counter >= MAX_COUNTER)
		current->counter = MAX_COUNTER;

	/* NOTE(review): original note says the task struct occupies two physical pages. */
	free_task_struct(p);
}

来自为知笔记(Wiz)

linux内核情景分析之exit与Wait的更多相关文章

linux内核情景分析之execve()
用来描述用户态的cpu寄存器在内核栈中保存情况.可以获取用户空间的信息 struct pt_regs { long ebx; //可执行文件路径的指针(regs.ebx中 long ecx; //命令 ...
Linux内核情景分析之消息队列
早期的Unix通信只有管道与信号,管道的缺点: 所载送的信息是无格式的字节流,不知道分界线在哪,也没通信规范,另外缺乏控制手段,比如保温优先级,管道机制的大小只有1页,管道很容易写满而读取没有及时,发 ...
Linux内核情景分析之异常访问,用户堆栈的扩展
情景假设: 在堆内存中申请了一块内存,然后释放掉该内存,然后再去访问这块内存.也就是所说的野指针访问. 当cpu产生页面错误时,会把失败的线性地址放在cr2寄存器.线性地址缺页异常的4种情况 1.如果 ...
Linux内核情景分析的alloc_pages
NUMA结构的alloc_pages ==================== mm/numa.c 43 43 ==================== 43 #ifdef CONFIG_DISCON ...
linux内核情景分析之内核中的互斥操作
信号量机制: struct sempahore是其结构,定义如下 struct semaphore { atomic_t count;//资源数目 int sleepers;//等待进程数目 wait ...
linux内核情景分析之命名管道
管道是一种"无名","无形文件,只可以近亲进程使用,不可以再任意两个进程通信使用,所以只能实现"有名","有形"的文件来实现就可以 ...
linux内核情景分析之信号实现
信号在进程间通信是异步的,每个进程的task_struct结构有一个sig指针,指向一个signal_struct结构定义如下 struct signal_struct { atomic_t cou ...
linux内核情景分析之强制性调度
从系统调用返回到用户空间是否调度,从ret_with_reschedule可看出,是否真正调度,取决于当前进程的pcb中的need_resched是否设置为1,那如何设置为1取决于以下几种情况: 时间 ...
linux内核情景分析之匿名管道
管道的机制由pipe()创建,由pipe()所建立的管道两端都在同一进程.所以必须在fork的配合下,才可以在具有亲缘关系的进程通信 /* * sys_pipe() is the normal C c ...

随机推荐

Spring---加载配置文件的几种方法（org.springframework.beans.factory.BeanDefinitionStoreException）
Spring中的几种容器都支持使用xml装配bean,包括:XmlBeanFactory ,ClassPathXmlApplicationContext ,FileSystemXmlApplicati ...
notepad++ 换行技巧 log换行
有时候,服务器收集上来的日志,格式很乱,看log很难,如下: java.lang.IllegalStateException: BEvent.init() must be call first\n\t ...
获取ubuntu中软件包的有用地址
http://us.archive.ubuntu.com/ubuntu/pool/main/g/gettext/
《Cracking the Coding Interview》——第16章：线程与锁——题目3
2014-04-27 19:26 题目:哲学家吃饭问题,死锁问题经典模型(专门用来黑哲学家的?). 解法:死锁四条件:1. 资源互斥.2. 请求保持.3. 非抢占.4. 循环等待.所以,某砖家拿起一只 ...
【Lowest Common Ancestor of a Binary Tree】cpp
题目: Given a binary tree, find the lowest common ancestor (LCA) of two given nodes in the tree. Accor ...
Node应用进程管理器pm2的使用
本文转载自:豆瓣-PM2介绍更多内容见github上的pm2说明文档pm2 pm2 是一个带有负载均衡功能的Node应用的进程管理器. 当你要把你的独立代码利用全部的服务器上的所有CPU,并保证进程 ...
flask-ssti,xss的防止
源码 import uuid from flask import Flask, request, make_response, session,render_template, url_for, re ...
HTML5 FileReader接口学习笔记
1.FileReader概述 FileReader 对象允许Web应用程序异步读取存储在用户计算机上的文件(或原始数据缓冲区)的内容,使用 File 或 Blob 对象指定要读取的文件或数据. 其中F ...
JavaWeb项目中使用ajax上传文件
1.jsp $("#cxsc").click(function(){ var bankId = $("#bankId").val(); var formdata ...
团队项目-第九次scrum 会议
时间:11.5 时长:40分钟地点:F楼1039教室工作情况团队成员已完成任务待完成任务解小锐完成员工commit函数的数值函数编写完成多种招聘方式的逻辑编写陈鑫实现游戏的暂停功能 ...

linux内核情景分析之exit与Wait

linux内核情景分析之exit与Wait的更多相关文章

随机推荐

热门专题